lucene入门例子

df274119386

浏览: 54108 次
性别:
来自: 广州

最近访客更多访客>>

mqp123

hui8080

fcr1209973

我叫陈大炮

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

lucene Apache

一个 Lucene 的入门例子，其它的就不多说了。

package com.fei.lucene;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Date;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;

/**
 * test lucene
 * 
 * @author
 * 
 */
public class TxtFileIndexer {

	/**
	 * Rebuilds the index from the data directory, then runs the sample query
	 * against it.
	 *
	 * @param args
	 *            unused
	 * @throws IOException
	 *             if indexing or searching fails on I/O
	 */
	public static void main(String[] args) throws IOException {

		// Where the index files are written.
		String indexDir = "E:\\indexDir";

		// Where the text files to be indexed live.
		String dataDir = "E:\\dataDir";

		TxtFileIndexer ifi = new TxtFileIndexer();
		ifi.createIndex(indexDir, dataDir);
		try {
			ifi.searchIndex(indexDir);
		} catch (ParseException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Runs a fixed sample query over the "contents" and "filename" fields of
	 * the index and prints the total hit count followed by, for each of the
	 * top 10 hits, the stored "filename" and the (unstored) "contents" value.
	 *
	 * @param indexDir
	 *            directory holding the index to search
	 * @throws IOException
	 *             if the index cannot be opened or read
	 * @throws ParseException
	 *             if the query string cannot be parsed
	 */
	public void searchIndex(String indexDir) throws IOException, ParseException {

		Directory dir = new SimpleFSDirectory(new File(indexDir));
		try {
			IndexSearcher indexSearch = new IndexSearcher(dir);
			try {
				StandardAnalyzer sa = new StandardAnalyzer(Version.LUCENE_31);

				// The query is parsed against both of these fields.
				String[] fields = new String[] { "contents", "filename" };
				QueryParser queryParser = new MultiFieldQueryParser(
						Version.LUCENE_31, fields, sa);

				Query query = queryParser.parse("森");

				// TopDocs carries the total hit count plus the ids of the
				// best-scoring documents (at most 10 here).
				TopDocs hits = indexSearch.search(query, 10);
				System.out.println("找到了" + hits.totalHits + "个");

				for (ScoreDoc sdoc : hits.scoreDocs) {
					// Resolve the document id back to its stored fields.
					Document doc = indexSearch.doc(sdoc.doc);

					System.out.println(doc.get("filename"));

					// "contents" is added from a Reader in createIndex, so it
					// is indexed but not stored and this prints null. To show
					// the text, re-read the file using the stored filename.
					System.out.println(doc.get("contents"));
				}
			} finally {
				// Release the searcher even when parsing or searching fails.
				indexSearch.close();
			}
		} finally {
			dir.close();
		}

	}

	/**
	 * Builds a fresh index (replacing any existing one) containing one
	 * document per regular file found directly inside the data directory.
	 * Each document has an analyzed, unstored "contents" field, an analyzed,
	 * stored "filename" field and a stored "indexDate" field at day
	 * resolution. Sub-directories are skipped.
	 *
	 * @param indexDir
	 *            directory the index is written to
	 * @param dataDir
	 *            directory whose files are indexed
	 * @throws IOException
	 *             if the data directory cannot be listed, or on any indexing
	 *             I/O failure
	 */
	public void createIndex(String indexDir, String dataDir) throws IOException {

		// Validate the input before opening the writer: opening with
		// create=true discards the existing index, which must not happen when
		// there is nothing to index from.
		File[] files = new File(dataDir).listFiles();
		if (files == null) {
			throw new IOException("Cannot list data directory: " + dataDir);
		}

		// Same value for every document, so compute it once.
		String indexDate = DateTools.dateToString(new Date(),
				DateTools.Resolution.DAY);

		Directory dir = new SimpleFSDirectory(new File(indexDir));
		try {
			// Arguments: target directory, analyzer, create flag (true =
			// start a new index, false = append to the existing one), and the
			// maximum number of terms indexed per field (UNLIMITED = no
			// truncation).
			IndexWriter indexWriter = new IndexWriter(dir,
					new StandardAnalyzer(Version.LUCENE_31), true,
					IndexWriter.MaxFieldLength.UNLIMITED);
			try {
				for (int i = 0; i < files.length; i++) {
					// A directory cannot be opened with FileReader.
					if (!files[i].isFile()) {
						continue;
					}

					FileReader reader = new FileReader(files[i]);
					try {
						Document doc = new Document();

						// Reader-backed field: tokenized and indexed, not stored.
						doc.add(new Field("contents", reader));

						// ANALYZED: the value is run through the analyzer.
						doc.add(new Field("filename", files[i].getName(),
								Field.Store.YES, Field.Index.ANALYZED));

						doc.add(new Field("indexDate", indexDate,
								Field.Store.YES, Field.Index.NOT_ANALYZED));

						indexWriter.addDocument(doc);
					} finally {
						// Guarantees the file handle is released even if
						// addDocument fails; a repeated close is harmless.
						reader.close();
					}
				}
				System.out.println("numDocs:" + indexWriter.numDocs());
			} finally {
				indexWriter.close();
			}
		} finally {
			dir.close();
		}

	}

}

分享到：