Lucene 3 相关文章搜索的实现
// Analyzer shared by this demo; it is handed to the IndexWriter when the index is built.
Analyzer analyzer = new IKAnalyzer(); // tokenizer (analyzer) selection
// Directory that holds the index: written by createRamIndex(), read by the search methods.
Directory ramDir = new RAMDirectory();
/**
 * Builds the demo index in {@code ramDir}: three documents, each carrying a
 * "title", an "author" and a "subject" field.
 *
 * NOTE(review): this listing appears to be truncated. Several statements
 * are cut off mid-expression (the IndexWriter constructor call and the
 * "subject" fields), and no addDocument/close calls or closing brace are
 * visible. Restore the missing parts from the original source before compiling.
 *
 * @throws CorruptIndexException declared by the method; raised by Lucene
 * @throws LockObtainFailedException declared by the method; raised by Lucene
 * @throws IOException declared by the method; raised by Lucene
 */
public void createRamIndex() throws CorruptIndexException,
LockObtainFailedException, IOException {
// NOTE(review): the remaining constructor arguments are missing from the listing.
IndexWriter writer = new IndexWriter(ramDir, analyzer,
// Document 1: title "wenhq", author "callan".
Document doc1 = new Document();
doc1.add(new Field("title", "wenhq", Field.Store.YES, Field.Index.ANALYZED));
doc1.add(new Field("author", "callan", Field.Store.YES, Field.Index.ANALYZED));
// NOTE(review): the subject text and store option of this field are missing.
// The visible tail requests term vectors with positions and offsets;
// morelikeSearch() reads the term vector of the "subject" field.
doc1.add(new Field("subject",
Field.Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
// Document 2: title "english", author "wcq".
Document doc2 = new Document();
doc2.add(new Field("title", "english", Field.Store.YES, Field.Index.ANALYZED));
doc2.add(new Field("author", "wcq", Field.Store.YES, Field.Index.ANALYZED));
// NOTE(review): the trailing argument(s) of this call are missing (presumably the term-vector option, as for doc1 - confirm).
doc2.add(new Field("subject", "学习english的人很多,亲亲宝宝网站的人也在学习", Field.Store.YES, Field.Index.ANALYZED,
// Document 3: title "asp", author "ca".
Document doc3 = new Document();
doc3.add(new Field("title", "asp", Field.Store.YES, Field.Index.ANALYZED));
doc3.add(new Field("author", "ca", Field.Store.YES, Field.Index.ANALYZED));
// NOTE(review): the trailing argument(s) of this call are missing as well; the code that adds the documents to the writer is not visible.
doc3.add(new Field("subject", "asp是一种网站开发语言", Field.Store.YES, Field.Index.ANALYZED,
public void search() throws CorruptIndexException, IOException {
IndexReader reader = IndexReader.open(ramDir);
IndexSearcher searcher = new IndexSearcher(reader);
Term term = new Term("title", "wenhq"); // 在title里查询wenhq词条
TermQuery query = new TermQuery(term);
TopScoreDocCollector collector = TopScoreDocCollector.create(10000,
searcher.search(query, collector);
ScoreDoc[] hits = collector.topDocs().scoreDocs;
for (int i = 0; i < hits.length; i++) {
Document doc = searcher.doc(hits[i].doc);
System.out.println("search: ");
System.out.println(doc.get("title") + "###" + doc.get("subject"));
morelikeSearch(reader, hits[i].doc);
private void morelikeSearch(IndexReader reader, int id) throws IOException {
System.out.println("moreLike search: ");
// 根据这个document的id获取这个field的Term Vector
// 信息,就是这个field分词之后在这个field里的频率、位置、等信息
TermFreqVector vector = reader.getTermFreqVector(id, "subject");
BooleanQuery query = new BooleanQuery();
for (int i = 0; i < vector.size(); i++) {
TermQuery tq = new TermQuery(new Term("subject",
vector.getTerms()[i])); // 获取每个term保存的Token
query.add(tq, BooleanClause.Occur.SHOULD);
IndexSearcher searcher = new IndexSearcher(ramDir);
TopScoreDocCollector collector = TopScoreDocCollector.create(10000,
searcher.search(query, collector);
ScoreDoc[] hits = collector.topDocs().scoreDocs;
for (int i = 0; i < hits.length; i++) {
Document doc = searcher.doc(hits[i].doc);
System.out.println(doc.get("title") + "###" + doc.get("subject"));
/**
 * Demo entry point: creates a MoreLike instance.
 *
 * NOTE(review): the listing stops after the constructor call; the statements
 * that drive the demo and the closing braces are not visible here.
 *
 * @param args command-line arguments; not used by the visible code
 * @throws CorruptIndexException declared by the method; raised by Lucene
 * @throws IOException declared by the method; raised by Lucene
 */
public static void main(String[] args) throws CorruptIndexException,
IOException {
MoreLike t = new MoreLike();
// NOTE(review): presumably followed by t.createRamIndex() and t.search() - confirm against the original source.
欢迎转载,请注明出处:亲亲宝宝