`
rockethj8
  • 浏览: 24081 次
  • 性别: Icon_minigender_1
  • 来自: 深圳
社区版块
存档分类
最新评论

使用Lucene 2.4.0 建立索引+搜索结果高亮显示(二)

阅读更多
使用 Lucene 2.4 版本执行查询，并对搜索结果中的关键词进行高亮显示：
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.TermPositionVector;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocCollector;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.TokenSources;

/**
 * Command-line demo for Lucene 2.4: searches an existing index and prints
 * each hit's path together with highlighted fragments of its body text.
 *
 * <p>The query string is matched against the {@code path} and {@code body}
 * fields; a document matches if either field matches (both clauses are
 * {@code SHOULD}).
 */
public class NewSearch
{
    /**
     * Runs the fixed query {@code "oracle"} against the index stored in the
     * {@code index} directory and prints at most 10 hits to standard output.
     *
     * @param args unused
     * @throws Exception if the index cannot be opened or read, or the query
     *                   cannot be parsed
     */
    public static void main(String[] args) throws Exception
    {
        String indexDir = "index";
        String queryString = "oracle";
        StandardAnalyzer analyzer = new StandardAnalyzer();

        IndexSearcher isearcher = new IndexSearcher(indexDir);
        try
        {
            BooleanClause.Occur[] clauses = {BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD};
            // Collector that keeps only the 10 best-scoring hits.
            TopDocCollector collector = new TopDocCollector(10);
            Query query = MultiFieldQueryParser.parse(queryString, new String[]{"path", "body"}, clauses,
                    analyzer);
            isearcher.search(query, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;

            // Matched terms are wrapped in bold red markup; fragments are about 60 characters long.
            SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<strong><font color='red'>", "</font></strong>");
            SimpleFragmenter fragmenter = new SimpleFragmenter(60);
            Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
            highlighter.setTextFragmenter(fragmenter);
            int maxNumFragmentsRequired = 10;
            String fragmentSeparator = "";

            for (int i = 0; i < hits.length; i++)
            {
                int docId = hits[i].doc; // Lucene's internal document number
                Document doc = isearcher.doc(docId);
                System.out.println("所在文件路径:"+doc.get("path"));

                String body = doc.get("body");
                String result = "";
                // Without stored body text there is nothing to cut fragments from.
                if (body != null)
                {
                    // Uses the stored term position vector when the index has one and
                    // otherwise re-analyzes the stored text, so indexes built without
                    // term vectors (or without positions) no longer fail here.
                    TokenStream tokenstream = TokenSources.getAnyTokenStream(
                            isearcher.getIndexReader(), docId, "body", analyzer);
                    result = highlighter.getBestFragments(tokenstream, body, maxNumFragmentsRequired, fragmentSeparator);
                }
                System.out.println("内容"+result);
            }
        }
        finally
        {
            // Release the index reader and its file handles even if the search fails.
            isearcher.close();
        }
    }
}



参考：Lucene 2.4 中部分已过期（deprecated）方法的替代方案
http://extjs2.iteye.com/blog/268014
分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics