代码示例:Lucene Highlighter

这里用的是 FastVectorHighlighter,它可以高效地处理大文件;前提是被高亮的字段在建索引时保存了带位置和偏移量的词向量(TermVector.WITH_POSITIONS_OFFSETS)。

<!--pom.xml-->
		<dependency>
			<groupId>org.apache.lucene</groupId>
			<artifactId>lucene-fast-vector-highlighter</artifactId>
			<version>3.0.0</version>
		</dependency>

package player.kent.chen.temp.lucene.highlight;

import java.io.File;

import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field.TermVector;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Builds a fresh Lucene index from every regular file found in the
 * {@code content} directory under a hard-coded root directory.
 *
 * <p>Each file is read as UTF-8 and stored in a single {@code contents} field
 * that is analyzed and indexed with term vectors including positions and
 * offsets, which is what the companion highlighter program reads back.
 */
public class MyHighlightIndexer {

    /**
     * Recreates the {@code index} directory and indexes all content files into it.
     *
     * @param args ignored
     * @throws IllegalStateException if the content directory cannot be listed
     * @throws Exception             if reading a file or writing the index fails
     */
    public static void main(String[] args) throws Exception {
        String rootDir = "/home/kent/diskD/home-kent-dev/workspace/kent-temp/data/lucene-sanguo";

        File contentDir = new File(rootDir, "content");
        File indexDir = new File(rootDir, "index");

        // listFiles() returns null when the path is missing or not a directory.
        // Check this before wiping the old index so a bad path does not destroy it.
        File[] files = contentDir.listFiles();
        if (files == null) {
            throw new IllegalStateException("Cannot list content directory: "
                    + contentDir.getAbsolutePath());
        }

        // Start from an empty index directory on every run.
        FileUtils.deleteDirectory(indexDir);
        indexDir.mkdirs();

        Directory indexDir1 = FSDirectory.open(indexDir);
        try {
            IndexWriter writer = new IndexWriter(indexDir1,
                    new StandardAnalyzer(Version.LUCENE_30), true,
                    IndexWriter.MaxFieldLength.UNLIMITED);
            try {
                for (File file : files) {
                    // Sub-directories cannot be read as text; skip them.
                    if (!file.isFile()) {
                        continue;
                    }
                    System.out.println("Indexing ... " + file.getAbsolutePath());
                    String text = FileUtils.readFileToString(file, "UTF-8");
                    Document doc = new Document();
                    doc.add(new Field("contents", text, Store.YES, Index.ANALYZED,
                            TermVector.WITH_POSITIONS_OFFSETS));
                    writer.addDocument(doc);
                }
            } finally {
                // Always close the writer, even when indexing fails part-way.
                writer.close();
            }
        } finally {
            indexDir1.close();
        }
    }

}

package player.kent.chen.temp.lucene.highlight;

import java.io.File;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.vectorhighlight.FastVectorHighlighter;
import org.apache.lucene.search.vectorhighlight.FieldQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Searches the {@code contents} field of the index under a hard-coded root
 * directory for a fixed keyword and prints a highlighted snippet for each of
 * the top hits, using {@link FastVectorHighlighter}.
 */
public class MyHighlighter {

    /**
     * Runs the query, then prints one best fragment per matching document.
     *
     * @param args ignored
     * @throws Exception if the index cannot be opened, the query cannot be
     *                   parsed, or searching/highlighting fails
     */
    public static void main(String[] args) throws Exception {
        String rootDir = "/home/kent/diskD/home-kent-dev/workspace/kent-temp/data/lucene-sanguo";
        File id = new File(rootDir, "index");

        String keyword = "heed";

        Directory indexDir = FSDirectory.open(id);
        try {
            IndexSearcher searcher = new IndexSearcher(indexDir);
            try {
                QueryParser qp = new QueryParser(Version.LUCENE_30, "contents",
                        new StandardAnalyzer(Version.LUCENE_30));
                Query query = qp.parse(keyword);

                // Only the 10 best-scoring documents are highlighted.
                TopDocs hits = searcher.search(query, 10);
                FastVectorHighlighter highlighter = new FastVectorHighlighter(true, true);
                FieldQuery fieldQuery = highlighter.getFieldQuery(query);

                for (ScoreDoc scoreDoc : hits.scoreDocs) {
                    // 200 is the requested fragment size in characters.
                    String snippet = highlighter.getBestFragment(fieldQuery,
                            searcher.getIndexReader(), scoreDoc.doc, "contents", 200);
                    // getBestFragment yields null when no fragment could be built;
                    // skip those instead of printing the literal text "null".
                    if (snippet != null) {
                        System.out.println(snippet);
                    }
                }
            } finally {
                // Release the searcher even when parsing or searching throws.
                searcher.close();
            }
        } finally {
            indexDir.close();
        }
    }

}

Leave a Comment

Your email address will not be published.

This site uses Akismet to reduce spam. Learn how your comment data is processed.