无干货,仅供复制
位置信息类
package player.kent.chen.temp.lucene.span;
import org.apache.commons.lang.builder.ToStringBuilder;
/**
 * Location of a keyword occurrence: the file it was found in and its position
 * in that file's token stream.
 *
 * <p>Instances are obtained through {@link #createInstance(String, int)}; the
 * constructor is private, so the class cannot be subclassed from outside.
 */
public class KeywordLocation {

    /** Name or path of the file the keyword was found in; may be {@code null}. */
    private String file;

    /** Position in the token stream. */
    private int position;

    private KeywordLocation() {
    }

    /**
     * Creates a location for the given file and token position.
     *
     * @param file     name or path of the file; stored as given, not validated
     * @param position position in the token stream
     * @return a new instance holding both values
     */
    public static final KeywordLocation createInstance(String file, int position) {
        KeywordLocation instance = new KeywordLocation();
        instance.file = file;
        instance.position = position;
        return instance;
    }

    /** @return the file name or path, possibly {@code null} */
    public String getFile() {
        return file;
    }

    /** @param file the file name or path to store */
    public void setFile(String file) {
        this.file = file;
    }

    /** @return the position in the token stream */
    public int getPosition() {
        return position;
    }

    /** @param position the position in the token stream to store */
    public void setPosition(int position) {
        this.position = position;
    }

    /**
     * Renders the instance as {@code KeywordLocation[file=<file>,position=<position>]}.
     *
     * <p>The layout mirrors commons-lang's short-prefix style (short class name,
     * comma-separated {@code name=value} pairs, {@code <null>} for a null value)
     * but is built by hand, so it needs neither reflection nor the
     * {@code ToStringStyle} class.
     */
    @Override
    public String toString() {
        String fileText = (file == null) ? "<null>" : file;
        return "KeywordLocation[file=" + fileText + ",position=" + position + "]";
    }
}
搜索器
package player.kent.chen.temp.lucene.span;
import java.io.File;
/**
 * Command-line demo that looks up an occurrence of a keyword in a Lucene index
 * with a {@code SpanTermQuery} and prints the file it was found in together
 * with its token position.
 */
public class FindFirstOccurenceSearcher {

    /** Data root used when no command-line argument is supplied. */
    private static final String DEFAULT_ROOT_DIR =
            "/home/kent/diskD/home-kent-dev/workspace/kent-temp/data/lucene-sanguo";

    /**
     * Opens the index under {@code <root>/index}, searches for the keyword
     * "Brotherhood" and prints the result as {@code "keyword":location}.
     *
     * @param args optional; {@code args[0]} overrides the data root directory
     * @throws Exception if the index cannot be opened or read
     */
    public static void main(String[] args) throws Exception {
        String rootDir = (args != null && args.length > 0) ? args[0] : DEFAULT_ROOT_DIR;
        Directory indexDir = FSDirectory.open(new File(rootDir, "index"));
        try {
            IndexSearcher searcher = new IndexSearcher(indexDir);
            try {
                String keyword = "Brotherhood";
                KeywordLocation kl = findFirstOccurence(searcher, keyword);
                System.out.println(MessageFormat.format("\"{0}\":{1}", keyword, kl));
            } finally {
                // Release the searcher even when the lookup fails.
                searcher.close();
            }
        } finally {
            indexDir.close();
        }
    }

    /**
     * Returns the location of the first span match for {@code keyword} in the
     * "contents" field, or {@code null} when the term does not occur.
     *
     * <p>"First" means the first match the span enumeration yields.
     *
     * @param searcher searcher whose underlying reader is queried
     * @param keyword  term to look up; lower-cased before querying, presumably
     *                 to match a lower-casing analyzer used at index time
     * @return the stored "filepath" value and start position of the match, or
     *         {@code null} if there is none
     * @throws IOException           if the index cannot be read
     * @throws CorruptIndexException if the index is corrupt
     */
    private static KeywordLocation findFirstOccurence(IndexSearcher searcher, String keyword)
            throws IOException, CorruptIndexException {
        // Locale.ROOT keeps the lower-casing independent of the JVM's default
        // locale (e.g. the Turkish dotless i).
        Term term = new Term("contents", keyword.toLowerCase(java.util.Locale.ROOT));
        SpanTermQuery spanTermQuery = new SpanTermQuery(term);
        IndexReader indexReader = searcher.getIndexReader();
        Spans spans = spanTermQuery.getSpans(indexReader);
        // next() reports whether a match exists, so no separate search is
        // needed just to detect "no hits".
        if (!spans.next()) {
            return null;
        }
        Document doc = indexReader.document(spans.doc());
        String file = doc.get("filepath");
        int position = spans.start();
        return KeywordLocation.createInstance(file, position);
    }
}
另附索引器
package player.kent.chen.temp.lucene.span;
import java.io.File;
/**
 * Command-line demo that rebuilds a Lucene index from the files of a content
 * directory. Every file becomes one document with an indexed "contents" field
 * (read from the file) and a stored "filepath" field (its absolute path).
 */
public class LearnSpanLuceneIndexer {

    /** Data root used when no command-line argument is supplied. */
    private static final String DEFAULT_ROOT_DIR =
            "/home/kent/diskD/home-kent-dev/workspace/kent-temp/data/lucene-sanguo";

    /**
     * Deletes and recreates {@code <root>/index}, indexes the files found in
     * {@code <root>/content} and prints the elapsed time in milliseconds.
     *
     * @param args optional; {@code args[0]} overrides the data root directory
     * @throws Exception if the index directory cannot be reset or indexing fails
     */
    public static void main(String[] args) throws Exception {
        String rootDir = (args != null && args.length > 0) ? args[0] : DEFAULT_ROOT_DIR;
        File contentDir = new File(rootDir, "content");
        File indexDir = new File(rootDir, "index");
        FileUtils.deleteDirectory(indexDir);
        indexDir.mkdirs();
        long begin = now();
        doIndex(contentDir, indexDir);
        System.out.println("Done in miliseconds of : " + (now() - begin));
    }

    /**
     * Indexes every regular file directly inside {@code cd} into a new index
     * at {@code id}.
     *
     * @param cd content directory whose files are indexed
     * @param id directory the index is written to
     * @throws IOException if {@code cd} cannot be listed, a file cannot be read
     *                     or the index cannot be written
     */
    private static void doIndex(File cd, File id) throws IOException {
        // listFiles() returns null when cd is missing, not a directory or unreadable.
        File[] files = cd.listFiles();
        if (files == null) {
            throw new IOException("Cannot list content directory: " + cd.getAbsolutePath());
        }
        Directory indexDir = FSDirectory.open(id);
        try {
            IndexWriter writer = new IndexWriter(indexDir, new StandardAnalyzer(Version.LUCENE_30),
                    true, IndexWriter.MaxFieldLength.UNLIMITED);
            try {
                for (File file : files) {
                    // A sub-directory cannot be opened with FileReader; skip it.
                    if (!file.isFile()) {
                        continue;
                    }
                    System.out.println("Indexing ... " + file.getAbsolutePath());
                    FileReader contents = new FileReader(file);
                    try {
                        Document doc = new Document();
                        doc.add(new Field("contents", contents));
                        doc.add(new Field("filepath", file.getAbsolutePath(), Field.Store.YES,
                                Field.Index.ANALYZED));
                        // The reader is consumed while the document is added, so
                        // it must stay open until addDocument returns.
                        writer.addDocument(doc);
                    } finally {
                        contents.close();
                    }
                }
            } finally {
                // Closing the writer even on failure releases the index write lock.
                writer.close();
            }
        } finally {
            indexDir.close();
        }
    }

    /** @return the current time in milliseconds */
    private static long now() {
        return System.currentTimeMillis();
    }
}