當前位置:成語大全網 - 英語詞典 - java 怎麽用lucenes進行分詞

java 怎麽用lucenes進行分詞

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;

import org.apache.lucene.document.Document;

import org.apache.lucene.document.Field;

import org.apache.lucene.document.StringField;

import org.apache.lucene.document.TextField;

import org.apache.lucene.index.CorruptIndexException;

import org.apache.lucene.index.DirectoryReader;

import org.apache.lucene.index.IndexReader;

import org.apache.lucene.index.IndexWriter;

import org.apache.lucene.index.IndexWriterConfig;

import org.apache.lucene.index.IndexWriterConfig.OpenMode;

import org.apache.lucene.queryparser.classic.ParseException;

import org.apache.lucene.queryparser.classic.QueryParser;

import org.apache.lucene.search.IndexSearcher;

import org.apache.lucene.search.Query;

import org.apache.lucene.search.ScoreDoc;

import org.apache.lucene.search.TopDocs;

import org.apache.lucene.store.Directory;

import org.apache.lucene.store.LockObtainFailedException;

import org.apache.lucene.store.RAMDirectory;

import org.apache.lucene.util.Version;

import org.wltea.analyzer.lucene.IKAnalyzer;

/**

* 使用IKAnalyzer進行Lucene索引和查詢的演示

* 2012-3-2

*

* 以下是結合Lucene4.0 API的寫法

*

*/

public class LuceneIndexAndSearchDemo {

/**

* 模擬:

* 創建壹個單條記錄的索引,並對其進行搜索

* @param args

*/

public static void main(String[] args){

//Lucene Document的域名

String fieldName = "text";

//檢索內容

String text = "IK Analyzer是壹個結合詞典分詞和文法分詞的中文分詞開源工具包。它使用了全新的正向叠代最細粒度切分算法。";

//實例化IKAnalyzer分詞器

Analyzer analyzer = new IKAnalyzer(true);

Directory directory = null;

IndexWriter iwriter = null;

IndexReader ireader = null;

IndexSearcher isearcher = null;

try {

//建立內存索引對象

directory = new RAMDirectory();

//配置IndexWriterConfig

IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_40 , analyzer);

iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);

iwriter = new IndexWriter(directory , iwConfig);

//寫入索引

Document doc = new Document();

doc.add(new StringField("ID", "10000", Field.Store.YES));

doc.add(new TextField(fieldName, text, Field.Store.YES));

iwriter.addDocument(doc);

iwriter.close();

//搜索過程**********************************

//實例化搜索器

ireader = DirectoryReader.open(directory);

isearcher = new IndexSearcher(ireader);

String keyword = "中文分詞工具包";

//使用QueryParser查詢分析器構造Query對象

QueryParser qp = new QueryParser(Version.LUCENE_40, fieldName, analyzer);

qp.setDefaultOperator(QueryParser.AND_OPERATOR);

Query query = qp.parse(keyword);

System.out.println("Query = " + query);

//搜索相似度最高的5條記錄

TopDocs topDocs = isearcher.search(query , 5);

System.out.println("命中:" + topDocs.totalHits);

//輸出結果

ScoreDoc[] scoreDocs = topDocs.scoreDocs;

for (int i = 0; i < topDocs.totalHits; i++){

Document targetDoc = isearcher.doc(scoreDocs[i].doc);

System.out.println("內容:" + targetDoc.toString());

}

} catch (CorruptIndexException e) {

e.printStackTrace();

} catch (LockObtainFailedException e) {

e.printStackTrace();

} catch (IOException e) {

e.printStackTrace();

} catch (ParseException e) {

e.printStackTrace();

} finally{

if(ireader != null){

try {

ireader.close();

} catch (IOException e) {

e.printStackTrace();

}

}

if(directory != null){

try {

directory.close();

} catch (IOException e) {

e.printStackTrace();

}

}

}

}

}