當前位置:成語大全網 - 漢語詞典 - Java中文分詞算法

Java中文分詞算法

這兩天正好在玩lucene,沒有用分詞,主要是覺得配置環境比較麻煩。

下面是演示。記得加入 lucene-core-2.3.2.jar、lucene-Analyzer.jar 和 IKAnalyzer.jar。有問題可以打電話給我。

import java.io.Reader;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.analysis.cn.ChineseAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;

import org.mira.lucene.analysis.IK_CAnalyzer;

公共類測試分析器{

private static string test string 1 = "長代碼往往是復雜的標誌,這會使代碼難以測試和維護。";

公共靜態void test standard(String test String)引發異常{

analyzer analyzer = new standard analyzer();

reader r = new string reader(test string);

stop filter SF =(stop filter)analyzer . token stream(",r);

system . err . println(" = = = = =標準分析儀= = = = ");

System.err.println("分析方法:默認情況下,只有文字沒有文字");

token t;

while ((t = sf.next())!= null) {

system . out . println(t . term text());

}

}

公共靜態void testCJK(String testString)引發異常{

analyzer analyzer = new CJKAnalyzer();

reader r = new string reader(test string);

stop filter SF =(stop filter)analyzer . token stream(",r);

system . err . println(" = = = = = cjk analyzer = = = = ");

System.err.println("分析方法:跨詞切分");

token t;

while ((t = sf.next())!= null) {

system . out . println(t . term text());

}

}

公共靜態void testChiniese(String test String)引發異常{

analyzer analyzer = new Chinese analyzer();

reader r = new string reader(test string);

token filter TF =(token filter)analyzer . token stream(",r);

system . err . println(" = = = = =中文分析器= = = = ");

System.err.println("分析方法:基本等同於standard analyzer ");

token t;

while ((t = tf.next())!= null) {

system . out . println(t . term text());

}

}

公共靜態void testJe(String testString)引發異常{

// Analyzer analyzer =新MIK _ can alyzer();

analyzer analyzer = new IK _ can alyzer();

reader r = new string reader(test string);

token stream ts =(token stream)analyzer . token stream(",r);

system . err . println(" = = = = = je analyzer = = = = ");

System.err.println("分析方法:詞典切分,正反雙向搜索,具體未知");

token t;

while ((t = ts.next())!= null) {

system . out . println(t . term text());

}

}

公共靜態void main(String[] args)引發異常{

//String test String = test String 1;

string test string = test string 1;

system . out . println(test string);

test standard(test string);

test cjk(test string);

//testPaoding(testString);

testChiniese(test string);

testJe(test string);

}

}