當前位置:成語大全網 - 新華字典 - 以下哪壹個不是搜索引擎中使用的運算符

以下哪壹個不是搜索引擎中使用的運算符

由前面的註釋我們可以知道,查詢關鍵字和字典文件準備好後,將進入用戶關鍵字分詞階段

//TSESearch.cpp中:

[csharp] view plaincopy

CHzSeg iHzSeg; // include ChSeg/HzSeg.h

// Segment the submitted query string; the result is a sequence of terms, each
// followed by a separator, e.g. "我/ 愛/ 你們/ 的/ ".
iQuery.m_sSegQuery = iHzSeg.SegmentSentenceMM(iDict, iQuery.m_sQuery);

// Split the segmented query on the separator and store the terms, in order,
// in a vector.
vector<string> vecTerm;

iQuery.ParseQuery(vecTerm);

// Collect the documents relevant to the terms into setRelevantRst.
set<string> setRelevantRst;

iQuery.GetRelevantRst(vecTerm, mapBuckets, setRelevantRst);

gettimeofday(&end_tv,&tz);

// search end

//搜索完畢

[php] view plaincopy

看CHzSeg 中的這個方法

[php] view plaincopy

//ChSeg/HzSeg.h

[html] view plaincopy

/**

* 程序翻譯說明

* 進一步淨化數據,轉換漢字

* @access public

* @param CDict, string 參數的漢字說明:字典,查詢字符串

* @return string 分詞後的字符串(各片段之後附加 SEPARATOR)

*/

// process a sentence before segmentation

//在分詞前處理句子

// Pre-processes a sentence before segmentation and returns the accumulated
// output string.
//
// The outer loop repeatedly looks at the first byte of s1, consumes a prefix
// of s1 and appends output to s2 until s1 is empty:
//   - first byte < 128: the "deal with ASCII" branch (damaged in this copy,
//     see the NOTE(review) inside the loop);
//   - a run of 2-byte non-Hanzi characters is appended to s2 followed by
//     SEPARATOR, except that a chunk starting with the byte pair 161,161 is
//     consumed without being emitted;
//   - otherwise a run of 2-byte characters is handed to
//     SegmentHzStrMM(dict, run) and the result is appended to s2.
//
// @param dict  dictionary, forwarded unchanged to SegmentHzStrMM
// @param s1    sentence to process; taken by value and consumed from the front
// @return      the accumulated output string s2
//
// NOTE(review): the byte thresholds (128, 161, 176) look like GB2312/GBK
// lead-byte ranges -- confirm the expected input encoding.
string CHzSeg::SegmentSentenceMM (CDict &dict, string s1) const

{

string s2="";

unsigned int i,len;

while (!s1.empty())

{

unsigned char ch=(unsigned char) s1[0];

if(ch<128)

{ // deal with ASCII

i=1;

len = s1.size();

// NOTE(review): the next line is damaged by HTML extraction. The rest of the
// ASCII loop and the opening of the non-Hanzi double-byte branch were
// swallowed as bogus tag attributes; the remnants mention a check against 13
// (CR), "s1=s1.substr(i);", "i=0;" and "len=s1.length();". Restore this part
// from the original HzSeg source before compiling.
while (i<len cr="" s2="" ch="=13)" added="" by="" s1="s1.substr(i);" yhf="" else="" if="" 中文標點等非漢字字符="" i="0;" len="s1.length();"></len>=161)

// The run stops at a pair whose lead byte is 161 and trail byte is 162..168
&& (!((unsigned char)s1[i]==161 && ((unsigned char)s1[i+1]>=162 && (unsigned char)s1[i+1]<=168)))

// ... or whose lead byte is 161 and trail byte is 171..191
&& (!((unsigned char)s1[i]==161 && ((unsigned char)s1[i+1]>=171 && (unsigned char)s1[i+1]<=191)))

// NOTE(review): as parenthesised, only the trail values 172 and 161 are tied
// to lead byte 163; the trail values 168, 169, 186, 187 and 191 on the next
// two lines stop the run for ANY lead byte. Possibly unintended -- confirm.
&& (!((unsigned char)s1[i]==163 && ((unsigned char)s1[i+1]==172 || (unsigned char)s1[i+1]==161)

|| (unsigned char)s1[i+1]==168 || (unsigned char)s1[i+1]==169 || (unsigned char)s1[i+1]==186

|| (unsigned char)s1[i+1]==187 || (unsigned char)s1[i+1]==191)))

{

i=i+2; // assume there is no half (truncated) double-byte character

}

// Always consume at least one 2-byte character so the outer loop advances.
if (i==0) i=i+2;

// Do not emit the Chinese (double-byte) space, i.e. the byte pair 161,161

if (!(ch==161 && (unsigned char)s1[1]==161))

{

if (i <= s1.size()) // yhf

// other non-Hanzi double-byte characters may be emitted together as one run

s2 += s1.substr(0, i) + SEPARATOR;

else break; // yhf

}

// Drop the consumed prefix; stop if i ran past the end of s1.
if (i <= s1.size()) // yhf

s1=s1.substr(i);

else break; //yhf

continue;

}

}

// Handle a run of Hanzi below

i = 2;

len = s1.length();

// NOTE(review): the loop condition below is damaged by HTML extraction;
// presumably it was "i<len && (unsigned char)s1[i]>=176" -- confirm against
// the original source. The commented-out alternative after it is damaged too.
while(i<len></len>=176)

// while(i<len></len>=128 && (unsigned char)s1[i]!=161)

i+=2;

// Segment the run s1[0, i) with SegmentHzStrMM and append its result.
s2+=SegmentHzStrMM(dict, s1.substr(0,i));

if (i <= len) // yhf

s1=s1.substr(i);

else break; // yhf

}

return s2;

}

[html] view plaincopy

[html] view plaincopy

//Query.cpp

[html] view plaincopy

<pre class="csharp" name="code">/**

* 程序翻譯說明

* 將以"/"劃分開的關鍵字一一按順序放入一個向量容器中

*

* @access public

* @param vector<string> 參數的漢字說明:向量容器

* @return void

*/

void CQuery::ParseQuery(vector<string></string> &vecTerm)

{

string::size_type idx;

while ( (idx = m_sSegQuery.find("/ ")) != string::npos ) {

vecTerm.push_back(m_sSegQuery.substr(0,idx));

m_sSegQuery = m_sSegQuery.substr(idx+3);

}

}

</pre>

<pre class="csharp" name="code"> </pre>

<pre class="csharp" name="code"><pre class="csharp" name="code">/**

* 程序翻譯說明

* 相關性分析查詢,構造結果集合setRelevantRst //瓶頸所在

*

* @access public

* @param vector<string> map<string,string> set<string> 參數的漢字說明: 用戶提交關鍵字的分詞組,倒排索引映射,相關性結果集合

* @return bool 總是返回 true

*/

bool CQuery::GetRelevantRst

(

vector<string></string> &vecTerm,

map &mapBuckets,

set<string></string> &setRelevantRst

) const

{

set<string></string> setSRst;

bool bFirst=true;

vector<string></string>::iterator itTerm = vecTerm.begin();

for ( ; itTerm != vecTerm.end(); ++itTerm )

{

setSRst.clear();

copy(setRelevantRst.begin(), setRelevantRst.end(), inserter(setSRst,setSRst.begin()));

map mapRstDoc;

string docid;

int doccnt;

map::iterator itBuckets = mapBuckets.find(*itTerm);

if (itBuckets != mapBuckets.end())

{

string strBucket = (*itBuckets).second;

string::size_type idx;

idx = strBucket.find_first_not_of(" ");

strBucket = strBucket.substr(idx);

while ( (idx = strBucket.find(" ")) != string::npos )

{

docid = strBucket.substr(0,idx);

doccnt = 0;

if (docid.empty()) continue;

map::iterator it = mapRstDoc.find(docid);

if ( it != mapRstDoc.end() )

{

doccnt = (*it).second + 1;

mapRstDoc.erase(it);

}

mapRstDoc.insert( pair(docid,doccnt) );

strBucket = strBucket.substr(idx+1);

}

// remember the last one

docid = strBucket;

doccnt = 0;

map::iterator it = mapRstDoc.find(docid);

if ( it != mapRstDoc.end() )

{

doccnt = (*it).second + 1;

mapRstDoc.erase(it);

}

mapRstDoc.insert( pair(docid,doccnt) );

}

// sort by term frequencty

multimap > newRstDoc;

map::iterator it0 = mapRstDoc.begin();

for ( ; it0 != mapRstDoc.end(); ++it0 ){

newRstDoc.insert( pair((*it0).second,(*it0).first) );

}

multimap::iterator itNewRstDoc = newRstDoc.begin();

setRelevantRst.clear();

for ( ; itNewRstDoc != newRstDoc.end(); ++itNewRstDoc ){

string docid = (*itNewRstDoc).second;

if (bFirst==true) {

setRelevantRst.insert(docid);

continue;

}

if ( setSRst.find(docid) != setSRst.end() ){

setRelevantRst.insert(docid);

}

}

//cout << "setRelevantRst.size(): " << setRelevantRst.size() << "<br>";

bFirst = false;

}

return true;

}</pre>

</pre>

接下來的就是顯示了