// Usage: ./CrtInvertedIdx moon.fidx.sort > sun.iidx
//
// Reads a text file whose lines have the form
//     <term> TAB <posting>
// and writes one line per run of consecutive identical terms to stdout:
//     <term> TAB " " <posting1> " " <posting2> ...
// (each posting is preceded by a single space, so the list starts with one).
//
// Only ADJACENT lines with the same term are merged, so the input is expected
// to be sorted by term beforehand (presumably what the ".sort" suffix in the
// usage example indicates -- confirm against the producing tool).
#include <fstream>
#include <iostream>
#include <string>

namespace {

// Accepted term length, in bytes. Per the original author's note, shorter
// terms cannot form a Chinese word and longer ones exceed the maximum
// matching unit. NOTE(review): presumably a 2-byte encoding such as GBK is
// assumed -- confirm.
const std::string::size_type kMinTermBytes = 2;
const std::string::size_type kMaxTermBytes = 8;

// Writes one inverted-index record. '\n' is used instead of std::endl so the
// stream is not flushed after every record.
void writeRecord(std::ostream& out, const std::string& term,
                 const std::string& postings)
{
    out << term << '\t' << postings << '\n';
}

// Groups consecutive lines of `in` that share the same term and writes one
// record per group to `out`.
//
// Lines are skipped when they contain no TAB (there is no posting to record)
// or when the term length is outside [kMinTermBytes, kMaxTermBytes].
// Nothing is written if no line is accepted.
void buildInvertedIndex(std::istream& in, std::ostream& out)
{
    std::string line;
    std::string currentTerm;  // empty means "no group open yet"; accepted
                              // terms are never empty (>= kMinTermBytes)
    std::string postings;     // accumulated " p1 p2 ..." for currentTerm

    while (std::getline(in, line)) {
        const std::string::size_type tabPos = line.find('\t');
        if (tabPos == std::string::npos)
            continue;  // malformed line: no term/posting separator

        // tabPos is also the byte length of the term.
        if (tabPos < kMinTermBytes || tabPos > kMaxTermBytes)
            continue;

        const bool sameTerm = line.compare(0, tabPos, currentTerm) == 0;
        if (!sameTerm) {
            // The term changed: flush the finished group (if any) and start
            // a new one.
            if (!currentTerm.empty())
                writeRecord(out, currentTerm, postings);
            currentTerm.assign(line, 0, tabPos);
            postings.clear();
        }

        // Append in place; rebuilding the string on every line would make a
        // long posting list quadratic in its length.
        postings += ' ';
        postings.append(line, tabPos + 1, std::string::npos);
    }

    // Flush the last open group, but do not emit an empty record when the
    // input contained no usable lines.
    if (!currentTerm.empty())
        writeRecord(out, currentTerm, postings);
}

}  // namespace

// Entry point. Expects the input file path as argv[1] and writes the
// inverted index to stdout. Returns 0 on success, -1 if the argument is
// missing or the file cannot be opened.
int main(int argc, char* argv[])
{
    if (argc < 2) {
        std::cerr << "Usage: " << (argc > 0 && argv[0] ? argv[0] : "CrtInvertedIdx")
                  << " <input-file>\n";
        return -1;
    }

    std::ifstream ifsImgInfo(argv[1]);  // open the input stream
    if (!ifsImgInfo) {
        std::cerr << "Cannot open " << argv[1] << " for input\n";
        return -1;
    }

    buildInvertedIndex(ifsImgInfo, std::cout);
    return 0;
}