// Usage: ./ForwardDocIdx Tianwang.raw.2559638448.seg > moon.fdx
//
// Builds a forward document index from a segmented-text file.
// The input alternates line by line:
//   odd lines  : a document number
//   even lines : that document's word-segmentation result, where every word
//                is terminated by SEPARATOR
// For every word, one "<word>\t<document number>" record is written to stdout.
#include <fstream>
#include <iostream>
#include <string>

// Separator that terminates each word on a segmented line.
const std::string SEPARATOR("/ ");

// Writes one "<word>\t<docNum>\n" record to `out` for every
// SEPARATOR-terminated word in `line`.
// Text following the last SEPARATOR (if any) is intentionally not emitted,
// which matches the program's historical output.
static void emitWordRecords(const std::string& line,
                            const std::string& docNum,
                            std::ostream& out)
{
    std::string::size_type wordBegin = 0;
    std::string::size_type wordEnd = 0;

    // Scan with a moving offset instead of repeatedly re-copying the tail of
    // the line, so a line is processed in linear time.
    while ((wordEnd = line.find(SEPARATOR, wordBegin)) != std::string::npos) {
        // '\n' rather than std::endl: same bytes, no flush per record.
        out << line.substr(wordBegin, wordEnd - wordBegin) << "\t" << docNum << '\n';
        wordBegin = wordEnd + SEPARATOR.size();
    }
}

// Entry point.
//   argv[1] : path of the segmented input file.
// Returns 0 on success, -1 if the argument is missing or the file cannot be
// opened.
int main(int argc, char* argv[])
{
    // Without this guard argv[1] would be a null pointer when no file is given.
    if (argc < 2) {
        std::cerr << "Usage: " << (argc > 0 ? argv[0] : "ForwardDocIdx")
                  << " <segmented-file>\n";
        return -1;
    }

    std::ifstream ifsImgInfo(argv[1]);  // open the input stream
    if (!ifsImgInfo) {
        std::cerr << "Cannot open " << argv[1] << " for input\n";
        return -1;
    }

    std::string strLine;
    std::string strDocNum;

    // Lines alternate document-number / segmented-text; a flag tracks which
    // kind comes next (a plain counter could overflow on huge inputs).
    bool expectDocNum = true;

    while (std::getline(ifsImgInfo, strLine)) {
        const bool isDocNumLine = expectDocNum;
        expectDocNum = !expectDocNum;

        if (isDocNumLine) {
            // Odd line: remember the document number for the next line.
            strDocNum = strLine;
            continue;
        }

        // Even line: skip empty lines and lines starting with NUL, '#' or a
        // newline character. Skipped lines still count toward the alternation.
        if (strLine.empty() || strLine[0] == '\0' || strLine[0] == '#' ||
            strLine[0] == '\n') {
            continue;
        }

        // Even line: the word-segmentation result of the document.
        emitWordRecords(strLine, strDocNum, std::cout);
    }

    return 0;
}