zoukankan      html  css  js  c++  java
  • CrtInvertedIdx

     1 // ./CrtInvertedIdx moon.fidx.sort > sun.iidx
#include <fstream>
#include <iostream>
#include <string>
     4 
     5 using namespace std;
     6 
     7 int main(int argc, char* argv[])
     8 {
     9     ifstream ifsImgInfo(argv[1]);//打开输入流
    10     if (!ifsImgInfo) {
    11         cerr << "Cannot open " << argv[1] << " for input\n";
    12         return -1;
    13     }
    14 
    15     string strLine,strDocNum,tmp1="";
    16     int cnt = 0;
    17     while (getline(ifsImgInfo, strLine)) {
    18         string::size_type idx;
    19         string tmp;
    20 
    21 
    22         idx = strLine.find("\t");
    23         tmp = strLine.substr(0,idx);
    24         //得到索引词
    25         if (tmp.size()<2 || tmp.size() > 8) continue;//不可能构成中文或者大于最大匹配单元
    26 
    27         if (tmp1.empty()) tmp1=tmp;
    28 
    29         if (tmp == tmp1) {//如果是相同的索引词,则将文档编号追加到strDocNum中
    30             strDocNum = strDocNum + " " + strLine.substr(idx+1);//索引词相同,则加起来
    31         }
    32         else {//索引词不同
    33             if ( strDocNum.empty() )
    34                 strDocNum = strDocNum + " " + strLine.substr(idx+1);
    35 
    36             cout << tmp1 << "\t" << strDocNum << endl;//输出
    37             tmp1 = tmp;
    38             strDocNum.clear();
    39             strDocNum = strDocNum + " " + strLine.substr(idx+1);
    40         }
    41 
    42         cnt++;
    43         //if (cnt==100) break;
    44     }
    45     cout << tmp1 << "\t" << strDocNum << endl;
    46 
    47     return 0;
    48 }
  • 相关阅读:
    C99新增内容之复合文字(compound literal)
    直接编译caffe出现的两个问题
    安装cuda8.0中所遇到的问题-解决办法
    windows环境Caffe安装配置步骤(无GPU)及mnist训练
    leetcodeTop100好题
    只不过是从头再来,读java源码
    文章收藏
    java代码优化技巧
    xshell
    MVC框架笔记
  • 原文地址:https://www.cnblogs.com/kakamilan/p/2592345.html
Copyright © 2011-2022 走看看