zoukankan      html  css  js  c++  java
  • 面试题:一千万条短信中,找到重复次数最多的10条

      1 #include<iostream>
      2 #include<fstream>
      3 #include <string>
      4 #include <unordered_map>
      5 #include <vector>
      6 #include <algorithm>
      7 #include <queue>
      8 
      9 using namespace std;
     10 
     11 typedef unsigned long int uint64_t;
     12 
     13 //获取字符串的hash值
     14 uint64_t getHash(const string& s) {
     15     uint64_t hashVal = 0;
     16     uint64_t a = 3589;
     17     uint64_t b = 81369;
     18     for (char ch : s) {
     19         hashVal = hashVal * a + ch;
     20         a = a * b;
     21     }
     22     return hashVal;
     23 }
     24 
     25 struct posAndCount {
     26     int pos;
     27     int count;
     28     posAndCount(int a = 0, int b = 0) :
     29             pos(a), count(b) {
     30     };
     31 };
     32 
     33 struct NameHashPosAndCount {
     34     uint64_t namehash;
     35     int pos;
     36     int count;
     37 
     38     friend bool operator <(const NameHashPosAndCount& a,
     39             const NameHashPosAndCount& b) {
     40         return a.count > b.count;
     41     }
     42 };
     43 
     44 
     45 //从N个数据中找出出现次数最多的N个
     46 void MfromN() {
     47     unordered_map<uint64_t, posAndCount> names;
     48     unordered_map<uint64_t, posAndCount>::iterator it;
     50     posAndCount pc;
     51     int pos = 0;
    52 ifstream fin("message.txt"); 53 string l; 54 while (fin >> l) { 55 uint64_t hashVal = getHash(l); 56 it = names.find(hashVal); 57 if (it == names.end()) { 58 pc.pos = pos; 59 pc.count = 0; 60 names[hashVal] = pc; 61 } else { 62 names[hashVal].count++; 63 } 64 pos++; 65 } 66 fin.close(); 67 68 69 //优先队列寻找出现次数最多的10条 70 priority_queue<NameHashPosAndCount> name10; 71 NameHashPosAndCount n; 72 for (it = names.begin(); it != names.end(); it++) { 73 if (name10.size() < 10) { 74 n.namehash = it->first; 75 n.pos = it->second.pos; 76 n.count = it->second.count; 77 name10.push(n); 78 } else { 79 if (name10.top().count < it->second.count) { 80 name10.pop(); 81 n.namehash = it->first; 82 n.pos = it->second.pos; 83 n.count = it->second.count; 84 name10.push(n); 85 } 86 } 87 } 88 vector<int> index; 89 while (!name10.empty()) { 90 index.push_back(name10.top().pos); 91 name10.pop(); 92 } 93 sort(index.begin(),index.end()); 94 95 ifstream fin2("message.txt"); 96 int i = 0; 97 int k = 0; 98 while(k < 10 && fin2 >> l){ 99 if(i == index[k]){ 100 cout << l << endl; 101 k++; 102 } 103 i++; 104 } 105 fin2.close(); 106 }
  • 相关阅读:
    1114: 零起点学算法21——求平均值
    1113: 零起点学算法20——输出特殊值II
    1112: 零起点学算法19——输出特殊值
    1111: 零起点学算法18——3个数比较大小
    1110: 零起点学算法17——比较2个数大小
    1109: 零起点学算法16——鸡兔同笼
    1108: 零起点学算法15——交换变量
    1107: 零起点学算法14——三位数反转
    1106: 零起点学算法13——求2个时间之间的分钟数
    设计模式之—中介者模式<Mediator Pattern>
  • 原文地址:https://www.cnblogs.com/wxquare/p/6478774.html
Copyright © 2011-2022 走看看