我写的第一个版本,query 返回的是 map<size_t, string>;每次查询都要把所有匹配行的文本拷贝一份,返回的数据量很大,效率低下。
TextQuery.h
#ifndef TEXTQUERY_H
#define TEXTQUERY_H

#include <cstddef>
#include <fstream>
#include <iostream>
#include <iterator>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <vector>

// NOTE: a using-directive in a header leaks into every includer. It is kept
// here only because the accompanying TextQuery.cpp uses unqualified names.
using namespace std;

// First (naive) version of the text-query class: it stores every line of an
// input stream and an index from each word to the lines that contain it.
class TextQuery {
public:
    // Creates an empty query object with no text and no index.
    TextQuery() = default;

    // Builds the line store and the word index from the stream `is`.
    TextQuery(ifstream &is);

    // Returns a map from line index to line text for every line that
    // contains `str`; the result is empty when the word is not indexed.
    // The matching lines are copied into the result.
    map<size_t, string> query(const string &str);

private:
    vector<string> text;            // the input, one element per line
    map<string, set<size_t>> lut;   // word -> indices (into `text`) of lines containing it
};

#endif
TextQuery.cpp
include "TextQuery.h" TextQuery::TextQuery(ifstream &is) { string line; istringstream iss; string word; size_t line_cnt = 0; while(getline(is, line)) { iss.clear(); iss.str(line); text.push_back(line); while(iss>>word) { if(lut.fine(word) != lut.end()) { lut[word].insert(line_cnt); } else { lut[owrd] = set<size_t>{line_cnt}; } } ++line_cnt; } } map<size_t, string> TextQuery::query(const string &str) { map<size_t, string> sh; auto p = lut.find(str); if((p != lut.end()) { for(auto iter = p->second.begin(); iter != p->second.end(); ++iter) { sh[*iter] = text[*text]; } } return sh; }
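这里用 set<size_t>{line_cnt} 构造了一个临时(匿名)对象,再赋值给 map 中的元素。花括号形式调用的是初始化列表构造函数,得到只含 line_cnt 这一个元素的 set。但是不能写成 set<size_t>(line_cnt),因为 set 没有接受单个元素值的构造函数;它带参数的构造函数接受的是比较器/分配器、迭代器范围、初始化列表,或者另一个 set(拷贝/移动构造)。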
书上的版本
TextQuery.h
#pragma once

#include <cstddef>
#include <fstream>
#include <iostream>
#include <map>
#include <memory>
#include <set>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

// NOTE: a using-directive in a header leaks into every includer. It is kept
// because the accompanying source files rely on unqualified standard names.
using namespace std;

class QueryResult;

// Holds the lines of an input stream plus an index from each word to the set
// of line indices on which it appears. Both are held through shared_ptr so a
// QueryResult can share them instead of copying.
class TextQuery {
public:
    // Builds the line store and the word index from the stream `s`.
    TextQuery(ifstream &s);

    // Looks up `str` and returns a result that shares this object's data.
    QueryResult query(const string &str) const;

private:
    shared_ptr<vector<string>> text;             // input text, one element per line
    map<string, shared_ptr<set<size_t>>> lut;    // word -> line indices containing it
};

// The outcome of one query: the word searched for, the shared text, and the
// shared set of line indices for that word.
class QueryResult {
public:
    // The shared_ptr arguments are taken by value and moved into the members,
    // so each costs one reference-count increment at the call site. The
    // initializer list follows declaration order, which is the order in which
    // members are actually initialized.
    QueryResult(const string &str, shared_ptr<vector<string>> ptr, shared_ptr<set<size_t>> ln)
        : contents(std::move(ptr)), lines(std::move(ln)), sh(str) {}

    shared_ptr<vector<string>> contents;   // the text that was searched
    shared_ptr<set<size_t>> lines;         // indices into *contents for the matches
    string sh;                             // the word that was looked up
};

// Writes the query word, the number of matching lines, and each matching
// line to `os`.
void print(ostream &os, const QueryResult &qr);
TextQuery.cpp
#include "TextQuery.h"

// Reads `is` line by line. Each line is appended to the shared text vector,
// and every whitespace-separated word on it is recorded in `lut` with the
// zero-based index of that line.
TextQuery::TextQuery(ifstream &is) : text(make_shared<vector<string>>())
{
    string line;
    istringstream iss;
    string word;
    while (getline(is, line)) {
        // clear() resets the eof/fail bits left by the previous line so the
        // stream can be reused with new contents.
        iss.clear();
        iss.str(line);
        text->push_back(line);
        const size_t line_no = text->size() - 1;   // index of the line just stored
        while (iss >> word) {
            // operator[] inserts a value-initialized (null) shared_ptr the
            // first time a word is seen; a null pointer therefore means the
            // set for this word has not been allocated yet.
            auto &lines = lut[word];
            if (!lines) {
                lines = make_shared<set<size_t>>();
            }
            lines->insert(line_no);
        }
    }
}

// Returns a QueryResult sharing this object's text and the line set of `str`.
// When `str` is not indexed, the result refers to a single function-local
// empty set that is shared by all such results.
QueryResult TextQuery::query(const string &str) const
{
    static shared_ptr<set<size_t>> st_ptr = make_shared<set<size_t>>();
    auto loc = lut.find(str);
    if (loc != lut.end())
        return QueryResult(str, text, loc->second);
    return QueryResult(str, text, st_ptr);
}

// Writes the query word and the number of matching lines, then each matching
// line prefixed with its one-based line number. '\n' is used instead of endl
// so the stream is not flushed after every line.
void print(ostream &os, const QueryResult &pr)
{
    os << pr.sh << " occurs " << pr.lines->size() << " times" << '\n';
    for (size_t line_no : *pr.lines) {
        os << "(line " << line_no + 1 << ") " << (*pr.contents)[line_no] << '\n';
    }
}
测试代码
#include<iostream> #include<fstream> #include"TextQuery.h" using namespace std; int main() { ifstream is("TextQuery.cpp"); TextQuery tq(is); QueryResult qr = tq.query("QueryResult"); print(cout, qr); return 1; }
这个代码相比我写的代码的优点
1、使用指针返回查找的数据,返回的数据量比较小,没有大量拷贝。
2、为了使用指针使用了shared_ptr
3、在判断 map 中 lut[word] 对应的数据是否存在时,巧妙地利用了 map 下标运算符的行为:当键 word 不存在时,operator[] 会插入这个键,并对值进行值初始化;shared_ptr 值初始化的结果是空指针(nullptr),因此可以根据指针是否为空来确定是否需要为 set 分配空间。