zoukankan      html  css  js  c++  java
  • C++ 10 文件文本查询示例-容器的使用

    解决思路

    1. 读取文件到vector容器,下标即行号(从0开始),元素为该行文本。
    2. 构建Map容器,保存单词到所在行数set容器的映射。
    3. 循环读取输入单词,使用map.find()获取行号列表,再根据行号获取对应文本。
    4. 输出结果。

    TextQuery.h

    #ifndef TEXTQUERY_H
    #define TEXTQUERY_H
    #include <string>
    #include <vector>
    #include <map>
    #include <set>
    #include <iostream>
    #include <fstream>
    #include <cctype>
    #include <cstring>
    // Stores the lines of a text file and an index from each word to the set
    // of zero-based line numbers on which that word appears.
    class TextQuery {
    public:
    	// Size type of std::string; retained so existing users of the name
    	// keep compiling.
    	typedef std::string::size_type str_size;
    	// Zero-based index of a line inside the stored text.
    	typedef std::vector<std::string>::size_type line_no;
    	// Reads every line of `is` into the line container, then builds the
    	// word -> line-number index from those lines.
    	void read_file(std::ifstream &is)
    	{
    		std::cout << "读取文件" << std::endl;
    		store_file(is); // stream -> line container
    		build_map();  // line container -> word/line-number map
    	}
    	// Returns the set of line numbers on which the given word appears
    	// (empty when the word is not indexed).
    	std::set<line_no> run_query(const std::string&) const;
    	// Returns the text of the given line; throws std::out_of_range when
    	// the line number is not a valid index.
    	std::string text_line(line_no) const;
    	// Number of stored lines. Typed as line_no because it counts lines,
    	// not characters.
    	line_no size() const {
    		return lines_of_text.size();
    	}
    	// Prints every indexed word together with its line numbers.
    	void display_map();
    private:
    	// Appends each line read from the stream to lines_of_text.
    	void store_file(std::ifstream&);
    	// Fills word_map from the contents of lines_of_text.
    	void build_map();
    	// Text of the file, one element per line.
    	std::vector<std::string> lines_of_text;
    	// Normalized word -> set of line numbers containing it.
    	std::map<std::string, std::set<line_no> > word_map;
    	// Normalizes a word for use as a map key: punctuation characters are
    	// removed and the remaining characters are lower-cased.
    	static std::string cleanup_str(const std::string&);
    };
    #endif
    

    main.cpp

    #include "TextQuery.h"
    #include <iostream>
    #include <fstream>
    #include <sstream>
    #include <set>
    #include <map>
    #include <string>
    
    using std::ifstream; using std::map; using std::set; using std::multimap; using std::multiset;
    using std::string; using std::cin; using std::cout; using std::endl;
    ifstream& open_file(ifstream&, const string&);
    void print_results(const set<TextQuery::line_no>& locs,	const string& sought, const TextQuery &file);
    string make_plural(size_t ctr, const string &word, const string &ending);
    
    int main()
    {
    	string file1 = "/share/file.txt";
    	ifstream infile; // 打开特点文件
    
    	if (!open_file(infile, file1)) {
    		std::cerr << "error: unable to open input file:" << file1 << endl;
    		return -1;
    	}
    	TextQuery tq;
    	tq.read_file(infile); // 读取文件、构造map
    	tq.display_map();
    	while (true) {
    		cout << "enter word to look for , q to quit:";
    		string s;
    		cin >> s;
    		if (!cin || s == "q") break;
    
    		set<TextQuery::line_no> locs = tq.run_query(s);
    
    		print_results(locs, s, tq);
    
    	}
    
    	return 0;
    }
    
    // (Re)binds `is` to the file at `file_path` and returns the same stream so
    // the caller can test it directly for success.
    ifstream& open_file(ifstream &is, const string &file_path)
    {
    	// Drop any previous association first; the state flags are reset
    	// afterwards so that earlier errors do not leak into the new open().
    	is.close();
    	is.clear();

    	const char *const path = file_path.c_str();
    	is.open(path);
    	return is;
    }
    void print_results(const set<TextQuery::line_no>& locs,	const string& sought, const TextQuery &file)
    {
    	typedef set<TextQuery::line_no> line_nums;
    	line_nums::size_type size = locs.size();
    	cout << "
    " << sought << " occurs " << size << " "
    		<< make_plural(size, "time", "s") << endl;
    
    	for (line_nums::const_iterator it = locs.begin(); it != locs.end(); ++it)
    	{
    		cout << "	(line" << (*it) + 1 << ") "
    			<< file.text_line(*it) << endl;
    	}
    }
    
    // Returns `word` unchanged when the count is exactly one, otherwise
    // `word` with `ending` appended.
    string make_plural(size_t ctr, const string &word, const string &ending)
    {
    	if (ctr == 1)
    		return word;
    	return word + ending;
    }
    

    TextQuery.cpp

    #include "TextQuery.h"
    #include <sstream>
    #include <string>
    #include <vector>
    #include <map>
    #include <set>
    #include <iostream>
    #include <fstream>
    #include <cctype>
    #include <cstring>
    #include <stdexcept>
    
    using std::istringstream;
    using std::set;
    using std::string;
    using std::getline;
    using std::map;
    using std::vector;
    using std::cerr;
    using std::cout;
    using std::cin;
    using std::ifstream;
    using std::endl;
    using std::ispunct;
    using std::tolower;
    using std::strlen;
    using std::out_of_range;
    
    // Returns a copy of the stored line at the given zero-based index.
    // Throws std::out_of_range when the index is not below the line count.
    string TextQuery::text_line(line_no line) const
    {
    	if (line >= lines_of_text.size())
    		throw std::out_of_range("line number out of range");
    	return lines_of_text[line];
    }
    
    // Reads the stream line by line until getline fails, echoing each line to
    // standard output and appending it to lines_of_text.
    void TextQuery::store_file(ifstream &is)
    {
    	cout << "从文件输入流到容器" << endl;
    	for (string current; getline(is, current); )
    	{
    		cout << "读取文件: " << current << endl;
    		lines_of_text.push_back(current);
    	}
    }
    
    
    // 用容器构建单词-行号列表的map
    void TextQuery::build_map()
    {
    	for (line_no line_num = 0;
    		line_num != lines_of_text.size();
    		++line_num)
    	{
    		istringstream line(lines_of_text[line_num]);
    		string word;
    		while (line >> word)
    		{
    			word_map[cleanup_str(word)].insert(line_num);
    		}
    	}
    }
    
    // Looks up the normalized form of `query_word` in the index and returns a
    // copy of its line-number set, or an empty set when the word is absent.
    set<TextQuery::line_no>
    TextQuery::run_query(const string &query_word) const
    {
    	typedef map < string, set<line_no> >::const_iterator entry_iter;

    	const entry_iter hit = word_map.find(cleanup_str(query_word));
    	if (hit != word_map.end())
    		return hit->second;
    	return set<line_no>();
    }
    
    void TextQuery::display_map()
    {
    	map< string, set<line_no> >::iterator iter = word_map.begin(),
    		iter_end = word_map.end();
    	for (; iter != iter_end; ++iter)
    	{
    		cout << "word: " << iter->first << " {";
    
    		const set<line_no> &text_locs = iter->second;
    		set<line_no>::const_iterator loc_iter = text_locs.begin(),
    			loc_iter_end = text_locs.end();
    
    		while (loc_iter != loc_iter_end)
    		{
    			cout << *loc_iter;
    
    			if (++loc_iter != loc_iter_end)
    				cout << ", ";
    		}
    		cout << "}
    ";
    	}
    	cout << endl;
    }
    
    // Returns a normalized copy of `word`: punctuation characters are dropped
    // and every remaining character is converted to lower case.
    string TextQuery::cleanup_str(const string &word)
    {
    	string ret;
    	ret.reserve(word.size());
    	for (string::const_iterator it = word.begin(); it != word.end(); ++it)
    	{
    		// The <cctype> functions require a value representable as
    		// unsigned char (or EOF); passing a negative plain char, as happens
    		// with non-ASCII bytes, is undefined behaviour.
    		const unsigned char ch = static_cast<unsigned char>(*it);
    		if (!std::ispunct(ch))
    			ret += static_cast<char>(std::tolower(ch));
    	}
    	return ret;
    }
    
  • 相关阅读:
    CF979D Kuro and GCD and XOR and SUM(01Trie)
    2020中国计量大学校赛题解
    CF16E Fish (状压dp)
    2017ccpc杭州站题解
    HDU6274 Master of Sequence(二分+预处理)
    CF899F Letters Removing(树状数组+二分)
    牛客 tokitsukaze and Soldier(优先队列+排序)
    HDU6268 Master of Subgraph(点分治)
    CF862E Mahmoud and Ehab and the function(二分)
    CF1108F MST Unification(生成树+思维)
  • 原文地址:https://www.cnblogs.com/hiqianqian/p/7071445.html
Copyright © 2011-2022 走看看