zoukankan      html  css  js  c++  java
  • POJ 2050 Searching the Web

    题意简述:做一个极其简单的搜索系统,对以下四种输入进行分析与搜索:

        1. 只有一个单词:如 term, 只需找到含有这个单词的document,然后把这个document的含有这个单词term的那些行输出。

        2.term1 AND term2, 找到同时含有term1 和 term2 的document,然后把这个document的含有这个单词term1 或 term2 的那些行输出。

        3.term1 OR term2, 找到含有term1 或 term2 的document,然后把这个document的含有这个单词term1 或 term2 的那些行输出。

        4.NOT term,将不含有 term的document全部输出

    思路简述:

        做一个set集合 Src, 记录了一个单词出现在哪些文件的哪些行;用一个map做映射,指出一个单词出现在哪些文件中,这样子,如果是两个单词,可以分别求出各个单词属于哪些文件,根据“AND”则进行set_intersection运算, 根据“OR”进行set_union运算。

    /*
    	Poj 2050
    	Emerald
    	10 May 2015
    */
    #include <iostream>
    #include <cstring>
    #include <cstdio>
    #include <cctype>
    #include <sstream>
    #include <string>
    #include <vector>
    #include <map>
    #include <set>
    #include <algorithm>
    
    using namespace std;
    
    class Figure{ // meaning : a word can be found in the lineOrder'th line of the docOrder'th Doc
    public:
    	int docOrder, lineOrder;
    	Figure() {}
    	Figure( int d, int l ) {
    		docOrder = d;
    		lineOrder = l;
    	}
    };
    
    bool operator < ( const Figure f1, const Figure f2 ) { // the comparisions used in set
    	if( f1.docOrder!=f2.docOrder ) {
    		return f1.docOrder < f2.docOrder;
    	} else {
    		return f1.lineOrder < f2.lineOrder;
    	}
    }
    
    class Doc{ // meaning : the order of a Doc, and how many lines the Doc contains
    public :
    	int docOrder, lineLimit;
    	Doc() {}
    	Doc( int d, int l ) {
    		this->docOrder = d;
    		this->lineLimit = l;
    	}
    };
    
    // set < Figure > src; // this defination defines an accurate instant
    typedef set < Figure > Src; // a src contains the figures of a word
    typedef set < int > docSrc;
    map < string, docSrc > docMap; // referring to a word, wordLine get a line
    map < string, Src > dict; 
    map < Figure, string > wordLine; // referring to a word, wordLine get a line
    vector < Doc > docs;
    
    const string DOC_END = "**********";
    #define ALL(x) x.begin(),x.end()
    #define INS(x) inserter(x,x.begin())
    
    void Standard( string &line ); // make words tolower and other chars to whitespace
    void WordRecord( string &line, int docOrder, int lineOrder ); // transfer words into src
    void PrintSrc( Src &src ); // print as the problem commands
    void WordsToFigure( Src &src, docSrc &dsrc, string& w1, string& w2 ); // know the docs and words, get target figures
    void NotWord( string& word, Src &src ); // not the word w
    
    int main() {
    	int allDocs, queries;
    	
    	// input docs
    	scanf( "%d", &allDocs );
    	cin.get();
    	for( int i=0; i<allDocs; i++ ) {
    		string line;
    		int lineCounter = 0;
    		while( getline( cin, line ) && line != DOC_END ) { // read until end
    			wordLine[ Figure( i, lineCounter ) ] = line;
    			Standard( line );
    			WordRecord( line, i, lineCounter ++ );
    		}
    		docs.push_back( Doc( i, lineCounter ) );
    	}
    
    	// input queries
    	string command;
    	scanf( "%d", &queries );
    	cin.get();
    	while( queries -- ) {
    		getline( cin, command );
    		Standard( command );
    		Src src;
    		if( command.find_last_of( ' ' ) == string::npos ) { // no whitespace
    			Standard( command );
    			src = dict[ command ];
    		} else if( command.find_last_of( ' ' ) != command.find_first_of( ' ' ) ) { // if there're two different whitespaces
    			stringstream ss( command );                                          // xxx AND/OR xxx
    			string w1, w2, connected;
    			ss >> w1 >> connected >> w2;
    			docSrc dSrc1 = docMap[ w1 ];
    			docSrc dSrc2 = docMap[ w2 ];
    
    			docSrc dsrc;
    			if( connected == "and" ) {
    				set_intersection( ALL( dSrc1 ), ALL( dSrc2 ), INS( dsrc ) ); // intersection
    			} else {
    				set_union( ALL( dSrc1 ), ALL( dSrc2 ), INS( dsrc ) ); // union
    			}
    
    			WordsToFigure( src, dsrc, w1, w2 );
    		} else { // only one whitespace -> Not xxx
    			stringstream ss( command );
    			string w1;
    			ss >> w1 >> w1;
    			NotWord( w1, src );
    		}
    		PrintSrc( src );
    	}
    	return 0;
    }
    
    void Standard( string &line ) {
    	int length = line.length();
    	for( int i=0; i<length; i++ ) {
    		if( isalpha( line[i] ) ) {
    			line[i] = tolower( line[i] ); // tolower, such as 'A' to 'a'
    		} else {
    			line[i] = ' '; // if c isn't a alpha, c will be transferred to a whitespace
    		}
    	}
    }
    
    void WordRecord( string &line, int docOrder, int lineOrder ) {
    	stringstream ss( line );
    	string word;
    	while( ss >> word ) {
    		if( dict.count( word ) ) { // whether the word has been found in the total input
    			if( !dict[word].count( Figure( docOrder, lineOrder ) ) ) { // whether the word has been found in this line
    				dict[word].insert( Figure( docOrder, lineOrder ) ) ;
    			}
    		} else {
    			Src src;
    			src.insert( Figure( docOrder, lineOrder ) );
    			dict[ word ] = src;
    		}
    		if( docMap.count( word ) ) { // whether the word has been found in this document
    			docMap[word].insert( docOrder );
    		} else {
    			docSrc ds;
    			docMap[word] = ds;
    			docMap[word].insert( docOrder );
    		}
    	}
    }
    
    void PrintSrc( Src &src ) { // print the result
    	if( src.size() == 0 ) {
    		printf("Sorry, I found nothing.
    ");
    		printf( "==========
    " );
    		return ;
    	}
    	Src :: iterator it = src.begin(), bef; // bef represents the former one 
    	printf( "%s
    ", wordLine[ *it ].c_str() );
    	bef = it++;
    	while( it != src.end() ) {
    		if( it->docOrder != bef->docOrder ) {
    			printf( "----------
    " );
    		}
    		printf( "%s
    ", wordLine[ *it ].c_str() );
    		bef = it;
    		it ++;
    	}
    	printf( "==========
    " );
    }
    
    void WordsToFigure( Src &src, docSrc &dsrc, string& w1, string& w2 ) {
    	docSrc :: iterator it;
    	for( it=dsrc.begin(); it != dsrc.end(); it ++ ) {
    		Src :: iterator it2 ;
    		for( it2 = dict[ w1 ].begin(); it2 !=dict[w1].end(); it2 ++ ) {
    			if( *it == it2 -> docOrder ) {
    				src.insert( *it2 ); // the w1 appears in this line of this document
    			}
    		}
    		for( it2 = dict[ w2 ].begin(); it2 !=dict[w2].end(); it2 ++ ) {
    			if( *it == it2 -> docOrder ) {
    				src.insert( *it2 );	// the w1 appears in this line of this document
    			}
    		}
    	}
    }
    
    void NotWord( string& word, Src &src ) { // not this word
    	docSrc dsrc = docMap[ word ];
    	vector< Doc > :: iterator it;
    	for( it = docs.begin(); it != docs.end(); it ++ ) {
    		if( !dsrc.count( it->docOrder ) ) {
    			for( int i=0; i< it->lineLimit; i ++ ) {
    				src.insert( Figure( it->docOrder, i ) );
    			}
    		}
    	}
    }
    
  • 相关阅读:
    在不申请第三方变量的情况下交换a和b
    指针(pointer)的使用
    数组和链表(arrary & linkedlist)
    Python的学习计划
    Python——函数
    Python——文件操作
    Python——深浅Copy
    我发现的新大陆——好用的~
    我发现的新大陆——好吃儿的
    我发现的新大陆——好玩儿的
  • 原文地址:https://www.cnblogs.com/Emerald/p/4495519.html
Copyright © 2011-2022 走看看