zoukankan      html  css  js  c++  java
  • 编程算法

    字典分词 代码(C)


    本文地址: http://blog.csdn.net/caroline_wendy


    给定字典, 给定一句话, 进行分词. 

    使用深度遍历(DFS)的方法.

    使用一个參数string, 保存当前分支的分词后的句子; 使用一个參数vector, 保存全部可能的组合.

    使用一个验证函数, 推断句子能否够分词.


    代码:


    /*
     * main.cpp
     *
     *  Created on: 2014.9.18
     *  Author: Spike
     *  Copyright (c) 2014年 WCL. All rights reserved.
     */
    
    /*eclipse cdt, gcc 4.8.1*/
    
    #include <iostream>
    #include <vector>
    #include <string>
    #include <set>
    
    using namespace std;
    
    bool Match(string s, string m) {
    	int l = m.length();
    	if (s.substr(0, l) == m) {
    		return true;
    	}
    	return false;
    }
    
    bool Validate(string s, vector<string> &dict) {
    	//1. calculate all alphabets in the query
    	set<char> sc;
    	for (size_t i = 0; i < s.length(); i++) {
    		sc.insert(s[i]);
    	}
    	//2. calculate all alphabets in the dictionary
    	set<char> dc;
    	for (vector<string>::iterator it = dict.begin();
    			it != dict.end(); it++)
    	{
    		for (size_t i = 0; i < (*it).length(); i++) {
    			dc.insert((*it)[i]);
    		}
    	}
    	for (set<char>::iterator it = sc.begin(); it != sc.end(); it++) {
    		if (dc.find(*it) == dc.end()) {
    			return false;
    		}
    	}
    	return true;
    }
    
    string Split(string s, vector<string> &dict, string cur, vector<string>& list) {
    	if (s.length() == 0) {
    		list.push_back(cur);
    		return s;
    	}
    	for (vector<string>::iterator it = dict.begin(); it != dict.end(); it++) {
    		if (Match(s, *it)) {
    			string tmp = cur;
    			string latter = s.substr(it->length(), s.length() - it->length());
    			cur += (*it) + "~"; // add current word to cur_str
    			cur += Split(latter, dict, cur, list); // split remaining words
    			cur = tmp; //back to last status
    		}
    	}
    	return "No Result";
    }
    
    vector<string> SplitWords(string s, vector<string> &dict) {
    	string cur = "";
    	vector<string> list;
    	if (!Validate(s, dict)) {
    		return list;
    	}
    	Split(s, dict, cur, list);
    	return list;
    }
    
    
    int main()
    {
        vector<string> dict={"程序猿","公务员","员","我","喜","做","程序","一","欢","喜欢","做一个","一个"};
        vector<string> words = SplitWords("我喜欢做一个程序猿", dict);
        for (vector<string>::iterator it=words.begin(); it!=words.end(); it++) {
            cout<<(*it)<<endl;
        }
        return 0;
    }
    


    简化版本号(没有验证):

    /*
     * main.cpp
     *
     *  Created on: 2014.9.18
     *  Author: Spike
     *  Copyright (c) 2014年 WCL. All rights reserved.
     */
    
    /*eclipse cdt, gcc 4.8.1*/
    
    #include <iostream>
    #include <vector>
    #include <string>
    #include <set>
    
    using namespace std;
    
    bool Match(string s, string m) {
    	int l = m.length();
    	if (s.substr(0, l) == m) {
    		return true;
    	}
    	return false;
    }
    
    string Split(string s, vector<string> &dict, string cur, vector<string>& list) {
    	if (s.length() == 0) {
    		list.push_back(cur);
    		return s;
    	}
    
    	for (vector<string>::iterator it = dict.begin(); it != dict.end(); it++) {
    		if (Match(s, *it)) {
    			string tmp = cur;
    			string latter = s.substr(it->length());
    			cur += (*it) + " | "; // add current word to cur_str
    			cur += Split(latter, dict, cur, list); // split remaining words
    			cur = tmp; //back to last status
    		}
    	}
    
    	return "No Result";
    }
    
    vector<string> SplitWords(string s, vector<string> &dict) {
    	string cur = "";
    	vector<string> list;
    	Split(s, dict, cur, list);
    	return list;
    }
    
    
    int main()
    {
        vector<string> dict={"程序猿","公务员","员","我","喜","做","程序","一","欢","喜欢","做一个","一个"};
        string s = "我喜欢做一个程序猿";
        vector<string> words = SplitWords(s, dict);
        for (vector<string>::iterator it=words.begin(); it!=words.end(); it++) {
            cout<<(*it)<<endl;
        }
        return 0;
    }
    



    输出:

    我~喜~欢~做~一个~程序猿~
    我~喜~欢~做~一个~程序~员~
    我~喜~欢~做一个~程序猿~
    我~喜~欢~做一个~程序~员~
    我~喜欢~做~一个~程序猿~
    我~喜欢~做~一个~程序~员~
    我~喜欢~做一个~程序猿~
    我~喜欢~做一个~程序~员~
    







  • 相关阅读:
    max 获取硬件信息。
    创建文件夹 dos 找到外部链接
    max poly 合并 速度很快
    poly 查找因素脚本、
    下来要研究的东西。(文件之间的关系)
    上一个 有bug 线的形状节点的的判断,改
    mysql修改用户名密码
    ubuntu下ibus输入法的安装
    Android 版本与Kernel的对应关系
    You are attempting to build on a 32bit system Only 64bit build environments are supported beyond froyo/2.2
  • 原文地址:https://www.cnblogs.com/yxwkf/p/4511444.html
Copyright © 2011-2022 走看看