zoukankan      html  css  js  c++  java
  • c++ boost 汉字和模式串混用的例子

    /*===============================================================
    *   Copyright (C) 2013 All rights reserved.
    *   
    *   文件名称:StringProcess.cpp
    *   创 建 者:
    *   创建日期:2013年04月24日
    *   描    述:
    *   备    注: 
    *   更新日志:
    *
    ================================================================*/
    #include <ctype.h>
    #include <errno.h>
    #include <locale.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/time.h>
    #include <iconv.h>
    #include <algorithm>
    #include <string>
    #include <vector>
    #include "boost/regex.hpp"
    // please add your code here!
    using namespace std;
    #define MAX_LINE_LENGTH 1048576
    #define TAGLEN 50
    /**
     * @brief Strip leading and trailing whitespace from a C string, in place.
     *
     * Whitespace is whatever isspace() reports. The surviving characters are
     * moved to the front of the buffer and the result is NUL-terminated. A
     * string that is empty or consists only of whitespace becomes "".
     *
     * @param s NUL-terminated, writable buffer; a null pointer is ignored.
     */
    void trim(char *s)
    {
        if (s == NULL)
        {
            return;
        }

        // isspace() is only defined for EOF and unsigned-char values; bytes of
        // multi-byte text are negative as plain char, hence the casts below.
        char *start = s;
        while (*start != '\0' && isspace(static_cast<unsigned char>(*start)))
        {
            ++start;
        }

        if (*start == '\0')
        {
            // Empty or all-whitespace input: nothing survives.
            s[0] = '\0';
            return;
        }

        // *start is a non-space character, so the backward scan always stops
        // at or after it and never walks off the front of the buffer.
        char *end = start + strlen(start) - 1;
        while (end > start && isspace(static_cast<unsigned char>(*end)))
        {
            --end;
        }

        const size_t kept = static_cast<size_t>(end - start) + 1;
        memmove(s, start, kept);   // regions may overlap, so memmove, not memcpy
        s[kept] = '\0';
    }
    
    /**
     * @brief Lower-case the ASCII upper-case letters of a C string, in place.
     *
     * @param str NUL-terminated, writable buffer.
     * @return true when every character of the string is an ASCII letter
     *         (an empty string therefore yields true); false when str is
     *         null or at least one non-letter character is present. The
     *         whole string is processed even when false is returned.
     */
    inline bool strTolower( char* str )
    {
        if ( str == NULL )
        {
            return false;
        }

        bool lettersOnly = true;
        for ( char* cursor = str; *cursor != '\0'; ++cursor )
        {
            const char ch = *cursor;
            if ( ch >= 'A' && ch <= 'Z' )
            {
                // 32 is the distance between 'A' and 'a' in ASCII.
                *cursor = ch + 32;
            }
            else if ( ch < 'a' || ch > 'z' )
            {
                lettersOnly = false;
            }
        }
        return lettersOnly;
    }
    
    /**
     * @brief Convert a multi-byte string to a wide string.
     *
     * The input is decoded with mbstowcs() after LC_CTYPE has been switched to
     * the locale named by the environment; without that call the conversion of
     * non-ASCII text fails. Note the side effect: every call changes the
     * process-wide LC_CTYPE setting.
     *
     * @param sToMatch Multi-byte text; only the part up to the first NUL byte
     *                 is converted.
     * @return The wide-character form, or an empty string when the input is
     *         empty or is not a valid multi-byte sequence.
     */
    std::wstring String2Wstring(std::string sToMatch)
    {
        const size_t kConversionFailed = static_cast<size_t>(-1);

        setlocale( LC_CTYPE, "" );

        // First pass: count the wide characters (terminator not included).
        const size_t wideLen = mbstowcs( NULL, sToMatch.c_str(), 0 );
        if (wideLen == kConversionFailed || wideLen == 0)
        {
            return std::wstring();
        }

        // Second pass: leave room for the terminator. mbstowcs() only writes
        // it when it fits inside the limit, so the limit must be wideLen + 1
        // (the byte length is too small for pure single-byte input).
        std::wstring wsToMatch(wideLen + 1, L'\0');
        const size_t written = mbstowcs( &wsToMatch[0], sToMatch.c_str(), wideLen + 1 );
        if (written == kConversionFailed)
        {
            return std::wstring();
        }

        wsToMatch.resize(written);
        return wsToMatch;
    }
    /**
     * @brief Convert a wide string to a multi-byte string, e.g. for output.
     *
     * The text is encoded with wcstombs(), which uses the current LC_CTYPE
     * locale. This function does not call setlocale() itself.
     *
     * @param sToMatch Wide text; only the part up to the first NUL character
     *                 is converted.
     * @return The multi-byte form, or an empty string when the input is empty
     *         or contains a character that cannot be converted.
     */
    std::string Wstring2String(std::wstring sToMatch)
    {
        const size_t kConversionFailed = static_cast<size_t>(-1);
        const wchar_t *source = sToMatch.c_str();

        // First pass: count the bytes needed (terminator not included).
        const size_t byteLen = wcstombs( NULL, source, 0 );
        if (byteLen == kConversionFailed || byteLen == 0)
        {
            return std::string();
        }

        // Second pass: the extra byte lets wcstombs() write the terminator.
        std::string sResult(byteLen + 1, '\0');
        const size_t written = wcstombs( &sResult[0], source, byteLen + 1 );
        if (written == kConversionFailed)
        {
            return std::string();
        }

        sResult.resize(written);
        return sResult;
    }
    /**
     * @brief Convert a byte buffer from one character encoding to another
     *        using iconv.
     *
     * The converted bytes are copied to deststr and followed by a single zero
     * byte. The conversion works in a scratch buffer of srclen * 5 bytes, so
     * deststr must provide room for up to srclen * 5 + 1 bytes.
     *
     * @param toCode   Name of the target encoding, as understood by iconv_open().
     * @param fromCode Name of the source encoding, as understood by iconv_open().
     * @param srcstr   Input bytes; need not be NUL-terminated.
     * @param deststr  Output buffer supplied by the caller.
     * @param srclen   Number of input bytes to convert.
     * @param destlen  Receives the number of bytes stored in deststr, not
     *                 counting the trailing zero byte. Left untouched when the
     *                 converter cannot be created.
     * @return 0 when the whole input was converted, 1 when the converter could
     *         not be created or iconv() reported an error. After an iconv()
     *         error deststr/destlen still describe the bytes produced before
     *         the error.
     */
    int toAnotherCode(const char *toCode,const char *fromCode,char *srcstr, char *deststr, size_t srclen,size_t &destlen)
    {
        const size_t kIconvFailed = static_cast<size_t>(-1);

        iconv_t convertor=iconv_open(toCode,fromCode);
        if(convertor==iconv_t(-1))
        {
            fprintf(stderr,"convertor device initailization failed!\n");
            return 1;
        }

        // One spare byte keeps the scratch buffer non-empty when srclen is 0.
        const size_t capacity=srclen*5;
        std::vector<char> scratch(capacity+1, '\0');

        char *input=srcstr;
        size_t inputLeft=srclen;
        char *output=&scratch[0];
        size_t outputLeft=capacity;

        // iconv() returns (size_t)-1 on error, otherwise the number of
        // irreversible conversions; any other value means the input was consumed.
        size_t rc=iconv(convertor,&input,&inputLeft,&output,&outputLeft);
        if(rc!=kIconvFailed)
        {
            // Stateful encodings need a final call with a null input to emit
            // the closing shift sequence.
            rc=iconv(convertor,NULL,NULL,&output,&outputLeft);
        }
        iconv_close(convertor);

        // Publish the length first, then terminate at that position.
        destlen=capacity-outputLeft;
        memcpy(deststr,&scratch[0],destlen);
        deststr[destlen]=0;

        return rc==kIconvFailed ? 1 : 0;
    }
    /**
     * @brief Print the command-line synopsis to stderr.
     *
     * main() accepts exactly one argument, the path it hands to LoadFile(),
     * so the synopsis lists that single input file.
     */
    void PrintUsage()
    {
        fprintf( stderr, "prog [IN]input_file\n" );
    }
    void testRegex()
    {
        string s="刘禹,刘德华,刘佳佳。。。王大虎。。。刘长春,xixi";
        string t="刘[^刘]*?,";
        wstring p=String2Wstring(t);
        wstring ws=String2Wstring(s);
        boost::wregex wreg(p,boost::regbase::icase|boost::regex::perl);
        boost::wsmatch wm;
        vector<string> results;
        wstring::const_iterator  it=ws.begin();
        wstring::const_iterator  end=ws.end();
        while(boost::regex_search(it,end,wm,wreg))
        {
            wstring wtemp=wm[0];
            string temp=Wstring2String(wtemp);
            results.push_back(temp);
            it=wm[0].second;
        }
        fprintf(stdout,"输出正则匹配结果
    ");
        for(vector<string>::iterator it=results.begin();it!=results.end();it++)
        {
                printf("%s
    ",(*it).c_str());
        }
    }
    int LoadFile(char* inputfile)
    {
        FILE *fin = NULL;
        char line[102400] = {0};
        char word[102400] = {0};
        int len = 0;
        fin = fopen(inputfile, "r");
        if (NULL == fin)
        {
            fprintf(stderr,"LoadAddress can not open inputfilename %s
    ", inputfile);
            return 1;
        }
        
        while(true)
        {
            fgets(line, 102400, fin);
            if (feof(fin))
            {
                break;
            }
            len = strlen(line);
            if (0 == line[0] || '
    ' != line[len - 1])
            {
                continue;
            }
            line[len - 1] = 0;
            string pattern ="首都或首府:";
            string p1="([u2E80-u9FFF])+";
            wstring wp1 = String2Wstring(p1);
            //wstring wpattern = L"([u2E80-u9FFF])+";
            wstring wpattern = L"([u2E80-u9FFF]+)"+String2Wstring(pattern)+L"([u2E80-u9FFF]+)";
            wstring winputstr = String2Wstring(line);
            boost::wregex wreg(wpattern, boost::regex::perl|boost::regbase::icase);
            boost::smatch what;
            boost::wsmatch wswhat;
            wstring::const_iterator wstrit = winputstr.begin();
            wstring::const_iterator wstrend = winputstr.end();
            while (boost::regex_search(wstrit, wstrend, wswhat, wreg))
            {
                wstring ws1 = wswhat[1];
                wstring ws2 = wswhat[2]; 
                string s1 = Wstring2String(ws1);
                string s2 = Wstring2String(ws2);
                fprintf(stdout, "%s	%s
    ", s1.c_str(), s2.c_str());
                wstrit=wswhat[0].second;  
            }
        }
        
        if (NULL != fin)
        {
            fclose(fin);
            fin = NULL;
        }
        return 0;
    }
    /**
     * @brief Program entry point: process one input file and report timing.
     *
     * Expects exactly one command-line argument, the input file path, which
     * is passed to LoadFile(). The elapsed wall-clock time is written to
     * stderr in milliseconds.
     *
     * @return 0 on success; 1 when the argument count is wrong or LoadFile()
     *         reports a failure.
     */
    int main( int argc, char *argv[] )
    {
        timeval tv1, tv2;
        gettimeofday(&tv1, NULL);

        if ( 2 != argc )
        {
            PrintUsage();
            return 1;
        }

        const int status = LoadFile(argv[1]);
        gettimeofday(&tv2, NULL);

        // Only announce success when the file was actually processed.
        if ( 0 == status )
        {
            fprintf(stderr,"%s has finished congratulations!\n",argv[0]);
        }

        // Work in double: a float cannot hold large microsecond counts exactly.
        const double elapsedUs = (tv2.tv_sec - tv1.tv_sec)*1000000.0 + (tv2.tv_usec - tv1.tv_usec);
        fprintf( stderr,"time elapsed: %.2f ms\n", elapsedUs/1000.0);
        return status;
    }
    View Code
  • 相关阅读:
    .Net利用core实现简单的加解密例程全解析
    HTTPS抓包,利用telnet检查网络连接的几种方式
    每个人都应该知道的(cJSON)JSON处理库
    HashMap和Hashtable的区别
    Map集合
    Set集合
    同步异步以及泛型
    ArrayList的输出方式以及因子增长数
    (转)C++内存分配方式详解——堆、栈、自由存储区、全局/静态存储区和常量存储区
    数据结构复习---最短路径
  • 原文地址:https://www.cnblogs.com/finallyliuyu/p/3745097.html
Copyright © 2011-2022 走看看