zoukankan      html  css  js  c++  java
  • 获取电驴首页推荐信息和指定栏目信息

    标 题: 获取电驴首页推荐信息和指定栏目信息
    作 者: itdef
    链 接: http://www.cnblogs.com/itdef/p/4081963.html 

    欢迎转帖 请保持文本完整并注明出处

    /*******************************************************************************
    *  @file        
    *  @author      def< qq group: 324164944 >
    *  @blog        http://www.cnblogs.com/itdef/
    *  @brief       Fetches the VeryCD home-page recommendations and the title
    *               information of selected entry pages.
    *******************************************************************************/
     
    #include "stdafx.h"
     
    #include <afxinet.h>
    #include <atlsimpstr.h>
    #include <fstream>  
    #include <iostream>
    #include <sstream>
     
    #include <set>
     
    using namespace std;
     
    #ifdef _DEBUG
    #define new DEBUG_NEW
    #endif
     
    // Downloads the resource at strUrl and writes what it reads into the file
    // named by DownloadHtmFileName. Returns 0 on success, -1 on failure.
    int GetHttpFileData(CString strUrl,char* DownloadHtmFileName);
    // Reads the downloaded home page from szfileName, converts it with
    // UTF8Str2GBK and prints the recommendation lists. Returns 0 on success,
    // -1 on failure.
    int ParseHomePageDownloadFile(char* szfileName);
    // Converts strUTF8 from UTF-8 to the system ANSI code page (CP_ACP) and
    // stores the result in strGBK. Returns 0 on success, -1 on failure.
    int UTF8Str2GBK(const string& strUTF8,string& strGBK);
    // Prints the distinct values that follow each occurrence of szName in strGbk.
    void GetHomePageRecommend(char* szName,const string& strGbk);
     
    // The one and only application object
     
    CWinApp theApp;
     
    // NOTE(review): repeats the using-directive that already appears after the includes.
    using namespace std;
     
     
     
    // Finds the first occurrence of openMarker in strText and the first
    // occurrence of closeMarker after it, and copies the text between the two
    // into strOut.
    // Returns true when both markers were found; otherwise returns false and
    // leaves strOut untouched.
    static bool ExtractBetweenMarkers(const std::string& strText,
                                      const std::string& openMarker,
                                      const std::string& closeMarker,
                                      std::string& strOut)
    {
        const std::string::size_type openPos = strText.find(openMarker);
        if (openPos == std::string::npos)
            return false;

        // The payload starts right after the opening marker, whatever its length.
        const std::string::size_type bodyPos = openPos + openMarker.size();
        const std::string::size_type closePos = strText.find(closeMarker, bodyPos);
        if (closePos == std::string::npos)
            return false;

        strOut = strText.substr(bodyPos, closePos - bodyPos);
        return true;
    }

    // Parses a downloaded entry page and prints its title information.
    //
    // The file named by szfileName is read completely, converted with
    // UTF8Str2GBK, and then the contents of the first <title> element and of
    // the first <div class="cv-title"> element are printed to stdout, each on
    // its own line (an element that is not found is silently skipped).
    //
    // Returns 0 on success and -1 when szfileName is NULL, the file cannot be
    // opened, or the conversion fails / yields an empty string.
    int ParseUpdateFile(char* szfileName)
    {
        if (NULL == szfileName)
            return -1;

        // The file is only read, so open it read-only and report an open
        // failure as such instead of as a conversion error.
        std::ifstream fileIn(szfileName);
        if (!fileIn.is_open())
        {
            std::cerr << "open file error: " << szfileName << std::endl;
            return -1;
        }

        // Slurp the whole file into memory.
        std::stringstream ss;
        ss << fileIn.rdbuf();
        fileIn.close();

        std::string strGbk;
        const int iConvRet = UTF8Str2GBK(ss.str(), strGbk);
        if (iConvRet != 0 || strGbk.empty())
        {
            std::cerr << "transfer utf8 to gbk error" << std::endl;
            return -1;
        }

        std::string strKeyWord;
        if (ExtractBetweenMarkers(strGbk, "<title>", "</title>", strKeyWord))
            std::cout << strKeyWord << std::endl;

        if (ExtractBetweenMarkers(strGbk, "<div class=\"cv-title\">", "</div>", strKeyWord))
            std::cout << strKeyWord << std::endl;

        return 0;
    }
     
    // Downloads the page at szHtmAddress into the scratch file
    // "HtmDownloadFile" and then runs ParseUpdateFile on that file.
    // A failure of either step is reported on stderr; the parse step runs
    // regardless of the download result.
    void ShowUpdateInfo(char* szHtmAddress)
    {
        // Mutable buffer because both helpers take a non-const char*.
        char szScratchFile[] = "HtmDownloadFile";

        const int iDownloadRet = GetHttpFileData(szHtmAddress, szScratchFile);
        if (iDownloadRet != 0)
        {
            std::cerr << "GetHttpFileData error once" << std::endl;
        }

        const int iParseRet = ParseUpdateFile(szScratchFile);
        if (iParseRet != 0)
        {
            std::cerr << "ParseUpdateFile error once" << std::endl;
        }
    }
     
     
    // Downloads the home page at szHomePageAddress into the scratch file
    // "HtmDownloadFile" and then runs ParseHomePageDownloadFile on that file.
    // A failure of either step is reported on stderr, naming the step that
    // failed; the parse step runs regardless of the download result.
    void ShowHomePageElement(char* szHomePageAddress)
    {
        // Mutable buffer because both helpers take a non-const char*.
        char szScratchFile[] = "HtmDownloadFile";

        if (0 != GetHttpFileData(szHomePageAddress, szScratchFile))
        {
            std::cerr << "GetHttpFileData error once" << std::endl;
        }

        if (0 != ParseHomePageDownloadFile(szScratchFile))
        {
            // Previously this branch printed the download step's message.
            std::cerr << "ParseHomePageDownloadFile error once" << std::endl;
        }
    }
     
     
     
    int _tmain(int argc, TCHAR* argv[], TCHAR* envp[])
    {
        int nRetCode = 0;
     
        // 初始化 MFC 并在失败时显示错误
        if (!AfxWinInit(::GetModuleHandle(NULL), NULL, ::GetCommandLine(), 0))
        {
            // TODO: 更改错误代码以符合您的需要
            _tprintf(_T("错误: MFC 初始化失败\n"));
            nRetCode = 1;
        }
        else
        {
            // TODO: 在此处为应用程序的行为编写代码。
            ShowHomePageElement("http://www.verycd.com/");
            cout << "****************************************************" << endl;
     
            ShowUpdateInfo("http://www.verycd.com/entries/790244/");
            cout << "****************************************************" << endl;
     
            ShowUpdateInfo("http://www.verycd.com/entries/519062/");
            cout << "****************************************************" << endl;
             
            ShowUpdateInfo("http://www.verycd.com/entries/780306/");
            cout << "****************************************************" << endl;
     
            ShowUpdateInfo("http://www.verycd.com/entries/522227/");
            cout << "****************************************************" << endl;
     
            ShowUpdateInfo("http://www.verycd.com/entries/507338/");
            cout << "****************************************************" << endl;
     
            ShowUpdateInfo("http://www.verycd.com/entries/515005/");
            cout << "****************************************************" << endl;
     
            ShowUpdateInfo("http://www.verycd.com/entries/794197/");
            cout << "****************************************************" << endl;
     
            ShowUpdateInfo("http://www.verycd.com/entries/511135/");
            cout << "****************************************************" << endl;
             
        }
     
     
        system("pause");
             
        return nRetCode;
    }
     
     
     
     
    // Converts a UTF-8 string to the system ANSI code page.
    //
    // strUTF8 is treated as a NUL-terminated UTF-8 string (anything after an
    // embedded NUL is ignored). It is first widened to UTF-16 and then narrowed
    // with CP_ACP; the result is stored in strGBK.
    // NOTE(review): CP_ACP only yields GBK when the system ANSI code page is
    // 936 - confirm whether the code page should be fixed explicitly.
    //
    // Returns 0 on success. Returns -1 when any of the Win32 conversion calls
    // fails, in which case strGBK is left unchanged.
    int UTF8Str2GBK(const std::string& strUTF8,std::string& strGBK)
    {
        // Size query; the count includes the terminating NUL because the
        // input length is given as -1. Zero signals failure.
        const int iWideLen = MultiByteToWideChar(CP_UTF8, 0, strUTF8.c_str(), -1, NULL, 0);
        if (iWideLen <= 0)
            return -1;

        std::wstring strWide(static_cast<std::wstring::size_type>(iWideLen), L'\0');
        if (MultiByteToWideChar(CP_UTF8, 0, strUTF8.c_str(), -1, &strWide[0], iWideLen) <= 0)
            return -1;

        const int iAnsiLen = WideCharToMultiByte(CP_ACP, 0, strWide.c_str(), -1, NULL, 0, NULL, NULL);
        if (iAnsiLen <= 0)
            return -1;

        // WideCharToMultiByte writes narrow chars, so the buffer must be char
        // (not TCHAR) to stay correct in Unicode builds.
        std::string strAnsi(static_cast<std::string::size_type>(iAnsiLen), '\0');
        if (WideCharToMultiByte(CP_ACP, 0, strWide.c_str(), -1, &strAnsi[0], iAnsiLen, NULL, NULL) <= 0)
            return -1;

        // Assign through c_str() to drop the terminating NUL that the API wrote.
        strGBK.assign(strAnsi.c_str());
        return 0;
    }
     
     
    // Parses a downloaded home page and prints its recommendation lists.
    //
    // The file named by szfileName is read completely and converted with
    // UTF8Str2GBK. Two headings are then printed to stdout, each followed by
    // the output of GetHomePageRecommend for the matching TrackEvent key.
    //
    // Returns 0 on success and -1 when szfileName is NULL, the file cannot be
    // opened, or the conversion fails / yields an empty string.
    int ParseHomePageDownloadFile(char* szfileName)
    {
        if (NULL == szfileName)
            return -1;

        // The file is only read, so open it read-only and report an open
        // failure as such instead of as a conversion error.
        std::ifstream fileIn(szfileName);
        if (!fileIn.is_open())
        {
            std::cerr << "open file error: " << szfileName << std::endl;
            return -1;
        }

        // Slurp the whole file into memory.
        std::stringstream ss;
        ss << fileIn.rdbuf();
        fileIn.close();

        std::string strGbk;
        const int iConvRet = UTF8Str2GBK(ss.str(), strGbk);
        if (iConvRet != 0 || strGbk.empty())
        {
            std::cerr << "transfer utf8 to gbk error" << std::endl;
            return -1;
        }

        // Mutable buffers because GetHomePageRecommend takes a non-const char*.
        char szBigKey[] = "VeryCD.TrackEvent('base','首页大推',";
        char szSmallKey[] = "VeryCD.TrackEvent('base','首页小推',";

        std::cout << "首页大推" << std::endl;
        GetHomePageRecommend(szBigKey, strGbk);
        std::cout << "首页小推" << std::endl;
        GetHomePageRecommend(szSmallKey, strGbk);

        return 0;
    }
     
     
    // Prints the distinct values that follow each occurrence of szName in strGbk.
    //
    // The text is expected to contain fragments of the form
    //     <szName>'<value>')
    // For every occurrence of szName, the characters between the quote that
    // directly follows it and the next "')" are collected. Empty values are
    // skipped, duplicates are collapsed, and the values are written to stdout
    // in sorted order, one per line, each prefixed with a fixed label.
    //
    // The value offset is derived from the length of szName (plus one for the
    // opening quote) rather than hard-coded, so keys of any length work. For
    // the two 36-byte keys used by ParseHomePageDownloadFile this is the same
    // offset (37) as before.
    //
    // szName  key to search for; NULL or empty prints nothing.
    // strGbk  text to scan.
    void GetHomePageRecommend(char* szName,const std::string& strGbk)
    {
        if (NULL == szName || '\0' == *szName)
            return;

        const std::string strKey(szName);
        const std::string strTerminator("')");
        // +1 skips the quote that opens the value right after the key.
        const std::string::size_type valueOffset = strKey.size() + 1;

        std::set<std::string> setKeyWord;

        std::string::size_type keyPos = strGbk.find(strKey);
        while (keyPos != std::string::npos)
        {
            // Look for the terminator only after the key itself.
            const std::string::size_type endPos = strGbk.find(strTerminator, keyPos + strKey.size());
            if (endPos == std::string::npos)
                break;

            // Only keep non-empty values.
            if (endPos > keyPos + valueOffset)
                setKeyWord.insert(strGbk.substr(keyPos + valueOffset, endPos - keyPos - valueOffset));

            keyPos = strGbk.find(strKey, endPos + 1);
        }

        for (std::set<std::string>::const_iterator pos = setKeyWord.begin(); pos != setKeyWord.end(); ++pos)
        {
            std::cout << "电驴首页推荐  " << *pos << std::endl;
        }
    }
     
     
     
    // Downloads the resource at strUrl into a local file.
    //
    // The output file named by szDownloadHtmFileName is created (or
    // truncated), the URL is opened through a CInternetSession, and every
    // string returned by ReadString is appended to the file exactly as
    // returned (no separator is written between them).
    //
    // Returns 0 on success. Returns -1 when szDownloadHtmFileName is NULL, the
    // output file cannot be opened, OpenURL yields no file object, or a
    // CInternetException is raised while opening or reading (its message is
    // written to stderr).
    int GetHttpFileData(CString strUrl,char* szDownloadHtmFileName)
    {
        if (szDownloadHtmFileName == NULL)
        {
            std::cerr << "DownloadHtmFileName is NULL" << std::endl;
            return -1;
        }

        // is_open() is used because a failed open sets failbit, which bad()
        // does not report.
        std::ofstream of(szDownloadHtmFileName);
        if (!of.is_open())
        {
            std::cerr << "of create file error" << std::endl;
            return -1;
        }

        CInternetSession Session("Internet Explorer", 0);
        // Kept as the base type OpenURL returns; ReadString is reached
        // without a downcast.
        CStdioFile* pUrlFile = NULL;
        int iRet = -1;

        try
        {
            pUrlFile = Session.OpenURL(strUrl);
            if (pUrlFile != NULL)
            {
                CString strClip;
                while (pUrlFile->ReadString(strClip))
                {
                    of << strClip;
                }
                iRet = 0;
            }
            else
            {
                std::cerr << __FUNCTION__ << " OpenURL returned NULL" << std::endl;
            }
        }
        catch (CInternetException* pEx)
        {
            TCHAR pszError[64];
            pEx->GetErrorMessage(pszError, 64);
            std::cerr << __FUNCTION__ << pszError << std::endl;
            // MFC exceptions caught by pointer must be released by the handler.
            pEx->Delete();
        }

        // The object returned by OpenURL is owned by the caller.
        if (pUrlFile != NULL)
        {
            pUrlFile->Close();
            delete pUrlFile;
        }
        Session.Close();
        of.close();

        return iRet;
    }
    

      关于字符集转换的文章:

    C++11与Unicode及使用标准库进行UTF-8、UTF-16、UCS2、UCS4/UTF-32编码转换

  • 相关阅读:
    MPS和MRP之间有什么样的关系呢
    java中静态代码块详解
    SQL server 分组后每组取出任意一行
    人是否能成功,其实可能很早就能看出来
    国内外产品经理的区别
    Yarn 和 NPM 国内快速镜像(淘宝镜像)
    vue-cli 使用less 遇到的问题 || vue-cli 使用less
    布隆过滤器
    PHP性能优化
    Redis-高并发代言词,为什么做分布式要Redis?
  • 原文地址:https://www.cnblogs.com/itdef/p/4081963.html
Copyright © 2011-2022 走看看