标 题: 获取电驴首页推荐信息和指定栏目信息
作 者: itdef
链 接: http://www.cnblogs.com/itdef/p/4081963.html
欢迎转帖 请保持文本完整并注明出处

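/*******************************************************************************
* @file
* @author def< qq group: 324164944 >
* @blog http://www.cnblogs.com/itdef/
* @brief Fetches the VeryCD home page recommendations and the title
*        information of selected entry pages, and prints them to the console.
*******************************************************************************/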
#include "stdafx.h"
#include <afxinet.h>
#include <atlsimpstr.h>
#include <fstream>
#include <iostream>
#include <sstream>
#include <set>
using namespace std;
#ifdef _DEBUG
#define new DEBUG_NEW
#endif
// Forward declarations of helpers that are defined further down in this file.

// Downloads the resource at strUrl into the local file DownloadHtmFileName.
// Returns 0 on success and -1 on failure.
int GetHttpFileData(CString strUrl,char* DownloadHtmFileName);
// Reads the downloaded home page from szfileName, converts it from UTF-8 and
// prints its recommendation entries. Returns 0 on success and -1 on failure.
int ParseHomePageDownloadFile(char* szfileName);
// Converts the UTF-8 text in strUTF8 to the CP_ACP code page (via UTF-16) and
// stores the result in strGBK. Returns 0 on success and -1 on failure.
int UTF8Str2GBK(const string& strUTF8,string& strGBK);
// Prints, without duplicates, the keyword that follows each occurrence of the
// marker szName inside strGbk.
void GetHomePageRecommend(char* szName,const string& strGbk);
// The one and only application object
CWinApp theApp;
using namespace std;
// Prints the text located between the first occurrence of openTag and the
// next occurrence of closeTag in text, followed by a line break.
// Prints nothing when either tag cannot be found.
static void PrintFirstTagContent(const std::string& text,
                                 const std::string& openTag,
                                 const std::string& closeTag)
{
    const std::string::size_type openPos = text.find(openTag);
    if (openPos == std::string::npos)
        return;

    // The content starts right after the opening tag; deriving the offset from
    // the tag itself keeps it in sync with the tag text.
    const std::string::size_type contentPos = openPos + openTag.size();
    const std::string::size_type closePos = text.find(closeTag, contentPos);
    if (closePos == std::string::npos)
        return;

    std::cout << text.substr(contentPos, closePos - contentPos) << std::endl;
}

/**
 * Parses a downloaded entry page and prints its title information.
 *
 * The file is read completely, converted from UTF-8 with UTF8Str2GBK, and the
 * contents of the first <title> element and of the first
 * <div class="cv-title"> element are written to stdout (one per line).
 *
 * @param szfileName  path of the downloaded page; NULL is rejected.
 * @return 0 on success; -1 when szfileName is NULL, the file cannot be opened,
 *         or the conversion fails / yields an empty string.
 */
int ParseUpdateFile(char* szfileName)
{
    if (NULL == szfileName)
        return -1;

    // Read-only stream: the file is only consumed here, so it must not require
    // write access the way a default-mode fstream does.
    std::ifstream input(szfileName);
    if (!input.is_open())
    {
        std::cerr << "open file error: " << szfileName << std::endl;
        return -1;
    }

    // Slurp the whole file into memory.
    std::stringstream buffer;
    buffer << input.rdbuf();
    input.close();

    std::string strGbk;
    const int convertResult = UTF8Str2GBK(buffer.str(), strGbk);
    if (convertResult != 0 || strGbk.empty())
    {
        std::cerr << "transfer utf8 to gbk error" << std::endl;
        return -1;
    }

    PrintFirstTagContent(strGbk, "<title>", "</title>");
    PrintFirstTagContent(strGbk, "<div class=\"cv-title\">", "</div>");
    return 0;
}
/**
 * Downloads the entry page at szHtmAddress into the scratch file
 * "HtmDownloadFile" and prints the title information parsed from it.
 *
 * Failures are reported on stderr. When the download fails the parse step is
 * skipped, because the scratch file then holds no complete page and parsing it
 * would only produce a second, misleading error.
 *
 * @param szHtmAddress  URL of the entry page to show.
 */
void ShowUpdateInfo(char* szHtmAddress)
{
    // Writable buffer: the helpers take a non-const char*, which a string
    // literal must not be bound to.
    char szScratchFile[] = "HtmDownloadFile";

    if (0 != GetHttpFileData(szHtmAddress, szScratchFile))
    {
        cerr << "GetHttpFileData error once" << endl;
        return;
    }

    if (0 != ParseUpdateFile(szScratchFile))
    {
        cerr << "ParseUpdateFile error once" << endl;
    }
}
void ShowHomePageElement(char* szHomePageAddress)
{
if ( 0 != GetHttpFileData(szHomePageAddress,"HtmDownloadFile"))
{
cerr << "GetHttpFileData error once" << endl;
}
if( 0 != ParseHomePageDownloadFile("HtmDownloadFile"))
{
cerr << "GetHttpFileData error once" << endl;
}
}
/**
 * Program entry point: initializes MFC, shows the home page recommendations
 * and then the title information of a fixed list of entry pages, printing a
 * separator line after each page. Waits for a key press before exiting.
 *
 * @return 0 on success, 1 when MFC initialization fails.
 */
int _tmain(int argc, TCHAR* argv[], TCHAR* envp[])
{
    // Entry pages shown after the home page, in display order.
    static const char* const kEntryPages[] =
    {
        "http://www.verycd.com/entries/790244/",
        "http://www.verycd.com/entries/519062/",
        "http://www.verycd.com/entries/780306/",
        "http://www.verycd.com/entries/522227/",
        "http://www.verycd.com/entries/507338/",
        "http://www.verycd.com/entries/515005/",
        "http://www.verycd.com/entries/794197/",
        "http://www.verycd.com/entries/511135/"
    };
    static const char* const kSeparator = "****************************************************";

    int exitCode = 0;

    // Initialize MFC and report an error on failure.
    if (AfxWinInit(::GetModuleHandle(NULL), NULL, ::GetCommandLine(), 0))
    {
        ShowHomePageElement("http://www.verycd.com/");
        cout << kSeparator << endl;

        const size_t pageCount = sizeof(kEntryPages) / sizeof(kEntryPages[0]);
        for (size_t index = 0; index < pageCount; ++index)
        {
            // ShowUpdateInfo is declared with a non-const char*; the cast only
            // adapts the pointer type of the read-only table entry.
            ShowUpdateInfo(const_cast<char*>(kEntryPages[index]));
            cout << kSeparator << endl;
        }
    }
    else
    {
        // TODO: change the error code to suit your needs
        _tprintf(_T("错误: MFC 初始化失败\n"));
        exitCode = 1;
    }

    system("pause");
    return exitCode;
}
/**
 * Converts UTF-8 text to the process's ANSI code page (CP_ACP), going through
 * UTF-16 as the intermediate representation.
 *
 * NOTE(review): despite the name, the target is CP_ACP rather than an explicit
 * GBK code page, so the result is GBK only where the system ANSI code page is
 * GBK - confirm this is intended.
 *
 * The input is handed to the Win32 converters as a NUL-terminated string, so
 * conversion stops at the first embedded NUL character.
 *
 * @param strUTF8  UTF-8 encoded input text.
 * @param strGBK   receives the converted text; left untouched on failure.
 * @return 0 on success, -1 when either conversion step fails.
 */
int UTF8Str2GBK(const std::string& strUTF8,std::string& strGBK)
{
    const char* const utf8 = strUTF8.c_str();

    // Step 1: UTF-8 -> UTF-16. With a source length of -1 the reported size
    // includes the terminating NUL, so a successful call returns at least 1.
    const int wideLen = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0);
    if (wideLen <= 0)
        return -1;

    // Self-releasing buffer: no manual delete[] on any exit path.
    std::wstring wide(static_cast<std::wstring::size_type>(wideLen), L'\0');
    if (MultiByteToWideChar(CP_UTF8, 0, utf8, -1, &wide[0], wideLen) <= 0)
        return -1;

    // Step 2: UTF-16 -> ANSI code page. The output is a byte string, so the
    // buffer is char-based regardless of how TCHAR is defined for the build.
    const int ansiLen = WideCharToMultiByte(CP_ACP, 0, wide.c_str(), -1, NULL, 0, NULL, NULL);
    if (ansiLen <= 0)
        return -1;

    std::string ansi(static_cast<std::string::size_type>(ansiLen), '\0');
    if (WideCharToMultiByte(CP_ACP, 0, wide.c_str(), -1, &ansi[0], ansiLen, NULL, NULL) <= 0)
        return -1;

    // The buffer holds the converted text plus its terminator; copy the text only.
    strGBK.assign(ansi.c_str());
    return 0;
}
/**
 * Parses the downloaded home page and prints its recommendation entries.
 *
 * The file is read completely and converted from UTF-8 with UTF8Str2GBK; then
 * the keywords of the two recommendation sections are printed through
 * GetHomePageRecommend, each section preceded by its heading.
 *
 * @param szfileName  path of the downloaded home page; NULL is rejected.
 * @return 0 on success; -1 when szfileName is NULL, the file cannot be opened,
 *         or the conversion fails / yields an empty string.
 */
int ParseHomePageDownloadFile(char* szfileName)
{
    if (NULL == szfileName)
        return -1;

    // Read-only stream: the file is only consumed here, so it must not require
    // write access the way a default-mode fstream does.
    std::ifstream input(szfileName);
    if (!input.is_open())
    {
        std::cerr << "open file error: " << szfileName << std::endl;
        return -1;
    }

    // Slurp the whole file into memory.
    std::stringstream buffer;
    buffer << input.rdbuf();
    input.close();

    std::string strGbk;
    const int convertResult = UTF8Str2GBK(buffer.str(), strGbk);
    if (convertResult != 0 || strGbk.empty())
    {
        std::cerr << "transfer utf8 to gbk error" << std::endl;
        return -1;
    }

    // Writable buffers: GetHomePageRecommend takes a non-const char*, which a
    // string literal must not be bound to.
    char szBigMarker[] = "VeryCD.TrackEvent('base','首页大推',";
    char szSmallMarker[] = "VeryCD.TrackEvent('base','首页小推',";

    std::cout << "首页大推" << std::endl;
    GetHomePageRecommend(szBigMarker, strGbk);
    std::cout << "首页小推" << std::endl;
    GetHomePageRecommend(szSmallMarker, strGbk);
    return 0;
}
/**
 * Prints every distinct keyword that follows the marker szName in strGbk.
 *
 * For each occurrence of the marker, the keyword is the text that starts one
 * character after the marker (skipping the opening quote of the keyword
 * argument) and ends right before the next "')" sequence. Keywords are
 * collected in a std::set, so duplicates are dropped and the output is sorted.
 * Scanning stops at the first marker that has no closing "')".
 *
 * @param szName  marker text to search for; NULL prints nothing.
 * @param strGbk  page text to scan.
 */
void GetHomePageRecommend(char* szName,const std::string& strGbk)
{
    if (NULL == szName)
        return;

    const std::string marker(szName);
    const std::string terminator("')");
    // Marker plus the one quote character in front of the keyword. Derived from
    // the marker instead of being hard-coded, so any marker length works.
    const std::string::size_type skip = marker.size() + 1;

    std::set<std::string> setKeyWord;
    std::string::size_type searchFrom = 0;
    for (;;)
    {
        const std::string::size_type markerPos = strGbk.find(marker, searchFrom);
        if (markerPos == std::string::npos)
            break;

        const std::string::size_type endPos = strGbk.find(terminator, markerPos + 1);
        if (endPos == std::string::npos)
            break;

        // Ignore occurrences whose keyword would be empty.
        if (endPos > markerPos + skip)
        {
            setKeyWord.insert(strGbk.substr(markerPos + skip, endPos - markerPos - skip));
        }

        // Continue behind the terminator that was just consumed.
        searchFrom = endPos + 1;
    }

    for (std::set<std::string>::const_iterator pos = setKeyWord.begin(); pos != setKeyWord.end(); ++pos)
    {
        std::cout << "电驴首页推荐 " << *pos << std::endl;
    }
}
/**
 * Downloads the resource at strUrl and writes it to a local file.
 *
 * The resource is read with ReadString and every chunk is appended to the
 * output file as returned; no separator is written between chunks.
 *
 * @param strUrl                 URL to download.
 * @param szDownloadHtmFileName  path of the file to create/overwrite; NULL is
 *                               rejected.
 * @return 0 on success; -1 when the file name is NULL, the output file cannot
 *         be opened, the URL cannot be opened, or an internet exception is
 *         raised while opening or reading.
 */
int GetHttpFileData(CString strUrl,char* szDownloadHtmFileName)
{
    CInternetSession Session("Internet Explorer", 0);
    int iRet = -1;

    if (szDownloadHtmFileName == NULL)
    {
        cerr << "DownloadHtmFileName is NULL" << endl;
        Session.Close();
        return iRet;
    }

    ofstream of(szDownloadHtmFileName);
    // A failed open sets failbit, which bad() does not report; test the open
    // state directly.
    if (!of.is_open())
    {
        cerr << "of create file error" << endl;
        Session.Close();
        return iRet;
    }

    // OpenURL hands back a CStdioFile-derived object; keeping the base type
    // avoids an unchecked downcast while ReadString still dispatches virtually.
    CStdioFile* pFile = NULL;
    try
    {
        pFile = Session.OpenURL(strUrl);
        if (pFile != NULL)
        {
            CString strClip;
            while (pFile->ReadString(strClip))
            {
                of << strClip;
            }
            pFile->Close();
            iRet = 0;
        }
        else
        {
            cerr << __FUNCTION__ << " OpenURL returned no file" << endl;
        }
    }
    catch (CInternetException* pEx)
    {
        TCHAR pszError[64];
        pEx->GetErrorMessage(pszError, 64);
        cerr << __FUNCTION__ << pszError << endl;
        // MFC exceptions caught by pointer must be released by the handler.
        pEx->Delete();
    }

    // The object returned by OpenURL is owned by the caller; delete on NULL is
    // a no-op, so this covers every path above.
    delete pFile;

    Session.Close();
    of.close();
    return iRet;
}
关于字符集转换的 文章