http://www.diyifanwen.com/jinyici/jinyici-A/
页面抓取
#include <stdlib.h> #include <stdio.h> #include <string.h> #include "lyGetHttpResult.h" #include "lyPublic/lyCodeConvert.h" int main() { char szUrl[512] = ""; char *svData= NULL; char *szData = NULL; FILE *fp; char *p, *q, *s, *t; char strFrom[100] = "http://www.diyifanwen.com/", strTo[100] = ""; //char andStr[20] = "</span> - ";//查找标记串 char outStr[1024*50]= ""; char str[500]= "",str2[500]= ""; char next[100]= "",End[100] = "http://www.diyifanwen.com//jinyici/jinyici-A/"; int len; fp = fopen("1.txt","rt+"); sprintf(szUrl,"http://www.diyifanwen.com/jinyici/jinyici-A/"); szData = GetDataFromWeb(szUrl,NULL,NULL,1, 5); // fputs(szData, fp); // CodeConvert(szData, svData, sizeof(svData),1); p = strstr(szData , " title="); // printf("%c", End[43]); // p = strstr(szData , "昂首挺立"); while(p !=NULL) { q=p; q-=60; while(*q != '/') q++; strcpy(str, strFrom); strcpy(strTo,str); len = strlen(strTo); --p; while(q<p) { strTo[len++] = *(q++); } q+=9; while(*q != '"') { printf("%c",*q); fputc(*(q++) , fp); } fputs(" ",fp); puts(strTo); svData = GetDataFromWeb(strTo , NULL, NULL , 1, 5); while(!szData) svData = GetDataFromWeb(strTo , NULL, NULL , 1, 5); strcpy(szData,szData); // CodeConvert(svData, outStr,sizeof(outStr),1); // fputs(svData,fp); s = strstr(svData, "】</span>"); t = strstr(svData, "<br><span>"); s+=3;; while(s < t) { if(*s != '】' && *s!='<'&&*s != '/'&& *s != '>' && *s != '&' &&(*s < 'a'||*s > 'z')) { printf("%c",*s); fputc(*s, fp); } else if(*s==';') fputc(' ',fp); s++; } fputc(' ',fp); p+=20; q = strstr(p, " title="); p = q; memset(strTo,0,sizeof(strTo)); memset(outStr,0,sizeof(outStr)); if(p == NULL) { strcpy(strTo,str); if(!strstr(szData,"下一页</a> <a href")) { printf("oooo"); End[43]++; if(End[43]>'Z') break; szData =GetDataFromWeb(End,NULL,NULL,1 ,5); while(!szData) svData = GetDataFromWeb(strTo , NULL, NULL , 1, 5); p = strstr(szData," title="); continue; } s = strstr(szData ,"下一页"); t = s-55; s-=15; while(*t != '=') t++; t+=2; s-=3; len = strlen(strTo); while(t < s) { strTo[len++] = *t; t++; } szData = GetDataFromWeb(strTo,NULL,NULL,1 ,5); while(!szData) svData = GetDataFromWeb(strTo , NULL, NULL , 1, 5); p = strstr(szData, " title="); } memset(str,0,sizeof(str)); memset(strTo,0,sizeof(strTo)); memset(outStr,0,sizeof(outStr)); } fclose(fp); free(svData); free(szData); free(p); free(q); free(s); free(t); return 0; }