这是一个简单的 C++ 爬虫，效率并不是很高。
// NOTE(review): this translation unit is a standalone judge-style program
// (counting numbers whose digits include 5, 2 and 1) and looks unrelated to
// the crawler sources in this project — confirm it belongs here.
#include <stdio.h>

#define MAX_N 1000000  /* largest precomputed value; also the array size */

// Prefix tables, filled once by fun():
//   s1[i-1]: how many numbers in [2, i] contain all three digits 5, 2, 1.
//   s2[i-1]: how many of those also contain "521" as consecutive digits.
int s1[MAX_N], s2[MAX_N];

// Fill both prefix tables for every value in [a, b].
// Precondition: 2 <= a and b <= MAX_N (indices i-1 and i-2 stay in range).
void fun(int a, int b)
{
    s1[0] = s2[0] = s1[1] = s2[1] = 0;
    for (int i = a; i <= b; i++) {
        bool has5 = false, has2 = false, has1 = false;
        // Scan the decimal digits of i, least significant first.
        // (The original shadowed the parameters with locals named a/b/c
        // here — renamed to avoid the shadowing.)
        for (int rest = i; rest != 0; rest /= 10) {
            int d = rest % 10;
            if (d == 5)
                has5 = true;
            else if (d == 2)
                has2 = true;
            else if (d == 1)
                has1 = true;
        }
        if (has5 && has2 && has1) {
            s1[i - 1] = s1[i - 2] + 1;
            // Look for the consecutive pattern "521": at some position the
            // units digit is 1, the tens digit 2, the hundreds digit 5.
            // (Original also tested hundreds > 0 — redundant with == 5.)
            bool has521 = false;
            for (int rest = i; rest != 0; rest /= 10) {
                int units = rest % 10;
                int tens = (rest / 10) % 10;
                int hundreds = (rest / 100) % 10;
                if (units == 1 && tens == 2 && hundreds == 5)
                    has521 = true;
            }
            s2[i - 1] = has521 ? s2[i - 2] + 1 : s2[i - 2];
        } else {
            // i qualifies for neither count: carry both prefixes forward.
            s1[i - 1] = s1[i - 2];
            s2[i - 1] = s2[i - 2];
        }
    }
}

int main()
{
    int a, b, i = 1;
    fun(2, MAX_N);  // precompute once; every query below is then O(1)
    while (scanf("%d%d", &a, &b) != EOF) {
        // For a == 1 there is no s?[a-2] slot, so subtract s?[a-1]
        // (which is s?[0] == 0) instead.
        if (a == 1)
            printf("Case %d:%d %d ", i, s1[b - 1] - s1[a - 1], s2[b - 1] - s2[a - 1]);
        else
            printf("Case %d:%d %d ", i, s1[b - 1] - s1[a - 2], s2[b - 1] - s2[a - 2]);
        i++;
    }
    return 0;
}
1 #include"urlThread.h" 2 #include<QFile> 3 #include<QMessageBox> 4 #include<QTextStream> 5 #include <QMainWindow> 6 void urlThread::run() 7 { 8 open(); 9 } 10 11 void urlThread::startThread() 12 { 13 start(); 14 } 15 16 //显示找到的url 17 void urlThread::open() 18 { 19 QString path = "url.txt"; 20 QFile file(path); 21 if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) { 22 // QMessageBox::warning(this,tr("Read File"), 23 // tr("Cannot open file: %1").arg(path)); 24 send("error!cannot open url.txt!"); 25 return; 26 } 27 QTextStream in(&file); 28 while(in.readLine().compare("") != 0){ 29 //ui->textBrowser->append(in.readLine()); 30 send(q2s(in.readLine())); 31 Sleep(1); 32 } 33 file.close(); 34 }
#include "mainwindow.h"
#include <QApplication>

// Application entry point: create the Qt application object, show the
// main window (titled "小小爬虫", i.e. "little crawler") and run the
// event loop until the user quits.
int main(int argc, char *argv[])
{
    QApplication a(argc, argv);
    MainWindow w;
    w.setWindowTitle("小小爬虫");
    w.show();

    return a.exec();
}
1 #include "mainwindow.h" 2 #include "ui_mainwindow.h" 3 4 MainWindow::MainWindow(QWidget *parent) : 5 QMainWindow(parent), 6 ui(new Ui::MainWindow) 7 { 8 ui->setupUi(this); 9 QObject::connect(ui->start,SIGNAL(released()),this,SLOT(beginGeturl())); 10 //QObject::connect(ui->display,SIGNAL(released()),this,SLOT(open())); 11 QObject::connect(ui->display,SIGNAL(released()),&uth,SLOT(startThread())); 12 QObject::connect(&uth,&urlThread::sendMessage,this,&MainWindow::receiveMessage); 13 QObject::connect(&crawler,&Crawler::sendMessage,this,&MainWindow::receiveMessage); 14 } 15 16 MainWindow::~MainWindow() 17 { 18 delete ui; 19 } 20 21 void MainWindow::receiveMessage(const QString name) 22 { 23 ui->textBrowser->append(name); 24 ui->textBrowser->moveCursor(QTextCursor::End); 25 } 26 27 void MainWindow::open() 28 { 29 QString path = "url.txt"; 30 QFile file(path); 31 if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) { 32 QMessageBox::warning(this,tr("Read File"), 33 tr("Cannot open file: %1").arg(path)); 34 return; 35 } 36 QTextStream in(&file); 37 while(in.readLine().compare("") != 0){ 38 //ui->textBrowser->append(in.readLine()); 39 crawler.send(q2s(in.readLine())); 40 } 41 file.close(); 42 } 43 44 void MainWindow::beginGeturl() 45 { 46 //crawler = new Crawler(); 47 string url = "" ,dep, filter = "www"; 48 if(!ui->site->text().isEmpty()) 49 url = q2s(ui->site->text()); 50 crawler.addURL(url); 51 int depth = 1; 52 if(!ui->depth->text().isEmpty()) 53 { 54 url = q2s(ui->depth->text()); 55 depth = atoi(url.c_str()); 56 } 57 if(!ui->filter->text().isEmpty()) 58 filter = q2s(ui->filter->text()); 59 crawler.setJdugeDomain(filter); 60 crawler.setDepth(depth); 61 crawler.startThread(); 62 }
1 #ifndef CRAWLER_H 2 #define CRAWLER_H 3 4 #include<set> 5 #include<string> 6 #include<queue> 7 #include "winsock2.h" 8 #include <iostream> 9 #include <fstream> 10 #include <stdio.h> 11 #include<time.h> 12 #include<winsock.h> 13 #include<QThread> 14 15 #pragma comment(lib, "ws2_32.lib") 16 using namespace std; 17 18 bool ParseURL(const string & url, string & host, string & resource); 19 bool GetHttpResponse(const string & url, char * &response, int &bytesRead); 20 QString s2q(const string &s); 21 string q2s(const QString &s); 22 23 #define DEFAULT_PAGE_BUF_SIZE 1000000 24 25 class Crawler: public QThread 26 { 27 Q_OBJECT 28 private: 29 queue<string> urlWaiting; 30 set<string> urlWaitset; 31 set<string> urlProcessed; 32 set<string> urlError; 33 set<string> disallow; 34 set<string>::iterator it; 35 int numFindUrl; 36 time_t starttime, finish; 37 string filter; 38 int depth; 39 40 public: 41 Crawler(){ filter = "