1、单线程采集100个页面
/// <summary>
/// Single-threaded sample: downloads 100 consecutive user pages one after another,
/// prints the title match of each page and reports the total elapsed time.
/// </summary>
class Program
{
    // Id of the last page that was requested; the first page fetched is i + 1.
    static int i = 6991275;

    // Built once and reused for every page instead of being re-created per iteration.
    static readonly Regex TitleRegex = new Regex(@"<title>(.*)</title>");

    /// <summary>
    /// Times a full run of <see cref="getTitle"/> and prints the elapsed milliseconds,
    /// then waits for a key press so the console window stays open.
    /// </summary>
    static void Main(string[] args)
    {
        Stopwatch sw = new Stopwatch();
        sw.Start();
        getTitle();
        sw.Stop();
        Console.WriteLine("采集100个页面完成,用时:" + sw.ElapsedMilliseconds + "毫秒");
        Console.ReadKey();
    }

    /// <summary>
    /// Downloads the next 100 pages ("user/{id}.html" relative to the base address)
    /// sequentially and writes the title match of each page to the console.
    /// </summary>
    static void getTitle()
    {
        // WebClient is IDisposable: create it once, reuse it for all sequential
        // requests, and release it deterministically when the loop is done.
        using (WebClient wc = new WebClient())
        {
            wc.BaseAddress = "http://www.juedui100.com/";
            wc.Encoding = Encoding.UTF8;

            for (int j = 0; j < 100; j++)
            {
                string html = wc.DownloadString("user/" + ++i + ".html");
                Console.WriteLine(TitleRegex.Match(html));
            }
        }
    }
}
输出:
2、多线程采集100个页面
/// <summary>
/// Multi-threaded sample: five thread-pool workers share one page-id counter and
/// together download 100 consecutive user pages, printing each page's title match.
/// The main thread reports the elapsed time once every worker has finished.
/// </summary>
class Program
{
    // Number of thread-pool work items that download pages concurrently.
    const int WorkerCount = 5;

    // Highest page id that may be downloaded (100 pages after the initial value of i).
    const int LastPageId = 6991375;

    // Shared counter: id of the last page claimed by any worker.
    // Only ever modified through Interlocked.Increment.
    static int i = 6991275;

    // Regex instances are safe to share between threads for matching, so build it once.
    static readonly Regex TitleRegex = new Regex(@"<title>(.*)</title>");

    /// <summary>
    /// Queues the workers, blocks until all of them have signalled completion,
    /// then prints the elapsed milliseconds and waits for a key press.
    /// </summary>
    static void Main(string[] args)
    {
        Stopwatch sw = new Stopwatch();
        sw.Start();

        // The countdown replaces the former "k" counter plus busy-wait loop: that
        // counter started at 1 and was compared with 5 (so timing stopped after only
        // four workers), and its non-atomic k++ could lose an update and spin forever.
        using (CountdownEvent done = new CountdownEvent(WorkerCount))
        {
            for (int n = 0; n < WorkerCount; n++)
            {
                ThreadPool.QueueUserWorkItem(getTitle, done);
            }

            // Blocks without burning CPU until every worker has called Signal().
            done.Wait();
        }

        sw.Stop();
        Console.WriteLine("采集100个页面完成,用时:" + sw.ElapsedMilliseconds + "毫秒");
        Console.ReadKey();
    }

    /// <summary>
    /// Worker body: repeatedly claims the next page id and downloads that page
    /// until the shared counter moves past <see cref="LastPageId"/>.
    /// </summary>
    /// <param name="o">
    /// Optional <see cref="CountdownEvent"/> to signal when this worker is finished;
    /// any other value (including null) is ignored.
    /// </param>
    static void getTitle(object o)
    {
        CountdownEvent done = o as CountdownEvent;
        try
        {
            // One WebClient per worker: an instance cannot run concurrent requests,
            // but it can be reused for this worker's sequential downloads.
            using (WebClient wc = new WebClient())
            {
                wc.BaseAddress = "http://www.juedui100.com/";
                wc.Encoding = Encoding.UTF8;

                while (true)
                {
                    // Claim the id atomically first, then check the bound. Checking
                    // "i < limit" before incrementing lets several workers pass the
                    // check together and download more than 100 pages.
                    int pageId = Interlocked.Increment(ref i);
                    if (pageId > LastPageId)
                    {
                        break;
                    }

                    string html = wc.DownloadString("user/" + pageId + ".html");
                    Console.WriteLine(TitleRegex.Match(html));
                }
            }
        }
        finally
        {
            // Always report completion so the waiting thread is not left blocked.
            if (done != null)
            {
                done.Signal();
            }
        }
    }
}
输出如下:
单纯从执行时间来看,采集100个页面,用5个线程效率提升2倍多,当然这跟带宽也有关系啦。
有问题：k++ 在多个线程同时执行时好像不是线程安全的，也需要改用 Interlocked.Increment，以后再改。