zoukankan      html  css  js  c++  java
  • 简简单单C#爬虫小记

    using System;
    using System.Collections.Generic;
    using System.IO;
    using System.Linq;
    using System.Net;
    using System.Text;
    using System.Text.RegularExpressions;
    using System.Threading.Tasks;
    
    namespace 正则
    {
        /// <summary>
        /// Minimal console crawler: downloads a listing page, follows every link whose
        /// href contains "article", extracts the page title and the first
        /// <c>&lt;div class="content"&gt;</c> block with regular expressions, and writes
        /// each result to a numbered text file under <c>./data</c>.
        /// </summary>
        class Program
        {
            // Listing page whose links are crawled; relative hrefs are resolved against it.
            private const string ListUrl = "http://www.admin5.com/browse/177/";

            // Built once instead of on every loop iteration.
            // Captures the value of every href="..." attribute.
            private static readonly Regex HrefRegex = new Regex("(?<=href=\").*?(?=\")");
            // Captures the text between "title>" and "</title>".
            private static readonly Regex TitleRegex = new Regex("(?<=title>).*?(?=</title>)");
            // Captures the first <div class="content"> ... </div> span (non-greedy, spans newlines).
            private static readonly Regex ContentRegex = new Regex(@"<div class=""content"">[\s\S]*?</div>");

            /// <summary>
            /// Entry point. Crawls the listing page and saves every matching article.
            /// </summary>
            /// <param name="args">Command-line arguments (unused).</param>
            static void Main(string[] args)
            {
                Uri listUri = new Uri(ListUrl);
                string html = GetHtml(ListUrl, Encoding.UTF8);
                string dataDir = Path.Combine(Directory.GetCurrentDirectory(), "data");

                int a = 1; // sequential number used as the output file name
                foreach (Match m in HrefRegex.Matches(html))
                {
                    if (!m.Value.Contains("article"))
                    {
                        continue;
                    }

                    // Resolve the href against the listing URL so that both relative and
                    // absolute links can be fetched; passing a relative href straight to
                    // WebRequest.Create would throw UriFormatException.
                    Uri articleUri;
                    if (!Uri.TryCreate(listUri, m.Value, out articleUri))
                    {
                        continue;
                    }

                    // GetHtml casts to HttpWebRequest, so only http/https links are usable.
                    if (articleUri.Scheme != Uri.UriSchemeHttp && articleUri.Scheme != Uri.UriSchemeHttps)
                    {
                        continue;
                    }

                    Console.WriteLine(articleUri.AbsoluteUri);
                    Console.WriteLine("抓取内容");
                    string content = GetHtml(articleUri.AbsoluteUri, Encoding.UTF8);

                    Match titleMatch = TitleRegex.Match(content);
                    Match bodyMatch = ContentRegex.Match(content);
                    if (!titleMatch.Success || !bodyMatch.Success)
                    {
                        // Page does not have the expected layout; skip it instead of
                        // failing on an out-of-range index.
                        Console.WriteLine("跳过:未找到标题或正文");
                        continue;
                    }

                    string title = titleMatch.Value;
                    string neirong = bodyMatch.Value;

                    Console.WriteLine("保存数据");
                    // CreateDirectory is a no-op when the directory already exists.
                    Directory.CreateDirectory(dataDir);
                    File.WriteAllText(Path.Combine(dataDir, a + ".txt"), title + "\r\n" + neirong);
                    a++;
                    Console.WriteLine("保存成功");
                }

                Console.WriteLine("ok");
                Console.ReadKey();
            }

            /// <summary>
            /// Downloads the resource at <paramref name="url"/> and returns its body as text.
            /// </summary>
            /// <param name="url">Absolute http/https URL to fetch.</param>
            /// <param name="encoding">
            /// Encoding used to decode the response body; UTF-8 is used when null.
            /// </param>
            /// <returns>The full response body as a string.</returns>
            private static string GetHtml(string url, Encoding encoding)
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

                // Dispose the response, stream and reader so the connection is released
                // after every call.
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                using (Stream s = response.GetResponseStream())
                using (StreamReader sr = new StreamReader(s, encoding ?? Encoding.UTF8))
                {
                    return sr.ReadToEnd();
                }
            }
        }
    }
    

      

    谢谢你长得这么好看还来看我的博客!
  • 相关阅读:
    实例15_C语言绘制万年历
    医生酒精
    实例13_求解二维数组的最大元素和最小元素
    用二维数组实现矩阵转置
    C语言中的typedef跟define的区别
    C语言设计ATM存取款界面
    MyBatis,动态传入表名,字段名的解决办法
    在mybatis执行SQL语句之前进行拦截处理
    使用Eclipse构建Maven的SpringMVC项目
    Debug过程中的mock (及display窗口的使用)
  • 原文地址:https://www.cnblogs.com/hexd1230/p/4781526.html
Copyright © 2011-2022 走看看