zoukankan      html  css  js  c++  java
  • 爬虫

    1.提取页面HTML代码

    方法一:采用 WebClient,代码如下:
    using System.Net;
    {
        // Placeholder address; replace it with the URL of the page to download.
        string strurl = "网址";

        // WebClient implements IDisposable, so scope it with `using` to release
        // its underlying resources as soon as the download has finished.
        using (WebClient aWebClient = new WebClient())
        {
            // Decode the downloaded bytes as UTF-8.
            aWebClient.Encoding = System.Text.Encoding.UTF8;
            string htmlcode = aWebClient.DownloadString(strurl);
            txtbox.Text = htmlcode; // txtbox is a text box
        }
    }
    方法二:采用 HttpWebRequest 和 HttpWebResponse,代码如下:
    using System.Net;
    {
        string strurl = "http://www.weather.com.cn/weather/101310101.shtml?from=cn";
        HttpWebRequest myReq = (HttpWebRequest)WebRequest.Create(strurl);

        // Request Chinese content through the Accept-Language header.
        // Accept-Language is a header of its own; its name and value must not
        // be written into the Accept header, which carries media types.
        myReq.Headers[HttpRequestHeader.AcceptLanguage] = "zh-cn";

        // Follow redirects automatically, but at most one hop.
        myReq.AllowAutoRedirect = true;
        myReq.MaximumAutomaticRedirections = 1;
        myReq.Referer = "weather";

        // The response, its stream and the reader are all disposable. Leaving
        // them open keeps the underlying connection allocated, so scope all
        // three with `using`.
        using (HttpWebResponse myResponse = (HttpWebResponse)myReq.GetResponse())
        using (Stream myStream = myResponse.GetResponseStream())
        using (StreamReader myReader = new StreamReader(myStream, System.Text.Encoding.UTF8))
        {
            txtbox.Text = myReader.ReadToEnd(); // txtbox is a text box
        }
    }

     2.添加引用

    Winista.HtmlParser.dll

    /// <summary>
    /// Downloads a page, parses it with Winista.HtmlParser, and prints the
    /// node whose <c>id</c> attribute equals <c>contentText</c>.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main(string[] args)
    {
        string strurl = "http://hq.cnpc/cwb/news/bnxx/Pages/20171228_C1852.aspx ";

        string htmlcode;
        // WebClient is IDisposable; release it once the page has been fetched.
        using (WebClient aWebClient = new WebClient())
        {
            aWebClient.Encoding = System.Text.Encoding.UTF8;
            htmlcode = aWebClient.DownloadString(strurl);
        }

        // The second argument is the charset name handed to the parser
        // (the original author noted GBK as an alternative).
        Parser parser = Parser.CreateParser(htmlcode, "utf-8");

        // NOTE(review): `page` is never read below. It is kept only in case
        // constructing HtmlPage has side effects on the parser - confirm and remove.
        HtmlPage page = new HtmlPage(parser);

        // Keep only nodes carrying id="contentText".
        HasAttributeFilter filter = new HasAttributeFilter("id", "contentText");
        NodeList result = parser.Parse(filter);

        // Guard against a page without a matching node: indexing result[0]
        // on an empty list would otherwise crash the program.
        if (result == null || result.Count == 0)
        {
            Console.Error.WriteLine("No element with id=\"contentText\" was found.");
            Console.ReadKey();
            return;
        }

        TagNode tag = (TagNode)result[0];

        Console.WriteLine(result.ToHtml());
        Console.WriteLine(tag.GetAttribute("id"));
        Console.ReadKey();
    }
    

      3.输出:

  • 相关阅读:
    dp有哪些种类
    hibernate 双向n-n
    TextView——setCompoundDrawables说明
    Codeforces Round #FF (Div. 2)
    波折yosemite下载过程
    谁,例如下列方法区的指导下
    [LeetCode] 032. Longest Valid Parentheses (Hard) (C++)
    TCP/IP的经典网络编程
    NYOJ353 3D dungeon 【BFS】
    Ubuntu 14.04 grub2 温馨提示
  • 原文地址:https://www.cnblogs.com/liuqiyun/p/8696194.html
Copyright © 2011-2022 走看看