zoukankan      html  css  js  c++  java
  • C#获取指定网页源码的几种方法

           // WebClient
           private string GetWebClient(string url)
            {
                string strHTML = "";
                WebClient myWebClient = new WebClient();
                Stream myStream = myWebClient.OpenRead(url);
                StreamReader sr = new StreamReader(myStream, System.Text.Encoding.GetEncoding("utf-8"));
                strHTML = sr.ReadToEnd();
                myStream.Close();
                return strHTML;
            }
    
            // WebRequest
            private string GetWebRequest(string url)
            {
                Uri uri = new Uri(url);
                WebRequest myReq = WebRequest.Create(uri);
                WebResponse result = myReq.GetResponse();
                Stream receviceStream = result.GetResponseStream();
                StreamReader readerOfStream = new StreamReader(receviceStream,System.Text.Encoding.GetEncoding("gb2312"));
                string strHTML = readerOfStream.ReadToEnd();
                readerOfStream.Close();
                receviceStream.Close();
                result.Close();
                return strHTML;
            }
    
            // HttpWebRequest
            private string GetHttpWebRequest(string url)
            {
                try
                {
                    Uri uri = new Uri(url);
                    HttpWebRequest myReq = (HttpWebRequest)WebRequest.Create(uri);
                    myReq.UserAgent = "User-Agent:Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705";
                    myReq.Accept = "*/*";
                    myReq.KeepAlive = true;
                    myReq.Headers.Add("Accept-Language", "zh-cn,en-us;q=0.5");
                    HttpWebResponse result = (HttpWebResponse)myReq.GetResponse();
                    Stream receviceStream = result.GetResponseStream();
                    StreamReader readerOfStream = new StreamReader(receviceStream, System.Text.Encoding.GetEncoding("gb2312"));
                    string strHTML = readerOfStream.ReadToEnd();
                    readerOfStream.Close();
                    receviceStream.Close();
                    result.Close();
                    
                    return strHTML;
                }
                catch (Exception ex)
                {
                    throw new Exception("采集指定网址异常," + ex.Message);
                }
            }
    // 获取网页源码,如果启用了gzip压缩后页面获取会产生乱码,采用此方法可解决gzip压缩而产生的乱码情况
           private string GetHtmlCode(string url)
           {
               string htmlCode;
               HttpWebRequest webRequest = (System.Net.HttpWebRequest)System.Net.WebRequest.Create(url);
                webRequest.Timeout = 30000;
                webRequest.Method = "GET";
                webRequest.UserAgent = "Mozilla/4.0";
                webRequest.Headers.Add("Accept-Encoding", "gzip, deflate");
                HttpWebResponse webResponse = (System.Net.HttpWebResponse)webRequest.GetResponse();
                if (webResponse.ContentEncoding.ToLower() == "gzip")//如果使用了GZip则先解压            {
                    using (System.IO.Stream streamReceive = webResponse.GetResponseStream())
                    {
                        using (var zipStream =
                            new System.IO.Compression.GZipStream(streamReceive, System.IO.Compression.CompressionMode.Decompress))
                        {
                            using (StreamReader sr = new System.IO.StreamReader(zipStream, Encoding.Default))
                            {
                                htmlCode = sr.ReadToEnd();
                            }
                        }
                    }
                }
                else
                {
                    using (System.IO.Stream streamReceive = webResponse.GetResponseStream())
                    {
                        using (System.IO.StreamReader sr = new System.IO.StreamReader(streamReceive, Encoding.Default))
                        {
                            htmlCode = sr.ReadToEnd();
                        }
                    }
                }
    
                return htmlCode;
            }
  • 相关阅读:
    【Anagrams】 cpp
    【Count and Say】cpp
    【Roman To Integer】cpp
    【Integer To Roman】cpp
    【Valid Number】cpp
    重构之 实体与引用 逻辑实体 逻辑存在的形式 可引用逻辑实体 不可引用逻辑实体 散弹式修改
    Maven项目聚合 jar包锁定 依赖传递 私服
    Oracle学习2 视图 索引 sql编程 游标 存储过程 存储函数 触发器
    mysql案例~tcpdump的使用
    tidb架构~本地化安装
  • 原文地址:https://www.cnblogs.com/tyjsjl/p/3232229.html
Copyright © 2011-2022 走看看