zoukankan      html  css  js  c++  java
  • 三种 ASP.NET 抓取网页源代码的方法(推荐方法一)

            /// <summary>方法一:比较推荐
            /// 用HttpWebRequest取得网页源码
            /// 对于带BOM的网页很有效,不管是什么编码都能正确识别
            /// </summary>
            /// <param name="url">网页地址" </param>
            /// <returns>返回网页源文件</returns>
            public static string GetHtmlSource2(string url)
            {
                //处理内容
                string html = "";
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
                request.Accept = "*/*"; //接受任意文件
                request.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.1.4322)"; // 模拟使用IE在浏览 http://www.52mvc.com
                request.AllowAutoRedirect = true;//是否允许302
                //request.CookieContainer = new CookieContainer();//cookie容器,
                request.Referer = url; //当前页面的引用
                HttpWebResponse response = (HttpWebResponse)request.GetResponse();
                Stream stream = response.GetResponseStream();
                StreamReader reader = new StreamReader(stream, Encoding.Default);
                html = reader.ReadToEnd();
                stream.Close();
                return html;
            }
            

            //方法二:
            public static string GetHttpData2(string Url)
            {
                string sException = null;
                string sRslt = null;
                WebResponse oWebRps = null;
                WebRequest oWebRqst = WebRequest.Create(Url);
                oWebRqst.Timeout = 50000;
                try
                {
                    oWebRps = oWebRqst.GetResponse();
                }
                catch (WebException e)
                {
                    sException = e.Message.ToString();
                }
                catch (Exception e)
                {
                    sException = e.ToString();
                }
                finally
                {
                    if (oWebRps != null)
                    {
                        StreamReader oStreamRd = new StreamReader(oWebRps.GetResponseStream(), Encoding.GetEncoding("utf-8"));
                        sRslt = oStreamRd.ReadToEnd();
                        oStreamRd.Close();
                        oWebRps.Close();
                    }
                }
                return sRslt;
            }


            /// <summary>方法三:
            ///
            /// </summary>
            /// <param name="url">/要访问的网站地址</param>
            /// <param name="charSets">目标网页的编码,如果传入的是null或者"",那就自动分析网页的编码</param>
            /// <returns></returns>
            public static string getHtml(string url, params  string[] charSets)
            {
                try
                {
                    string charSet = null;
                    if (charSets.Length == 1)
                    {
                        charSet = charSets[0];
                    }
                    WebClient myWebClient = new WebClient(); //创建WebClient实例myWebClient
                    // 需要注意的:
                    //有的网页可能下不下来,有种种原因比如需要cookie,编码问题等等
                    //这是就要具体问题具体分析比如在头部加入cookie
                    // webclient.Headers.Add("Cookie", cookie);
                    //这样可能需要一些重载方法.根据需要写就可以了
                    //获取或设置用于对向 Internet 资源的请求进行身份验证的网络凭据.
                    myWebClient.Credentials = CredentialCache.DefaultCredentials;
                    //如果服务器要验证用户名,密码
                    //NetworkCredential mycred = new NetworkCredential(struser, strpassword);
                    //myWebClient.Credentials = mycred;
                    //从资源下载数据并返回字节数组.(加@是因为网址中间有"/"符号)
                    byte[] myDataBuffer = myWebClient.DownloadData(url);
                    string strWebData = Encoding.Default.GetString(myDataBuffer);
                    //获取网页字符编码描述信息
                    Match charSetMatch = Regex.Match(strWebData, "<meta([^<]*)charset=([^<]*)", RegexOptions.IgnoreCase | RegexOptions.Multiline);
                    string webCharSet = charSetMatch.Groups[2].Value;
                    if (charSet == null || charSet == "")
                        charSet = webCharSet;
                    if (charSet != null && charSet != "" && Encoding.GetEncoding(charSet) != Encoding.Default)
                    {
                        strWebData = Encoding.GetEncoding(charSet).GetString(myDataBuffer);
                    }
                    else
                    {
                        strWebData = Encoding.GetEncoding("utf-8").GetString(myDataBuffer);
                    }
                    return strWebData;
                }
                catch (Exception e) { return ""; }
            }
  • 相关阅读:
    WPF之感触
    C# WinForm 给DataTable中指定位置添加列
    MyEclipse 8.6 download 官方下载地址
    将博客搬至CSDN
    Building Microservices with Spring Cloud
    Building Microservices with Spring Cloud
    Building Microservices with Spring Cloud
    Building Microservices with Spring Cloud
    Building Microservices with Spring Cloud
    Building Microservices with Spring Cloud
  • 原文地址:https://www.cnblogs.com/ful1021/p/4804500.html
Copyright © 2011-2022 走看看