zoukankan      html  css  js  c++  java
  • C#的一个URL加载器,能处理编码、相对地址解析、GET/POST、HTML的include、页面重定向

    要让机器模拟上网,首要的问题是解决HTTP请求响应,看我们的Url加载器,功能比较强。它考虑了编码、URL的相对地址解析(见RFC),还可以POST数据,还有HTML里的<!--include-->,还有<head>里的重定向,很好用的。 

      1        /// <summary>
      2        /// 最基本的Url加载函数,其它重载函数均调用它
      3        /// </summary>
      4        /// <param name="url"></param>
      5        /// <param name="encoding"></param>
      6        /// <param name="postdata"></param>
      7        /// <param name="include">是否在客户端包含include文件</param>
      8        /// <param name="redirectioncounter">计算重定向的次数</param>
      9        /// <returns></returns>

     10        public static string LoadUrl(ref UrlOperation uo, string encoding, string postdata, bool include, int redirectioncounter)
     11        {
     12            string str;
     13
     14            string url=uo.Url;
     15            HttpWebRequest request;
     16            HttpWebResponse response;
     17
     18            //采用HTTP GET或者POST
     19            if (postdata == null)
     20                postdata = "";
     21            if (postdata.Length == 0)//HTTP GET
     22            {
     23                try
     24                {
     25                    request = (HttpWebRequest)System.Net.HttpWebRequest.Create(url);
     26                }

     27                catch
     28                {
     29                    return "";
     30                }

     31
     32                request.UserAgent = "Mozilla/4.0 (compatible; MSIE 5.5; Windows 98)";
     33
     34                //超时异常发生在这里
     35                try
     36                {
     37                    response = (HttpWebResponse)request.GetResponse();
     38                    //uo.Url = response.ResponseUri.ToString();
     39                }

     40                catch
     41                {
     42                    return "";
     43                }

     44                
     45                System.IO.Stream stream = response.GetResponseStream();
     46
     47                Encoding source;
     48                try
     49                {
     50                    source = Encoding.GetEncoding(encoding);
     51                }

     52                catch
     53                {
     54                    source = Encoding.UTF8;
     55                }

     56
     57                StreamReader sr = new StreamReader(stream, source);
     58                try
     59                {
     60                    str = sr.ReadToEnd();
     61                }

     62                catch 
     63                {
     64                    return "";
     65                }

     66                sr.Close();
     67                stream.Close();
     68            }

     69            else//HTTP POST
     70            {
     71                try
     72                {
     73                    ASCIIEncoding asciiencoding = new ASCIIEncoding();
     74                    byte[] bytes = asciiencoding.GetBytes(postdata);
     75
     76                    request = (HttpWebRequest)System.Net.HttpWebRequest.Create(url);
     77                    request.Method = "POST";
     78                    request.ContentType = "application/x-www-form-urlencoded";
     79                    request.ContentLength = postdata.Length;
     80
     81                    Stream poststream = request.GetRequestStream();
     82                    poststream.Write(bytes, 0, bytes.Length);
     83                    poststream.Close();
     84
     85                    response = (HttpWebResponse)request.GetResponse();
     86
     87                    StreamReader sr = new StreamReader(response.GetResponseStream(), System.Text.Encoding.GetEncoding("GB2312"));
     88                    str = sr.ReadToEnd();
     89                    response.Close();
     90                }

     91                catch
     92                {
     93                    return "";
     94                }

     95            }

     96
     97            uo.Url = response.ResponseUri.ToString();
     98
     99            //在客户端包含include文件
    100            if (include)
    101            {
    102                System.Text.RegularExpressions.Regex regex = new Regex(@"<!--\W*include.*?-->", RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline);
    103                MatchCollection mc = regex.Matches(str);
    104                if (mc.Count > 0)
    105                {
    106                    System.Text.RegularExpressions.Regex urlregex = new Regex("(?<=\").*(?=\")", RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline);
    107
    108                    string[] segments = regex.Split(str);
    109
    110                    StringBuilder sb = new StringBuilder();
    111                    sb.Append(segments[0]);
    112                    for (int i = 1; i <= mc.Count; i++)
    113                    {
    114                        string s = mc[i - 1].Value;
    115                        string newurl = urlregex.Match(s).Value;
    116                        UrlOperation newuo = uo.Forward(newurl);
    117                        string included = LoadUrl(ref newuo, encoding, ""true);
    118                        sb.Append(included);
    119                        sb.Append(segments[i]);
    120                    }

    121
    122                    str = sb.ToString();
    123                }

    124            }

    125
    126            //页面重定向
    127            string redirection=GetRedirection(str).Trim();
    128            if (redirection.Length > 0&&redirectioncounter<5)
    129            {
    130                uo=uo.Forward(redirection);
    131                return LoadUrl(ref uo, encoding, postdata, include, redirectioncounter + 1);
    132            }

    133            else
    134                return str;
    135        }

  • 相关阅读:
    李宏毅 Keras手写数字集识别(优化篇)
    李宏毅 Keras2.0演示
    李宏毅 线性回归预测PM2.5
    李宏毅 Gradient Descent Demo 代码讲解
    Pandas导入导出&pickle文件模块
    python(29)Tinker+BeautifulSoup+Request抓取美女壁纸
    golang(11) 反射用法详解
    golang(10)interface应用和复习
    golang(09) golang 接口内部实现
    golang(08)接口介绍
  • 原文地址:https://www.cnblogs.com/fery/p/1606867.html
Copyright © 2011-2022 走看看