zoukankan      html  css  js  c++  java
  • 一个比较完善的httpWebRequest 封装,适合网络爬取及暴力破解

    大家在模拟http请求的时候,对保持长连接及cookies,http头部信息等了解的不是那么深入。在各种网络请求过程中,发送N种问题。

    可能问题如下:

    1)登录成功后session保持

    2)保证所有cookies回传到服务器

    3)http头这么多,少一个,请求可能会失败

    4)各种编码问题,gzip等压缩问题

    为了解决这些问题,本人花了一天时间写了以下一个类,专门做http请求

      1 using System;
      2 using System.Collections.Generic;
      3 using System.IO;
      4 using System.IO.Compression;
      5 using System.Linq;
      6 using System.Net;
      7 using System.Text;
      8 using System.Threading.Tasks;
      9 
     10 namespace ScanWeb
     11 {
     12     //zetee
     13     //不能Host、Connection、User-Agent、Referer、Range、Content-Type、Content-Length、Expect、Proxy-Connection、If-Modified-Since
     14     //等header. 这些header都是通过属性来设置的 。
     15     public class HttpRequestClient
     16     {
     17         static HashSet<String> UNCHANGEHEADS = new HashSet<string>();
     18         static HttpRequestClient()
     19         {
     20             UNCHANGEHEADS.Add("Host");
     21             UNCHANGEHEADS.Add("Connection");
     22             UNCHANGEHEADS.Add("User-Agent");
     23             UNCHANGEHEADS.Add("Referer");
     24             UNCHANGEHEADS.Add("Range");
     25             UNCHANGEHEADS.Add("Content-Type");
     26             UNCHANGEHEADS.Add("Content-Length");
     27             UNCHANGEHEADS.Add("Expect");
     28             UNCHANGEHEADS.Add("Proxy-Connection");
     29             UNCHANGEHEADS.Add("If-Modified-Since");
     30             UNCHANGEHEADS.Add("Keep-alive");
     31             UNCHANGEHEADS.Add("Accept");
     32 
     33             ServicePointManager.DefaultConnectionLimit = 1000;//最大连接数
     34 
     35         }
     36 
     37         /// <summary>
     38         /// 默认的头
     39         /// </summary>
     40         public static string defaultHeaders = @"Accept:text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8
     41 Accept-Encoding:gzip, deflate, sdch
     42 Accept-Language:zh-CN,zh;q=0.8
     43 Cache-Control:no-cache
     44 Connection:keep-alive
     45 Pragma:no-cache
     46 Upgrade-Insecure-Requests:1
     47 User-Agent:Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36";
     48 
     49         /// <summary>
     50         /// 是否跟踪cookies
     51         /// </summary>
     52         bool isTrackCookies = false;
     53         /// <summary>
     54         /// cookies 字典
     55         /// </summary>
     56         Dictionary<String, Cookie> cookieDic = new Dictionary<string, Cookie>();
     57 
     58         /// <summary>
     59         /// 平均相应时间
     60         /// </summary>
     61         long avgResponseMilliseconds = -1;
     62 
     63         /// <summary>
     64         /// 平均相应时间
     65         /// </summary>
     66         public long AvgResponseMilliseconds
     67         {
     68             get
     69             {
     70                 return avgResponseMilliseconds;
     71             }
     72 
     73             set
     74             {
     75                 if (avgResponseMilliseconds != -1)
     76                 {
     77                     avgResponseMilliseconds = value + avgResponseMilliseconds / 2;
     78                 }
     79                 else
     80                 {
     81                     avgResponseMilliseconds = value;
     82                 }
     83 
     84             }
     85         }
     86 
     87         public HttpRequestClient(bool isTrackCookies = false)
     88         {
     89             this.isTrackCookies = isTrackCookies;
     90         }
     91         /// <summary>
     92         /// http请求
     93         /// </summary>
     94         /// <param name="url"></param>
     95         /// <param name="method">POST,GET</param>
     96         /// <param name="headers">http的头部,直接拷贝谷歌请求的头部即可</param>
     97         /// <param name="content">content,每个key,value 都要UrlEncode才行</param>
     98         /// <param name="contentEncode">content的编码</param>
     99         /// <param name="proxyUrl">代理url</param>
    100         /// <returns></returns>
    101         public string http(string url, string method, string headers, string content, Encoding contentEncode, string proxyUrl)
    102         {
    103             HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
    104             request.Method = method;
    105             if(method.Equals("GET",StringComparison.InvariantCultureIgnoreCase))
    106             {
    107                 request.MaximumAutomaticRedirections = 100;
    108                 request.AllowAutoRedirect = false;
    109             }
    110             
    111             fillHeaders(request, headers);
    112             fillProxy(request, proxyUrl);
    113 
    114             #region 添加Post 参数  
    115             if (contentEncode == null)
    116             {
    117                 contentEncode = Encoding.UTF8;
    118             }
    119             if (!string.IsNullOrWhiteSpace(content))
    120             {
    121                 byte[] data = contentEncode.GetBytes(content);
    122                 request.ContentLength = data.Length;
    123                 using (Stream reqStream = request.GetRequestStream())
    124                 {
    125                     reqStream.Write(data, 0, data.Length);
    126                     reqStream.Close();
    127                 }
    128             }
    129             #endregion
    130 
    131             HttpWebResponse response = null;
    132             System.Diagnostics.Stopwatch sw = new System.Diagnostics.Stopwatch();
    133             try
    134             {
    135                 sw.Start();
    136                 response = (HttpWebResponse)request.GetResponse();
    137                 sw.Stop();
    138                 AvgResponseMilliseconds = sw.ElapsedMilliseconds;
    139                 CookieCollection cc = new CookieCollection();
    140                 string cookieString = response.Headers[HttpResponseHeader.SetCookie];
    141                 if (!string.IsNullOrWhiteSpace(cookieString))
    142                 {
    143                     var spilit = cookieString.Split(';');
    144                     foreach (string item in spilit)
    145                     {
    146                         var kv = item.Split('=');
    147                         if (kv.Length == 2)
    148                             cc.Add(new Cookie(kv[0].Trim(), kv[1].Trim()));
    149                     }
    150                 }
    151                 trackCookies(cc);
    152             }
    153             catch (Exception ex)
    154             {
    155                 sw.Stop();
    156                 AvgResponseMilliseconds = sw.ElapsedMilliseconds;
    157                 return "";
    158             }
    159 
    160             string result = getResponseBody(response);
    161             return result;
    162         }
    163 
    164         /// <summary>
    165         /// post 请求
    166         /// </summary>
    167         /// <param name="url"></param>
    168         /// <param name="headers"></param>
    169         /// <param name="content"></param>
    170         /// <param name="contentEncode"></param>
    171         /// <param name="proxyUrl"></param>
    172         /// <returns></returns>
    173         public string httpPost(string url, string headers, string content, Encoding contentEncode, string proxyUrl = null)
    174         {
    175             return http(url, "POST", headers, content, contentEncode, proxyUrl);
    176         }
    177 
    178         /// <summary>
    179         /// get 请求
    180         /// </summary>
    181         /// <param name="url"></param>
    182         /// <param name="headers"></param>
    183         /// <param name="content"></param>
    184         /// <param name="proxyUrl"></param>
    185         /// <returns></returns>
    186         public string httpGet(string url, string headers, string content=null, string proxyUrl=null)
    187         {
    188             return http(url, "GET", headers, null, null, proxyUrl);
    189         }
    190 
    191         /// <summary>
    192         /// 填充代理
    193         /// </summary>
    194         /// <param name="proxyUri"></param>
    195         private void fillProxy(HttpWebRequest request, string proxyUri)
    196         {
    197             if (!string.IsNullOrWhiteSpace(proxyUri))
    198             {
    199                 WebProxy proxy = new WebProxy();
    200                 proxy.Address = new Uri(proxyUri);
    201                 request.Proxy = proxy;
    202             }
    203         }
    204 
    205 
    206         /// <summary>
    207         /// 跟踪cookies
    208         /// </summary>
    209         /// <param name="cookies"></param>
    210         private void trackCookies(CookieCollection cookies)
    211         {
    212             if (!isTrackCookies) return;
    213             if (cookies == null) return;
    214             foreach (Cookie c in cookies)
    215             {
    216                 if (cookieDic.ContainsKey(c.Name))
    217                 {
    218                     cookieDic[c.Name] = c;
    219                 }
    220                 else
    221                 {
    222                     cookieDic.Add(c.Name, c);
    223                 }
    224             }
    225 
    226         }
    227 
    228         /// <summary>
    229         /// 格式cookies
    230         /// </summary>
    231         /// <param name="cookies"></param>
    232         private string getCookieStr()
    233         {
    234             StringBuilder sb = new StringBuilder();
    235             foreach (KeyValuePair<string, Cookie> item in cookieDic)
    236             {
    237                 if (!item.Value.Expired)
    238                 {
    239                     if (sb.Length == 0)
    240                     {
    241                         sb.Append(item.Key).Append("=").Append(item.Value.Value);
    242                     }
    243                     else
    244                     {
    245                         sb.Append("; ").Append(item.Key).Append(" = ").Append(item.Value.Value);
    246                     }
    247                 }
    248             }
    249             return sb.ToString();
    250 
    251         }
    252 
    253         /// <summary>
    254         /// 填充头
    255         /// </summary>
    256         /// <param name="request"></param>
    257         /// <param name="headers"></param>
    258         private void fillHeaders(HttpWebRequest request, string headers, bool isPrint = false)
    259         {
    260             if (request == null) return;
    261             if (string.IsNullOrWhiteSpace(headers)) return;
    262             string[] hsplit = headers.Split(new String[] { "
    " }, StringSplitOptions.RemoveEmptyEntries);
    263             foreach (string item in hsplit)
    264             {
    265                 string[] kv = item.Split(':');
    266                 string key = kv[0].Trim();
    267                 string value = string.Join(":", kv.Skip(1)).Trim();
    268                 if (!UNCHANGEHEADS.Contains(key))
    269                 {
    270                     request.Headers.Add(key, value);
    271                 }
    272                 else
    273                 {
    274                     #region  设置http头
    275                     switch (key)
    276                     {
    277 
    278                         case "Accept":
    279                             {
    280                                 request.Accept = value;
    281                                 break;
    282                             }
    283                         case "Host":
    284                             {
    285                                 request.Host = value;
    286                                 break;
    287                             }
    288                         case "Connection":
    289                             {
    290                                 if (value == "keep-alive")
    291                                 {
    292                                     request.KeepAlive = true;
    293                                 }
    294                                 else
    295                                 {
    296                                     request.KeepAlive = false;//just test
    297                                 }
    298                                 break;
    299                             }
    300                         case "Content-Type":
    301                             {
    302                                 request.ContentType = value;
    303                                 break;
    304                             }
    305 
    306                         case "User-Agent":
    307                             {
    308                                 request.UserAgent = value;
    309                                 break;
    310                             }
    311                         case "Referer":
    312                             {
    313                                 request.Referer = value;
    314                                 break;
    315                             }
    316 
    317                         case "Content-Length":
    318                             {
    319                                 request.ContentLength = Convert.ToInt64(value);
    320                                 break;
    321                             }
    322                         case "Expect":
    323                             {
    324                                 request.Expect = value;
    325                                 break;
    326                             }
    327                         case "If-Modified-Since":
    328                             {
    329                                 request.IfModifiedSince = Convert.ToDateTime(value);
    330                                 break;
    331                             }
    332                         default:
    333                             break;
    334                     }
    335                     #endregion
    336                 }
    337             }
    338             CookieCollection cc = new CookieCollection();
    339             string cookieString = request.Headers[HttpRequestHeader.Cookie];
    340             if (!string.IsNullOrWhiteSpace(cookieString))
    341             {
    342                 var spilit = cookieString.Split(';');
    343                 foreach (string item in spilit)
    344                 {
    345                     var kv = item.Split('=');
    346                     if (kv.Length == 2)
    347                         cc.Add(new Cookie(kv[0].Trim(), kv[1].Trim()));
    348                 }
    349             }
    350             trackCookies(cc);
    351             if (!isTrackCookies)
    352             {
    353                 request.Headers[HttpRequestHeader.Cookie] = "";
    354             }
    355             else
    356             {
    357                 request.Headers[HttpRequestHeader.Cookie] = getCookieStr();
    358             }
    359 
    360             #region 打印头
    361             if (isPrint)
    362             {
    363                 for (int i = 0; i < request.Headers.AllKeys.Length; i++)
    364                 {
    365                     string key = request.Headers.AllKeys[i];
    366                     System.Console.WriteLine(key + ":" + request.Headers[key]);
    367                 }
    368             }
    369             #endregion
    370 
    371         }
    372 
    373 
    374         /// <summary>
    375         /// 打印ResponseHeaders
    376         /// </summary>
    377         /// <param name="response"></param>
    378         private void printResponseHeaders(HttpWebResponse response)
    379         {
    380             #region 打印头
    381             if (response == null) return;
    382             for (int i = 0; i < response.Headers.AllKeys.Length; i++)
    383             {
    384                 string key = response.Headers.AllKeys[i];
    385                 System.Console.WriteLine(key + ":" + response.Headers[key]);
    386             }
    387             #endregion
    388         }
    389 
    390 
    391         /// <summary>
    392         /// 返回body内容
    393         /// </summary>
    394         /// <param name="response"></param>
    395         /// <returns></returns>
    396         private string getResponseBody(HttpWebResponse response)
    397         {
    398             Encoding defaultEncode = Encoding.UTF8;
    399             string contentType = response.ContentType;
    400             if (contentType != null)
    401             {
    402                 if (contentType.ToLower().Contains("gb2312"))
    403                 {
    404                     defaultEncode = Encoding.GetEncoding("gb2312");
    405                 }
    406                 else if (contentType.ToLower().Contains("gbk"))
    407                 {
    408                     defaultEncode = Encoding.GetEncoding("gbk");
    409                 }
    410                 else if (contentType.ToLower().Contains("zh-cn"))
    411                 {
    412                     defaultEncode = Encoding.GetEncoding("zh-cn");
    413                 }
    414             }
    415 
    416             string responseBody = string.Empty;
    417             if (response.ContentEncoding.ToLower().Contains("gzip"))
    418             {
    419                 using (GZipStream stream = new GZipStream(response.GetResponseStream(), CompressionMode.Decompress))
    420                 {
    421                     using (StreamReader reader = new StreamReader(stream))
    422                     {
    423                         responseBody = reader.ReadToEnd();
    424                     }
    425                 }
    426             }
    427             else if (response.ContentEncoding.ToLower().Contains("deflate"))
    428             {
    429                 using (DeflateStream stream = new DeflateStream(response.GetResponseStream(), CompressionMode.Decompress))
    430                 {
    431                     using (StreamReader reader = new StreamReader(stream, defaultEncode))
    432                     {
    433                         responseBody = reader.ReadToEnd();
    434                     }
    435                 }
    436             }
    437             else
    438             {
    439                 using (Stream stream = response.GetResponseStream())
    440                 {
    441                     using (StreamReader reader = new StreamReader(stream, defaultEncode))
    442                     {
    443                         responseBody = reader.ReadToEnd();
    444                     }
    445                 }
    446             }
    447             return responseBody;
    448         }
    449 
    450 
    451         public static string UrlEncode(string item, Encoding code)
    452         {
    453             return System.Web.HttpUtility.UrlEncode(item.Trim('	').Trim(), Encoding.GetEncoding("gb2312"));
    454         }
    455 
    456         public static string UrlEncodeByGB2312(string item)
    457         {
    458             return UrlEncode(item, Encoding.GetEncoding("gb2312"));
    459         }
    460 
    461 
    462         public static string UrlEncodeByUTF8(string item)
    463         {
    464             return UrlEncode(item, Encoding.GetEncoding("utf-8"));
    465         }
    466 
    467         public static string HtmlDecode(string item)
    468         {
    469             return WebUtility.HtmlDecode(item.Trim('	').Trim());
    470         }
    471  
    472     }
    473 }
    完整的封装类

    使用方式:

    1)打开谷歌浏览器,或者F12

    复制Request Headers 里面的所有内容,然后执行代码:

    string heads = @"Accept:text/javascript, application/javascript, application/ecmascript, application/x-ecmascript, */*; q=0.01
    Accept-Encoding:gzip, deflate
    Accept-Language:zh-CN,zh;q=0.8
    Cache-Control:no-cache
    Content-Length:251
    Content-Type:application/x-www-form-urlencoded; charset=UTF-8
    Cookie:JSESSIONID=B1716F5DAC2F78D1E592F5421D859CFA; Hm_lvt_f44f38cf69626ed8bcfe92d72ed55922=1498099203; Hm_lpvt_f44f38cf69626ed8bcfe92d72ed55922=1498099203; cache_cars=152%7C152%7CBDL212%7C111111%7C111111%2C152%7C152%7CBy769x%7C111111%7C111111%2C152%7C152%7Cd12881%7C111111%7C111111
    Host:www.xxxxxxxx.com
    Origin:http://www.xxxxxxxx.com
    Pragma:no-cache
    Proxy-Connection:keep-alive
    Referer:http://www.cheshouye.com/api/weizhang/
    User-Agent:Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36
    X-Requested-With:XMLHttpRequest";
    
                string url = "http://www.xxxxxxxxxxxx.com/api/weizhang/open_task?callback=jQuery1910816327";
                HttpRequestClient s = new HttpRequestClient(true);
                string content = "chepai_no=b21451&chejia_no=111111&engine_no=111111&city_id=152&car_province_id=12&input_cost=0&vcode=%7B%22cookie_str%22%3A%22%22%2C%22verify_code%22%3A%22%22%2C%22vcode_para%22%3A%7B%22vcode_key%22%3A%22%22%7D%7D&td_key=qja5rbl2d97n&car_type=02&uid=0";
                string response= s.httpPost(url, heads, content, Encoding.UTF8);

    就这样,你会惊喜的发现,卧槽!反回来的值和谷歌上显示值一个样子,

    只要域名没变化,HttpRequestClient 对象就不要去改变, 多线程请使用ThreadLocal<HttpRequestClient > 

    配合我很久之前写的多线类 QueueThreadBase 让你起飞.

    你想暴力破解网站登录密码吗?基本思路如下:

    1)强大的用户名+密码字典

    2)多线程Http+代理(代理可以不用,如果服务器做了ip限制,那么代理就非常有用了,最好是透明的http代理,并且有规则剔除慢的代理)

    3)验证码破解.(只要验证码不复杂,在某宝就能买的dll 可用,1000块钱上下)

    4)慢慢等......看奇迹发生,(我已经做好了一个,各位程序员我屁股已经翘好,等你一脚)

  • 相关阅读:
    数据分析面试题
    二、初始化superset
    一、下载安装superset
    leaflet如何加载10万数据
    leaflet中如何优雅的解决百度、高德地图的偏移问题
    oracle 获取数据库时间
    dev中gridcontrol中改变符合某一条件的行的背景色
    dev中动态初始化菜单栏
    oracle向数据库中插入时间数据
    Silverlight中全屏处理
  • 原文地址:https://www.cnblogs.com/zetee/p/7064915.html
Copyright © 2011-2022 走看看