zoukankan      html  css  js  c++  java
  • Web爬虫的C#请求发送

    /// <summary>
    /// Small helper for a web crawler: sends a form-encoded POST or a GET request with
    /// browser-like headers and returns / post-processes the response body.
    /// </summary>
    public class HttpControler
    {
        // Referer sent with every request; some sites use it for hot-link protection.
        private const string RefererUrl = "http://www.cninfo.com.cn/cninfo-new/announcement/show";

        // Header VALUE only. The header name must not be part of the value, otherwise the
        // request goes out as "User-Agent: User-Agent:Mozilla/...".
        private const string UserAgentValue = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0";

        // Request timeout: 600 seconds.
        private const int TimeoutMilliseconds = 600 * 1000;

        // Matches fragments of the form "code":"<six or more digits>" (quotes included).
        private static readonly Regex m_CodeRegex = new Regex(@"""code"":""\d{6,}""", RegexOptions.IgnoreCase);

        // Encoding used to decode the POST response body.
        // NOTE(review): the request body is sent as UTF-8 while the response is decoded as
        // gb2312 - confirm this matches the target site's actual response charset.
        private readonly Encoding m_Encoding = Encoding.GetEncoding("gb2312");

        /// <summary>
        /// Sends <paramref name="postStr"/> as a UTF-8, form-url-encoded POST body to
        /// <paramref name="strUrl"/> and returns the response body decoded as gb2312.
        /// </summary>
        /// <param name="strUrl">Absolute HTTP(S) URL to post to.</param>
        /// <param name="postStr">Already url-encoded form data, e.g. "a=1&amp;b=2".</param>
        /// <returns>
        /// The response body, or the sentinel string "NoUrl" when sending the request or
        /// reading the response fails for any reason.
        /// </returns>
        /// <remarks>
        /// Exceptions raised while creating the request object or encoding the body
        /// (for example a null or malformed URL, or a null <paramref name="postStr"/>)
        /// are not converted to "NoUrl"; they propagate to the caller.
        /// </remarks>
        public string Request(string strUrl, string postStr)
        {
            HttpWebRequest tHWRq = CreateRequest(strUrl, "POST");

            // Advertises "Accept-Encoding: gzip, deflate" AND transparently decompresses the
            // response. Setting only the header would hand compressed bytes to the reader.
            tHWRq.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;

            byte[] postData = Encoding.UTF8.GetBytes(postStr);

            try
            {
                tHWRq.ContentLength = postData.Length;

                // Dispose the request stream even when Write throws.
                using (Stream requestStream = tHWRq.GetRequestStream())
                {
                    requestStream.Write(postData, 0, postData.Length);
                }

                using (HttpWebResponse tHWRp = (HttpWebResponse)tHWRq.GetResponse())
                using (Stream tStreamRp = tHWRp.GetResponseStream())
                using (StreamReader tSR = new StreamReader(tStreamRp, m_Encoding))
                {
                    return tSR.ReadToEnd();
                }
            }
            catch (Exception)
            {
                // Best effort by design: any failure is reported through the sentinel value.
                // Abort releases whatever the failed request still holds; if Abort itself
                // throws, that exception propagates with its original stack trace.
                tHWRq.Abort();
                return "NoUrl";
            }
        }

        /// <summary>
        /// Issues a GET request to <paramref name="strUrl"/>, extracts every
        /// <c>"code":"&lt;6+ digits&gt;"</c> fragment from the response text and writes the
        /// part after the colon (the quoted code) to <paramref name="path"/>, one per line.
        /// </summary>
        /// <param name="strUrl">Absolute HTTP(S) URL to fetch.</param>
        /// <param name="path">Output file path; the file is created or overwritten.</param>
        /// <returns>
        /// <c>true</c> when the download and the file write both succeed (even if no code
        /// was found, in which case an empty file is written); <c>false</c> on any failure.
        /// </returns>
        /// <remarks>
        /// Exceptions raised while creating the request object (for example a null or
        /// malformed URL) propagate to the caller instead of yielding <c>false</c>.
        /// </remarks>
        public bool RequestCode(string strUrl, string path)
        {
            HttpWebRequest tHWRq = CreateRequest(strUrl, "GET");
            tHWRq.Headers["Accept-Charset"] = "GBK,utf-8;q=0.7,*;q=0.3";

            try
            {
                string result;
                using (HttpWebResponse tHWRp = (HttpWebResponse)tHWRq.GetResponse())
                using (Stream tStreamRp = tHWRp.GetResponseStream())
                using (StreamReader tSR = new StreamReader(tStreamRp))
                {
                    result = tSR.ReadToEnd();
                }

                // Keep only the value part of each match, i.e. everything after the first ':'.
                List<string> lstCode = new List<string>();
                foreach (Match matPage in m_CodeRegex.Matches(result))
                {
                    string codeItem = matPage.Value;
                    lstCode.Add(codeItem.Substring(codeItem.IndexOf(":") + 1));
                }

                // append: false => create the file or truncate an existing one.
                using (StreamWriter sw = new StreamWriter(path, false))
                {
                    foreach (string code in lstCode)
                    {
                        sw.WriteLine(code);
                    }
                }

                return true;
            }
            catch (Exception)
            {
                // Best effort by design: network and file errors are both reported as false.
                tHWRq.Abort();
                return false;
            }
        }

        /// <summary>
        /// Creates a request for <paramref name="strUrl"/> carrying the headers shared by
        /// the POST and GET helpers (cookies, referer, accept headers, user agent, timeout).
        /// </summary>
        private static HttpWebRequest CreateRequest(string strUrl, string method)
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(strUrl);
            request.CookieContainer = new CookieContainer();
            request.Referer = RefererUrl;
            request.Accept = "application/json, text/javascript, */*; q=0.01";
            request.Headers["Accept-Language"] = "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3";
            request.UserAgent = UserAgentValue;
            request.KeepAlive = true;
            request.ContentType = "application/x-www-form-urlencoded; charset=UTF-8";
            request.Method = method;
            request.Timeout = TimeoutMilliseconds;
            return request;
        }
    }
  • 相关阅读:
    陶瓷电容的结构、工艺、失效模式
    Vue.js最佳实践
    Vue 超快速学习
    CSS 小技巧
    HTML5 Canvas
    webkit下面的CSS设置滚动条
    Some untracked working tree files would be overwritten by checkout. Please move or remove them before you can checkout. View them
    JSCS: Please specify path to 'JSCS' package
    React中ref的使用方法
    React 60S倒计时
  • 原文地址:https://www.cnblogs.com/sumuncle/p/5166501.html
Copyright © 2011-2022 走看看