zoukankan      html  css  js  c++  java
  • 爬虫 蜘蛛 信息采集

    HttpWebRequest
    // Downloads a page with HttpWebRequest and reads the whole body into `content`.
    // The empty URL literal is kept from the original snippet; substitute the page address.
    System.Net.HttpWebRequest request = (System.Net.HttpWebRequest)System.Net.WebRequest.Create("");
    // Present a browser user-agent string to the server.
    request.UserAgent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022)";

    string content;
    // `using` disposes the reader, the stream and the response (in that order) even when
    // reading throws, so the underlying connection is always released.
    // NOTE(review): `encoding` is not declared in this snippet; it is expected to be
    // supplied by the surrounding code (a System.Text.Encoding matching the page charset).
    using (System.Net.WebResponse response = request.GetResponse())
    using (System.IO.Stream resStream = response.GetResponseStream())
    using (System.IO.StreamReader sr = new System.IO.StreamReader(resStream, encoding))
    {
        content = sr.ReadToEnd();
    }

    webrequest,WebClient
    // Downloads a page with WebClient and decodes the raw bytes into `content`.
    // The empty URL literal is kept from the original snippet; substitute the page address.
    string content;
    // WebClient implements IDisposable, so scope it with `using` to release it deterministically.
    using (System.Net.WebClient wc = new System.Net.WebClient())
    {
        // Authenticate with the credentials of the current security context.
        wc.Credentials = System.Net.CredentialCache.DefaultCredentials;
        Byte[] pageData = wc.DownloadData("");
        // NOTE(review): Encoding.Default depends on the machine's configuration; confirm it
        // matches the charset of the downloaded page, otherwise the text will be garbled.
        content = System.Text.Encoding.Default.GetString(pageData);
    }

    /// <summary>
    /// Cookie container attached to the login request so that cookies set by the
    /// login response are captured.
    /// </summary>
    public static CookieContainer cc = new CookieContainer();

    // URL whose cookies are exported into the stored cookie header.
    // NOTE(review): this is hard-coded rather than derived from targetURL; confirm
    // whether other login endpoints are expected to work with this helper.
    private const string CookieSourceUrl = "http://login.xiaonei.com/Login.do";

    // Key under which the cookie header is kept in ASP.NET application state.
    private const string CookieHeaderKey = "cookieHeader";

    /// <summary>
    /// Performs a login by POSTing the given form fields to <paramref name="targetURL"/>,
    /// then stores the resulting cookie header in application state for later requests.
    /// </summary>
    /// <param name="targetURL">URL the form is posted to; it must be the login endpoint.</param>
    /// <param name="param">
    /// Form fields to submit (for example user name and password). Keys and values are
    /// percent-encoded here, so pass them raw (not pre-encoded). A null table sends an empty body.
    /// </param>
    /// <returns>The literal string "OK" once the request has completed.</returns>
    public static string PostAndGetHTML(string targetURL, Hashtable param)
    {
        // After percent-encoding the body contains ASCII characters only.
        byte[] data = Encoding.ASCII.GetBytes(BuildFormData(param));

        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(targetURL);
        request.Method = "POST";
        request.ContentType = "application/x-www-form-urlencoded";
        request.ContentLength = data.Length;
        request.UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; SV1; .NET CLR 2.0.1124)";
        request.AllowAutoRedirect = true;
        request.KeepAlive = true;
        // Attach the container before any data is written so the whole exchange uses it.
        request.CookieContainer = cc;

        using (Stream requestStream = request.GetRequestStream())
        {
            requestStream.Write(data, 0, data.Length);
        }

        // Send the request. The response body is not needed, but the response must be
        // disposed so the connection is released.
        using (request.GetResponse())
        {
        }

        // Export the captured cookies as a header string and publish it in application state.
        string cookieHeader = request.CookieContainer.GetCookieHeader(new Uri(CookieSourceUrl));
        HttpContext.Current.Application.Lock();
        try
        {
            HttpContext.Current.Application[CookieHeaderKey] = cookieHeader;
        }
        finally
        {
            // Always release the application lock, even if the assignment throws.
            HttpContext.Current.Application.UnLock();
        }

        return "OK";
    }

    /// <summary>
    /// Fetches another page with a GET request, sending the cookie header stored by
    /// <see cref="PostAndGetHTML"/>.
    /// </summary>
    /// <param name="strUrl">URL of the page to fetch.</param>
    /// <returns>The response body decoded as UTF-8.</returns>
    /// <exception cref="InvalidOperationException">
    /// No cookie header is stored in application state (login has not been performed).
    /// </exception>
    public static string ReGetHtml(string strUrl)
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(strUrl);

        object storedCookies = HttpContext.Current.Application[CookieHeaderKey];
        if (storedCookies == null)
        {
            throw new InvalidOperationException(
                "No login cookies are stored; call PostAndGetHTML before ReGetHtml.");
        }

        request.Method = "GET";
        request.KeepAlive = true;
        request.AllowAutoRedirect = true;

        string cookieHeader = storedCookies.ToString();
        if (cookieHeader.Length > 0)
        {
            request.Headers.Add("Cookie", cookieHeader);
        }

        // Dispose reader, stream and response in all cases to avoid leaking the connection.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream responseStream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(responseStream, Encoding.UTF8))
        {
            return reader.ReadToEnd();
        }
    }

    // Builds an application/x-www-form-urlencoded body ("k1=v1&k2=v2") from the table,
    // percent-encoding every key and value; returns an empty string for a null or empty table.
    private static string BuildFormData(Hashtable fields)
    {
        if (fields == null)
        {
            return string.Empty;
        }

        StringBuilder body = new StringBuilder();
        foreach (DictionaryEntry field in fields)
        {
            if (body.Length > 0)
            {
                body.Append('&');
            }

            string value = field.Value == null ? string.Empty : field.Value.ToString();
            body.Append(Uri.EscapeDataString(field.Key.ToString()));
            body.Append('=');
            body.Append(Uri.EscapeDataString(value));
        }

        return body.ToString();
    }


  • 相关阅读:
    【bzoj2653】【middle】【主席树+二分答案】
    Codeforces 464E. The Classic Problem
    关于主席树的入门,讲解和题单
    BZOJ3531-[Sdoi2014]旅行(树剖+线段树动态开点)
    [bzoj3123][洛谷P3302] [SDOI2013]森林(树上主席树+启发式合并)
    1018_两个圆相交的面积
    String对象中常用的方法
    张爱玲写的信
    React Native拆包及热更新方案 · Solartisan
    vue项目实战
  • 原文地址:https://www.cnblogs.com/zzxap/p/2175820.html
Copyright © 2011-2022 走看看