zoukankan      html  css  js  c++  java
  • 网站数据抓取

    1:
    /// <summary>
    /// Posts a question-list query to the remote handler, splits the reply at the
    /// "###" delimiter and hands the deserialized payload to <c>InserData</c>.
    /// </summary>
    /// <param name="bankId">Sent as the <c>bankid</c> form field.</param>
    /// <param name="categoryId">Sent as the <c>categoryid</c> form field.</param>
    /// <param name="curpage">Sent as the <c>curpage</c> form field.</param>
    /// <param name="pagesize">
    /// Not referenced by the visible code; presumably sent among the omitted form
    /// fields (TODO confirm).
    /// </param>
    /// <returns>Whatever <c>InserData</c> returns for the deserialized payload.</returns>
    /// <exception cref="FormatException">
    /// The response body does not contain the "###" delimiter.
    /// </exception>
    private bool ImportSubjectQuesData(int bankId, int categoryId, int curpage, int pagesize)
    {
        const string Delimiter = "###";

        string postData = "bankid=" + System.Web.HttpUtility.UrlEncode(bankId.ToString(), System.Text.Encoding.ASCII);
        postData += "&categoryid=" + System.Web.HttpUtility.UrlEncode(categoryId.ToString(), System.Text.Encoding.ASCII);
        postData += "&curpage=" + System.Web.HttpUtility.UrlEncode(curpage.ToString(), System.Text.Encoding.ASCII);
        // ... remaining form parameters omitted in this listing; append them to postData here ...

        string url = "http://www.*****.com/Web/Handler1.ashx?action=queslistquery";
        byte[] byteArray = Encoding.UTF8.GetBytes(postData);

        var webRequest = (HttpWebRequest)WebRequest.Create(new Uri(url));
        // HTTP method tokens are case-sensitive on the wire, so use the canonical upper-case verb.
        webRequest.Method = "POST";
        webRequest.ContentType = "application/x-www-form-urlencoded";
        webRequest.ContentLength = byteArray.Length;

        // Dispose the request stream even if Write throws, so the connection is not left half-open.
        using (Stream requestStream = webRequest.GetRequestStream())
        {
            requestStream.Write(byteArray, 0, byteArray.Length);
        }

        // Receive the reply; disposing the response and reader returns the connection to the pool.
        string result;
        using (var response = (HttpWebResponse)webRequest.GetResponse())
        using (var reader = new StreamReader(response.GetResponseStream(), Encoding.GetEncoding("utf-8")))
        {
            result = reader.ReadToEnd();
        }

        // The body has the form "<header>###<payload>". Fail with a clear message when the
        // delimiter is missing instead of letting Substring throw on a negative index.
        int delimiterIndex = result.IndexOf(Delimiter, StringComparison.Ordinal);
        if (delimiterIndex < 0)
        {
            throw new FormatException("Response does not contain the expected '" + Delimiter + "' delimiter.");
        }

        // The header before the delimiter (it carries the total question count) is not needed
        // for the import, so only the payload after the delimiter is deserialized.
        var questlt = DeserializeObject(result.Substring(delimiterIndex + Delimiter.Length));

        // Persist the parsed questions.
        return InserData(questlt);
    }


    2:

    // Download the page at (txtshiUrl.Text + i) and decode its body as GBK text into shtml.
    string url = txtshiUrl.Text + i;
    var webRequest = (HttpWebRequest)WebRequest.Create(new Uri(url));
    webRequest.Method = "GET";
    // Send an IE9 user-agent string with the request.
    webRequest.UserAgent = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)";
    webRequest.ContentType = "text/html; charset=gbk";
    webRequest.KeepAlive = true;
    // The page's own URL is sent as the Referer header.
    webRequest.Referer = url;
    // A fresh, empty cookie container is attached to this request.
    webRequest.CookieContainer = new CookieContainer();

    string shtml;
    // Dispose the response and reader as soon as the body has been read; otherwise the
    // underlying connection stays checked out until the objects are finalized.
    using (HttpWebResponse webResponse = (HttpWebResponse)webRequest.GetResponse())
    {
        // responseStream is declared outside this snippet; it is still assigned here.
        responseStream = webResponse.GetResponseStream();
        using (StreamReader streamReader = new StreamReader(responseStream, Encoding.GetEncoding("gbk")))
        {
            shtml = streamReader.ReadToEnd();
        }
    }

     
  • 相关阅读:
    《Linux性能及调优指南》第二章:监控和基准工具2.1-2.2
    《Linux 性能及调优指南》1.5 网络子系统
    《Linux 性能及调优指南》1.4 硬盘I/O子系统
    《Linux性能及调优指南》1.3 Linux文件系统
    《linux性能及调优指南》 3.5 网络瓶颈
    《linux性能及调优指南》 3.4 硬盘瓶颈
    《linux性能及调优指南》 3.3 内存瓶颈
    你应该使用Python3里的这些新特性
    python异步编程
    异步网络编程aiohttp的使用
  • 原文地址:https://www.cnblogs.com/wzq806341010/p/3034734.html
Copyright © 2011-2022 走看看