zoukankan      html  css  js  c++  java
  • 网站数据抓取

    1:
    /// <summary>
    /// Posts a form-encoded query to the remote question-list handler, splits the
    /// textual reply at the first "###" separator, deserializes the part after the
    /// separator and hands it to <c>InserData</c>.
    /// </summary>
    /// <param name="bankId">Sent as the <c>bankid</c> form field.</param>
    /// <param name="categoryId">Sent as the <c>categoryid</c> form field.</param>
    /// <param name="curpage">Sent as the <c>curpage</c> form field.</param>
    /// <param name="pagesize">
    /// Not referenced by the visible code any more; presumably sent as one of the
    /// form fields that the original listing elides - TODO confirm.
    /// </param>
    /// <returns>
    /// Whatever <c>InserData</c> returns for the deserialized payload, or
    /// <c>false</c> when the reply does not contain the "###" separator.
    /// </returns>
    /// <remarks>
    /// The part of the reply before the separator was previously deserialized only
    /// to compute a page count that was never read (and that divided by
    /// <paramref name="pagesize"/>, throwing when it was 0); that dead computation
    /// has been removed.
    /// </remarks>
    private bool ImportSubjectQuesData(int bankId, int categoryId, int curpage, int pagesize)
    {
        const string separator = "###";

        string postData = "bankid=" + System.Web.HttpUtility.UrlEncode(bankId.ToString(), System.Text.Encoding.ASCII);
        postData += "&categoryid=" + System.Web.HttpUtility.UrlEncode(categoryId.ToString(), System.Text.Encoding.ASCII);
        postData += "&curpage=" + System.Web.HttpUtility.UrlEncode(curpage.ToString(), System.Text.Encoding.ASCII);
        // NOTE: the original listing elides the remaining form fields at this point
        // ("parameters omitted"); add them here in the same way.

        string url = "http://www.*****.com/Web/Handler1.ashx?action=queslistquery";
        byte[] byteArray = Encoding.UTF8.GetBytes(postData);

        var webRequest = (HttpWebRequest)WebRequest.Create(new Uri(url));
        // HTTP method tokens are case-sensitive, so use the canonical upper-case form.
        webRequest.Method = "POST";
        webRequest.ContentType = "application/x-www-form-urlencoded";
        webRequest.ContentLength = byteArray.Length;

        // Dispose the request stream even if the write throws.
        using (Stream requestStream = webRequest.GetRequestStream())
        {
            requestStream.Write(byteArray, 0, byteArray.Length);
        }

        // Receive the reply; the response and reader must be disposed so the
        // underlying connection is released.
        string result;
        using (var response = (HttpWebResponse)webRequest.GetResponse())
        using (var reader = new StreamReader(response.GetResponseStream(), Encoding.GetEncoding("utf-8")))
        {
            result = reader.ReadToEnd();
        }

        // Ordinal search: the separator is a protocol marker, not linguistic text.
        int separatorIndex = result.IndexOf(separator, StringComparison.Ordinal);
        if (separatorIndex < 0)
        {
            // Without the separator there is no payload section to import;
            // previously this surfaced as an ArgumentOutOfRangeException from Substring.
            return false;
        }

        var questlt = DeserializeObject(result.Substring(separatorIndex + separator.Length));

        // Store the imported data.
        return InserData(questlt);
    }


    2:

    // Download one page as text: the address is the base URL from the txtshiUrl
    // text box with the current value of i appended.
    string url = txtshiUrl.Text + i;
    var webRequest = (HttpWebRequest)WebRequest.Create(new Uri(url));
    webRequest.Method = "GET";
    webRequest.UserAgent = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)";
    // This only sets the request's Content-Type header; the reply is decoded by
    // the StreamReader encoding below.
    webRequest.ContentType = "text/html; charset=gbk";
    webRequest.KeepAlive = true;
    // The page's own address is sent as the Referer.
    webRequest.Referer = url;
    webRequest.CookieContainer = new CookieContainer();

    // Declared outside the using blocks so the downloaded markup stays available
    // to the code that follows.
    string shtml;
    // Dispose the response and reader so the connection is released even when
    // reading throws.
    using (HttpWebResponse webResponse = (HttpWebResponse)webRequest.GetResponse())
    {
        // responseStream is declared outside this snippet; disposing the reader
        // below also closes it, so it must not be read again afterwards.
        responseStream = webResponse.GetResponseStream();
        using (StreamReader streamReader = new StreamReader(responseStream, Encoding.GetEncoding("gbk")))
        {
            shtml = streamReader.ReadToEnd();
        }
    }

     
  • 相关阅读:
    Linux Shell脚本编程基础
    UBoot常用命令及内核下载与引导
    经典C面试真题精讲
    文本相似度分析(基于jieba和gensim)
    python中lambda,map,reduce,filter,zip函数
    机器学习——损失函数
    Tensorflow中的数据对象Dataset
    github 相关操作知识
    机器学习——LightGBM
    机器学习——超参数搜索
  • 原文地址:https://www.cnblogs.com/wzq806341010/p/3034734.html
Copyright © 2011-2022 走看看