zoukankan      html  css  js  c++  java
  • 简单抓取小程序大全,并展示

    前言,想利用小程序导航页面来提升网站的流量,找到www.xcxdh666.com该小程序导航网站。

    分析网页

          1发现网站其实也是用异步分页请求加载数据的,所以根本用不着xpath解析html,直接分析其请求URL

          2点击加载更多找到请求,发现其实就是pageNum,category两个参数

          3所以直接请求URL,带入参数,分析其返回的json结果

    编写代码

            1首先建立接收类型

                public class XcxApplet

    {

    public int id{get;set;}

    public string categoryName{get;set;}

    public string name {get;set;}

    public string saomaUrl{get;set;}

    public string sum{get;set;}

    public string logoUrl{get;set;}

    }

    /// <summary>
    /// Envelope of one listing page response.
    /// Member names are lower-camel on purpose: they are bound by name when the
    /// response JSON is deserialized, so they must not be renamed.
    /// </summary>
    public class Result
    {
        /// <summary>Entries contained in this page; null until populated.</summary>
        public List<XcxApplet> dataList { get; set; }

        /// <summary>Category value carried by the response.</summary>
        public string category { get; set; }

        /// <summary>Status code carried by the response.</summary>
        public int status { get; set; }

        /// <summary>Page number carried by the response.</summary>
        public int pageNum { get; set; }
    }

    2 封装请求页面方法

    /// <summary>
    /// Sends an HTTP POST to <paramref name="posturl"/> and returns the response body as text.
    /// </summary>
    /// <param name="posturl">Absolute URL to post to.</param>
    /// <param name="postData">
    /// Form-encoded request body; when null or empty the request is sent without a body.
    /// </param>
    /// <returns>
    /// The response body decoded as UTF-8, or <see cref="string.Empty"/> when the URL does not
    /// produce an HTTP request, no response stream is available, or any exception occurs.
    /// </returns>
    public static string GetPostPage(this string posturl, string postData)
    {
        Encoding encoding = Encoding.UTF8;
        byte[] data = string.IsNullOrEmpty(postData) ? null : encoding.GetBytes(postData);

        try
        {
            // Build the request; non-HTTP schemes yield a different WebRequest subtype.
            var request = WebRequest.Create(posturl) as HttpWebRequest;
            if (request == null)
            {
                return string.Empty;
            }

            request.CookieContainer = new CookieContainer();
            request.AllowAutoRedirect = true;
            request.Method = "POST";
            request.ContentType = "application/x-www-form-urlencoded";

            if (data != null)
            {
                request.ContentLength = data.Length;
                using (Stream outstream = request.GetRequestStream())
                {
                    outstream.Write(data, 0, data.Length);
                }
            }

            // Send the request and read the whole response body.
            using (var response = request.GetResponse() as HttpWebResponse)
            {
                if (response == null)
                {
                    return string.Empty;
                }

                using (Stream instream = response.GetResponseStream())
                {
                    if (instream == null)
                    {
                        return string.Empty;
                    }

                    using (var reader = new StreamReader(instream, encoding))
                    {
                        return reader.ReadToEnd();
                    }
                }
            }
        }
        catch (Exception ex)
        {
            // Best-effort contract: callers expect an empty string instead of an exception,
            // but the failure is traced so it is not lost entirely.
            System.Diagnostics.Trace.TraceError("GetPostPage failed for '{0}': {1}", posturl, ex.Message);
            return string.Empty;
        }
    }

    3 图片url处理,思路就是要将其返回的URL请求下载到本地或者上传到自己对应的图片服务器,我这里是用七牛云存储img的

    这里你可以改成下载到本地返回本地的URL就好

    /// <summary>
    /// Downloads the image at <paramref name="imgurl"/> and uploads it to the Qiniu bucket,
    /// using the last path segment of the URL as the storage key.
    /// </summary>
    /// <param name="imgurl">Absolute URL of the source image.</param>
    /// <returns>
    /// <c>http://img.siyouku.cn/{key}</c> built from the key reported by the upload response,
    /// or an empty string when the URL is null/empty, has no file name, or the download or
    /// upload fails.
    /// </returns>
    public string QiniuUpload(string imgurl)
    {
        if (string.IsNullOrEmpty(imgurl))
        {
            return "";
        }

        // Everything after the last '/' (the whole string when there is no '/').
        string saveKey = imgurl.Substring(imgurl.LastIndexOf('/') + 1);
        if (saveKey.Length == 0)
        {
            return "";
        }

        // Placeholder credentials: replace with your own AccessKey / SecretKey,
        // preferably loaded from configuration rather than kept in source.
        var accessKey = "你的accesskey";
        var secretKey = "你的secretkey";

        // The Mac is required to sign the upload token.
        Mac mac = new Mac(accessKey, secretKey);
        string bucket = "siyoku";

        // Make sure the access key and bucket are correct, otherwise this call throws.
        Qiniu.Common.Config.AutoZone(accessKey, bucket, false);

        // Upload policy. The original assigned Scope twice; only the last value (the bare
        // bucket) ever took effect, so that is the one kept here.
        PutPolicy putPolicy = new PutPolicy();
        putPolicy.Scope = bucket;
        putPolicy.SetExpires(3600);
        string jstr = putPolicy.ToJsonString();
        string token = Auth.CreateUploadToken(mac, jstr);

        try
        {
            var wReq = System.Net.WebRequest.Create(imgurl) as System.Net.HttpWebRequest;
            if (wReq == null)
            {
                return "";
            }

            using (var resp = wReq.GetResponse() as System.Net.HttpWebResponse)
            {
                if (resp == null)
                {
                    return "";
                }

                using (var stream = resp.GetResponseStream())
                {
                    FormUploader fu = new FormUploader();
                    var result = fu.UploadStream(stream, saveKey, token);
                    var x = Newtonsoft.Json.JsonConvert.DeserializeObject<QiniuResult>(result.Text);
                    return $"http://img.siyouku.cn/{x.key}";
                }
            }
        }
        catch (Exception ex)
        {
            // Best-effort: a failed image must not abort the caller, but trace the reason.
            System.Diagnostics.Trace.TraceError("QiniuUpload failed for '{0}': {1}", imgurl, ex.Message);
            return "";
        }
    }

    /// <summary>
    /// Original (misspelled) name, kept so existing callers keep compiling.
    /// Forwards to <see cref="QiniuUpload"/>.
    /// </summary>
    /// <param name="imgurl">Absolute URL of the source image.</param>
    /// <returns>Same value as <see cref="QiniuUpload"/>.</returns>
    public string QiniuUplod(string imgurl)
    {
        return QiniuUpload(imgurl);
    }

    4 最后是请求主体方法

    /// <summary>
    /// Requests listing pages 0..53 from www.xcxdh666.com, and for every returned entry whose
    /// name is not yet present in <c>Db.Applet</c>, re-hosts its two images through
    /// <c>QiniuUpload</c> and adds a new <c>Applet</c> row. Changes are saved once per page.
    /// </summary>
    /// <returns>A content result reporting the total elapsed milliseconds.</returns>
    public ActionResult GetxcxList()
    {
        // Number of listing pages requested (pageNum 0 through 53).
        const int pageCount = 54;

        Stopwatch watch = Stopwatch.StartNew();

        for (int j = 0; j < pageCount; j++)
        {
            string url = $"https://www.xcxdh666.com/pageList.htm?pageNum={j}";
            var str = url.GetPostPage(null);

            // GetPostPage signals failure with an empty string, not null; skip such pages
            // instead of re-processing the previous page's data.
            if (string.IsNullOrEmpty(str))
            {
                continue;
            }

            var result = str.JsonConvert<Result>();
            if (result == null || result.dataList == null)
            {
                continue;
            }

            foreach (var item in result.dataList)
            {
                string appletName = item.name;
                if (Db.Applet.Any(a => a.Name == appletName))
                {
                    continue;
                }

                var applet = new Applet()
                {
                    CategoryName = string.IsNullOrEmpty(item.categoryName) ? "其它" : item.categoryName,
                    Name = appletName,
                    // Path segment corrected from "wxppnav" to match the logo URL below.
                    SaomiaoUrl = QiniuUpload($"http://img.xcxdh666.com/wxappnav/{item.saomaUrl}"),
                    // NOTE(review): lower-case member name kept from the original; confirm it
                    // matches the Applet entity (the other members are PascalCase).
                    summary = item.sum,
                    LogoUrl = QiniuUpload($"http://img.xcxdh666.com/wxappnav/{item.logoUrl}"),
                    SortNum = j,
                    CreateUser = "wenqing",
                    CreateTime = DateTime.Now
                };
                Db.Applet.Add(applet);
            }

            Db.SaveChanges();
        }

        watch.Stop();
        return Content("派取完成!本次请求总共耗时:" + watch.ElapsedMilliseconds);
    }

    }

  • 相关阅读:
    Help-IntelliJIDEA-2019.3.4-基础设置:6. 开启自动编译
    Help-IntelliJIDEA-2019.3.4-基础设置:5.Tomcat Server
    Help-IntelliJIDEA-2019.3.4-基础设置:4.自动导包和智能移除 (默认配置)
    Help-IntelliJIDEA-2019.3.4-基础设置:3. 版本控制Git
    Help-IntelliJIDEA-2019.3.4-基础设置:2. 全局Maven(默认配置)
    Help-IntelliJIDEA-2019.3.4-基础设置:1. 全局JDK(默认配置)
    Help-IntelliJIDEA-2019.3.4:IntelliJ IDEA 使用教程
    汉语-词语:笃行
    去除某个元素的属性
    select选中
  • 原文地址:https://www.cnblogs.com/zzp0320/p/7111222.html
Copyright © 2011-2022 走看看