zoukankan      html  css  js  c++  java
  • 简单抓取小程序大全,并展示。

              前言 ,想利用小程序导航页面来提升网站的流量,找到  www.xcxdh666.com  该小程序导航网站。

     分析网页

            1 发现网站其实也是用异步分页请求加载数据的  ,所以根本用不着xpath  解析html,直接分析其请求url

            2 点击"加载更多"找到请求,发现其实就 pageNum 和 category 两个参数

           3 所以直接请求 url 并带入参数,分析其返回的 json 结果

      编写代码

             1 首先建立接收类型       

     /// <summary>
     /// One mini-program entry inside the <c>dataList</c> array of a page response.
     /// Property names are kept in lower camel case; presumably they mirror the
     /// field names of the remote JSON payload (confirm against a live response).
     /// </summary>
     public class XcxApplet
     {
         /// <summary>Numeric identifier of the entry.</summary>
         public int id { get; set; }

         /// <summary>Category label; the crawler substitutes a default when it is null or empty.</summary>
         public string categoryName { get; set; }

         /// <summary>Display name; the crawler uses it as the de-duplication key.</summary>
         public string name { get; set; }

         /// <summary>Image file name that the crawler appends to the remote image host to fetch the scan-code picture.</summary>
         public string saomaUrl { get; set; }

         /// <summary>Summary text; the crawler copies it into the stored entity's <c>Summary</c>.</summary>
         public string sum { get; set; }

         /// <summary>Image file name that the crawler appends to the remote image host to fetch the logo.</summary>
         public string logoUrl { get; set; }
     }
    
        /// <summary>
        /// Envelope of one page of the remote list response.
        /// Property names are kept in lower camel case; presumably they mirror the
        /// field names of the remote JSON payload (confirm against a live response).
        /// </summary>
        public class Result
        {
            /// <summary>
            /// Entries on this page. Starts as an empty list so that a payload
            /// without this field can still be enumerated without a null check.
            /// </summary>
            public List<XcxApplet> dataList { get; set; } = new List<XcxApplet>();

            /// <summary>Category value echoed by the response; not read by the crawler code in this file.</summary>
            public string category { get; set; }

            /// <summary>Status value of the response; not read by the crawler code in this file.</summary>
            public int status { get; set; }

            /// <summary>Page number of the response; not read by the crawler code in this file.</summary>
            public int pageNum { get; set; }
        }
    

      

         2  封装请求页面方法

      

       /// <summary>
       /// Sends an HTTP POST to <paramref name="posturl"/> and returns the response body as text.
       /// </summary>
       /// <param name="posturl">Absolute URL to post to.</param>
       /// <param name="postData">
       /// Body sent as <c>application/x-www-form-urlencoded</c>, encoded as UTF-8.
       /// When null or empty, no request body is written.
       /// </param>
       /// <returns>
       /// The response body decoded as UTF-8, or <see cref="string.Empty"/> when the URL
       /// does not produce an HTTP request or when any exception occurs.
       /// </returns>
       public static string GetPostPage(this string posturl, string postData)
       {
           Encoding encoding = Encoding.UTF8;
           byte[] data = string.IsNullOrEmpty(postData) ? null : encoding.GetBytes(postData);
           try
           {
               // Configure the request.
               var request = WebRequest.Create(posturl) as HttpWebRequest;
               if (request == null)
               {
                   return string.Empty;
               }

               request.CookieContainer = new CookieContainer();
               request.AllowAutoRedirect = true;
               request.Method = "POST";
               request.ContentType = "application/x-www-form-urlencoded";
               if (data != null)
               {
                   request.ContentLength = data.Length;
                   using (Stream outstream = request.GetRequestStream())
                   {
                       outstream.Write(data, 0, data.Length);
                   }
               }

               // Send the request and read the reply. Response, stream and reader are
               // all disposed so the underlying connection is released even when
               // reading fails part-way through.
               using (WebResponse response = request.GetResponse())
               {
                   if (!(response is HttpWebResponse))
                   {
                       return string.Empty;
                   }

                   using (Stream instream = response.GetResponseStream())
                   {
                       if (instream == null)
                       {
                           return string.Empty;
                       }

                       using (var reader = new StreamReader(instream, encoding))
                       {
                           return reader.ReadToEnd();
                       }
                   }
               }
           }
           catch (Exception)
           {
               // Deliberate best-effort: callers treat an empty string as "no data".
               return string.Empty;
           }
       }
    

      3  图片url处理   思路就是要将其返回的url 请求下载到本地或者上传到自己对应的图片服务器,

               我这里是用七牛云存储img的 ,这里你可以改成下载到本地 返回本地的url就好。

      

    /// <summary>
    /// Downloads the image at <paramref name="imgurl"/> and re-uploads it to the
    /// Qiniu bucket, using the last path segment of the URL as the object key.
    /// </summary>
    /// <param name="imgurl">Absolute HTTP(S) URL of the source image.</param>
    /// <returns>
    /// The URL of the uploaded image under <c>http://img.siyouku.cn/</c>, or an empty
    /// string when the input is unusable, the download or upload fails, or the
    /// upload reply carries no key.
    /// </returns>
    public string QiniuUplod(string imgurl)
    {
        // Nothing to download: report failure the same way as any other upload error.
        if (string.IsNullOrEmpty(imgurl))
        {
            return "";
        }

        // Object key = everything after the last '/' (the whole string when there is none).
        string saveKey = imgurl.Substring(imgurl.LastIndexOf('/') + 1);
        if (saveKey.Length == 0)
        {
            // URL ends with '/', so there is no file name to store the image under.
            return "";
        }

        // Placeholder credentials: replace with real values, ideally loaded from
        // configuration rather than kept in source.
        var accessKey = "你的accesskey";
        var secretKey = "你的secretkey";

        // The Mac is needed to sign the upload token below.
        Mac mac = new Mac(accessKey, secretKey);
        string bucket = "siyouku";

        // Original author's note: make sure the access key and bucket are correct,
        // otherwise this call throws (e.g. error codes 612/631).
        Qiniu.Common.Config.AutoZone(accessKey, bucket, false);

        // Upload policy, see https://developer.qiniu.com/kodo/manual/put-policy
        PutPolicy putPolicy = new PutPolicy();
        // Scope is the bare bucket. For "overwrite" uploads (replace an existing
        // object with the same key) the scope would be bucket + ":" + saveKey.
        putPolicy.Scope = bucket;
        // Validity period of the policy, and therefore of the generated token.
        putPolicy.SetExpires(3600);

        // Upload token, see https://developer.qiniu.com/kodo/manual/upload-token
        string jstr = putPolicy.ToJsonString();
        string token = Auth.CreateUploadToken(mac, jstr);
        try
        {
            var wReq = System.Net.WebRequest.Create(imgurl) as System.Net.HttpWebRequest;
            if (wReq == null)
            {
                // Not an HTTP(S) URL.
                return "";
            }

            using (var resp = wReq.GetResponse())
            using (var stream = resp.GetResponseStream())
            {
                if (stream == null)
                {
                    return "";
                }

                // Original author's note: do not use UploadManager.UploadStream here,
                // because this stream is not seekable (Stream.Length is unavailable);
                // use FormUploader or ResumableUploader instead.
                FormUploader fu = new FormUploader();
                var result = fu.UploadStream(stream, saveKey, token);
                var x = Newtonsoft.Json.JsonConvert.DeserializeObject<QiniuResult>(result.Text);
                if (x == null || string.IsNullOrEmpty(x.key))
                {
                    // The reply carried no key, so there is no valid URL to hand back.
                    return "";
                }

                return $"http://img.siyouku.cn/{x.key}";
            }
        }
        catch (Exception ex)
        {
            // Best-effort: callers store the empty string; leave a trace so failures are visible.
            System.Diagnostics.Trace.TraceWarning("QiniuUplod failed for {0}: {1}", imgurl, ex.Message);
            return "";
        }
    }
    

      

       4 最后是请求主体方法 

    /// <summary>
    /// Crawls the paged list at <c>https://www.xcxdh666.com/pageList.htm</c> and stores
    /// every entry whose name is not yet present in <c>Db.Applet</c>. Both images of a
    /// new entry are re-hosted through <c>QiniuUplod</c>. Changes are saved once per page.
    /// </summary>
    /// <returns>A text result containing the total elapsed time in milliseconds.</returns>
    public ActionResult GetxcxList()
    {
        // Number of pages requested (pageNum 0..53).
        const int pageCount = 54;

        // Measures the total crawl time.
        Stopwatch watch = Stopwatch.StartNew();

        for (int j = 0; j < pageCount; j++)
        {
            string url = $"https://www.xcxdh666.com/pageList.htm?pageNum={j}";

            // GetPostPage returns an empty string (not null) on failure, so test for both.
            var str = url.GetPostPage(null);
            if (string.IsNullOrEmpty(str))
            {
                System.Diagnostics.Trace.TraceWarning("GetxcxList: no response for page {0}, skipped", j);
                continue;
            }

            // Parsed per iteration so a bad page can never re-process the previous page's data.
            var result = str.JsonConvert<Result>();
            if (result == null || result.dataList == null)
            {
                System.Diagnostics.Trace.TraceWarning("GetxcxList: no data list on page {0}, skipped", j);
                continue;
            }

            // Rows added below are not visible to the database query until SaveChanges,
            // so names already queued on this page are tracked separately.
            var namesOnThisPage = new HashSet<string>();

            foreach (var item in result.dataList)
            {
                if (item == null)
                {
                    continue;
                }

                string name = item.name;

                // Skip duplicates, whether queued on this page or already stored.
                if (!namesOnThisPage.Add(name) || Db.Applet.Any(a => a.Name == name))
                {
                    continue;
                }

                var applet = new Applet()
                {
                    CategoryName = string.IsNullOrEmpty(item.categoryName) ? "其它" : item.categoryName,
                    Name = name,
                    SaomiaoUrl = QiniuUplod($"http://img.xcxdh666.com/wxappnav/{item.saomaUrl}"),
                    Summary = item.sum,
                    LogoUrl = QiniuUplod($"http://img.xcxdh666.com/wxappnav/{item.logoUrl}"),
                    SortNum = j,
                    CreateUser = "wenqing",
                    CreateTime = DateTime.Now
                };
                Db.Applet.Add(applet);
            }

            Db.SaveChanges();
        }

        watch.Stop();
        return Content("爬取完成!本次请求总共耗时:" + watch.ElapsedMilliseconds);
    }
        }
    

      

    ok  到这里就全部抓取完成

             这里附上 展示地址  http://siyouku.cn/Applet

    博主网址:http://www.siyouku.cn

    本文永久更新地址:http://siyouku.cn/article/6806.html

  • 相关阅读:
    CodeForces Gym 100500A A. Poetry Challenge DFS
    CDOJ 486 Good Morning 傻逼题
    CDOJ 483 Data Structure Problem DFS
    CDOJ 482 Charitable Exchange bfs
    CDOJ 481 Apparent Magnitude 水题
    Codeforces Gym 100637G G. #TheDress 暴力
    Gym 100637F F. The Pool for Lucky Ones 暴力
    Codeforces Gym 100637B B. Lunch 找规律
    Codeforces Gym 100637A A. Nano alarm-clocks 前缀和
    TC SRM 663 div2 B AABB 逆推
  • 原文地址:https://www.cnblogs.com/fighting2014/p/7093571.html
Copyright © 2011-2022 走看看