zoukankan      html  css  js  c++  java
  • c# 利用cef3抓取京东关键词商品列表,以手机为例

    利用 C# 的 CEF3 模拟浏览器操作,抓取京东商城关键词商品列表

    获取商品标题,商品id,商品链接,商品价格

     /// <summary>
     /// Form hosting an embedded Chromium browser that searches jd.com for
     /// <see cref="key"/> and writes each listed product's URL, title, code and
     /// price to the debug output.
     /// </summary>
     public partial class Form1 : Form
     {
         private const string HomeUrl = "https://www.jd.com/";
         private const string LoginUrlPrefix = "https://passport.jd.com/uc/login";

         // Pauses that give the page time to react to the sort click and to
         // render the items that only appear after scrolling to the bottom.
         private const int AfterSortDelayMs = 2000;
         private const int AfterScrollDelayMs = 3500;

         public ChromiumWebBrowser browser { get; set; }
         public string key = "手机";  // keyword to search for
         string jdTab = "综合";       // sort tab to select
         int goTab = 0;               // 1 once the sort tab has been clicked for the current search

         public Form1()
         {
             InitializeComponent();
         }

         /// <summary>Creates the browser, points it at the JD home page and starts listening for page loads.</summary>
         private void Form1_Load(object sender, EventArgs e)
         {
             browser = new ChromiumWebBrowser(HomeUrl);
             // Subscribe before the control is attached so no load notification can be missed.
             browser.FrameLoadEnd += Web_FrameLoadEnd;
             this.Controls.Add(browser);
         }

         /// <summary>
         /// Drives the scrape from page-load notifications: the home page submits the
         /// search, the login page bounces back to the home page, and a search-result
         /// page is scraped.
         /// </summary>
         private async void Web_FrameLoadEnd(object sender, FrameLoadEndEventArgs e)
         {
             Debug.WriteLine("进入页面:" + e.Url);

             // The event fires once per frame; only the main document should drive the workflow.
             if (!e.Frame.IsMain)
             {
                 return;
             }

             try
             {
                 if (e.Url.Contains(HomeUrl))
                 {
                     await SubmitSearchAsync();
                 }
                 else if (e.Url.Contains(LoginUrlPrefix))
                 {
                     // JD redirected to its login page; go back to the home page and search again.
                     browser.Load(HomeUrl);
                 }
                 else if (e.Url.Contains("Search?"))
                 {
                     await ScrapeSearchResultsAsync();
                 }
             }
             catch (Exception ex)
             {
                 // An exception escaping an async void handler would terminate the process.
                 Debug.WriteLine("Web_FrameLoadEnd failed: " + ex);
             }
         }

         /// <summary>Types <see cref="key"/> into the home-page search box and clicks the search button.</summary>
         private async System.Threading.Tasks.Task SubmitSearchAsync()
         {
             goTab = 0; // a fresh search has not been sorted yet

             await EvaluateAsStringAsync(" $('#key').focus()");
             await EvaluateAsStringAsync(" $('#key').val('" + EscapeForJsSingleQuoted(key) + "')");
             await EvaluateAsStringAsync(" $('.button').click()");
         }

         /// <summary>
         /// Checks that the search produced results, applies the configured sort once,
         /// scrolls to the bottom of the page and logs every product it can read completely.
         /// </summary>
         private async System.Threading.Tasks.Task ScrapeSearchResultsAsync()
         {
             // Collect the text of the "no result" / "error" banners, if present.
             string searchResult =
                 (await EvaluateAsStringAsync("(function(){ return $('.ns-content').text();})()") ?? "") +
                 (await EvaluateAsStringAsync("(function(){ return $('.check-error').text();})()") ?? "");
             Debug.WriteLine("searchResult:" + searchResult);

             // The keyword is treated as valid only when none of these phrases appear on the page.
             if (searchResult.Contains("没有") || searchResult.Contains("仍然搜索") || searchResult.Contains("点击查看"))
             {
                 Debug.WriteLine("没有找到词:" + key);
                 return;
             }

             int sortIndex = GetSortTabIndex(jdTab);
             if (goTab == 0 && sortIndex >= 0)
             {
                 // Click the sort tab only once per search so a reload caused by the
                 // click cannot trigger another click.
                 await EvaluateAsStringAsync("$('.f-sort a')[" + sortIndex.ToString() + "].click()");
                 goTab = 1;
             }

             // Task.Delay instead of Thread.Sleep: the thread raising FrameLoadEnd must not be blocked.
             await System.Threading.Tasks.Task.Delay(AfterSortDelayMs);
             browser.ExecuteScriptAsync(" scrollTo(0, document.body.scrollHeight)");
             await System.Threading.Tasks.Task.Delay(AfterScrollDelayMs);

             int max; // number of product items on the page; 0 when it cannot be read
             int.TryParse(await EvaluateAsStringAsync("(function(){ return $('.gl-item').length})()"), out max);

             for (int index = 0; index < max; index++)
             {
                 string position = index.ToString();
                 string url = await EvaluateAsStringAsync("(function(){ return $('.gl-item .p-name a')[" + position + "].href})()");
                 string title = await EvaluateAsStringAsync("(function(){ return $('.gl-item .p-name em')[" + position + "].innerText})()");
                 string price = await EvaluateAsStringAsync("(function(){ return $('.gl-item .p-price strong')[" + position + "].innerText.replace('¥','')})()");

                 // Skip only this item when a field is missing; later items are still reported.
                 if (url == null || title == null || price == null)
                 {
                     continue;
                 }

                 // Product code = "JD" followed by the digits found in the product URL.
                 string code = "JD" + Regex.Replace(url, @"[^\d]", "");
                 title = title.Replace(" ","");

                 Debug.WriteLine("url:" + url + " title=" + title + " code=" + code + " price=" + price);
             }
         }

         /// <summary>
         /// Runs <paramref name="script"/> in the main frame.
         /// </summary>
         /// <returns>The result converted to a string, or <c>null</c> when the script failed or returned nothing.</returns>
         private async System.Threading.Tasks.Task<string> EvaluateAsStringAsync(string script)
         {
             var response = await browser.GetMainFrame().EvaluateScriptAsync(script);
             if (response == null || !response.Success || response.Result == null)
             {
                 return null;
             }

             return response.Result.ToString();
         }

         /// <summary>Maps a sort tab name to its position among the <c>.f-sort a</c> links; -1 when unknown.</summary>
         private static int GetSortTabIndex(string tab)
         {
             switch (tab)
             {
                 case "综合": return 0;
                 case "销量": return 1;
                 case "评论数": return 2;
                 case "价格": return 4;
                 default: return -1;
             }
         }

         /// <summary>Escapes <paramref name="text"/> so it can be embedded in a single-quoted JavaScript string literal.</summary>
         private static string EscapeForJsSingleQuoted(string text)
         {
             if (text == null)
             {
                 return "";
             }

             return text.Replace("\\", "\\\\")
                        .Replace("'", "\\'")
                        .Replace("\r", "\\r")
                        .Replace("\n", "\\n");
         }
     }

    抓取结果:

  • 相关阅读:
    金融系列4《PUTKEY指令》
    数据分析≠Hadoop+NoSQL,不妨先看完善现有技术的10条捷径(分享)
    ASP.NET对HTML元素进行权限控制(三)
    ASP.NET对HTML元素进行权限控制(二)
    ASP.NET对HTML元素进行权限控制(一)
    作弊控制——心态
    SQL多表连接
    ASP.NET Repeater嵌套Repeater实现菜单加载
    ASP.NET从数据库中取出数据,有数据的复选框为选中
    ASP.NET——拒绝访问。 (异常来自HRESULT:0x80070005 (E_ACCESSDENIED))
  • 原文地址:https://www.cnblogs.com/yhood/p/11534409.html
Copyright © 2011-2022 走看看