zoukankan      html  css  js  c++  java
  • [python]网络爬虫 京/东 售卖PS4的情况

    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    # @File  : HtmlParser.py
    # @Author: 赵路仓
    # @Date  : 2020/3/17
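    # @Desc  : Scrapes JD.com search results for "ps4" and prints each listing's name, price, comment text and link.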
    # @Contact : 398333404@qq.com
    import json
    
    from lxml import etree
    import requests
    from bs4 import BeautifulSoup
    
    
    url="https://search.jd.com/Search?keyword=ps4&enc=utf-8&wq=ps4&pvid=cf0158c8664442799c1146a461478c9c"
    head={
        'authority': 'search.jd.com',
        'method': 'GET',
        'path': '/s_new.php?keyword=%E6%89%8B%E6%9C%BA&enc=utf-8&qrst=1&rt=1&stop=1&vt=2&wq=%E6%89%8B%E6%9C%BA&cid2=653&cid3=655&page=4&s=84&scrolling=y&log_id=1529828108.22071&tpl=3_M&show_items=7651927,7367120,7056868,7419252,6001239,5934182,4554969,3893501,7421462,6577495,26480543553,7345757,4483120,6176077,6932795,7336429,5963066,5283387,25722468892,7425622,4768461',
        'scheme': 'https',
        'referer': 'https://search.jd.com/Search?keyword=%E6%89%8B%E6%9C%BA&enc=utf-8&qrst=1&rt=1&stop=1&vt=2&wq=%E6%89%8B%E6%9C%BA&cid2=653&cid3=655&page=3&s=58&click=0',
        'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36',
        'x-requested-with': 'XMLHttpRequest',
    }
    
    def page(page):
        """Fetch one JD.com search-result page for "ps4" and print its listings.

        Args:
            page: Page number to request. May be an ``int`` or a ``str``; it is
                converted with ``str()`` and placed in the ``page=`` query
                parameter.

        Returns:
            None. For every ``<li>`` whose class contains ``gl-item`` one line
            is printed with the name, price, comment text and link, each being
            the space-joined result of its XPath query.

        Raises:
            requests.RequestException: Propagated from ``requests.get`` (the
                request uses a 3-second timeout).
        """
        print("开始")
        # str() lets callers pass an int page number; plain "+" concatenation
        # would raise TypeError for anything that is not already a str.
        url = (
            "https://search.jd.com/Search?keyword=ps4&enc=utf-8&qrst=1&rt=1"
            "&stop=1&vt=1&wq=ps4&page=" + str(page) + "&s=181&click=0"
        )
        r = requests.get(url, timeout=3, headers=head)
        # Use the encoding detected from the body rather than the declared one.
        r.encoding = r.apparent_encoding

        _element = etree.HTML(r.text)
        datas = _element.xpath('//li[contains(@class,"gl-item")]')
        print(datas)
        for data in datas:
            p_price = data.xpath('div/div[@class="p-price"]/strong/i/text()')
            # Positional lookup: the 5th child <div> of the item wrapper.
            # NOTE(review): presumably the comment-count block -- fragile if the
            # page layout changes; confirm against the live markup.
            p_comment = data.xpath('div/div[5]/strong/a/text()')
            p_name = data.xpath('div/div[@class="p-name p-name-type-2"]/a/em/text()')
            p_href = data.xpath('div/div[@class="p-name p-name-type-2"]/a/@href')
            # Each XPath query returns a list of strings; flatten to one string.
            comment = ' '.join(p_comment)
            name = ' '.join(p_name)
            price = ' '.join(p_price)
            href = ' '.join(p_href)
            # Print the joined comment text, not the raw list.
            print(name, price, comment, href)
    
    if __name__ == '__main__':
        # Script entry point: scrape result page 5.
        page("5")
  • 相关阅读:
    JQuery实现模糊查询关键字高亮输入框
    jq--实现自定义下拉框
    js 实现颜色值格式转换 rgb和十六进制的转换
    原生JavaScript设置、获取 单选框、复选框 的值
    win10 开启端口,auto.js端口
    AES的加密和解密(Java and javascript)
    去除移动端 alert / confirm 显示的 url
    移动端拖拽
    前台传入base64图片,java后台转为MultipartFile文件
    移动端调试vConsole
  • 原文地址:https://www.cnblogs.com/zlc364624/p/12874090.html
Copyright © 2011-2022 走看看