zoukankan      html  css  js  c++  java
  • 爬取京东商品信息

    import re,time,requests,bs4,csv
    from bs4 import BeautifulSoup
    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.wait import WebDriverWait
    from selenium.webdriver.chrome.options import Options
    from selenium.webdriver.common.keys import Keys
    
    def _first_tag(item, css_class, *path):
        """Return the tag reached from ``div.<css_class>`` inside ``item``.

        ``path`` is a sequence of tag names followed one after another
        (first matching descendant each time).  Returns ``None`` as soon as
        any link in the chain is missing instead of raising AttributeError.
        """
        node = item.find(name='div', attrs={"class": css_class})
        for tag_name in path:
            if node is None:
                return None
            node = node.find(tag_name)
        return node

    def get_infotext(ulist, html):
        """Parse one search-results page and append a row per product to ``ulist``.

        Args:
            ulist: list that receives ``[name, price, img, commit, shopnum]``
                rows; it is mutated in place.
            html: page source containing the ``div#J_goodsList`` container.

        Fields whose markup is absent are stored as ``None`` rather than
        aborting the whole page (the original crashed with AttributeError,
        e.g. on items without a ``p-shopnum`` block).  ``<li>`` elements that
        carry none of the product fields are skipped.  If the results
        container itself is missing, nothing is appended.
        """
        soup = BeautifulSoup(html, "html.parser")
        goods = soup.find(name='div', attrs={"id": "J_goodsList"})
        if goods is None:
            # Page did not contain a result list (not loaded, layout change, ...).
            return
        for li in goods.find_all('li'):
            img_tag = _first_tag(li, 'p-img', 'a', 'img')
            price_tag = _first_tag(li, 'p-price', 'i')
            name_tag = _first_tag(li, 'p-name', 'a', 'em')
            commit_tag = _first_tag(li, 'p-commit', 'a')
            shop_tag = _first_tag(li, 'p-shopnum', 'a')

            tags = (img_tag, price_tag, name_tag, commit_tag, shop_tag)
            if all(tag is None for tag in tags):
                # find_all('li') also returns nested, non-product <li> elements.
                continue

            img = None
            if img_tag is not None:
                img = img_tag.get('src')
                if img is None:
                    # Fall back to the lazy-load attribute when 'src' is not set.
                    img = img_tag.get('data-lazy-img')
            price = price_tag.string if price_tag is not None else None
            name = name_tag.text if name_tag is not None else None
            commit = commit_tag.string if commit_tag is not None else None
            shopnum = shop_tag.text if shop_tag is not None else None
            ulist.append([name, price, img, commit, shopnum])
    def print_infotext(ulist, num):
        """Print the first ``num`` rows of ``ulist`` to stdout, one row per line.

        Args:
            ulist: list of 5-element rows ``[name, price, img, commit, shopnum]``.
            num: maximum number of rows to print.  When ``ulist`` holds fewer
                rows, only the available ones are printed (the original raised
                IndexError); a non-positive ``num`` prints nothing.
        """
        # Slicing clamps to the list length, so a short list is not an error.
        for name, price, img, commit, shopnum in ulist[:max(num, 0)]:
            print(name, price, img, commit, shopnum)
    
    def print_infocsv(ulist, path='D:picsjindong.csv', encoding='utf-8-sig'):
        """Write every row of ``ulist`` to a CSV file.

        Args:
            ulist: iterable of rows (sequences); ``None`` cells are written
                as empty fields by the csv module.
            path: destination file; it is overwritten if it exists.  The
                default is the path the script has always used.
                NOTE(review): the default looks like a Windows path that lost
                its backslashes — confirm the intended location.
            encoding: text encoding of the output.  Defaults to UTF-8 with a
                BOM so non-ASCII (e.g. Chinese) text is written regardless of
                the platform's default code page and is recognised by Excel.
        """
        # newline='' lets the csv module control line endings itself.
        with open(path, 'w', newline='', encoding=encoding) as f:
            csv.writer(f).writerows(ulist)
    # Search-results URL for the keyword "python", starting at the first page.
    url = "https://search.jd.com/Search?keyword=python&enc=utf-8&qrst=1&rt=1&stop=1&vt=2&page=1&s=1&click=0"
    # Machine-specific location of the browser executable driven by chromedriver.
    # NOTE(review): the published path had lost its backslashes; this is the
    # reconstructed form — adjust it to the local installation.
    exe_data = r'C:\Users\lsk17\AppData\Local\360Chrome\Chrome\Application\360chrome.exe'
    chrome_options = Options()
    chrome_options.binary_location = exe_data

    # 'options=' replaces the deprecated 'chrome_options=' keyword.
    browser = webdriver.Chrome(options=chrome_options)
    uinfo = []
    page_count = 3  # number of result pages to scrape
    try:
        browser.get(url)
        for page in range(1, page_count + 1):
            # Scroll down in four steps (1000, 2000, 4000, 8000 px) so that the
            # lazily loaded items are rendered before the page is parsed.
            length = 1000
            for _ in range(4):
                browser.execute_script(
                    "var q=document.documentElement.scrollTop=" + str(length))
                time.sleep(2)
                length += length
            get_infotext(uinfo, browser.page_source)
            if page < page_count:
                # Jump to the next page through the page-number box at the
                # bottom; skipped after the last page since nothing more is read.
                div = browser.find_element(By.ID, "J_bottomPage")
                elem = div.find_element(By.CLASS_NAME, "input-txt")
                elem.clear()
                elem.send_keys(str(page + 1))
                elem.send_keys(Keys.RETURN)
    finally:
        # Always shut the browser down, even when scraping fails midway.
        browser.quit()
    # Use print_infotext(uinfo, 10) instead to preview rows on the console.
    print_infocsv(uinfo)
  • 相关阅读:
    leetcode-Minimum Path Sum
    第三十二章 自说明代码
    第三十一章 布局与风格
    第三十章 编程工具
    第二十九章 集成
    第二十八章 管理构建
    第二十五章 代码调整策略
    第二十六章 代码调整技术
    第二十七章 程序规模对构建的影响
    第二十四章 重构
  • 原文地址:https://www.cnblogs.com/lskai/p/11936589.html
Copyright © 2011-2022 走看看