爬取京东
"""
########
# 爬取京东商品信息
########
from selenium import webdriver
import time
from selenium.webdriver.common.keys import Keys
# Module-level Chrome WebDriver shared by the rest of the script; the
# chromedriver binary is looked up at ./chromedriver, i.e. relative to the
# current working directory.
bro = webdriver.Chrome(executable_path='./chromedriver')
def get_goods(bro):
    '''Print the details of every product on each search-result page.

    For each element with class ``gl-item`` on the current page this prints
    the product name, price, image URL, product URL and comment count. It
    then clicks the link whose text contains 下一页 ("next page") and
    repeats on the following page.

    Args:
        bro: A Selenium WebDriver already showing a search-result page.
            Only the Selenium 3 style ``find_element(s)_by_*`` methods are
            used.

    Raises:
        Any exception raised by the element lookups or the click is
        propagated unchanged. The loop has no other exit, so a failing
        lookup (for example of the next-page link) is what ends the crawl.
    '''
    # Label order must match the argument order in the print call below:
    # name, price, image URL, product URL, comment count.
    template = (
        '\n'
        '商品名字:%s\n'
        '商品价格:%s\n'
        '商品图片地址:%s\n'
        '商品地址:%s\n'
        '商品评论数:%s\n'
    )

    # Iterate page by page instead of recursing, so the call stack does not
    # grow with the number of result pages.
    while True:
        # find_elements_by_class_name returns every match;
        # find_element_by_class_name would return only one.
        for li in bro.find_elements_by_class_name('gl-item'):
            url = li.find_element_by_css_selector('.p-img>a').get_attribute('href')

            img = li.find_element_by_css_selector('.p-img img')
            url_img = img.get_attribute('src')
            if not url_img:
                # No src available: fall back to the data-lazy-img attribute.
                # The 'https:' prefix presumably completes a
                # protocol-relative URL -- TODO confirm against the page.
                lazy_src = img.get_attribute('data-lazy-img')
                url_img = 'https:' + lazy_src if lazy_src else ''

            price = li.find_element_by_css_selector('.p-price i').text
            name = li.find_element_by_css_selector('.p-name em').text
            commit = li.find_element_by_css_selector('.p-commit a').text

            print(template % (name, price, url_img, url, commit))

        # Locate the next-page link, pause one second, then move on.
        next_page = bro.find_element_by_partial_link_text('下一页')
        time.sleep(1)
        next_page.click()
try:
    bro.get('https://www.jd.com')
    # Implicit wait: element lookups may wait up to 10 seconds for a match.
    bro.implicitly_wait(10)
    input_search = bro.find_element_by_id('key')
    input_search.send_keys("精品内衣")
    # Simulate pressing the Enter key to submit the search.
    input_search.send_keys(Keys.ENTER)
    get_goods(bro)
except Exception as e:
    # Top-level boundary: report the failure instead of crashing.
    # get_goods only stops by raising, so the crawl also ends here.
    print(e)
finally:
    # quit() ends the whole browser session, whereas close() would only
    # close the current window and leave the driver process running.
    bro.quit()
"""