"""Scrape JD.com search results with Selenium and store them in MongoDB.

Running this file opens Chrome, searches jd.com for a keyword, walks through
the result pages and inserts one document per product into the ``messages``
collection of the ``jd`` database on the local MongoDB server.
"""
import time

import pymongo
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

# Scroll positions used to trigger loading of the products on a result page:
# SCROLL_COUNT steps, starting at SCROLL_START px and SCROLL_STEP px apart.
SCROLL_START = 400
SCROLL_STEP = 500
SCROLL_COUNT = 20


def _scroll_page(driver):
    """Scroll down step by step so all products on the page get loaded."""
    for step in range(SCROLL_COUNT):
        offset = SCROLL_START + step * SCROLL_STEP
        driver.execute_script('window.scrollTo(0,%s)' % offset)
        time.sleep(0.1)


def _extract_item(good):
    """Return name, price, link, image and comment count of one product.

    ``good`` is the WebElement of a single ``gl-item`` node. Spaces are
    stripped from the text fields. A fresh dict is returned for every product
    because ``insert_one`` adds an ``_id`` key to the document it is given.
    """
    return {
        '名字': good.find_element(By.CSS_SELECTOR, '.p-name em').text.replace(" ", ""),
        '价格': good.find_element(By.CSS_SELECTOR, '.p-price').text.replace(" ", ""),
        '链接': good.find_element(By.CSS_SELECTOR, '.p-img a').get_attribute('href'),
        '图片': good.find_element(By.CSS_SELECTOR, '.p-img img').get_attribute('src'),
        '评论数': good.find_element(By.CSS_SELECTOR, '.p-commit').text.replace(" ", ""),
    }


def get_goods(driver):
    """Scrape every result page reachable from the current one into MongoDB.

    Args:
        driver: a Selenium WebDriver already showing a JD search-result page.

    For each page the function scrolls to load the products, inserts one
    document per ``gl-item`` into ``jd.messages`` and then clicks the
    "next page" button. It returns once no usable next button is found.

    Pages are processed in a loop rather than by recursion, so the call stack
    stays flat and a single MongoDB connection is used (and closed) per call.
    """
    client = pymongo.MongoClient('localhost', 27017)
    try:
        collection = client.jd.messages
        while True:
            _scroll_page(driver)

            # Outer container holding all products, then each product node.
            goods_div = driver.find_element(By.ID, 'J_goodsList')
            for good in goods_div.find_elements(By.CLASS_NAME, 'gl-item'):
                collection.insert_one(_extract_item(good))

            # find_elements returns an empty list instead of raising when
            # the button is absent, which is the normal end-of-results case.
            next_buttons = driver.find_elements(By.CLASS_NAME, 'pn-next')
            if not next_buttons:
                break
            next_tag = next_buttons[0]
            # NOTE(review): assumes an inactive next button carries a
            # "disabled" class on the last page - confirm on the live site.
            if 'disabled' in (next_tag.get_attribute('class') or '').split():
                break
            next_tag.click()
            # Give the next page time to load before scraping it.
            time.sleep(3)
    finally:
        client.close()


# Start a Chrome session.
driver = webdriver.Chrome()
try:
    # Open the JD home page.
    driver.get('https://www.jd.com/')
    driver.implicitly_wait(10)
    # Type the search keyword into the search box.
    input_tag = driver.find_element(By.ID, 'key')
    input_tag.send_keys('电脑')
    # Submit by clicking the search button
    # (alternative: input_tag.send_keys(Keys.ENTER)).
    button = driver.find_element(By.XPATH, '//*[@id="search"]/div/div[2]/button')
    button.click()
    get_goods(driver)
finally:
    # quit() ends the whole WebDriver session, including the chromedriver
    # process; close() would only close the current window.
    driver.quit()