zoukankan      html  css  js  c++  java
  • 爬取深圳交易所

    #encoding:utf8
    import re
    import requests
    from selenium import webdriver
    from selenium.webdriver.support.ui import WebDriverWait
    import time
    # Scrape company detail records from the Shenzhen Stock Exchange company
    # list. For each board tab in `num`, walk every result page, open each
    # row's detail view, print the fields captured by the detail regex, and
    # return to the list.
    #
    # NOTE(review): the original paste had lost its indentation; the loop
    # nesting below (tab -> page -> row) is reconstructed from the logic.


    def _wait_displayed_xpath(driver, xpath, timeout=10):
        """Block until the element at *xpath* is displayed, then return it.

        The element is looked up again after the wait so the caller gets a
        fresh reference. WebDriverWait raises its timeout error if the
        element does not become displayed within *timeout* seconds.
        """
        WebDriverWait(driver, timeout).until(
            lambda the_driver: the_driver.find_element_by_xpath(xpath).is_displayed())
        return driver.find_element_by_xpath(xpath)


    # Patterns are loop-invariant, so compile them once up front.
    # Captures the total page count from the pager text.
    page_count_re = re.compile('<td.*?>当前第.*?页 共(.*?)页</td>',re.S)
    # Captures the value cells of the company detail table.
    detail_re = re.compile('<table.*?><tbody>.*?<tr><td.*?>公司名称</td><td.*?>(.*?)</td></tr><tr><td.*?>.*?</td><td.*?>(.*?)</td></tr><tr><td.*?>.*?</td><td.*?>(.*?)</td></tr><tr><td.*?>.*?</td><td.*?>(.*?)</td><td.*?>.*?</td><td.*?>(.*?)</td></tr><tr><td.*?>.*?</td><td.*?>(.*?)</td><td.*?>.*?</td><td.*?>(.*?)</td><td.*?>.*?</td><td.*?>(.*?)</td></tr><tr><td.*?>.*?</td><td.*?>(.*?)</td><td.*?>.*?</td><td.*?>(.*?)</td></tr><tr><td.*?>.*?</td><td.*?>(.*?)</td><td.*?>.*?</td><td.*?>(.*?)</td><td.*?>.*?</td><td.*?>(.*?)</td></tr><tr><td.*?>.*?</td><td.*?>(.*?)</td><td.*?>.*?</td><td.*?>(.*?)</td><td.*?>.*?</td><td.*?>(.*?)</td></tr><tr><td.*?>.*?</td><td.*?>(.*?)</td></tr><tr><td.*?>.*?</td><td.*?><a.*?>(.*?)</a></td></tr><tr></tr>.*?</tbody></table>',re.S)

    # The script clicks at most this many data rows per page (tr[2]..tr[11]).
    max_rows_per_page = 10

    dr = webdriver.PhantomJS()
    try:
        dr.maximize_window()
        dr.get('http://www.szse.cn/main/marketdata/jypz/colist/')
        # Indices of the tab <div>s to visit inside the tab strip.
        num = ['2','3','4','6']
        for k in num:
            # Switch to the tab and give the page time to refresh.
            _wait_displayed_xpath(
                dr,
                '//*[@id="REPORT_ID_1110"]/table/tbody/tr/td/table[3]/tbody/tr/td[1]/div['+k+']'
            ).click()
            time.sleep(2)

            page_counts = page_count_re.findall(dr.page_source)
            if not page_counts:
                raise RuntimeError('could not find the page count for tab ' + k)
            # As in the original, the first pager match is used.
            total_pages = int(page_counts[0])

            table_rows = '//*[@id="REPORTID_tab'+str(k)+'"]/tbody/tr'
            # Include the final page (the original range stopped one short).
            for page in range(1, total_pages + 1):
                # Wait for the first data row, then count how many rows carry
                # a detail icon: the last page may hold fewer than a full page.
                _wait_displayed_xpath(dr, table_rows+'[2]/td[2]/img')
                icon_count = len(dr.find_elements_by_xpath(
                    table_rows+'[position()>1]/td[2]/img'))
                row_count = min(max_rows_per_page, icon_count)

                # Row 1 is never clicked; data rows start at tr[2].
                for i in range(2, 2 + row_count):
                    # Open the detail view for this row.
                    _wait_displayed_xpath(dr, table_rows+'['+str(i)+']/td[2]/img').click()
                    _wait_displayed_xpath(
                        dr, '//*[@id="1743_detail_smetab1"]/tbody/tr[1]/td[2]')
                    print(detail_re.findall(dr.page_source))
                    # Click the input in the detail panel to return to the list.
                    _wait_displayed_xpath(
                        dr, '//*[@id="REPORT_ID_1743_detail_sme"]/div/input').click()

                # Advance to the next page, except after the last one.
                if page < total_pages:
                    WebDriverWait(dr, 10).until(lambda the_driver:
                        the_driver.find_element_by_css_selector('.cls-navigate-next').is_displayed())
                    dr.find_element_by_css_selector('.cls-navigate-next').click()
                    time.sleep(2)
    finally:
        # Always shut the browser process down, even if a wait timed out.
        dr.quit()

  • 相关阅读:
    IE的有条件注释
    JavaScript 正则表达式判断是否有小数点
    设置<li>前边圆点样式
    CSS3 Gradient
    z-index
    Linux下weblogic启动报错unable to get file lock的问题
    Linux下启动关闭weblogic
    Java 自动装箱与拆箱(Autoboxing and unboxing)
    jquery怎么跳出当前的each循环
    Integer与int的种种比较你知道多少
  • 原文地址:https://www.cnblogs.com/zhisy/p/6880247.html
Copyright © 2011-2022 走看看