zoukankan      html  css  js  c++  java
  • 爬取深圳交易所

    #encoding:utf8
    import re
    import requests
    from selenium import webdriver
    from selenium.webdriver.support.ui import WebDriverWait
    import time
    # Scrape company detail records from the Shenzhen Stock Exchange company
    # list page. For each category tab in `num`, walk every result page, open
    # the detail view of each listed row, extract the detail table fields with
    # a regex, print them, then return to the list.

    # Matches the pager text ("当前第 X 页 共 N 页") and captures the total
    # number of pages N. Compiled once instead of once per tab.
    _PAGE_COUNT_RE = re.compile('<td.*?>当前第.*?页 共(.*?)页</td>',re.S)

    # Matches the detail table, anchored on the "公司名称" (company name) row,
    # and captures the value cell(s) of every following row.
    # Compiled once instead of once per row.
    _DETAIL_RE = re.compile('<table.*?><tbody>.*?<tr><td.*?>公司名称</td><td.*?>(.*?)</td></tr><tr><td.*?>.*?</td><td.*?>(.*?)</td></tr><tr><td.*?>.*?</td><td.*?>(.*?)</td></tr><tr><td.*?>.*?</td><td.*?>(.*?)</td><td.*?>.*?</td><td.*?>(.*?)</td></tr><tr><td.*?>.*?</td><td.*?>(.*?)</td><td.*?>.*?</td><td.*?>(.*?)</td><td.*?>.*?</td><td.*?>(.*?)</td></tr><tr><td.*?>.*?</td><td.*?>(.*?)</td><td.*?>.*?</td><td.*?>(.*?)</td></tr><tr><td.*?>.*?</td><td.*?>(.*?)</td><td.*?>.*?</td><td.*?>(.*?)</td><td.*?>.*?</td><td.*?>(.*?)</td></tr><tr><td.*?>.*?</td><td.*?>(.*?)</td><td.*?>.*?</td><td.*?>(.*?)</td><td.*?>.*?</td><td.*?>(.*?)</td></tr><tr><td.*?>.*?</td><td.*?>(.*?)</td></tr><tr><td.*?>.*?</td><td.*?><a.*?>(.*?)</a></td></tr><tr></tr>.*?</tbody></table>',re.S)

    # The original script always visited rows tr[2]..tr[11], i.e. at most ten
    # data rows per page (tr[1] is skipped, presumably a header row).
    _FIRST_ROW = 2
    _MAX_ROWS_PER_PAGE = 10


    def _wait_visible_xpath(driver, xpath, timeout=10):
        """Wait until the element at `xpath` is displayed and return it.

        Raises selenium's TimeoutException if the element is not displayed
        within `timeout` seconds.
        """
        WebDriverWait(driver, timeout).until(
            lambda the_driver: the_driver.find_element_by_xpath(xpath).is_displayed())
        return driver.find_element_by_xpath(xpath)


    def _wait_visible_css(driver, selector, timeout=10):
        """Wait until the element matching CSS `selector` is displayed and return it.

        Raises selenium's TimeoutException if the element is not displayed
        within `timeout` seconds.
        """
        WebDriverWait(driver, timeout).until(
            lambda the_driver: the_driver.find_element_by_css_selector(selector).is_displayed())
        return driver.find_element_by_css_selector(selector)


    dr = webdriver.PhantomJS()
    try:
        dr.maximize_window()
        dr.get('http://www.szse.cn/main/marketdata/jypz/colist/')
        # Indexes of the category tabs (div[k]) to visit.
        num = ['2','3','4','6']
        for k in num:
            # Switch to the category tab and give the list time to reload.
            _wait_visible_xpath(
                dr,
                '//*[@id="REPORT_ID_1110"]/table/tbody/tr/td/table[3]/tbody/tr/td[1]/div['+k+']'
            ).click()
            time.sleep(2)

            page_counts = _PAGE_COUNT_RE.findall(dr.page_source)
            # If no pager text is found, treat the tab as a single page rather
            # than failing with an IndexError.
            total_pages = int(page_counts[0]) if page_counts else 1

            table_xpath = '//*[@id="REPORTID_tab'+str(k)+'"]/tbody'
            # Visit pages 1..total_pages inclusive. The original used
            # range(1, total) and therefore never scraped the last page.
            for page in range(1, total_pages + 1):
                # Make sure the list is rendered before counting its rows.
                _wait_visible_xpath(
                    dr, table_xpath+'/tr['+str(_FIRST_ROW)+']/td[2]/img')
                # The last page may hold fewer than ten rows; count the rows
                # that actually carry a detail icon instead of assuming ten.
                icons = dr.find_elements_by_xpath(
                    table_xpath+'/tr[position()>1]/td[2]/img')
                row_total = min(len(icons), _MAX_ROWS_PER_PAGE)

                for i in range(_FIRST_ROW, _FIRST_ROW + row_total):
                    # Open the detail view for this row.
                    _wait_visible_xpath(
                        dr, table_xpath+'/tr['+str(i)+']/td[2]/img').click()
                    # Wait for the detail table before reading the page source.
                    _wait_visible_xpath(
                        dr, '//*[@id="1743_detail_smetab1"]/tbody/tr[1]/td[2]')
                    records = _DETAIL_RE.findall(dr.page_source)
                    print(records)
                    # Close the detail view to return to the list.
                    _wait_visible_xpath(
                        dr, '//*[@id="REPORT_ID_1743_detail_sme"]/div/input').click()

                # Only advance when there is a following page.
                if page < total_pages:
                    _wait_visible_css(dr, '.cls-navigate-next').click()
                    time.sleep(2)
    finally:
        # Always shut the browser process down, even when a wait times out.
        dr.quit()

  • 相关阅读:
    【转】理清基本的git(github)流程
    GIT CHEAT SHEET
    failed to push some refs to 'git@github.com:*/learngit.git'
    catch(…) vs catch(CException *)?
    char[]与TCHAR[]互相转换引发的一个问题!
    关于 AfxSocketInit()
    href="#"与href="javascript:void(0)"的区别
    Camera帧率和AE的关系(转)
    详细的摄像头模组工作原理!!!(转)
    高清摄像头MIPI接口与ARM处理器的连接(转)
  • 原文地址:https://www.cnblogs.com/zhisy/p/6880247.html
Copyright © 2011-2022 走看看