zoukankan      html  css  js  c++  java
  • 浏览器分离

    from selenium import webdriver
    import time, random


    def gen_browser(btype='chrome'):
        """Create and return a new Selenium WebDriver instance.

        Args:
            btype: Case-insensitive browser name: 'chrome', 'firefox', 'edge'
                or 'safari'. Any other value falls back to Chrome, which is
                what every call returned before this parameter was honored.

        Returns:
            A freshly started WebDriver. The caller is responsible for
            calling ``quit()`` on it.
        """
        driver_names = {
            'chrome': 'Chrome',
            'firefox': 'Firefox',
            'edge': 'Edge',
            'safari': 'Safari',
        }
        # Resolve the class lazily by attribute name so that only the
        # requested driver class is touched.
        driver_name = driver_names.get(str(btype).lower(), 'Chrome')
        return getattr(webdriver, driver_name)()


    # Scrape the wukong.com front page: scroll to load more content, collect
    # every link that carries a user id, then open each user's detail page in
    # a separate browser to read the self-introduction text.
    browser = webdriver.Chrome()
    href_l = []
    # Collected (uid, link text, self-introduction) tuples.
    wukong_l = []
    try:
        url = 'https://www.wukong.com/'
        browser.get(url)
        browser.refresh()
        time.sleep(20)

        # Scroll to the bottom repeatedly, pausing a random 1-10 seconds
        # before each scroll.
        js = 'window.scrollTo(0,document.body.scrollHeight)'
        for _ in range(20):
            time.sleep(random.randint(1, 10))
            browser.execute_script(js)

        # The generic (by, value) locator form is used because the
        # find_elements_by_* helpers were removed in Selenium 4.3.
        all_a = browser.find_elements('tag name', 'a')
        for i in all_a:
            try:
                href_t = i.get_attribute('href')
                # Anchors without an href yield None; skip them instead of
                # raising on the membership test below.
                if not href_t or 'uid' not in href_t or i.text == '':
                    continue
                uid, txt = href_t.split('uid=')[-1], i.text
                wukong_detail_url = 'https://www.wukong.com/user/?uid={}'.format(uid)
                browser_tmp = gen_browser()
                try:
                    browser_tmp.get(wukong_detail_url)
                    time.sleep(2)
                    selfd = browser_tmp.find_element('class name', 'user-title').text
                finally:
                    # Always release the per-user browser, even when the
                    # detail page lacks the expected element.
                    browser_tmp.quit()
                wukong_l.append((uid, txt, selfd))
            except Exception as e:
                # Best effort: report the failure and continue with the
                # next link.
                print(e)
    finally:
        browser.quit()

    import pymysql

    # Connection settings passed to pymysql.connect() by mysql_write().
    # NOTE(review): the credentials are hard-coded in source; consider loading
    # them from the environment or a config file kept out of version control.
    h = '192.168.6.20'  # host
    pt = 3306  # port
    u = 'root'  # user
    p = 'n126'  # password
    db = 'media'  # database name


    def mysql_write(sql, args=None):
        """Execute one write statement against the configured MySQL database.

        Connects using the module-level settings ``h``, ``pt``, ``u``, ``p``
        and ``db``, executes ``sql``, and commits.

        Args:
            sql: The SQL statement to execute.
            args: Optional parameters bound to ``%s`` placeholders in ``sql``
                by the driver. When omitted, ``sql`` is executed as is.

        Returns:
            1 if the connection could not be established (the error is
            printed), 0 after a successful execute and commit.

        Raises:
            Any exception raised while executing or committing propagates to
            the caller; the connection is closed first.
        """
        try:
            conn = pymysql.connect(host=h, port=pt, user=u, passwd=p, db=db, charset='utf8')
        except Exception as e:
            print(e)
            return 1
        try:
            with conn.cursor() as cursor:
                cursor.execute(sql, args)
            conn.commit()
        finally:
            # Close the connection even when execute/commit raises, so a
            # failed statement does not leak it.
            conn.close()
        return 0


    # Build one multi-row INSERT from the scraped tuples, append it to a local
    # SQL file, and write it to the database.
    sql = ('INSERT INTO toutiao_uid_gathered_wukong '
           '(wukong_uid,wukong_nickname,selfintroduction,time_script) VALUES ')

    # The values come from scraped web pages, so escape them before they are
    # embedded in the statement; an unescaped quote or backslash would break
    # the statement or allow SQL injection.
    _escape = pymysql.converters.escape_string
    value_rows = [
        '("{}","{}","{}","{}")'.format(
            _escape(str(uid)), _escape(str(nickname)), _escape(str(intro)),
            int(time.time()))
        for uid, nickname, intro in wukong_l
    ]

    if value_rows:
        sql = sql + ','.join(value_rows)

        # NOTE(review): the purpose of this 32 second pause is not visible
        # here; it is kept as in the original.
        time.sleep(32)
        print(sql)
        with open('sql.tmp.sql', 'a', encoding='utf-8') as fo:
            # The file is opened in append mode, so terminate each statement
            # to keep consecutive runs from running together.
            fo.write(sql + ';\n')

        mysql_write(sql)
    else:
        # An INSERT with an empty VALUES list is invalid SQL, so skip it.
        print('no wukong users gathered; nothing to write')



  • 相关阅读:
    从一个Fragment跳转到另一个Fragment
    网站关键字排名查询
    wordpress添加百度统计
    WordPress:自定义页面模板
    wordpress的系统卡
    Android APK反编译就这么简单 详解(附图)
    关于使用apktool可以反编译无法回编译的解决问题
    移动广告联盟
    android studio 设备 unauthorized 问题解决
    使用Android Studio开发遇到的问题集合
  • 原文地址:https://www.cnblogs.com/rsapaper/p/8305146.html
Copyright © 2011-2022 走看看