zoukankan      html  css  js  c++  java
  • 代码与性格

    从代码里能分析出一个人处事的积极与消极、妥协退让与迎面直击。

    import os
    import threading
    import time

    import pymysql
    import requests
    from bs4 import BeautifulSoup
    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.common.keys import Keys
    
    # MySQL connection settings read by mysql_fetch() and mysql_write().
    h = 'localhost'  # host
    pt = 3306  # port
    u = 'root'  # user
    p = ''  # password
    db = 'qqzone'  # database name
    
    
    def mysql_fetch(sql, res_type='tuple', args=None):
        """Execute ``sql`` on a fresh connection and return all result rows.

        The connection is opened with the module-level settings ``h``, ``pt``,
        ``u``, ``p`` and ``db`` and is always closed before returning, even
        when the statement fails.

        Args:
            sql: SQL statement to execute.
            res_type: ``'dic'`` yields rows as dicts (``DictCursor``); any
                other value yields rows from the connection's default cursor.
            args: Optional parameters bound by the driver to the placeholders
                in ``sql``. Prefer this over formatting values into ``sql``.

        Returns:
            The result of ``cursor.fetchall()``, or ``()`` if the connection
            could not be opened.

        Raises:
            Any error raised while executing or fetching propagates to the
            caller after the cursor and connection have been closed.
        """
        try:
            conn = pymysql.connect(host=h, port=pt, user=u, passwd=p, db=db, charset='utf8mb4')
        except Exception as e:
            # Best effort: report the connection problem and return no rows.
            print(e)
            return ()
        try:
            # Passing None makes the connection use its default cursor class.
            cursor_class = pymysql.cursors.DictCursor if res_type == 'dic' else None
            cursor = conn.cursor(cursor_class)
            try:
                cursor.execute(sql, args)
                conn.commit()
                return cursor.fetchall()
            finally:
                cursor.close()
        finally:
            conn.close()
    
    
    def mysql_write(sql, args=None):
        """Execute a write statement on a fresh connection and commit it.

        The connection is opened with the module-level settings ``h``, ``pt``,
        ``u``, ``p`` and ``db`` and is always closed before returning, even
        when the statement fails.

        Args:
            sql: SQL statement to execute.
            args: Optional parameters bound by the driver to the placeholders
                in ``sql``. Prefer this over formatting values into ``sql``.

        Returns:
            ``0`` after a successful commit, ``1`` if the connection could not
            be opened.

        Raises:
            Any error raised while executing or committing propagates to the
            caller after the cursor and connection have been closed.
        """
        try:
            conn = pymysql.connect(host=h, port=pt, user=u, passwd=p, db=db, charset='utf8mb4')
        except Exception as e:
            # Best effort: report the connection problem via the return code.
            print(e)
            return 1
        try:
            cursor = conn.cursor()
            try:
                cursor.execute(sql, args)
                conn.commit()
            finally:
                cursor.close()
        finally:
            conn.close()
        return 0
    
    
    # D:\pyaction\toutiao_team_win
    
    # Directory that downloaded media is written to.
    # An earlier setup used 'C:/Users/Administrator/Desktop/1/toutiao_team/dl_img/'.
    img_dir = 'D:/pyaction/toutiao_team_win/dl_img/'
    
    import random
    
    
    def spider_webimg_dl_return_local_img_path(img_dir, img_url, media_type='img',
                                               local_default='default.DONOT_REMOVE.png'):
        """Download ``img_url`` into ``img_dir`` and return the local file path.

        Args:
            img_dir: Target directory, used as a plain string prefix (it must
                already end with a path separator).
            img_url: URL of the media file to download.
            media_type: ``'img'`` saves the response as ``.png``; ``'mp4'``
                saves it as ``.mp4``; any other value downloads nothing.
            local_default: File name (inside ``img_dir``) of the placeholder
                returned when an image cannot be downloaded.

        Returns:
            The path of the written file. On failure the placeholder path is
            returned for ``'img'`` and ``''`` for ``'mp4'``. For any other
            ``media_type`` the placeholder path is returned.
        """
        fallback = '%s%s' % (img_dir, local_default)

        def build_path(name_part, extension):
            # Only the generated file-name component is sanitised; stripping
            # these characters from the directory part would corrupt it.
            for unwanted in ('&', '=', '?', '-'):
                name_part = name_part.replace(unwanted, '')
            stamp = time.strftime('%Y%m%d%H%M%S', time.localtime(time.time()))
            return '%s%s%s%s%s' % (img_dir, stamp, str(threading.get_ident()), name_part, extension)

        if media_type == 'img':
            try:
                # A timeout keeps a stalled server from blocking the caller forever.
                req = requests.get(img_url, timeout=60)
                time.sleep(3)
                if req.status_code != 200:
                    print('-!=200')
                    return fallback
                # Deliberate pause between downloads.
                time.sleep(30)
                print(img_url)
                payload = req.content
                if not payload:
                    # An empty body would produce a useless zero-byte file.
                    return fallback
                target = build_path(str(random.randrange(1000, 9999)), '.png')
                print(target)
                with open(target, 'wb') as f:
                    f.write(payload)
                return target
            except Exception as e:
                # Never hand back the path of a file that was not written.
                print(e)
                return fallback
        elif media_type == 'mp4':
            try:
                time.sleep(30)
                print(img_url)
                url_name = img_url.split('.mp4?')[0].split('/')[-1].replace('*', '_')
                target = build_path(url_name, '.mp4')
                print(target)
                req = requests.get(img_url, timeout=60)
                time.sleep(3)
                if req.status_code != 200:
                    print('-!=200')
                    return ''
                payload = req.content
                time.sleep(210)
                if not payload:
                    return ''
                with open(target, 'wb') as f:
                    f.write(payload)
                return target
            except Exception as e:
                print(e)
                return ''

        return fallback
    
    
    # Open a Chrome session on the Weibo login page. The profile pages
    # https://weibo.com/u/1779073702 and https://weibo.com/u/1779073702?is_all=1
    # were used as entry points in earlier versions of this script.
    driver = webdriver.Chrome()
    myurl = 'https://weibo.com/login.php'
    driver.get(myurl)
    # Needs spare memory and CPU here so the browser can parse the DOM and run
    # this JavaScript-heavy page.
    time.sleep(10)
    driver.refresh()
    time.sleep(10)

    # Fill in the login form and click the submit button from page script.
    # The pieces are joined by implicit string concatenation inside the
    # parentheses, so no line-continuation characters are needed.
    js = (
        'document.getElementById("loginname").value="p.cn";'
        'document.getElementsByName("password")[0].value="welcome";'
        'document.getElementsByClassName("W_btn_a btn_32px")[0].click();'
    )
    try:
        driver.execute_script(js)
        time.sleep(30)
    except Exception as e:
        # Without a successful login nothing below can work; stop the process.
        print(e)
        os._exit(1024)

    time.sleep(random.randrange(3, 6))
    for isc in range(2):
        # Scroll to the bottom twice. Author's notes: Toutiao (no iframe)
        # allows unlimited scrolling; QQ Zone posts (iframe) load a fixed 20
        # items and raise an error on the second scroll. Memory/CPU bound.
        time.sleep(1)
        js = 'window.scrollTo(0,document.body.scrollHeight)'
        driver.execute_script(js)
    
    # Main loop: fetch pending posts from the database, repost each one as a
    # comment/forward on a Weibo page picked from the home page, decrement the
    # post's remaining-repost counter, then idle for about 15 minutes.
    while True:
        sql = 'SELECT id, words,imgurls,time_site FROM qqzoneshuoshuo WHERE lefttimes_weibo>0 AND  INSTR(imgurls,".mp4")=0 AND id IN ( SELECT MAX(id) FROM qqzoneshuoshuo GROUP BY id_site) ORDER BY time_script DESC,id ASC ;'
        res_content = mysql_fetch(sql, 'dic')
        print(res_content)
        if not res_content:
            # Nothing to post (or the database is unreachable): wait before
            # polling again instead of spinning on the database.
            time.sleep(60)
            continue

        # Freshness filter on the source post; it is switched off (the
        # original guard was the always-false `1 > 13`).
        apply_time_filter = False

        for row in res_content:
            dbid, content, time_site = row['id'], row['words'], row['time_site']
            if apply_time_filter:
                if '天' in time_site or '月' in time_site:
                    continue
                lh = int(time.strftime("%H", time.localtime()))
                if lh - int(time_site.split(':')[0]) >= 24:
                    continue
                if '早安' in content and lh >= 11:
                    continue
                elif '晚安' in content and lh <= 20:
                    continue

            time.sleep(10)
            # Cut the text at the site's UI markers and neutralise quotes and
            # line breaks.
            content = content.split('展开全文')[0].split('上传')[0].split('浏览')[0]
            content = content.replace('"', ' ').replace("'", ' ').replace('\n', ' ')
            for fi in ('密龄素材空间', '评论'):
                content = content.replace(fi, ' ')

            # Rotate through the advertised Taobao item ids by wall-clock time.
            # Author's labels: hydrating spray, bracelet, face mask, sunscreen
            # spray (the fifth id was added without a label).
            ad_url_l = ['567557180229', '565875313425', '545159271159', '546048319163', '567693004121']
            ad_this = ad_url_l[int(time.time()) % len(ad_url_l)]
            ad_url = 'https://item.taobao.com/item.htm?id={}'.format(ad_this)

            # Go to the home page and collect candidate links to visit.
            hot_topic_list_url = 'https://weibo.com/u/1779073702/home'
            driver.execute_script('window.location.href="{}"'.format(hot_topic_list_url))
            time.sleep(20)
            hot_url_l = [a.get_attribute('href') for a in driver.find_elements(By.CSS_SELECTOR, 'li>p>a')]
            if not hot_url_l:
                print('no hot topic links found')
                continue
            # Candidates: a time-based index, the first link, and the second
            # link (clamped so a single-link list cannot raise IndexError).
            hot_url_l_index = random.choice(
                [int(time.time()) % len(hot_url_l), 0, min(1, len(hot_url_l) - 1)])

            driver.execute_script('window.location.href="{}"'.format(hot_url_l[hot_url_l_index]))
            time.sleep(10)
            driver.refresh()
            time.sleep(random.randrange(3, 6))
            time.sleep(15)
            comment_l = driver.find_elements(
                By.CSS_SELECTOR, '.WB_row_line>li:nth-child(3)>a>span>span>span')
            if not comment_l:
                # No comment buttons on this page; retrying clicks is pointless.
                continue

            # Scroll down in small steps until one of the first comment buttons
            # becomes clickable (another element may be covering it).
            ele_clickable = False
            for isc in range(20):
                time.sleep(1)
                driver.execute_script('window.scrollTo(0,{})'.format(isc * 50))
                time.sleep(2)
                try:
                    # Weighted pick among the first four buttons; an index past
                    # the end of the list is simply retried.
                    comment_l[random.choice([0, 0, 1, 1, 1, 2, 2, 3])].click()
                    ele_clickable = True
                    break
                except Exception as e:
                    print(e)
                    continue
            if not ele_clickable:
                continue

            time.sleep(12)
            mytopic, myname = ' #doaez朵韵诗磁石娃娃燕窝润颜面膜# ', '南京同仁堂密龄白藜芦醇-燕窝美妆-DOAEZ朵韵诗-阿静@ '
            mystr = '{}{}{}{}'.format(mytopic, myname, content, ad_url)
            try:
                # The page only reacts after a real keyboard event, so type and
                # delete a space before setting the value from script.
                driver.find_elements(By.TAG_NAME, 'textarea')[1].send_keys(Keys.SPACE)
                time.sleep(2)
                driver.find_elements(By.TAG_NAME, 'textarea')[1].send_keys(Keys.BACK_SPACE)
                # Pass the text as a script argument so characters in it cannot
                # break out of a JavaScript string literal.
                driver.execute_script(
                    'document.getElementsByTagName("textarea")[1].value=arguments[0]', mystr)
                time.sleep(2)
            except Exception as e:
                print(e)
                continue

            try:
                # Tick the "forward" checkbox, then click the submit button.
                driver.execute_script("document.getElementsByName('forward')[0].click();")
                time.sleep(2)
                driver.execute_script("document.getElementsByClassName('btn W_fr')[0].childNodes[0].click()")
                time.sleep(2)
            except Exception as e:
                # Skip this post without decrementing its counter.
                print(e)
                continue

            driver.refresh()
            # Image upload for the post was disabled in the original script.

            sql = 'UPDATE qqzoneshuoshuo SET lefttimes_weibo=lefttimes_weibo-1 WHERE id={}'.format(dbid)
            print(sql)
            try:
                mysql_write(sql)
            except Exception as e:
                # Best effort: keep posting, but make the failure visible.
                print(e)

            driver.refresh()

            # Integer bounds: random.randint rejects floats on Python 3.12+.
            time.sleep(random.randint(30, 60))

        # After a batch, idle for about 15 minutes, refreshing roughly once a
        # minute to keep the page alive, then query the database again.
        for si in range(15):
            try:
                driver.refresh()
                time.sleep(60)
                time.sleep(random.randint(0, 10))
                print(si)
            except Exception as e:
                print(145, e)
    

      

  • 相关阅读:
    [LeetCode] 981. Time Based Key-Value Store
    [LeetCode] 97. Interleaving String
    [LeetCode] 953. Verifying an Alien Dictionary
    代价敏感的学习方法
    深度学习中Dropout原理解析
    梯度下降法的三种形式BGD、SGD以及MBGD
    吉布斯采样(Gibbs Sampling)简介(转)
    Java中next()和hasNext() ? nextLine()和hasNextLine()?区别详解
    红黑树介绍及旋转详解
    Pycharm中实现openCV安装好后简单测试
  • 原文地址:https://www.cnblogs.com/rsapaper/p/8893738.html
Copyright © 2011-2022 走看看