zoukankan      html  css  js  c++  java
  • QQ空间动态内容,好友信息,点赞爬虫脚本

    一、安装基础的软件包:

    1、准备好火狐浏览器,并下载geckodriver,将geckodriver加入到环境变量:
    下载geckodriver的地址:https://pan.baidu.com/s/1NDo04Zj8NxmlRe90_CDRow

    2、下载selenium:
    pip install selenium

    二、脚本说明:

    from selenium import webdriver
    from time import sleep
    
    # Placeholder QQ credentials and the Qzone entry URL used by login_qzone().
    # Replace the 'x' values with a real account before running.
    # NOTE(review): avoid committing real credentials to version control.
    username='xxxxxxxxx'
    password='xxxxxxxxxx'
    qzone_url ='https://qzone.qq.com/'
    
    def login_qzone(url,username,password,browser_type='Firefox'):
        """Open Qzone in Firefox and log in with account and password.

        Args:
            url: Page to open first. When it does not contain ``'qzone'`` the
                element with class ``qzone`` is clicked and the driver moves
                to the second window handle.
            username: Text typed into the element with id ``u``.
            password: Text typed into the element named ``p``.
            browser_type: ``'Firefox'`` for a regular window, or
                ``'Firefox_headless'`` for a headless session.

        Returns:
            The WebDriver instance, switched back to the top-level document.

        Raises:
            ValueError: If ``browser_type`` is not a supported value.
        """
        # Validate before launching anything: an unknown value previously left
        # ``driver`` unbound and surfaced as an UnboundLocalError further down.
        if browser_type not in ('Firefox', 'Firefox_headless'):
            raise ValueError(
                "browser_type must be 'Firefox' or 'Firefox_headless', got %r"
                % (browser_type,))

        if browser_type == 'Firefox':
            driver = webdriver.Firefox()
        else:
            # Headless Firefox (no visible window).
            # NOTE(review): set_headless()/firefox_options belong to the
            # Selenium 3 API; confirm against the installed Selenium version.
            fireFoxOptions = webdriver.FirefoxOptions()
            fireFoxOptions.set_headless()
            driver = webdriver.Firefox(firefox_options=fireFoxOptions)

        try:
            driver.get(url)
            if 'qzone' not in url:
                # Not on Qzone yet: follow the Qzone link, which opens a new
                # window, and continue there.
                driver.find_element_by_class_name('qzone').click()
                sleep(5)
                driver.switch_to.window(driver.window_handles[1])
            # The login form lives inside a nested frame, not the main page.
            driver.switch_to.frame('login_frame')
            # Switch the form to account/password mode.
            driver.find_element_by_id('switcher_plogin').click()
            driver.find_element_by_id('u').send_keys(username)
            driver.find_element_by_name('p').send_keys(password)
            driver.find_element_by_id('login_button').click()
            driver.switch_to.default_content()
            sleep(5)   # give the post-login page time to load
        except BaseException:
            # The caller never receives the driver on failure, so close the
            # browser here instead of leaking the process.
            driver.quit()
            raise
        return driver
    
    # Script entry point: when run directly, log in with the module-level
    # placeholder credentials. The returned driver is not used further here.
    if __name__ == '__main__':
        login_qzone(qzone_url,username,password)
    qzone_login.py
    import qzone_login as login
    from selenium.common.exceptions import  StaleElementReferenceException,NoSuchElementException
    from selenium import webdriver
    import time
    import json
    
    # Module-level accumulator filled by save_qq_number(): maps the last path
    # segment of each friend link (the QQ number) to the link's visible text.
    qq_number_dict = {}

    def save_qq_number(tag_list):
        """Record the QQ number and display text of each link in ``tag_list``.

        Args:
            tag_list: Iterable of elements exposing ``get_attribute('href')``
                and ``.text``.

        Elements without a usable ``href`` are skipped. A trailing slash is
        ignored so that ``.../12345/`` still yields ``12345``.
        """
        for line in tag_list:
            href = line.get_attribute('href')
            if not href:
                # get_attribute() may return None/'' for anchors with no link.
                continue
            qq_number = href.rstrip('/').split('/')[-1]
            if not qq_number:
                continue
            qq_number_dict[qq_number] = line.text
    
    def save_in_file(friends_url='https://user.qzone.qq.com/949885111/myhome/friends/index',
                     max_pages=60, output_path='qq_friends.json'):
        """Collect the friend list from Qzone and write it to a JSON file.

        Logs in with a headless Firefox session, opens the friends page,
        walks through the pager and stores every collected entry (plus a
        ``save_time`` timestamp) in ``output_path``.

        Args:
            friends_url: Friends-list page to scrape. The default embeds a
                specific QQ number.
            max_pages: Upper bound on the number of pages visited.
            output_path: Destination JSON file.
        """
        qq_number_dict.update({'save_time':time.time()})
        browser = login.login_qzone(login.qzone_url,login.username,login.password,browser_type = 'Firefox_headless')
        try:
            browser.get(friends_url)
            browser.switch_to.default_content()
            # The friend list is rendered inside the first iframe of the page.
            frame = browser.find_element_by_tag_name('iframe')
            browser.switch_to.frame(frame)
            for _ in range(max_pages):
                try:
                    target = browser.find_element_by_class_name('qz-button.btn-pager-next')
                except NoSuchElementException:
                    # No "next" button: treat this as the last page rather
                    # than crashing and losing everything collected so far.
                    # NOTE(review): confirm how the pager looks on the last page.
                    target = None
                if target is not None:
                    # Bring the pager button into view before reading/clicking.
                    browser.execute_script("arguments[0].scrollIntoView();", target)
                save_qq_number(browser.find_elements_by_class_name('textoverflow'))
                if target is None:
                    break
                target.click()
                time.sleep(1)
        finally:
            # Always close the headless browser, even when scraping fails.
            browser.quit()
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(json.dumps(qq_number_dict))
    
    
    if __name__ == "__main__":
        # Refresh the cached friend list when it is missing, unreadable, or
        # older than 86400 seconds (one day); otherwise report how many
        # entries the cache holds.
        try:
            with open('qq_friends.json', 'r', encoding='utf-8') as f:
                cached = json.load(f)
        except (FileNotFoundError, ValueError):
            # First run (no cache yet) or a corrupt/empty file: rebuild it.
            cached = None
        if not isinstance(cached, dict) or time.time() - cached.get('save_time', 0) > 86400:
            save_in_file()
        else:
            print(len(cached))
    get_friends.py
    import qzone_login as login
    from selenium import webdriver
    from selenium.webdriver.common.keys import Keys
    from selenium.common.exceptions import  ElementNotInteractableException,NoSuchFrameException,NoSuchElementException
    import time
    
    # Qzone home page of the friend whose feed is scraped when this script is
    # run directly (passed to get_all_data() in the __main__ section).
    friend_qzone_url = 'https://user.qzone.qq.com/2453294057'
    
    def judge_login():
        """Placeholder for a login-success check; not implemented yet."""
        return None
    
    def switch_frame(broeser,frame,timeout=None,interval=2):
        """Switch into ``frame``, retrying until it becomes available.

        Args:
            broeser: WebDriver instance to switch.
            frame: Frame reference accepted by ``switch_to.frame``.
            timeout: Maximum number of seconds to keep retrying. ``None``
                (the default) retries indefinitely, as before.
            interval: Seconds to wait between attempts; the frame may not be
                present right after navigation.

        Raises:
            NoSuchFrameException: If ``timeout`` elapses before the frame
                appears.
        """
        deadline = None if timeout is None else time.monotonic() + timeout
        while True:
            try:
                broeser.switch_to.frame(frame)
            except NoSuchFrameException:
                if deadline is not None and time.monotonic() >= deadline:
                    raise
            else:
                return
            time.sleep(interval)
    
    def get_erroy(browser,tag,return_sign = False):
        """Look up a child element by class name, tolerating its absence.

        Args:
            browser: Driver or element to search within.
            tag: Class name passed to ``find_element_by_class_name``.
            return_sign: When true, return the element itself; otherwise
                return its text with spaces and newlines removed.

        Returns:
            The element, its cleaned text, or ``''`` when nothing matches.
        """
        try:
            element = browser.find_element_by_class_name(tag)
        except NoSuchElementException:
            return ''
        if return_sign:
            return element
        # Strip spaces and line breaks so the text fits on a single line.
        return element.text.replace(' ','').replace('\n','')
    
    def analyse_html(broswser):
        """Extract every loaded feed entry from the current page.

        Each element with class ``f-single.f-s-s`` is read through
        ``get_erroy``: the ``f-info`` text, the ``ui-mr8.state`` text and the
        text of the ``<a>`` inside the ``item`` element (empty string when
        ``item`` is absent). Every entry is printed as it is read.

        Returns:
            dict mapping the ``ui-mr8.state`` text to
            ``[item link text, f-info text]``.
        """
        posts = {}
        for entry in broswser.find_elements_by_class_name('f-single.f-s-s'):
            content = get_erroy(entry,'f-info')
            posted_at = get_erroy(entry,'ui-mr8.state')
            device_tag = get_erroy(entry,'item',return_sign= True)
            device = device_tag.find_element_by_tag_name('a').text if device_tag != '' else ''
            print(content,device,posted_at)
            posts[posted_at] = [device, content]
        return posts
    
    def judge_pop_up(browser):
        """Dismiss the popup by clicking its ``btn-fs-sure`` button, if shown.

        The page is queried once, so the element that was found is the one
        that gets clicked; nothing happens when no such button exists.
        """
        buttons = browser.find_elements_by_class_name('btn-fs-sure')
        if buttons:
            buttons[0].click()
    
    def get_all_data(friend_url):
        """Log in, open a friend's Qzone and load their complete feed.

        Args:
            friend_url: URL of the friend's Qzone home page.

        Returns:
            The WebDriver instance, left inside the ``frameFeedList`` frame so
            the loaded entries can be read by the caller.
        """
        driver = login.login_qzone(login.qzone_url,login.username,login.password,browser_type = 'Firefox_headless')
        try:
            driver.get(friend_url)
            driver.switch_to.default_content()
            time.sleep(4)                            # wait, otherwise the popup is not there yet
            judge_pop_up(driver)
            driver.find_element_by_id("aOwnerFeeds").click()
            driver.switch_to.default_content()
            switch_frame(driver,'app_canvas_frame')  # outer frame
            switch_frame(driver,'frameFeedList')     # inner frame holding the feed
            while True:
                # Reset per iteration so ``finally`` never touches an unbound
                # (or previous-iteration) element when the lookup itself fails.
                target = None
                try:
                    target = driver.find_element_by_class_name('b-inline.data_btn_more')
                    target.click()
                except ElementNotInteractableException:
                    # The "more" button is not clickable; try the "loading"
                    # indicator instead and give the page time to load.
                    try:
                        target = driver.find_element_by_class_name('b-inline.data_is_loading')
                        target.click()
                    except ElementNotInteractableException:
                        pass
                    time.sleep(3)
                finally:
                    if target is not None:
                        driver.execute_script("arguments[0].scrollIntoView(false);", target)
                # Stop once no element matches this class any more.
                # NOTE(review): presumably the "none" class hides the
                # "no more data" marker while entries remain - confirm.
                finally_sign = driver.find_elements_by_class_name('b-inline.data_no_more.none')
                print(finally_sign)
                if len(finally_sign) == 0:
                    break
                time.sleep(2)
        except BaseException:
            # The caller never receives the driver on failure; close it here.
            driver.quit()
            raise
        return driver
    
    def give_like(browser,interval=60):
        """Click every like button currently loaded in the page.

        Args:
            browser: WebDriver positioned on the feed.
            interval: Seconds to pause after each click. Defaults to 60; the
                accompanying notes warn that liking too quickly gets the
                account frozen.
        """
        for like in browser.find_elements_by_class_name('item.qz_like_btn_v3'):
            # Scroll the button into view first so the click can land on it.
            browser.execute_script("arguments[0].scrollIntoView(false);", like)
            like.click()
            time.sleep(interval)
    
    if __name__ == '__main__':
        # Load the friend's whole feed, print the parsed entries, and always
        # close the headless browser afterwards so no Firefox process lingers.
        driver = get_all_data(friend_qzone_url)
        try:
            data = analyse_html(driver)
            print(data)
        finally:
            driver.quit()
    get_qzone_data.py
    上面三个脚本包括:
    1、第一个是登录的脚本,可以选择使用火狐的无头(headless)模式。
    2、第二个使用第一个登录后,将空间里的QQ好友信息拿到(qq号:好友备注)保存到json文件里面。
    3、第三个使用第一个登录后,有两个功能:
      1.得到该好友的所有动态的内容(只包括说说内容,发表的时间,和使用的手机型号)
      2.可以给好友点赞。注:点太快了会被冻结

    注:以上只是学习selenium所写的小脚本,可别用于非法用途。

  • 相关阅读:
    Nodejs in Visual Studio Code 06.新建Module
    Nodejs in Visual Studio Code 05.Swig+Bootstrap
    Nodejs in Visual Studio Code 04.Swig模版
    Nodejs in Visual Studio Code 03.学习Express
    Nodejs in Visual Studio Code 02.学习Nodejs
    Nodejs in Visual Studio Code 01.简单介绍Nodejs
    Visual Studio Code 与 Github 集成
    Windows 10 代理上网用户的正确使用姿势
    Visual Studio创建跨平台移动应用_03.AppBuilder Extension
    Visual Studio创建跨平台移动应用_02.Cordova Extension
  • 原文地址:https://www.cnblogs.com/chimeiwangliang/p/8532521.html
Copyright © 2011-2022 走看看