zoukankan      html  css  js  c++  java
  • 使用 Twitter API 获取所关注用户的时间线

    # -*- coding: utf-8 -*-
    from tweepy import OAuthHandler
    import datetime
    import pandas as pd
    import tweepy
    import time
    import random
    import traceback
    from dbs.db import *
    
    class Twitter_Spider():
        """Crawl recent tweets of the accounts the authenticated user follows.

        ``start`` lists the followed accounts, walks each account's timeline
        page by page until a tweet older than one week is met, and hands the
        collected columns to ``SaveData().save_object_data`` once per account.
        ``SaveData`` is not defined in this class; it is expected to come from
        the module-level ``from dbs.db import *``.
        """

        def __init__(self):
            # URL template filled with (screen_name, tweet_id).
            self.main_tw_url = "https://twitter.com/{}/status/{}"
            # Column-name -> list mapping handed to SaveData for one account.
            self.twitter_dicts = {}
            # Followed accounts: user id -> screen name.
            self.userdicts = {}
            # Set by getItem once a tweet older than the cutoff is seen.
            self.stopflag = False
            # Creates the per-tweet column lists (user_id_list, tw ids, ...).
            self._reset_batch()

        def _reset_batch(self):
            """Bind fresh, empty per-tweet column lists for the next account."""
            # New list objects (rather than .clear()) so anything that kept a
            # reference to a previously saved batch is not emptied behind its
            # back.
            self.user_id_list = []
            self.user_name_list = []
            self.china_time_list = []
            self.crate_time_list = []
            self.twitter_id_list = []
            self.twitter_text_list = []
            self.twitter_url_list = []
            self.update_time_list = []

        def getapi(self):
            """Build and return a ``tweepy.API`` client authenticated via OAuth."""
            # NOTE(review): credentials are hard-coded in source; consider
            # loading them from configuration or the environment instead.
            consumer_key = 'IAaj345Xf673kzT2'
            consumer_secret = 'ee9WEQ235555We0gP4peRbOPeeHGX1'
            access_token = '9767625356VEnq7s9ZXOHEI'
            access_secret = 'lyqj2122333o9G4fHta'
            auth = OAuthHandler(consumer_key, consumer_secret)
            auth.set_access_token(access_token, access_secret)
            return tweepy.API(auth)

        def start(self):
            """Crawl every followed account and save one batch per account.

            Returns ``None``. If the API client cannot be created the error is
            printed and the crawl is abandoned. A failure while saving a batch
            is printed and the crawl continues with the next account.
            """
            try:
                api = self.getapi()
            except tweepy.TweepError:
                print('Error! Failed to get access token.')
                # Without a client nothing below can work.
                return

            # getItem parses "created_at" as a naive UTC datetime, so the
            # one-week cutoff must be naive UTC as well (not local time).
            beforeweek = datetime.datetime.utcnow() - datetime.timedelta(days=7)

            # Collect id and screen_name of every followed account.
            for page in tweepy.Cursor(api.friends).pages():
                for friend in page:
                    info = friend._json
                    self.userdicts.setdefault(info.get("id"), info.get("screen_name"))

            for user_id in self.userdicts:
                self.stopflag = False
                self.twitter_dicts.clear()
                # Each account gets its own batch; otherwise every save would
                # re-send the rows of all previously crawled accounts.
                self._reset_batch()

                timeline = tweepy.Cursor(api.user_timeline, id=user_id).pages()
                for i, statuslist in enumerate(timeline):
                    print("start page:{}".format(i))
                    for status in statuslist:
                        if self.getItem(status._json, beforeweek, user_id):
                            break
                    if self.stopflag:
                        # Cutoff reached: stop before sleeping or requesting
                        # another page.
                        break
                    # Random pause between page requests.
                    time.sleep(random.randint(2, 6))

                if not self.twitter_id_list:
                    # Nothing inside the time window for this account.
                    continue

                self.twitter_dicts["user_id"] = self.user_id_list
                self.twitter_dicts["user_name"] = self.user_name_list
                self.twitter_dicts["china_time"] = self.china_time_list
                self.twitter_dicts["tw_time"] = self.crate_time_list
                self.twitter_dicts["tw_id"] = self.twitter_id_list
                self.twitter_dicts["tw_text"] = self.twitter_text_list
                self.twitter_dicts["tw_url"] = self.twitter_url_list
                self.twitter_dicts["updatetime"] = self.update_time_list
                try:
                    SaveData().save_object_data(self.twitter_dicts)
                except Exception:
                    # format_exc() takes a numeric limit, not a message, so the
                    # label is printed separately.
                    print("insert db error")
                    print(traceback.format_exc())

        def getItem(self, jsonstr, beforeweek, user_id):
            """Record one tweet, or signal that the time cutoff was reached.

            Args:
                jsonstr: dict of one tweet; the keys "created_at", "id" and
                    "text" are read.
                beforeweek: naive datetime; tweets created before it are not
                    recorded.
                user_id: key into ``self.userdicts`` for the tweet's author.

            Returns:
                ``self.stopflag``: True once a tweet older than ``beforeweek``
                has been seen (that tweet is not recorded), otherwise the
                flag's current value after recording the tweet.
            """
            create_time = jsonstr.get("created_at")
            # Stays "" when the timestamp is missing or malformed; the tweet is
            # still recorded in that case.
            china_time = ""
            try:
                # NOTE(review): despite the name, this is the naive UTC value
                # of "created_at"; no conversion to China time happens here.
                china_time = datetime.datetime.strptime(create_time, "%a %b %d %H:%M:%S +0000 %Y")
                if beforeweek > china_time:
                    self.stopflag = True
                    return self.stopflag
            except (TypeError, ValueError):
                print(traceback.format_exc())

            screen_name = self.userdicts.get(user_id)
            twitter_id = jsonstr.get("id")
            twitter_text = jsonstr.get("text")
            print(twitter_text)

            self.china_time_list.append(china_time)
            self.crate_time_list.append(create_time)
            self.user_id_list.append(user_id)
            self.user_name_list.append(screen_name)
            self.twitter_id_list.append(twitter_id)
            self.twitter_url_list.append(self.main_tw_url.format(screen_name, twitter_id))
            self.twitter_text_list.append(twitter_text)
            self.update_time_list.append(datetime.datetime.now())
            return self.stopflag
    
    
    if __name__ == "__main__":
        # Script entry point: build the spider and run one full crawl.
        spider = Twitter_Spider()
        spider.start()
    

      

  • 相关阅读:
    web前端防治重复提交
    layabox笔记
    fixfff
    laybax
    小游戏初始化,资源加载异常处理,黑屏处理
    微信小游戏资源加载页与云存储
    前端唠嗑
    css 的一些样式笔记
    小游戏虚拟手柄
    VUE最佳实践
  • 原文地址:https://www.cnblogs.com/c-x-a/p/8623816.html
Copyright © 2011-2022 走看看