zoukankan      html  css  js  c++  java
  • 抽屉网点赞爬虫

    import requests
    from bs4 import BeautifulSoup
    import time
    
    
    # Module-level cookie store: login() replaces it with a dict of cookies and
    # thumbsup() sends it with every request.
    user_cookie = ""
    # Note (from the original author): among chouti.com's cookies, "gpsd" is
    # the most important piece of verification data.
    def login(phone_id, phone_pwd):
        """Log in to dig.chouti.com and return the cookies used for the session.

        Per the original author's note, chouti authenticates the cookies that
        were issued to the *anonymous* visitor: the home page is fetched first
        to obtain cookies, and the login POST then carries those same cookies
        so that the server marks them as logged in.

        Args:
            phone_id: Mobile number without country code; any value accepted
                by ``int()``. It is submitted as ``"86<number>"``.
            phone_pwd: Account password.

        Returns:
            dict: The cookies obtained from the home page. The same dict is
            stored in the module-level ``user_cookie``.

        Raises:
            ValueError: If ``phone_id`` cannot be converted to an integer
                (raised before any network traffic happens).
            requests.RequestException: On timeout, connection failure, or an
                HTTP error status from either request.
        """
        global user_cookie

        # Build the form first so that a bad phone number fails fast.
        payload = {
            "phone": "86%d" % int(phone_id),
            "password": phone_pwd,
            "oneMonth": 1,
        }
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
        }
        # requests never times out by default; bound every call (seconds).
        timeout = 10

        # Step 1: collect the not-yet-authenticated cookies from the home page.
        home = requests.get(
            url='https://dig.chouti.com/',
            headers=headers,
            timeout=timeout,
        )
        home.raise_for_status()
        user_cookie = home.cookies.get_dict()

        # Step 2: submit the credentials together with those cookies.
        response = requests.post(
            url='https://dig.chouti.com/login',
            headers=headers,
            data=payload,
            cookies=user_cookie,
            timeout=timeout,
        )
        # Surface HTTP-level failures instead of silently continuing.
        response.raise_for_status()

        # Return the cookies that the login request was sent with.
        return user_cookie
    
    def thumbsup(max_page=120, delay=0.5):
        """Upvote every entry found on chouti's "all/hot/recent" listing pages.

        Each page from 1 to ``max_page`` is fetched with the module-level
        ``user_cookie``, every ``<div class="item">`` is located, and a vote
        request is posted for the link id stored in the ``lang`` attribute of
        the item's first ``<img>``.

        Args:
            max_page: Last page number to process (inclusive). Defaults to 120,
                which the original author states is the size of the hot list.
            delay: Seconds to sleep after each vote request.

        Raises:
            requests.RequestException: On timeout or connection failure.
        """
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
        }
        # requests never times out by default; bound every call (seconds).
        timeout = 10

        for page in range(1, max_page + 1):
            listing = requests.get(
                url='https://dig.chouti.com/all/hot/recent/%d' % page,
                headers=headers,
                cookies=user_cookie,
                timeout=timeout,
            )
            print("正在给第%d页点赞" % page)
            soup = BeautifulSoup(listing.text, 'html.parser')
            for item in soup.find_all(name='div', attrs={'class': 'item'}):
                img = item.find(name='img')
                raw_id = img.attrs.get('lang') if img is not None else None
                try:
                    link_id = int(raw_id)
                except (TypeError, ValueError):
                    # No <img>, no "lang" attribute, or a non-numeric id:
                    # skip this item instead of aborting the whole run.
                    continue
                requests.post(
                    url='https://dig.chouti.com/link/vote?linksId=%d' % link_id,
                    headers=headers,
                    cookies=user_cookie,
                    timeout=timeout,
                )
                # Pause between vote requests.
                time.sleep(delay)
            print("准备执行下一页...")
    
    
    def main():
        """Script entry point: log in, then upvote the listing pages.

        The credentials below are placeholders and must be replaced with a real
        phone number and password; login() converts the phone number with
        ``int()``, so the placeholder text itself raises ``ValueError``.
        """
        phone_number = "手机号"
        password = "密码"
        login(phone_number, password)
        thumbsup()
    
    
    # Run the scraper only when this file is executed as a script, not on import.
    if __name__ == "__main__":
        main()
  • 相关阅读:
    TCP/IP协议详解
    linux高性能服务器编程--初见
    聚合类
    类class 2
    继承
    构造函数再探
    静态成员与友元
    MySQL图形工具SQLyog破解版
    MySQL注释符号
    数据库中多对多关系的中间表的命名规则
  • 原文地址:https://www.cnblogs.com/fengbo1113/p/9527199.html
Copyright © 2011-2022 走看看