zoukankan      html  css  js  c++  java
  • python-爬虫:获取QQ号所加入各群的成员信息并存入数据库,同时将成员QQ头像下载保存到文件夹,图片以QQ号命名(实例3)

    import requests
    import pymongo
    import requests
    import os

    class QqGroup:
        """Crawl the members of every QQ group visible to a logged-in account.

        For each group returned by the group-list endpoint, the member list is
        requested, each member is stored in a MongoDB collection, and the
        member's avatar is downloaded into an ``imgs`` directory next to this
        file, named ``<qq number>.jpg``.
        """

        # The three endpoints used: list of groups, members of one group,
        # and the avatar image for a QQ number.
        group_url = "https://qun.qq.com/cgi-bin/qun_mgr/get_group_list"
        member_url = "https://qun.qq.com/cgi-bin/qun_mgr/search_group_members"
        # The QQ number is substituted bare; the previous "{%s}" template put
        # literal braces around it in the query string.
        pic_url = "http://q4.qlogo.cn/g?b=qq&nk=%s&s=140"
        # Seconds to wait on a single HTTP request so a stalled connection
        # cannot hang the whole crawl.
        timeout = 10

        def __init__(self, cookie, bkn=2039193070, host='118.24.3.40', port=27017):
            """Store the session cookie and open the MongoDB collection.

            Args:
                cookie: Value sent as the ``Cookie`` header on the qun.qq.com
                    requests.
                bkn: Value posted as the ``bkn`` form field.
                    NOTE(review): presumably a token tied to the login
                    session - confirm it matches the cookie being used.
                host: MongoDB host name or address.
                port: MongoDB port.
            """
            self.cookie = cookie
            self.bkn = bkn
            self.client = pymongo.MongoClient(host=host, port=port)
            self.db = self.client['qq_group']  # database name
            self.table = self.db['qq_group']  # collection name

        def get_all_group(self):
            """Return the ``gc`` value of every group in the group-list reply.

            The reply's ``join``, ``create`` and ``manage`` lists are merged;
            a missing or null list counts as empty.
            """
            payload = {'bkn': self.bkn}
            reply = requests.post(
                self.group_url,
                data=payload,
                headers={'Cookie': self.cookie},
                timeout=self.timeout,
            ).json()
            groups = []
            for key in ('join', 'create', 'manage'):
                # "or []" also covers a key that is present but null.
                groups += reply.get(key) or []
            return [group.get('gc') for group in groups]

        @staticmethod
        def _build_member(raw):
            """Convert one raw member entry into the document that is stored.

            Gender code 0 becomes "男", 1 becomes "女", anything else "未知";
            an empty group card is replaced by "没有群名片".
            """
            gender_code = raw.get("g")
            if gender_code == 0:
                gender = "男"
            elif gender_code == 1:
                gender = "女"
            else:
                gender = "未知"
            card = raw.get("card")
            return {
                "qq": raw.get("uin"),
                "gender": gender,
                "nick": raw.get("nick"),
                "card": "没有群名片" if card == "" else card,
                "qage": raw.get("qage"),
            }

        def get_group_info(self, num):
            """Fetch the members of group ``num``; store each and get its avatar.

            Only the range ``st=0`` .. ``end=200`` is requested, so members
            beyond that range are not retrieved. A reply without a ``mems``
            list is treated as a group with no members.
            """
            payload = {"gc": num, "st": 0, "end": 200, "sort": 0, "bkn": self.bkn}
            reply = requests.post(
                url=self.member_url,
                data=payload,
                headers={'Cookie': self.cookie},
                timeout=self.timeout,
            )
            members = reply.json().get("mems") or []
            for raw in members:
                self.save_mongo(self._build_member(raw))
                self.down_pic(raw.get("uin"))

        def save_mongo(self, data):
            """Insert one member document into the MongoDB collection."""
            # Collection.insert() is deprecated and absent from PyMongo 4.
            self.table.insert_one(data)

        def down_pic(self, qq_num):
            """Download the avatar of ``qq_num`` to ``imgs/<qq_num>.jpg``.

            The ``imgs`` directory lives next to this source file and is
            created on demand.
            """
            real_url = self.pic_url % qq_num
            content = requests.get(real_url, timeout=self.timeout).content
            img_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'imgs')
            # Create the directory that is actually written to, not an
            # 'imgs' relative to the current working directory.
            os.makedirs(img_dir, exist_ok=True)
            with open(os.path.join(img_dir, str(qq_num) + '.jpg'), 'wb') as fw:
                fw.write(content)
            print('%s头像下载完成' % qq_num)

        def main(self):
            """Run the whole crawl: every group, every member, every avatar."""
            for group_code in self.get_all_group():
                self.get_group_info(group_code)

        def mian(self):
            """Misspelled original name of :meth:`main`, kept for existing callers."""
            self.main()
    # Script entry point: build the crawler with the session cookie and run it.
    # Guarded so that importing this module does not start a crawl.
    # NOTE(review): the cookie below is a hard-coded session credential; it
    # should be rotated and supplied from configuration instead of source.
    if __name__ == '__main__':
        Q=QqGroup('pgv_pvid=1745803612; pgv_pvi=9485686784; RK=vRZxXdy1Y7; _qpsvr_localtk=0.8876927078641847; pgv_si=s3971407872; ptisp=cnc; ptcz=e42f943c39b23e3d7a94d9deac0de69388506bbb9b09000f197b8681624f70ac; uin=o0106148088; skey=@pH5b3tYaY; pt2gguin=o0106148088; p_uin=o0106148088; pt4_token=Vrwh5LwXxUAe0OaJi5DR-XEt8F13T5OheqfWVI3CJjQ_; p_skey=SPaNeBpdwjMfAGplMvT-msRMptEYPnPykjrJo80kXqY_')
        Q.mian()
  • 相关阅读:
    oracle11g静默安装
    pv vg lv
    oracle日志表
    oracle常用sql
    vulnhub~muzzybox
    vulnhub~sunset:dusk1
    vulnhub~MyExpense
    vulnhub~DC-9
    汇编学习一
    贪心算法和动态规划
  • 原文地址:https://www.cnblogs.com/wenchengqingfeng/p/9396291.html
Copyright © 2011-2022 走看看