zoukankan      html  css  js  c++  java
  • 创意抓取及导出

    # -*- coding: utf-8 -*-
    import requests
    import time
    import math
    import os
    import pandas as pd
    
    # Cookie string typed in by the operator; it is sent verbatim in the
    # 'Cookie' request header below. Note this prompt runs at import time.
    cookies = input('请输入Cookie:')

    # HTTP headers attached to every requests.get() call in this script.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
        'Cookie': cookies,
        'Host': 'xgop.in.zhihu.com',
        'Referer': '***',
    }

    # Numeric code -> display label tables used when flattening a creative.
    tempmap = {
        6: '大图',
        7: '文字链',
        8: '小图',
        10: '多图',
        11: '视频',
    }
    # NOTE(review): zonemap is not referenced anywhere in the visible code.
    zonemap = {
        8: '知乎 APP 信息流',
        152: '知乎极速版首页',
        153: '知乎极速版回答页',
        20: '推荐阅读',
        33: '搜索',
        30: 'App问题页信息流',
    }
    positionmap = {
        1: '首页',
        3: '回答页',
        2: '问题页',
    }
    osmap = {
        1: '安卓',
        2: '苹果',
    }
    networkmap = {
        1: 'wifi',
        2: '2G',
        3: '3G',
        4: '4G',
    }
    equipmentPricemap = {
        1: '1500元以下',
        2: '1500-2500元',
        3: '2500-3500元',
        4: '3500元-4500元',
        5: '4500元以上',
    }
    mobileOperatormap = {
        0: '中国移动',
        1: '中国联通',
        2: '中国电信',
    }
    # NOTE(review): both gender labels are empty strings -- presumably the
    # real labels were lost or redacted; confirm the intended values.
    gendermap = {
        0: '',
        1: '',
    }

    # Accumulates one flattened dict per creative across all fetched pages.
    all_data = []
    
    
    def _optional(fetch, default=''):
        """Return ``fetch()``, or ``default`` if the field is missing or malformed."""
        try:
            return fetch()
        except (KeyError, TypeError, IndexError):
            return default


    def _join_labels(mapping, codes):
        """Translate codes through ``mapping`` and join them with ', '.

        Unknown codes, and an empty ``codes`` list, are rendered as '不限'.
        """
        return ', '.join([mapping.get(code, '不限') for code in codes]) or '不限'


    def get_single_data(url):
        """Fetch one listing page and append a flat row per creative to ``all_data``.

        Args:
            url: Fully formatted listing-page URL.

        Returns:
            None. Rows are appended to the module-level ``all_data`` list and
            the running row count is printed after each append.

        Request failures and unparseable responses are reported on stdout and
        the page is skipped. If a single creative fails to parse, the error is
        printed and the fields collected so far are still kept (best effort).
        """
        try:
            res = requests.get(url, headers=headers)
        except Exception as e:
            print('异常请求链接--->' + url + str(e))
            return

        # The body may not be JSON (or not a JSON object), e.g. an error page
        # served when the cookie is rejected; skip the page instead of crashing.
        try:
            data = res.json().get('result', 0)
        except (ValueError, AttributeError) as e:
            print('异常解析链接--->' + url + str(e))
            return
        if not data:
            return

        for i in data:
            single_data = {}
            try:
                single_data['创意id'] = i['id']
                single_data['账户id'] = i['userId']
                single_data['目标类型'] = i['targetType']
                single_data['标题'] = i['asset']['title']['value']
                single_data['描述'] = i['asset']['desc']['value']
                single_data['图片url'] = _optional(lambda: i['asset']['main']['url'])
                single_data['cta'] = i['asset']['cta']['value']
                single_data['状态'] = i['status']
                single_data['创意名称'] = i['name']
                single_data['曝光'] = i['counter']['impression']
                single_data['点击'] = i['counter']['click']
                single_data['点击率'] = i['counter']['clickRatio']
                # Money amounts are divided by 100 before export
                # (presumably stored in cents -- TODO confirm).
                single_data['点击价格'] = i['counter']['clickPrice'] / 100
                single_data['花费'] = i['counter']['cost'] / 100
                single_data['样式'] = tempmap.get(i['ad']['templateId'])
                single_data['推广开始日期'] = i['ad']['dateBegin']
                single_data['产品id'] = i['productId']
                single_data['出价'] = i['ad']['price'] / 100
                single_data['投放平台'] = _join_labels(osmap, i['ad']['targeting']['os'])
                single_data['app行为'] = _optional(lambda: i['ad']['targeting']['appCategory'])
                single_data['自定义人群'] = _optional(lambda: i['ad']['targeting']['crowd'])
                single_data['性别'] = _join_labels(gendermap, i['ad']['targeting']['gender'])
                single_data['兴趣'] = _optional(
                    lambda: i['ad']['targeting']['interest']
                    if len(i['ad']['targeting']['interest']) > 0 else '不限'
                )
                single_data['网络'] = _join_labels(networkmap, i['ad']['targeting']['network'])
                single_data['运营商'] = _optional(
                    lambda: _join_labels(mobileOperatormap, i['ad']['targeting']['mobileOperator'])
                )
                single_data['设备价格'] = _optional(
                    lambda: _join_labels(equipmentPricemap, i['ad']['targeting']['equipmentPrice'])
                )
                single_data['关键词'] = ', '.join(i['ad']['targeting']['keyword'])
                single_data['创意展现方式'] = i['ad']['strategy']['creative']
                single_data['编辑页面地址'] = '****'.format(single_data['账户id'], single_data['创意id'])
                # NOTE(review): zoneIds are translated with positionmap while
                # zonemap is unused -- confirm which table is intended.
                single_data['展现位置'] = ', '.join([positionmap.get(d, '未知') for d in i['ad']['zoneIds']])
            except Exception as e:
                # Per-record boundary: report and keep whatever was collected.
                print('异常解析链接--->' + url + str(e))
            if float(single_data.get('花费', 0)) >= 0:
                all_data.append(single_data)
                print(len(all_data))
    
    
    def get_all_urls(userid, start_time, end_time):
        """Build the listing-page URLs for one account and date range.

        Requests page 1 to read ``totalCount`` from the JSON response, derives
        the page count from it, and formats one URL per page.

        Args:
            userid: Account id; converted with ``int()`` (raises ValueError if
                it is not numeric).
            start_time: Start date, formatted into the URL as a string.
            end_time: End date, formatted into the URL as a string.

        Returns:
            A list of URLs covering pages 1 through the last page inclusive.
            An empty list is returned when the first request fails or its
            response has no usable ``totalCount``; the error is printed.
        """
        # Divisor used to derive the page count (assumes the endpoint serves
        # 10 items per page -- TODO confirm).
        page_size = 10
        base_url = '******'
        first_page_url = base_url.format(page=1, userid=int(userid), start_time=str(start_time), end_time=str(end_time))
        try:
            res = requests.get(first_page_url, headers=headers)
            total_page = math.ceil(res.json()['totalCount'] / page_size)
        except Exception as e:
            print('异常all链接--->' + first_page_url + str(e))
            return []

        # range() excludes its stop value, so add 1 to include the last page.
        return [
            base_url.format(page=page, userid=int(userid), start_time=str(start_time), end_time=str(end_time))
            for page in range(1, total_page + 1)
        ]
    
    
    
    def main():
        """Prompt for account ids and a date range, scrape them, export to Excel.

        Reads a comma-separated uid list plus start and end dates from stdin,
        fetches every listing page of every uid (sleeping 3 seconds before
        each page request), then writes the rows collected in ``all_data`` to
        ``<uids>/<uids>-<YYYYmmddHHMM>有消费创意.xlsx`` and prints 'done'.

        If no uid is entered, a message is printed and nothing is exported.
        """
        uids = input('请输入uids(格式:111,222,333):')
        start_time = input('请输入开始时间(格式:2018-01-01):')
        end_time = input('请输入结束时间(格式:2018-07-03):')

        # Drop empty segments (e.g. a trailing comma) so they never reach
        # int() inside get_all_urls.
        user_ids = [part.strip() for part in uids.split(',') if part.strip()]
        if not user_ids:
            # An empty uid string would otherwise be used as the output
            # directory name and make the export path start at '/'.
            print('未输入任何uid,已退出')
            return

        for userid in user_ids:
            # `or []` tolerates a None result from a failed page-count request.
            for url in get_all_urls(userid, start_time, end_time) or []:
                time.sleep(3)
                get_single_data(url)

        df1 = pd.DataFrame(all_data)

        # The raw uid string names the output directory (relative to the cwd).
        os.makedirs(uids, exist_ok=True)

        filename = str(uids) + "-" + time.strftime("%Y%m%d%H%M") + '有消费创意' + '.xlsx'
        df1.to_excel(os.path.join(uids, filename), index=False)
        print('done')
    
    
    # Call main() only when this file is executed directly as a script.
    if __name__ == '__main__':
        main()
  • 相关阅读:
    property补充
    利用描述符自定制property
    类的装饰器
    上下文协议管理
    描述符
    迭代器协议
    doc属性__module__属性__del__(垃圾回收)__call__方法
    【移动支付】.NET支付宝App支付接入
    【WPF】PopupColorEdit 的使用
    【MVVM Dev】PART_Editor的使用
  • 原文地址:https://www.cnblogs.com/Erick-L/p/9390223.html
Copyright © 2011-2022 走看看