zoukankan      html  css  js  c++  java
  • 爬虫,百度搜索热点排行

    #!/usr/bin/env python
    # -*- coding:utf-8 -*-
    
    #爬虫,搜索热点排行
    import urllib.request
    import urllib
    import re
    import json
    import xlwt
    import os
    
    # Fetch the raw response body of the hot-word API on zhidao.baidu.com.
    cnt = 50  # number of entries requested via `rn`; per the original author only 1-50 works
    url = 'https://zhidao.baidu.com/question/api/hotword?rn=' + str(cnt) + '&t=1535421904906'
    print(url)
    # The request carries a browser-style User-Agent header.
    user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'
    req = urllib.request.Request(url, headers={'User-Agent': user_agent})
    # The context manager closes the HTTP response even if read()/decode() raises.
    with urllib.request.urlopen(req) as response:
        content = response.read().decode('utf-8')
    
    # Create the workbook and its single sheet (cell_overwrite_ok=True is passed to add_sheet).
    workbook = xlwt.Workbook()
    sheet1 = workbook.add_sheet('sheet1', cell_overwrite_ok=True)

    # Header row (row 0), one title per column, left to right:
    # rank, news title, search count, change amount, is-new flag, trend.
    for column, title in enumerate(('排名', '新闻名称', '搜索人数', '变化数量', '新的新闻', '热度上升')):
        sheet1.write(0, column, title)
    
    # Parse the JSON payload and write one spreadsheet row per entry under 'data'.
    dataList = json.loads(content)['data']
    # Rows start at 1 because row 0 holds the header; the row number is also
    # written into column 0 as the rank.
    for j, data in enumerate(dataList, start=1):
        print(data)
        sheet1.write(j, 0, j)
        sheet1.write(j, 1, data['keyword'])
        sheet1.write(j, 2, data['searches'])
        sheet1.write(j, 3, data['changeRate'])

        # Map the isNew flag: 0 -> '否', 1 -> '是'; any other value is written
        # as its str() form.
        isNew = data['isNew']
        if isNew == 0:
            isNew = '否'
        elif isNew == 1:
            isNew = '是'
        sheet1.write(j, 4, str(isNew))

        # A fresh font/style per row lets the trend cell carry its own colour:
        # colour index 3 for 'fall', 2 for 'rise', default otherwise.
        trend = data['trend']
        font = xlwt.Font()
        if trend == 'fall':
            font.colour_index = 3
            trend = '下降'
        elif trend == 'rise':
            font.colour_index = 2
            trend = '上升'
        style5 = xlwt.XFStyle()
        style5.font = font
        sheet1.write(j, 5, trend, style5)
    
    # Save the workbook as an .xls file; per the original author's note, an
    # existing file with the same name is overwritten.
    path = r'D:\Python'  # raw string so the backslash is a literal path separator
    # exist_ok=True creates the directory only when it is missing.
    os.makedirs(path, exist_ok=True)
    filename = 'test1'
    # os.path.join supplies the separator; the previous `path + '\'` was an
    # unterminated string literal and prevented the script from compiling.
    workbook.save(os.path.join(path, '{}.xls'.format(filename)))
    print('创建excel文件完成!')
    

      使用百度的时候总能看到热搜排行,以上代码就是用爬虫获取该热搜排行的示例。

  • 相关阅读:
    [Z]芯片设计经验
    ADF4350初始化程序(verilog)
    基于Altera FPGA的LVDS配置应用一例
    M4K使用率
    榨干FPGA片上存储资源
    ios通讯录复制出来的电话号码两端有隐藏字符串
    PHP做APP接口时,如何保证接口的安全性
    【PHP】微信开放平台---消息加解密-php7.1 使用openssl代替Mcrypt
    Gram矩阵(pytorch)
    数据库范式
  • 原文地址:https://www.cnblogs.com/Monster-World/p/9547199.html
Copyright © 2011-2022 走看看