zoukankan      html  css  js  c++  java
  • requests简单爬虫项目实战

    requests实战之搜索引擎爬取搜索内容

    import requests
    # Sogou web search: ask for a keyword, fetch the result page for it and
    # store the returned HTML as "<keyword>.html" in the working directory.
    keyword = input('enter a word: ')
    search_url = 'https://www.sogou.com/web'
    # UA spoofing: send a browser-like User-Agent header with the request.
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/',
    }
    # The keyword is passed as the "query" query-string parameter.
    query_args = {'query': keyword}
    # Issue the GET request and take the response body as text.
    page_html = requests.get(
        url=search_url,
        params=query_args,
        headers=browser_headers,
    ).text
    # Save the page locally, named after the keyword.
    out_name = '{}.html'.format(keyword)
    with open(out_name, 'w', encoding='utf-8') as out_file:
        out_file.write(page_html)
    print(out_name, '爬取结束!!!')
    
    

    requests实战之破解百度翻译

    import json
    import requests
    # Baidu Translate suggestion endpoint: POST the user's text as the "kw"
    # form field, print the decoded JSON reply and save it as "<text>.json".
    query_text = input('请输入想翻译的词语或句子:')
    sug_url = 'https://fanyi.baidu.com/sug'
    # Browser-like User-Agent sent with the request.
    ua_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2626.106 Safari/537.36',
    }
    reply = requests.post(url=sug_url, data={'kw': query_text}, headers=ua_headers)
    # The reply body is decoded as JSON.
    suggestions = reply.json()
    print(suggestions)
    # ensure_ascii=False keeps non-ASCII characters readable in the file.
    serialized = json.dumps(suggestions, ensure_ascii=False)
    with open('{}.json'.format(query_text), 'w', encoding='utf-8') as out_file:
        out_file.write(serialized)

    print('爬取结束!!!')
    

    requests实战之爬取豆瓣电影榜单

    import json
    import requests
    # Douban movie chart: GET the top_list JSON endpoint with a fixed set of
    # query parameters, print the decoded reply and save it to douban.json.
    chart_url = 'https://movie.douban.com/j/chart/top_list?'
    # NOTE(review): presumably "start"/"limit" select the first 20 entries and
    # "type"/"interval_id" pick the chart category and score band; confirm
    # against the site's own requests.
    chart_query = dict([
        ('type', '11'),
        ('interval_id', '100:90'),
        ('action', ''),
        ('start', '0'),
        ('limit', '20'),
    ])
    # Browser-like User-Agent sent with the request.
    ua_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2626.106 Safari/537.36',
    }
    chart = requests.get(url=chart_url, params=chart_query, headers=ua_headers).json()
    print(chart)
    # ensure_ascii=False keeps non-ASCII characters readable in the file.
    serialized = json.dumps(chart, ensure_ascii=False)
    with open('douban.json', 'w', encoding='utf-8') as out_file:
        out_file.write(serialized)

    print('爬取结束!!!')
    

    requests实战之爬取肯德基门店地址

    import json
    import requests
    # KFC store lookup: POST a user-supplied place name as the "keyword" form
    # field and save the raw response text to "<place>.html".
    place = input('请输入你想查询的地点:')
    store_url = 'http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=keyword'
    # Browser-like User-Agent sent with the request.
    ua_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2626.106 Safari/537.36',
    }
    # Only the first page of ten results is requested.
    form_fields = dict([
        ('cname', ''),
        ('pid', ''),
        ('keyword', place),
        ('pageIndex', '1'),
        ('pageSize', '10'),
    ])
    reply = requests.post(url=store_url, data=form_fields, headers=ua_headers)
    out_name = '{}.html'.format(place)
    with open(out_name, 'w', encoding='utf-8') as out_file:
        out_file.write(reply.text)

    print('爬取结束!!!')
    

    requests实战之药监总局相关数据

    import json
    import requests
    # Two-stage crawl of the scxk.nmpa.gov.cn portal:
    #   1. POST to the listing endpoint for pages 1-5 and collect the "ID" of
    #      every entry in each reply's "list";
    #   2. POST each collected ID to the detail endpoint and gather the
    #      decoded JSON replies, then write them all to information.json.
    url = 'http://scxk.nmpa.gov.cn:81/xk/itownet/portalAction.do?method=getXkzsList'
    id_list = []
    all_information = []
    # Browser-like User-Agent sent with every request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2626.106 Safari/537.36'
    }
    # Stage 1: walk listing pages 1 through 5, 15 entries requested per page.
    for page in range(1, 6):
        data = {
            'on': 'true',
            # Form values are sent as strings; convert here instead of
            # rebinding the loop variable.
            'page': str(page),
            'pageSize': '15',
            'productName': '',
            'conditionType': '1',
            'applyname': '',
            'applysn': '',
        }
        listing = requests.post(url=url, data=data, headers=headers).json()
        id_list.extend(entry['ID'] for entry in listing['list'])
    print('爬取商家id结束')
    # Stage 2: fetch the detail record for every collected ID.
    url1 = 'http://scxk.nmpa.gov.cn:81/xk/itownet/portalAction.do?method=getXkzsById'
    for record_id in id_list:  # avoid shadowing the builtin id()
        detail = requests.post(url=url1, data={'id': record_id}, headers=headers).json()
        all_information.append(detail)
    with open('information.json', 'w', encoding='utf-8') as fp:
        # ensure_ascii is a boolean flag: the former value 'utf-8' was merely
        # truthy, so non-ASCII text was still written as \uXXXX escapes.
        # False writes the characters themselves, relying on the file's
        # utf-8 encoding.
        json.dump(all_information, fp=fp, ensure_ascii=False)
    print('爬取商家具体信息结束!!!')
    

    参考路飞学社视频

  • 相关阅读:
    简单内存泄漏检测方法 解决 Detected memory leaks!
    C++ && XML “未使用调试信息生成二进制文件” vs assist
    H3C ACL地址转换配置等
    oracle 数据库表导入导出
    内存的使用与windows 内存监控
    Java Development in Flash Builder 4 Standalone
    flex builder
    美国化妆品
    vs 主题
    Boost lib linker error Visual C++
  • 原文地址:https://www.cnblogs.com/Hsiung123/p/13811917.html
Copyright © 2011-2022 走看看