zoukankan      html  css  js  c++  java
  • 天气爬虫

    import  requests
    import re
    # Browser-like request headers shared by every HTTP request below.
    # Defined at module level (not only under ``__main__``) so that get_url and
    # get_response also work when this file is imported instead of executed.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'}

    BASE_URL = 'http://www.tianqihoubao.com'

    # Seconds to wait for the server; without a timeout requests.get can block
    # forever on a stalled connection.
    REQUEST_TIMEOUT = 10

    # Patterns are compiled once here instead of once per downloaded page.
    _LINK_RE = re.compile("<li><a href='(.*?).html'>.*?</a></li>", re.S)
    _LOW_RE = re.compile(' <td style="color:#E54600" ><b>(.*?)</b></td>')
    _HIGH_RE = re.compile('<td style="color:#000065"><b>(.*?)</b></td>')
    _CITY_RE = re.compile('<meta name="Keywords" content="(.*?)" />')


    def get_url(url):
        """Download an index page and return the link prefixes found in it.

        The response body is decoded as GBK and searched for
        ``<li><a href='...html'>`` entries.

        Args:
            url: Address of the index page to fetch.

        Returns:
            A list of strings: each captured ``href`` value without its
            trailing ``.html`` part, in page order. Empty if nothing matches.

        Raises:
            requests.RequestException: If the HTTP request fails or times out.
            UnicodeDecodeError: If the body is not valid GBK.
        """
        resp = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        data = str(resp.content, 'gbk')
        return _LINK_RE.findall(data)


    def get_response(link, year=2018, month=3):
        """Fetch one page per day of a month for every link and print the results.

        For each entry of ``link`` and each day of the given month the URL
        ``BASE_URL + entry + '/YYYYMMDD.html'`` is requested. The first match of
        the keywords ``<meta>`` pattern and of the two coloured ``<td>`` patterns
        is printed as one line. Pages on which any of the three patterns does
        not match are skipped with a note on stderr instead of aborting the run.

        Args:
            link: Iterable of link prefixes as returned by ``get_url``.
            year: Four-digit year to query. Defaults to 2018.
            month: Month number (1-12) to query. Defaults to 3.

        Returns:
            None. Results are written to stdout.

        Raises:
            requests.RequestException: If an HTTP request fails or times out.
            ValueError: If ``month`` is outside 1-12 (raised by
                ``calendar.monthrange``).
        """
        import calendar
        import sys

        # monthrange returns (weekday of first day, number of days in month).
        days_in_month = calendar.monthrange(year, month)[1]

        for city_path in link:
            for day in range(1, days_in_month + 1):
                url = '{}{}/{:04d}{:02d}{:02d}.html'.format(
                    BASE_URL, city_path, year, month, day)
                resp = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
                # NOTE(review): the index page is decoded explicitly as GBK in
                # get_url, while this relies on requests' own encoding
                # detection -- confirm the day pages decode correctly.
                page = resp.text

                city = _CITY_RE.findall(page)
                low = _LOW_RE.findall(page)
                high = _HIGH_RE.findall(page)

                # A page without the expected markup would otherwise raise
                # IndexError on the [0] lookups and stop the whole crawl.
                if not (city and low and high):
                    print('skipped (expected data not found):', url,
                          file=sys.stderr)
                    continue

                print(city[0], low[0], high[0])


    if __name__ == '__main__':
        url = 'http://www.tianqihoubao.com/weather/province.aspx?id=330000'
        link = get_url(url)
        get_response(link)
  • 相关阅读:
    Shell 传递参数
    Shell 变量
    Shell 教程01
    linux yum 命令
    Linux vi/vim
    Linux 磁盘管理
    你应该知道的基础 Git 命令
    Linux 下五个顶级的开源命令行 Shell
    Fedora 23如何安装LAMP服务器
    如何在Fedora或CentOS上使用Samba共享
  • 原文地址:https://www.cnblogs.com/snackpython/p/10136471.html
Copyright © 2011-2022 走看看