# PurpleAir空气质量数据采集 (PurpleAir air-quality data collection)
# -*- coding: utf-8 -*-
"""Download one month of hourly PurpleAir sensor readings into a CSV file.

For every sensor listed in ``id2.json`` the script:

1. asks the PurpleAir JSON endpoint for the sensor's ThingSpeak channel id
   and read key,
2. requests that channel's feed for the configured month with
   ``average=60`` and ``round=2`` query parameters, and
3. appends one CSV row per returned feed entry.

Each request sequence is retried up to ``MAX_ATTEMPTS`` times with a
linearly growing pause; a sensor that still fails is skipped.
"""
import time
import datetime
import calendar
import urllib
import queue
import threading
import json
import os
import csv

import requests

SENSOR_LIST_PATH = "id2.json"
PURPLEAIR_URL = "https://www.purpleair.com/json?show="
MAX_ATTEMPTS = 10
REQUEST_TIMEOUT = 5  # seconds, applied to every HTTP request
HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36' }
CSV_HEADER = '"ID","Lat","Lon","Year","Month","Day","Hour","Temperature","Humidity","PM2.5","Station"'

# Failures that trigger a retry: transport/HTTP errors, undecodable JSON
# (ValueError) and responses that lack the expected structure.
_RETRYABLE = (requests.RequestException, ValueError, LookupError, TypeError)


def _month_bounds(year, month):
    """Return (start, end) timestamp strings covering the whole month."""
    last_day = calendar.monthrange(year, month)[1]
    start = '%04d-%02d-%02d 00:00:00' % (year, month, 1)
    end = '%04d-%02d-%02d 23:59:59' % (year, month, last_day)
    return start, end


def _feed_url(channel_id, read_key, year, month):
    """Build the ThingSpeak feed URL for one channel and one month."""
    start, end = _month_bounds(year, month)
    return ("https://thingspeak.com/channels/" + str(channel_id)
            + "/feed.json?api_key=" + str(read_key)
            + "&offset=0&average=60&round=2&start=" + start
            + "&end=" + end)


def _get_json(url):
    """GET *url* and return its decoded JSON body.

    Raises ``requests.HTTPError`` for 4xx/5xx answers so that an error page
    is retried instead of being parsed as data.
    """
    response = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.json()


def _fetch_feed_entries(pace, sensor_id, label, year, month):
    """Return the list of feed entries for one sensor, or None on failure.

    *pace* and *label* are only used for the progress line printed before
    the feed request. After each failed attempt the function sleeps
    ``30 * attempt`` seconds; no sleep follows the final attempt.
    """
    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            sensor = _get_json(PURPLEAIR_URL + str(sensor_id))["results"][0]
            url = _feed_url(sensor["THINGSPEAK_PRIMARY_ID"],
                            sensor["THINGSPEAK_PRIMARY_ID_READ_KEY"],
                            year, month)
            # NOTE(review): the printed URL contains the channel read key.
            print([datetime.datetime.now().strftime('%H:%M:%S'), pace, sensor_id, label, url])
            # list() makes a non-iterable "feeds" value fail here, inside
            # the retry, rather than later while writing rows.
            return list(_get_json(url)["feeds"])
        except _RETRYABLE as e:
            print([repr(e)])
            if attempt < MAX_ATTEMPTS:
                time.sleep(30 * attempt)
    return None


def _write_rows(writer, entries, sensor_id, lat, lon, label):
    """Write one CSV row per feed entry through *writer*.

    Columns follow CSV_HEADER: field6/field7/field8 of the entry fill the
    Temperature/Humidity/PM2.5 columns. The first entry that cannot be
    parsed is reported and ends processing for this sensor (same policy as
    before). Missing hours are not filled in. ``None`` values are emitted
    as empty cells by the csv module.
    """
    for entry in entries:
        try:
            dt = time.strptime(entry['created_at'], "%Y-%m-%dT%H:%M:%SZ")
            row = (sensor_id, lat, lon,
                   dt.tm_year, dt.tm_mon, dt.tm_mday, dt.tm_hour,
                   entry['field6'], entry['field7'], entry['field8'], label)
        except (KeyError, TypeError, ValueError) as e:
            print([repr(e)])
            break
        writer.writerow(row)


def main():
    """Collect the configured month for every sensor in ``id2.json``."""
    # 0. Configuration (original note: 2017/10/31 - 2018/12/1).
    os.system("cls" if os.name == "nt" else "clear")
    year = 2017
    month = 10
    filename = 'data/%s/%4d%02d_V1_%s.csv'%(year, year, month, datetime.datetime.now().strftime('%Y%m%d%H%M%S'))

    # 1. Announce start.
    print("%s %4d%02d %s " % (datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), year, month, "Ready go..."))

    # 2. Load the sensor list; each item is indexed positionally below
    #    (0 = id, 5 = label, 6 = latitude, 7 = longitude).
    with open(SENSOR_LIST_PATH, 'rb') as fh:
        ides = json.load(fh)

    # 3. Write the output file. The directory is created on demand, and
    #    the csv module quotes labels containing commas or quotes.
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    with open(filename, 'a', newline='', encoding='utf-8') as out:
        out.write(CSV_HEADER + '\n')
        writer = csv.writer(out, lineterminator='\n')
        for pace, item in enumerate(ides, start=1):
            sensor_id, label, lat, lon = item[0], item[5], item[6], item[7]
            entries = _fetch_feed_entries(pace, sensor_id, label, year, month)
            if entries is None:
                # Every attempt failed; move on to the next sensor.
                continue
            _write_rows(writer, entries, sensor_id, lat, lon, label)


if __name__ == '__main__':
    main()