#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author : zhibo.wang
# E-mail : d_1206@qq.com
# Date   : 18/03/23 14:22:58
# Desc   : QQ login; the slider verification step is not handled yet

import os
import time

from selenium import webdriver
from yichuxing.settings import qq_list
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities


class Login(object):
    """Log in to QQ through a browser driver and collect the session cookies.

    Keyword Args:
        qq_num: account number typed into the ``u`` form field.
        qq_passwd: password typed into the ``p`` form field.
    """

    LoginURL = "http://c.easygo.qq.com/eg_toc/map.html?origin=csfw&cityid=110000"
    # Title fragment the code treats as "login succeeded".
    SUCCESS_TITLE = "宜出行"
    # Seconds to wait after submitting the form before reading the page title.
    LOGIN_WAIT = 6

    def __init__(self, **kwargs):
        self.qq_num = kwargs.get("qq_num")
        self.qq_passwd = kwargs.get("qq_passwd")

    def after_smoothly_login(self, driver):
        """Return the cookies held by ``driver`` as a ``{name: value}`` dict."""
        return {elem["name"]: elem["value"] for elem in driver.get_cookies()}

    def _submit_login_form(self, driver, clear_fields=False, maximize=False):
        """Fill in and submit the login form; return cookies or ``None``."""
        driver.set_page_load_timeout(10)
        driver.get(self.LoginURL)
        for field_id, value in (("u", self.qq_num), ("p", self.qq_passwd)):
            field = driver.find_element_by_id(field_id)
            if clear_fields:
                field.clear()
            field.send_keys(value)
        if maximize:
            driver.maximize_window()
        driver.find_element_by_id("go").click()
        time.sleep(self.LOGIN_WAIT)
        if self.SUCCESS_TITLE in driver.title:
            return self.after_smoothly_login(driver)
        # Any other title (e.g. "手机统一登录") means the login did not go through.
        return None

    def _run_session(self, make_driver, **form_options):
        """Create a driver, attempt the login and always shut the driver down.

        Returns the cookie dict on success and ``None`` on any failure, which
        is the contract callers of the public ``get_cookie_by_*`` methods
        already rely on.
        """
        driver = None
        try:
            driver = make_driver()
            return self._submit_login_form(driver, **form_options)
        except Exception:
            # Login is best-effort: a missing element, a timeout or a driver
            # start-up failure is reported to the caller as "no cookie".
            return None
        finally:
            if driver is not None:
                try:
                    # Without this every login attempt leaves a browser
                    # process running.
                    driver.quit()
                except Exception:
                    # Cleanup must not mask the login result.
                    pass

    def get_cookie_by_Chrome(self):
        """Log in with Chrome and return the cookie dict, or ``None`` on failure."""
        # NOTE(review): the path separators were missing in the original
        # literal; restored on the assumption of the default Chrome install
        # directory -- confirm against the local machine.
        chromedriver = r"C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe"
        os.environ["webdriver.chrome.driver"] = chromedriver
        return self._run_session(
            lambda: webdriver.Chrome(chromedriver),
            maximize=True,
        )

    def get_cookie_by_PhantomJS(self):
        """Log in with PhantomJS and return the cookie dict, or ``None`` on failure."""

        def make_driver():
            dcap = dict(DesiredCapabilities.PHANTOMJS)
            dcap["phantomjs.page.settings.userAgent"] = (
                "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.162 Safari/537.36"
            )
            return webdriver.PhantomJS(desired_capabilities=dcap)

        return self._run_session(make_driver, clear_fields=True)


class CookieException(Exception):
    """Raised when the login cookie is no longer valid."""

    def __init__(self):
        Exception.__init__(self)


# Manual check:
# if __name__ == "__main__":
#     L = Login(qq_num="xxxx", qq_passwd="xxxx")
#     L.get_cookie_by_Chrome()
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author : zhibo.wang
# E-mail : d_1206@qq.com
# Date   : 18/03/23 14:22:58
# Desc   : Yichuxing heat map crawler

import hashlib
import socket
import os
import json
import signal
import time
import random
import datetime

import requests
from yichuxing.settings import qq_list, s_fre, proxyMeta, is_proxy
from requests.exceptions import RequestException
#from utils.user_angents import agents
from data_utils.ali_oss import OSS2
from data_utils.time_convert import get_time_stamp
from yichuxing.yichuxing_utils.qqlogin import CookieException, Login
from data_utils.conmongodb import mongo_con_keepalive
from yichuxing.yichuxing_utils.create_grid import create_grid_by_center, get_gd_data


class Crawl():
    """Crawl the heat-map API cell by cell, rotating QQ accounts as they wear out.

    Account state lives in the ``yichuxing_qq_status`` collection; each
    successful response with data is uploaded through ``OSS2``.
    """

    db = mongo_con_keepalive()
    header = {
        "Host": "c.easygo.qq.com",
        "Connection": "keep-alive",
        "Accept": "application/json",
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "X-Requested-With": "XMLHttpRequest",
        "Referer": "http://c.easygo.qq.com/eg_toc/map.html?origin=csfw",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36",
    }
    start_url = "http://c.easygo.qq.com/api/egc/heatmapdata"
    cookie_data = None
    # Candidate pauses (seconds) before each request.
    if is_proxy:
        wait_time = [0.16, 0.17]
    else:
        wait_time = [3, 3.1, 3.2, 3.3, 3.4]
    time_stamp = get_time_stamp()
    time_local = time.localtime(int(time_stamp))
    date = time.strftime("%Y-%m-%d", time_local)
    proxies = {
        "http": proxyMeta,
        "https": proxyMeta,
    }
    # Account currently in use and the number of requests made with it.
    fre_data = {"qq": None, "pwd": None}
    fre = 0
    pid = os.getpid()
    oss = OSS2()
    path_dir = None
    website = "population_yichuxing"
    qq_status = "yichuxing_qq_status"
    # Attempts per request before a network failure is given up on.
    max_retries = 10
    # Seconds before a single HTTP request is abandoned (and retried).
    request_timeout = 30

    def __init__(self):
        self.path_dir = "population/yichuxing/{0}/".format(self.time_stamp)
        self.db.get_collection('pathdir_dict').insert_one(
            {'pathdir': self.path_dir, 'website': self.website, 'flag': False}
        )
        accounts = self.db.get_collection(self.qq_status)
        if accounts.find_one({"date": self.date}) is None:
            # No record for today yet: reset every account to "unused".
            accounts.delete_many({})
            print("新的一天,新的开始 初始化所有账号")
            fresh = [
                {"qq": i["qq"], "pwd": i["pwd"], "n": 0, 'status': False, "date": self.date}
                for i in qq_list
            ]
            if fresh:
                # insert_many rejects an empty document list.
                accounts.insert_many(fresh)
        super(Crawl, self).__init__()

    def kill(self):
        """Send SIGTERM to this process."""
        try:
            os.kill(self.pid, signal.SIGTERM)
        except OSError as e:
            print("kill pid error: ", e)

    def _retire_current_account(self):
        """Flag the account in use as spent so it is not picked again today."""
        qq = self.fre_data.get("qq")
        self.db.get_collection(self.qq_status).update_one(
            {"qq": qq}, {"$set": {"status": True}}
        )

    def get_cookie(self):
        """Pick a random unused account, log in and store its cookies.

        When no unused account is left the process terminates itself. When
        the login fails, ``self.cookie_data`` keeps its previous value.
        """
        available = list(
            self.db.get_collection(self.qq_status).find({"status": False}, {"_id": 0})
        )
        if not available:
            print("没有账号了, 杀死自己")
            self.kill()
            return
        self.fre = 0
        self.fre_data = random.choice(available)
        login = Login(qq_num=self.fre_data.get("qq"),
                      qq_passwd=self.fre_data.get("pwd"))
        cookie_data = login.get_cookie_by_PhantomJS()
        #cookie_data = login.get_cookie_by_Chrome()
        if cookie_data:
            self.cookie_data = cookie_data

    def spyder_params(self, item):
        """Build the query parameters for one grid cell."""
        params = {
            "lng_min": item.get("lng_min"),
            "lat_max": item.get("lat_max"),
            "lng_max": item.get("lng_max"),
            "lat_min": item.get("lat_min"),
            "level": 16,
            "city": "",
            "lat": "undefined",
            "lng": "undefined",
            "_token": "",
        }
        return params

    def spyder(self, params):
        """Request one cell and return the decoded JSON body.

        Network errors are retried up to ``max_retries`` times; if every
        attempt fails ``None`` is returned.

        Raises:
            CookieException: the response status is not 200 or the body is
                not JSON.
        """
        for _ in range(self.max_retries):
            time.sleep(random.choice(self.wait_time))
            try:
                if self.fre >= s_fre:
                    print("账号: {0}, 抓取次数达到上限, 更换qq账号".format(self.fre_data.get("qq")))
                    self._retire_current_account()
                    self.get_cookie()
                request_kwargs = {
                    "headers": self.header,
                    "cookies": self.cookie_data,
                    "params": params,
                    "timeout": self.request_timeout,
                }
                if is_proxy:
                    request_kwargs["proxies"] = self.proxies
                r = requests.get(self.start_url, **request_kwargs)
            except RequestException:
                # Transient network/proxy failure: try again.
                continue
            if r.status_code != 200:
                raise CookieException
            self.fre = self.fre + 1
            try:
                return r.json()
            except ValueError:
                # A non-JSON body is treated as an expired session.
                raise CookieException
        print("request failed {0} times, giving up on: {1}".format(self.max_retries, params))
        return None

    def get(self, params):
        """Fetch one cell, switching to a new account once if the cookie is rejected."""
        try:
            return self.spyder(params)
        except CookieException:
            print("账号: {0}, cookie 失效,获取新账号登陆, 并抓取".format(
                self.fre_data.get("qq")))
            self._retire_current_account()
            self.get_cookie()
            return self.spyder(params)

    def create_filename(self, url):
        """Build a file name from the host name, URL host, URL md5 and current second."""
        fname = '%s_%s_%s_%s.json' % (
            socket.gethostname(),
            url.split('//')[-1].split('/')[0].replace('.', '-'),
            hashlib.md5(url.encode()).hexdigest(),
            str(time.time()).split('.')[0],
        )
        return fname

    def start(self):
        """Crawl every grid cell of every city returned by ``get_gd_data``."""
        self.get_cookie()
        for i in get_gd_data():
            print("begin: ", i)
            latlng_dict = create_grid_by_center(i)
            print("将要抓取的次数: ", len(latlng_dict))
            for o in latlng_dict:
                print("抓取范围: ", o)
                params = self.spyder_params(o)
                data_json = self.get(params)
                file_ = "{0}{1}".format(
                    self.path_dir,
                    self.create_filename("{0}{1}".format(self.start_url, params)))
                if data_json is not None and data_json.get("code") != 0:
                    print("code: {0}, 获取新的账号,再一次抓取".format(data_json.get("code")))
                    self._retire_current_account()
                    self.get_cookie()
                    data_json = self.get(params)
                if data_json is None:
                    # Every network attempt failed; skip this cell rather
                    # than crash the whole run.
                    continue
                if data_json.get("code") == 0 and data_json.get("data"):
                    data_json["cityname"] = o["cityname"]
                    #print(data_json)
                    self.oss.uploadfiledata(file_, json.dumps(data_json))
            # NOTE(review): Cursor.count() is deprecated in newer PyMongo
            # releases; switch to count_documents() if the driver is upgraded.
            co = self.db.get_collection(self.qq_status).find({"status": False}).count()
            print("剩余可用qq count: ", co)


if __name__ == "__main__":
    c = Crawl()
    c.start()
# Number of fetches allowed per account
s_fre = 70

# Side length of each crawled square, used as the shift step (0.04 -> 4 km)
lat_offset = lng_offset = 0.04

# Whether to go through the proxy. True: on, False: off
is_proxy = True

# City class -> number of rings to crawl
grade = {
    0: 6,
    1: 6,
    2: 5,
    3: 4,
    4: 4,
    5: 4,
}

# Proxy address
proxyMeta = "http://xxx:xxx@proxy.abuyun.com:9020"

# QQ accounts
qq_list = [
    {"qq": "xxx", "pwd": "xxx"},
]
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author : zhibo.wang
# E-mail : d_1206@qq.com
# Date   : 18/03/23 16:28:43
# Desc   :

import json

import numpy as np
from yichuxing.settings import lat_offset, lng_offset, grade
from data_utils.conmongodb import mongo_con_keepalive
from data_utils.location_convert import bd09togcj02

db = mongo_con_keepalive()


def get_gd_data():
    """Return the class-3 cities outside "广东省" with converted center coordinates.

    Reads ``params_citys`` documents with ``exists_city`` set, sorted by
    ``class``. In each returned document ``center_lng``/``center_lat`` are
    replaced by ``lng``/``lat`` as produced by ``bd09togcj02``.
    """
    city_list = db.get_collection("params_citys").find(
        {"exists_city": True}, {"_id": 0}
    ).sort("class")
    location = []
    for city in city_list:
        if city.get("province") == "广东省" or city.get("class") != 3:
            continue
        center_lng = city.pop("center_lng")
        center_lat = city.pop("center_lat")
        # Convert to the Tencent coordinate system.
        lng, lat = bd09togcj02(center_lng, center_lat)
        city["lng"] = lng
        city["lat"] = lat
        location.append(city)
    return location


def create_grid_by_center(location, n=None):
    """Split the square around a city center into crawlable cells.

    The square reaches ``n`` cells out from the center in every direction,
    e.g. with 4 km cells and n = 5 it covers 4*4*(5*2)^2 = 1600 km^2. The
    cell size comes from ``lat_offset``/``lng_offset`` in settings.

    Args:
        location: dict with ``lng``, ``lat``, ``class`` and ``cityname``.
        n: number of rings; defaults to ``grade[location["class"]]``.

    Returns:
        A list of dicts with ``lng_min``, ``lat_max``, ``lng_max``,
        ``lat_min`` and ``cityname``, ordered row by row from the
        bottom-left cell.
    """
    lng, lat = location["lng"], location["lat"]
    city_class, cityname = location["class"], location["cityname"]
    if n is None:
        n = grade.get(city_class)
    n = float(n)

    bottom_lat = lat - lat_offset * n
    left_lng = lng - lng_offset * n
    # Derive the cell count from n instead of np.arange(start, stop, step):
    # with a float step, rounding error can add or drop a cell per side.
    cells_per_side = int(np.ceil(round(2 * n, 9)))
    lat_range = [float(bottom_lat + lat_offset * k) for k in range(cells_per_side)]
    lng_range = [float(left_lng + lng_offset * k) for k in range(cells_per_side)]

    end_data = []
    for lat_ in lat_range:
        for lng_ in lng_range:
            end_data.append({
                "lng_min": lng_,
                "lat_max": lat_ + lat_offset,
                "lng_max": lng_ + lng_offset,
                "lat_min": lat_,
                "cityname": cityname,
            })
    return end_data
{
    "cityname" : "北京市",
    "province" : "北京市",
    "citycode" : "131",
    "center_lat" : 39.904211,  # 百度坐标
    "center_lng" : 116.407394,
    "class" : 0,
    "ftx_code" : "bj",
    "meituan_code" : "beijing",
    "meituan_id" : 1,
    "dianping_id" : 2,
    "dianping_code" : "beijing",
    "gd_adcode" : "110000",
    "gd_citycode" : "010",
    "shunqi_code" : "beijing",
    "xiecheng_code" : "BJS",
    "xiecheng_status" : true,
    "zhilian_code" : "beijing",
    "baidu_id" : 131,
    "exists_city" : true
}
{ "scale" : "20,50,100,200", "lng_a" : 116.550125, "lat_a" : 39.843624999999996, "lng_b" : 116.55662935278988, "lat_b" : 39.84962393215385, "lng_g" : 116.54429316621265, "lat_g" : 39.842540318493164, "gps_s" : "a", "count" : 800, "grid_y" : 159374, "grid_x" : 466200, "max_data" : 32000, "crawl_time" : "2018-05-29 10:03:37", "city" : "北京市", }
经纬度解密代码
http://c.easygo.qq.com/eg_toc/js/map-d76c21c16d.bundle.js
lng = 1e-6 * (250.0 * d['grid_x'] + 125.0)
lat = 1e-6 * (250.0 * d['grid_y'] + 125.0)
教程仅供技术研究学习使用,若有侵权,联系本人删除