不分类别的效果
不同分类的分布效果图

从海友网获取各个企业名单保存进mysql
cmfishhelper.py
从下列网址得到各个企业名片的网址,保存进表card
cds = get_cds()
http://www.cmfish.com/cd/cd_style.php?pageNum_Recordset1=%d&totalRows_Recordset1=191&id=%d
访问企业名片页面获得名称联系人地址保存进数据库
update_cds()
取出地址,从百度地图获得经纬度保存进数据库
http://api.map.baidu.com/geocoder/v2/
update_lnglat()
把名称,经纬度取出来生成个json文件
gen_json()
#encoding=utf-8
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
sys.path.append('..')
import requests
from bs4 import BeautifulSoup
import pprint
from utils.db import sqlhelper
from utils import setting
import re
import pymysql
import traceback
from requests.utils import get_encoding_from_headers, get_encodings_from_content
import urllib2
import json
# Crawl bounds: get_cds walks category ids 1..typeto and page numbers 0..pageto-1.
typeto = 7
pageto = 5
# URL templates for a category listing and for a numbered page of a listing.
cardurls = "http://www.cmfish.com/cd/cd_style.php?id=%d"
cdpages = "http://www.cmfish.com/cd/cd_style.php?pageNum_Recordset1=%d&totalRows_Recordset1=191&id=1"
# Connection settings are read from the 'mysqldb2' entry of setting.YAMLDATA.
mysqldb = setting.YAMLDATA.get('mysqldb2')
host = mysqldb.get('host')
user = mysqldb.get('user')
pwd = mysqldb.get('pwd')
db = mysqldb.get('cmfishdb')
# Module-wide SQL helper used for every query and update in this script.
sh = sqlhelper.SqlHelper(host, user, pwd, db, 'mysql')
# URL patterns: group 3 is the value of the id= / hot= query parameter.
cardidrec = re.compile(r'((id=)([^&][^&]*))', re.IGNORECASE)
hotrec = re.compile(r'((hot=)([^&][^&]*))', re.IGNORECASE)

# Page patterns are applied to the raw (undecoded) response body, so they are
# byte patterns.  The \xNN runs are the GBK bytes of the Chinese field labels;
# group 2 of every pattern is the field value.  Line breaks inside the markup
# are matched with \r?\n so both LF and CRLF pages are accepted.
# Label: "join date".
daterec = re.compile(br'(\xbc\xd3\xc8\xeb\xc8\xd5\xc6\xda: ([\s\S]*?)</td>)', re.IGNORECASE)
# Company name shown in the breadcrumb.
namerec = re.compile(br'(</a> > <strong>([\s\S]*?)</strong>)', re.IGNORECASE)
# Label: "contact person" (the label is written with spaces between characters).
contactrec = re.compile(br'(\xc1\xaa \xcf\xb5 \xc8\xcb</td>\r?\n<td>([\s\S]*?)</td>)', re.IGNORECASE)
# Label: "contact phone".
mobilerec = re.compile(br'(\xc1\xaa\xcf\xb5\xb5\xe7\xbb\xb0</td>\r?\n<td>([\s\S]*?)</td>\r?\n)', re.IGNORECASE)
# Label: "e-mail".
mailrec = re.compile(br'(\xb5\xe7\xd7\xd3\xd3\xca\xcf\xe4</td>\r?\n<td>([\s\S]*?)</td>\r?\n)', re.IGNORECASE)
# Label: "contact address".
addressrec = re.compile(br'(\xc1\xaa\xcf\xb5\xb5\xd8\xd6\xb7</td>\r?\n<td>([\s\S]*?)</td>\r?\n)', re.IGNORECASE)
# Free-text note cell.
# NOTE(review): the character after <td> is a plain space here; confirm against a live page.
noterec = re.compile(br'(<td> ([\s\S]*?)</td>\r?\n)', re.IGNORECASE)
def get_cds():
    """Crawl the category listing pages and collect company-card URLs.

    Walks category ids 1..typeto and page numbers 0..pageto-1.  Every link
    whose href contains "cd.php?id=" (except the empty placeholder
    'cd.php?id=&hot=') is turned into an absolute URL, stored through
    save_cd() and appended to the result.

    Returns:
        list of absolute card URLs, in the order they were found.
    """
    cds = []
    for typeid in range(1, typeto + 1):
        for pageid in range(0, pageto):
            url = 'http://www.cmfish.com/cd/cd_style.php?pageNum_Recordset1=%d&totalRows_Recordset1=191&id=%d' % (pageid, typeid)
            resp = requests.get(url)
            soup = BeautifulSoup(resp.text, 'html.parser')
            # href=True skips anchors that have no href attribute; indexing
            # attrs['href'] on those raised KeyError and aborted the crawl.
            for link in soup.find_all('a', href=True):
                href = link['href']
                if "cd.php?id=" in href and href != 'cd.php?id=&hot=':
                    cdlink = "http://www.cmfish.com/cd/" + href
                    pprint.pprint(cdlink)
                    save_cd(typeid, pageid, cdlink)
                    cds.append(cdlink)
    return cds
def save_cd(typeid, pageid, url):
    """Insert one card URL into table `card`.

    Args:
        typeid: category id the URL was found under (formatted with %d).
        pageid: listing page number the URL was found on (formatted with %d).
        url: absolute card URL scraped from the listing page.
    """
    # The URL comes from scraped HTML, so escape it before splicing it into
    # the statement; a quote in the href would otherwise break the SQL.
    sql = "insert into card(typeid,pageid,url) values(%d,%d,'%s')" % (
        typeid, pageid, pymysql.escape_string(url))
    sh.ExecNonQuery(sql)
def re_result(strrec, str, value):
    """Return the decoded second group of the first match of *strrec* in *str*.

    Args:
        strrec: compiled pattern with at least two groups.
        str: raw (byte) text to search.  The name shadows the builtin but is
            kept so existing callers are unaffected.
        value: fallback returned unchanged when nothing matches.

    Returns:
        The matched bytes decoded as gb2312, or as gbk when gb2312 decoding
        fails; *value* when there is no match.

    Raises:
        UnicodeDecodeError: if the match decodes as neither gb2312 nor gbk.
    """
    searched = strrec.findall(str)
    # findall() always returns a list, so an emptiness test is sufficient.
    if not searched:
        return value
    raw = searched[0][1]
    pprint.pprint(raw)
    try:
        return raw.decode('gb2312')
    except UnicodeDecodeError:
        # gbk covers characters outside gb2312; only a decoding failure
        # should trigger the fallback, not arbitrary errors.
        return raw.decode('gbk')
def url_result(strrec, str, value):
    """Return the third group of the first match of *strrec* in *str*.

    Args:
        strrec: compiled pattern with at least three groups.
        str: text to search.  The name shadows the builtin but is kept so
            existing callers are unaffected.
        value: fallback returned unchanged when nothing matches.

    Returns:
        The third group of the first match (also printed), or *value*.
    """
    searched = strrec.findall(str)
    # findall() always returns a list, so an emptiness test is sufficient.
    if not searched:
        return value
    found = searched[0][2]
    print(found)
    return found
def get_detail(selsql, updsql):
    """Fetch every card page selected by *selsql* and store its parsed fields.

    Args:
        selsql: query whose rows are (id, url).
        updsql: %-format template taking, in order: cardid, hot, date, name,
            contact, mobile, mail, address, note, id.

    A failure on one row is printed with its traceback and the loop moves on
    to the next row, so one bad page does not stop the run.
    """
    results = sh.ExecQuery(selsql)
    for result in results:
        try:
            rowid, url = result[0], result[1]
            # NOTE(review): cardid and hot are taken from the URL and formatted
            # into the statement unquoted; confirm they are always numeric.
            cardid, hot = url_result(cardidrec, url, ''), url_result(hotrec, url, 0)
            response = urllib2.urlopen(urllib2.Request(url))
            try:
                content = response.read()
            finally:
                # Release the connection even when the read fails.
                response.close()
            # Same extraction order as the columns in updsql.
            patterns = (daterec, namerec, contactrec, mobilerec,
                        mailrec, addressrec, noterec)
            fields = [re_result(rec, content, '') for rec in patterns]
            escaped = tuple(pymysql.escape_string(field) for field in fields)
            sql = updsql % ((cardid, hot) + escaped + (rowid,))
            sh.ExecNonQuery(sql)
        except Exception as e:
            print('error: %s %s' % (e, traceback.format_exc()))
def update_cds():
    """Run get_detail() over the card rows whose cardid is still null."""
    pending_rows_sql = "select id,url from card where cardid is null"
    fill_fields_sql = "update card set cardid=%s,hot=%s,date='%s',name='%s',contact='%s',mobile='%s',mail='%s',address='%s',note='%s' where id=%d"
    get_detail(pending_rows_sql, fill_fields_sql)
def get_lnglat(address):
    """Look up *address* with the Baidu geocoder v2 endpoint.

    Args:
        address: address text (byte string or unicode).

    Returns:
        The endpoint's JSON response parsed into Python objects.
    """
    import urllib  # Python 2 home of urlencode; local so no file-level import changes

    print(address)
    url = 'http://api.map.baidu.com/geocoder/v2/'
    output = 'json'
    # NOTE(review): the API key is hard-coded in source; consider moving it
    # into the project settings.
    ak = 'c7aBgFWD6cMDPOe4BSiG8HLNlvXNKvCW'
    # urlencode needs bytes to percent-encode non-ASCII text predictably.
    if not isinstance(address, bytes):
        address = address.encode('utf-8')
    # Percent-encode the query: a raw address containing '&', '#', '+' or
    # spaces would otherwise corrupt the request.
    query = urllib.urlencode([('address', address), ('output', output), ('ak', ak)])
    response = urllib2.urlopen(url + '?' + query)
    try:
        return json.loads(response.read())
    finally:
        response.close()
def save_lnglat(selsql, updsql):
    """Geocode the address of every row from *selsql* and store lat/lng.

    Args:
        selsql: query whose rows are (id, address).
        updsql: %-format template taking, in order: lat, lng, id.

    Rows without an address, rows whose address contains '例如', and rows
    the geocoder returns no location for are skipped.  A failure on one row
    is printed with its traceback and the loop moves on.
    """
    results = sh.ExecQuery(selsql)
    for row in results:
        try:
            rowid, address = row[0], row[1]
            # '例如' ("for example") presumably marks placeholder text left
            # in the address field; such rows are not geocoded.
            if not address or '例如' in address:
                continue
            address = address.replace(' ', ',')
            # A separate name keeps the DB row distinct from the API response.
            geo = get_lnglat(address)
            found = geo.get('result')
            # Only a dict payload can carry a location; anything else is a miss.
            location = found.get('location') if isinstance(found, dict) else None
            if not location:
                continue
            sql = updsql % (location.get('lat'), location.get('lng'), rowid)
            sh.ExecNonQuery(sql)
        except Exception as e:
            print('error: %s %s' % (e, traceback.format_exc()))
def update_lnglat():
    """Run save_lnglat() over the card rows whose lat is still null."""
    missing_coords_sql = "select id,address from card where lat is null"
    store_coords_sql = "update card set lat=%f,lng=%f where id=%d"
    save_lnglat(missing_coords_sql, store_coords_sql)
def gen_json(ofile='./../json/cards.json'):
    """Dump name, lat, lng and typeid of every geocoded card to *ofile* as JSON.

    The JSON text is also printed after it has been written.
    """
    rows = sh.ExecQuery("select name,lat,lng,typeid from card where lat is not null")
    cards = [
        {"name": row[0], "lat": float(row[1]), "lng": float(row[2]), "typeid": int(row[3])}
        for row in rows
    ]
    payload = json.dumps(cards)
    with open(ofile, 'w') as out:
        out.write(payload)
    print(payload)
if __name__ == '__main__':
    # The pipeline is run one stage at a time; enable the stage that is needed:
    #   cds = get_cds(); print len(cds)   - collect card URLs into the database
    #   update_cds()                      - scrape the details of each card
    #   print get_lnglat(<address>)       - try the geocoder on a single address
    #   update_lnglat()                   - geocode every stored address
    gen_json()
读取生成的json文件,显示在地图上
用file://直接打开html时,读取json文件会报跨域错误;把页面发布成网站(通过http访问)就可以
cmfish.html
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>海友网企业全国分布</title>
</head>
<body>
<!-- Container with an explicit width and height for ECharts to render into -->
<div id="main" style="width:1200px;height:1200px;"></div>
<script type="text/javascript" src="../js/jquery-3.3.1.min.js"></script>
<script type="text/javascript" src="../js/echarts-all-3.js"></script>
<script type="text/javascript" src="../js/china.js"></script>
<script type="text/javascript">
// Legend / series names. Index i holds the cards whose typeid is i + 1.
var categories = ['生产厂商', '进出口商', '代理商', '店铺', '繁殖', '个人', '其他'];

// One bucket of scatter points per category.
var cards_arr = [];
for (var c = 0; c < categories.length; c++) {
    cards_arr[c] = [];
}

$.ajax({
    url: "../json/cards.json", // location of the generated JSON file
    type: "GET",
    dataType: "json",
    success: function (data) {
        // Sort every card into the bucket of its category.
        $.each(data, function (i, item) {
            var bucket = cards_arr[item.typeid - 1];
            // Ignore cards whose typeid has no matching category.
            if (bucket) {
                bucket.push({
                    name: item.name,
                    value: [item.lng, item.lat]
                });
            }
        });
        // Draw once, after all points are bucketed; drawing inside the loop
        // re-initialised the chart for every single card.
        loaddata(cards_arr);
    }
});

// Build one scatter series on the geo coordinate system for a category.
function buildSeries(name, points) {
    return {
        name: name,
        type: 'scatter',
        coordinateSystem: 'geo', // use the geo component configured below
        data: points,
        symbolSize: 10, // scatter point size
        label: {
            normal: {
                formatter: '{b}',
                show: false
            },
            emphasis: {
                show: true
            }
        },
        hoverAnimation: true,
        silent: false,
        animation: false,
        z: 3
    };
}

// Render the China map with one scatter series per category.
// data: array of point arrays, indexed like `categories`.
function loaddata(data) {
    var chart = echarts.init(document.getElementById('main'));

    var series = [];
    for (var i = 0; i < categories.length; i++) {
        series.push(buildSeries(categories[i], data[i]));
    }

    chart.setOption({
        backgroundColor: '#404a59',
        title: {
            text: '海友网企业全国分布',
            subtext: 'data from cmfish',
            sublink: 'http://www.cmfish.com',
            x: 'center',
            textStyle: {
                color: '#fff'
            }
        },
        tooltip: {
            trigger: 'item'
        },
        legend: {
            orient: 'vertical',
            x: 'left',
            data: categories,
            textStyle: {
                color: 'orange'
            }
        },
        // Geo component: the map coordinate system the scatter series are drawn on.
        geo: {
            silent: false,
            map: 'china',
            label: {
                normal: {
                    show: false
                },
                emphasis: {
                    show: true
                }
            },
            itemStyle: {
                hoverAnimation: true,
                normal: {
                    areaColor: '#323c48',
                    borderColor: '#000'
                },
                emphasis: {
                    areaColor: '#2a333d',
                    opacity: 0
                }
            }
        },
        series: series
    });
}
</script>
</body>
</html>
以上代码提交在github上,可以下载所用到的echarts的js文件
https://github.com/sui84/pytest