zoukankan      html  css  js  c++  java
  • 一个电子围栏需求的脚本记录

    需求:系统对接了厂家的GPS数据,基于这些GPS数据,过滤出指定区域的数据

    从网上找到了一个电子围栏的python脚本,现在需要的是循环取数据判断是否在指定区域,在指定区域就把这部分数据拿出来放到另外一个库表

    1、效率问题

    碰到的其中一个问题是脚本的效率问题,以5W条数据来测试

    脚本1:使用cur.fetchone(),逐条读取数据,逐条判断,逐条插入列表,批量入库,批量commit

    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    import json
    import math
    import MySQLdb
    import time
    ISOTIMEFORMAT='%Y-%m-%d %X'
    print 'start',':',time.strftime(ISOTIMEFORMAT, time.localtime())
    lnglatlist = []
    data = '[{"name":"广本黄埔工厂","points":[{"lng":113.642124,"lat":23.167372},{"lng":113.636176,"lat":23.175162},{"lng":113.644930,"lat":23.179870},{"lng":113.652108,"lat":23.173823}],"type":0}]'
    data = json.loads(data)
    if 'points' in data[0]:
        for point in data[0]['points']:
            #print(str(point['lng'])+" "+str(point['lat']))
            lnglat = []
            lnglat.append(float(str(point['lng'])))
            lnglat.append(float(str(point['lat'])))
            lnglatlist.append(lnglat)
    def windingNumber(point, poly):
        poly.append(poly[0])
        px = point[0]
        py = point[1]
        sum = 0
        length = len(poly)-1
    
        for index in range(0,length):
            sx = poly[index][0]
            sy = poly[index][1]
            tx = poly[index+1][0]
            ty = poly[index+1][1]
    
            #点与多边形顶点重合或在多边形的边上
            if((sx - px) * (px - tx) >= 0 and (sy - py) * (py - ty) >= 0 and (px - sx) * (ty - sy) == (py - sy) * (tx - sx)):
                return "on"
            #点与相邻顶点连线的夹角
            angle = math.atan2(sy - py, sx - px) - math.atan2(ty - py, tx - px)
    
            #确保夹角不超出取值范围(-π 到 π)
            if(angle >= math.pi):
                angle = angle - math.pi * 2
            elif(angle <= -math.pi):
                angle = angle + math.pi * 2
            sum += angle
    
            #计算回转数并判断点和多边形的几何关系
        result = 'out' if int(sum / math.pi) == 0 else 'in'
        return result
    
    ################循环取GPS数据##########################
    conn=MySQLdb.connect(user='root',passwd='XXX',host='XXX',charset="utf8") #连接到mysql
    cur=conn.cursor()
    conn.select_db('XXX')
    cur.execute("select id,sim,alarm,status,latitude,longitude,asl,speed,direction,sample_time,attch_msg,create_time,car_no from gps_msg where sample_time>='2019-04-10 18:00:00' and sample_time<'2019-04-10 20:00:00' limit 50000")
    ####################第一种方式########################
    count=1
    scope_gps_list=[]
    while count<=50000:    #这种方式,这个数量不能比SQL的数据量小,不然会报错
        gps_data_per=cur.fetchone()
        # print gps_data_per
        point=gps_data_per[5].split(",")+gps_data_per[4].split(",")  #取出每条数据的经纬度,split()转换成列表
        point=map(float,point) #字符串类型转换成浮点型
        # print point
        # print count
        if count in(10000,20000,30000,40000,50000):
            print count,':',time.strftime(ISOTIMEFORMAT, time.localtime())
        # print windingNumber(point,lnglatlist)
        if windingNumber(point,lnglatlist)=='in':
            scope_gps_list.append(gps_data_per)   #生成[(1,2,3),(1,2,3)]
        count=count+1
    sqlin="insert into gps_msg_20190411(id,sim,alarm,status,latitude,longitude,asl,speed,direction,sample_time,attch_msg,create_time,car_no) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
    cur.executemany(sqlin,scope_gps_list)
    conn.commit()
    ####################第一种方式########################
    ####################第二种方式########################
    # gps_data_all=cur.fetchall()
    # count=0
    # for gps_data_per in gps_data_all:
        # sqlin="insert into gps_msg_20190411(id,sim,alarm,status,latitude,longitude,asl,speed,direction,sample_time,attch_msg,create_time,car_no) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"    
        # point=gps_data_per[5].split(",")+gps_data_per[4].split(",")  #取出每条数据的经纬度,split()转换成列表
        # point=map(float,point) #字符串类型转换成浮点型
        # if windingNumber(point,lnglatlist)=='in':
            # cur.execute(sqlin,gps_data_per)  
        # count=count+1 
        # print count    
    # conn.commit() 
    ####################第二种方式########################
    
    cur.close() #关闭游标
    conn.close() #关闭数据链接
    ################循环取GPS数据##########################    
    print 'end',':',time.strftime(ISOTIMEFORMAT, time.localtime())
    View Code

    为什么要逐条插入列表?

    因为使用cur.fetchone()读取数据后马上去insert into数据,再次cur.fenchone数据,取不到数据

    执行效率:30分6秒跑完

    image_thumb[5]

    start : 2019-04-19 20:30:00
    10000 : 2019-04-19 20:31:16        1分16秒
    20000 : 2019-04-19 20:34:56        3分40秒
    30000 : 2019-04-19 20:41:03        6分6秒
    40000 : 2019-04-19 20:49:35        8分32秒
    50000 : 2019-04-19 21:00:36        11分1秒
    end : 2019-04-19 21:00:36            不到一秒

    总结:最后的列表插入数据库很快,不到1秒

    脚本2:使用cur.fetchall()先存全部数据,再使用for循环,逐条读取,逐条判断,逐条入库,批量commit

    对比脚本1,主要是逐条入库,避免插入到列表,确定下是否插入到列表,列表越来越大导致的慢

    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    import json
    import math
    import MySQLdb
    import time
    ISOTIMEFORMAT='%Y-%m-%d %X'
    print 'start',':',time.strftime(ISOTIMEFORMAT, time.localtime())
    lnglatlist = []
    data = '[{"name":"工厂","points":[{"lng":113.642124,"lat":23.167372},{"lng":113.636176,"lat":23.175162},{"lng":113.644930,"lat":23.179870},{"lng":113.652108,"lat":23.173823}],"type":0}]'
    data = json.loads(data)
    if 'points' in data[0]:
        for point in data[0]['points']:
            #print(str(point['lng'])+" "+str(point['lat']))
            lnglat = []
            lnglat.append(float(str(point['lng'])))
            lnglat.append(float(str(point['lat'])))
            lnglatlist.append(lnglat)
    def windingNumber(point, poly):
        poly.append(poly[0])
        px = point[0]
        py = point[1]
        sum = 0
        length = len(poly)-1
    
        for index in range(0,length):
            sx = poly[index][0]
            sy = poly[index][1]
            tx = poly[index+1][0]
            ty = poly[index+1][1]
    
            #点与多边形顶点重合或在多边形的边上
            if((sx - px) * (px - tx) >= 0 and (sy - py) * (py - ty) >= 0 and (px - sx) * (ty - sy) == (py - sy) * (tx - sx)):
                return "on"
            #点与相邻顶点连线的夹角
            angle = math.atan2(sy - py, sx - px) - math.atan2(ty - py, tx - px)
    
            #确保夹角不超出取值范围(-π 到 π)
            if(angle >= math.pi):
                angle = angle - math.pi * 2
            elif(angle <= -math.pi):
                angle = angle + math.pi * 2
            sum += angle
    
            #计算回转数并判断点和多边形的几何关系
        result = 'out' if int(sum / math.pi) == 0 else 'in'
        return result
    
    ################循环取GPS数据##########################
    conn=MySQLdb.connect(user='root',passwd='XXX',host='XXX',charset="utf8") #连接到mysql
    cur=conn.cursor()
    conn.select_db('XXX')
    cur.execute("select id,sim,alarm,status,latitude,longitude,asl,speed,direction,sample_time,attch_msg,create_time,car_no from gps_msg where sample_time>='2019-04-10 18:00:00' and sample_time<'2019-04-10 20:00:00' limit 50000")
    ####################第一种方式########################
    # count=1
    # scope_gps_list=[]
    # while count<=50000:    #这种方式,这个数量不能比SQL的数据量小,不然会报错
        # gps_data_per=cur.fetchone()
        # point=gps_data_per[5].split(",")+gps_data_per[4].split(",")  #取出每条数据的经纬度,split()转换成列表
        # point=map(float,point) #字符串类型转换成浮点型
        # if count in(10000,20000,30000,40000,50000):
            # print count,':',time.strftime(ISOTIMEFORMAT, time.localtime())
        # if windingNumber(point,lnglatlist)=='in':
            # scope_gps_list.append(gps_data_per)   #生成[(1,2,3),(1,2,3)]
        # count=count+1
    # sqlin="insert into gps_msg_20190411(id,sim,alarm,status,latitude,longitude,asl,speed,direction,sample_time,attch_msg,create_time,car_no) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"    
    # cur.executemany(sqlin,scope_gps_list)
    # conn.commit() 
    ####################第一种方式########################
    ####################第二种方式########################
    gps_data_all=cur.fetchall()
    count=0
    for gps_data_per in gps_data_all:
        sqlin="insert into gps_msg_20190411(id,sim,alarm,status,latitude,longitude,asl,speed,direction,sample_time,attch_msg,create_time,car_no) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
        point=gps_data_per[5].split(",")+gps_data_per[4].split(",")  #取出每条数据的经纬度,split()转换成列表
        point=map(float,point) #字符串类型转换成浮点型
        if windingNumber(point,lnglatlist)=='in':
            cur.execute(sqlin,gps_data_per)
        count=count+1
        if count in(10000,20000,30000,40000,50000):
            print count,':',time.strftime(ISOTIMEFORMAT, time.localtime())
    conn.commit()
    ####################第二种方式########################
    
    cur.close() #关闭游标
    conn.close() #关闭数据链接
    ################循环取GPS数据##########################    
    print 'end',':',time.strftime(ISOTIMEFORMAT, time.localtime())
    View Code

    执行效率:29分钟22秒

    image_thumb[4]

    start : 2019-04-19 21:05:09
    10000 : 2019-04-19 21:06:22    1分16秒
    20000 : 2019-04-19 21:09:55    3分33秒
    30000 : 2019-04-19 21:15:48    5分53秒
    40000 : 2019-04-19 21:23:58    8分10秒
    50000 : 2019-04-19 21:34:31    10分33秒
    end : 2019-04-19 21:34:31        不到1秒

    总结,看来不是插入到列表,导致的速度慢

    脚本3:使用cur.fetchall()先存全部数据,再使用for循环,逐条读取,逐条判断,逐条入库,逐条commit

    对比脚本2,逐条入库,逐条commit,只是做个简单的对比

    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    import json
    import math
    import MySQLdb
    import time
    ISOTIMEFORMAT='%Y-%m-%d %X'
    print 'start',':',time.strftime(ISOTIMEFORMAT, time.localtime())
    lnglatlist = []
    data = '[{"name":"工厂","points":[{"lng":113.642124,"lat":23.167372},{"lng":113.636176,"lat":23.175162},{"lng":113.644930,"lat":23.179870},{"lng":113.652108,"lat":23.173823}],"type":0}]'
    data = json.loads(data)
    if 'points' in data[0]:
        for point in data[0]['points']:
            #print(str(point['lng'])+" "+str(point['lat']))
            lnglat = []
            lnglat.append(float(str(point['lng'])))
            lnglat.append(float(str(point['lat'])))
            lnglatlist.append(lnglat)
    def windingNumber(point, poly):
        poly.append(poly[0])
        px = point[0]
        py = point[1]
        sum = 0
        length = len(poly)-1
    
        for index in range(0,length):
            sx = poly[index][0]
            sy = poly[index][1]
            tx = poly[index+1][0]
            ty = poly[index+1][1]
    
            #点与多边形顶点重合或在多边形的边上
            if((sx - px) * (px - tx) >= 0 and (sy - py) * (py - ty) >= 0 and (px - sx) * (ty - sy) == (py - sy) * (tx - sx)):
                return "on"
            #点与相邻顶点连线的夹角
            angle = math.atan2(sy - py, sx - px) - math.atan2(ty - py, tx - px)
    
            #确保夹角不超出取值范围(-π 到 π)
            if(angle >= math.pi):
                angle = angle - math.pi * 2
            elif(angle <= -math.pi):
                angle = angle + math.pi * 2
            sum += angle
    
            #计算回转数并判断点和多边形的几何关系
        result = 'out' if int(sum / math.pi) == 0 else 'in'
        return result
    
    ################循环取GPS数据##########################
    conn=MySQLdb.connect(user='root',passwd='XXX',host='XXX',charset="utf8") #连接到mysql
    cur=conn.cursor()
    conn.select_db('XXX')
    cur.execute("select id,sim,alarm,status,latitude,longitude,asl,speed,direction,sample_time,attch_msg,create_time,car_no from gps_msg where sample_time>='2019-04-10 18:00:00' and sample_time<'2019-04-10 20:00:00' limit 50000")
    ####################第一种方式########################
    # count=1
    # scope_gps_list=[]
    # while count<=50000:    #这种方式,这个数量不能比SQL的数据量小,不然会报错
        # gps_data_per=cur.fetchone()
        # point=gps_data_per[5].split(",")+gps_data_per[4].split(",")  #取出每条数据的经纬度,split()转换成列表
        # point=map(float,point) #字符串类型转换成浮点型
        # if count in(10000,20000,30000,40000,50000):
            # print count,':',time.strftime(ISOTIMEFORMAT, time.localtime())
        # if windingNumber(point,lnglatlist)=='in':
            # scope_gps_list.append(gps_data_per)   #生成[(1,2,3),(1,2,3)]
        # count=count+1
    # sqlin="insert into gps_msg_20190411(id,sim,alarm,status,latitude,longitude,asl,speed,direction,sample_time,attch_msg,create_time,car_no) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"    
    # cur.executemany(sqlin,scope_gps_list)
    # conn.commit() 
    ####################第一种方式########################
    ####################第二种方式########################
    gps_data_all=cur.fetchall()
    count=0
    for gps_data_per in gps_data_all:
        sqlin="insert into gps_msg_20190411(id,sim,alarm,status,latitude,longitude,asl,speed,direction,sample_time,attch_msg,create_time,car_no) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
        point=gps_data_per[5].split(",")+gps_data_per[4].split(",")  #取出每条数据的经纬度,split()转换成列表
        point=map(float,point) #字符串类型转换成浮点型
        if windingNumber(point,lnglatlist)=='in':
            cur.execute(sqlin,gps_data_per)
            conn.commit()
        count=count+1
        if count in(10000,20000,30000,40000,50000):
            print count,':',time.strftime(ISOTIMEFORMAT, time.localtime())
    
    ####################第二种方式########################
    
    cur.close() #关闭游标
    conn.close() #关闭数据链接
    ################循环取GPS数据##########################    
    print 'end',':',time.strftime(ISOTIMEFORMAT, time.localtime())
    View Code

    执行效率:30分1秒

    image_thumb[3]

    start : 2019-04-19 21:45:11
    10000 : 2019-04-19 21:46:30    1分19秒
    20000 : 2019-04-19 21:50:10    3分40秒
    30000 : 2019-04-19 21:56:09    5分59秒
    40000 : 2019-04-19 22:04:30    8分21秒
    50000 : 2019-04-19 22:15:12    10分42秒
    end : 2019-04-19 22:15:12        不到1秒

    总结:逐条commit会降低效率,但是不会太多

    脚本4:前面执行10000条数据的时候都很快,后面越来越慢,尝试按时间点分段执行SQL,这次的数据范围为一个小时,数据量6W多条,如果效率提高,按5分钟分段,分12次执行,每次的SQL数据量只有5600多行,再使用for循环,逐条读取,逐条判断,逐条入库,逐条commit

    对比脚本1,2,3,试下按时间点分段执行SQL的效率,如果效率有提升,总时间应该=执行一次的时间*12

    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    import json
    import math
    import MySQLdb
    import time
    ISOTIMEFORMAT='%Y-%m-%d %X'
    print 'start',':',time.strftime(ISOTIMEFORMAT, time.localtime())
    lnglatlist = []
    data = '[{"name":"工厂","points":[{"lng":113.642124,"lat":23.167372},{"lng":113.636176,"lat":23.175162},{"lng":113.644930,"lat":23.179870},{"lng":113.652108,"lat":23.173823}],"type":0}]'
    data = json.loads(data)
    if 'points' in data[0]:
        for point in data[0]['points']:
            #print(str(point['lng'])+" "+str(point['lat']))
            lnglat = []
            lnglat.append(float(str(point['lng'])))
            lnglat.append(float(str(point['lat'])))
            lnglatlist.append(lnglat)
    def windingNumber(point, poly):
        poly.append(poly[0])
        px = point[0]
        py = point[1]
        sum = 0
        length = len(poly)-1
    
        for index in range(0,length):
            sx = poly[index][0]
            sy = poly[index][1]
            tx = poly[index+1][0]
            ty = poly[index+1][1]
    
            #点与多边形顶点重合或在多边形的边上
            if((sx - px) * (px - tx) >= 0 and (sy - py) * (py - ty) >= 0 and (px - sx) * (ty - sy) == (py - sy) * (tx - sx)):
                return "on"
            #点与相邻顶点连线的夹角
            angle = math.atan2(sy - py, sx - px) - math.atan2(ty - py, tx - px)
    
            #确保夹角不超出取值范围(-π 到 π)
            if(angle >= math.pi):
                angle = angle - math.pi * 2
            elif(angle <= -math.pi):
                angle = angle + math.pi * 2
            sum += angle
    
            #计算回转数并判断点和多边形的几何关系
        result = 'out' if int(sum / math.pi) == 0 else 'in'
        return result
    
    ################循环取GPS数据##########################
    conn=MySQLdb.connect(user='XXX',passwd='XXX',host='XXX',charset="utf8") #连接到mysql
    cur=conn.cursor()
    conn.select_db('XXX')
    
    ####################第三种方式########################
    def TO_DB(sample_time_start,sample_time_end):
        sql="""
        select id,sim,alarm,status,latitude,longitude,asl,speed,direction,sample_time,attch_msg,create_time,car_no
        from gps_msg
        where sample_time>="""+"'"+sample_time_start+"'"+" and sample_time<" +"'"+sample_time_end+"'"
        cur.execute(sql)
        gps_data_all=cur.fetchall()
        # count=0
        for gps_data_per in gps_data_all:
            sqlin="insert into gps_msg_20190411(id,sim,alarm,status,latitude,longitude,asl,speed,direction,sample_time,attch_msg,create_time,car_no) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
            point=gps_data_per[5].split(",")+gps_data_per[4].split(",")  #取出每条数据的经纬度,split()转换成列表
            point=map(float,point) #字符串类型转换成浮点型
            if windingNumber(point,lnglatlist)=='in':
                cur.execute(sqlin,gps_data_per)
                conn.commit()
    count=1
    sample_time_hour='2019-04-10 18:'
    sample_time_hour_next='2019-04-10 19:'
    for sample_time_min_start in range(0,60,5):
        if count <12:
            sample_time_min_end=sample_time_min_start+5
            sample_time_start=sample_time_hour+str(sample_time_min_start)
            sample_time_end=sample_time_hour+str(sample_time_min_end)
            TO_DB(sample_time_start,sample_time_end)
            print sample_time_start,sample_time_end,count,':',time.strftime(ISOTIMEFORMAT, time.localtime())
    
        else:
            sample_time_start=sample_time_hour+str(sample_time_min_start)
            sample_time_end=sample_time_hour_next+'0'
            TO_DB(sample_time_start,sample_time_end)
            print sample_time_start,sample_time_end,count,':',time.strftime(ISOTIMEFORMAT, time.localtime())
        count=count+1
    
    
    ####################第三种方式########################
    
    
    
    cur.close() #关闭游标
    conn.close() #关闭数据链接
    ################循环取GPS数据##########################    
    print 'end',':',time.strftime(ISOTIMEFORMAT, time.localtime())
    View Code

    执行效率:执行超过了30分钟,很明显此方式也行不通

    执行了9次,大概执行了5W条数据,耗时29分11秒,一样的,越到后面越慢。

    image

    start : 2019-04-20 10:34:28
    2019-04-10 18:0 2019-04-10 18:5 1 : 2019-04-20 10:34:56                 28秒
    2019-04-10 18:5 2019-04-10 18:10 2 : 2019-04-20 10:36:10               1分14秒
    2019-04-10 18:10 2019-04-10 18:15 3 : 2019-04-20 10:38:13             2分3秒
    2019-04-10 18:15 2019-04-10 18:20 4 : 2019-04-20 10:41:02             2分49秒
    2019-04-10 18:20 2019-04-10 18:25 5 : 2019-04-20 10:44:33             3分31秒
    2019-04-10 18:25 2019-04-10 18:30 6 : 2019-04-20 10:48:42             4分12秒
    2019-04-10 18:30 2019-04-10 18:35 7 : 2019-04-20 10:53:14             4分32秒
    2019-04-10 18:35 2019-04-10 18:40 8 : 2019-04-20 10:58:09             4分55秒
    2019-04-10 18:40 2019-04-10 18:45 9 : 2019-04-20 11:03:39             5分30秒

    总结:效率还是不行

    脚本5:在脚本4的基础上修改下,按时间点分段执行SQL,不同的是,执行完一次SQL就关闭连接,再重新打开一个连接执行下一个SQL

    对比脚本4,看下是不是一个连接一直执行效率会越来越低

    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    import json
    import math
    import MySQLdb
    import time
    ISOTIMEFORMAT='%Y-%m-%d %X'
    print 'start',':',time.strftime(ISOTIMEFORMAT, time.localtime())
    lnglatlist = []
    data = '[{"name":"工厂","points":[{"lng":113.642124,"lat":23.167372},{"lng":113.636176,"lat":23.175162},{"lng":113.644930,"lat":23.179870},{"lng":113.652108,"lat":23.173823}],"type":0}]'
    data = json.loads(data)
    if 'points' in data[0]:
        for point in data[0]['points']:
            #print(str(point['lng'])+" "+str(point['lat']))
            lnglat = []
            lnglat.append(float(str(point['lng'])))
            lnglat.append(float(str(point['lat'])))
            lnglatlist.append(lnglat)
    def windingNumber(point, poly):
        poly.append(poly[0])
        px = point[0]
        py = point[1]
        sum = 0
        length = len(poly)-1
    
        for index in range(0,length):
            sx = poly[index][0]
            sy = poly[index][1]
            tx = poly[index+1][0]
            ty = poly[index+1][1]
    
            #点与多边形顶点重合或在多边形的边上
            if((sx - px) * (px - tx) >= 0 and (sy - py) * (py - ty) >= 0 and (px - sx) * (ty - sy) == (py - sy) * (tx - sx)):
                return "on"
            #点与相邻顶点连线的夹角
            angle = math.atan2(sy - py, sx - px) - math.atan2(ty - py, tx - px)
    
            #确保夹角不超出取值范围(-π 到 π)
            if(angle >= math.pi):
                angle = angle - math.pi * 2
            elif(angle <= -math.pi):
                angle = angle + math.pi * 2
            sum += angle
    
            #计算回转数并判断点和多边形的几何关系
        result = 'out' if int(sum / math.pi) == 0 else 'in'
        return result
    
    ################循环取GPS数据##########################
    ####################第三种方式########################
    def TO_DB(sample_time_start,sample_time_end):
        conn=MySQLdb.connect(user=XXX,passwd=XXX,host=XXX,charset="utf8") #连接到mysql
        cur=conn.cursor()
        conn.select_db(XXX)
        sql="""
        select id,sim,alarm,status,latitude,longitude,asl,speed,direction,sample_time,attch_msg,create_time,car_no
        from gps_msg
        where sample_time>="""+"'"+sample_time_start+"'"+" and sample_time<" +"'"+sample_time_end+"'"
        cur.execute(sql)
        gps_data_all=cur.fetchall()
        # count=0
        for gps_data_per in gps_data_all:
            sqlin="insert into gps_msg_20190411(id,sim,alarm,status,latitude,longitude,asl,speed,direction,sample_time,attch_msg,create_time,car_no) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
            point=gps_data_per[5].split(",")+gps_data_per[4].split(",")  #取出每条数据的经纬度,split()转换成列表
            point=map(float,point) #字符串类型转换成浮点型
            if windingNumber(point,lnglatlist)=='in':
                cur.execute(sqlin,gps_data_per)
                conn.commit()
        cur.close() #关闭游标
        conn.close() #关闭数据链接
    count=1
    sample_time_hour='2019-04-10 18:'
    sample_time_hour_next='2019-04-10 19:'
    for sample_time_min_start in range(0,60,5):
        if count <12:
            sample_time_min_end=sample_time_min_start+5
            sample_time_start=sample_time_hour+str(sample_time_min_start)
            sample_time_end=sample_time_hour+str(sample_time_min_end)
            TO_DB(sample_time_start,sample_time_end)
            print sample_time_start,sample_time_end,count,':',time.strftime(ISOTIMEFORMAT, time.localtime())
    
        else:
            sample_time_start=sample_time_hour+str(sample_time_min_start)
            sample_time_end=sample_time_hour_next+'0'
            TO_DB(sample_time_start,sample_time_end)
            print sample_time_start,sample_time_end,count,':',time.strftime(ISOTIMEFORMAT, time.localtime())
        count=count+1
    
            # count=count+1
        # if count in(10000,20000,30000,40000,50000):
            # print count,':',time.strftime(ISOTIMEFORMAT, time.localtime())      
    
    ####################第三种方式########################
    
    
    
    
    ################循环取GPS数据##########################    
    print 'end',':',time.strftime(ISOTIMEFORMAT, time.localtime())
    View Code

    执行效率:执行超过了30分钟,很明显此方式也行不通

    执行到了9次,大概执行了5W条数据,耗时28分54秒,一样的,越到后面越慢。全部执行完,花费了48分50秒

    image

    start : 2019-04-20 11:10:35
    2019-04-10 18:0 2019-04-10 18:5 1 : 2019-04-20 11:11:03                    28秒
    2019-04-10 18:5 2019-04-10 18:10 2 : 2019-04-20 11:12:17                  1分14秒
    2019-04-10 18:10 2019-04-10 18:15 3 : 2019-04-20 11:14:19                 2分2秒
    2019-04-10 18:15 2019-04-10 18:20 4 : 2019-04-20 11:17:07                 2分48秒
    2019-04-10 18:20 2019-04-10 18:25 5 : 2019-04-20 11:20:37                 3分30秒
    2019-04-10 18:25 2019-04-10 18:30 6 : 2019-04-20 11:24:42                 4分5秒
    2019-04-10 18:30 2019-04-10 18:35 7 : 2019-04-20 11:29:12                 4分30秒
    2019-04-10 18:35 2019-04-10 18:40 8 : 2019-04-20 11:34:03                 4分51秒
    2019-04-10 18:40 2019-04-10 18:45 9 : 2019-04-20 11:39:29                 5分26秒
    2019-04-10 18:45 2019-04-10 18:50 10 : 2019-04-20 11:45:26               5分57秒
    2019-04-10 18:50 2019-04-10 18:55 11 : 2019-04-20 11:52:16               6分50秒
    2019-04-10 18:55 2019-04-10 19:0 12 : 2019-04-20 11:59:25                 7分9秒
    end : 2019-04-20 11:59:25

    总结:还是存在越到后面越慢的问题

    脚本6:使用profile工具工具进行检查

    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    from cProfile import Profile
    import json
    import math
    import MySQLdb
    import time
    import gc
    import thread
    # from numba import jit
    ISOTIMEFORMAT='%Y-%m-%d %X'
    print 'start',':',time.strftime(ISOTIMEFORMAT, time.localtime())
    lnglatlist = []
    data = '[{"name":"工厂","points":[{"lng":113.642124,"lat":23.167372},{"lng":113.636176,"lat":23.175162},{"lng":113.644930,"lat":23.179870},{"lng":113.652108,"lat":23.173823}],"type":0}]'
    data = json.loads(data)
    if 'points' in data[0]:
        for point in data[0]['points']:
            #print(str(point['lng'])+" "+str(point['lat']))
            lnglat = []
            lnglat.append(float(str(point['lng'])))
            lnglat.append(float(str(point['lat'])))
            lnglatlist.append(lnglat)
    def windingNumber(point, poly):
        poly.append(poly[0])
        px = point[0]
        py = point[1]
        sum = 0
        length = len(poly)-1
    
        for index in range(0,length):
            sx = poly[index][0]
            sy = poly[index][1]
            tx = poly[index+1][0]
            ty = poly[index+1][1]
    
            #点与多边形顶点重合或在多边形的边上
            if((sx - px) * (px - tx) >= 0 and (sy - py) * (py - ty) >= 0 and (px - sx) * (ty - sy) == (py - sy) * (tx - sx)):
                return "on"
            #点与相邻顶点连线的夹角
            angle = math.atan2(sy - py, sx - px) - math.atan2(ty - py, tx - px)
    
            #确保夹角不超出取值范围(-π 到 π)
            if(angle >= math.pi):
                angle = angle - math.pi * 2
            elif(angle <= -math.pi):
                angle = angle + math.pi * 2
            sum += angle
    
            #计算回转数并判断点和多边形的几何关系
        result = 'out' if int(sum / math.pi) == 0 else 'in'
        return result
    
    ################循环取GPS数据##########################
    ####################第三种方式########################
    def TO_DB(sample_time_start,sample_time_end):
        conn=MySQLdb.connect(user='XXX',passwd='XXX',host='XXX',charset="utf8") #连接到mysql
        cur=conn.cursor()
        conn.select_db('XXX')
        sql="""
        select id,sim,alarm,status,latitude,longitude,asl,speed,direction,sample_time,attch_msg,create_time,car_no
        from gps_msg
        where sample_time>="""+"'"+sample_time_start+"'"+" and sample_time<" +"'"+sample_time_end+"'"
        cur.execute(sql)
        gps_data_all=cur.fetchall()
        # count=0
        for gps_data_per in gps_data_all:
            sqlin="insert into gps_msg_20190411(id,sim,alarm,status,latitude,longitude,asl,speed,direction,sample_time,attch_msg,create_time,car_no) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
            point=gps_data_per[5].split(",")+gps_data_per[4].split(",")  #取出每条数据的经纬度,split()转换成列表
            point=map(float,point) #字符串类型转换成浮点型
            if windingNumber(point,lnglatlist)=='in':
                cur.execute(sqlin,gps_data_per)
                conn.commit()
                # del point  #释放变量,没啥用
                # gc.collect()
        cur.close() #关闭游标
        conn.close() #关闭数据链接
    def for_exec():
        count=1
        sample_time_hour='2019-04-10 18:'
        sample_time_hour_next='2019-04-10 19:'
        for sample_time_min_start in range(0,10,5):
            if count <12:
                sample_time_min_end=sample_time_min_start+5
                sample_time_start=sample_time_hour+str(sample_time_min_start)
                sample_time_end=sample_time_hour+str(sample_time_min_end)
                TO_DB(sample_time_start,sample_time_end)
                print sample_time_start,sample_time_end,count,':',time.strftime(ISOTIMEFORMAT, time.localtime())
    
            else:
                sample_time_start=sample_time_hour+str(sample_time_min_start)
                sample_time_end=sample_time_hour_next+'0'
                TO_DB(sample_time_start,sample_time_end)
                print sample_time_start,sample_time_end,count,':',time.strftime(ISOTIMEFORMAT, time.localtime())
            count=count+1
    
    ####################第三种方式########################
    ################循环取GPS数据##########################   
    
    if __name__ == '__main__':
        prof = Profile()
        prof.runcall(for_exec)
        print 'end',':',time.strftime(ISOTIMEFORMAT, time.localtime())
        prof.print_stats()
        # prof.dump_stats('test.prof') # dump profile result to test.prof

    结果:输出的结果如下,for_exec里面包含TO_DB,TO_DB里面包含windingNumber

    image

    每个字段(和上面截图一一对应)的含义如下:

    • ncalls 函数总的调用次数
    • tottime 函数内部(不包括子函数)的占用时间
    • percall(第一个) tottime/ncalls
    • cumtime 函数包括子函数所占用的时间
    • percall(第二个)cumtime/ncalls
    • filename:lineno(function)  文件:行号(函数)

    基于上面字段的理解分析,逻辑上看,判断出来主要是windingNumber函数耗时,如下

    • for_exec里面包含TO_DB,TO_DB里面包含windingNumber
    • for_exec和TO_DB的tottime很少,cumtime挺大,windingNumber的tottime和cumtime都挺大(tottime和cumtime相差的时间如下)

    image

    脚本7:安装numba模块,进行加速

    安装numba模块,使用pip install numba,安装失败

    尝试使用conda包管理工具进行安装,如下参考

    https://pypi.org/project/numba/

    使用命令,针对2.7的python,使用如下链接下载后安装

    https://repo.anaconda.com/archive/Anaconda2-2019.03-Linux-x86_64.sh --no-check-certificate

    安装完后修改脚本,如下,再次执行测试

    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    from cProfile import Profile
    import json
    import math
    import MySQLdb
    import time
    import gc
    import thread
    from numba import jit
    ISOTIMEFORMAT='%Y-%m-%d %X'
    print 'start',':',time.strftime(ISOTIMEFORMAT, time.localtime())
    lnglatlist = []
    data = '[{"name":"工厂","points":[{"lng":113.642124,"lat":23.167372},{"lng":113.636176,"lat":23.175162},{"lng":113.644930,"lat":23.179870},{"lng":113.652108,"lat":23.173823}],"type":0}]'
    data = json.loads(data)
    if 'points' in data[0]:
        for point in data[0]['points']:
            #print(str(point['lng'])+" "+str(point['lat']))
            lnglat = []
            lnglat.append(float(str(point['lng'])))
            lnglat.append(float(str(point['lat'])))
            lnglatlist.append(lnglat)
    @jit
    def windingNumber(point, poly):
        poly.append(poly[0])
        px = point[0]
        py = point[1]
        sum = 0
        length = len(poly)-1
    
        for index in range(0,length):
            sx = poly[index][0]
            sy = poly[index][1]
            tx = poly[index+1][0]
            ty = poly[index+1][1]
    
            #点与多边形顶点重合或在多边形的边上
            if((sx - px) * (px - tx) >= 0 and (sy - py) * (py - ty) >= 0 and (px - sx) * (ty - sy) == (py - sy) * (tx - sx)):
                return "on"
            #点与相邻顶点连线的夹角
            angle = math.atan2(sy - py, sx - px) - math.atan2(ty - py, tx - px)
    
            #确保夹角不超出取值范围(-π 到 π)
            if(angle >= math.pi):
                angle = angle - math.pi * 2
            elif(angle <= -math.pi):
                angle = angle + math.pi * 2
            sum += angle
    
            #计算回转数并判断点和多边形的几何关系
        result = 'out' if int(sum / math.pi) == 0 else 'in'
        return result
    
    ################循环取GPS数据##########################
    ####################第三种方式########################
    # @jit
    def TO_DB(sample_time_start,sample_time_end):
        conn=MySQLdb.connect(user='XXX',passwd='XXX',host='XXX',charset="utf8") #连接到mysql
        cur=conn.cursor()
        conn.select_db('XXX')
        sql="""
        select id,sim,alarm,status,latitude,longitude,asl,speed,direction,sample_time,attch_msg,create_time,car_no
        from gps_msg
        where sample_time>="""+"'"+sample_time_start+"'"+" and sample_time<" +"'"+sample_time_end+"'"
        cur.execute(sql)
        gps_data_all=cur.fetchall()
        # count=0
        for gps_data_per in gps_data_all:
            sqlin="insert into gps_msg_20190411(id,sim,alarm,status,latitude,longitude,asl,speed,direction,sample_time,attch_msg,create_time,car_no) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
            point=gps_data_per[5].split(",")+gps_data_per[4].split(",")  #取出每条数据的经纬度,split()转换成列表
            point=map(float,point) #字符串类型转换成浮点型
            if windingNumber(point,lnglatlist)=='in':
                cur.execute(sqlin,gps_data_per)
                conn.commit()
                # del point  #释放变量,没啥用
                # gc.collect()
        cur.close() #关闭游标
        conn.close() #关闭数据链接
    # @jit
    def for_exec():
        count=1
        sample_time_hour='2019-04-10 18:'
        sample_time_hour_next='2019-04-10 19:'
        for sample_time_min_start in range(0,60,5):
            if count <12:
                sample_time_min_end=sample_time_min_start+5
                sample_time_start=sample_time_hour+str(sample_time_min_start)
                sample_time_end=sample_time_hour+str(sample_time_min_end)
                TO_DB(sample_time_start,sample_time_end)
                print sample_time_start,sample_time_end,count,':',time.strftime(ISOTIMEFORMAT, time.localtime())
    
            else:
                sample_time_start=sample_time_hour+str(sample_time_min_start)
                sample_time_end=sample_time_hour_next+'0'
                TO_DB(sample_time_start,sample_time_end)
                print sample_time_start,sample_time_end,count,':',time.strftime(ISOTIMEFORMAT, time.localtime())
            count=count+1
    
    ####################第三种方式########################
    ################循环取GPS数据##########################  
    
    for_exec()
    # if __name__ == '__main__':
        # prof = Profile()
        # prof.runcall(for_exec)
        # print 'end',':',time.strftime(ISOTIMEFORMAT, time.localtime())
        # prof.print_stats()
        # prof.dump_stats('test.prof') # dump profile result to test.prof

    对比下脚本5,看下效率,总共花了40分39秒

    image

    节省了8分11秒

    image

    通过以上的种种办法,还是存在越到后面,数据处理会越来越慢的情况,于是继续尝试

    脚本8:按时间点执行分批次执行SQL,使用子进程执行,每次执行都是不同的子进程

    以下红色为对比脚本7添加的代码

    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    from cProfile import Profile
    import json
    import math
    import MySQLdb
    import time
    import gc
    import thread
    from numba import jit
    from multiprocessing import Process
    ISOTIMEFORMAT='%Y-%m-%d %X'
    print 'start',':',time.strftime(ISOTIMEFORMAT, time.localtime())
    lnglatlist = []
    data = '[{"name":"工厂","points":[{"lng":113.642124,"lat":23.167372},{"lng":113.636176,"lat":23.175162},{"lng":113.644930,"lat":23.179870},{"lng":113.652108,"lat":23.173823}],"type":0}]'
    data = json.loads(data)
    if 'points' in data[0]:
        for point in data[0]['points']:
            #print(str(point['lng'])+" "+str(point['lat']))
            lnglat = []
            lnglat.append(float(str(point['lng'])))
            lnglat.append(float(str(point['lat'])))
            lnglatlist.append(lnglat)
    @jit
    def windingNumber(point, poly):
        poly.append(poly[0])
        px = point[0]
        py = point[1]
        sum = 0
        length = len(poly)-1
    
        for index in range(0,length):
            sx = poly[index][0]
            sy = poly[index][1]
            tx = poly[index+1][0]
            ty = poly[index+1][1]
    
            #点与多边形顶点重合或在多边形的边上
            if((sx - px) * (px - tx) >= 0 and (sy - py) * (py - ty) >= 0 and (px - sx) * (ty - sy) == (py - sy) * (tx - sx)):
                return "on"
            #点与相邻顶点连线的夹角
            angle = math.atan2(sy - py, sx - px) - math.atan2(ty - py, tx - px)
    
            #确保夹角不超出取值范围(-π 到 π)
            if(angle >= math.pi):
                angle = angle - math.pi * 2
            elif(angle <= -math.pi):
                angle = angle + math.pi * 2
            sum += angle
    
            #计算回转数并判断点和多边形的几何关系
        result = 'out' if int(sum / math.pi) == 0 else 'in'
        return result
    
    ################循环取GPS数据##########################
    ####################第三种方式########################
    # @jit
    def TO_DB(sample_time_start,sample_time_end):
        conn=MySQLdb.connect(user='XXX',passwd='XXX',host='XXX',charset="utf8") #连接到mysql
        cur=conn.cursor()
        conn.select_db('XXX')
        sql="""
        select id,sim,alarm,status,latitude,longitude,asl,speed,direction,sample_time,attch_msg,create_time,car_no
        from gps_msg
        where sample_time>="""+"'"+sample_time_start+"'"+" and sample_time<" +"'"+sample_time_end+"'"
        cur.execute(sql)
        gps_data_all=cur.fetchall()
        # count=0
        for gps_data_per in gps_data_all:
            sqlin="insert into gps_msg_20190411(id,sim,alarm,status,latitude,longitude,asl,speed,direction,sample_time,attch_msg,create_time,car_no) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
            point=gps_data_per[5].split(",")+gps_data_per[4].split(",")  #取出每条数据的经纬度,split()转换成列表
            point=map(float,point) #字符串类型转换成浮点型
            if windingNumber(point,lnglatlist)=='in':
                cur.execute(sqlin,gps_data_per)
                conn.commit()
                # del point  #释放变量,没啥用
                # gc.collect()
        cur.close() #关闭游标
        conn.close() #关闭数据链接
    # @jit
    def for_exec():
        count=1
        sample_time_hour='2019-04-10 18:'
        sample_time_hour_next='2019-04-10 19:'
        for sample_time_min_start in range(0,60,5):
            if count <12:
                sample_time_min_end=sample_time_min_start+5
                sample_time_start=sample_time_hour+str(sample_time_min_start)
                sample_time_end=sample_time_hour+str(sample_time_min_end)
                # TO_DB(sample_time_start,sample_time_end)
                p=Process(target=TO_DB,args=(sample_time_start,sample_time_end,))
                p.start()
                p.join()
                print sample_time_start,sample_time_end,count,':',time.strftime(ISOTIMEFORMAT, time.localtime())
            else:
                sample_time_start=sample_time_hour+str(sample_time_min_start)
                sample_time_end=sample_time_hour_next+'0'
                # TO_DB(sample_time_start,sample_time_end)
                p=Process(target=TO_DB,args=(sample_time_start,sample_time_end,))
                p.start()
                p.join()
                print sample_time_start,sample_time_end,count,':',time.strftime(ISOTIMEFORMAT, time.localtime())
            count=count+1
    
    ####################第三种方式########################
    ################循环取GPS数据##########################   
    
    for_exec()
    print 'end',':',time.strftime(ISOTIMEFORMAT, time.localtime())
    # if __name__ == '__main__':
        # prof = Profile()
        # prof.runcall(for_exec)
        # print 'end',':',time.strftime(ISOTIMEFORMAT, time.localtime())
        # prof.print_stats()
        # prof.dump_stats('test.prof') # dump profile result to test.prof

    执行效率:4分15秒

    image

    使用此方法,成功,每次执行时间基本相同,看来原因是在一个进程执行导致。

    如果去掉numba,效率会降低多少?测试了下,去掉numba,执行效率:4分28秒,也没降低多少。

    另外还有一种方法可以比较实时地抓取超速数据,可以分批次,多频次执行脚本,每次脚本执行处理较少数据。可以使用crontab调度,每分钟执行一次,只执行1分钟的数据,因为数据量比较小,基本3秒完成。

    脚本9:使用多进程,按时间点,分批次执行SQL

    脚本如下:红色表示为多进程使用到的语句

    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    from cProfile import Profile
    import json,os
    import math
    import MySQLdb
    import time
    import gc
    import thread
    from numba import jit
    from multiprocessing import Process
    from multiprocessing import Pool
    import datetime
    ISOTIMEFORMAT='%Y-%m-%d %X'
    start=time.time()
    print 'start',':',time.strftime(ISOTIMEFORMAT, time.localtime())
    lnglatlist = []
    data = '[{"name":"工厂","points":[{"lng":113.642124,"lat":23.167372},{"lng":113.636176,"lat":23.175162},{"lng":113.644930,"lat":23.179870},{"lng":113.652108,"lat":23.173823}],"type":0}]'
    data = json.loads(data)
    if 'points' in data[0]:
        for point in data[0]['points']:
            #print(str(point['lng'])+" "+str(point['lat']))
            lnglat = []
            lnglat.append(float(str(point['lng'])))
            lnglat.append(float(str(point['lat'])))
            lnglatlist.append(lnglat)
    # @jit
    def windingNumber(point, poly):
        poly.append(poly[0])
        px = point[0]
        py = point[1]
        sum = 0
        length = len(poly)-1
    
        for index in range(0,length):
            sx = poly[index][0]
            sy = poly[index][1]
            tx = poly[index+1][0]
            ty = poly[index+1][1]
    
            #点与多边形顶点重合或在多边形的边上
            if((sx - px) * (px - tx) >= 0 and (sy - py) * (py - ty) >= 0 and (px - sx) * (ty - sy) == (py - sy) * (tx - sx)):
                return "on"
            #点与相邻顶点连线的夹角
            angle = math.atan2(sy - py, sx - px) - math.atan2(ty - py, tx - px)
    
            #确保夹角不超出取值范围(-π 到 π)
            if(angle >= math.pi):
                angle = angle - math.pi * 2
            elif(angle <= -math.pi):
                angle = angle + math.pi * 2
            sum += angle
    
            #计算回转数并判断点和多边形的几何关系
        result = 'out' if int(sum / math.pi) == 0 else 'in'
        return result
        print 'test',result
    
    ################循环取GPS数据##########################
    ####################第三种方式########################
    mysql_host='XXX'
    mysql_database='XXX'
    mysql_user='XXX'
    mysql_passwd='XXX'
    
    def delete_data(sample_time_start):
        sample_time_end=datetime.datetime.strftime(datetime.datetime.strptime(sample_time_start, "%Y-%m-%d %H")+datetime.timedelta(hours=1), '%Y-%m-%d %H')
        conn=MySQLdb.connect(user=mysql_user,passwd=mysql_passwd,host=mysql_host,charset="utf8") #连接到mysql
        cur=conn.cursor()
        conn.select_db(mysql_database)
        sql_delete="""
        delete from gps_msg_test
        where sample_time>="""+"'"+sample_time_start+"'"+" and sample_time<" +"'"+sample_time_end+"'"
        cur.execute(sql_delete)
        conn.commit()
        cur.close() #关闭游标
        conn.close() #关闭数据链接
    
    def TO_DB(sample_time_start,sample_time_end):
        conn=MySQLdb.connect(user=mysql_user,passwd=mysql_passwd,host=mysql_host,charset="utf8") #连接到mysql
        cur=conn.cursor()
        conn.select_db(mysql_database)
        sql="""
        select id,sim,alarm,status,latitude,longitude,asl,speed,direction,sample_time,attch_msg,create_time,car_no
        from gps_msg
        where sample_time>="""+"'"+sample_time_start+"'"+" and sample_time<" +"'"+sample_time_end+"'"
        conn.commit()
        cur.execute(sql)
        gps_data_all=cur.fetchall()
        for gps_data_per in gps_data_all:
            gps_data_per_list=list(gps_data_per)  #转为列表
            gps_data_per_list.append('XX厂区')
            sqlin="insert into gps_msg_test(id,sim,alarm,status,latitude,longitude,asl,speed,direction,sample_time,attch_msg,create_time,car_no,location) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
            point=gps_data_per[5].split(",")+gps_data_per[4].split(",")  #取出每条数据的经纬度,split()转换成列表
            point=map(float,point) #字符串类型转换成浮点型            
            if windingNumber(point,lnglatlist)=='in':
                cur.execute(sqlin,gps_data_per_list)
                conn.commit()
        cur.close() #关闭游标
        conn.close() #关闭数据链接
    ############################################SQL分段执行#################################################
    
    def for_exec(sample_time_hour):
        count=1
        interval=1          #执行的间隔
        number=60/interval  #执行的次数
        sample_time_hour=sample_time_hour+':'
        # sample_time_hour_next='2019-04-10 19:'
        sample_time_hour_next=datetime.datetime.strftime(datetime.datetime.strptime(sample_time_hour, "%Y-%m-%d %H:")+datetime.timedelta(hours=1), '%Y-%m-%d %H:')
        p = Pool(60) #创建进程池
        for sample_time_min_start in range(0,60,interval):
            if count <number:
                sample_time_min_end=sample_time_min_start+interval    #分钟处的结算时间
                sample_time_start=sample_time_hour+str(sample_time_min_start) #开始时间,第一次执行:2019-04-10 18:0
                sample_time_end=sample_time_hour+str(sample_time_min_end)  #结束时间,第一次执行:2019-04-10 18:1
                p.apply_async(TO_DB,args=(sample_time_start,sample_time_end,))
                print sample_time_start,sample_time_end,count,':',time.strftime(ISOTIMEFORMAT, time.localtime()),os.getpid()
            else:
                sample_time_start=sample_time_hour+str(sample_time_min_start)
                sample_time_end=sample_time_hour_next+'0'
                p.apply_async(TO_DB,args=(sample_time_start,sample_time_end,))
                print sample_time_start,sample_time_end,count,':',time.strftime(ISOTIMEFORMAT, time.localtime()),os.getpid()
            count=count+1
        p.close()
        p.join()
     ############################################SQL分段执行#################################################
    ####################第三种方式########################
    ################循环取GPS数据##########################   
    gps_time='2019-04-10 18' #执行的时间点
    delete_data(gps_time)
    for_exec(gps_time)
    print 'end',':',time.strftime(ISOTIMEFORMAT, time.localtime())
    end=time.time()
    print end-start

    效率:耗时1秒多搞定

    image

    注:如果不指定进程的话,pool的默认大小是CPU的核数


    2、坐标转换

    标准坐标转火星坐标:https://lbs.amap.com/api/webservice/guide/api/convert

    火星坐标转标准坐标:

    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    
    import math
    
    
    def GCJ2WGS(lon,lat):
        a = 6378245.0
        ee = 0.00669342162296594323
        PI = 3.14159265358979324
    
        x = lon - 105.0
        y = lat - 35.0
    
        dLon = 300.0 + x + 2.0 * y + 0.1 * x * x + 0.1 * x * y + 0.1 * math.sqrt(abs(x))
        dLon += (20.0 * math.sin(6.0 * x * PI) + 20.0 * math.sin(2.0 * x * PI)) * 2.0 / 3.0
        dLon += (20.0 * math.sin(x * PI) + 40.0 * math.sin(x / 3.0 * PI)) * 2.0 / 3.0
        dLon += (150.0 * math.sin(x / 12.0 * PI) + 300.0 * math.sin(x / 30.0 * PI)) * 2.0 / 3.0
    
        dLat = -100.0 + 2.0 * x + 3.0 * y + 0.2 * y * y + 0.1 * x * y + 0.2 * math.sqrt(abs(x))
        dLat += (20.0 * math.sin(6.0 * x * PI) + 20.0 * math.sin(2.0 * x * PI)) * 2.0 / 3.0
        dLat += (20.0 * math.sin(y * PI) + 40.0 * math.sin(y / 3.0 * PI)) * 2.0 / 3.0
        dLat += (160.0 * math.sin(y / 12.0 * PI) + 320 * math.sin(y * PI / 30.0)) * 2.0 / 3.0
        radLat = lat / 180.0 * PI
        magic = math.sin(radLat)
        magic = 1 - ee * magic * magic
        sqrtMagic = math.sqrt(magic)
        dLat = (dLat * 180.0) / ((a * (1 - ee)) / (magic * sqrtMagic) * PI);
        dLon = (dLon * 180.0) / (a / sqrtMagic * math.cos(radLat) * PI);
        wgsLon = lon - dLon
        wgsLat = lat - dLat
    
        return wgsLon,wgsLat
    print(GCJ2WGS(113.65723,23.171041))
    print("Done!")
    



    参考:https://www.cnblogs.com/dong1/p/10220116.html

    https://www.cnblogs.com/xybaby/p/6510941.html

  • 相关阅读:
    SpringCloud微服务基础学习
    EF6 + MySql 建立项目引用失败
    Forword(请求转发)与Redirect(重定向)区别
    Java 中 Hashtable与HashMap的区别
    cookie和session
    configparser模块的简单使用
    列表中的陷阱
    Python3面向对象编程总结
    Python---RabbitMQ的使用
    Django的template自定义函数的创建和使用
  • 原文地址:https://www.cnblogs.com/fuqu/p/10739194.html
Copyright © 2011-2022 走看看