zoukankan      html  css  js  c++  java
  • 数据清洗——根据地名转化成标准地址

    在数据清洗过程中,行政区域代码的转换最为麻烦、繁琐。下面的工具类借助百度地图的地理编码与逆地理编码接口,把不规范的地名转换成标准地址。

      1 package Util;
      2 
      3 import java.io.IOException;
      4 import java.io.InputStream;
      5 import java.net.HttpURLConnection;
      6 import java.net.MalformedURLException;
      7 import java.net.URL;
      8 import java.util.List;
      9 
     10 import org.dom4j.Document;
     11 import org.dom4j.DocumentException;
     12 import org.dom4j.Element;
     13 import org.dom4j.io.SAXReader;
     14 
     15 public class getregion{
     16              
     17      /* 
     18          * 
     19          * 
     20          * 
     21          * 地址编码
     22          * 
     23          * */
     24      public static String getLocation(String lat,String lng){
     25            String location1="";
     26           String url="http://api.map.baidu.com/reverse_geocoding/v3/?ak=ld0uqubVfSTAUlXH5qIMN2F3Snsp16LU&output=xml&coordtype=wgs84ll&location="+lat+","+lng;
     27           System.out.println(url);
     28           Document doc = null;
     29            java.net.HttpURLConnection conn = null;
     30            InputStream ins = null;
     31            SAXReader reader = null;
     32            try{
     33             //HttpTimeoutHandler hth = new HttpTimeoutHandler(600000);
     34             URL conURL = new URL(null,url);
     35             conn = (HttpURLConnection)conURL.openConnection();
     36             conn.setDoInput(true);
     37             conn.setDoOutput(true);
     38             conn.setUseCaches(false);
     39             ins = conn.getInputStream();
     40             reader =new SAXReader();
     41             doc= reader.read(ins);
     42             //System.out.println(url);
     43             Element root=doc.getRootElement();
     44             String docXmlText=doc.asXML();
     45             //System.out.println(docXmlText);
     46             Element e=root.element("result");
     47             Element location=e.element("formatted_address");
     48             location1=location.asXML();
     49             location1=location1.substring(location1.indexOf("address>")+8,location1.indexOf("</formatted_address>"));
     50            /* System.out.println("lng"+lng1);
     51             System.out.println("lat"+lat1);
     52            // System.out.println("location"+location.asXML());
     53             //System.out.println("xiayukun"+e.asXML());
     54             lng1=lng1.substring(lng1.indexOf("<lng>")+5,lng1.indexOf("</lng>"));
     55             
     56             System.out.println(lng1);
     57             lat1=lat1.substring(lat1.indexOf("<lat>")+5,lat1.indexOf("</lat>"));
     58             System.out.println(lat1);*/
     59             List<Element> list = root.elements("location");
     60             System.out.println(url);
     61             for (Element object : list) {
     62                 System.out.println(url);
     63                 System.out.println(object.getName());
     64                 for (Element element : (List<Element>) object.elements()) {
     65                     System.out.print(((Element) element).getName() + ":");
     66                     System.out.print(element.getText() + " ");
     67                 }
     68                 System.out.println();
     69 
     70             }
     71 
     72             ins.close();
     73             conn.disconnect();
     74            }catch (MalformedURLException e) {
     75             e.printStackTrace();
     76            } catch (IOException e) {
     77             e.printStackTrace();   
     78            } catch (DocumentException e) {
     79             e.printStackTrace();
     80            }catch(Exception e){
     81             e.printStackTrace();
     82            }finally {
     83             try {
     84              if (ins != null) {
     85               ins.close();
     86               ins = null;
     87              }
     88             } catch (IOException e1) {
     89              e1.printStackTrace();
     90             }
     91             try {
     92              if (conn != null) {
     93               conn.disconnect();
     94               conn = null;
     95              }
     96             } catch (Exception e2) {
     97              e2.printStackTrace();
     98             }
     99            }
    100            return location1;
    101         }
    102     /* 
    103      * 
    104      * 
    105      * 
    106      * 地址你编码
    107      * 
    108      * */
    109     public static String getlocation1(String loc){
    110         String location2="";
    111           String url="http://api.map.baidu.com/geocoding/v3/?address="+loc+"10号&output=xml&ak=ld0uqubVfSTAUlXH5qIMN2F3Snsp16LU&callback=showLocation";
    112           System.out.println(url);
    113           Document doc = null;
    114            java.net.HttpURLConnection conn = null;
    115            InputStream ins = null;
    116            SAXReader reader = null;
    117            try{
    118             //HttpTimeoutHandler hth = new HttpTimeoutHandler(600000);
    119             URL conURL = new URL(null,url);
    120             conn = (HttpURLConnection)conURL.openConnection();
    121             conn.setDoInput(true);
    122             conn.setDoOutput(true);
    123             conn.setUseCaches(false);
    124             ins = conn.getInputStream();
    125             reader =new SAXReader();
    126             doc= reader.read(ins);
    127             //System.out.println(url);
    128             Element root=doc.getRootElement();
    129             String docXmlText=doc.asXML();
    130             //System.out.println(docXmlText);
    131             Element e=root.element("result");
    132             Element location=e.element("location");
    133             Element lng=location.element("lng");
    134             Element lat=location.element("lat");
    135             String lng1=lng.asXML();
    136             String lat1=lat.asXML();
    137             System.out.println("lng"+lng1);
    138             System.out.println("lat"+lat1);
    139            // System.out.println("location"+location.asXML());
    140             //System.out.println("xiayukun"+e.asXML());
    141             lng1=lng1.substring(lng1.indexOf("<lng>")+5,lng1.indexOf("</lng>"));
    142             
    143             System.out.println(lng1);
    144             lat1=lat1.substring(lat1.indexOf("<lat>")+5,lat1.indexOf("</lat>"));
    145             System.out.println(lat1);
    146             location2=getLocation(lat1,lng1);
    147             List<Element> list = root.elements("location");
    148             System.out.println(url);
    149             for (Element object : list) {
    150                 System.out.println(url);
    151                 System.out.println(object.getName());
    152                 for (Element element : (List<Element>) object.elements()) {
    153                     System.out.print(((Element) element).getName() + ":");
    154                     System.out.print(element.getText() + " ");
    155                 }
    156                 System.out.println();
    157 
    158             }
    159 
    160             ins.close();
    161             conn.disconnect();
    162            }catch (MalformedURLException e) {
    163             e.printStackTrace();
    164            } catch (IOException e) {
    165             e.printStackTrace();   
    166            } catch (DocumentException e) {
    167             e.printStackTrace();
    168            }catch(Exception e){
    169             e.printStackTrace();
    170            }finally {
    171             try {
    172              if (ins != null) {
    173               ins.close();
    174               ins = null;
    175              }
    176             } catch (IOException e1) {
    177              e1.printStackTrace();
    178             }
    179             try {
    180              if (conn != null) {
    181               conn.disconnect();
    182               conn = null;
    183              }
    184             } catch (Exception e2) {
    185              e2.printStackTrace();
    186             }
    187            }
    188            return location2;
    189         }
    190     
    191     public static void main(String[] strgs){
    192         System.out.println("aaaa");
    193         String loc="河北邯郸马头经济开发区";
    194         System.out.println(loc);
    195         String structloc="";
    196         structloc=getlocation1(loc);
    197         System.out.println("struct_location:"+structloc);
    198         
    199         /*//System.out.println(addressResolution("湖北省武汉市洪山区"));
    200         //getPoint("河北省迁安市聚鑫街2126号");
    201         ArrayList<Map<String,String>> table=new ArrayList<Map<String,String>>();
    202         table = addressResolution(structloc);
    203         System.out.println(table.size());                
    204         for(int i = 0; i < table.size(); i++){
    205             System.out.println(table.get(i).get("province")+table.get(i).get("city")+table.get(i).get("county"));
    206         }*/
    207     }
    208     
    209     
    210     
    211 }
  • 相关阅读:
    DHCP服务器搭建
    Nginx Web服务器
    ansible批量管理工具
    同网段存活IP公钥分发脚本
    inotify事件监控工具
    搭建云yum仓库和本地定时yum仓库
    NFS网络文件共享系统
    shell 脚本的讲解 与应用
    awk 命令精讲
    ACL权限控制 及特殊权限
  • 原文地址:https://www.cnblogs.com/smartisn/p/11806801.html
Copyright © 2011-2022 走看看