zoukankan      html  css  js  c++  java
  • Lucene Spatial构建地理空间索引

    一、Maven依赖

    <?xml version="1.0" encoding="UTF-8"?>
    <project xmlns="http://maven.apache.org/POM/4.0.0"
             xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
             xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
        <modelVersion>4.0.0</modelVersion>
    
        <groupId>com.sdu.lucene</groupId>
        <artifactId>lucene-learn</artifactId>
        <version>1.0-SNAPSHOT</version>
    
        <properties>
            <lucene.version>6.1.0</lucene.version>
            <spatial4j.version>0.6</spatial4j.version>
            <guava.version>19.0</guava.version>
        </properties>
    
        <dependencies>
            <dependency>
                <groupId>com.google.guava</groupId>
                <artifactId>guava</artifactId>
                <version>${guava.version}</version>
            </dependency>
    
            <dependency>
                <groupId>org.apache.lucene</groupId>
                <artifactId>lucene-core</artifactId>
                <version>${lucene.version}</version>
            </dependency>
    
            <!-- Lucene analyzers (tokenization) -->
            <dependency>
                <groupId>org.apache.lucene</groupId>
                <artifactId>lucene-analyzers-common</artifactId>
                <version>${lucene.version}</version>
            </dependency>
    
    
            <!-- Geospatial coordinate support -->
            <dependency>
                <groupId>org.apache.lucene</groupId>
                <artifactId>lucene-spatial</artifactId>
                <version>${lucene.version}</version>
            </dependency>
            <dependency>
                <groupId>org.apache.lucene</groupId>
                <artifactId>lucene-spatial-extras</artifactId>
                <version>${lucene.version}</version>
            </dependency>
    
            <!--
                Spatial4j is a general-purpose, ASL-licensed open-source Java library for
                spatial/geospatial work. Its core capabilities fall into three areas:
                1 : common shapes that work in both Euclidean and geodesic (surface of a sphere) world models
                2 : distance calculations and other math
                3 : reading shapes from WKT-formatted strings
            -->
            <dependency>
                <groupId>org.locationtech.spatial4j</groupId>
                <artifactId>spatial4j</artifactId>
                <version>${spatial4j.version}</version>
            </dependency>
        </dependencies>
    
        <build>
            <finalName>lucene-learn</finalName>
            <plugins>
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-compiler-plugin</artifactId>
                    <version>3.5</version>
                    <configuration>
                        <source>1.8</source>
                        <target>1.8</target>
                        <encoding>UTF-8</encoding>
                    </configuration>
                </plugin>
            </plugins>
        </build>
    
    </project>

    二、Lucene Spatial

    /**
     * Example of building and querying a geospatial index with Lucene Spatial.
     *
     * <p>Usage order matters: call {@link #init()} first, then {@link #createIndex(List)},
     * then {@link #search()}. The other methods read the fields that {@code init()} assigns.
     *
     * @author hanhan.zhang
     */
    public class LuceneSpatial {

        /** Geo file used by {@link #main(String[])} when no path is given on the command line. */
        private static final String DEFAULT_GEO_FILE = "/Users/hanhan.zhang/Downloads/geo.txt";

        /**
         * Spatial4j context.
         * 1: A SpatialContext can be configured and created through SpatialContextFactory.
         * 2: SpatialContext properties (as described by the original author):
         *          DistanceCalculator (default GeodesicSphereDistCalc.Haversine, treats the earth as a perfect sphere)
         *          ShapeFactory (default ShapeFactoryImpl)
         *          Rectangle (world bounds in lon/lat: RectangleImpl(-180, 180, -90, 90, this))
         *          BinaryCodec
         */
        private SpatialContext ctx;

        /**
         * Strategy that defines how shapes are indexed and queried.
         */
        private SpatialStrategy strategy;

        /**
         * Directory that stores the index.
         */
        private Directory directory;

        /**
         * Initializes the spatial context, the indexing/query strategy and the index directory.
         * Must be called before any other instance method.
         */
        protected void init() {
            // A SpatialContext can also be built through the SpatialContextFactory factory class.
            this.ctx = SpatialContext.GEO;

            // Geohash grid with at most 11 levels (i.e. the geohash precision).
            // 1: per the original author, the maximum geohash precision supported is 24
            // 2: GeohashUtils offers shared helpers for converting lon/lat to geohash values
            SpatialPrefixTree spatialPrefixTree = new GeohashPrefixTree(ctx, 11);

            // Two main prefix-tree strategy implementations exist (both extend PrefixTreeStrategy):
            // 1: RecursivePrefixTreeStrategy (supports indexing and searching any Shape)
            // 2: TermQueryPrefixTreeStrategy (supports Point shapes only)
            this.strategy = new RecursivePrefixTreeStrategy(spatialPrefixTree, "location");

            // In-memory index directory.
            this.directory = new RAMDirectory();
        }

        /**
         * Indexes one document per city into {@link #directory}.
         *
         * @param cityGeoInfos cities to index
         * @throws Exception if the index cannot be written
         */
        protected void createIndex(List<CityGeoInfo> cityGeoInfos) throws Exception {
            IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
            // try-with-resources releases the writer (and its write lock) even if indexing fails.
            try (IndexWriter indexWriter = new IndexWriter(directory, config)) {
                indexWriter.addDocuments(newSampleDocument(ctx, strategy, cityGeoInfos));
            }
        }

        /**
         * Builds the Lucene documents to index, one per city.
         *
         * @param ctx          spatial context used to create the point shapes
         * @param strategy     strategy that produces the indexable spatial fields
         * @param cityGeoInfos cities to convert
         * @return one document per input city, in input order
         */
        protected List<Document> newSampleDocument(SpatialContext ctx, SpatialStrategy strategy, List<CityGeoInfo> cityGeoInfos) {
            return cityGeoInfos.stream()
                               .map(cgi -> newDocument(ctx, strategy, cgi))
                               .collect(Collectors.toList());
        }

        /**
         * Builds the document for a single city: a stored and doc-values "id", a stored "city"
         * name, the spatial index fields, and the stored "x,y" coordinates under the strategy's field name.
         */
        private static Document newDocument(SpatialContext ctx, SpatialStrategy strategy, CityGeoInfo cgi) {
            Document doc = new Document();
            // "id" is stored (to read it back from hits) and added as doc values (to sort on it).
            doc.add(new StoredField("id", cgi.getCityId()));
            doc.add(new NumericDocValuesField("id", cgi.getCityId()));
            doc.add(new StringField("city", cgi.getName(), Field.Store.YES));

            // x = longitude, y = latitude.
            Point pt = ctx.getShapeFactory().pointXY(cgi.getLnt(), cgi.getLat());
            // The strategy creates the index fields for the point (geohash cells up to the max level).
            for (Field field : strategy.createIndexableFields(pt)) {
                doc.add(field);
            }
            // Keep the raw coordinates as "x,y" so printDocument can parse them back.
            doc.add(new StoredField(strategy.getFieldName(), pt.getX() + "," + pt.getY()));
            return doc;
        }

        /**
         * Runs two sample searches and prints the hits:
         * a circle query around Qingdao sorted by id, then all documents sorted by distance from Yantai.
         *
         * @throws Exception if the index cannot be read or searched
         */
        public void search() throws Exception {
            // try-with-resources closes the reader even if a search or printing fails.
            try (IndexReader indexReader = DirectoryReader.open(directory)) {
                IndexSearcher indexSearcher = new IndexSearcher(indexReader);

                // Sort by id, ascending.
                Sort idSort = new Sort(new SortField("id", SortField.Type.INT));

                // Everything intersecting a circle of radius 100 km centered on
                // Qingdao (lon 120.33, lat 36.07); the radius is converted from km to degrees.
                SpatialArgs args = new SpatialArgs(SpatialOperation.Intersects,
                                                   ctx.getShapeFactory().circle(120.33, 36.07, DistanceUtils.dist2Degrees(100, DistanceUtils.EARTH_MEAN_RADIUS_KM)));
                Query query = strategy.makeQuery(args);
                TopDocs topDocs = indexSearcher.search(query, 10, idSort);
                // Print the hits with their distance from the circle's center.
                printDocument(topDocs, indexSearcher, args.getShape().getCenter());

                System.out.println("==========================华丽的分割线=========================");

                // Point (x, y) = (longitude, latitude): the user's current location (Yantai).
                Point pt = ctx.getShapeFactory().pointXY(121.39,37.52);

                // Value source yielding the distance between the user's location and each indexed
                // point of the "location" field; DEG_TO_KM makes the unit kilometers.
                ValueSource valueSource = strategy.makeDistanceValueSource(pt, DistanceUtils.DEG_TO_KM);

                // Sort by distance from the user's location. The argument is "reverse":
                // false = ascending (nearest first), true = descending (farthest first).
                Sort distSort = new Sort(valueSource.getSortField(false)).rewrite(indexSearcher);
                TopDocs nearestDocs = indexSearcher.search(new MatchAllDocsQuery(), 10, distSort);
                printDocument(nearestDocs, indexSearcher, pt);
            }
        }

        /**
         * Prints each hit with its doc id, city id, city name and distance in km from {@code point}.
         *
         * @param topDocs       hits to print
         * @param indexSearcher searcher used to load the stored fields of each hit
         * @param point         reference point the distance is measured from
         * @throws Exception if a stored document cannot be loaded
         */
        protected void printDocument(TopDocs topDocs, IndexSearcher indexSearcher, Point point) throws Exception {
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                int docId = scoreDoc.doc;
                Document document = indexSearcher.doc(docId);
                int cityId = document.getField("id").numericValue().intValue();
                String city = document.getField("city").stringValue();

                // Coordinates were stored as "x,y" at index time.
                String[] locations = document.getField(strategy.getFieldName()).stringValue().split(",");
                double xPoint = Double.parseDouble(locations[0]);
                double yPoint = Double.parseDouble(locations[1]);

                // calcDistance returns degrees; convert to kilometers for display.
                double distDEG = ctx.calcDistance(point, xPoint, yPoint);
                double distanceKm = DistanceUtils.degrees2Dist(distDEG, DistanceUtils.EARTH_MEAN_RADIUS_KM);
                System.out.println("docId=" + docId + "\tcityId=" + cityId + "\tcity=" + city + "\tdistance=" + distanceKm + "KM");
            }
        }

        /**
         * Builds the index from a city geo file and runs the sample searches.
         *
         * @param args optional: {@code args[0]} is the path of the geo file; when absent,
         *             {@link #DEFAULT_GEO_FILE} is used
         * @throws Exception if indexing or searching fails
         */
        public static void main(String[] args) throws Exception {
            String geoFile = args.length > 0 ? args[0] : DEFAULT_GEO_FILE;
            LuceneSpatial luceneSpatial = new LuceneSpatial();
            luceneSpatial.init();
            luceneSpatial.createIndex(GeoHelper.getCityGeoInfo(geoFile));
            luceneSpatial.search();
        }

    }

    三、地理信息文件

    招远:  120.38,37.35
    舟山:  122.207216,29.985295
    齐齐哈尔:123.97,47.33
    盐城:  120.13,33.38
    赤峰:  118.87,42.28
    青岛:  120.33,36.07
    乳山:  121.52,36.89
    金昌:  102.188043,38.520089
    泉州:  118.58,24.93
    莱西:  120.53,36.86
    日照:  119.46,35.42
    胶南:  119.97,35.88
    南通:  121.05,32.08
    拉萨:  91.11,29.97
    云浮:  112.02,22.93
    梅州:  116.1,24.55
    文登:  122.05,37.2
    上海:  121.48,31.22
    攀枝花: 101.718637,26.582347
    威海:  122.1,37.5
    承德:  117.93,40.97
    厦门:  118.1,24.46
    汕尾:  115.375279,22.786211
    潮州:  116.63,23.68
    丹东:  124.37,40.13
    太仓:  121.1,31.45
    曲靖:  103.79,25.51
    烟台:  121.39,37.52
    福州:  119.3,26.08
    瓦房店: 121.979603,39.627114
    即墨:  120.45,36.38
    抚顺:  123.97,41.97
    玉溪:  102.52,24.35
    张家口: 114.87,40.82
    阳泉:  113.57,37.85
    莱州:  119.942327,37.177017
    湖州:  120.1,30.86
    汕头:  116.69,23.39
    昆山:  120.95,31.39
    宁波:  121.56,29.86
    湛江:  110.359377,21.270708
    揭阳:  116.35,23.55
    荣成:  122.41,37.16
    连云港: 119.16,34.59
    葫芦岛: 120.836932,40.711052
    常熟:  120.74,31.64
    东莞:  113.75,23.04
    河源:  114.68,23.73
    淮安:  119.15,33.5
    泰州:  119.9,32.49
    南宁:  108.33,22.84
    营口:  122.18,40.65
    惠州:  114.4,23.09
    江阴:  120.26,31.91
    蓬莱:  120.75,37.8
    韶关:  113.62,24.84
    嘉峪关: 98.289152,39.77313
    广州:  113.23,23.16
    延安:  109.47,36.6
    太原:  112.53,37.87
    清远:  113.01,23.7
    中山:  113.38,22.52
    昆明:  102.73,25.04
    寿光:  118.73,36.86
    盘锦:  122.070714,41.119997
    长治:  113.08,36.18
    深圳:  114.07,22.62
    珠海:  113.52,22.3
    宿迁:  118.3,33.96
    咸阳:  108.72,34.36
    铜川:  109.11,35.09
    平度:  119.97,36.77
    佛山:  113.11,23.05
    海口:  110.35,20.02
    江门:  113.06,22.61
    章丘:  117.53,36.72
    肇庆:  112.44,23.05
    大连:  121.62,38.92
    临汾:  111.5,36.08
    吴江:  120.63,31.16
    石嘴山: 106.39,39.04
    沈阳:  123.38,41.8
    苏州:  120.62,31.32
    茂名:  110.88,21.68
    嘉兴:  120.76,30.77
    长春:  125.35,43.88
    胶州:  120.03336,36.264622
    银川:  106.27,38.47
    张家港: 120.555821,31.875428
    三门峡: 111.19,34.76
    锦州:  121.15,41.13
    南昌:  115.89,28.68
    柳州:  109.4,24.33
    三亚:  109.511909,18.252847
    自贡:  104.778442,29.33903
    吉林:  126.57,43.87
    阳江:  111.95,21.85
    泸州:  105.39,28.91
    西宁:  101.74,36.56
    宜宾:  104.56,29.77
    呼和浩特:111.65,40.82
    成都:  104.06,30.67
    大同:  113.3,40.12
    镇江:  119.44,32.2
    桂林:  110.28,25.29
    张家界: 110.479191,29.117096
    宜兴:  119.82,31.36
    北海:  109.12,21.49
    西安:  108.95,34.27
    金坛:  119.56,31.74
    东营:  118.49,37.46
    牡丹江: 129.58,44.6
    遵义:  106.9,27.7
    绍兴:  120.58,30.01
    扬州:  119.42,32.39
    常州:  119.95,31.79
    潍坊:  119.1,36.62
    重庆:  106.54,29.59
    台州:  121.420757,28.656386
    南京:  118.78,32.04
    滨州:  118.03,37.36
    贵阳:  106.71,26.57
    无锡:  120.29,31.59
    本溪:  123.73,41.3
    克拉玛依:84.77,45.59
    渭南:  109.5,34.52
    马鞍山: 118.48,31.56
    宝鸡:  107.15,34.38
    焦作:  113.21,35.24
    句容:  119.16,31.95
    北京:  116.46,39.92
    徐州:  117.2,34.26
    衡水:  115.72,37.72
    包头:  110,40.58
    绵阳:  104.73,31.48
    乌鲁木齐:87.68,43.77
    枣庄:  117.57,34.86
    杭州:  120.19,30.26
    淄博:  118.05,36.78
    鞍山:  122.85,41.12
    溧阳:  119.48,31.43
    库尔勒: 86.06,41.68
    安阳:  114.35,36.1
    开封:  114.35,34.79
    济南:  117,36.65
    德阳:  104.37,31.13
    温州:  120.65,28.01
    九江:  115.97,29.71
    邯郸:  114.47,36.6
    临安:  119.72,30.23
    兰州:  103.73,36.03
    沧州:  116.83,38.33
    临沂:  118.35,35.05
    南充:  106.110698,30.837793
    天津:  117.2,39.13
    富阳:  119.95,30.07
    泰安:  117.13,36.18
    诸暨:  120.23,29.71
    郑州:  113.65,34.76
    哈尔滨: 126.63,45.75
    聊城:  115.97,36.45
    芜湖:  118.38,31.33
    唐山:  118.02,39.63
    平顶山: 113.29,33.75
    邢台:  114.48,37.05
    德州:  116.29,37.45
    济宁:  116.59,35.38
    荆州:  112.239741,30.335165
    宜昌:  111.3,30.7
    义乌:  120.06,29.32
    丽水:  119.92,28.45
    洛阳:  112.44,34.7
    秦皇岛: 119.57,39.95
    株洲:  113.16,27.83
    石家庄: 114.48,38.03
    莱芜:  117.67,36.19
    常德:  111.69,29.05
    保定:  115.48,38.85
    湘潭:  112.91,27.87
    金华:  119.64,29.12
    岳阳:  113.09,29.37
    长沙:  113,28.21
    衢州:  118.88,28.97
    廊坊:  116.7,39.53
    菏泽:  115.480656,35.23375
    合肥:  117.27,31.86
    武汉:  114.31,30.52
    大庆:  125.03,46.5
  • 相关阅读:
    Atitti 图像处理 图像混合 图像叠加 blend 原理与实现
    Atitit Gaussian Blur 高斯模糊 的原理and实现and 用途
    Atitit 图像处理 灰度图片 灰度化的原理与实现
    Atitit (Sketch Filter)素描滤镜的实现  图像处理  attilax总结
    Atitit 实现java的linq 以及与stream api的比较
    Atitit attilax在自然语言处理领域的成果
    Atitit 图像处理 常用8大滤镜效果 Jhlabs 图像处理类库 java常用图像处理类库
    Atitit 图像处理--图像分类 模式识别 肤色检测识别原理 与attilax的实践总结
    Atitit apache 和guava的反射工具
    atitit。企业的价值观 员工第一 vs 客户第一.docx
  • 原文地址:https://www.cnblogs.com/hanfight/p/5776769.html
Copyright © 2011-2022 走看看