zoukankan      html  css  js  c++  java
  • SQL版本的Dijkstra最短路径算法

    受这篇文章《SQL,NoSQL以及数据库的实质》结尾处题目的启发,我尝试写了一个SQL版本的Dijkstra最短路径算法。算法描述如下:

    前提假设:
         Hive支持Stored Procedure
         或者
         MySQL支持insert into、insert overwrite、create table as select操作
     
    数据结构:
    -- Edge list: one row per pair of nodes with the distance between them.
    -- The algorithm only follows a row from src to dst.
    create table meta_distances (
        src int,
        dst int,
        distance int
    );
    -- Nodes whose shortest distance from the source is already final,
    -- together with that distance.
    create table known_nodes (
        node int,
        distance int
    );
    -- Nodes whose shortest distance is not final yet, together with the
    -- best (tentative) distance found so far.
    create table unknown_nodes (
        node int,
        distance int
    );
    目标:求节点1(源点)到其他所有节点的最短距离
     
    初始,各表包含的数据:
    meta_distances:
         各个节点之间的距离
    known_nodes:
         (1,0)
    unknown_nodes:
         (2, MaxInt)
         (3, MaxInt)
         (4, MaxInt)
         …...
         (n, MaxInt)
     
         pivot_node=1
    create procedure Dijkstra(in pivot_node int)
    begin
        -- Single-source shortest paths (Dijkstra) written as set operations.
        --
        -- Parameter:
        --   pivot_node  the source node on entry; it must already be present in
        --               known_nodes. It is reused as the most recently settled
        --               node on every iteration.
        --
        -- Reads meta_distances, moves one row per iteration from unknown_nodes
        -- to known_nodes, and rebuilds the scratch table tmp_distance_a.
        --
        -- NOTE(review): the dialect is the hybrid the article assumes (stored
        -- procedures plus insert overwrite / create table as select); adapt the
        -- statement forms to the real target engine.
        -- NOTE(review): if unknown_nodes is seeded with the largest int value,
        -- "distance + min_distance" may overflow on engines with fixed-width int
        -- arithmetic once an unreachable node becomes the pivot - confirm.
        declare unknown_nodes_count int default 0;
        declare min_distance int default 0;

        -- Count first, so an empty unknown_nodes table skips the loop entirely.
        select count(*)
          into unknown_nodes_count
          from unknown_nodes;

        while unknown_nodes_count > 0 do
            -- Final distance of the current pivot node.
            select distance
              into min_distance
              from known_nodes
             where node = pivot_node;

            -- Distance from the source to each unsettled neighbour when the
            -- path goes through pivot_node. min() collapses parallel edges so
            -- the join below cannot multiply rows of unknown_nodes.
            drop table if exists tmp_distance_a;
            create table tmp_distance_a as
            select
                f1.dst as node,
                min(f1.distance) + min_distance as distance
            from meta_distances as f1
            inner join unknown_nodes as f2
                on f1.dst = f2.node
            where f1.src = pivot_node
            group by f1.dst;

            -- Relaxation: keep the smaller of the old tentative distance and
            -- the new candidate; nodes without a candidate are left unchanged.
            insert overwrite table unknown_nodes
            select
                f2.node,
                case
                    when f1.node is null then f2.distance
                    when f1.distance > f2.distance then f2.distance
                    else f1.distance
                end as distance
            from unknown_nodes as f2
            left join tmp_distance_a as f1
                on f1.node = f2.node;

            -- Pick exactly one closest node as the next pivot. Ties are broken
            -- by node id so that every tied node gets its own turn as pivot
            -- instead of being settled without relaxing its edges.
            select node
              into pivot_node
              from unknown_nodes
             order by distance, node
             limit 1;

            -- Its tentative distance is now final: move it to known_nodes ...
            insert into table known_nodes
            select
                node,
                distance
            from unknown_nodes
            where node = pivot_node;

            -- ... and remove it from unknown_nodes.
            insert overwrite table unknown_nodes
            select
                node,
                distance
            from unknown_nodes
            where node <> pivot_node;

            -- Number of nodes still waiting to be settled.
            select count(*)
              into unknown_nodes_count
              from unknown_nodes;
        end while;
    end

    java版本

    /**
     * Java driver for the SQL-based Dijkstra shortest-path computation.
     *
     * <p>Works on three tables: meta_distances(src, dst, distance),
     * known_nodes(node, distance) and unknown_nodes(node, distance), and uses
     * tmp_distance_a as a scratch table.
     *
     * <p>NOTE(review): {@code hive} is not declared in this class; it is presumably
     * a client whose {@code get(sql)} returns the single int result of a query and
     * whose {@code execute(sql)} runs a statement - confirm against the real API.
     */
    public class DijkstraSample {
        /**
         * Settles one node per iteration until unknown_nodes is empty.
         *
         * @param pivotNode the source node; it must already be present in
         *                  known_nodes (with distance 0 for the usual setup)
         */
        public static void compute(int pivotNode){
            // Stop as soon as every node has a final shortest distance.
            while(hive.get("select count(*) from unknown_nodes")>0){
                // Final distance of the current pivot. Fetching it up front avoids a
                // scalar subquery in the select list.
                // NOTE(review): if unknown_nodes is seeded with Integer.MAX_VALUE, the
                // addition below may overflow once an unreachable node becomes the
                // pivot - confirm how MaxInt is represented.
                int pivotDistance=hive.get(
                        "select distance from known_nodes where node="+pivotNode
                        );

                // Distance from the source to each unsettled neighbour when the path
                // goes through the pivot. min() collapses parallel edges so the join
                // in the next statement cannot multiply rows of unknown_nodes.
                hive.execute("drop table if exists tmp_distance_a");
                hive.execute(
                        "create table tmp_distance_a as "+
                        "select "+
                             "f1.dst as node, "+
                             "min(f1.distance)+"+pivotDistance+" as distance "+
                        "from "+
                             "meta_distances f1 "+
                        "join "+
                             "unknown_nodes f2 "+
                        "on "+
                             "f1.dst=f2.node "+
                        "where "+
                             "f1.src="+pivotNode+" "+
                        "group by "+
                             "f1.dst"
                        );

                // Relaxation: keep the smaller of the old tentative distance and the
                // new candidate; nodes without a candidate are left unchanged.
                hive.execute(
                        "insert overwrite table unknown_nodes "+
                        "select "+
                             "f2.node, "+
                             "case "+
                                  "when f1.node is null then f2.distance "+
                                  "when f1.distance>f2.distance then f2.distance "+
                                  "else f1.distance "+
                             "end as distance "+
                        "from "+
                             "unknown_nodes f2 "+
                        "left outer join "+
                             "tmp_distance_a f1 "+
                        "on "+
                             "f1.node=f2.node"
                        );

                // Pick exactly one closest node as the next pivot. Ties are broken by
                // node id so every tied node gets its own turn as pivot. The ordering
                // is done in a subquery so only selected columns are sorted on.
                pivotNode=hive.get(
                        "select node "+
                        "from "+
                             "(select node, distance "+
                             "from unknown_nodes "+
                             "order by distance, node "+
                             "limit 1) nearest"
                        );

                // Its tentative distance is now final: move it to known_nodes ...
                hive.execute(
                        "insert into table known_nodes "+
                        "select "+
                             "node, "+
                             "distance "+
                        "from "+
                             "unknown_nodes "+
                        "where "+
                             "node="+pivotNode
                        );

                // ... and remove it from unknown_nodes.
                hive.execute(
                        "insert overwrite table unknown_nodes "+
                        "select "+
                             "node, "+
                             "distance "+
                        "from "+
                             "unknown_nodes "+
                        "where "+
                             "node<>"+pivotNode
                        );
            }
        }
    }

    也许有人会问:用SQL实现这个算法的意义是什么?它与用常规语言写出来的程序相比,几乎没有任何优势。当数据规模控制在一定范围之内时,我承认是这样的。但是,设想如果我们面对的节点规模是几百万甚至几千万数量级,而我们的机器只有几个G内存时,如何处理?

    答案是:Hive+SQL。

  • 相关阅读:
    bash 中的 ; && 与|| 的作用
    远程root用户无法登陆
    MySQL5.6主从同步(热备份)
    进程之间的通信方式
    远程连接openGuass配置
    openGuass1.1.0部署
    Go同步原语
    spring boot集成activiti6
    解决默认的jackson序列化循环引用的问题
    spring boot集成websocket
  • 原文地址:https://www.cnblogs.com/linghuaichong/p/4367055.html
Copyright © 2011-2022 走看看