zoukankan      html  css  js  c++  java
  • 学习Spark GraphX

    import org.apache.spark._
    import org.apache.spark.graphx._
    
    import org.apache.spark.rdd.RDD
    
    // Intended type of the property graph: vertex attributes are (String, String) pairs
    // (name, position) and edge attributes are String labels.
    // NOTE: this is a bare declaration with no right-hand side, so the REPL rejects it
    // (see the compile error printed right after); a graph of this shape is built later as `graph`.
    val userGraph: Graph[(String, String), String]
    
    Name: Compile Error
    Message: <console>:30: error: class $iw needs to be abstract, since value userGraph is not defined
    class $iw extends Serializable {
          ^
    
    StackTrace: 
    
    // Vertex RDD: every element pairs a vertex id with its (name, position) attribute tuple.
    val users: RDD[(VertexId, (String, String))] = sc.parallelize(
      Seq(
        (3L, ("rxin", "student")),
        (7L, ("jgonzal", "postdoc")),
        (5L, ("franklin", "prof")),
        (2L, ("istoica", "prof"))
      )
    )
    
    users = ParallelCollectionRDD[0] at parallelize at <console>:35
    
    
    
    
    
    
    ParallelCollectionRDD[0] at parallelize at <console>:35
    
    // Edge RDD: every Edge(source id, destination id, label) describes one relationship.
    val relationships: RDD[Edge[String]] = sc.parallelize(
      Seq(
        Edge(3L, 7L, "collab"),
        Edge(5L, 3L, "advisor"),
        Edge(2L, 5L, "colleague"),
        Edge(5L, 7L, "pi")
      )
    )
    
    relationships = ParallelCollectionRDD[1] at parallelize at <console>:34
    
    
    
    
    
    
    ParallelCollectionRDD[1] at parallelize at <console>:34
    
    // Fallback (name, position) attribute handed to Graph(...) as the default vertex attribute.
    val defaultUser: (String, String) = ("John Doe", "Missing")
    // Assemble the property graph from the vertex RDD and the edge RDD.
    val graph: Graph[(String, String), String] = Graph(users, relationships, defaultUser)
    
    defaultUser = (John Doe,Missing)
    graph = org.apache.spark.graphx.impl.GraphImpl@7c40e7e8
    
    
    
    
    
    
    org.apache.spark.graphx.impl.GraphImpl@7c40e7e8
    
    // Count the vertices whose position is "postdoc".
    graph.vertices.filter { case (_, (_, position)) => position == "postdoc" }.count()
    
    1
    
    // Count the vertices whose position is "prof".
    graph.vertices.filter { case (_, (_, position)) => position == "prof" }.count()
    
    2
    
    // Count the edges whose source id is smaller than their destination id.
    graph.edges.filter(edge => edge.srcId < edge.dstId).count()
    
    3
    

    Graph 操作

    详见 https://spark.apache.org/docs/latest/graphx-programming-guide.html 的Graph类

    1. 图信息

    // Number of edges in the graph
    graph.numEdges
    
    4
    
    // Number of vertices in the graph
    graph.numVertices
    
    4
    
    // Compute in-degrees. `inDegrees` is a VertexRDD[Int], which already holds at most one
    // (vertexId, degree) entry per vertex, so the former reduceByKey(_ + _) only added a
    // redundant shuffle. Vertices without incoming edges do not appear in the result.
    graph.inDegrees.take(5)
    
    Array((3,1), (5,1), (7,2))
    
    // Compute out-degrees. `outDegrees` is a VertexRDD[Int], which already holds at most one
    // (vertexId, degree) entry per vertex, so the former reduceByKey(_ + _) only added a
    // redundant shuffle. Vertices without outgoing edges do not appear in the result.
    graph.outDegrees.take(5)
    
    Array((2,1), (3,1), (5,2))
    
    // Compute total degrees (in + out). `degrees` is a VertexRDD[Int], which already holds at
    // most one (vertexId, degree) entry per vertex, so the former reduceByKey(_ + _) only
    // added a redundant shuffle.
    graph.degrees.collect()
    
    Array((2,1), (3,2), (5,3), (7,2))
    

    2.图视图

    // Vertex view: count the vertices whose position is "postdoc".
    graph.vertices.filter { case (_, (_, position)) => position == "postdoc" }.count()
    
    1
    
    // Edge view: count the edges whose source id is smaller than their destination id.
    graph.edges.filter(edge => edge.srcId < edge.dstId).count()
    
    3
    
    // Triplet view: each element combines an edge with its source and destination vertex
    // attributes, printed as ((srcId, srcAttr), (dstId, dstAttr), edgeAttr).
    graph.triplets.collect()
    
    Array(((3,(rxin,student)),(7,(jgonzal,postdoc)),collab), ((5,(franklin,prof)),(3,(rxin,student)),advisor), ((2,(istoica,prof)),(5,(franklin,prof)),colleague), ((5,(franklin,prof)),(7,(jgonzal,postdoc)),pi))
    

    3.图缓存

    • persist
    • cache
    • unpersistVertices

    4.分区

    • partitionBy

    5.顶点与边的转换

    • mapVertices
    • mapEdges
    • mapTriplets

    6.修改图结构

    • reverse
    • subgraph
    • mask
    • groupEdges

    7.用图连接RDD

    • joinVertices
    • outerJoinVertices

    8.汇集邻近的三元组信息

    • collectNeighborIds
    • collectNeighbors
    • aggregateMessages

    9.迭代并行图计算

    • pregel

    10.基本图算法

    • pageRank
    • connectedComponents
    • triangleCount
    • stronglyConnectedComponents
    
    
  • 相关阅读:
    关于这个 blog
    P6499 [COCI2016-2017#2] Burza 题解
    CF1172F Nauuo and Bug 题解
    CF1479D Odd Mineral Resource 题解
    CF1442E Black, White and Grey Tree 题解
    CF1442D Sum 题解
    CF1025D Recovering BST 题解
    CF1056E Check Transcription 题解
    CF1025F Disjoint Triangles 题解
    红包算法的PHP实现
  • 原文地址:https://www.cnblogs.com/DataNerd/p/10148152.html
Copyright © 2011-2022 走看看