zoukankan      html  css  js  c++  java
  • 学习Spark GraphX

    import org.apache.spark._
    import org.apache.spark.graphx._
    
    import org.apache.spark.rdd.RDD
    
    // Type of the property graph used below: vertex attribute is a (String, String) pair,
    // edge attribute is a String. This line has no right-hand side, so the REPL rejects it
    // (see the compile error that follows); the actual graph is built later.
    val userGraph: Graph[(String, String), String]
    
    Name: Compile Error
    Message: <console>:30: error: class $iw needs to be abstract, since value userGraph is not defined
    class $iw extends Serializable {
          ^
    
    StackTrace: 
    
    // Vertex RDD: each record pairs a vertex id with a (name, position) attribute.
    val users: RDD[(VertexId, (String, String))] = {
      val userRecords = Seq(
        (3L, ("rxin", "student")),
        (7L, ("jgonzal", "postdoc")),
        (5L, ("franklin", "prof")),
        (2L, ("istoica", "prof"))
      )
      sc.parallelize(userRecords)
    }
    
    users = ParallelCollectionRDD[0] at parallelize at <console>:35
    
    
    
    
    
    
    ParallelCollectionRDD[0] at parallelize at <console>:35
    
    // Edge RDD: directed edges (source id, destination id) labelled with a relationship string.
    val relationships: RDD[Edge[String]] = {
      val edgeRecords = Seq(
        Edge(3L, 7L, "collab"),
        Edge(5L, 3L, "advisor"),
        Edge(2L, 5L, "colleague"),
        Edge(5L, 7L, "pi")
      )
      sc.parallelize(edgeRecords)
    }
    
    relationships = ParallelCollectionRDD[1] at parallelize at <console>:34
    
    
    
    
    
    
    ParallelCollectionRDD[1] at parallelize at <console>:34
    
    // Vertex attribute handed to Graph as the default (per the GraphX guide it is used for
    // edge endpoints that do not appear in the vertex RDD).
    val defaultUser: (String, String) = ("John Doe", "Missing")
    // Assemble the property graph from the vertex and edge RDDs.
    val graph: Graph[(String, String), String] =
      Graph(users, relationships, defaultUser)
    
    defaultUser = (John Doe,Missing)
    graph = org.apache.spark.graphx.impl.GraphImpl@7c40e7e8
    
    
    
    
    
    
    org.apache.spark.graphx.impl.GraphImpl@7c40e7e8
    
    // Count the vertices whose position attribute is "postdoc".
    graph.vertices.filter { case (_, (_, position)) => position == "postdoc" }.count()
    
    1
    
    // Count the vertices whose position attribute is "prof".
    graph.vertices.filter { case (_, (_, position)) => position == "prof" }.count()
    
    2
    
    // Count the edges whose source id is smaller than their destination id.
    graph.edges.filter(edge => edge.srcId < edge.dstId).count()
    
    3
    

    Graph 操作

    详见 https://spark.apache.org/docs/latest/graphx-programming-guide.html 的Graph类

    1. 图信息

    // Number of edges in the graph
    graph.numEdges
    
    4
    
    // Number of vertices in the graph
    graph.numVertices
    
    4
    
    // In-degree per vertex; vertices without incoming edges do not appear in the result.
    // inDegrees is already a VertexRDD[Int] holding one entry per vertex id, so the
    // extra reduceByKey(_ + _) had nothing to merge and has been dropped.
    graph.inDegrees.take(5)
    
    Array((3,1), (5,1), (7,2))
    
    // Out-degree per vertex; vertices without outgoing edges do not appear in the result.
    // outDegrees is already a VertexRDD[Int] holding one entry per vertex id, so the
    // extra reduceByKey(_ + _) had nothing to merge and has been dropped.
    graph.outDegrees.take(5)
    
    Array((2,1), (3,1), (5,2))
    
    // Total degree (in + out) per vertex.
    // degrees is already a VertexRDD[Int] holding one entry per vertex id, so the
    // extra reduceByKey(_ + _) had nothing to merge and has been dropped.
    graph.degrees.collect()
    
    Array((2,1), (3,2), (5,3), (7,2))
    

    2.图视图

    // Vertex view: count the vertices whose position attribute is "postdoc".
    graph.vertices.filter { case (_, (_, position)) => position == "postdoc" }.count()
    
    1
    
    // Edge view: count the edges whose source id is smaller than their destination id.
    graph.edges.filter(edge => edge.srcId < edge.dstId).count()
    
    3
    
    // Triplet view: each element combines an edge attribute with the ids and attributes
    // of its source and destination vertices.
    graph.triplets.collect()
    
    Array(((3,(rxin,student)),(7,(jgonzal,postdoc)),collab), ((5,(franklin,prof)),(3,(rxin,student)),advisor), ((2,(istoica,prof)),(5,(franklin,prof)),colleague), ((5,(franklin,prof)),(7,(jgonzal,postdoc)),pi))
    

    3.图缓存

    • persist
    • cache
    • unpersistVertices

    4.分区

    • partitionBy

    5.顶点与边的转换

    • mapVertices
    • mapEdges
    • mapTriplets

    6.修改图结构

    • reverse
    • subgraph
    • mask
    • groupEdges

    7.用图连接RDD

    • joinVertices
    • outerJoinVertices

    8.汇集邻近的三元组信息

    • collectNeighborIds
    • collectNeighbors
    • aggregateMessages

    9.迭代式图并行计算

    • pregel

    10.基本图算法

    • pageRank
    • connectedComponents
    • triangleCount
    • stronglyConnectedComponents
    
    
  • 相关阅读:
    IMX6ULL开发板虚拟机eVMware的安装
    IMX6ULL开发板Ubuntu系统初体验之登录设置
    itop4412开发板QtE4.7UVC摄像头使用例程
    iTOP4412开发板android4.4代码下载和编译
    IMX6ULL开发板之Ubuntu常用命令(二)
    IMX6ULL开发板aptget软件下载工具
    4412开发板编译ARMqtopiafreesrc2.2.0常见错误的处理
    IMX6ULL开发板虚拟机安装Ubuntu系统
    伟大的程序员是怎样炼成的?
    Created field method on datasource in AX Form
  • 原文地址:https://www.cnblogs.com/DataNerd/p/10148152.html
Copyright © 2011-2022 走看看