zoukankan      html  css  js  c++  java
  • subgraph示例

    import org.apache.spark._
    import org.apache.spark.graphx._
    
    import org.apache.spark.rdd.RDD
    
    val users: RDD[(VertexId, (String, String))] =
        sc.parallelize(Array(
            (3L, ("rxin", "student")),
            (7L, ("jgonzal", "postdoc")),
            (5L, ("franklin", "prof")),
            (2L, ("istoica", "prof")),
            (4L, ("peter", "student"))
        ))
    
    users = ParallelCollectionRDD[62] at parallelize at <console>:49
    
    
    
    
    
    
    ParallelCollectionRDD[62] at parallelize at <console>:49
    
    val relationships: RDD[Edge[String]] =
        sc.parallelize(Array(
            Edge(3L, 7L, "collab"),
            Edge(5L, 3L, "advisor"),
            Edge(2L, 5L, "colleague"),
            Edge(4L, 0L, "student"),
            Edge(5L, 0L, "colleague")
        ))
    
    relationships = ParallelCollectionRDD[63] at parallelize at <console>:49
    
    
    
    
    
    
    ParallelCollectionRDD[63] at parallelize at <console>:49
    
    val defaultUser = ("John Doe", "Missing")
    
    defaultUser = (John Doe,Missing)
    
    
    
    
    
    
    (John Doe,Missing)
    
    val graph = Graph(users, relationships, defaultUser)
    
    graph = org.apache.spark.graphx.impl.GraphImpl@718b0840
    
    
    
    
    
    
    org.apache.spark.graphx.impl.GraphImpl@718b0840
    
    graph.vertices.collect.foreach(println(_))
    
    (0,(John Doe,Missing))
    (2,(istoica,prof))
    (3,(rxin,student))
    (4,(peter,student))
    (5,(franklin,prof))
    (7,(jgonzal,postdoc))
    
    graph.triplets
        .map(triplet => triplet.srcAttr._1 + " is the " + triplet.attr + " of " + triplet.dstAttr._1)
        .collect.foreach(println(_))
    
    rxin is the collab of jgonzal
    franklin is the advisor of rxin
    istoica is the colleague of franklin
    peter is the student of John Doe
    franklin is the colleague of John Doe
    

    去除Missing属性的节点

    val removeMissingGraph = graph.subgraph(vpred = (id, attr) => attr._2 != "Missing")
    
    removeMissingGraph = org.apache.spark.graphx.impl.GraphImpl@21974f8a
    
    
    
    
    
    
    org.apache.spark.graphx.impl.GraphImpl@21974f8a
    
    removeMissingGraph.vertices.collect.foreach(println(_))
    
    (2,(istoica,prof))
    (3,(rxin,student))
    (4,(peter,student))
    (5,(franklin,prof))
    (7,(jgonzal,postdoc))
    
    removeMissingGraph.triplets
        .map(triplet => triplet.srcAttr._1 + " is the " + triplet.attr + " of " + triplet.dstAttr._1)
        .collect.foreach(println(_))
    
    rxin is the collab of jgonzal
    franklin is the advisor of rxin
    istoica is the colleague of franklin
    

    去除Pro属性的节点

    val removeProfGraph = graph.subgraph(vpred = (id, attr) => attr._2 != "prof")
    removeProfGraph.vertices.collect.foreach(println(_))
    
    (0,(John Doe,Missing))
    (3,(rxin,student))
    (4,(peter,student))
    (7,(jgonzal,postdoc))
    
    
    
    removeProfGraph = org.apache.spark.graphx.impl.GraphImpl@4ed527a2
    
    
    
    
    
    
    org.apache.spark.graphx.impl.GraphImpl@4ed527a2
    
    removeProfGraph.triplets
        .map(triplet => triplet.srcAttr._1 + " is the " + triplet.attr + " of " + triplet.dstAttr._1)
        .collect.foreach(println(_))
    
    rxin is the collab of jgonzal
    peter is the student of John Doe
    
    
    
  • 相关阅读:
    JavaScript教程——JavaScript 的基本语法(标识符)
    ECMAScript 6 入门——ES6 声明变量的六种方法
    JavaScript教程——数据类型概述
    对称机密算法与非对称机密算法
    C语言提高 (2) 第二天 用指针对字符串进行操作
    C语言提高 (1) 第一天 数据类型本质与内存四区
    关于内存地址和内存空间的理解(转)
    为什么对数组处理的函数需要传递数组长度
    Unicode编码,解释UCS、UTF、BMP、BOM等名词
    上海
  • 原文地址:https://www.cnblogs.com/DataNerd/p/10148423.html
Copyright © 2011-2022 走看看