zoukankan      html  css  js  c++  java
  • subgraph示例

    import org.apache.spark._
    import org.apache.spark.graphx._
    
    import org.apache.spark.rdd.RDD
    
    // Vertex RDD: every element pairs a vertex id with a (user name, role) tuple.
    val users: RDD[(VertexId, (String, String))] = sc.parallelize(
        Seq(
            (3L, ("rxin", "student")),
            (7L, ("jgonzal", "postdoc")),
            (5L, ("franklin", "prof")),
            (2L, ("istoica", "prof")),
            (4L, ("peter", "student"))
        )
    )
    
    users = ParallelCollectionRDD[62] at parallelize at <console>:49
    
    
    
    
    
    
    ParallelCollectionRDD[62] at parallelize at <console>:49
    
    // Edge RDD: Edge(source id, destination id, relationship label).
    // Two edges point at vertex 0, which has no entry in the vertex list above.
    val relationships: RDD[Edge[String]] = sc.parallelize(
        Seq(
            Edge(3L, 7L, "collab"),
            Edge(5L, 3L, "advisor"),
            Edge(2L, 5L, "colleague"),
            Edge(4L, 0L, "student"),
            Edge(5L, 0L, "colleague")
        )
    )
    
    relationships = ParallelCollectionRDD[63] at parallelize at <console>:49
    
    
    
    
    
    
    ParallelCollectionRDD[63] at parallelize at <console>:49
    
    // Placeholder (name, role) attribute, passed to Graph(...) as the default vertex attribute.
    val defaultUser: (String, String) = ("John Doe", "Missing")
    
    defaultUser = (John Doe,Missing)
    
    
    
    
    
    
    (John Doe,Missing)
    
    // Assemble the property graph; vertices referenced only by edges receive defaultUser.
    val graph = Graph(
        vertices = users,
        edges = relationships,
        defaultVertexAttr = defaultUser
    )
    
    graph = org.apache.spark.graphx.impl.GraphImpl@718b0840
    
    
    
    
    
    
    org.apache.spark.graphx.impl.GraphImpl@718b0840
    
    // Bring all vertices to the driver and print one per line.
    graph.vertices
        .collect()
        .foreach(println)
    
    (0,(John Doe,Missing))
    (2,(istoica,prof))
    (3,(rxin,student))
    (4,(peter,student))
    (5,(franklin,prof))
    (7,(jgonzal,postdoc))
    
    // Render each triplet as "<source name> is the <edge label> of <destination name>".
    graph.triplets
        .map(t => s"${t.srcAttr._1} is the ${t.attr} of ${t.dstAttr._1}")
        .collect()
        .foreach(println)
    
    rxin is the collab of jgonzal
    franklin is the advisor of rxin
    istoica is the colleague of franklin
    peter is the student of John Doe
    franklin is the colleague of John Doe
    

    去除Missing属性的节点

    // Keep only vertices whose role (second tuple element) is not "Missing".
    val removeMissingGraph = graph.subgraph(
        vpred = (_, profile) => profile._2 != "Missing"
    )
    
    removeMissingGraph = org.apache.spark.graphx.impl.GraphImpl@21974f8a
    
    
    
    
    
    
    org.apache.spark.graphx.impl.GraphImpl@21974f8a
    
    // Print the vertices that remain after the "Missing" filter.
    removeMissingGraph.vertices
        .collect()
        .foreach(println)
    
    (2,(istoica,prof))
    (3,(rxin,student))
    (4,(peter,student))
    (5,(franklin,prof))
    (7,(jgonzal,postdoc))
    
    // Print the relationships that remain in the filtered graph.
    removeMissingGraph.triplets
        .map(t => s"${t.srcAttr._1} is the ${t.attr} of ${t.dstAttr._1}")
        .collect()
        .foreach(println)
    
    rxin is the collab of jgonzal
    franklin is the advisor of rxin
    istoica is the colleague of franklin
    

    去除prof属性的节点

    // Keep only vertices whose role (second tuple element) is not "prof".
    val removeProfGraph = graph.subgraph(
        vpred = (_, profile) => profile._2 != "prof"
    )
    // Print the vertices that remain after the "prof" filter.
    removeProfGraph.vertices
        .collect()
        .foreach(println)
    
    (0,(John Doe,Missing))
    (3,(rxin,student))
    (4,(peter,student))
    (7,(jgonzal,postdoc))
    
    
    
    removeProfGraph = org.apache.spark.graphx.impl.GraphImpl@4ed527a2
    
    
    
    
    
    
    org.apache.spark.graphx.impl.GraphImpl@4ed527a2
    
    // Print the relationships that remain once "prof" vertices are filtered out.
    removeProfGraph.triplets
        .map(t => s"${t.srcAttr._1} is the ${t.attr} of ${t.dstAttr._1}")
        .collect()
        .foreach(println)
    
    rxin is the collab of jgonzal
    peter is the student of John Doe
    
    
    
  • 相关阅读:
    项目管理5大过程9大知识域44个定义
    linux centos6.5 修改ip地址
    .Net 6 已知问题集
    第二次阅读作业——程志
    采访大四学长整理笔记
    c#
    团队作业三两感想 by 程志
    搞定3G上网
    高焕堂Android應用框架原理與程式設計代码补遗(一)
    要素类属性内容全角换半角
  • 原文地址:https://www.cnblogs.com/DataNerd/p/10148423.html
Copyright © 2011-2022 走看看