zoukankan      html  css  js  c++  java
  • subgraph示例

    import org.apache.spark._
    import org.apache.spark.graphx._
    
    import org.apache.spark.rdd.RDD
    
    // Vertex RDD: each record pairs a vertex id with a (name, role) attribute tuple.
    val users: RDD[(VertexId, (String, String))] = {
        val userRecords = Seq(
            (3L, ("rxin", "student")),
            (7L, ("jgonzal", "postdoc")),
            (5L, ("franklin", "prof")),
            (2L, ("istoica", "prof")),
            (4L, ("peter", "student"))
        )
        sc.parallelize(userRecords)
    }
    
    users = ParallelCollectionRDD[62] at parallelize at <console>:49
    
    
    
    
    
    
    ParallelCollectionRDD[62] at parallelize at <console>:49
    
    // Edge RDD: Edge(srcId, dstId, label). Two edges point at vertex 0, which has
    // no entry in `users`.
    val relationships: RDD[Edge[String]] = {
        val edgeRecords = Seq(
            Edge(3L, 7L, "collab"),
            Edge(5L, 3L, "advisor"),
            Edge(2L, 5L, "colleague"),
            Edge(4L, 0L, "student"),
            Edge(5L, 0L, "colleague")
        )
        sc.parallelize(edgeRecords)
    }
    
    relationships = ParallelCollectionRDD[63] at parallelize at <console>:49
    
    
    
    
    
    
    ParallelCollectionRDD[63] at parallelize at <console>:49
    
    // Attribute passed to Graph(...) as the default vertex attribute.
    val defaultUser: (String, String) = ("John Doe", "Missing")
    
    defaultUser = (John Doe,Missing)
    
    
    
    
    
    
    (John Doe,Missing)
    
    // Assemble the property graph from the vertex and edge RDDs, supplying
    // `defaultUser` as the default vertex attribute.
    val graph: Graph[(String, String), String] =
        Graph(vertices = users, edges = relationships, defaultVertexAttr = defaultUser)
    
    graph = org.apache.spark.graphx.impl.GraphImpl@718b0840
    
    
    
    
    
    
    org.apache.spark.graphx.impl.GraphImpl@718b0840
    
    // Collect every vertex to the driver and print one per line.
    for (vertex <- graph.vertices.collect()) println(vertex)
    
    (0,(John Doe,Missing))
    (2,(istoica,prof))
    (3,(rxin,student))
    (4,(peter,student))
    (5,(franklin,prof))
    (7,(jgonzal,postdoc))
    
    // Render each triplet as "<source name> is the <edge label> of <destination name>".
    graph.triplets
        .map(t => s"${t.srcAttr._1} is the ${t.attr} of ${t.dstAttr._1}")
        .collect()
        .foreach(println)
    
    rxin is the collab of jgonzal
    franklin is the advisor of rxin
    istoica is the colleague of franklin
    peter is the student of John Doe
    franklin is the colleague of John Doe
    

    去除Missing属性的节点

    // Keep only vertices whose second attribute field is not "Missing".
    val removeMissingGraph = graph.subgraph(vpred = (vertexId, userAttr) => {
        val (_, role) = userAttr
        role != "Missing"
    })
    
    removeMissingGraph = org.apache.spark.graphx.impl.GraphImpl@21974f8a
    
    
    
    
    
    
    org.apache.spark.graphx.impl.GraphImpl@21974f8a
    
    // Print the vertices that remain after the "Missing" filter.
    for (vertex <- removeMissingGraph.vertices.collect()) println(vertex)
    
    (2,(istoica,prof))
    (3,(rxin,student))
    (4,(peter,student))
    (5,(franklin,prof))
    (7,(jgonzal,postdoc))
    
    // Print the relationships of the subgraph built with the "Missing" vertex filter.
    removeMissingGraph.triplets
        .map(t => s"${t.srcAttr._1} is the ${t.attr} of ${t.dstAttr._1}")
        .collect()
        .foreach(println)
    
    rxin is the collab of jgonzal
    franklin is the advisor of rxin
    istoica is the colleague of franklin
    

    去除prof属性的节点

    // Keep only vertices whose second attribute field is not "prof", then print them.
    val removeProfGraph = graph.subgraph(vpred = (vertexId, userAttr) => {
        val (_, role) = userAttr
        role != "prof"
    })
    for (vertex <- removeProfGraph.vertices.collect()) println(vertex)
    
    (0,(John Doe,Missing))
    (3,(rxin,student))
    (4,(peter,student))
    (7,(jgonzal,postdoc))
    
    
    
    removeProfGraph = org.apache.spark.graphx.impl.GraphImpl@4ed527a2
    
    
    
    
    
    
    org.apache.spark.graphx.impl.GraphImpl@4ed527a2
    
    // Print the relationships of the subgraph built with the "prof" vertex filter.
    removeProfGraph.triplets
        .map(t => s"${t.srcAttr._1} is the ${t.attr} of ${t.dstAttr._1}")
        .collect()
        .foreach(println)
    
    rxin is the collab of jgonzal
    peter is the student of John Doe
    
    
    
  • 相关阅读:
    nfs共享目录及sersync实时同步
    rsync备份
    MySQL基础操作
    源码包安装MySQL
    二进制安装MySQL
    Centos6防火墙-iptables版
    linux系统mongdb基础(1)
    linux系统ElK基础filebeat收集日志(4)
    linux系统ElK基础(3)
    linux系统ElK基础(2)
  • 原文地址:https://www.cnblogs.com/DataNerd/p/10148423.html
Copyright © 2011-2022 走看看