zoukankan      html  css  js  c++  java
  • subgraph示例

    import org.apache.spark._
    import org.apache.spark.graphx._
    
    import org.apache.spark.rdd.RDD
    
    // Vertex RDD: each element pairs a vertex id with a (name, role) attribute tuple.
    val users: RDD[(VertexId, (String, String))] = sc.parallelize(
        Array(
            3L -> ("rxin", "student"),
            7L -> ("jgonzal", "postdoc"),
            5L -> ("franklin", "prof"),
            2L -> ("istoica", "prof"),
            4L -> ("peter", "student")
        )
    )
    
    users = ParallelCollectionRDD[62] at parallelize at <console>:49
    
    
    
    
    
    
    ParallelCollectionRDD[62] at parallelize at <console>:49
    
    // Edge RDD: directed edges labelled with the relationship between source and destination.
    // Vertex id 0 does not appear in the vertex list built from literal ids 2, 3, 4, 5 and 7.
    val relationships: RDD[Edge[String]] = sc.parallelize(
        Array(
            Edge(srcId = 3L, dstId = 7L, attr = "collab"),
            Edge(srcId = 5L, dstId = 3L, attr = "advisor"),
            Edge(srcId = 2L, dstId = 5L, attr = "colleague"),
            Edge(srcId = 4L, dstId = 0L, attr = "student"),
            Edge(srcId = 5L, dstId = 0L, attr = "colleague")
        )
    )
    
    relationships = ParallelCollectionRDD[63] at parallelize at <console>:49
    
    
    
    
    
    
    ParallelCollectionRDD[63] at parallelize at <console>:49
    
    // Placeholder (name, role) attribute; the role "Missing" marks a user with no real profile.
    val defaultUser: (String, String) = ("John Doe", "Missing")
    
    defaultUser = (John Doe,Missing)
    
    
    
    
    
    
    (John Doe,Missing)
    
    // Build the property graph; defaultUser is passed as the default vertex attribute.
    val graph: Graph[(String, String), String] =
        Graph(vertices = users, edges = relationships, defaultVertexAttr = defaultUser)
    
    graph = org.apache.spark.graphx.impl.GraphImpl@718b0840
    
    
    
    
    
    
    org.apache.spark.graphx.impl.GraphImpl@718b0840
    
    // Bring all vertices to the driver and print one (id, attribute) pair per line.
    for (vertex <- graph.vertices.collect()) println(vertex)
    
    (0,(John Doe,Missing))
    (2,(istoica,prof))
    (3,(rxin,student))
    (4,(peter,student))
    (5,(franklin,prof))
    (7,(jgonzal,postdoc))
    
    // Render each triplet as "<source name> is the <edge label> of <destination name>".
    val relationSentences = graph.triplets.map { t =>
        s"${t.srcAttr._1} is the ${t.attr} of ${t.dstAttr._1}"
    }
    for (sentence <- relationSentences.collect()) println(sentence)
    
    rxin is the collab of jgonzal
    franklin is the advisor of rxin
    istoica is the colleague of franklin
    peter is the student of John Doe
    franklin is the colleague of John Doe
    

    去除Missing属性的节点

    // Keep only vertices whose role (second tuple field) is not "Missing"; the vertex id is not used.
    val removeMissingGraph = graph.subgraph(vpred = (_, profile) => profile._2 != "Missing")
    
    removeMissingGraph = org.apache.spark.graphx.impl.GraphImpl@21974f8a
    
    
    
    
    
    
    org.apache.spark.graphx.impl.GraphImpl@21974f8a
    
    // Print the vertices that survived the "Missing" filter, one per line.
    for (vertex <- removeMissingGraph.vertices.collect()) println(vertex)
    
    (2,(istoica,prof))
    (3,(rxin,student))
    (4,(peter,student))
    (5,(franklin,prof))
    (7,(jgonzal,postdoc))
    
    // Render each remaining triplet as "<source name> is the <edge label> of <destination name>".
    val keptRelationSentences = removeMissingGraph.triplets.map { t =>
        s"${t.srcAttr._1} is the ${t.attr} of ${t.dstAttr._1}"
    }
    for (sentence <- keptRelationSentences.collect()) println(sentence)
    
    rxin is the collab of jgonzal
    franklin is the advisor of rxin
    istoica is the colleague of franklin
    

    去除prof属性的节点

    // Keep only vertices whose role (second tuple field) is not "prof", then print the survivors.
    val removeProfGraph = graph.subgraph(vpred = (_, profile) => profile._2 != "prof")
    for (vertex <- removeProfGraph.vertices.collect()) println(vertex)
    
    (0,(John Doe,Missing))
    (3,(rxin,student))
    (4,(peter,student))
    (7,(jgonzal,postdoc))
    
    
    
    removeProfGraph = org.apache.spark.graphx.impl.GraphImpl@4ed527a2
    
    
    
    
    
    
    org.apache.spark.graphx.impl.GraphImpl@4ed527a2
    
    // Render each remaining triplet as "<source name> is the <edge label> of <destination name>".
    val nonProfRelationSentences = removeProfGraph.triplets.map { t =>
        s"${t.srcAttr._1} is the ${t.attr} of ${t.dstAttr._1}"
    }
    for (sentence <- nonProfRelationSentences.collect()) println(sentence)
    
    rxin is the collab of jgonzal
    peter is the student of John Doe
    
    
    
  • 相关阅读:
    iOS14
    iOS 音量键翻页实现
    pod 相关写法
    js 递归树结构数据查找指定元素的所有父级
    前端实现访问一个图片URL直接下载该图片
    HTML5 drag api 的使用
    vue 组件的 patch
    centos7安装nginx
    nginx常用配置说明
    遍历删除
  • 原文地址:https://www.cnblogs.com/DataNerd/p/10148423.html
Copyright © 2011-2022 走看看