zoukankan      html  css  js  c++  java
  • subgraph示例

    import org.apache.spark._
    import org.apache.spark.graphx._
    
    import org.apache.spark.rdd.RDD
    
    val users: RDD[(VertexId, (String, String))] =
        sc.parallelize(Array(
            (3L, ("rxin", "student")),
            (7L, ("jgonzal", "postdoc")),
            (5L, ("franklin", "prof")),
            (2L, ("istoica", "prof")),
            (4L, ("peter", "student"))
        ))
    
    users = ParallelCollectionRDD[62] at parallelize at <console>:49
    
    
    
    
    
    
    ParallelCollectionRDD[62] at parallelize at <console>:49
    
    val relationships: RDD[Edge[String]] =
        sc.parallelize(Array(
            Edge(3L, 7L, "collab"),
            Edge(5L, 3L, "advisor"),
            Edge(2L, 5L, "colleague"),
            Edge(4L, 0L, "student"),
            Edge(5L, 0L, "colleague")
        ))
    
    relationships = ParallelCollectionRDD[63] at parallelize at <console>:49
    
    
    
    
    
    
    ParallelCollectionRDD[63] at parallelize at <console>:49
    
    val defaultUser = ("John Doe", "Missing")
    
    defaultUser = (John Doe,Missing)
    
    
    
    
    
    
    (John Doe,Missing)
    
    val graph = Graph(users, relationships, defaultUser)
    
    graph = org.apache.spark.graphx.impl.GraphImpl@718b0840
    
    
    
    
    
    
    org.apache.spark.graphx.impl.GraphImpl@718b0840
    
    graph.vertices.collect.foreach(println(_))
    
    (0,(John Doe,Missing))
    (2,(istoica,prof))
    (3,(rxin,student))
    (4,(peter,student))
    (5,(franklin,prof))
    (7,(jgonzal,postdoc))
    
    graph.triplets
        .map(triplet => triplet.srcAttr._1 + " is the " + triplet.attr + " of " + triplet.dstAttr._1)
        .collect.foreach(println(_))
    
    rxin is the collab of jgonzal
    franklin is the advisor of rxin
    istoica is the colleague of franklin
    peter is the student of John Doe
    franklin is the colleague of John Doe
    

    去除Missing属性的节点

    val removeMissingGraph = graph.subgraph(vpred = (id, attr) => attr._2 != "Missing")
    
    removeMissingGraph = org.apache.spark.graphx.impl.GraphImpl@21974f8a
    
    
    
    
    
    
    org.apache.spark.graphx.impl.GraphImpl@21974f8a
    
    removeMissingGraph.vertices.collect.foreach(println(_))
    
    (2,(istoica,prof))
    (3,(rxin,student))
    (4,(peter,student))
    (5,(franklin,prof))
    (7,(jgonzal,postdoc))
    
    removeMissingGraph.triplets
        .map(triplet => triplet.srcAttr._1 + " is the " + triplet.attr + " of " + triplet.dstAttr._1)
        .collect.foreach(println(_))
    
    rxin is the collab of jgonzal
    franklin is the advisor of rxin
    istoica is the colleague of franklin
    

    去除Pro属性的节点

    val removeProfGraph = graph.subgraph(vpred = (id, attr) => attr._2 != "prof")
    removeProfGraph.vertices.collect.foreach(println(_))
    
    (0,(John Doe,Missing))
    (3,(rxin,student))
    (4,(peter,student))
    (7,(jgonzal,postdoc))
    
    
    
    removeProfGraph = org.apache.spark.graphx.impl.GraphImpl@4ed527a2
    
    
    
    
    
    
    org.apache.spark.graphx.impl.GraphImpl@4ed527a2
    
    removeProfGraph.triplets
        .map(triplet => triplet.srcAttr._1 + " is the " + triplet.attr + " of " + triplet.dstAttr._1)
        .collect.foreach(println(_))
    
    rxin is the collab of jgonzal
    peter is the student of John Doe
    
    
    
  • 相关阅读:
    .Net 下的Wondows窗体常用项目
    常用工具下载网址
    灵活操作MS SQL 2005 中的数据库 分离、附加、离线、在线、日志截断
    C#中使用委托
    Internet Information Services 诊断工具
    谁能帮我看一下这个问题 HRESULT = '80131509'
    在C#中使用指针
    MSSQL2005的新功能创建数据库快照
    Exchange 规划安装配置一条龙课程
    将进酒
  • 原文地址:https://www.cnblogs.com/DataNerd/p/10148423.html
Copyright © 2011-2022 走看看