import org.apache.spark._
import org.apache.spark.graphx._
import org.apache.spark.rdd.RDD
// Vertex RDD: each element is (vertex id, (user name, role)).
val users: RDD[(VertexId, (String, String))] = sc.parallelize(
  Seq(
    (3L, ("rxin", "student")),
    (7L, ("jgonzal", "postdoc")),
    (5L, ("franklin", "prof")),
    (2L, ("istoica", "prof")),
    (4L, ("peter", "student"))
  )
)
users = ParallelCollectionRDD[62] at parallelize at <console>:49
ParallelCollectionRDD[62] at parallelize at <console>:49
// Edge RDD: each edge carries a String label describing the relationship.
// Two edges point at vertex 0, which has no entry in `users`; the graph
// built from these edges fills that vertex with the default attribute.
val relationships: RDD[Edge[String]] = sc.parallelize(
  Seq(
    Edge(srcId = 3L, dstId = 7L, attr = "collab"),
    Edge(srcId = 5L, dstId = 3L, attr = "advisor"),
    Edge(srcId = 2L, dstId = 5L, attr = "colleague"),
    Edge(srcId = 4L, dstId = 0L, attr = "student"),
    Edge(srcId = 5L, dstId = 0L, attr = "colleague")
  )
)
relationships = ParallelCollectionRDD[63] at parallelize at <console>:49
ParallelCollectionRDD[63] at parallelize at <console>:49
// Placeholder (name, role) given to vertices that appear in an edge but not in `users`.
val defaultUser: (String, String) = Tuple2("John Doe", "Missing")
defaultUser = (John Doe,Missing)
(John Doe,Missing)
// Assemble the property graph; vertices referenced only by edges get `defaultUser`.
val graph = Graph(vertices = users, edges = relationships, defaultVertexAttr = defaultUser)
graph = org.apache.spark.graphx.impl.GraphImpl@718b0840
org.apache.spark.graphx.impl.GraphImpl@718b0840
// Bring every vertex to the driver and print it, one per line.
for (vertex <- graph.vertices.collect()) println(vertex)
(0,(John Doe,Missing))
(2,(istoica,prof))
(3,(rxin,student))
(4,(peter,student))
(5,(franklin,prof))
(7,(jgonzal,postdoc))
// Render each triplet as "<source name> is the <edge label> of <destination name>" and print it.
graph.triplets
  .map { t => s"${t.srcAttr._1} is the ${t.attr} of ${t.dstAttr._1}" }
  .collect()
  .foreach(line => println(line))
rxin is the collab of jgonzal
franklin is the advisor of rxin
istoica is the colleague of franklin
peter is the student of John Doe
franklin is the colleague of John Doe
去除Missing属性的节点
// Keep only vertices whose second attribute (the role) is not "Missing",
// i.e. drop the placeholder vertices created from `defaultUser`.
val removeMissingGraph = graph.subgraph(vpred = (_, profile) => {
  val (_, role) = profile
  role != "Missing"
})
removeMissingGraph = org.apache.spark.graphx.impl.GraphImpl@21974f8a
org.apache.spark.graphx.impl.GraphImpl@21974f8a
// Print the vertices that survived the "Missing" filter.
for (vertex <- removeMissingGraph.vertices.collect()) println(vertex)
(2,(istoica,prof))
(3,(rxin,student))
(4,(peter,student))
(5,(franklin,prof))
(7,(jgonzal,postdoc))
// Print the relationships remaining in the filtered graph as readable sentences.
removeMissingGraph.triplets
  .map { t => s"${t.srcAttr._1} is the ${t.attr} of ${t.dstAttr._1}" }
  .collect()
  .foreach(line => println(line))
rxin is the collab of jgonzal
franklin is the advisor of rxin
istoica is the colleague of franklin
去除prof属性的节点
// Keep only vertices whose second attribute (the role) is not "prof".
val removeProfGraph = graph.subgraph(vpred = (_, profile) => {
  val (_, role) = profile
  role != "prof"
})
// Print the vertices that survived the "prof" filter.
for (vertex <- removeProfGraph.vertices.collect()) println(vertex)
(0,(John Doe,Missing))
(3,(rxin,student))
(4,(peter,student))
(7,(jgonzal,postdoc))
removeProfGraph = org.apache.spark.graphx.impl.GraphImpl@4ed527a2
org.apache.spark.graphx.impl.GraphImpl@4ed527a2
// Print the relationships remaining once "prof" vertices are removed.
removeProfGraph.triplets
  .map { t => s"${t.srcAttr._1} is the ${t.attr} of ${t.dstAttr._1}" }
  .collect()
  .foreach(line => println(line))
rxin is the collab of jgonzal
peter is the student of John Doe