zoukankan      html  css  js  c++  java
  • neo4j-(12)-spark操作

    创建对象

    <dependencies>
      <!-- Neo4j Connector for Apache Spark; the artifact suffix (_2.11) and the
           version suffix (_for_spark_2.4) select the Scala 2.11 / Spark 2.4 build. -->
      <dependency>
        <groupId>org.neo4j</groupId>
        <artifactId>neo4j-connector-apache-spark_2.11</artifactId>
        <version>4.0.1_for_spark_2.4</version>
      </dependency>
    </dependencies>
    <repositories>
      <!-- Spark Packages repository. The former Bintray host (dl.bintray.com) has been
           shut down, and Maven 3.8.1+ blocks plain-http repositories by default,
           so the https endpoint is used here. -->
      <repository>
        <id>SparkPackagesRepo</id>
        <url>https://repos.spark-packages.org/</url>
      </repository>
    </repositories>
    

    使用 SparkSession 配置

    // Build (or reuse) a local SparkSession named "play" and put the
    // spark.neo4j.bolt.* connection settings (URL, user, password) on it.
    val spark = SparkSession
      .builder()
      .appName("play")
      .master("local[*]")
      .config("spark.neo4j.bolt.url", "bolt://localhost:7687")
      .config("spark.neo4j.bolt.user", "neo4j")
      .config("spark.neo4j.bolt.password", "hortmt")
      .getOrCreate()
    import spark.implicits._

    // Create the Neo4j entry point from the session's SparkContext.
    val neo = Neo4j(spark.sparkContext)
    
    

    使用 SparkConf 配置

    // Put the spark.neo4j.bolt.* connection settings on a SparkConf and
    // create the SparkContext from it.
    val conf = new SparkConf()
      .setAppName("neoej")
      .setMaster("local[*]")
      .setAll(Seq(
        "spark.neo4j.bolt.url" -> "bolt://localhost:7687",
        "spark.neo4j.bolt.user" -> "neo4j",
        "spark.neo4j.bolt.password" -> "hortmt"
      ))

    val sc = new SparkContext(conf)

    // Create the Neo4j entry point from that context.
    val neo = Neo4j(sc)
    

    通过 Neo4jConfig 来做配置

    // Start a local SparkSession named "LoadDataToNeo4j". No spark.neo4j.bolt.*
    // settings are placed on the session in this snippet.
    val sparkSession = SparkSession.builder()
                  .master("local[*]")
                    .appName("LoadDataToNeo4j")
                        .getOrCreate();
    
      val sc = sparkSession.sparkContext
    
      // NOTE(review): `config` is never referenced after this line; Neo4j(sc) below
      // receives only the SparkContext, so these values do not appear to reach the
      // connector. Confirm how a Neo4jConfig is meant to be handed over.
      // NOTE(review): "localhost:" has no scheme or port, unlike the
      // "bolt://localhost:7687" URL used elsewhere in this page. Verify.
      val config = Neo4jConfig("localhost:","neo4j",Option("root"))
      // NOTE(review): loadRdd is called without a type argument and the returned RDD
      // is discarded with no action invoked on it. Spark RDDs are evaluated lazily,
      // so confirm that the CREATE statement is actually executed.
      Neo4j(sc).cypher("CREATE (c:Client {id:1230}) return c").loadRdd
      // Shut the session down when finished.
      sparkSession.close()
    
    

    返回类型

    选择要返回的数据类型

    • loadRowRdd, loadNodeRdds, loadRelRdd, loadRdd[T]
    • loadDataFrame, loadDataFrame(schema)
    • loadGraph[VD,ED]
    • loadGraphFrame[VD,ED]

    使用

    loadRowRdd

    import org.neo4j.spark._
    
    val neo = Neo4j(sc)
    
    // Run a Cypher query and load the result as an RDD of rows.
    val rdd = neo.cypher("MATCH (n:Person) RETURN id(n) as id ").loadRowRdd
    rdd.count()
    
    // inferred schema
    rdd.first.schema.fieldNames
    //   => ["id"]
    rdd.first.schema("id")
    //   => StructField(id,LongType,true)
    
    // Load a single column as a typed RDD and aggregate it.
    neo.cypher("MATCH (n:Person) RETURN id(n)").loadRdd[Long].mean()
    //   => res30: Double = 236696.5
    
    // Bind a query parameter with .param(name, value). The parameter is written as
    // $maxId because the legacy {maxId} form is no longer accepted as of Neo4j 4.0.
    // This is a plain (non-interpolated) string, so the `$` is passed through as-is.
    neo.cypher("MATCH (n:Person) WHERE n.id <= $maxId RETURN n.id").param("maxId", 10).loadRowRdd.count()
    //   => res34: Long = 10
    

    loadGraphFrame

    import org.neo4j.spark._
    import org.graphframes._

    val neo = Neo4j(sc)

    // Describe what to load: presumably (label, id property) for the source nodes,
    // (relationship type, property) for the edges, and (label, id property) for the
    // target nodes. Confirm against the connector documentation.
    val knowsPattern = neo.pattern(("Person", "id"), ("KNOWS", null), ("Person", "id"))

    // Load the pattern as a GraphFrame using 3 partitions and 1000 rows.
    val graphFrame = knowsPattern.partitions(3).rows(1000).loadGraphFrame

    graphFrame.vertices.count
    //     => 100
    graphFrame.edges.count
    //     => 1000

    // Run 5 iterations of PageRank and keep the resulting vertex table.
    val pageRankFrame = graphFrame.pageRank.maxIter(5).run()
    val ranked = pageRankFrame.vertices
    ranked.printSchema()

    // Take the three vertices with the highest "pagerank" value.
    val top3 = ranked.orderBy(ranked("pagerank").desc).take(3)
    //     => top3: Array[org.apache.spark.sql.Row]
    //     => Array([236716,70,0.62285...], [236653,7,0.62285...], [236658,12,0.62285])
    

    文档: https://neo4j.com/developer/apache-spark/

  • 相关阅读:
    在jsp页面如果运行时路径错误解决方法
    Maven实现ssm框架整合
    JS进阶(二)this指南——绑定了谁?
    防御性编程方法收集
    react将多个公共组件归成一类,方便调用
    初始化构建React+Ts项目时出现:Module build failed (from ./node_modules/css-loader/dist/cjs.js): CssSyntaxError
    Dva三种方式实现dispatch的Promise回调
    ES6多层解构
    ES6解构过程添加一个默认值和赋值一个新的值
    Antd-Pro2.0版本如何修改代理,让Mock变为真实服务器接口
  • 原文地址:https://www.cnblogs.com/weijiqian/p/14840810.html
Copyright © 2011-2022 走看看