zoukankan      html  css  js  c++  java
  • 利用 Python 将两张表连接



    from pyspark.sql import SparkSession
    from pyspark.sql.types import *
    import os


    def getUser(spark, path):
        """Load the user file into a DataFrame and expose it as temp view ``users1``.

        The file at ``path`` is read as CSV with a single space as the field
        separator and its first line treated as a header. An explicit schema of
        three nullable columns is applied: ``user`` (string), ``vedios``
        (string) and ``id`` (integer).

        Args:
            spark: Session object used for reading the file and running SQL.
            path: Location of the user file.

        Returns:
            The DataFrame produced by selecting every column from ``users1``.
        """
        # Column names are part of the data contract (the caller joins on
        # ``user``), so they are kept exactly as spelled here.
        columns = [
            StructField("user", StringType(), True),
            StructField("vedios", StringType(), True),
            StructField("id", IntegerType(), True),
        ]
        user_schema = StructType(columns)

        loaded = spark.read.csv(path, schema=user_schema, sep=" ", header=True)
        # Registering the view is a side effect: it makes ``users1`` queryable
        # through ``spark.sql`` after this function returns.
        loaded.createOrReplaceTempView("users1")
        return spark.sql("select * from users1")


    def getMovies(spark, path):
        """Load the movies file into a DataFrame and expose it as temp view ``movies``.

        The file at ``path`` is read as CSV with its first line treated as a
        header; no explicit schema is supplied.

        Args:
            spark: Session object used for reading the file and running SQL.
            path: Location of the movies file.

        Returns:
            The DataFrame produced by selecting every column from ``movies``.
        """
        loaded = spark.read.csv(path, header=True)
        # Side effect: ``movies`` stays registered for later ``spark.sql`` calls.
        loaded.createOrReplaceTempView("movies")
        return spark.sql("select * from movies ")


    if __name__ == '__main__':
        # Script entry point: load the user and movie files, inner-join them on
        # uploader name, and print the first ten joined rows.

        # Tell PySpark which JDK to use. The path is a raw string so the
        # backslashes are taken literally.
        # NOTE(review): the separators were missing in the original text and
        # have been reconstructed; confirm against the actual JDK install dir.
        os.environ['JAVA_HOME'] = r'C:\Program Files\Java\jdk1.8.0_211'
        # Debug output: prints the repr of the os.path module object.
        print(os.path)

        # The builder chain is wrapped in parentheses so it can span several
        # lines; without them ``spark`` would be bound to the SparkSession
        # class itself and the following lines would not parse.
        spark = (
            SparkSession
            .builder
            .appName("Python Spark SQL basic example")
            .config("spark.some.config.option", "some-value")
            .getOrCreate()
        )

        path_user = "C:/Users/Administrator/Desktop/guiliVideo/user/2008/0903/user.txt"
        path_movies = "C:/Users/Administrator/Desktop/vedios.txt"

        df1 = getUser(spark, path_user)
        df2 = getMovies(spark, path_movies)

        # Keep only users that appear as an uploader in the movies data.
        df3 = df1.join(df2, df1.user == df2.uploader, how='inner')
        df3.createOrReplaceTempView('table1')

        df4 = spark.sql('select * from table1 limit 10')
        # show() takes no URL argument; the stray text that was here made the
        # call a syntax error.
        df4.show()
     
    ---------------------

  • 相关阅读:
    嵌入式系统之微处理器篇
    嵌入式系统之基础概念篇
    八大排序算法简述
    进程-PV操作
    实时操作系统与分时操作系统
    串口助手
    STM32通用定时器功能和用法
    三种主流芯片架构简单比较
    python 我的第一个自动化脚本
    jquery部分实用功能
  • 原文地址:https://www.cnblogs.com/ly570/p/11357427.html
Copyright © 2011-2022 走看看