zoukankan      html  css  js  c++  java
  • pyspark 注册临时表(registerTempTable)

    from pyspark import SparkConf, SparkContext
    from pyspark.sql import SQLContext, Row

    conf = SparkConf().setAppName("spark_sql_table_join")

    sc = SparkContext(conf=conf)

    sqlCtx = SQLContext(sc)

    line1 = sc.parallelize(["name1 a", "name3 c", "name4 d"])

    line2 = sc.parallelize(["name1 1", "name2 2", "name3 3"])

    word1 = line1.map(lambda line: line.split(" "))

    word2 = line2.map(lambda line: line.split(" "))

    table1 = word1.map(lambda words: Row(name=words[0], title=words[1]))

    table2 = word2.map(lambda words: Row(name=words[0], fraction=words[1]))

    tableSchema1 = sqlCtx.inferSchema(table1)

    tableSchema2 = sqlCtx.inferSchema(table2)

    tableSchema1.registerTempTable("table1")

    tableSchema2.registerTempTable("table2")


    def printRows(rows):
    if rows:
    for row in rows:
    print row

    # inner join
    rows = sqlCtx.sql(
    "select table1.name, table1.title, table2.fraction from table1 join table2 on table1.name = table2.name").collect()

    printRows(rows)

    print "============================================="

    # left outer join
    rows = sqlCtx.sql(
    "select table1.name, table1.title, table2.fraction from table1 left outer join table2 on table1.name = table2.name").collect()

    printRows(rows)

    # right outer join
    rows = sqlCtx.sql(
    "select table1.name, table1.title, table2.fraction from table1 right outer join table2 on table1.name = table2.name").collect()

    print "============================================="

    printRows(rows)

    # full outer join
    rows = sqlCtx.sql(
    "select table1.name, table1.title, table2.fraction from table1 full outer join table2 on table1.name = table2.name").collect()

    print "============================================="

    printRows(rows)

    """
    Row(name=u'name1', title=u'a', fraction=u'1')
    Row(name=u'name3', title=u'c', fraction=u'3')
    =============================================
    Row(name=u'name1', title=u'a', fraction=u'1')
    Row(name=u'name3', title=u'c', fraction=u'3')
    Row(name=u'name4', title=u'd', fraction=None)
    =============================================
    Row(name=u'name1', title=u'a', fraction=u'1')
    Row(name=None, title=None, fraction=u'2')
    Row(name=u'name3', title=u'c', fraction=u'3')
    =============================================
    Row(name=u'name1', title=u'a', fraction=u'1')
    Row(name=None, title=None, fraction=u'2')
    Row(name=u'name3', title=u'c', fraction=u'3')
    Row(name=u'name4', title=u'd', fraction=None)
    """

    sc.stop()

  • 相关阅读:
    模板方法模式
    备忘录模式
    观察者模式
    中介者模式
    迭代器模式
    Char型和string型字符串比较整理
    命令模式
    责任链模式
    代理模式
    dokcer 杂谈
  • 原文地址:https://www.cnblogs.com/zyzloner/p/6430744.html
Copyright © 2011-2022 走看看