使用 PySpark 实现的 WordCount(单词计数)示例。
from pyspark.sql import SparkSession, DataFrame

# Word count over a text file, computed with the RDD API.
# NOTE(review): the session is never stopped in the visible code; call
# spark.stop() once nothing else needs it.

# Build (or reuse) a session running in local mode, with the driver host
# pinned to localhost.
spark = (
    SparkSession.builder
    .master("local[*]")
    .appName("PythonWordCount")
    .config("spark.driver.host", "localhost")
    .getOrCreate()
)

# Take the first field of every row read from the file, i.e. the line text.
_lines = spark.read.text('E:/test.txt').rdd.map(lambda row: row[0])

# Split each line on whitespace and flatten into one stream of words.
_words = _lines.flatMap(lambda line: line.split())

# Pair every word with an initial count of 1, then sum the counts per word.
_pairs = _words.map(lambda word: (word, 1))
_counts = _pairs.reduceByKey(lambda left, right: left + right)

# collect() brings the (word, count) pairs back to the driver, so despite
# its name `res_rdd` holds the collected result rather than an RDD.
res_rdd = _counts.collect()