zoukankan      html  css  js  c++  java
  • Python操作hdfs

    Python直接操作hdfs,包括追加数据文件到hdfs文件

    #!coding:utf-8
    import sys 
    from hdfs.client import Client
    
    # Python 2 only: restore setdefaultencoding (hidden by site.py at startup)
    # so implicit str<->unicode conversions default to UTF-8.
    # NOTE(review): this is a well-known Python 2 hack; it does not exist on
    # Python 3 and would raise AttributeError there.
    reload(sys)
    sys.setdefaultencoding( "utf-8" )
    
    #关于python操作hdfs的API可以查看官网:
    #https://hdfscli.readthedocs.io/en/latest/api.html
    
        
    #读取hdfs文件内容,将每行存入数组返回
    #读取hdfs文件内容,将每行存入数组返回
    def read_hdfs_file(client, filename):
        """Read an HDFS file and return its lines as a list.

        :param client: hdfs.client.Client used to perform the read.
        :param filename: HDFS path of the file to read.
        :return: list of lines, each stripped of surrounding whitespace.
        """
        lines = []
        # delimiter='\n' makes the reader yield one line per iteration.
        # (The scraped original had the literal newline split across two
        # source lines, which is a SyntaxError; '\n' is the intended value.)
        with client.read(filename, encoding='utf-8', delimiter='\n') as reader:
            for line in reader:
                lines.append(line.strip())
        return lines
        
    #创建目录
    #创建目录
    def mkdirs(client, hdfs_path):
        """Create *hdfs_path* on HDFS via the given client.

        Delegates directly to ``client.makedirs``; presumably creates
        missing intermediate directories as well (verify against the
        hdfscli docs).
        """
        client.makedirs(hdfs_path)
     
    #删除hdfs文件
    #删除hdfs文件
    def delete_hdfs_file(client, hdfs_path):
        """Remove the file at *hdfs_path* using ``client.delete``."""
        client.delete(hdfs_path)
        
    #上传文件到hdfs
    #上传文件到hdfs
    def put_to_hdfs(client, local_path, hdfs_path):
        """Upload the local file *local_path* to *hdfs_path* on HDFS.

        ``cleanup=True`` asks the client to remove any partially
        uploaded remote files if the transfer fails.
        """
        client.upload(hdfs_path, local_path, cleanup=True)
        
    #从hdfs获取文件到本地
    #从hdfs获取文件到本地
    def get_from_hdfs(client, hdfs_path, local_path):
        """Download *hdfs_path* from HDFS to the local *local_path*.

        Fix: the original called a bare ``download(...)``, which is a
        NameError — ``download`` is a method of the client object.
        ``overwrite=False`` preserves the original intent of never
        clobbering an existing local file.
        """
        client.download(hdfs_path, local_path, overwrite=False)
    
    #追加数据到hdfs文件    
    #追加数据到hdfs文件
    def append_to_hdfs(client, hdfs_path, data):
        """Append *data* to the existing HDFS file at *hdfs_path*.

        Uses ``overwrite=False, append=True`` so existing content is
        kept and *data* is written after it.
        """
        client.write(hdfs_path, data, overwrite=False, append=True)
        
    #覆盖数据写到hdfs文件
    #覆盖数据写到hdfs文件
    def write_to_hdfs(client, hdfs_path, data):
        """Write *data* to *hdfs_path*, replacing any existing content.

        Counterpart of ``append_to_hdfs``: here ``overwrite=True,
        append=False`` truncates the file first.
        """
        client.write(hdfs_path, data, overwrite=True, append=False)
      
    #移动或者修改文件  
    #移动或者修改文件
    def move_or_rename(client, hdfs_src_path, hdfs_dst_path):
        """Move (or rename) *hdfs_src_path* to *hdfs_dst_path* on HDFS."""
        client.rename(hdfs_src_path, hdfs_dst_path)
       
    #返回目录下的文件
    #返回目录下的文件
    # NOTE(review): this function shadows the builtin ``list`` at module
    # level; renaming (e.g. to ``list_hdfs_dir``) would be cleaner but
    # would break existing callers, so the name is kept.
    def list(client, hdfs_path):
        """Return the entry names directly under *hdfs_path*.

        ``status=False`` requests names only, without per-entry
        FileStatus metadata.
        """
        return client.list(hdfs_path, status=False)
     
    #client = Client(url, root=None, proxy=None, timeout=None, session=None) 
    #client = Client("http://hadoop:50070")
       
    #move_or_rename(client,'/input/2.csv', '/input/emp.csv')
    #read_hdfs_file(client,'/input/emp.csv')
    #put_to_hdfs(client,'/home/shutong/hdfs/1.csv','/input/')
    #append_to_hdfs(client,'/input/emp.csv','我爱你'+'\n')
    #write_to_hdfs(client,'/input/emp.csv','我爱你'+'\n')
    #read_hdfs_file(client,'/input/emp.csv')
    #move_or_rename(client,'/input/emp.csv', '/input/2.csv')
    #mkdirs(client,'/input/python')
    #print list(client,'/input/')
    #chown(client,'/input/1.csv', 'root')
  • 相关阅读:
    【前端开发】vue子项目打包一个组件供另一个项目使用教程
    【前端开发】基于flow-editor-vue库改造的流程设计器,审批流程引擎前端教程
    【前端开发】基于logicFlow可视化流程库改造的流程引擎教程
    知识蒸馏:Distillation
    浮点神经网络vs二值神经网络
    Representation Learning 表征学习
    mybatis plus 追加where 函数
    20211012 MapStruct
    20211012 Dubbo 的 SPI 和 Adaptive
    20210916 小马哥讲 Spring AOP
  • 原文地址:https://www.cnblogs.com/Jims2016/p/8047914.html
Copyright © 2011-2022 走看看