zoukankan      html  css  js  c++  java
  • python Hbase Thrift pycharm 及引入包

    cp -r hbase/ /usr/lib/python2.7/site-packages/

    官方示例

    http://code.google.com/p/hbase-thrift/source/browse/trunk/python/test/tables.py
    http://yannramin.com/2008/07/19/using-facebook-thrift-with-python-and-hbase/
    http://wiki.apache.org/hadoop/Hbase/ThriftApi

    将生成的hbase目录copy到python的包下:
    cp -r hbase /usr/lib/python2.4/site-packages/
    3。启动hbase和thrift服务:
    ./bin/start-hbase.sh
    ./bin/hbase-daemon.sh start thrift
    好像需要源码,我反正没找到src目录,忘记了  。。。。。。 忘记当初自己怎么装的了。
    # --*-- coding:utf-8 --*--
    
    import sys
    import time
    
    # 所有thirft编程都需要的
    from thrift import Thrift
    from thrift.transport import TSocket, TTransport
    from thrift.protocol import TBinaryProtocol
    # Hbase的 客户端代码
    from hbase import ttypes
    from hbase.Hbase import Client, ColumnDescriptor, Mutation
    
    
    # Demo script: recreate one table, write a few rows, then scan them back.
    #
    # The socket targets the HBase Thrift gateway (the service launched with
    # `hbase-daemon.sh start thrift`), not ZooKeeper and not the master.
    transport = TSocket.TSocket('localhost', 9090)

    # Buffering is critical: raw sockets are very slow.
    transport = TTransport.TBufferedTransport(transport)

    # Wrap the transport in a protocol.
    protocol = TBinaryProtocol.TBinaryProtocol(transport)

    # Create a client that uses the protocol encoder.
    client = Client(protocol)

    # Name of the demo table that this script drops and recreates.
    t = 'tab2'

    # Connect; the try/finally guarantees the socket is released even when
    # one of the calls below raises.
    transport.open()
    try:
        # Walk over all table names and drop only the demo table.  Other
        # tables on the server must be left untouched.
        print('scanning tables ......')
        for table in client.getTableNames():
            print('found:%s' % table)
            if table != t:
                continue
            if client.isTableEnabled(table):
                print(' disabling table: %s' % t)
                # A table has to be disabled before it can be deleted.
                client.disableTable(table)
            print('deleting table: %s' % t)
            client.deleteTable(table)

        # Create the table with two column families.
        columns = [
            ColumnDescriptor(name='entry:', maxVersions=10),
            ColumnDescriptor(name='unused:'),
        ]

        try:
            print('creating table : % s' % t)
            client.createTable(t, columns)
        except ttypes.AlreadyExists as ae:
            # Only "table already exists" is tolerated; any other failure
            # propagates because the rest of the script needs the table.
            print('Warn:' + ae.message)

        # Insert data.  The escapes below are raw bytes: `invalid` is not
        # decodable as UTF-8, `valid` is a UTF-8 encoded string.
        invalid = 'foo-\xfc\xa1\xa1\xa1\xa1\xa1'
        valid = 'foo-\xE7\x94\x9F\xE3\x83\x93\xE3\x83\xBC\xE3\x83\xAB'

        # Non-UTF-8 bytes are fine for cell data.
        mutations = [Mutation(column='entry:foo', value=invalid)]
        print(str(mutations))
        client.mutateRow(t, 'foo', mutations)  # 'foo' is the row key

        # Try empty strings: an empty cell value written under an empty row key.
        # NOTE(review): some HBase versions may reject an empty row key - confirm.
        mutations = [Mutation(column='entry:foo', value='')]
        client.mutateRow(t, '', mutations)

        # This row key is valid UTF-8.
        mutations = [Mutation(column='entry:foo', value=valid)]
        client.mutateRow(t, valid, mutations)

        # Full table scan over the rows we just created.
        print('starting scanner...')
        scanner = client.scannerOpen(t, '', ['entry:'])
        try:
            # Rows are only drained here; handle r[0] to inspect each row.
            r = client.scannerGet(scanner)
            while r:
                r = client.scannerGet(scanner)
        finally:
            # Release the server-side scanner.
            client.scannerClose(scanner)
        print('scanner finished ')

        # Range scan over every column family of the table.
        columnNames = []
        for desc in client.getColumnDescriptors(t).values():
            print('column with name: %s' % desc.name)
            print(desc)
            columnNames.append(desc.name + ':')

        print('stating scanner...')
        scanner = client.scannerOpenWithStop(t, '00020', '00040', columnNames)
        try:
            # Rows are only drained here; handle r[0] to inspect each row.
            r = client.scannerGet(scanner)
            while r:
                r = client.scannerGet(scanner)
        finally:
            client.scannerClose(scanner)
        print('scanner finished')
    finally:
        # Close the socket.
        transport.close()





    
    
    





    现在我们就可以用python来和hbase通信了 #
    -*-coding:utf-8 -*- #!/usr/bin/python from thrift import Thrift from thrift.transport import TSocket from thrift.transport import TTransport from thrift.protocol import TBinaryProtocol from hbase import Hbase from hbase.ttypes import ColumnDescriptor,Mutation,BatchMutation class HbaseWriter: """ IP地址 端口 表名 """ def __init__(self,address,port,table='user'): self.tableName = table #建立与hbase的连接 self.transport=TTransport.TBufferedTransport(TSocket.TSocket(address,port)) self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport) self.client=Hbase.Client(self.protocol) self.transport.open() tables = self.client.getTableNames() if self.tableName not in tables: print "not in tables" self.__createTable() self.write("hell,babay!!!") self.read() #关闭 def __del__(self): self.transport.close() #建表 def __createTable(self): col1 = ColumnDescriptor(name="person:",maxVersions=1) col2 = ColumnDescriptor(name="contents:",maxVersions=1) col3 = ColumnDescriptor(name="info:",maxVersions=1) self.client.createTable(self.tableName,[col1,col2,col3]) def write(self,content): row="abc" mutations=[Mutation(column="person:",value=content),Mutation(column="info:",value=content)] self.client.mutateRow(self.tableName,row,mutations) def read(self): scannerId = self.client.scannerOpen(self.tableName,"",["contents:",]) while True: try: result = self.client.scannerGet(scannerId) except: break contents = result.columns["contents:"].value #print contents self.client.scannerClose(scannerId) if __name__ == "__main__": client = HbaseWriter("192.168.239.135","9090","person") 我们看下使用thrift生成的代码中都提供了那些方法 提供的方法有: void enableTable(Bytes tableName) enable表 void disableTable(Bytes tableName) disable表 bool isTableEnabled(Bytes tableName) 查看表状态 void compact(Bytes tableNameOrRegionName) void majorCompact(Bytes tableNameOrRegionName) getTableNames() getColumnDescriptors(Text tableName) getTableRegions(Text tableName) void createTable(Text tableName, columnFamilies) void deleteTable(Text tableName) get(Text 
tableName, Text row, Text column) getVer(Text tableName, Text row, Text column, i32 numVersions) getVerTs(Text tableName, Text row, Text column, i64 timestamp, i32 numVersions) getRow(Text tableName, Text row) getRowWithColumns(Text tableName, Text row, columns) getRowTs(Text tableName, Text row, i64 timestamp) getRowWithColumnsTs(Text tableName, Text row, columns, i64 timestamp) getRows(Text tableName, rows) getRowsWithColumns(Text tableName, rows, columns) getRowsTs(Text tableName, rows, i64 timestamp) getRowsWithColumnsTs(Text tableName, rows, columns, i64 timestamp) void mutateRow(Text tableName, Text row, mutations) void mutateRowTs(Text tableName, Text row, mutations, i64 timestamp) void mutateRows(Text tableName, rowBatches) void mutateRowsTs(Text tableName, rowBatches, i64 timestamp) i64 atomicIncrement(Text tableName, Text row, Text column, i64 value) void deleteAll(Text tableName, Text row, Text column) void deleteAllTs(Text tableName, Text row, Text column, i64 timestamp) void deleteAllRow(Text tableName, Text row) void deleteAllRowTs(Text tableName, Text row, i64 timestamp) ScannerID scannerOpenWithScan(Text tableName, TScan scan) ScannerID scannerOpen(Text tableName, Text startRow, columns) ScannerID scannerOpenWithStop(Text tableName, Text startRow, Text stopRow, columns) ScannerID scannerOpenWithPrefix(Text tableName, Text startAndPrefix, columns) ScannerID scannerOpenTs(Text tableName, Text startRow, columns, i64 timestamp) ScannerID scannerOpenWithStopTs(Text tableName, Text startRow, Text stopRow, columns, i64 timestamp) scannerGet(ScannerID id) scannerGetList(ScannerID id, i32 nbRows) void scannerClose(ScannerID id)

    http://blog.csdn.net/poechant/article/details/6618264

    http://mmicky.blog.163.com/blog/static/150290154201311801519681/  按照这个配置python hbase开发环境

    编程前切换到/usr/program/python/hbase   然后运行python

    >>>from thrift.transport import TSocket
    >>>from thrift.protocol import TBinaryProtocol
    >>>from hbase import Hbase

    都不报错,但是到pycharm就报错,原因是python默认搜索当前目录,而pycharm并不是在该目录下运行的。

    到pycharm 需要把 /usr/program/python/hbase 添加到pycharm的path

    操作步骤:File>>setting>>project interpreter>>python interpreter>>>paths>>>+ 把/usr/program/python/hbase 文件夹添加进去就好了。

    __author__ = 'root'


    from thrift.transport import TSocket
    from thrift.transport import TTransport
    from thrift.protocol import TBinaryProtocol
    from hbase import Hbase

    # Smoke test for the interpreter path setup: connect to the local Thrift
    # server and print the table names.
    # The socket is wrapped in a buffered transport because raw sockets are
    # very slow.
    transport = TTransport.TBufferedTransport(TSocket.TSocket("localhost", 9090))
    protocol = TBinaryProtocol.TBinaryProtocol(transport)
    client = Hbase.Client(protocol)
    transport.open()
    try:
        tabs = client.getTableNames()
        print(tabs)
    finally:
        # Release the socket even when the request fails.
        transport.close()
  • 相关阅读:
    【ybtoj高效进阶 21173】简单区间(分治)
    【ybtoj高效进阶 21170】投篮训练(贪心)(线段树)(构造)
    【ybtoj高效进阶 21172】筹备计划(线段树)(树状数组)
    【ybtoj高效进阶 21168】打字机器(Trie树)(LCA)(值域线段树)
    【ybtoj高效进阶 21169】毁灭计划(分类讨论)(树形DP)
    【ybtoj高效进阶 21167】旅游计划(基环树)(DP)(单调队列)
    Day-15 面向对象02 成员
    Day-14 初识面向对象
    Day 13 内置函数(点击网址进入思维导图)、递归、二分法
    Day12 生成器函数-推导式
  • 原文地址:https://www.cnblogs.com/i80386/p/3556907.html
Copyright © 2011-2022 走看看