目前有两个 Python 库可以操作 HBase:hbase-thrift 和 happybase。
happybase使用起来比较简单方便,因此重点学习该库,hbase-thrift只做简要介绍。
(一)hbase-thrift
1、使用前先添加库和依赖库:
# Install the Thrift runtime, the hbase-thrift bindings and the other
# packages used in this walkthrough. Each command must be on its own line;
# when chained on a single line, pip treats the words "pip" and "install"
# as package names.
pip install thrift
pip install hbase-thrift
pip install google-cloud
pip install google-cloud-vision
pip install kazoo
2、连接数据库的配置信息:
# Start the HBase Thrift server on the Linux host first:
#   /opt/cloudera/parcels/CDH/lib/hbase/bin/hbase-daemon.sh --config /opt/cloudera/parcels/CDH/lib/hbase/conf foreground_start thrift --infoport 9096 -p 9091
# Then run this Python script to connect to that server.
from thrift.protocol import TBinaryProtocol
from thrift.transport import TSocket
from thrift.transport import TTransport
from hbase import Hbase
from hbase.ttypes import *

# Address of the Thrift server; the port matches the `-p 9091` used above.
host = "xxx.xxx.xxx.xxx"
port = 9091
# Set to True to wrap the socket in a framed transport instead of a
# buffered one.
framed = False

socket = TSocket.TSocket(host, port)
# TTransport and TBinaryProtocol are imported explicitly above; the original
# snippet used them without importing them, which raised NameError.
if framed:
    transport = TTransport.TFramedTransport(socket)
else:
    transport = TTransport.TBufferedTransport(socket)
protocol = TBinaryProtocol.TBinaryProtocol(transport)
# `transport` and `client` are used by the next step of the walkthrough.
client = Hbase.Client(protocol)
3、操作数据库
# Relies on `transport` and `client` created in the connection-setup step.
print("Thrift2 Demo")
# Single-quoted literal so the embedded double quotes do not terminate the
# string (the original nested unescaped double quotes, a SyntaxError).
print('This demo assumes you have a table called "example" with a column family called "family1"')

# Open the connection.
transport.open()
try:
    # Fetch the names of all tables.
    tableNames = client.getTableNames()
    print('tableNames:', tableNames)
finally:
    # Close the connection even if the request above fails.
    transport.close()

#################################################
# Sample output recorded by the author:
# Thrift2 Demo
# This demo assumes you have a table called "example" with a column family called "family1"
# tableNames: ['lrx_hbase_test', 'lrx_hbase_test2', 'lrx_hbase_test3', 'lrx_test']
(二)happybase
# Prerequisites:
#   pip install thrift
#   pip install happybase
# Start the HBase Thrift server on the Linux host first:
#   /opt/cloudera/parcels/CDH/lib/hbase/bin/hbase-daemon.sh --config /opt/cloudera/parcels/CDH/lib/hbase/conf foreground_start thrift --infoport 9096 -p 9091 &
# Then run this Python script to connect to that server.
import happybase
from conf import setting

# Create a connection from the keyword arguments stored in setting.HBASE.
# NOTE: a `size` argument (number of pooled connections) belongs to
# happybase.ConnectionPool, not to happybase.Connection.
connection = happybase.Connection(**setting.HBASE)

# Open the underlying transport (no return value).
connection.open()
try:
    # Create a table (no return value):
    # connection.create_table('lrx_test',
    #                         {
    #                             'data': dict()
    #                         })

    # Get a table object via the connection; returns a happybase.table.Table.
    table0 = connection.table('lrx_test')
    print('表对象为:')
    print(table0)  # <happybase.table.Table name=b'lrx_test'>

    # Build a Table instance directly; also a happybase.table.Table.
    table = happybase.Table('lrx_test', connection)
    print('表实例为:')
    print(table)  # <happybase.table.Table name='lrx_test'>

    # Insert data (no return value): row<i> gets column data:<i> with the
    # value "<i>" for i in 0..4, then row5 gets data:5 = "value1".
    for i in range(5):
        table.put('row%s' % i, {'data:%s' % i: '%s' % i})
    table.put('row5', {'data:5': 'value1'})

    # Fetch the stored versions of one cell; returns a list.
    content = table.cells('row1', 'data:1')
    # Recorded output was [b'value1', b'value1'] -- presumably from earlier
    # writes of "value1" to this cell; the loop above writes "1".
    print(content)

    # Read a counter column; returns the current value of the cell:
    # content2 = table.counter_get('row2', 'data:2')
    # print(content2)  # 0

    # Get a scanner; table.scan() returns a generator of (row key, data).
    scanner = table.scan()
    for k, v in scanner:
        print(k, v)
    ###########################################
    # Sample output recorded by the author:
    # b'row0' {b'data:0': b'0'}
    # b'row1' {b'data:1': b'value1'}
    # b'row2' {b'data:2': b'2'}
    # b'row3' {b'data:3': b'3'}
    # b'row4' {b'data:4': b'4'}
    print(scanner)  # <generator object Table.scan at 0x000001E17CCDAF10>

    # Fetch one row; returns a dict with bytes keys and values, decoded here
    # into a str -> str dict for display.
    info = table.row('row2')
    info1 = {k.decode(): v.decode() for k, v in info.items()}
    print(info1)

    # List the table names. Stored under its own name so the `table` object
    # above is not overwritten by a list.
    table_names = connection.tables()
    print(table_names)
finally:
    # Close the transport (no return value), even if an operation failed.
    connection.close()