场景:HBase存储在Azure上,现在通过访问Azure Storage的接口,获取HBase中各个表的数据量。
注意:
1、Azure存储默认额外保留2个副本,即数据共存3份,但只按1份计费,通过接口取到的size也是1份的大小。如果是自建HDFS,则情况不同,需要结合实际的副本数另行计算。
2、此处访问的是Azure Storage的接口,还可以访问HBase的接口来获取数据量(另行验证)。
C#代码:
using Microsoft.WindowsAzure.Storage;
using Microsoft.WindowsAzure.Storage.Auth;
using Microsoft.WindowsAzure.Storage.Blob;
using System;
using System.Collections.Generic;
using System.Data;
using System.Linq;
/// <summary>
/// Reads per-table storage usage for HBase by listing blobs in the Azure Storage
/// container configured for the application.
/// </summary>
public class HBaseResourceFetcher
{
    // Blob path whose immediate sub-directories are treated as one HBase table each.
    private const string TableRootPath = "hbase/data/default";

    private readonly CloudBlobContainer blobContainer;

    /// <summary>
    /// Builds the blob container reference from the storage account name, account key,
    /// endpoint suffix and container name found in the application configuration.
    /// </summary>
    public HBaseResourceFetcher()
    {
        StorageCredentials storageCred = new StorageCredentials(
            AppConfigGetter.Get(ConfigConstants.KEY_STORAGEACCOUNTNAME),
            AppConfigGetter.Get(ConfigConstants.KEY_STORAGEACCOUNTKEY));

        // The last argument (true) asks the SDK to use HTTPS endpoints.
        CloudStorageAccount storageAccount = new CloudStorageAccount(
            storageCred,
            AppConfigGetter.Get(ConfigConstants.KEY_ENDPOINTSUFFIX),
            true);

        var blobClient = storageAccount.CreateCloudBlobClient();
        this.blobContainer = blobClient.GetContainerReference(
            AppConfigGetter.Get(ConfigConstants.KEY_STORAGECONTAINERNAME));
    }

    /// <summary>
    /// Collects the size of every table directory found directly under
    /// <c>hbase/data/default</c>.
    /// </summary>
    /// <returns>
    /// A dictionary keyed by table name. It is empty when no sub-directory is listed.
    /// </returns>
    public Dictionary<string, ResourceEntity> GetHBaseTableSizeInfo()
    {
        Dictionary<string, ResourceEntity> result = new Dictionary<string, ResourceEntity>();

        CloudBlobDirectory directory = this.blobContainer.GetDirectoryReference(TableRootPath);
        if (directory == null)
        {
            return result;
        }

        foreach (var item in directory.ListBlobs())
        {
            // Only sub-directories are considered; blobs sitting directly in the root are ignored.
            var tableDirectory = item as CloudBlobDirectory;
            if (tableDirectory == null)
            {
                continue;
            }

            // Strip the root prefix and any remaining slashes so only the table name is left.
            string key = tableDirectory.Prefix.Replace(TableRootPath + "/", "").Replace("/", "");
            if (result.ContainsKey(key))
            {
                continue;
            }

            result.Add(key, new ResourceEntity()
            {
                Type = ResourceType.HBase,
                TableName = key,
                // Azure Storage keeps 2 extra replicas (3 copies in total) but bills for a
                // single copy, so the replica count is recorded as 0 here.
                CopiesNum = 0,
                // The size obtained from Azure Storage is likewise the size of one copy.
                Size_B = GetFileSizeByBlobPath(tableDirectory.Prefix)
            });
        }

        return result;
    }

    /// <summary>
    /// Sums the length in bytes of the block blobs found anywhere below the given directory.
    /// </summary>
    /// <param name="directoryPath">Blob directory path (prefix) inside the container.</param>
    /// <returns>Total length in bytes of the counted blobs; 0 when none are counted.</returns>
    public long GetFileSizeByBlobPath(string directoryPath)
    {
        CloudBlobDirectory directory = this.blobContainer.GetDirectoryReference(directoryPath);
        if (directory == null)
        {
            return 0;
        }

        // Flat listing (first argument true) walks the whole sub-tree. OfType skips any
        // listed item that is not a block blob; the previous unchecked "as" cast threw a
        // NullReferenceException on such items.
        // NOTE(review): BlobListingDetails.All may also return snapshots and similar
        // entries, which would then be counted - confirm this is intended.
        var blockBlobs = directory.ListBlobs(true, BlobListingDetails.All).OfType<CloudBlockBlob>();

        long size = 0;
        foreach (CloudBlockBlob blob in blockBlobs)
        {
            if (blob.Properties.Length <= 0)
            {
                continue;
            }

            // These names are excluded from the total; presumably they are HBase
            // bookkeeping files rather than table data - verify if exact sizes matter.
            if (blob.Name.Contains(".regioninfo")
                || blob.Name.Contains(".tableinfo")
                || blob.Name.Contains("recovered.edits"))
            {
                continue;
            }

            size += blob.Properties.Length;
        }

        return size;
    }
}