• 如何使用API
  • 编译Thrift客户端需要的库(使用Hbase的源码编译)
使用以下命令:
thrift --gen [lang] [hbase-root]/src/main/resources/org/apache/hadoop/hbase/thrift/Hbase.thrift
lang:目标语言, java, cpp, rb, py, perl 或其他兼容的语言
编译生成的文件位于 gen-py、gen-rb 等与目标语言对应的目录中
  • 启动Thrift server
使用以下命令:
[hbase-root]/bin/hbase thrift start
  • 编写自己的python应用
基本结构:
# Minimal HBase Thrift client bootstrap.
# `host` and `port` are placeholders: set them to the address of the Thrift
# server started in the previous step before running this snippet.
from thrift.transport.TSocket import TSocket
from thrift.transport.TTransport import TBufferedTransport
from thrift.protocol import TBinaryProtocol
from hbase import Hbase

# Wrap the raw socket in a buffered transport and open the connection.
transport = TBufferedTransport(TSocket(host, port))
transport.open()

# Use the binary protocol over that transport and build the Hbase client on it.
protocol = TBinaryProtocol.TBinaryProtocol(transport)
client = Hbase.Client(protocol)

可使用help(client)来查看Python相应的API。
  • 数据结构一览
See Hbase.thrift for an up-to-date API and a complete definition of all methods and data types.
This section contains some definitions of Thrift data types needed for communication.
ColumnDescriptor
Used by getColumnDescriptors.
// Column family settings; every field except name carries a default value.
struct ColumnDescriptor {
  1: Text name,
  2: i32 maxVersions = 3,
  3: string compression = "NONE",
  4: bool inMemory = 0,
  5: string bloomFilterType = "NONE",
  6: i32 bloomFilterVectorSize = 0,
  7: i32 bloomFilterNbHashes = 0,
  8: bool blockCacheEnabled = 0,
  9: i32 timeToLive = -1
}
TRegionInfo
Used by getTableRegions.
// Region record: start/end keys, id, name, version, plus a server name and port.
struct TRegionInfo {
  1: Text startKey,
  2: Text endKey,
  3: i64 id,
  4: Text name,
  5: byte version,
  6: Text serverName,
  7: i32 port
}
Mutation
Used when performing batch update operations. isDelete is the switch you flip when you want to delete a cell.
// One cell change. isDelete defaults to 0 and writeToWAL defaults to 1.
struct Mutation {
  1: bool isDelete = 0,
  2: Text column,
  3: Text value,
  4: bool writeToWAL = 1
}
TRowResult
Used by getRow and other gets of rows.
// A row key together with a map of its columns (Text -> TCell).
struct TRowResult {
  1: Text row,
  2: map<Text, TCell> columns
}
方法一览
This section contains some definitions of the methods exposed to clients. If you have a method to propose, add a JIRA along with a comment explaining why we should want such a method.
void enableTable(Bytes tableName)启用xx表
void disableTable(Bytes tableName)禁用xx表
bool isTableEnabled(Bytes tableName)查看xx表是否可用
void compact(Bytes tableNameOrRegionName) 
void majorCompact(Bytes tableNameOrRegionName) 
list<Text> getTableNames()查看所有表名
map<Text,ColumnDescriptor> getColumnDescriptors(Text tableName)查看xx表的结构
list<TRegionInfo> getTableRegions(Text tableName) 
void createTable(Text tableName, columnFamilies)创建xx表
void deleteTable(Text tableName)删除xx表
list<TCell> get(Text tableName, Text row, Text column) 
list<TCell> getVer(Text tableName, Text row, Text column, i32 numVersions) 
list<TCell> getVerTs(Text tableName, Text row, Text column, i64 timestamp, i32 numVersions) 
list<TRowResult> getRow(Text tableName, Text row) 
list<TRowResult> getRowWithColumns(Text tableName, Text row, columns) 
list<TRowResult> getRowTs(Text tableName, Text row, i64 timestamp) 
list<TRowResult> getRowWithColumnsTs(Text tableName, Text row, columns, i64 timestamp) 
list<TRowResult> getRows(Text tableName, rows) 
list<TRowResult> getRowsWithColumns(Text tableName, rows, columns) 
list<TRowResult> getRowsTs(Text tableName, rows, i64 timestamp) 
list<TRowResult> getRowsWithColumnsTs(Text tableName, rows, columns, i64 timestamp) 
void mutateRow(Text tableName, Text row, mutations) 
void mutateRowTs(Text tableName, Text row, mutations, i64 timestamp) 
void mutateRows(Text tableName, rowBatches) 
void mutateRowsTs(Text tableName, rowBatches, i64 timestamp) 
i64 atomicIncrement(Text tableName, Text row, Text column, i64 value) 
void deleteAll(Text tableName, Text row, Text column) 
void deleteAllTs(Text tableName, Text row, Text column, i64 timestamp) 
void deleteAllRow(Text tableName, Text row) 
void deleteAllRowTs(Text tableName, Text row, i64 timestamp) 
ScannerID scannerOpenWithScan(Text tableName, TScan scan) 
ScannerID scannerOpen(Text tableName, Text startRow, columns) 
ScannerID scannerOpenWithStop(Text tableName, Text startRow, Text stopRow, columns) 
ScannerID scannerOpenWithPrefix(Text tableName, Text startAndPrefix, columns) 
ScannerID scannerOpenTs(Text tableName, Text startRow, columns, i64 timestamp) 
ScannerID scannerOpenWithStopTs(Text tableName, Text startRow, Text stopRow, columns, i64 timestamp) 
list<TRowResult> scannerGet(ScannerID id) 
list<TRowResult> scannerGetList(ScannerID id, i32 nbRows) 
void scannerClose(ScannerID id) 
获取基本信息方法
Get Table Names
Returns a list of table names.
list<Text> getTableNames()
Get Column Descriptors
Return a list of column families for a given table.
map<Text,ColumnDescriptor> getColumnDescriptors(Text tableName)
Get Table Regions
Return a list of the regions that make up a table.
list<TRegionInfo> getTableRegions(Text tableName)
行操作方法
Get Row
Retrieve a list<TRowResult> for a given row, with all the usual options (timestamp, selected columns). There are many other methods for getting rows with different parameters.
list<TRowResult> getRow(Text tableName, Text row, map<Text, Text> attributes)
Mutate Row (Put)
Send a series of mutation commands (put, delete) to the table.
void mutateRow(Text tableName, Text row, list<Mutation> mutations, map<Text, Text> attributes)
Delete Row
Delete an entire row.
void deleteAllRow(Text tableName, Text row, map<Text, Text> attributes)
遍历方法
Scanner methods use a scanner id that is maintained on the Thrift server. The ScannerID is defined as:
typedef i32 ScannerID
In Thrift, an i32 is a signed 32-bit integer; here its value identifies the scanner object on the Thrift server.
Open Scanner
Create a scanner for a table with some options.
ScannerID scannerOpen(Text tableName, Text startRow, list<Text> columns, map<Text, Text> attributes)
Get Scanner Results
Retrieve one or more records from the scanner at once.
list<TRowResult> scannerGetList(ScannerID id, i32 numberOfRows)
Close Scanner
Close a scanner.
void scannerClose(ScannerID id)

未整理源码:
/**
 * Identifies one cell or a group of cells in an HBase table: always by
 * column family, and optionally narrowed by a column qualifier and a
 * timestamp.
 */
struct TColumn {
  1: required binary family,
  2: optional binary qualifier,
  3: optional i64 timestamp
}

/**
 * A single cell together with its value. Family, qualifier and value are
 * required; timestamp and tags are optional.
 */
struct TColumnValue {
  1: required binary family,
  2: required binary qualifier,
  3: required binary value,
  4: optional i64 timestamp,
  5: optional binary tags
}

/**
 * A single cell and the amount by which to increment it.
 * The amount defaults to 1 when not supplied.
 */
struct TColumnIncrement {
  1: required binary family,
  2: required binary qualifier,
  3: optional i64 amount = 1
}

/**
 * Result of a read. When no result is found, row and columnValues
 * will not be set.
 */
// NOTE(review): columnValues is declared `required` even though the comment
// above says it may be unset — confirm how an empty result is represented.
struct TResult {
  1: optional binary row,
  2: required list<TColumnValue> columnValues
}

/**
 * Selects how much a delete removes:
 *  - DELETE_COLUMN: exactly one version is removed.
 *  - DELETE_COLUMNS: previous versions are removed as well.
 */
enum TDeleteType {
  DELETE_COLUMN = 0,
  DELETE_COLUMNS = 1
}

/**
 * Selects how a Mutation is written to the WAL:
 *  - SKIP_WAL: do not write the Mutation to the WAL.
 *  - ASYNC_WAL: write the Mutation to the WAL asynchronously.
 *  - SYNC_WAL: write the Mutation to the WAL synchronously.
 *  - FSYNC_WAL: write the Mutation to the WAL synchronously and force the
 *    entries to disk.
 */
enum TDurability {
  SKIP_WAL = 1,
  ASYNC_WAL = 2,
  SYNC_WAL = 3,
  FSYNC_WAL = 4
}
/**
 * Carries an optional list of label strings.
 */
// NOTE(review): presumably the labels a read request is authorized for — confirm.
struct TAuthorization {
  1: optional list<string> labels
}

/**
 * Carries an optional expression string.
 */
// NOTE(review): presumably a visibility expression attached to written cells — confirm.
struct TCellVisibility {
  1: optional string expression
}
/**
 * Describes a Get on a single row.
 *
 * Supplying only the row fetches everything for that row. The scope can be
 * narrowed by listing columns or column families, and further by a timestamp
 * or a time range, optionally capped by a maximum number of versions.
 *
 * If both a time range and a timestamp are given, the range is ignored.
 * Timestamps set on the individual TColumns are ignored.
 */
// NOTE(review): TTimeRange is not defined in the visible excerpt; it has to be
// declared elsewhere in the IDL file.
struct TGet {
  1: required binary row,
  2: optional list<TColumn> columns,

  3: optional i64 timestamp,
  4: optional TTimeRange timeRange,

  5: optional i32 maxVersions,
  6: optional binary filterString,
  7: optional map<binary, binary> attributes,
  8: optional TAuthorization authorizations
}

/**
 * Describes a Put on a single row.
 *
 * The column values added to this object are the ones written. A default
 * timestamp may be supplied for column values that carry none; when no
 * default is supplied either, the current time is inserted.
 *
 * The durability field controls how this Put is written to the write-ahead
 * log (WAL). When it is not provided, the column family's default durability
 * setting applies.
 */
// NOTE(review): field id 4 is not used here — presumably retired in an earlier
// revision; confirm before reusing it.
struct TPut {
  1: required binary row,
  2: required list<TColumnValue> columnValues,
  3: optional i64 timestamp,
  5: optional map<binary, binary> attributes,
  6: optional TDurability durability,
  7: optional TCellVisibility cellVisibility
}

/**
 * Describes a Delete on a single row.
 *
 * The scope can be narrowed by passing a list of columns or column families
 * as TColumns.
 *
 * A TColumn with only a family deletes the whole family. With a timestamp,
 * all versions whose timestamp is less than or equal to it are deleted;
 * without one, the current time is used.
 *
 * A TColumn with both family and qualifier deletes only that qualifier. With
 * a timestamp, only versions equal to that timestamp are deleted; without
 * one, the most recent version is deleted. To delete all previous versions
 * as well, specify the DELETE_COLUMNS TDeleteType.
 *
 * The top-level timestamp is used only when a complete row is deleted (i.e.
 * no columns are passed). If specified, it behaves as though a TColumn with
 * this timestamp had been added for every column family (i.e. all versions
 * older than or equal to it, in all column families, are deleted).
 *
 * The durability field controls how this Delete is written to the
 * write-ahead log (WAL). When it is not provided, the column family's
 * default durability setting applies.
 */
// NOTE(review): field id 5 is not used here — presumably retired in an earlier
// revision; confirm before reusing it.
struct TDelete {
  1: required binary row,
  2: optional list<TColumn> columns,
  3: optional i64 timestamp,
  // Default value 1 corresponds to TDeleteType.DELETE_COLUMNS.
  4: optional TDeleteType deleteType = 1,
  6: optional map<binary, binary> attributes,
  7: optional TDurability durability
}

/**
 * Describes an Increment on a single row.
 *
 * The durability field controls how this Increment is written to the
 * write-ahead log (WAL). When it is not provided, the column family's
 * default durability setting applies.
 */
// NOTE(review): field id 3 is not used here — presumably retired in an earlier
// revision; confirm before reusing it.
struct TIncrement {
  1: required binary row,
  2: required list<TColumnIncrement> columns,
  4: optional map<binary, binary> attributes,
  5: optional TDurability durability,
  6: optional TCellVisibility cellVisibility
}

/**
 * Describes an append operation on a single row.
 */
struct TAppend {
  1: required binary row,
  2: required list<TColumnValue> columns,
  3: optional map<binary, binary> attributes,
  4: optional TDurability durability,
  5: optional TCellVisibility cellVisibility
}

/**
 * Describes a scan.
 *
 * Timestamps on the listed columns are ignored; use timeRange to select by
 * timestamp. maxVersions defaults to 1.
 */
// NOTE(review): TTimeRange is not defined in the visible excerpt; it has to be
// declared elsewhere in the IDL file.
struct TScan {
  1: optional binary startRow,
  2: optional binary stopRow,
  3: optional list<TColumn> columns,
  4: optional i32 caching,
  5: optional i32 maxVersions = 1,
  6: optional TTimeRange timeRange,
  7: optional binary filterString,
  8: optional i32 batchSize,
  9: optional map<binary, binary> attributes,
  10: optional TAuthorization authorizations,
  11: optional bool reversed,
  12: optional bool cacheBlocks
}

/**
 * An atomic mutation for a given row: either a Put or a Delete.
 */
union TMutation {
  1: TPut put,
  2: TDelete deleteSingle
}

/**
 * Groups a number of Mutations that are to be applied to one single row.
 */
struct TRowMutations {
  1: required binary row,
  2: required list<TMutation> mutations
}

/**
 * Region descriptor: a region id and table name (both required), plus
 * optional start and end keys, offline and split flags, and a replica id.
 */
struct THRegionInfo {
  1: required i64 regionId,
  2: required binary tableName,
  3: optional binary startKey,
  4: optional binary endKey,
  5: optional bool offline,
  6: optional bool split,
  7: optional i32 replicaId
}

/**
 * Server name record: a required host name with an optional port and an
 * optional start code.
 */
struct TServerName {
  1: required string hostName,
  2: optional i32 port,
  3: optional i64 startCode
}

/**
 * Pairs a TServerName with a THRegionInfo.
 */
// NOTE(review): presumably the server currently hosting that region — confirm.
struct THRegionLocation {
  1: required TServerName serverName,
  2: required THRegionInfo regionInfo
}

Logo

CSDN联合极客时间,共同打造面向开发者的精品内容学习社区,助力成长!

更多推荐