文章目录
一、环境准备
二、HBaseAPI
三、代码实现
一、环境准备
新建项目后在pom.xml 中添加依赖:
<!-- https://mvnrepository.com/artifact/org.apache.hbase/hbase-client -->
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-client</artifactId>
    <version>1.2.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.hbase/hbase-common -->
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-common</artifactId>
    <version>1.2.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.hbase/hbase-server -->
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-server</artifactId>
    <version>1.2.0</version>
</dependency>
二、HBaseAPI
以下是几个主要 HBase API 类和数据模型之间的对应关系:
1、 HBaseAdmin
关系: org.apache.hadoop.hbase.client.HBaseAdmin
作用:提供了一个接口来管理 HBase 数据库的表信息。它提供的方法包括:创建表,删除表,列出表项,使表有效或无效,以及添加或删除表列族成员等。
2、 HBaseConfiguration
关系: org.apache.hadoop.hbase.HBaseConfiguration
作用:对 HBase 进行配置
3、 HTableDescriptor
关系: org.apache.hadoop.hbase.HTableDescriptor
作用:包含了表的名字及其对应表的列族
4、 HColumnDescriptor
关系: org.apache.hadoop.hbase.HColumnDescriptor
作用:维护着关于列族的信息,例如版本号,压缩设置等。它通常在创建表或者为表添加列族的时候使用。列族被创建后不能直接修改,只能通过删除然后重新创建的方式。
列族被删除的时候,列族里面的数据也会同时被删除。
5、 HTable
关系: org.apache.hadoop.hbase.client.HTable
作用:可以用来和 HBase 表直接通信。此类的实例对于更新操作来说是非线程安全的。
6、 Put
关系: org.apache.hadoop.hbase.client.Put
作用:用来对单个行执行添加操作
7、 Get
关系: org.apache.hadoop.hbase.client.Get
作用:用来获取单个行的相关信息
8、 Result
关系: org.apache.hadoop.hbase.client.Result
作用:存储 Get 或者 Scan 操作后获取表的单行值。使用此类提供的方法可以直接获取值或者各种 Map 结构( key-value 对)
三、代码实现
package com.hbase.util; import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellUtil; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.NamespaceDescriptor; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.client.Connection; import org.apache.hadoop.hbase.client.ConnectionFactory; import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp; import org.apache.hadoop.hbase.filter.FilterList; import org.apache.hadoop.hbase.filter.SingleColumnValueFilter; import org.apache.hadoop.hbase.util.Bytes; public class HbaseUtility { public static Configuration conf; public static Connection conn; /** * 类级别的初始化,只是在类加载的时候做一次 配置zookeeper的端口2181 * 配置zookeeper的仲裁主机名centos,如果有多个机器,主机名间用冒号隔开 配置hbase master * 还有一种方式是new一个configuration对象,然后用addresource方法去添加xml配置文件 但是像这样显式的配置是会覆盖xml里的配置的 */ static { conf = HBaseConfiguration.create(); conf.set("hbase.zookeeper.property.clientPort", "2181"); conf.set("hbase.zookeeper.quorum", "centos"); conf.set("hbase.master", "centos:60000"); try { conn = ConnectionFactory.createConnection(conf); } catch (IOException e) { e.printStackTrace(); } } /** * 建表,建列族 * * @param tablename, * @param ColumnFamilys * NamespaceDescriptor:维护命名空间的信息,但是namespace,一般用shell来建立 * Admin:提供了一个接口来管理 HBase 数据库的表信息 * HTableDescriptor:维护了表的名字及其对应表的列族,通过HTableDescriptor对象设置表的特性 * 
HColumnDescriptor:维护着关于列族的信息,可以通过HColumnDescriptor对象设置列族的特性 */ public static void createtable(String tablename, String... ColumnFamilys) throws IOException { Admin admin = conn.getAdmin(); // admin.createNamespace(NamespaceDescriptor.create("my_ns").build()); // HTableDescriptor table=new // HTableDescriptor(TableName.valueOf("my_ns"+tablename)); HTableDescriptor table = new HTableDescriptor(TableName.valueOf(tablename)); for (String family : ColumnFamilys) { HColumnDescriptor columnfamily = new HColumnDescriptor(family); table.addFamily(columnfamily); } if (admin.tableExists(TableName.valueOf(tablename))) { System.out.println("Table Exists"); } else { admin.createTable(table); System.out.println("Table Created"); admin.close(); } } /** * 插入数据,当指定rowkey已经存在,则会覆盖掉之前的旧数据 * * @param tablename, * @param rowkey, * @param ColumnFamilys, * @param columns,@values * Table:用于与单个HBase表通信 Put:用来对单个行执行添加操作 */ public static void insertdata(String tablename, String rowkey, String ColumnFamilys, String[] columns, String[] values) throws IOException { Table table = conn.getTable(TableName.valueOf(tablename)); Put put = new Put(Bytes.toBytes(rowkey)); for (int i = 0; i < columns.length; i++) { put.addColumn(Bytes.toBytes(ColumnFamilys), Bytes.toBytes(columns[i]), Bytes.toBytes(values[i])); } table.put(put); System.out.println("data inserted"); table.close(); } /** * 根据rowkey删除整行的所有列族、所有行、所有版本 * * @param tablename * @param rowkey */ public static void deleterow(String tablename, String rowkey) throws IOException { Table table = conn.getTable(TableName.valueOf(tablename)); Delete delete = new Delete(Bytes.toBytes(rowkey)); table.delete(delete); table.close(); System.out.println("row" + rowkey + " is deleted"); } /** * 删除某个row的指定列 * * @param tablename * @param rowkey * @param columnfamily * @param column */ public static void deletecol(String tablename, String rowkey, String columnfamily, String column) throws IOException { Table table = conn.getTable(TableName.valueOf(tablename)); 
Delete delete = new Delete(Bytes.toBytes(rowkey)); delete.deleteColumn(Bytes.toBytes(columnfamily), Bytes.toBytes(column)); table.delete(delete); table.close(); System.out.println("row" + rowkey + " is deleted"); } /** * 删除指定列族中所有列的时间戳等于指定时间戳的版本数据 * * @param tablename * @param rowkey * @param columnfamily * @param timestamp */ public static void deleteversion(String tablename, String rowkey, String columnfamily, Long timestamp) throws IOException { Table table = conn.getTable(TableName.valueOf(tablename)); Delete delete = new Delete(Bytes.toBytes(rowkey)); delete.deleteFamilyVersion(Bytes.toBytes(columnfamily), timestamp); table.delete(delete); table.close(); System.out.println("row" + rowkey + " is deleted"); } /** * 删除指定列族,注意要先disable,修改完再enable表 * * @param tablename, * @param columnfamily * */ public static void deletefamily(String tablename, String columnfamily) throws IOException { Admin admin = conn.getAdmin(); admin.disableTable(TableName.valueOf(tablename)); HTableDescriptor table = admin.getTableDescriptor(TableName.valueOf(tablename)); table.removeFamily(Bytes.toBytes(columnfamily)); admin.modifyTable(TableName.valueOf(tablename), table); admin.enableTable(TableName.valueOf(tablename)); System.out.println("columnfamily " + columnfamily + " is deleted"); admin.close(); } /** * drop表,注意要先disable表,否则会报错 * * @param tablename */ public static void droptable(String tablename) throws IOException { Admin admin = conn.getAdmin(); admin.disableTable(TableName.valueOf(tablename)); admin.deleteTable(TableName.valueOf(tablename)); System.out.println("Table " + tablename + " is droped"); } /** * 扫描全表 * * @param tablename */ public static void scantable(String tablename) throws IOException { Scan scan = new Scan(); Table table = conn.getTable(TableName.valueOf(tablename)); ResultScanner rs = table.getScanner(scan); for (Result result : rs) { for (Cell cell : result.listCells()) { System.out.println(Bytes.toString(cell.getRow()) + " " + "column=" + 
Bytes.toString(cell.getFamily()) + ":" + Bytes.toString(cell.getQualifier()) + ",timestamp=" + cell.getTimestamp() + ",value=" + Bytes.toString(cell.getValue())); } } rs.close(); } /** * 根据rowkey对表进行scan * * @param tablename * @param rowkey * scan 'student',{ROWPREFIXFILTER => '1'} */ public static void scanrow(String tablename, String rowkey) throws IOException { Get get = new Get(Bytes.toBytes(rowkey)); Table table = conn.getTable(TableName.valueOf(tablename)); Result result = table.get(get); for (KeyValue kv : result.list()) { System.out.println( rowkey + " column=" + Bytes.toString(kv.getFamily()) + ":" + Bytes.toString(kv.getQualifier()) + "," + "timestamp=" + kv.getTimestamp() + ",value=" + Bytes.toString(kv.getValue())); } } /** * 获取指定rowkey中,指定列的最新版本数据 * * @param tablename * @param rowkey * @param columnfamily * @param column */ public static void scanspecifycolumn(String tablename, String rowkey, String columnfamily, String column) throws IOException { Table table = conn.getTable(TableName.valueOf(tablename)); Get get = new Get(Bytes.toBytes(rowkey)); get.addColumn(Bytes.toBytes(columnfamily), Bytes.toBytes(column)); Result result = table.get(get); for (KeyValue kv : result.list()) { System.out.println( rowkey + " column=" + Bytes.toString(kv.getFamily()) + ":" + Bytes.toString(kv.getQualifier()) + "," + "timestamp=" + kv.getTimestamp() + ",value=" + Bytes.toString(kv.getValue())); } } /** * 获取行键指定的行中,指定时间戳的数据, * * @param tablename * @param rowkey * @param timestamp * 如果要获取指定时间戳范围的数据,可以使用get.setTimeRange方法 */ public static void scanspecifytimestamp(String tablename, String rowkey, Long timestamp) throws IOException { Get get = new Get(Bytes.toBytes(rowkey)); get.setTimeStamp(timestamp); Table table = conn.getTable(TableName.valueOf(tablename)); Result result = table.get(get); for (KeyValue kv : result.list()) { System.out.println( rowkey + " column=" + Bytes.toString(kv.getFamily()) + ":" + Bytes.toString(kv.getQualifier()) + "," + "timestamp=" + 
kv.getTimestamp() + ",value=" + Bytes.toString(kv.getValue())); } } /** * 获取行键指定的行中,所有版本的数据 * 能输出多版本数据的前提是当前列族能保存多版本数据,列族可以保存的数据版本数通过HColumnDescriptor的setMaxVersions(Int)方法设置。 * * @param tablename * @param rowkey * @param timestamp */ public static void scanallversion(String tablename, String rowkey) throws IOException { Get get = new Get(Bytes.toBytes(rowkey)); get.setMaxVersions(); Table table = conn.getTable(TableName.valueOf(tablename)); Result result = table.get(get); for (KeyValue kv : result.list()) { System.out.println( rowkey + " column=" + Bytes.toString(kv.getFamily()) + ":" + Bytes.toString(kv.getQualifier()) + "," + "timestamp=" + kv.getTimestamp() + ",value=" + Bytes.toString(kv.getValue())); } } /** * 使用过滤器,获取18-20岁之间的学生信息 * * @param tablename * @param age * @throws IOException */ public static void scanfilterage(String tablename, int startage, int endage) throws IOException { Table table = conn.getTable(TableName.valueOf(tablename)); FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL); SingleColumnValueFilter filter1 = new SingleColumnValueFilter(Bytes.toBytes("information"), Bytes.toBytes("age"), CompareOp.GREATER_OR_EQUAL, Bytes.toBytes(startage)); SingleColumnValueFilter filter2 = new SingleColumnValueFilter(Bytes.toBytes("information"), Bytes.toBytes("age"), CompareOp.LESS_OR_EQUAL, Bytes.toBytes(endage)); filterList.addFilter(filter1); filterList.addFilter(filter2); Scan scan = new Scan(); scan.setFilter(filterList); ResultScanner rs = table.getScanner(scan); for (Result r : rs) { for (Cell cell : r.listCells()) { System.out.println(Bytes.toString(cell.getRow()) + " Familiy:Quilifier : " + Bytes.toString(cell.getFamily()) + ":" + Bytes.toString(cell.getQualifier()) + " Value : " + Bytes.toString(cell.getValue()) + " Time : " + cell.getTimestamp()); } } table.close(); } public static void main(String[] args) throws IOException { String[] col1 = new String[] { "name", "age" }; String[] val1 = new String[] { "xx", "18" }; 
String[] col2 = new String[] { "chinese", "math" }; String[] val2 = new String[] { "60", "70" }; createtable("student", "imformation", "score"); insertdata("student", "1", "imformation", col1, val1); insertdata("student", "1", "imformation", col2, val2); deleterow("student", "1"); deletecol("student", "1", "imformation", "chinese"); deleteversion("student", "1", "imformation", 1533482642629L); deletefamily("student", "imformation"); droptable("student"); scantable("student"); scanrow("student", "1"); scanspecifycolumn("student", "1", "imformation", "chinese"); scanspecifytimestamp("student", "imformation", 1533482642629L); scanallversion("student", "1"); scanfilterage("student", 18, 20); } }