Import the name column under the info column family of every row into another table
Create the tables:
Source table (read from): create 'stu_info','info','degree','work'
Target table (written to): create 't5',{NAME => 'info'}
put 'stu_info','20170820_10001','degree:xueli','benke'
put 'stu_info','20170820_10001','info:age','18'
put 'stu_info','20170820_10001','info:sex','male'
put 'stu_info','20170820_10001','info:name','tom'
put 'stu_info','20170820_10001','work:job','bigdata'
put 'stu_info','20170820_10002','degree:xueli','gaozhong'
put 'stu_info','20170820_10002','info:age','22'
put 'stu_info','20170820_10002','info:sex','female'
put 'stu_info','20170820_10002','info:name','jack'
put 'stu_info','20170820_10003','info:age','22'
put 'stu_info','20170820_10003','info:name','leo'
put 'stu_info','20170820_10004','info:age','18'
put 'stu_info','20170820_10004','info:name','peter'
put 'stu_info','20170820_10005','info:age','19'
put 'stu_info','20170820_10005','info:name','jim'
put 'stu_info','20170820_10006','info:age','20'
put 'stu_info','20170820_10006','info:name','zhangsan'
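For reference, the same kind of write can be issued through the Java client API that the job later uses for its output. A minimal sketch for one row, assuming the 0.98 client API (the class name PutExample is illustrative):

package com.lzhsite.hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;

// Illustrative example: write one of the rows above via the Java client.
public class PutExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        HTable table = new HTable(conf, "stu_info"); // 0.98-era API
        try {
            Put put = new Put(Bytes.toBytes("20170820_10001"));
            // family, qualifier, value
            put.add(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes("tom"));
            table.put(put);
        } finally {
            table.close();
        }
    }
}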
Add the Maven dependencies
<properties>
    <hbase.version>0.98.6-hadoop2</hbase.version>
</properties>

<!-- HBase dependencies -->
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-client</artifactId>
    <version>${hbase.version}</version>
</dependency>
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-server</artifactId>
    <version>${hbase.version}</version>
</dependency>
Write the TestHBaseMapper and TestHBaseDriver classes
To build the jar, select both classes together, right-click, and Export them as one jar.
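TestHBaseMapper (a minimal sketch, assuming the 0.98 TableMapper API): it reads each row delivered by the scan and emits a Put holding only the info:name cell, keyed by the original rowkey.

package com.lzhsite.hbase;

import java.io.IOException;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;

public class TestHBaseMapper extends TableMapper<ImmutableBytesWritable, Put> {

    private static final byte[] FAMILY = Bytes.toBytes("info");
    private static final byte[] QUALIFIER = Bytes.toBytes("name");

    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context)
            throws IOException, InterruptedException {
        // One Put per source row, reusing the original rowkey.
        Put put = new Put(key.get());
        for (Cell cell : value.rawCells()) {
            // Keep only info:name; the driver's scan already restricted us to 'info'.
            if (Bytes.equals(FAMILY, CellUtil.cloneFamily(cell))
                    && Bytes.equals(QUALIFIER, CellUtil.cloneQualifier(cell))) {
                put.add(cell);
            }
        }
        // Skip rows without an info:name cell; HBase rejects empty Puts.
        if (!put.isEmpty()) {
            context.write(key, put);
        }
    }
}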
TestHBaseDriver:

package com.lzhsite.hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class TestHBaseDriver extends Configured implements Tool {

    @Override
    public int run(String[] args) throws Exception {
        Configuration conf = this.getConf();

        Job job = Job.getInstance(conf, "mr-hbase");
        job.setJarByClass(TestHBaseDriver.class);

        // Only scan the info column family.
        Scan scan = new Scan();
        scan.addFamily(Bytes.toBytes("info"));

        TableMapReduceUtil.initTableMapperJob(
                "stu_info",                   // input table
                scan,                         // Scan instance to control CF and attribute selection
                TestHBaseMapper.class,        // mapper class
                ImmutableBytesWritable.class, // mapper output key
                Put.class,                    // mapper output value
                // job, false);               // the last parameter indicates whether to run from a packaged jar
                job);

        TableMapReduceUtil.initTableReducerJob(
                "t5",  // output table
                null,  // reducer class
                // job, null, null, null, null, false); // the last parameter indicates whether to run from a packaged jar
                job);

        job.setNumReduceTasks(1); // at least one, adjust as required

        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) {
        Configuration conf = HBaseConfiguration.create();
        try {
            int status = ToolRunner.run(conf, new TestHBaseDriver(), args);
            System.exit(status);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
--> Run the jar
/opt/modules/cdh/hadoop-2.5.0-cdh5.3.6/bin/yarn jar /opt/datas/Test_HBase_mr.jar
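If the submission fails with ClassNotFoundException for HBase classes, the HBase jars can be put on the client classpath first. A sketch, assuming HBase is installed next to Hadoop (the hbase path below is a guess based on the layout above; `hbase mapredcp` prints the MapReduce classpath):

export HADOOP_CLASSPATH=$(/opt/modules/cdh/hbase-0.98.6-cdh5.3.6/bin/hbase mapredcp)
/opt/modules/cdh/hadoop-2.5.0-cdh5.3.6/bin/yarn jar /opt/datas/Test_HBase_mr.jar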
--> Check the data (t5 should now contain only the info:name cells)
scan 'default:stu_info'
scan 'default:t5'
Problems encountered:
Running the code locally from Eclipse fails with: Caused by: java.lang.NoSuchMethodError: org.apache.hadoop.mapred.ReduceTask.setLocalMapFiles
Running it on the Linux server works fine, so this looks like a bug in hbase 0.98.6-hadoop2. Upgrading the HBase version in the Maven dependencies to 1.2.6 is not a fix either: the server runs hbase-0.98.6-cdh5.3.6, and running the 1.2.6 client locally against it caused the HRegionServer node to go down.
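A possible workaround, offered as an untested sketch: pin the Hadoop client artifacts in the POM to the cluster's exact CDH version, so that the local runner and the transitive Hadoop jars pulled in by hbase-server agree. The artifact and version below are assumptions based on the cluster described above; the CDH artifacts require Cloudera's Maven repository.

<!-- Untested sketch: align the local Hadoop client with the cluster's CDH version. -->
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>2.5.0-cdh5.3.6</version>
</dependency>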