Hbase实践将所有info列簇下的name列导入到另一张表中-阿里云开发者社区

Hbase实践将所有info列簇下的name列导入到另一张表中

2023-08-29 90

版权

本文内容由阿里云实名注册用户自发贡献，版权归原作者所有，阿里云开发者社区不拥有其著作权，亦不承担相应法律责任。具体规则请查看《阿里云开发者社区用户服务协议》和《阿里云开发者社区知识产权保护指引》。如果您发现本社区中有涉嫌抄袭的内容，填写侵权投诉表单进行举报，一经查实，本社区将立刻删除涉嫌侵权内容。

简介： Hbase实践将所有info列簇下的name列导入到另一张表中

将所有info列簇下的name列导入到另一张表中

建表：

读取的表：create 'stu_info','info','degree','work'

写入的表：create 't5',{NAME => 'info'}

put 'stu_info','20170820_10001','degree:xueli','benke'
put 'stu_info','20170820_10001','info:age','18'
put 'stu_info','20170820_10001','info:sex','male'
put 'stu_info','20170820_10001','info:name','tom'
put 'stu_info','20170820_10001','work:job','bigdata'
put 'stu_info','20170820_10002','degree:xueli','gaozhong'
put 'stu_info','20170820_10002','info:age','22'
put 'stu_info','20170820_10002','info:sex','female'
put 'stu_info','20170820_10002','info:name','jack'
put 'stu_info','20170820_10003','info:age','22'
put 'stu_info','20170820_10003','info:name','leo'
put 'stu_info','20170820_10004','info:age','18'
put 'stu_info','20170820_10004','info:name','peter'
put 'stu_info','20170820_10005','info:age','19'
put 'stu_info','20170820_10005','info:name','jim'
put 'stu_info','20170820_10006','info:age','20'
put 'stu_info','20170820_10006','info:name','zhangsan'

引入依赖

<properties>
     <hbase.version>0.98.6-hadoop2</hbase.version>
</properties>
<!-- hbase Dependency -->
<dependency>
     <groupId>org.apache.hbase</groupId>
     <artifactId>hbase-client</artifactId>
     <version>${hbase.version}</version>
</dependency>
<dependency>
     <groupId>org.apache.hbase</groupId>
     <artifactId>hbase-server</artifactId>
     <version>${hbase.version}</version>
</dependency>

编写TestHBaseMapper类和TestHBaseDriver类

打jar包，同时选中两个类，右击export 打包

package com.lzhsite.hbase;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * MapReduce driver that scans the {@code info} column family of the HBase table
 * {@code stu_info}, hands each row to {@code TestHBaseMapper}, and writes the resulting
 * {@link Put}s into the HBase table {@code t5}.
 *
 * <p>The class is launched through {@link ToolRunner}, so the {@link Configuration} passed
 * from {@link #main(String[])} is available via {@link #getConf()}.
 */
public class TestHBaseDriver extends Configured implements Tool{

  /**
   * Configures and runs the table-to-table copy job.
   *
   * @param args command-line arguments (not used by this driver)
   * @return {@code 0} when the job completes successfully, {@code 1} otherwise
   * @throws Exception if the job cannot be configured or submitted
   */
  @Override
  public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();

    Job job = Job.getInstance(conf, "mr-hbase");
    job.setJarByClass(TestHBaseDriver.class);

    // Restrict the scan to the "info" column family of the source table.
    Scan scan = new Scan();
    scan.addFamily(Bytes.toBytes("info"));
    // Settings recommended by the HBase reference guide for MapReduce scans:
    // fetch rows in larger batches per RPC, and do not fill the region server
    // block cache with data from a one-off full-table scan.
    scan.setCaching(500);
    scan.setCacheBlocks(false);

    // An overload with a trailing boolean (addDependencyJars) controls whether the
    // HBase dependency jars are shipped with the job; the default overload is used here.
    TableMapReduceUtil.initTableMapperJob(
        "stu_info",                     // input table
        scan,                           // Scan instance to control CF and attribute selection
        TestHBaseMapper.class,          // mapper class
        ImmutableBytesWritable.class,   // mapper output key
        Put.class,                      // mapper output value
        job);

    // No reducer class is supplied (null); the mapper's Puts are written to the output table.
    TableMapReduceUtil.initTableReducerJob(
        "t5",                           // output table
        null,                           // reducer class
        job);
    job.setNumReduceTasks(1);           // at least one, adjust as required

    return job.waitForCompletion(true) ? 0 : 1;
  }

  /**
   * Command-line entry point. Exits with the status returned by {@link #run(String[])},
   * or with {@code 1} if running the tool throws.
   *
   * @param args command-line arguments forwarded to {@link ToolRunner}
   */
  public static void main(String[] args) {
    Configuration conf = HBaseConfiguration.create();
    int status;
    try {
      status = ToolRunner.run(conf, new TestHBaseDriver(), args);
    } catch (Exception e) {
      e.printStackTrace();
      // Report the failure to the calling shell instead of falling through with exit code 0.
      status = 1;
    }
    System.exit(status);
  }
}

package com.lzhsite.hbase;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * MapReduce driver that scans the {@code info} column family of the HBase table
 * {@code stu_info}, hands each row to {@code TestHBaseMapper}, and writes the resulting
 * {@link Put}s into the HBase table {@code t5}.
 *
 * <p>NOTE(review): the surrounding article announces both a mapper class and a driver class,
 * but this listing is again the driver; the {@code TestHBaseMapper} source referenced below
 * is not shown here.
 */
public class TestHBaseDriver extends Configured implements Tool{

  /**
   * Configures and runs the table-to-table copy job.
   *
   * @param args command-line arguments (not used by this driver)
   * @return {@code 0} when the job completes successfully, {@code 1} otherwise
   * @throws Exception if the job cannot be configured or submitted
   */
  @Override
  public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();

    Job job = Job.getInstance(conf, "mr-hbase");
    job.setJarByClass(TestHBaseDriver.class);

    // Restrict the scan to the "info" column family of the source table.
    Scan scan = new Scan();
    scan.addFamily(Bytes.toBytes("info"));
    // Settings recommended by the HBase reference guide for MapReduce scans:
    // fetch rows in larger batches per RPC, and do not fill the region server
    // block cache with data from a one-off full-table scan.
    scan.setCaching(500);
    scan.setCacheBlocks(false);

    // An overload with a trailing boolean (addDependencyJars) controls whether the
    // HBase dependency jars are shipped with the job; the default overload is used here.
    TableMapReduceUtil.initTableMapperJob(
        "stu_info",                     // input table
        scan,                           // Scan instance to control CF and attribute selection
        TestHBaseMapper.class,          // mapper class
        ImmutableBytesWritable.class,   // mapper output key
        Put.class,                      // mapper output value
        job);

    // No reducer class is supplied (null); the mapper's Puts are written to the output table.
    TableMapReduceUtil.initTableReducerJob(
        "t5",                           // output table
        null,                           // reducer class
        job);
    job.setNumReduceTasks(1);           // at least one, adjust as required

    return job.waitForCompletion(true) ? 0 : 1;
  }

  /**
   * Command-line entry point. Exits with the status returned by {@link #run(String[])},
   * or with {@code 1} if running the tool throws.
   *
   * @param args command-line arguments forwarded to {@link ToolRunner}
   */
  public static void main(String[] args) {
    Configuration conf = HBaseConfiguration.create();
    int status;
    try {
      status = ToolRunner.run(conf, new TestHBaseDriver(), args);
    } catch (Exception e) {
      e.printStackTrace();
      // Report the failure to the calling shell instead of falling through with exit code 0.
      status = 1;
    }
    System.exit(status);
  }
}

-->运行jar包

/opt/modules/cdh/hadoop-2.5.0-cdh5.3.6/bin/yarn jar /opt/datas/Test_HBase_mr.jar

-->查看数据

scan 'default:stu_info'

scan 'default:t5'

遇到的问题：

Eclipse本地执行代码时报：Caused by: java.lang.NoSuchMethodError: org.apache.hadoop.mapred.ReduceTask.setLocalMapFiles

放在Linux里运行却可以，感觉是hbase 0.98.6-hadoop2的一个bug。如果把maven依赖里的hbase版本升级到1.2.6，由于服务器安装的是hbase-0.98.6-cdh5.3.6，在本地运行又会造成HRegionServer节点挂掉。

Hbase实践将所有info列簇下的name列导入到另一张表中

热门文章

最新文章

相关课程

相关电子书

相关实验场景

热门

活动广场

任务中心

开发者评测

高校计划

乘风者计划

训练营

阿里云MVP

话题

直播

下载

镜像站

技术资料

插件

Hbase实践将所有info列簇下的name列导入到另一张表中

热门文章

最新文章

相关课程

相关电子书

相关实验场景