storm+hbase开发

1、maven依赖导入

    <!-- storm-hbase: Storm integration module used to write data into HBase -->
    <dependency>
      <groupId>org.apache.storm</groupId>
      <artifactId>storm-hbase</artifactId>
      <version>1.1.1</version>
      <type>jar</type>
    </dependency>

    <!-- Hadoop client libraries. The slf4j-log4j12 binding is excluded;
         presumably to avoid a clash with the SLF4J binding already provided
         by the Storm runtime (TODO confirm against the actual classpath). -->
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>2.7.3</version>
      <exclusions>
        <exclusion>
          <groupId>org.slf4j</groupId>
          <artifactId>slf4j-log4j12</artifactId>
        </exclusion>
      </exclusions>
    </dependency>

    <!-- Hadoop common utilities, same version as hadoop-client and with the
         same slf4j-log4j12 exclusion. -->
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>2.7.3</version>
      <exclusions>
        <exclusion>
          <groupId>org.slf4j</groupId>
          <artifactId>slf4j-log4j12</artifactId>
        </exclusion>
      </exclusions>
    </dependency>

    <!-- HBase shared classes; keep this version identical to hbase-client below. -->
    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-common</artifactId>
      <version>1.2.0</version>
    </dependency>

    <!-- HBase client API (Connection, Table, Put, Scan, ...) used by the Java snippets. -->
    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-client</artifactId>
      <version>1.2.0</version>
    </dependency>

3、rowkey设计

1)、RowKey长度原则：RowKey是一个二进制码流，可以是任意字符串，最大长度为64KB，实际应用中一般为10~100bytes，存为byte[]字节数组，一般设计成定长。建议是越短越好，不要超过16个字节。原因一是数据的持久化文件HFile中是按照KeyValue存储的，如果RowKey过长比如100字节，1000万行数据光RowKey就要占用100*1000万=10亿个字节，将近1G数据，这会极大影响HFile的存储效率；原因二是memstore将缓存部分数据到内存，如果RowKey字段过长，内存的有效利用率会降低，系统将无法缓存更多的数据，这会降低检索效率，因此RowKey的字节长度越短越好；原因三是目前操作系统大都是64位，内存按8字节对齐，将RowKey控制在16个字节（8字节的整数倍）可以利用操作系统的最佳特性。

2)、RowKey散列原则：如果RowKey是按时间戳的方式递增，不要将时间放在二进制码的前面，建议将RowKey的高位作为散列字段，由程序循环生成，低位放时间字段，这样将提高数据均衡分布在每个RegionServer实现负载均衡的几率，如果没有散列字段，首字段直接是时间信息，将产生所有数据都在一个RegionServer上堆积的热点现象，这样在做数据检索的时候负载将会集中在个别RegionServer，降低查询效率。

3)、RowKey唯一原则：必须在设计上保证其唯一性。

RowKey是按照字典排序存储的，因此，设计RowKey时候，要充分利用这个排序特点，将经常一起读取的数据存储到一块，将最近可能会被访问的数据放在一块。

举个例子：如果最近写入HBase表中的数据是最可能被访问的，可以考虑将时间戳作为RowKey的一部分，由于是字典排序，所以可以使用Long.MAX_VALUE-timeStamp作为RowKey，这样能保证新写入的数据在读取时可以被快速命中。
参考：

HBase RowKey的设计原则

HBase的rowkey设计（含实例）

4、java API

初始化连接：

private Connection connection; // HBase connection
private Table table;

/**
 * Opens an HBase connection from the default {@code HBaseConfiguration} and
 * obtains a handle on the table named {@code TABLE_NAME}.
 *
 * <p>Two JVM system properties are set first, for local debugging only. An
 * {@link IOException} raised while connecting or fetching the table is printed
 * to standard error and not rethrown; in that case this method does not assign
 * the field(s) it had not yet reached.
 */
public void initHbase() {
    // Local debugging on Windows: point Hadoop at a local Hadoop installation directory.
    System.setProperty("hadoop.home.dir", "D:\\Program Files\\hadoop-common-2.2.0-bin-master");
    // Local debugging: run as a user name that HBase is allowed to serve.
    System.setProperty("HADOOP_USER_NAME", "hbase");

    // HBase configuration
    final Configuration hbaseConf = HBaseConfiguration.create();
    try {
        this.connection = ConnectionFactory.createConnection(hbaseConf);
        final TableName target = TableName.valueOf("TABLE_NAME");
        this.table = this.connection.getTable(target);
    } catch (IOException e) {
        e.printStackTrace();
    }
}

判断表，建表：

// Create the table (with one column family per entry of colFamily) unless it already exists.
TableName tableName = TableName.valueOf(myTableName);
// The Admin handle returned by getAdmin() is owned by the caller and must be closed;
// try-with-resources releases it even when tableExists/createTable throws.
try (Admin admin = connection.getAdmin()) {
    if (admin.tableExists(tableName)) {
        System.out.println("table exists!");
    } else {
        HTableDescriptor hTableDescriptor = new HTableDescriptor(tableName);
        for (String str : colFamily) {
            HColumnDescriptor hColumnDescriptor = new HColumnDescriptor(str);
            hTableDescriptor.addFamily(hColumnDescriptor);
        }
        admin.createTable(hTableDescriptor);
    }
}

批量写入：


private String rowKey; // row key
private String family; // column family
private String column; // column qualifier
private String value;  // cell value

// Encode the row key and column family once. The locals get their own names:
// a local called rowKey/family would refer to itself inside its own initializer
// (instead of the String field) and fail to compile.
byte[] rowKeyBytes = Bytes.toBytes(this.rowKey);
byte[] familyBytes = Bytes.toBytes(this.family);

Put put = new Put(rowKeyBytes);
put.addColumn(familyBytes, Bytes.toBytes(this.column), Bytes.toBytes(this.value));
// NOTE(review): puts is not declared in this snippet; it is expected to be a
// List<Put> provided by the surrounding code -- confirm.
puts.add(put);
// Batch write: send all accumulated Puts in one call.
table.put(puts);

关闭连接：

/**
 * Closes the HBase table and then the HBase connection.
 *
 * <p>Either field may still be {@code null} (for example when initialization
 * failed part-way), so both are null-checked before closing. Failures are
 * logged together with their stack trace and are not rethrown, so a failure to
 * close the table never prevents the connection from being closed.
 */
public void closeHbase() {
    // Close the table first.
    try {
        if (this.table != null) {
            this.table.close();
        }
    } catch (Exception e) {
        // Pass the throwable so the logger records the full stack trace.
        log.error(e.getMessage(), e);
    } finally {
        // Close the connection in finally so it runs even if closing the table failed.
        try {
            if (this.connection != null) {
                this.connection.close();
            }
        } catch (IOException e) {
            log.error(e.getMessage(), e);
        }
    }
}

扫描查询：

// Scan query
Scan scan = new Scan();
// Placeholder bounds: replace the empty strings with the real start/stop row keys.
scan.setStartRow(Bytes.toBytes(""));
scan.setStopRow(Bytes.toBytes(""));

// A ResultScanner must be closed once iteration is finished; try-with-resources
// guarantees that, including when the loop body throws.
try (ResultScanner resultScanner = table.getScanner(scan)) {
    for (Result result : resultScanner) {
        String rowKey = Bytes.toString(result.getRow());

        for (Cell cell : result.rawCells()) {
            // primitive long: no need to box the timestamp
            long timestamp = cell.getTimestamp();
            String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
            String value = Bytes.toString(CellUtil.cloneValue(cell));
            // rowKey, timestamp, qualifier and value are available here for processing.
        }
    }
}

参考：HBase Java API编程实例

csdnzh365

发布了49 篇原创文章 · 获赞 7 · 访问量 1万+

私信关注

猜你喜欢