全表扫描数据,范围扫描数据
/**
 * Scans the HBase table "tabx1" from row key "rk3" up to "rk6" and prints
 * every returned cell as row key / column family / column / value.
 *
 * <p>To scan the whole table instead, omit the setStartRow/setStopRow calls.
 *
 * @param args unused
 * @throws Exception if the table cannot be opened or the scan fails
 */
public static void main(String[] args) throws Exception {
    // HBase stores raw bytes; pin the charset so encoding and decoding do not
    // depend on the JVM's platform default.
    final java.nio.charset.Charset utf8 = java.nio.charset.Charset.forName("UTF-8");

    // 1. Create the table handle
    Configuration conf = HBaseConfiguration.create();
    conf.set("hbase.zookeeper.quorum", "hadoop01:2181,hadoop02:2181,hadoop03:2181");
    HTable tab = new HTable(conf, "tabx1");
    try {
        // 2. Scan the table
        // -- full-table scan: a Scan with no start/stop row
        // -- range scan: restrict the scan with a start row and a stop row
        //    (per the HBase Scan API the stop row itself is excluded)
        Scan scan = new Scan();
        scan.setStartRow("rk3".getBytes(utf8));
        scan.setStopRow("rk6".getBytes(utf8));
        ResultScanner rs = tab.getScanner(scan);
        try {
            // 3. Walk the scan results and print them
            for (Result r : rs) {
                // -- row key of the current row
                String rk = new String(r.getRow(), utf8);
                // -- all cells of the current row: family -> column -> (timestamp -> value)
                NavigableMap<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> map = r.getMap();
                for (Map.Entry<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> entry : map.entrySet()) {
                    String cf = new String(entry.getKey(), utf8);
                    NavigableMap<byte[], NavigableMap<Long, byte[]>> cmap = entry.getValue();
                    for (Map.Entry<byte[], NavigableMap<Long, byte[]>> centry : cmap.entrySet()) {
                        String c = new String(centry.getKey(), utf8);
                        // Only the first entry of the version map is printed.
                        String v = new String(centry.getValue().firstEntry().getValue(), utf8);
                        System.out.println("--rk:["+rk+"],cf:["+cf+"],c:["+c+"],v:["+v+"]--");
                    }
                }
            }
        } finally {
            // The scanner holds server-side resources; release it even if printing fails.
            rs.close();
        }
    } finally {
        // 4. Release the table handle even when the scan throws
        tab.close();
    }
}
过滤器的使用
HBase中只能按照指定行键、行键范围或全表扫描来查询数据
HBase还提供了过滤器机制,可以在原有的查询结果的基础上,在服务器端实现进一步的过滤,返回符合过滤条件的数据,为HBase查询数据提供了更好的灵活性。
HBase支持自定义过滤器,但也同时提供了大量内置的过滤器,可以直接使用
RowFilter
行过滤器,可以筛选出匹配的行
Filter filter = new RowFilter(CompareOp.NOT_EQUAL, new BinaryComparator("rk3".getBytes()));
Filter filter = new RowFilter(CompareOp.EQUAL,new RegexStringComparator("^[^1]+1[^1]+$|^.*x$");
KeyOnlyFilter
这个过滤器唯一的功能就是只返回每行的行键,其他值全部为空
对于只关注于行键的应用场景来说非常合适,这样忽略掉其值就可以减少传递到客户端的数据量,能起到一定的优化作用
Filter filter = new KeyOnlyFilter();
RandomRowFilter
本过滤器的作用就是按照一定的几率(<=0会过滤掉所有的行,>=1会包含所有的行)来返回随机的结果集,对于同样的数据集,多次使用同一个RandomRowFilter会返回不同的结果集,对于需要随机抽取一部分数据的应用场景,可以使用此过滤器
Filter filter = new RandomRowFilter(0.5f);
ColumnPrefixFilter
按照列名的前缀来筛选单元格的,如果我们想要对返回的列的前缀加以限制的话,可以使用这个过滤器
Filter filter = new ColumnPrefixFilter("c2".getBytes());
ValueFilter
按照具体的值来筛选单元格的过滤器
Filter filter = new ValueFilter(CompareOp.EQUAL, new RegexStringComparator("^v[^2].*2.*$"));
SingleColumnValueFilter
按照指定列的值,决定整行是否返回
Filter filter = new SingleColumnValueFilter("cf1".getBytes(), "c1".getBytes(),
CompareOp.EQUAL, new RegexStringComparator("^[\\w&&[^2]]*$"));
FilterList
用于综合使用多个过滤器。其有两种关系:FilterList.Operator.MUST_PASS_ALL和FilterList.Operator.MUST_PASS_ONE,默认的是FilterList.Operator.MUST_PASS_ALL,顾名思义,它们分别是AND和OR的关系
Filter f1 = new KeyOnlyFilter();
Filter f2 = new RandomRowFilter(0.5f);
Filter flist = new FilterList(FilterList.Operator.MUST_PASS_ALL,f1,f2);
scan.setFilter(flist);
ResultScanner rs = tab.getScanner(scan);
测试代码
/**
 * Filter test driver: scans the HBase table "tabx1" with a FilterList that
 * combines KeyOnlyFilter and RandomRowFilter(0.5f) under MUST_PASS_ALL, and
 * prints every returned cell as row key / column family / column / value.
 *
 * <p>The commented-out lines are alternative scans and filters to try in
 * place of the active FilterList.
 *
 * @param args unused
 * @throws Exception if the table cannot be opened or the scan fails
 */
public static void main(String[] args) throws Exception {
    // HBase stores raw bytes; pin the charset so decoding does not depend on
    // the JVM's platform default.
    final java.nio.charset.Charset utf8 = java.nio.charset.Charset.forName("UTF-8");

    // 1. Create the table handle
    Configuration conf = HBaseConfiguration.create();
    conf.set("hbase.zookeeper.quorum", "hadoop01:2181,hadoop02:2181,hadoop03:2181");
    HTable tab = new HTable(conf, "tabx1");
    try {
        // 2. Scan the table
        // -- full-table scan
        //Scan scan = new Scan();
        // -- range scan
        //Scan scan = new Scan();
        //scan.setStartRow("rk3".getBytes());
        //scan.setStopRow("rk6".getBytes());
        Scan scan = new Scan();
        // Alternative single filters; enable one and pass it to scan.setFilter(...) to try it:
        // Filter filter = new RowFilter(CompareOp.NOT_EQUAL, new BinaryComparator("rk3".getBytes()));
        // Filter filter = new RowFilter(CompareOp.EQUAL,new RegexStringComparator("^[^1]+1[^1]+$|^.*x$"));
        // Filter filter = new KeyOnlyFilter();
        // Filter filter = new RandomRowFilter(0.5f);
        // Filter filter = new ColumnPrefixFilter("c2".getBytes());
        // Filter filter = new ValueFilter(CompareOp.EQUAL, new RegexStringComparator("^v[^2].*2.*$"));
        // Filter filter = new SingleColumnValueFilter("cf1".getBytes(), "c1".getBytes(), CompareOp.EQUAL, new RegexStringComparator("^[\\w&&[^2]]*$"));
        Filter f1 = new KeyOnlyFilter();
        Filter f2 = new RandomRowFilter(0.5f);
        // MUST_PASS_ALL: a row is returned only if every filter in the list accepts it.
        Filter flist = new FilterList(FilterList.Operator.MUST_PASS_ALL,f1,f2);
        scan.setFilter(flist);
        ResultScanner rs = tab.getScanner(scan);
        try {
            // 3. Walk the scan results and print them
            for (Result r : rs) {
                // -- row key of the current row
                String rk = new String(r.getRow(), utf8);
                // -- all cells of the current row: family -> column -> (timestamp -> value)
                NavigableMap<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> map = r.getMap();
                for (Map.Entry<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> entry : map.entrySet()) {
                    String cf = new String(entry.getKey(), utf8);
                    NavigableMap<byte[], NavigableMap<Long, byte[]>> cmap = entry.getValue();
                    for (Map.Entry<byte[], NavigableMap<Long, byte[]>> centry : cmap.entrySet()) {
                        String c = new String(centry.getKey(), utf8);
                        // Only the first entry of the version map is printed.
                        String v = new String(centry.getValue().firstEntry().getValue(), utf8);
                        System.out.println("--rk:["+rk+"],cf:["+cf+"],c:["+c+"],v:["+v+"]--");
                    }
                }
            }
        } finally {
            // The scanner holds server-side resources; release it even if printing fails.
            rs.close();
        }
    } finally {
        // 4. Release the table handle even when the scan throws
        tab.close();
    }
}
学习中,有不正确的地方多多指教