1. 数据以及代码准备
1.1 hbase过滤器介绍
以下介绍过滤器是基于HBase2.0.2版本。
HBase的Get和Scan实例可以调用setFilter()来设置过滤器,HBase的过滤器种类繁多,以满足不同的过滤需求。Filter作用于各个RegionServer,通过使用过滤器可以高效地获取数据。HBase Filter主要分为三大类,用户也可以通过继承FilterBase或实现Filter接口来自定义Filter。若需要多个Filter结合使用,可以通过FilterList来满足。
以下主要对各个Filter进行简单的了解。
Comparison Filters (比较过滤器)
Dedicated Filters (专用过滤器)
Decorating Filters (装饰性过滤器)
1.2 数据准备
rowkey | fam1:col1 | fam1:col2 | fam2:col1 | fam2:col2 |
---|---|---|---|---|
101 | 10086 | yid1 | 13522886092 | xiaom1 |
102 | 10000 | dianx2 | 18201607321 | xiaoh2 |
103 | 10001 | liant3 | 15313089839 | dah3 |
104 | 12306 | tiel4 | 13872232462 | zhangs4 |
105 | 12580 | muj5 | 15801019898 | liux5 |
1.3 HBase比较器
比较器 | 含义 |
---|---|
BinaryComparator | 按字节索引顺序比较指定字节数组,匹配完整字节数组 |
BinaryPrefixComparator | 按字节索引顺序比较指定字节数组前缀(左端数据),匹配字节数组前缀 |
NullComparator | 判断给定的是否为空 |
BitComparator | 按位比较 |
RegexStringComparator | 正则表达式比较,仅支持EQUAL和NOT_EQUAL,匹配正则表达式 |
SubstringComparator | 子串比较,匹配子字符串 |
1.4 HBase比较符
import org.apache.hadoop.hbase.CompareOperator;
比较符 | 含义 |
---|---|
LESS | < |
LESS_OR_EQUAL | <= |
EQUAL | = |
NOT_EQUAL | <> |
GREATER_OR_EQUAL | >= |
GREATER | > |
NO_OP | 无操作 |
1.5 准备代码
建表并插入数据
package com.hnbian.filters;
import com.hnbian.utils.HBaseUtils;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.util.Bytes;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
public class HBaseTest {
private static Connection hbaseConnection;
private byte[] fam1 = Bytes.toBytes("fam1");
private byte[] col1 = Bytes.toBytes("col1");
private byte[] fam2 = Bytes.toBytes("fam2");
private byte[] col2 = Bytes.toBytes("col2");
public static void main(String[] args) {
HBaseTest hBaseTest = new HBaseTest();
//创建表
//hBaseTest.createTable();
//添加数据
hBaseTest.saveData();
}
/**
* 保存数据
*/
public void saveData() {
hbaseConnection = HBaseUtils.getHBaseConnection();
List<Put> puts = new ArrayList<Put>();
Put put1 = new Put(Bytes.toBytes("101"));//为指定行创建一个Put操作
put1.addColumn(fam1, col1, Bytes.toBytes("10086"));
put1.addColumn(fam1, col2, Bytes.toBytes("yid1"));
put1.addColumn(fam2, col1, Bytes.toBytes("13522886092"));
put1.addColumn(fam2, col2, Bytes.toBytes("xiaom1"));
Put put2 = new Put(Bytes.toBytes("102"));
put2.addColumn(fam1, col1, Bytes.toBytes("10000"));
put2.addColumn(fam1, col2, Bytes.toBytes("dianx2"));
put2.addColumn(fam2, col1, Bytes.toBytes("18201607321"));
put2.addColumn(fam2, col2, Bytes.toBytes("xiaoh2"));
Put put3 = new Put(Bytes.toBytes("103"));
put3.addColumn(fam1, col1, Bytes.toBytes("10001"));
put3.addColumn(fam1, col2, Bytes.toBytes("liant3"));
put3.addColumn(fam2, col1, Bytes.toBytes("15313089839"));
put3.addColumn(fam2, col2, Bytes.toBytes("dah3"));
Put put4 = new Put(Bytes.toBytes("104"));
put4.addColumn(fam1, col1, Bytes.toBytes("12306"));
put4.addColumn(fam1, col2, Bytes.toBytes("tiel4"));
put4.addColumn(fam2, col1, Bytes.toBytes("13872232462"));
put4.addColumn(fam2, col2, Bytes.toBytes("zhangs4"));
Put put5 = new Put(Bytes.toBytes("105"));
put5.addColumn(fam1, col1, Bytes.toBytes("12580"));
put5.addColumn(fam1, col2, Bytes.toBytes("muj5"));
put5.addColumn(fam2, col1, Bytes.toBytes("15801019898"));
put5.addColumn(fam2, col2, Bytes.toBytes("liux5"));
puts.add(put1);
puts.add(put2);
puts.add(put3);
puts.add(put4);
puts.add(put5);
try {
if (null != hbaseConnection && !hbaseConnection.isClosed()) {
Table table = hbaseConnection.getTable(TableName.valueOf("test"));
table.put(puts);
table.close();
}
} catch (Exception e) {
e.printStackTrace();
} finally {
close(hbaseConnection);
}
}
/**
* 创建表
*
* @return
*/
public void createTable() {
String tableName = "test";
try {
hbaseConnection = HBaseUtils.getHBaseConnection();
Admin admin = hbaseConnection.getAdmin();
TableName tn = TableName.valueOf(tableName);
//创建表描述
TableDescriptorBuilder mtd = TableDescriptorBuilder.newBuilder(tn);
//fam1列簇描述
ColumnFamilyDescriptorBuilder.ModifyableColumnFamilyDescriptor cfd1 =
new ColumnFamilyDescriptorBuilder.ModifyableColumnFamilyDescriptor(fam1);
//fam2列簇描述
ColumnFamilyDescriptorBuilder.ModifyableColumnFamilyDescriptor cfd2 =
new ColumnFamilyDescriptorBuilder.ModifyableColumnFamilyDescriptor(fam2);
//设置压缩算法
cfd1.setCompressionType(Compression.Algorithm.GZ);
cfd2.setCompressionType(Compression.Algorithm.GZ);
Set<ColumnFamilyDescriptor> set = new HashSet();
set.add(cfd1);
set.add(cfd2);
mtd.setColumnFamilies(set);
admin.createTable(mtd.build());
admin.close();
} catch (Exception e) {
e.printStackTrace();
} finally {
close(hbaseConnection);
}
}
/**
* 关闭连接
*
* @param hbaseConnection
*/
public void close(Connection hbaseConnection) {
if (!hbaseConnection.isClosed()) {
try {
hbaseConnection.close();
} catch (Exception e) {
e.printStackTrace();
}
}
}
}
查询打印数据
package com.hnbian.utils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
/**
* HBase 工具类
* @author hnbian
*/
/**
 * HBase helper: hands out a shared connection and runs a scan against the
 * {@code test} table, printing every returned cell.
 *
 * @author hnbian
 */
public class HBaseUtils {
    private static final String HBASE_MASTER = "node1.com";
    private static final String HBASE_ROOTDIR = "/apps/hbase/data";
    private static final String HBASE_ZOOKEEPER_QUORUM = "node3.com,node2.com,node4.com";
    private static final String HBASE_ZOOKEEPER_PROPERTY_CLIENTPORT = "2181";
    private static Configuration hbaseConfiguration = null;
    private static Connection hbaseConnection = null;

    /**
     * Returns the shared connection, opening a new one when none exists yet or
     * the previous one has been closed.
     *
     * @return an open connection, or {@code null} when connecting failed
     */
    public static Connection getHBaseConnection() {
        // Callers close the connection after use, so a closed one must be replaced.
        if (hbaseConnection == null || hbaseConnection.isClosed()) {
            hbaseConfiguration = HBaseConfiguration.create();
            hbaseConfiguration.set("hbase.master", HBASE_MASTER);
            hbaseConfiguration.set("hbase.rootdir", HBASE_ROOTDIR);
            hbaseConfiguration.set("hbase.zookeeper.quorum", HBASE_ZOOKEEPER_QUORUM);
            hbaseConfiguration.set("hbase.zookeeper.property.clientPort", HBASE_ZOOKEEPER_PROPERTY_CLIENTPORT);
            try {
                hbaseConnection = ConnectionFactory.createConnection(hbaseConfiguration);
            } catch (IOException e) {
                System.err.println("Failed to obtain an HBase connection");
                e.printStackTrace();
                hbaseConnection = null;
            }
        }
        return hbaseConnection;
    }

    /**
     * Runs a full-table scan with the given filter and prints the result.
     *
     * @param filter filter to attach to a fresh {@link Scan}
     */
    public static void setFilterAndPrint(Filter filter) {
        Scan scan = new Scan();
        scan.setFilter(filter);
        setScanAndPrint(scan);
    }

    /**
     * Runs the given scan against the {@code test} table and prints one line
     * per returned cell. Errors are printed, not rethrown; the connection is
     * closed before returning.
     *
     * @param scan fully configured scan (filter, start row, ...)
     */
    public static void setScanAndPrint(Scan scan) {
        String resultString = "RowKey= %s, ColumnFamily= %s, ColumnName= %s, TimeStamp= %s, ColumnValue= %s";
        Connection conn = getHBaseConnection();
        if (conn == null) {
            System.err.println("HBase connection unavailable; scan skipped");
            return;
        }
        // Resources close in reverse order: scanner, then table, then (in finally) the connection.
        try (Table table = conn.getTable(TableName.valueOf("test"));
             ResultScanner resultScanner = table.getScanner(scan)) {
            for (Result rs : resultScanner) {
                for (Cell cell : rs.rawCells()) {
                    String family = Bytes.toString(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength());
                    String qualifier = Bytes.toString(cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength());
                    String rowKey = Bytes.toString(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength());
                    String value = Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength());
                    long timeStamp = cell.getTimestamp();
                    System.out.println(String.format(resultString, rowKey, family, qualifier, timeStamp, value));
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                conn.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
}
2. 比较过滤器
2.1 行过滤器
RowFilter
:行过滤器
实际上称之为行键过滤器更合适,用于行键的各种过滤上,构造函数为RowFilter(CompareOperator rowCompareOp, ByteArrayComparable rowComparator)(HBase 2.x 中接收 CompareFilter.CompareOp 的旧构造函数已被标记为废弃)。结合两个构造参数可以实现基于rowkey的不同过滤方式。
假设根据rowkey,数据分为3部分A、B、C,其中B为通过rowComparator选出的数据,
接着通过rowCompareOp来确定最终的数据:若为EQUAL则选择B,若为LESS_OR_EQUAL则选择A+B,如此类推。
2.1.1 取出rowkey为x的所有数据
/**
 * Fetches every cell of the row whose rowkey is exactly "101".
 */
@Test
public void rowFilter1() {
    BinaryComparator exactKey = new BinaryComparator(Bytes.toBytes("101"));
    Filter rowKeyIs101 = new RowFilter(CompareOperator.EQUAL, exactKey);
    HBaseUtils.setFilterAndPrint(rowKeyIs101);
    /*
     * Sample output:
     * RowKey= 101, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 10086
     * RowKey= 101, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= yid1
     * RowKey= 101, ColumnFamily= fam2, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 13522886092
     * RowKey= 101, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= xiaom1
     */
}
2.1.2 取出所有以x结尾的rowkey
/**
 * Fetches every row whose rowkey ends with "5".
 */
@Test
public void rowFilter2() {
    // The regex is anchored with '$' so only rowkeys that END in 5 match;
    // a SubstringComparator("5") would also match keys such as "151".
    Filter filter = new RowFilter(
            CompareOperator.EQUAL,
            new org.apache.hadoop.hbase.filter.RegexStringComparator("5$"));
    HBaseUtils.setFilterAndPrint(filter);
    /*
     * Sample output:
     * RowKey= 105, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 12580
     * RowKey= 105, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= muj5
     * RowKey= 105, ColumnFamily= fam2, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 15801019898
     * RowKey= 105, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= liux5
     */
}
2.1.3 取出所有不以x结尾的rowkey
/**
 * Fetches every row whose rowkey does NOT end with "5".
 */
@Test
public void rowFilter3() {
    // The regex is anchored with '$' so only rowkeys that END in 5 are excluded;
    // a SubstringComparator("5") would also exclude keys such as "151".
    Filter filter = new RowFilter(
            CompareOperator.NOT_EQUAL,
            new org.apache.hadoop.hbase.filter.RegexStringComparator("5$"));
    HBaseUtils.setFilterAndPrint(filter);
    /*
     * Sample output:
     * RowKey= 101, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 10086
     * RowKey= 101, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= yid1
     * RowKey= 101, ColumnFamily= fam2, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 13522886092
     * RowKey= 101, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= xiaom1
     * RowKey= 102, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 10000
     * RowKey= 102, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= dianx2
     * RowKey= 102, ColumnFamily= fam2, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 18201607321
     * RowKey= 102, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= xiaoh2
     * RowKey= 103, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 10001
     * RowKey= 103, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= liant3
     * RowKey= 103, ColumnFamily= fam2, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 15313089839
     * RowKey= 103, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= dah3
     * RowKey= 104, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 12306
     * RowKey= 104, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= tiel4
     * RowKey= 104, ColumnFamily= fam2, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 13872232462
     * RowKey= 104, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= zhangs4
     */
}
2.1.4 取出所有大于x的rowkey
/**
 * Fetches every row whose rowkey sorts after "103".
 */
@Test
public void rowFilter4() {
    BinaryComparator lowerBound = new BinaryComparator(Bytes.toBytes("103"));
    Filter keysAbove103 = new RowFilter(CompareOperator.GREATER, lowerBound);
    HBaseUtils.setFilterAndPrint(keysAbove103);
    /*
     * Sample output:
     * RowKey= 104, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 12306
     * RowKey= 104, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= tiel4
     * RowKey= 104, ColumnFamily= fam2, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 13872232462
     * RowKey= 104, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= zhangs4
     * RowKey= 105, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 12580
     * RowKey= 105, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= muj5
     * RowKey= 105, ColumnFamily= fam2, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 15801019898
     * RowKey= 105, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= liux5
     */
}
2.1.5 取出所有小于x的rowkey
/**
 * Fetches every row whose rowkey sorts before "103".
 */
@Test
public void rowFilter5() {
    BinaryComparator upperBound = new BinaryComparator(Bytes.toBytes("103"));
    Filter keysBelow103 = new RowFilter(CompareOperator.LESS, upperBound);
    HBaseUtils.setFilterAndPrint(keysBelow103);
    /*
     * Sample output:
     * RowKey= 101, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 10086
     * RowKey= 101, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= yid1
     * RowKey= 101, ColumnFamily= fam2, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 13522886092
     * RowKey= 101, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= xiaom1
     * RowKey= 102, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 10000
     * RowKey= 102, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= dianx2
     * RowKey= 102, ColumnFamily= fam2, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 18201607321
     * RowKey= 102, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= xiaoh2
     */
}
2.2 列族过滤器
FamilyFilter
,列族过滤器
用于列族的过滤,其余特性与RowFilter类似,只是范围由rowkey变成了列族名。
- 取出列族小于fam2的数据
/**
 * Keeps only the cells whose column family name sorts before "fam2".
 */
@Test
public void testFamilyFilter() throws IOException {
    BinaryComparator familyBound = new BinaryComparator(Bytes.toBytes("fam2"));
    Filter familiesBelowFam2 = new FamilyFilter(CompareOperator.LESS, familyBound);
    HBaseUtils.setFilterAndPrint(familiesBelowFam2);
    /*
     * Sample output:
     * RowKey= 101, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 10086
     * RowKey= 101, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= yid1
     * RowKey= 102, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 10000
     * RowKey= 102, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= dianx2
     * RowKey= 103, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 10001
     * RowKey= 103, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= liant3
     * RowKey= 104, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 12306
     * RowKey= 104, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= tiel4
     * RowKey= 105, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 12580
     * RowKey= 105, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= muj5
     */
}
2.3 列名过滤器
QualifierFilter
,列名过滤器
用于列名的过滤,其余特性与RowFilter类似,只是范围由rowkey变成了列名。
- 取出列名大于col1的数据
/**
 * Keeps only the cells whose column qualifier sorts after "col1".
 */
@Test
public void testQualifierFilter() {
    BinaryComparator qualifierBound = new BinaryComparator(Bytes.toBytes("col1"));
    Filter qualifiersAboveCol1 = new QualifierFilter(CompareOperator.GREATER, qualifierBound);
    HBaseUtils.setFilterAndPrint(qualifiersAboveCol1);
    /*
     * Sample output:
     * RowKey= 101, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= yid1
     * RowKey= 101, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= xiaom1
     * RowKey= 102, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= dianx2
     * RowKey= 102, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= xiaoh2
     * RowKey= 103, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= liant3
     * RowKey= 103, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= dah3
     * RowKey= 104, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= tiel4
     * RowKey= 104, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= zhangs4
     * RowKey= 105, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= muj5
     * RowKey= 105, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= liux5
     */
}
2.4 值过滤器
ValueFilter
,值过滤器
用于值的过滤,其余特性与RowFilter类似,只是范围由rowkey变成了值。
- 取出值的前缀为100的数据
/**
 * Prints every cell whose value starts with "100".
 */
public static void main(String[] args) {
    // BinaryPrefixComparator matches only values that BEGIN with "100";
    // a SubstringComparator would also match "100" in the middle of a value.
    Filter filter = new ValueFilter(
            CompareOperator.EQUAL,
            new BinaryPrefixComparator(Bytes.toBytes("100")));
    HBaseUtils.setFilterAndPrint(filter);
    /*
     * Sample output:
     * RowKey= 101, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 10086
     * RowKey= 102, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 10000
     * RowKey= 103, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 10001
     */
}
2.5 参考列过滤器
DependentColumnFilter
参考列过滤器
初步理解:与 ValueFilter 类似,只不过 ValueFilter 只输出满足条件的列,而 DependentColumnFilter 则将满足条件的列所在的行全部输出。更准确地说,DependentColumnFilter 以参考列为基准,只保留与参考列时间戳相同的单元格;本例中每行数据由同一个 Put 写入、时间戳一致,所以整行都会被输出。
2.5.1 取出前缀等于x的数据
/**
 * Selects the rows whose reference column fam1:col1 has a value starting
 * with "1000", without printing the reference column itself.
 */
@Test
public void dependentColumnFilter() {
    // Third argument: false prints the reference column (fam1:col1), true leaves it out.
    byte[] referenceFamily = Bytes.toBytes("fam1");
    byte[] referenceQualifier = Bytes.toBytes("col1");
    BinaryPrefixComparator startsWith1000 = new BinaryPrefixComparator(Bytes.toBytes("1000"));
    Filter dependentFilter = new DependentColumnFilter(
            referenceFamily, referenceQualifier, true, CompareOperator.EQUAL, startsWith1000);
    HBaseUtils.setFilterAndPrint(dependentFilter);
    /*
     * Output with false (reference column included):
     * RowKey= 102, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 10000 // reference column
     * RowKey= 102, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= dianx2
     * RowKey= 102, ColumnFamily= fam2, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 18201607321
     * RowKey= 102, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= xiaoh2
     * RowKey= 103, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 10001
     * RowKey= 103, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= liant3
     * RowKey= 103, ColumnFamily= fam2, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 15313089839
     * RowKey= 103, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= dah3
     */
    /*
     * Output with true (reference column omitted):
     * RowKey= 102, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= dianx2
     * RowKey= 102, ColumnFamily= fam2, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 18201607321
     * RowKey= 102, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= xiaoh2
     * RowKey= 103, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= liant3
     * RowKey= 103, ColumnFamily= fam2, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 15313089839
     * RowKey= 103, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= dah3
     */
}
2.5.2 取出前缀等于x的数据
/**
 * Selects the rows whose reference column fam1:col2 has a value starting
 * with "li", printing the reference column as well.
 */
@Test
public void dependentColumnFilter2(){
    // Third argument: false prints the reference column (fam1:col2 here), true leaves it out.
    byte[] referenceFamily = Bytes.toBytes("fam1");
    byte[] referenceQualifier = Bytes.toBytes("col2");
    BinaryPrefixComparator startsWithLi = new BinaryPrefixComparator(Bytes.toBytes("li"));
    Filter dependentFilter = new DependentColumnFilter(
            referenceFamily, referenceQualifier, false, CompareOperator.EQUAL, startsWithLi);
    HBaseUtils.setFilterAndPrint(dependentFilter);
    /*
     * Output with true (reference column omitted):
     * RowKey= 103, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 10001
     * RowKey= 103, ColumnFamily= fam2, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 15313089839
     * RowKey= 103, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= dah3
     */
    /*
     * Output with false (reference column included):
     * RowKey= 103, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 10001
     * RowKey= 103, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= liant3 // reference column
     * RowKey= 103, ColumnFamily= fam2, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 15313089839
     * RowKey= 103, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= dah3
     */
}
3. 专用过滤器
3.1 单值过滤器
SingleColumnValueFilter
,单值过滤器
可以看成是对DependentColumnFilter的一层包装。
上述代码示例中的功能可以通过如下代码实现:
// The sample table's column families are fam1 and fam2, so the reference column is fam1:col1.
Filter filter = new DependentColumnFilter(
        Bytes.toBytes("fam1"),
        Bytes.toBytes("col1"),
        false,
        CompareOperator.NOT_EQUAL, new SubstringComparator("100")
);
3.1.1 取出fam1:col1的值不含x的数据
/**
 * Fetches the rows whose fam1:col1 value does not contain the substring "100".
 */
@Test
public void singleColumnValueFilter1() {
    byte[] family = Bytes.toBytes("fam1");
    byte[] qualifier = Bytes.toBytes("col1");
    SubstringComparator contains100 = new SubstringComparator("100");
    Filter without100 = new SingleColumnValueFilter(family, qualifier, CompareOperator.NOT_EQUAL, contains100);
    HBaseUtils.setFilterAndPrint(without100);
    /*
     * Sample output:
     * RowKey= 104, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 12306
     * RowKey= 104, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= tiel4
     * RowKey= 104, ColumnFamily= fam2, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 13872232462
     * RowKey= 104, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= zhangs4
     * RowKey= 105, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 12580
     * RowKey= 105, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= muj5
     * RowKey= 105, ColumnFamily= fam2, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 15801019898
     * RowKey= 105, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= liux5
     */
}
3.1.2 取出fam1:col2的值有x的数据
/**
 * Fetches the rows whose fam1:col2 value contains the substring "t".
 */
@Test
public void singleColumnValueFilter2() {
    byte[] family = Bytes.toBytes("fam1");
    byte[] qualifier = Bytes.toBytes("col2");
    SubstringComparator containsT = new SubstringComparator("t");
    Filter withT = new SingleColumnValueFilter(family, qualifier, CompareOperator.EQUAL, containsT);
    HBaseUtils.setFilterAndPrint(withT);
    /*
     * Sample output:
     * RowKey= 103, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 10001
     * RowKey= 103, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= liant3
     * RowKey= 103, ColumnFamily= fam2, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 15313089839
     * RowKey= 103, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= dah3
     * RowKey= 104, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 12306
     * RowKey= 104, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= tiel4
     * RowKey= 104, ColumnFamily= fam2, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 13872232462
     * RowKey= 104, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= zhangs4
     */
}
3.1.3 取出fam2的col1中值大于x的数据
/**
 * Fetches the rows whose fam2:col1 value compares byte-wise greater than "182016073".
 */
@Test
public void singleColumnValueFilter3() throws IOException {
    byte[] family = Bytes.toBytes("fam2");
    byte[] qualifier = Bytes.toBytes("col1");
    BinaryComparator lowerBound = new BinaryComparator(Bytes.toBytes("182016073"));
    Filter above182016073 = new SingleColumnValueFilter(family, qualifier, CompareOperator.GREATER, lowerBound);
    HBaseUtils.setFilterAndPrint(above182016073);
    /*
     * Sample output:
     * RowKey= 102, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 10000
     * RowKey= 102, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= dianx2
     * RowKey= 102, ColumnFamily= fam2, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 18201607321
     * RowKey= 102, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= xiaoh2
     */
}
3.2 单列值排除过滤器
SingleColumnValueExcludeFilter
: 单列值排除过滤器
SingleColumnValueExcludeFilter
与 SingleColumnValueFilter
唯一的区别是:
SingleColumnValueFilter 返回满足条件的行的所有列(包含用于判断的参考列),
SingleColumnValueExcludeFilter 同样返回满足条件的行,但结果中不包含用于判断的参考列。
3.2.1 取出fam2:col1值等于x的数据
/**
 * Finds the rows whose fam2:col1 value starts with "18201607321" and prints
 * their other columns.
 */
@Test
public void testSingleColumnValueExcludeFilter() {
    byte[] family = Bytes.toBytes("fam2");
    byte[] qualifier = Bytes.toBytes("col1");
    BinaryPrefixComparator phonePrefix = new BinaryPrefixComparator(Bytes.toBytes("18201607321"));
    Filter excludeMatched = new SingleColumnValueExcludeFilter(family, qualifier, CompareOperator.EQUAL, phonePrefix);
    HBaseUtils.setFilterAndPrint(excludeMatched);
    /*
     * Returns the row whose fam2:col1 is 18201607321, but without fam2:col1 itself:
     * RowKey= 102, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 10000
     * RowKey= 102, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= dianx2
     * RowKey= 102, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= xiaoh2
     */
}
3.2.2 取出fam1:col1值等于x的数据
/**
 * Finds the rows whose fam1:col1 value starts with "100" and prints their
 * other columns.
 */
@Test
public void testSingleColumnValueExcludeFilter2() {
    byte[] family = Bytes.toBytes("fam1");
    byte[] qualifier = Bytes.toBytes("col1");
    BinaryPrefixComparator startsWith100 = new BinaryPrefixComparator(Bytes.toBytes("100"));
    Filter excludeMatched = new SingleColumnValueExcludeFilter(family, qualifier, CompareOperator.EQUAL, startsWith100);
    HBaseUtils.setFilterAndPrint(excludeMatched);
    /*
     * Rows matched on fam1:col1, printed without that column:
     * RowKey= 101, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= yid1
     * RowKey= 101, ColumnFamily= fam2, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 13522886092
     * RowKey= 101, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= xiaom1
     * RowKey= 102, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= dianx2
     * RowKey= 102, ColumnFamily= fam2, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 18201607321
     * RowKey= 102, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= xiaoh2
     * RowKey= 103, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= liant3
     * RowKey= 103, ColumnFamily= fam2, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 15313089839
     * RowKey= 103, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= dah3
     */
}
3.3 前缀过滤器
PrefixFilter
,前缀过滤器
相当于RowFilter配合BinaryPrefixComparator使用。
上述代码示例中的功能可以通过如下代码实现:
// CompareOperator replaces the deprecated CompareFilter.CompareOp, matching the other examples.
Filter filter = new RowFilter(CompareOperator.EQUAL,
        new BinaryPrefixComparator(Bytes.toBytes("101")));
- 选择rowkey前缀为 101 的数据
/**
 * Prints the rows whose rowkey starts with "101".
 */
public static void main(String[] args) {
    byte[] rowKeyPrefix = Bytes.toBytes("101");
    HBaseUtils.setFilterAndPrint(new PrefixFilter(rowKeyPrefix));
    /*
     * Sample output:
     * RowKey= 101, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 10086
     * RowKey= 101, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= yid1
     * RowKey= 101, ColumnFamily= fam2, ColumnName= col1, TimeStamp= 1551669078407, ColumnValue= 13522886092
     * RowKey= 101, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551669078407, ColumnValue= xiaom1
     */
}
3.4 分页过滤器
PageFilter
,分页过滤器
可以看成 RowFilter 加一个计数器组合而成
- 以 rowkey=102 为起始值分页大小为 2
/**
 * Prints one page of two rows, starting at rowkey "102".
 */
public static void main(String[] args) {
    Filter filter = new PageFilter(2);
    Scan scan = new Scan();
    scan.setFilter(filter);
    scan.withStartRow(Bytes.toBytes("102"));
    // Run the configured scan itself: setFilterAndPrint(filter) builds a fresh
    // Scan and would silently drop the start row set above.
    HBaseUtils.setScanAndPrint(scan);
    /*
     * Expected output (rows 102 and 103):
     * RowKey= 102, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551945588412, ColumnValue= 10000
     * RowKey= 102, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue= dianx2
     * RowKey= 102, ColumnFamily= fam2, ColumnName= col1, TimeStamp= 1551945588412, ColumnValue= 18201607321
     * RowKey= 102, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue= xiaoh2
     * RowKey= 103, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551945588412, ColumnValue= 10001
     * RowKey= 103, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue= liant3
     * RowKey= 103, ColumnFamily= fam2, ColumnName= col1, TimeStamp= 1551945588412, ColumnValue= 15313089839
     * RowKey= 103, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue= dah3
     */
}
3.5 行键过滤器
KeyOnlyFilter
,行键过滤器
只输出行键的过滤器,会重复输出rowkey
- 示例
/**
 * Prints every cell's key; the sample output shows the values coming back empty.
 */
public static void main(String[] args) {
    HBaseUtils.setFilterAndPrint(new KeyOnlyFilter());
    /*
     * Sample output:
     * RowKey= 101, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551945588412, ColumnValue=
     * RowKey= 101, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue=
     * RowKey= 101, ColumnFamily= fam2, ColumnName= col1, TimeStamp= 1551945588412, ColumnValue=
     * RowKey= 101, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue=
     * RowKey= 102, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551945588412, ColumnValue=
     * RowKey= 102, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue=
     * RowKey= 102, ColumnFamily= fam2, ColumnName= col1, TimeStamp= 1551945588412, ColumnValue=
     * RowKey= 102, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue=
     * RowKey= 103, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551945588412, ColumnValue=
     * RowKey= 103, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue=
     * RowKey= 103, ColumnFamily= fam2, ColumnName= col1, TimeStamp= 1551945588412, ColumnValue=
     * RowKey= 103, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue=
     * RowKey= 104, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551945588412, ColumnValue=
     * RowKey= 104, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue=
     * RowKey= 104, ColumnFamily= fam2, ColumnName= col1, TimeStamp= 1551945588412, ColumnValue=
     * RowKey= 104, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue=
     * RowKey= 105, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551945588412, ColumnValue=
     * RowKey= 105, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue=
     * RowKey= 105, ColumnFamily= fam2, ColumnName= col1, TimeStamp= 1551945588412, ColumnValue=
     * RowKey= 105, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue=
     */
}
3.6 首次行键过滤器
FirstKeyOnlyFilter
首次行键过滤器。
FirstKeyOnlyFilter
与 KeyOnlyFilter
的区别
KeyOnlyFilter 会将同一个 rowkey 输出多次(取决于有多少列),
FirstKeyOnlyFilter 将相同的 rowkey 只输出一次,会带有某些列的 value
- 示例: 输出不重复的rowKey,会带value
/**
 * Prints one cell per row (the first one), value included.
 */
public static void main(String[] args) {
    HBaseUtils.setFilterAndPrint(new FirstKeyOnlyFilter());
    /*
     * Sample output:
     * RowKey= 101, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551945588412, ColumnValue= 10086
     * RowKey= 102, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551945588412, ColumnValue= 10000
     * RowKey= 103, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551945588412, ColumnValue= 10001
     * RowKey= 104, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551945588412, ColumnValue= 12306
     * RowKey= 105, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551945588412, ColumnValue= 12580
     */
}
3.7 包含结束行过滤器
InclusiveStopFilter
,包含结束行过滤器
Scan.withStartRow()(旧版为 setStartRow())默认会将开始行包含在结果中,但 Scan.withStopRow()(旧版为 setStopRow())默认不会将结束行包含在结果中。
用此过滤器可将结束行包含进结果中。
/**
 * Scans from rowkey "102" up to and including rowkey "103".
 */
public static void main(String[] args) {
    Scan rangeScan = new Scan();
    rangeScan.withStartRow(Bytes.toBytes("102"));
    // The filter makes the stop row "103" part of the result.
    rangeScan.setFilter(new InclusiveStopFilter(Bytes.toBytes("103")));
    HBaseUtils.setScanAndPrint(rangeScan);
    /*
     * Sample output:
     * RowKey= 102, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551945588412, ColumnValue= 10000
     * RowKey= 102, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue= dianx2
     * RowKey= 102, ColumnFamily= fam2, ColumnName= col1, TimeStamp= 1551945588412, ColumnValue= 18201607321
     * RowKey= 102, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue= xiaoh2
     * RowKey= 103, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551945588412, ColumnValue= 10001
     * RowKey= 103, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue= liant3
     * RowKey= 103, ColumnFamily= fam2, ColumnName= col1, TimeStamp= 1551945588412, ColumnValue= 15313089839
     * RowKey= 103, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue= dah3
     */
}
3.8 时间戳过滤器
TimestampsFilter
,时间戳过滤器, 选择特定的timestamps,根据时间戳选择数据
/**
 * Prints the cells whose timestamp is exactly 1551945588412.
 */
public static void main(String[] args) {
    Filter atTimestamp = new TimestampsFilter(Lists.newArrayList(1551945588412L));
    HBaseUtils.setFilterAndPrint(atTimestamp);
    /*
     * Sample output:
     * RowKey= 101, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551945588412, ColumnValue= 10086
     * RowKey= 101, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue= yid1
     * RowKey= 101, ColumnFamily= fam2, ColumnName= col1, TimeStamp= 1551945588412, ColumnValue= 13522886092
     * RowKey= 101, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue= xiaom1
     * RowKey= 102, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551945588412, ColumnValue= 10000
     * RowKey= 102, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue= dianx2
     * RowKey= 102, ColumnFamily= fam2, ColumnName= col1, TimeStamp= 1551945588412, ColumnValue= 18201607321
     * RowKey= 102, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue= xiaoh2
     * RowKey= 103, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551945588412, ColumnValue= 10001
     * RowKey= 103, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue= liant3
     * RowKey= 103, ColumnFamily= fam2, ColumnName= col1, TimeStamp= 1551945588412, ColumnValue= 15313089839
     * RowKey= 103, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue= dah3
     * RowKey= 104, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551945588412, ColumnValue= 12306
     * RowKey= 104, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue= tiel4
     * RowKey= 104, ColumnFamily= fam2, ColumnName= col1, TimeStamp= 1551945588412, ColumnValue= 13872232462
     * RowKey= 104, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue= zhangs4
     * RowKey= 105, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551945588412, ColumnValue= 12580
     * RowKey= 105, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue= muj5
     * RowKey= 105, ColumnFamily= fam2, ColumnName= col1, TimeStamp= 1551945588412, ColumnValue= 15801019898
     * RowKey= 105, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue= liux5
     */
}
3.9 列级数过滤器
ColumnCountGetFilter
,列级数过滤器
控制每行取多少列,按列族名和列名的字典序进行排列。
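实际测试中发现当 ColumnCountGetFilter(1) 时,
只输出第一行数据的第一列,而不是所有数据的第一列。这并不是 bug:官方 Javadoc 说明该过滤器是为 Get 编写的,取够列数后 filterAllRemaining() 会返回 true,因此不适合作为 Scan 的过滤器,在 Scan 中使用时结果可能不符合预期。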
/**
 * ColumnCountGetFilter example: builds the filter with a column limit of 2 and hands it to
 * {@code HBaseUtils.setFilterAndPrint}. The output recorded in the article is listed below.
 */
public static void main(String[] args) {
    // Maximum number of columns requested from the filter.
    final int maxColumns = 2;
    final Filter columnLimit = new ColumnCountGetFilter(maxColumns);
    HBaseUtils.setFilterAndPrint(columnLimit);
    /*
     * Recorded output (only fam1:col1 and fam1:col2 appear for each row):
     * RowKey= 101, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551945588412, ColumnValue= 10086
     * RowKey= 101, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue= yid1
     * RowKey= 102, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551945588412, ColumnValue= 10000
     * RowKey= 102, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue= dianx2
     * RowKey= 103, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551945588412, ColumnValue= 10001
     * RowKey= 103, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue= liant3
     * RowKey= 104, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551945588412, ColumnValue= 12306
     * RowKey= 104, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue= tiel4
     * RowKey= 105, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551945588412, ColumnValue= 12580
     * RowKey= 105, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue= muj5
     */
}
3.10 列分页过滤器
ColumnPaginationFilter,列分页过滤器
正如HBase表结构设计中所描述的表的横向扩展,ColumnPaginationFilter无疑是针对此场景的最佳分页手段
同ColumnCountGetFilter,按照列族名和列名的字典序进行排列。
/**
 * ColumnPaginationFilter example: for every row, skip the first {@code offset} columns and
 * return at most {@code limit} columns. The filter is handed to
 * {@code HBaseUtils.setFilterAndPrint}; the output recorded in the article is listed below.
 */
public static void main(String[] args) {
    // The constructor takes (limit, offset) in that order. Columns are ordered by family name,
    // then by qualifier: fam1:col1, fam1:col2, fam2:col1, fam2:col2.
    final int limit = 1;   // return a single column per row
    final int offset = 3;  // skip the first three columns, so the 4th one (fam2:col2) is returned
    Filter filter = new ColumnPaginationFilter(limit, offset);
    HBaseUtils.setFilterAndPrint(filter);
    /*
     * Recorded output:
     * RowKey= 101, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue= xiaom1
     * RowKey= 102, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue= xiaoh2
     * RowKey= 103, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue= dah3
     * RowKey= 104, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue= zhangs4
     * RowKey= 105, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue= liux5
     */
}
3.11 列前缀过滤器
ColumnPrefixFilter,列前缀过滤器
通过列名进行前缀匹配过滤
等同于 new QualifierFilter(CompareOperator.EQUAL, new BinaryPrefixComparator(Bytes.toBytes("...")));
/**
 * ColumnPrefixFilter example: selects the columns whose qualifier begins with "col2" and hands
 * the filter to {@code HBaseUtils.setFilterAndPrint}. The output recorded in the article is
 * listed below.
 */
public static void main(String[] args) {
    // Qualifier prefix to match, encoded as bytes.
    final byte[] qualifierPrefix = Bytes.toBytes("col2");
    final Filter prefixFilter = new ColumnPrefixFilter(qualifierPrefix);
    HBaseUtils.setFilterAndPrint(prefixFilter);
    /*
     * Recorded output (col2 of both column families for every row):
     * RowKey= 101, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue= yid1
     * RowKey= 101, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue= xiaom1
     * RowKey= 102, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue= dianx2
     * RowKey= 102, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue= xiaoh2
     * RowKey= 103, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue= liant3
     * RowKey= 103, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue= dah3
     * RowKey= 104, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue= tiel4
     * RowKey= 104, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue= zhangs4
     * RowKey= 105, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue= muj5
     * RowKey= 105, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue= liux5
     */
}
3.12 随机行过滤器
RandomRowFilter,随机行过滤器
构造方法 Filter filter = new RandomRowFilter(0.2f);
每次的运行结果都不一样,随机生成结果。
参数 0.2f 只是参考作用,并不是每次必定取 20% 的样本,意思是每条数据有 20% 概率被选择,
所以所有数据都没被选择的可能也是有的。
/**
 * RandomRowFilter example: builds the filter with a chance of 0.2f and hands it to
 * {@code HBaseUtils.setFilterAndPrint}. The result differs from run to run; the output below is
 * the one recorded in the article.
 */
public static void main(String[] args) {
    // Value passed to the RandomRowFilter constructor.
    final float chance = 0.2f;
    final Filter randomRows = new RandomRowFilter(chance);
    HBaseUtils.setFilterAndPrint(randomRows);
    /*
     * Recorded output of one run (only row 101 happened to be selected):
     * RowKey= 101, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551945588412, ColumnValue= 10086
     * RowKey= 101, ColumnFamily= fam1, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue= yid1
     * RowKey= 101, ColumnFamily= fam2, ColumnName= col1, TimeStamp= 1551945588412, ColumnValue= 13522886092
     * RowKey= 101, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue= xiaom1
     */
}
4. 装饰性过滤器
4.1 跳转过滤器
SkipFilter,跳转过滤器
SkipFilter 的构造方法可以接收任意 Filter,但通常与 ValueFilter 搭配使用。
ValueFilter用于输出满足条件的行和对应的列(该行中也有1至N列满足),
当加上SkipFilter后,若某行中有不满足ValueFilter的列存在,
则整行都不会输出(即,只要某行中有一列不满足ValueFilter,则其它满足的列也不会输出)。
/**
 * SkipFilter example: wraps a ValueFilter in a SkipFilter and hands the result to
 * {@code HBaseUtils.setFilterAndPrint}.
 */
public static void main(String[] args) {
    // Inner filter: accept cells whose value contains the substring "100".
    Filter valueContains100 = new ValueFilter(
            CompareOperator.EQUAL,
            new SubstringComparator("100")
    );
    // With SkipFilter, a row is dropped entirely if any of its cells fails the inner filter.
    Filter skipFilter = new SkipFilter(valueContains100);
    HBaseUtils.setFilterAndPrint(skipFilter);
    // The output is empty: no row has "100" inside every one of its column values.
    // (SubstringComparator matches anywhere in the value, not only at the beginning.)
}
4.2 全匹配过滤器
WhileMatchFilter,全匹配过滤器
使用过滤器时,当遇到不满足过滤条件的数据时将不会返回该数据,
当加上WhileMatchFilter后,遇到第一个不符合条件的数据时将会停止继续的扫描。
/**
 * WhileMatchFilter example: wraps a ValueFilter in a WhileMatchFilter and hands the result to
 * {@code HBaseUtils.setFilterAndPrint}.
 */
public static void main(String[] args) {
    // Inner filter: accept cells whose value contains the substring "100".
    Filter valueContains100 = new ValueFilter(
            CompareOperator.EQUAL,
            new SubstringComparator("100")
    );
    // WhileMatchFilter stops the scan at the first cell that the wrapped filter rejects.
    Filter whileMatch = new WhileMatchFilter(valueContains100);
    HBaseUtils.setFilterAndPrint(whileMatch);
    // Row 101, fam1:col1 ("10086") contains "100" and is returned. The next cell,
    // row 101, fam1:col2 ("yid1"), does not match, so the scan ends there.
    // Recorded output:
    // RowKey= 101, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551945588412, ColumnValue= 10086
}
5. 过滤器集合
HBase 的过滤器集合是一组用于检索和筛选存储在HBase表中数据的工具。这些过滤器可以按行键、列族、列修饰符和时间戳进行过滤,也可以根据列值的类型和范围进行过滤。
package com.hnbian.filters;
import com.hnbian.utils.HBaseUtils;
import org.apache.hadoop.hbase.CompareOperator;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;
import java.util.ArrayList;
import java.util.List;
/**
* 组合过滤器,将各种过滤器组合在一起使用
*/
/**
 * Combined-filter example: several filters are collected in a {@code FilterList} and applied
 * together through {@code HBaseUtils.setFilterAndPrint}.
 */
public class FilterListTest {

    /**
     * Builds two row-key range filters and a DependentColumnFilter, combines them with
     * {@code MUST_PASS_ALL}, and prints the result. The output recorded in the article is listed
     * at the end of the method.
     */
    public static void main(String[] args) {
        List<Filter> filters = new ArrayList<>();

        // Row filter: keep rows whose rowkey is greater than or equal to "101".
        Filter rowKeyLowerBound = new RowFilter(
                CompareOperator.GREATER_OR_EQUAL,
                new BinaryComparator(Bytes.toBytes("101"))
        );

        // Row filter: keep rows whose rowkey is less than or equal to "106".
        Filter rowKeyUpperBound = new RowFilter(
                CompareOperator.LESS_OR_EQUAL,
                new BinaryComparator(Bytes.toBytes("106"))
        );

        // Optional extra filter (disabled in the article):
        // new ValueFilter(CompareOperator.EQUAL, new RegexStringComparator("yi"))

        // Reference-column filter on fam1:col2: only cells whose timestamp matches a fam1:col2
        // cell with a value starting with "yid1" are kept. The third argument
        // (dropDependentColumn = true) removes the reference column fam1:col2 from the result.
        Filter dependentOnYid1 = new DependentColumnFilter(
                Bytes.toBytes("fam1"),
                Bytes.toBytes("col2"),
                true,
                CompareOperator.EQUAL,
                new BinaryPrefixComparator(Bytes.toBytes("yid1"))
        );

        filters.add(rowKeyLowerBound);
        filters.add(rowKeyUpperBound);
        filters.add(dependentOnYid1);

        // Operator.MUST_PASS_ONE would combine the filters with OR semantics.
        // Operator.MUST_PASS_ALL (the default) combines them with AND semantics.
        Filter filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL, filters);

        // The helper receives the filter directly, as in the other examples, so no Scan,
        // ResultScanner or Connection is created here.
        HBaseUtils.setFilterAndPrint(filterList);
        /*
         * Recorded output (row 101 without the reference column fam1:col2):
         * RowKey= 101, ColumnFamily= fam1, ColumnName= col1, TimeStamp= 1551945588412, ColumnValue= 10086
         * RowKey= 101, ColumnFamily= fam2, ColumnName= col1, TimeStamp= 1551945588412, ColumnValue= 13522886092
         * RowKey= 101, ColumnFamily= fam2, ColumnName= col2, TimeStamp= 1551945588412, ColumnValue= xiaom1
         */
    }
}