介绍 HBase 过滤器类别与使用方式


1. 数据以及代码准备

1.1 hbase过滤器介绍

以下介绍过滤器是基于HBase2.0.2版本。

HBase的Get和Scan实例可以调用setFilter()来设置过滤器,HBase的过滤器种类繁多,以满足不同的过滤需求。Filter作用于各个RegionServer,通过使用过滤器可以高效地获取数据。HBase Filter主要分为三大类,用户也可以通过继承FilterBase或实现Filter接口来自定义Filter。若需要多个Filter结合使用,可以通过FilterList来满足。
以下主要对各个Filter进行简单的了解。
Comparison Filters (比较过滤器)
Dedicated Filters (专用过滤器)
Decorating Filters (装饰性过滤器)

1.2 数据准备

rowkey fam1:col1 fam1:col2 fam2:col1 fam2:col2
101 10086 yid1 13522886092 xiaom1
102 10000 dianx2 18201607321 xiaoh2
103 10001 liant3 15313089839 dah3
104 12306 tiel4 13872232462 zhangs4
105 12580 muj5 15801019898 liux5

1.3 HBase比较器

比较器 含义
BinaryComparator 按字节索引顺序比较指定字节数组,匹配完整字节数组
BinaryPrefixComparator 按字节索引顺序比较指定字节数组前缀(左端数据),匹配字节数组前缀
NullComparator 判断给定的是否为空
BitComparator 按位比较
RegexStringComparator 正则表达式比较,仅支持EQUAL和NOT_EQUAL,匹配正则表达式
SubstringComparator 子串比较,匹配子字符串

1.4 HBase比较符

import org.apache.hadoop.hbase.CompareOperator;
比较符 含义
LESS <
LESS_OR_EQUAL <=
EQUAL =
NOT_EQUAL <>
GREATER_OR_EQUAL >=
GREATER >
NO_OP 无操作

1.5 准备代码

建表并插入数据

package com.hnbian.filters;

import com.hnbian.utils.HBaseUtils;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.util.Bytes;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class HBaseTest {
    private static Connection hbaseConnection;
    private byte[] fam1 = Bytes.toBytes("fam1");
    private byte[] col1 = Bytes.toBytes("col1");
    private byte[] fam2 = Bytes.toBytes("fam2");
    private byte[] col2 = Bytes.toBytes("col2");

    public static void main(String[] args) {
        HBaseTest hBaseTest = new HBaseTest();
        //创建表
        //hBaseTest.createTable();
        //添加数据
        hBaseTest.saveData();
    }

    /**
     * 保存数据
     */
    public void saveData() {
        hbaseConnection = HBaseUtils.getHBaseConnection();
        List<Put> puts = new ArrayList<Put>();
        Put put1 = new Put(Bytes.toBytes("101"));//为指定行创建一个Put操作

        put1.addColumn(fam1, col1, Bytes.toBytes("10086"));
        put1.addColumn(fam1, col2, Bytes.toBytes("yid1"));
        put1.addColumn(fam2, col1, Bytes.toBytes("13522886092"));
        put1.addColumn(fam2, col2, Bytes.toBytes("xiaom1"));

        Put put2 = new Put(Bytes.toBytes("102"));
        put2.addColumn(fam1, col1, Bytes.toBytes("10000"));
        put2.addColumn(fam1, col2, Bytes.toBytes("dianx2"));
        put2.addColumn(fam2, col1, Bytes.toBytes("18201607321"));
        put2.addColumn(fam2, col2, Bytes.toBytes("xiaoh2"));

        Put put3 = new Put(Bytes.toBytes("103"));
        put3.addColumn(fam1, col1, Bytes.toBytes("10001"));
        put3.addColumn(fam1, col2, Bytes.toBytes("liant3"));
        put3.addColumn(fam2, col1, Bytes.toBytes("15313089839"));
        put3.addColumn(fam2, col2, Bytes.toBytes("dah3"));

        Put put4 = new Put(Bytes.toBytes("104"));
        put4.addColumn(fam1, col1, Bytes.toBytes("12306"));
        put4.addColumn(fam1, col2, Bytes.toBytes("tiel4"));
        put4.addColumn(fam2, col1, Bytes.toBytes("13872232462"));
        put4.addColumn(fam2, col2, Bytes.toBytes("zhangs4"));

        Put put5 = new Put(Bytes.toBytes("105"));
        put5.addColumn(fam1, col1, Bytes.toBytes("12580"));
        put5.addColumn(fam1, col2, Bytes.toBytes("muj5"));
        put5.addColumn(fam2, col1, Bytes.toBytes("15801019898"));
        put5.addColumn(fam2, col2, Bytes.toBytes("liux5"));
        puts.add(put1);
        puts.add(put2);
        puts.add(put3);
        puts.add(put4);
        puts.add(put5);
        try {
            if (null != hbaseConnection && !hbaseConnection.isClosed()) {
                Table table = hbaseConnection.getTable(TableName.valueOf("test"));
                table.put(puts);
                table.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            close(hbaseConnection);
        }
    }

    /**
     * 创建表
     *
     * @return
     */
    public void createTable() {
        String tableName = "test";
        try {
            hbaseConnection = HBaseUtils.getHBaseConnection();
            Admin admin = hbaseConnection.getAdmin();
            TableName tn = TableName.valueOf(tableName);
            //创建表描述
            TableDescriptorBuilder mtd = TableDescriptorBuilder.newBuilder(tn);
            //fam1列簇描述
            ColumnFamilyDescriptorBuilder.ModifyableColumnFamilyDescriptor cfd1 =
                    new ColumnFamilyDescriptorBuilder.ModifyableColumnFamilyDescriptor(fam1);
            //fam2列簇描述
            ColumnFamilyDescriptorBuilder.ModifyableColumnFamilyDescriptor cfd2 =
                    new ColumnFamilyDescriptorBuilder.ModifyableColumnFamilyDescriptor(fam2);
            //设置压缩算法
            cfd1.setCompressionType(Compression.Algorithm.GZ);
            cfd2.setCompressionType(Compression.Algorithm.GZ);

            Set<ColumnFamilyDescriptor> set = new HashSet();
            set.add(cfd1);
            set.add(cfd2);
            mtd.setColumnFamilies(set);

            admin.createTable(mtd.build());
            admin.close();

        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            close(hbaseConnection);
        }
    }

    /**
     * 关闭连接
     *
     * @param hbaseConnection
     */
    public void close(Connection hbaseConnection) {
        if (!hbaseConnection.isClosed()) {
            try {
                hbaseConnection.close();
            } catch (Exception e) {
                e.printStackTrace();
            }

        }
    }
}

查询打印数据

package com.hnbian.utils;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

/**
 * HBase 工具类
 * @author hnbian
 */
public class HBaseUtils {

    private static final String HBASE_MASTER = "node1.com";
    private static final String HBASE_ROOTDIR = "/apps/hbase/data";
    private static final String HBASE_ZOOKEEPER_QUORUM = "node3.com,node2.com,node4.com";
    private static final String HBASE_ZOOKEEPER_PROPERTY_CLIENTPORT = "2181";

    /** Cached connection; replaced whenever it is missing or has been closed. */
    private static Connection hbaseConnection = null;

    /**
     * Returns an open HBase connection, creating one when none is cached or the
     * cached one has been closed (setScanAndPrint closes it after every scan).
     *
     * @return the connection, or null when it could not be created
     */
    public static Connection getHBaseConnection() {
        if (hbaseConnection == null || hbaseConnection.isClosed()) {
            Configuration hbaseConfiguration = HBaseConfiguration.create();
            hbaseConfiguration.set("hbase.master", HBASE_MASTER);
            hbaseConfiguration.set("hbase.rootdir", HBASE_ROOTDIR);
            hbaseConfiguration.set("hbase.zookeeper.quorum", HBASE_ZOOKEEPER_QUORUM);
            hbaseConfiguration.set("hbase.zookeeper.property.clientPort", HBASE_ZOOKEEPER_PROPERTY_CLIENTPORT);
            try {
                hbaseConnection = ConnectionFactory.createConnection(hbaseConfiguration);
            } catch (IOException e) {
                // Report the failure instead of swallowing it; callers see null.
                e.printStackTrace();
                hbaseConnection = null;
            }
        }
        return hbaseConnection;
    }

    /**
     * Runs a full-table scan with the given filter and prints every returned cell.
     *
     * @param filter filter to apply to the scan
     */
    public static void setFilterAndPrint(Filter filter) {
        Scan scan = new Scan();
        scan.setFilter(filter);
        setScanAndPrint(scan);
    }

    /**
     * Runs the given scan against table "test" and prints one line per returned cell.
     * The connection is closed afterwards; the next call obtains a fresh one.
     *
     * @param scan fully configured scan to execute
     */
    public static void setScanAndPrint(Scan scan) {
        String resultString = "RowKey= %s, ColumnFamily= %s, ColumnName= %s,  TimeStamp= %s, ColumnValue= %s";
        Connection conn = getHBaseConnection();
        if (conn == null) {
            System.err.println("No HBase connection available; scan skipped.");
            return;
        }
        try {
            // Resources are released in reverse order: scanner first, then table.
            try (Table table = conn.getTable(TableName.valueOf("test"));
                 ResultScanner resultScanner = table.getScanner(scan)) {
                for (Result rs : resultScanner) {
                    for (Cell cell : rs.rawCells()) {
                        String family = Bytes.toString(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength());
                        String qualifier = Bytes.toString(cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength());
                        String rowKey = Bytes.toString(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength());
                        String value = Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength());
                        long timeStamp = cell.getTimestamp();
                        System.out.println(String.format(resultString, rowKey, family, qualifier, timeStamp, value));
                    }
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Close the connection last, after the scanner and table that depend on it.
            try {
                conn.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
}

2. 比较过滤器

2.1 行过滤器

RowFilter:行过滤器

实际上称之为行键过滤器更合适,用于行键的各种过滤上。在 HBase 2.x 中构造函数为 RowFilter(CompareOperator rowCompareOp, ByteArrayComparable rowComparator)(旧版使用 CompareFilter.CompareOp 的构造函数已废弃)。结合两个构造参数可以实现基于rowkey的不同过滤方式。
假设根据rowkey,数据分为3部分A、B、C,其中B为通过rowComparator选出的数据,
接着通过rowCompareOp来确定最终的数据:若为EQUAL则选择B,若为LESS_OR_EQUAL则选择A+B,如此类推。

2.1.1 取出rowkey为x的所有数据

/**
     * Prints every cell of the row whose rowkey is exactly "101".
     */
    @Test
    public void rowFilter1() {
        BinaryComparator exactKey = new BinaryComparator(Bytes.toBytes("101"));
        Filter filter = new RowFilter(CompareOperator.EQUAL, exactKey);
        HBaseUtils.setFilterAndPrint(filter);
/*
 * Expected output:
 * RowKey= 101, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 10086
 * RowKey= 101, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= yid1
 * RowKey= 101, ColumnFamily= fam2, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 13522886092
 * RowKey= 101, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= xiaom1
 */
    }

2.1.2 取出所有包含x的rowkey(示例数据中等价于以x结尾)

/**
     * Prints the rows whose rowkey contains the substring "5".
     * <p>
     * SubstringComparator matches anywhere in the key, not only at the end; with the
     * sample rowkeys 101-105 this happens to select only "105", the one key ending in 5.
     */
    @Test
    public void rowFilter2() {
        SubstringComparator containsFive = new SubstringComparator("5");
        Filter filter = new RowFilter(CompareOperator.EQUAL, containsFive);
        HBaseUtils.setFilterAndPrint(filter);
/*
 * Expected output:
 * RowKey= 105, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 12580
 * RowKey= 105, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= muj5
 * RowKey= 105, ColumnFamily= fam2, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 15801019898
 * RowKey= 105, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= liux5
 */
    }

2.1.3 取出所有不包含x的rowkey(示例数据中等价于不以x结尾)

/**
     * Prints the rows whose rowkey does NOT contain the substring "5".
     * <p>
     * SubstringComparator matches anywhere in the key; with the sample rowkeys 101-105
     * this is the same as "does not end in 5", i.e. rows 101-104.
     */
    @Test
    public void rowFilter3() {
        SubstringComparator containsFive = new SubstringComparator("5");
        Filter filter = new RowFilter(CompareOperator.NOT_EQUAL, containsFive);
        HBaseUtils.setFilterAndPrint(filter);
/*
 * Expected output:
 * RowKey= 101, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 10086
 * RowKey= 101, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= yid1
 * RowKey= 101, ColumnFamily= fam2, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 13522886092
 * RowKey= 101, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= xiaom1
 * RowKey= 102, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 10000
 * RowKey= 102, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= dianx2
 * RowKey= 102, ColumnFamily= fam2, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 18201607321
 * RowKey= 102, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= xiaoh2
 * RowKey= 103, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 10001
 * RowKey= 103, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= liant3
 * RowKey= 103, ColumnFamily= fam2, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 15313089839
 * RowKey= 103, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= dah3
 * RowKey= 104, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 12306
 * RowKey= 104, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= tiel4
 * RowKey= 104, ColumnFamily= fam2, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 13872232462
 * RowKey= 104, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= zhangs4
 */
    }

2.1.4 取出所有大于x的rowkey

 /**
     * Prints the rows whose rowkey sorts after "103" (byte-wise comparison).
     */
    @Test
    public void rowFilter4() {
        BinaryComparator lowerBound = new BinaryComparator(Bytes.toBytes("103"));
        Filter filter = new RowFilter(CompareOperator.GREATER, lowerBound);
        HBaseUtils.setFilterAndPrint(filter);
/*
 * Expected output:
 * RowKey= 104, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 12306
 * RowKey= 104, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= tiel4
 * RowKey= 104, ColumnFamily= fam2, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 13872232462
 * RowKey= 104, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= zhangs4
 * RowKey= 105, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 12580
 * RowKey= 105, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= muj5
 * RowKey= 105, ColumnFamily= fam2, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 15801019898
 * RowKey= 105, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= liux5
 */
    }

2.1.5 取出所有小于x的rowkey


 /**
     * Prints the rows whose rowkey sorts before "103" (byte-wise comparison).
     */
    @Test
    public void rowFilter5() {
        BinaryComparator upperBound = new BinaryComparator(Bytes.toBytes("103"));
        Filter filter = new RowFilter(CompareOperator.LESS, upperBound);
        HBaseUtils.setFilterAndPrint(filter);
/*
 * Expected output:
 * RowKey= 101, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 10086
 * RowKey= 101, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= yid1
 * RowKey= 101, ColumnFamily= fam2, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 13522886092
 * RowKey= 101, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= xiaom1
 * RowKey= 102, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 10000
 * RowKey= 102, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= dianx2
 * RowKey= 102, ColumnFamily= fam2, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 18201607321
 * RowKey= 102, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= xiaoh2
 */
    }

2.2 列族过滤器

FamilyFilter ,列族过滤器

用于列族的过滤,其余特性与RowFilter类似,只是范围由rowkey变成了列族名。

  • 取出列族小于fam2的数据
/**
     * Prints only the cells whose column family name sorts before "fam2",
     * which for the sample table means the fam1 cells.
     */
    @Test
    public void testFamilyFilter() throws IOException {
        BinaryComparator familyBound = new BinaryComparator(Bytes.toBytes("fam2"));
        Filter filter = new FamilyFilter(CompareOperator.LESS, familyBound);
        HBaseUtils.setFilterAndPrint(filter);
/*
 * Expected output:
 * RowKey= 101, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 10086
 * RowKey= 101, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= yid1
 * RowKey= 102, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 10000
 * RowKey= 102, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= dianx2
 * RowKey= 103, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 10001
 * RowKey= 103, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= liant3
 * RowKey= 104, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 12306
 * RowKey= 104, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= tiel4
 * RowKey= 105, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 12580
 * RowKey= 105, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= muj5
 */
    }

2.3 列名过滤器

QualifierFilter,列名过滤器

用于列名的过滤,其余特性与RowFilter类似,只是范围由rowkey变成了列名。

  • 取出列名大于col1的数据
/**
     * Prints only the cells whose column qualifier sorts after "col1",
     * which for the sample table means the col2 cells of both families.
     */
    @Test
    public void testQualifierFilter() {
        BinaryComparator qualifierBound = new BinaryComparator(Bytes.toBytes("col1"));
        Filter filter = new QualifierFilter(CompareOperator.GREATER, qualifierBound);
        HBaseUtils.setFilterAndPrint(filter);
/*
 * Expected output:
 * RowKey= 101, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= yid1
 * RowKey= 101, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= xiaom1
 * RowKey= 102, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= dianx2
 * RowKey= 102, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= xiaoh2
 * RowKey= 103, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= liant3
 * RowKey= 103, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= dah3
 * RowKey= 104, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= tiel4
 * RowKey= 104, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= zhangs4
 * RowKey= 105, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= muj5
 * RowKey= 105, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= liux5
 */
    }

2.4 值过滤器

ValueFilter,值过滤器

用于值的过滤,其余特性与RowFilter类似,只是范围由rowkey变成了值。

  • 取出值中包含子串100的数据(示例数据中等价于前缀为100)
public static void main(String[] args) {
        // Keep only the cells whose value contains the substring "100".
        // SubstringComparator matches anywhere in the value; in the sample data
        // every match happens to start with "100".
        SubstringComparator containsHundred = new SubstringComparator("100");
        HBaseUtils.setFilterAndPrint(new ValueFilter(CompareOperator.EQUAL, containsHundred));
/*
 * Expected output:
 * RowKey= 101, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 10086
 * RowKey= 102, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 10000
 * RowKey= 103, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 10001
 */
    }

2.5 参考列过滤器

DependentColumnFilter 参考列过滤器

初步理解:与 ValueFilter 类似,只不过 ValueFilter 只输出满足条件的列,而 DependentColumnFilter 以参考列为基准,输出与满足条件的参考列时间戳相同的其他列(本例中同一行的数据是一次写入的,时间戳相同,因此看起来是将满足条件的列所在的行全部输出)。

2.5.1 取出前缀等于x的数据

/**
     * Uses fam1:col1 as the reference column and selects the rows whose reference
     * value starts with "1000"; the reference column itself is dropped from the output.
     */
    @Test
    public void dependentColumnFilter()  {
        // Third argument: true drops the reference column (fam1:col1) from the
        // result, false keeps it.
        boolean dropReferenceColumn = true;
        BinaryPrefixComparator prefix = new BinaryPrefixComparator(Bytes.toBytes("1000"));
        Filter filter = new DependentColumnFilter(
                Bytes.toBytes("fam1"),
                Bytes.toBytes("col1"),
                dropReferenceColumn,
                CompareOperator.EQUAL,
                prefix);
        HBaseUtils.setFilterAndPrint(filter);
/*
 * Output with false (reference column kept):
 * RowKey= 102, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 10000 // reference column
 * RowKey= 102, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= dianx2
 * RowKey= 102, ColumnFamily= fam2, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 18201607321
 * RowKey= 102, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= xiaoh2
 * RowKey= 103, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 10001
 * RowKey= 103, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= liant3
 * RowKey= 103, ColumnFamily= fam2, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 15313089839
 * RowKey= 103, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= dah3
 */
/*
 * Output with true (reference column dropped):
 * RowKey= 102, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= dianx2
 * RowKey= 102, ColumnFamily= fam2, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 18201607321
 * RowKey= 102, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= xiaoh2
 * RowKey= 103, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= liant3
 * RowKey= 103, ColumnFamily= fam2, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 15313089839
 * RowKey= 103, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= dah3
 */
    }

2.5.2 取出前缀等于x的数据

/**
     * Uses fam1:col2 as the reference column and selects the rows whose reference
     * value starts with "li"; the reference column is kept in the output.
     */
    @Test
    public void dependentColumnFilter2(){
        // Third argument: false keeps the reference column (fam1:col2) in the
        // result, true drops it.
        boolean dropReferenceColumn = false;
        BinaryPrefixComparator prefix = new BinaryPrefixComparator(Bytes.toBytes("li"));
        Filter filter = new DependentColumnFilter(
                Bytes.toBytes("fam1"),
                Bytes.toBytes("col2"),
                dropReferenceColumn,
                CompareOperator.EQUAL,
                prefix);
        HBaseUtils.setFilterAndPrint(filter);
/*
 * Output with true (reference column dropped):
 * RowKey= 103, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 10001
 * RowKey= 103, ColumnFamily= fam2, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 15313089839
 * RowKey= 103, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= dah3
 */
/*
 * Output with false (reference column kept):
 * RowKey= 103, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 10001
 * RowKey= 103, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= liant3 // reference column
 * RowKey= 103, ColumnFamily= fam2, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 15313089839
 * RowKey= 103, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= dah3
 */
    }

3. 专用过滤器

3.1 单值过滤器

SingleColumnValueFilter,单值过滤器

可以看成是对DependentColumnFilter的一层包装。
下面 3.1.1 代码示例中的功能也可以通过如下 DependentColumnFilter 代码实现:

// DependentColumnFilter form of the fam1:col1 "does not contain 100" query.
// The family must be "fam1": the demo table has no "cf1" family.
Filter filter = new DependentColumnFilter(
    Bytes.toBytes("fam1"),
    Bytes.toBytes("col1"),
    false,
    CompareOperator.NOT_EQUAL, new SubstringComparator("100")
);

3.1.1 取出fam1:col1的值不含x的数据

/**
     * Prints the complete rows whose fam1:col1 value does not contain the substring "100".
     */
    @Test
    public void singleColumnValueFilter1() {
        byte[] family = Bytes.toBytes("fam1");
        byte[] qualifier = Bytes.toBytes("col1");
        SubstringComparator containsHundred = new SubstringComparator("100");
        Filter filter = new SingleColumnValueFilter(
                family, qualifier, CompareOperator.NOT_EQUAL, containsHundred);
        HBaseUtils.setFilterAndPrint(filter);
/*
 * Expected output:
 * RowKey= 104, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 12306
 * RowKey= 104, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= tiel4
 * RowKey= 104, ColumnFamily= fam2, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 13872232462
 * RowKey= 104, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= zhangs4
 * RowKey= 105, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 12580
 * RowKey= 105, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= muj5
 * RowKey= 105, ColumnFamily= fam2, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 15801019898
 * RowKey= 105, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= liux5
 */
    }

3.1.2 取出fam1:col2的值有x的数据

/**
     * Prints the complete rows whose fam1:col2 value contains the substring "t".
     */
    @Test
    public void singleColumnValueFilter2() {
        byte[] family = Bytes.toBytes("fam1");
        byte[] qualifier = Bytes.toBytes("col2");
        SubstringComparator containsT = new SubstringComparator("t");
        Filter filter = new SingleColumnValueFilter(
                family, qualifier, CompareOperator.EQUAL, containsT);
        HBaseUtils.setFilterAndPrint(filter);
/*
 * Expected output:
 * RowKey= 103, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 10001
 * RowKey= 103, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= liant3
 * RowKey= 103, ColumnFamily= fam2, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 15313089839
 * RowKey= 103, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= dah3
 * RowKey= 104, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 12306
 * RowKey= 104, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= tiel4
 * RowKey= 104, ColumnFamily= fam2, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 13872232462
 * RowKey= 104, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= zhangs4
 */
    }

3.1.3 取出fam2的col1中值大于x的数据


/**
     * Prints the complete rows whose fam2:col1 value is greater than "182016073".
     * <p>
     * The comparison is GREATER on raw bytes, not a prefix match: "18201607321" qualifies
     * because it extends the bound with further bytes, while the other sample values
     * all sort before it.
     */
    @Test
    public void singleColumnValueFilter3() throws IOException {
        byte[] family = Bytes.toBytes("fam2");
        byte[] qualifier = Bytes.toBytes("col1");
        BinaryComparator lowerBound = new BinaryComparator(Bytes.toBytes("182016073"));
        Filter filter = new SingleColumnValueFilter(
                family, qualifier, CompareOperator.GREATER, lowerBound);
        HBaseUtils.setFilterAndPrint(filter);
 /*
  * Expected output:
  * RowKey= 102, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 10000
  * RowKey= 102, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= dianx2
  * RowKey= 102, ColumnFamily= fam2, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 18201607321
  * RowKey= 102, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= xiaoh2
  */
    }

3.2 单列值排除过滤器

SingleColumnValueExcludeFilter : 单列值排除过滤器

SingleColumnValueExcludeFilter 与 SingleColumnValueFilter 唯一的区别是:
SingleColumnValueFilter 返回满足条件的行的所有列(包含用于比较的列),
SingleColumnValueExcludeFilter 同样返回满足条件的行,但结果中不包含用于比较的那一列。

3.2.1 取出fam2:col1值等于x的数据


/**
     * Selects the rows whose fam2:col1 value starts with "18201607321" and prints
     * every column of those rows except fam2:col1 itself.
     */
    @Test
    public void testSingleColumnValueExcludeFilter() {
        byte[] family = Bytes.toBytes("fam2");
        byte[] qualifier = Bytes.toBytes("col1");
        BinaryPrefixComparator prefix = new BinaryPrefixComparator(Bytes.toBytes("18201607321"));
        Filter filter = new SingleColumnValueExcludeFilter(
                family, qualifier, CompareOperator.EQUAL, prefix);
        HBaseUtils.setFilterAndPrint(filter);
/*
 * Expected output (the row whose fam2:col1 is 18201607321, without fam2:col1):
 * RowKey= 102, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 10000
 * RowKey= 102, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= dianx2
 * RowKey= 102, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= xiaoh2
 */
    }

3.2.2 取出fam1:col1值等于x的数据

/**
     * Selects the rows whose fam1:col1 value starts with "100" and prints every
     * column of those rows except fam1:col1 itself.
     */
    @Test
    public void testSingleColumnValueExcludeFilter2() {
        byte[] family = Bytes.toBytes("fam1");
        byte[] qualifier = Bytes.toBytes("col1");
        BinaryPrefixComparator prefix = new BinaryPrefixComparator(Bytes.toBytes("100"));
        Filter filter = new SingleColumnValueExcludeFilter(
                family, qualifier, CompareOperator.EQUAL, prefix);
        HBaseUtils.setFilterAndPrint(filter);
/*
 * Expected output (matching rows, without the tested fam1:col1 column):
 * RowKey= 101, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= yid1
 * RowKey= 101, ColumnFamily= fam2, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 13522886092
 * RowKey= 101, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= xiaom1
 * RowKey= 102, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= dianx2
 * RowKey= 102, ColumnFamily= fam2, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 18201607321
 * RowKey= 102, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= xiaoh2
 * RowKey= 103, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= liant3
 * RowKey= 103, ColumnFamily= fam2, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 15313089839
 * RowKey= 103, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= dah3
 */
    }

3.3 前缀过滤器

PrefixFilter,前缀过滤器

相当于RowFilter配合BinaryPrefixComparator使用。
下面 PrefixFilter 示例中的功能也可以通过如下 RowFilter 代码实现:

// RowFilter form of the rowkey-prefix query. Uses CompareOperator, as the other
// examples do; CompareFilter.CompareOp is deprecated in HBase 2.x.
Filter filter = new RowFilter(CompareOperator.EQUAL,
      new BinaryPrefixComparator(Bytes.toBytes("101")));
  • 选择rowkey前缀为 101 的数据
public static void main(String[] args) {
        // Select the rows whose rowkey starts with "101".
        byte[] rowKeyPrefix = Bytes.toBytes("101");
        HBaseUtils.setFilterAndPrint(new PrefixFilter(rowKeyPrefix));
/*
 * Expected output:
 * RowKey= 101, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 10086
 * RowKey= 101, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= yid1
 * RowKey= 101, ColumnFamily= fam2, ColumnName= col1,  TimeStamp= 1551669078407, ColumnValue= 13522886092
 * RowKey= 101, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551669078407, ColumnValue= xiaom1
 */
    }

3.4 分页过滤器

PageFilter,分页过滤器

可以看成 RowFilter 加一个计数器组合而成

  • 以 rowkey=102 为起始值分页大小为 2
public static void main(String[] args) {
        // Page of 2 rows starting at rowkey "102".
        Filter filter = new PageFilter(2);
        Scan scan = new Scan();
        scan.setFilter(filter);
        scan.withStartRow(Bytes.toBytes("102"));
        // Execute the configured Scan itself. Calling setFilterAndPrint(filter) would
        // build a fresh Scan and drop the start row, returning rows 101 and 102.
        HBaseUtils.setScanAndPrint(scan);
        // NOTE(review): per the HBase docs PageFilter is applied per region, so a table
        // spanning several regions may return more than the page size — confirm for
        // multi-region tables.
/*
 * Expected output (rows 102 and 103):
 * RowKey= 102, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551945588412, ColumnValue= 10000
 * RowKey= 102, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue= dianx2
 * RowKey= 102, ColumnFamily= fam2, ColumnName= col1,  TimeStamp= 1551945588412, ColumnValue= 18201607321
 * RowKey= 102, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue= xiaoh2
 * RowKey= 103, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551945588412, ColumnValue= 10001
 * RowKey= 103, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue= liant3
 * RowKey= 103, ColumnFamily= fam2, ColumnName= col1,  TimeStamp= 1551945588412, ColumnValue= 15313089839
 * RowKey= 103, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue= dah3
 */
    }

3.5 行键过滤器

KeyOnlyFilter,行键过滤器

只输出行键的过滤器,会重复输出rowkey

  • 示例
public static void main(String[] args) {
        // Return every cell's key (row, family, qualifier, timestamp) with an empty value.
        HBaseUtils.setFilterAndPrint(new KeyOnlyFilter());
/*
 * Expected output:
 * RowKey= 101, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551945588412, ColumnValue=
 * RowKey= 101, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue=
 * RowKey= 101, ColumnFamily= fam2, ColumnName= col1,  TimeStamp= 1551945588412, ColumnValue=
 * RowKey= 101, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue=
 * RowKey= 102, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551945588412, ColumnValue=
 * RowKey= 102, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue=
 * RowKey= 102, ColumnFamily= fam2, ColumnName= col1,  TimeStamp= 1551945588412, ColumnValue=
 * RowKey= 102, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue=
 * RowKey= 103, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551945588412, ColumnValue=
 * RowKey= 103, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue=
 * RowKey= 103, ColumnFamily= fam2, ColumnName= col1,  TimeStamp= 1551945588412, ColumnValue=
 * RowKey= 103, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue=
 * RowKey= 104, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551945588412, ColumnValue=
 * RowKey= 104, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue=
 * RowKey= 104, ColumnFamily= fam2, ColumnName= col1,  TimeStamp= 1551945588412, ColumnValue=
 * RowKey= 104, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue=
 * RowKey= 105, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551945588412, ColumnValue=
 * RowKey= 105, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue=
 * RowKey= 105, ColumnFamily= fam2, ColumnName= col1,  TimeStamp= 1551945588412, ColumnValue=
 * RowKey= 105, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue=
 */
    }

3.6 首次行键过滤器

FirstKeyOnlyFilter 首次行键过滤器。

FirstKeyOnlyFilter 与 KeyOnlyFilter 的区别
KeyOnlyFilter 会将同一个 rowkey 输出多次(取决于有多少列),
FirstKeyOnlyFilter 将相同的 rowkey 只输出一次,会带有某些列的 value

  • 示例: 输出不重复的rowKey,会带value
public static void main(String[] args) {
        // Scan with a FirstKeyOnlyFilter: each row key shows up once, carrying the value
        // of the single cell that was returned for it.
        HBaseUtils.setFilterAndPrint(new FirstKeyOnlyFilter());
        /*
         * Recorded output:
         * RowKey= 101, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551945588412, ColumnValue= 10086
         * RowKey= 102, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551945588412, ColumnValue= 10000
         * RowKey= 103, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551945588412, ColumnValue= 10001
         * RowKey= 104, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551945588412, ColumnValue= 12306
         * RowKey= 105, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551945588412, ColumnValue= 12580
         */
    }

3.7 包含结束行过滤器

InclusiveStopFilter,包含结束行过滤器

Scan.withStartRow()(旧版 API 为 setStartRow())默认会将开始行包含在结果中,但 Scan.withStopRow()(旧版 API 为 setStopRow())默认不会将结束行包含在结果中。
用此过滤器可将结束行包含进结果中。

public static void main(String[] args) {
        // Start scanning at row "102"; the InclusiveStopFilter ends the scan at row "103"
        // while still returning that row.
        Scan scan = new Scan()
                .withStartRow(Bytes.toBytes("102"))
                .setFilter(new InclusiveStopFilter(Bytes.toBytes("103")));
        HBaseUtils.setScanAndPrint(scan);
        /*
         * Recorded output:
         * RowKey= 102, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551945588412, ColumnValue= 10000
         * RowKey= 102, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue= dianx2
         * RowKey= 102, ColumnFamily= fam2, ColumnName= col1,  TimeStamp= 1551945588412, ColumnValue= 18201607321
         * RowKey= 102, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue= xiaoh2
         * RowKey= 103, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551945588412, ColumnValue= 10001
         * RowKey= 103, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue= liant3
         * RowKey= 103, ColumnFamily= fam2, ColumnName= col1,  TimeStamp= 1551945588412, ColumnValue= 15313089839
         * RowKey= 103, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue= dah3
         */

    }

3.8 时间戳过滤器

TimestampsFilter,时间戳过滤器, 选择特定的timestamps,根据时间戳选择数据

public static void main(String[] args) {
        // Select only the cells written at the listed timestamp(s).
        List<Long> wantedTimestamps = Lists.newArrayList(1551945588412L);
        HBaseUtils.setFilterAndPrint(new TimestampsFilter(wantedTimestamps));
        /*
         * Recorded output:
         * RowKey= 101, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551945588412, ColumnValue= 10086
         * RowKey= 101, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue= yid1
         * RowKey= 101, ColumnFamily= fam2, ColumnName= col1,  TimeStamp= 1551945588412, ColumnValue= 13522886092
         * RowKey= 101, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue= xiaom1
         * RowKey= 102, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551945588412, ColumnValue= 10000
         * RowKey= 102, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue= dianx2
         * RowKey= 102, ColumnFamily= fam2, ColumnName= col1,  TimeStamp= 1551945588412, ColumnValue= 18201607321
         * RowKey= 102, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue= xiaoh2
         * RowKey= 103, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551945588412, ColumnValue= 10001
         * RowKey= 103, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue= liant3
         * RowKey= 103, ColumnFamily= fam2, ColumnName= col1,  TimeStamp= 1551945588412, ColumnValue= 15313089839
         * RowKey= 103, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue= dah3
         * RowKey= 104, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551945588412, ColumnValue= 12306
         * RowKey= 104, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue= tiel4
         * RowKey= 104, ColumnFamily= fam2, ColumnName= col1,  TimeStamp= 1551945588412, ColumnValue= 13872232462
         * RowKey= 104, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue= zhangs4
         * RowKey= 105, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551945588412, ColumnValue= 12580
         * RowKey= 105, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue= muj5
         * RowKey= 105, ColumnFamily= fam2, ColumnName= col1,  TimeStamp= 1551945588412, ColumnValue= 15801019898
         * RowKey= 105, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue= liux5
         */
    }

3.9 列计数过滤器

ColumnCountGetFilter,列计数过滤器

控制每行最多返回多少列,按列族名和列名的字典序进行排列。
实际测试中发现当 ColumnCountGetFilter(1) 时,
只输出第一行数据的第一列,而不是所有数据的第一列。这并不是 bug:按照官方文档,该过滤器是为 Get 操作设计的,一旦取够指定的列数,filterAllRemaining() 就会返回 true,使整个扫描提前结束,因此它并不适合用于 Scan。

public static void main(String[] args) {
        // Column quota handed to the filter; the recorded run below shows two columns per row.
        final int maxColumns = 2;
        HBaseUtils.setFilterAndPrint(new ColumnCountGetFilter(maxColumns));
        /*
         * Recorded output:
         * RowKey= 101, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551945588412, ColumnValue= 10086
         * RowKey= 101, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue= yid1
         * RowKey= 102, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551945588412, ColumnValue= 10000
         * RowKey= 102, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue= dianx2
         * RowKey= 103, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551945588412, ColumnValue= 10001
         * RowKey= 103, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue= liant3
         * RowKey= 104, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551945588412, ColumnValue= 12306
         * RowKey= 104, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue= tiel4
         * RowKey= 105, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551945588412, ColumnValue= 12580
         * RowKey= 105, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue= muj5
         */
    }

3.10 列分页过滤器

ColumnPaginationFilter,列分页过滤器
正如HBase表结构设计中所描述的表的横向扩展,ColumnPaginationFilter无疑是针对此场景的最佳分页手段
同ColumnCountGetFilter,按照列族名和列名的字典序进行排列。

public static void main(String[] args) {
        // ColumnPaginationFilter(limit, offset): skip the first 3 columns of every row
        // (offset is zero-based), then return 1 column -- i.e. the fourth one, fam2:col2.
        final int limit = 1;
        final int offset = 3;
        HBaseUtils.setFilterAndPrint(new ColumnPaginationFilter(limit, offset));
        /*
         * Recorded output:
         * RowKey= 101, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue= xiaom1
         * RowKey= 102, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue= xiaoh2
         * RowKey= 103, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue= dah3
         * RowKey= 104, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue= zhangs4
         * RowKey= 105, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue= liux5
         */
    }

3.11 列前缀过滤器

ColumnPrefixFilter,列前缀过滤器

通过列名进行前缀匹配过滤
等同于 new QualifierFilter(CompareOperator.EQUAL, new BinaryPrefixComparator(Bytes.toBytes("…")));

public static void main(String[] args) {
        // Keep only the columns whose qualifier begins with "col2".
        byte[] qualifierPrefix = Bytes.toBytes("col2");
        HBaseUtils.setFilterAndPrint(new ColumnPrefixFilter(qualifierPrefix));
        /*
         * Recorded output:
         * RowKey= 101, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue= yid1
         * RowKey= 101, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue= xiaom1
         * RowKey= 102, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue= dianx2
         * RowKey= 102, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue= xiaoh2
         * RowKey= 103, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue= liant3
         * RowKey= 103, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue= dah3
         * RowKey= 104, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue= tiel4
         * RowKey= 104, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue= zhangs4
         * RowKey= 105, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue= muj5
         * RowKey= 105, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue= liux5
         */
    }

3.12 随机行过滤器

RandomRowFilter,随机行过滤器

构造方法 Filter filter = new RandomRowFilter(0.2f);
每次的运行结果都不一样,随机生成结果。
参数 0.2f 只是参考作用,并不是每次必定取 20% 的样本,意思是每条数据有 20% 概率被选择,
所以所有数据都没被选择的可能也是有的。

public static void main(String[] args) {
        // Chance value passed to RandomRowFilter; the result differs from run to run.
        final float chance = 0.2f;
        HBaseUtils.setFilterAndPrint(new RandomRowFilter(chance));
        /*
         * Output recorded from one sample run:
         * RowKey= 101, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551945588412, ColumnValue= 10086
         * RowKey= 101, ColumnFamily= fam1, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue= yid1
         * RowKey= 101, ColumnFamily= fam2, ColumnName= col1,  TimeStamp= 1551945588412, ColumnValue= 13522886092
         * RowKey= 101, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue= xiaom1
         */
    }

4. 装饰性过滤器

4.1 跳转过滤器

SkipFilter,跳转过滤器

目前只能作用于 ValueFilter。
ValueFilter用于输出满足条件的行和对应的列(该行中也有1至N列满足),
当加上SkipFilter后,若某行中有不满足ValueFilter的列存在,
则整行都不会输出(即,只要某行中有一列不满足ValueFilter,则其它满足的列也不会输出)。


public static void main(String[] args) {
        // Inner condition: the cell value contains the substring "100".
        SubstringComparator contains100 = new SubstringComparator("100");
        Filter valueMatch = new ValueFilter(CompareOperator.EQUAL, contains100);
        // Wrapping it in a SkipFilter drops a whole row as soon as one of its cells fails.
        HBaseUtils.setFilterAndPrint(new SkipFilter(valueMatch));
        // The result is empty: no row has every one of its column values containing "100".
    }

4.2 全匹配过滤器

WhileMatchFilter,全匹配过滤器

使用过滤器时,当遇到不满足过滤条件的数据时将不会返回该数据,
当加上WhileMatchFilter后,遇到第一个不符合条件的数据时将会停止继续的扫描。

public static void main(String[] args) {
        // Inner condition: the cell value contains the substring "100".
        SubstringComparator contains100 = new SubstringComparator("100");
        Filter valueMatch = new ValueFilter(CompareOperator.EQUAL, contains100);
        // Wrapping it in a WhileMatchFilter stops the scan at the first cell that fails.
        HBaseUtils.setFilterAndPrint(new WhileMatchFilter(valueMatch));
        // Row 101 fam1:col1 (10086) matches; the next cell, fam1:col2 (yid1), does not,
        // so the scan terminates there and only one cell is printed:
        //RowKey= 101, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551945588412, ColumnValue= 10086
    }

5. 过滤器集合

HBase 的过滤器集合是一组用于检索和筛选存储在HBase表中数据的工具。这些过滤器可以按行键、列族、列修饰符和时间戳进行过滤,也可以根据列值的类型和范围进行过滤。

package com.hnbian.filters;

import com.hnbian.utils.HBaseUtils;
import org.apache.hadoop.hbase.CompareOperator;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;
import java.util.ArrayList;
import java.util.List;

/**
 * 组合过滤器,将各种过滤器组合在一起使用
 */
public class FilterListTest {

    /**
     * Builds a {@link FilterList} out of two row-key range filters and one
     * dependent-column filter, then scans with it and prints the matching cells.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        List<Filter> filters = new ArrayList<>();

        // Row filter: keep rows whose row key is >= "101".
        filters.add(new RowFilter(
                CompareOperator.GREATER_OR_EQUAL,
                new BinaryComparator(Bytes.toBytes("101"))
        ));
        // Row filter: keep rows whose row key is <= "106".
        filters.add(new RowFilter(
                CompareOperator.LESS_OR_EQUAL,
                new BinaryComparator(Bytes.toBytes("106"))
        ));

        // Dependent-column filter with fam1:col2 as the reference column: a row is kept
        // only when its fam1:col2 value starts with "yid1". The third argument
        // (dropDependentColumn = true) removes the reference column itself from the
        // result, which is why fam1:col2 is absent from the output below.
        filters.add(new DependentColumnFilter(
                Bytes.toBytes("fam1"),
                Bytes.toBytes("col2"),
                true,
                CompareOperator.EQUAL,
                new BinaryPrefixComparator(Bytes.toBytes("yid1"))
        ));

        // Operator.MUST_PASS_ALL (the default) combines the filters with AND;
        // Operator.MUST_PASS_ONE would combine them with OR.
        Filter filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL, filters);
        HBaseUtils.setFilterAndPrint(filterList);
        /*
         * Recorded output:
         * RowKey= 101, ColumnFamily= fam1, ColumnName= col1,  TimeStamp= 1551945588412, ColumnValue= 10086
         * RowKey= 101, ColumnFamily= fam2, ColumnName= col1,  TimeStamp= 1551945588412, ColumnValue= 13522886092
         * RowKey= 101, ColumnFamily= fam2, ColumnName= col2,  TimeStamp= 1551945588412, ColumnValue= xiaom1
         */
    }
}

文章作者: hnbian
版权声明: 本博客所有文章除特別声明外,均采用 CC BY 4.0 许可协议。转载请注明来源 hnbian !
评论
 上一篇
Spark 中的基本概念 Spark 中的基本概念
1. Application应用 Spark上运行的应用, 包含了驱动器进程(Driver)和集群上的执行器进程(Executor) 每个Application 只有一个Driver 但是可以有多个Executor 2. Appli
2019-08-19
下一篇 
HBase shell 介绍 HBase shell 介绍
1.DDL(data definition language)DDL的命令有CREATE、ALTER、DROP等,DDL主要是用在定义或改变表(TABLE)的结构,数据类型,表之间的链接和约束等初始化工作上,他们大多在建立表时使用 1.1一
2019-07-29
  目录