hbase RowFilter

RowFilter用于过滤row key

Operator | Description
LESS | 小于
LESS_OR_EQUAL | 小于等于
EQUAL | 等于
NOT_EQUAL | 不等于
GREATER_OR_EQUAL | 大于等于
GREATER | 大于
NO_OP | 排除所有
 
Comparator | Description
BinaryComparator | 使用Bytes.compareTo()比较
BinaryPrefixComparator | 和BinaryComparator差不多,从前面开始比较
NullComparator | Does not compare against an actual value, but checks whether a given one is null or not null.
BitComparator | Performs a bitwise comparison, providing a BitwiseOp class with AND, OR, and XOR operators.
RegexStringComparator | 正则表达式
SubstringComparator | 把数据当成字符串,用contains()来判断
   
import java.io.IOException;

import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.client.HBaseAdmin; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.filter.BinaryComparator; import org.apache.hadoop.hbase.filter.BinaryPrefixComparator; import org.apache.hadoop.hbase.filter.CompareFilter; import org.apache.hadoop.hbase.filter.Filter; import org.apache.hadoop.hbase.filter.RegexStringComparator; import org.apache.hadoop.hbase.filter.RowFilter; import org.apache.hadoop.hbase.filter.SubstringComparator;

/**
 * Demonstrates {@link RowFilter} with several comparators against a small
 * HBase table.
 *
 * <p>{@link #init()} creates the table (if missing) and writes rows
 * {@code row001}..{@code row050}; {@link #testRowFilter()} then scans the
 * table four times, each time with a different row-key filter, and prints
 * the matching rows.
 */
public class TestHbaseRowFilter {

	/** Name of the table that is created by {@link #init()} and scanned by {@link #testRowFilter()}. */
	String tableName = "test_row_filter";

	/** HBase client configuration obtained from {@code HBaseConfiguration.create()}. */
	Configuration config = HBaseConfiguration.create();

	/**
	 * Runs four scans over the table, each with a different {@link RowFilter},
	 * and prints every returned {@link Result} to standard output.
	 *
	 * <p>Part of this code comes from "HBase: The Definitive Guide".
	 *
	 * @throws IOException if opening the table, scanning, or closing fails
	 */
	public void testRowFilter() throws IOException {
		HTable table = new HTable(config, tableName);
		try {
			// The same Scan instance is reused; only its filter is swapped per query.
			Scan scan = new Scan();

			printMatchingRows(table, scan, "小于等于row010的行",
					new RowFilter(CompareFilter.CompareOp.LESS_OR_EQUAL,
							new BinaryComparator("row010".getBytes())));

			printMatchingRows(table, scan, "正则获取结尾为5的行",
					new RowFilter(CompareFilter.CompareOp.EQUAL,
							new RegexStringComparator(".*5$")));

			printMatchingRows(table, scan, "包行有5的行",
					new RowFilter(CompareFilter.CompareOp.EQUAL,
							new SubstringComparator("5")));

			printMatchingRows(table, scan, "开头是row01的",
					new RowFilter(CompareFilter.CompareOp.EQUAL,
							new BinaryPrefixComparator("row01".getBytes())));
		} finally {
			// The original never closed the table handle.
			table.close();
		}
	}

	/**
	 * Prints {@code title}, applies {@code filter} to {@code scan}, and prints
	 * each row the scan returns. The scanner is always closed, even when the
	 * iteration fails (the original closed scanner3 twice and leaked scanner4).
	 *
	 * @param table  table to scan
	 * @param scan   scan whose filter is replaced by {@code filter}
	 * @param title  heading printed before the results
	 * @param filter row filter to apply for this query
	 * @throws IOException if the scanner cannot be opened
	 */
	private void printMatchingRows(HTable table, Scan scan, String title,
			Filter filter) throws IOException {
		System.out.println(title);
		scan.setFilter(filter);
		ResultScanner scanner = table.getScanner(scan);
		try {
			for (Result row : scanner) {
				System.out.println(row);
			}
		} finally {
			scanner.close();
		}
	}

	/**
	 * Creates the table with the {@code data} and {@code url} column families
	 * if it does not exist yet, then writes 50 rows keyed
	 * {@code row001}..{@code row050}.
	 *
	 * <p>An {@link IOException} is caught and its stack trace printed rather
	 * than propagated, matching the original best-effort behavior.
	 */
	public void init() {
		try {
			HBaseAdmin admin = new HBaseAdmin(config);
			try {
				if (!admin.tableExists(tableName)) {
					HTableDescriptor htd = new HTableDescriptor(tableName);
					htd.addFamily(new HColumnDescriptor("data"));
					htd.addFamily(new HColumnDescriptor("url"));
					admin.createTable(htd);
				}
			} finally {
				// The original never closed the admin connection.
				admin.close();
			}

			HTable table = new HTable(config, tableName);
			try {
				// Buffer puts client-side; close() below releases pending
				// changes held in the internal buffers.
				table.setAutoFlush(false);
				int count = 50;
				for (int i = 1; i <= count; ++i) {
					// One column per family, with qualifier col<last digit of i>.
					byte[] qualifier = String.format("col%01d", i % 10).getBytes();
					Put p = new Put(String.format("row%03d", i).getBytes());
					p.add("data".getBytes(), qualifier,
							String.format("data%03d", i).getBytes());
					p.add("url".getBytes(), qualifier,
							String.format("url%03d", i).getBytes());
					table.put(p);
				}
			} finally {
				table.close();
			}
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Entry point: populates the table, then runs the filter demonstrations.
	 *
	 * @param args unused
	 * @throws IOException if the filter scans fail
	 */
	public static void main(String[] args) throws IOException {
		TestHbaseRowFilter test = new TestHbaseRowFilter();
		test.init();
		test.testRowFilter();
	}

}

输出结果

小于等于row010的行
keyvalues={row001/data:col1/1364133382268/Put/vlen=7, row001/url:col1/1364133382268/Put/vlen=6}
keyvalues={row002/data:col2/1364133382268/Put/vlen=7, row002/url:col2/1364133382268/Put/vlen=6}
keyvalues={row003/data:col3/1364133382268/Put/vlen=7, row003/url:col3/1364133382268/Put/vlen=6}
keyvalues={row004/data:col4/1364133382268/Put/vlen=7, row004/url:col4/1364133382268/Put/vlen=6}
keyvalues={row005/data:col5/1364133382268/Put/vlen=7, row005/url:col5/1364133382268/Put/vlen=6}
keyvalues={row006/data:col6/1364133382268/Put/vlen=7, row006/url:col6/1364133382268/Put/vlen=6}
keyvalues={row007/data:col7/1364133382268/Put/vlen=7, row007/url:col7/1364133382268/Put/vlen=6}
keyvalues={row008/data:col8/1364133382268/Put/vlen=7, row008/url:col8/1364133382268/Put/vlen=6}
keyvalues={row009/data:col9/1364133382268/Put/vlen=7, row009/url:col9/1364133382268/Put/vlen=6}
keyvalues={row010/data:col0/1364133382268/Put/vlen=7, row010/url:col0/1364133382268/Put/vlen=6}
正则获取结尾为5的行
keyvalues={row005/data:col5/1364133382268/Put/vlen=7, row005/url:col5/1364133382268/Put/vlen=6}
keyvalues={row015/data:col5/1364133382268/Put/vlen=7, row015/url:col5/1364133382268/Put/vlen=6}
keyvalues={row025/data:col5/1364133382268/Put/vlen=7, row025/url:col5/1364133382268/Put/vlen=6}
keyvalues={row035/data:col5/1364133382268/Put/vlen=7, row035/url:col5/1364133382268/Put/vlen=6}
keyvalues={row045/data:col5/1364133382268/Put/vlen=7, row045/url:col5/1364133382268/Put/vlen=6}
包行有5的行
keyvalues={row005/data:col5/1364133382268/Put/vlen=7, row005/url:col5/1364133382268/Put/vlen=6}
keyvalues={row015/data:col5/1364133382268/Put/vlen=7, row015/url:col5/1364133382268/Put/vlen=6}
keyvalues={row025/data:col5/1364133382268/Put/vlen=7, row025/url:col5/1364133382268/Put/vlen=6}
keyvalues={row035/data:col5/1364133382268/Put/vlen=7, row035/url:col5/1364133382268/Put/vlen=6}
keyvalues={row045/data:col5/1364133382268/Put/vlen=7, row045/url:col5/1364133382268/Put/vlen=6}
keyvalues={row050/data:col0/1364133382268/Put/vlen=7, row050/url:col0/1364133382268/Put/vlen=6}
开头是row01的
keyvalues={row010/data:col0/1364133382268/Put/vlen=7, row010/url:col0/1364133382268/Put/vlen=6}
keyvalues={row011/data:col1/1364133382268/Put/vlen=7, row011/url:col1/1364133382268/Put/vlen=6}
keyvalues={row012/data:col2/1364133382268/Put/vlen=7, row012/url:col2/1364133382268/Put/vlen=6}
keyvalues={row013/data:col3/1364133382268/Put/vlen=7, row013/url:col3/1364133382268/Put/vlen=6}
keyvalues={row014/data:col4/1364133382268/Put/vlen=7, row014/url:col4/1364133382268/Put/vlen=6}
keyvalues={row015/data:col5/1364133382268/Put/vlen=7, row015/url:col5/1364133382268/Put/vlen=6}
keyvalues={row016/data:col6/1364133382268/Put/vlen=7, row016/url:col6/1364133382268/Put/vlen=6}
keyvalues={row017/data:col7/1364133382268/Put/vlen=7, row017/url:col7/1364133382268/Put/vlen=6}
keyvalues={row018/data:col8/1364133382268/Put/vlen=7, row018/url:col8/1364133382268/Put/vlen=6}
keyvalues={row019/data:col9/1364133382268/Put/vlen=7, row019/url:col9/1364133382268/Put/vlen=6}

参考

hbase权威指南

updated 2024-08-30