7
votes

I am trying to get the selected columns in rows from HBase after applying some filters. Consider a table like:

ename:fname ename:lname salary:gross salary:da salary:ta

I want to get a list of all employees having gross salary > 1500. For this I have written the following code. The problem I am facing is that when I filter on a column, I get only that column in the output, which makes sense because that is what these filters are created for. But what if I want to get a desired set of columns while filtering based only on one specific column, like the case I just mentioned - a list of all employees having salary > 1500?

Output should be the following set of columns:

lname,fname,salary:gross,salary:ta

Code so far

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.RegexStringComparator;
import org.apache.hadoop.hbase.filter.SubstringComparator;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.QualifierFilter;
import org.apache.hadoop.hbase.filter.FamilyFilter;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.ValueFilter;
import org.apache.hadoop.hbase.util.Bytes;


import java.io.IOException;
import java.util.ArrayList;
import java.util.List;


/**
 * Scans the "emp" table with a FamilyFilter, a QualifierFilter and a
 * ValueFilter combined under MUST_PASS_ALL, and prints every KeyValue that
 * the scan returns.
 *
 * <p>These three filters are evaluated against individual cells, so, as the
 * output of this program shows, only the salary:gross cells come back; the
 * other columns of a matching row are not returned.
 */
public class MyQualifierFilterExample {

  /**
   * Runs the filtered scan and prints one line per returned cell.
   *
   * @param args unused
   * @throws IOException if the table cannot be opened or the scan fails
   */
  public static void main(String[] args) throws IOException {
    Configuration conf = HBaseConfiguration.create();

    HTable table = new HTable(conf, "emp");
    try {
      List<Filter> filters = new ArrayList<Filter>();

      // Keep only cells in the "salary" column family ...
      Filter famFilter = new FamilyFilter(CompareFilter.CompareOp.EQUAL,
          new BinaryComparator(Bytes.toBytes("salary")));
      filters.add(famFilter);

      // ... whose qualifier is "gross" ...
      Filter colFilter = new QualifierFilter(CompareFilter.CompareOp.EQUAL,
          new BinaryComparator(Bytes.toBytes("gross")));
      filters.add(colFilter);

      // ... and whose value compares >= the bytes of the string "1500".
      // NOTE(review): BinaryComparator compares raw bytes, so string-encoded
      // numbers are ordered lexicographically ("900" > "1500") - confirm how
      // the salaries are stored.
      Filter valFilter = new ValueFilter(CompareFilter.CompareOp.GREATER_OR_EQUAL,
          new BinaryComparator(Bytes.toBytes("1500")));
      filters.add(valFilter);

      FilterList fl = new FilterList(FilterList.Operator.MUST_PASS_ALL, filters);

      Scan scan = new Scan();
      scan.setFilter(fl);

      ResultScanner scanner = table.getScanner(scan);
      try {
        System.out.println("Scanning table... ");
        for (Result result : scanner) {
          for (KeyValue kv : result.raw()) {
            System.out.println("kv:"+kv +", Key: " + Bytes.toString(kv.getRow())  + ", Value: " +Bytes.toString(kv.getValue()));
          }
        }
      } finally {
        // Release the scanner even when iteration throws.
        scanner.close();
      }
      System.out.println("Completed ");
    } finally {
      // The original never closed the table; release it on every path.
      table.close();
    }
  }
}

Output

Scanning table... 
kv:101/salary:gross/1339876269770/Put/vlen=4, Key: 101, Value: 2000
kv:102/salary:gross/1339876277659/Put/vlen=4, Key: 102, Value: 2400
kv:105/salary:gross/1339876300585/Put/vlen=4, Key: 105, Value: 2300
kv:106/salary:gross/1339876310004/Put/vlen=4, Key: 106, Value: 2900
Completed 

Solution 1

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.RegexStringComparator;
import org.apache.hadoop.hbase.filter.SubstringComparator;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.QualifierFilter;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.filter.FamilyFilter;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.ValueFilter;
import org.apache.hadoop.hbase.util.Bytes;


import java.io.IOException;
import java.util.ArrayList;
import java.util.List;


/**
 * Scans the "emp" table for rows that satisfy two SingleColumnValueFilter
 * conditions (salary:gross >= "1300" and salary:da >= "150", combined with
 * MUST_PASS_ALL) and prints the ename:fname, ename:lname, salary:gross and
 * salary:da cells of each returned row on a single line.
 */
public class MyQualifierFilterExample {

  /**
   * Runs the filtered scan and prints one line per returned row, followed by
   * a separator line.
   *
   * @param args unused
   * @throws IOException if the table cannot be opened or the scan fails
   */
  public static void main(String[] args) throws IOException {
    Configuration conf = HBaseConfiguration.create();

    HTable table = new HTable(conf, "emp");
    try {
      List<Filter> filters = new ArrayList<Filter>();

      // NOTE(review): BinaryComparator compares raw bytes, so string-encoded
      // numbers are ordered lexicographically ("900" > "1300") - confirm how
      // the salaries are stored.
      SingleColumnValueFilter colValFilter = new SingleColumnValueFilter(
          Bytes.toBytes("salary"), Bytes.toBytes("gross"),
          CompareFilter.CompareOp.GREATER_OR_EQUAL,
          new BinaryComparator(Bytes.toBytes("1300")));
      // false: a row that has no salary:gross cell is not dropped by this filter.
      colValFilter.setFilterIfMissing(false);
      filters.add(colValFilter);

      Filter colValFilter2 = new SingleColumnValueFilter(
          Bytes.toBytes("salary"), Bytes.toBytes("da"),
          CompareFilter.CompareOp.GREATER_OR_EQUAL,
          new BinaryComparator(Bytes.toBytes("150")));
      filters.add(colValFilter2);

      // Optional third condition, left disabled as in the original:
      //Filter colValFilter3 = new SingleColumnValueFilter(Bytes.toBytes("ename"), Bytes.toBytes("fname")
      //      , CompareFilter.CompareOp.GREATER_OR_EQUAL, new SubstringComparator("jack"));
      //filters.add(colValFilter3);

      FilterList fl = new FilterList(FilterList.Operator.MUST_PASS_ALL, filters);

      Scan scan = new Scan();
      scan.setFilter(fl);
      // Columns to return; the filtered columns are listed here as well.
      scan.addColumn(Bytes.toBytes("ename"), Bytes.toBytes("fname"));
      scan.addColumn(Bytes.toBytes("ename"), Bytes.toBytes("lname"));
      scan.addColumn(Bytes.toBytes("salary"), Bytes.toBytes("gross"));
      scan.addColumn(Bytes.toBytes("salary"), Bytes.toBytes("da"));

      ResultScanner scanner = table.getScanner(scan);
      try {
        System.out.println("Scanning table... ");
        for (Result result : scanner) {
          // Print the row key once, in front of the first cell of the row.
          // A flag replaces the original "~" sentinel string, which would
          // misfire for a row whose key is literally "~".
          boolean keyPrinted = false;
          for (KeyValue kv : result.raw()) {
            if (!keyPrinted) {
              System.out.print("Key: " + Bytes.toString(kv.getRow()));
              keyPrinted = true;
            }
            System.out.print(", "+Bytes.toString(kv.getFamily())+"."+Bytes.toString(kv.getQualifier()));
            System.out.print("=" +Bytes.toString(kv.getValue()));
          }
          System.out.println("");
          System.out.println("-------------------");
        }
      } finally {
        // Release the scanner even when iteration throws.
        scanner.close();
      }
      System.out.println("Completed ");
    } finally {
      // The original never closed the table; release it on every path.
      table.close();
    }
  }
}

Output:

Scanning table... 
Key: 103, ename.fname=peter, ename.lname=parker, salary.da=190, salary.gross=1400
-------------------
Key: 105, ename.fname=harry, ename.lname=potter, salary.da=154, salary.gross=2300
-------------------
Completed 
3
Are your salaries really saved as bytes representing a string? That might be a problem because the string "900" is greater than the string "1500". – kichik

3 Answers

2
votes

You should use a combination of SingleColumnValueFilter and addFamily (or addColumn).

See below (I cannot test it on my end at this time):

// Keep only rows whose salary:gross value compares greater than "1500".
// NOTE(review): the value is compared as raw bytes, so string-encoded numbers
// are ordered lexicographically ("900" > "1500") - confirm how salaries are stored.
SingleColumnValueFilter filter = new SingleColumnValueFilter(
    Bytes.toBytes("salary"),
    Bytes.toBytes("gross"),
    CompareOp.GREATER,
    Bytes.toBytes("1500")
);
//To prevent the entire row from being emitted
//if the column is not found on a row
//(setFilterIfMissing belongs to the filter, not to the Scan)
filter.setFilterIfMissing(true);

Scan scan = new Scan();
scan.setFilter(filter);

// Columns to return. The tested column (salary:gross) must be among them,
// otherwise the filter treats it as missing.
scan.addFamily(Bytes.toBytes("ename"));
scan.addColumn(Bytes.toBytes("salary"), Bytes.toBytes("da"));
scan.addColumn(Bytes.toBytes("salary"), Bytes.toBytes("gross"));
0
votes

ValueFilter: "This filter makes it possible to include only columns that have a specific value."

That's why you are only getting columns you've specified in filters.

Tell me if I'm wrong, but what you want to do is retrieve ALL columns when salary is > 1500, isn't it?

0
votes

Your requirement is relational. So, I suggest you use a wrapper over HBase to make life easy.

Consider using Apache Phoenix. It's a high-performance SQL wrapper for HBase, with which you can run a query like: select * from emp where salary>1500.