2
votes

I am using the catalog method to read data from HBase and store it in a DataFrame, as described in "Read HBase table with where clause using Spark", but I am wondering whether there is a more efficient way to do this. The problem statement is:

  1. scan hbase table_a
  2. scan hbase table_b(mapping table)
  3. check whether the col_1 value is present in table_b; if yes, get the parent_id from the mapping table
  4. if not, check whether col_2 is present in table_b; if yes, get the parent_id from the mapping table
  5. save the result to a file.

I am able to do this using the method above, but because I am using a join like the one below, `select * from a join b where (case when a.duns is null then a.ig else a.duns end) = b.rowkey`,

it takes forever

please help

1

1 Answer

0
votes
import org.apache.hadoop.hbase.{HBaseConfiguration, 

HTableDescriptor,HColumnDescriptor,HConstants,TableName,CellUtil}

import org.apache.hadoop.hbase.client.{HBaseAdmin, 

Result,Put,HTable,ConnectionFactory,Connection,Get,Scan}

import org.apache.hadoop.hbase.io.ImmutableBytesWritable

import org.apache.hadoop.hbase.mapreduce.TableInputFormat

import org.apache.hadoop.hbase.util.Bytes


      // Build the HBase client configuration and point it at ZooKeeper.
      val hconf = HBaseConfiguration.create()

      // The key used to read "hbase.zookee per.quorum" (stray space), which set an
      // unrelated property, so the intended quorum setting was never applied.
      hconf.set("hbase.zookeeper.quorum", "localhost")

      hconf.set("hbase.zookeeper.property.clientPort", "2181")

      val admin = new HBaseAdmin(hconf)

      val hconn = ConnectionFactory.createConnection(hconf)

      // Takes the first name returned for "student"; indexing with (0) throws if
      // nothing is returned.
      val tabName_string = admin.getTableNames("student")(0)   // enter table name

      val table = new HTable(hconf, tabName_string)  // create table connection

      // Fetch the single row identified by the given row key.
      val data = table.get(new Get(Bytes.toBytes("row-id97")))   // row ID

      /**
        * Renders one HBase cell as strings; `hint` selects which parts are returned.
        *
        *   - 1: (row key, column qualifier)
        *   - 2: (row key, value)
        *   - 3: (row key, column family)
        *   - 4: (row key, column qualifier, column family, value)
        *   - any other value: the string "Wrong Hint"
        *
        * Every byte array is decoded with `Bytes.toString`. The result type is left
        * inferred because the branches return differently shaped values.
        *
        * @param x    the cell to render
        * @param hint selector for the output shape (see list above)
        */
      def getHBaseRowData (x: org.apache.hadoop.hbase.Cell, hint: Int )=  {
        // CellUtil.cloneRow replaces the deprecated Cell.getRow() and matches the
        // CellUtil.clone* calls already used for the other parts of the cell.
        // These are local defs so that only the parts a branch needs get copied.
        def rowKey    = Bytes.toString(CellUtil.cloneRow(x))
        def qualifier = Bytes.toString(CellUtil.cloneQualifier(x))
        def family    = Bytes.toString(CellUtil.cloneFamily(x))
        def value     = Bytes.toString(CellUtil.cloneValue(x))

        hint match {
          case 1 => (rowKey, qualifier)
          case 2 => (rowKey, value)
          case 3 => (rowKey, family)
          case 4 => (rowKey, qualifier, family, value)
          case _ => ("Wrong Hint")
        }
      }

       // Print every cell of the fetched row as (row, qualifier, family, value).
       // Result.rawCells() is documented as possibly returning null when the result
       // holds nothing, so wrap it in Option instead of iterating it directly.
       Option(data.rawCells()).foreach(_.foreach(cell => println(getHBaseRowData(cell, 4))))