I need to use MapReduce on HBase to compare two tables (table1 and table2) and write a summary to a third table (table3).
I am using the TableMapReduceUtil pseudocode below. The mapper reads from Table1 and the reducer writes to Table3.
In the mapper, I need to compare each Table1 value with Table2. Where do I instantiate Table2?
Do I have to instantiate Table3 in each mapper? I would like to instantiate Table3 only once for the entire MapReduce job.
// Driver pseudo-code: registers MyMapper over a scan of table1 and MyTableReducer
// with table3 as the output table, both on the same job.
driver()
{
TableMapReduceUtil.initTableMapperJob(
table1, // input table
scan,
MyMapper.class, // mapper class
Text.class, // mapper output key class; matches TableMapper<Text, IntWritable> on MyMapper
IntWritable.class, // mapper output value class
job);
// NOTE(review): initTableReducerJob is expected to configure the job's output so that
// the Puts emitted by MyTableReducer are written to table3 by the framework; if so,
// table3 never has to be opened by hand in the mapper or reducer. Confirm against the
// TableMapReduceUtil documentation for the HBase version in use.
TableMapReduceUtil.initTableReducerJob(
table3, // output table
MyTableReducer.class,
job);
}
public static class MyMapper extends TableMapper<Text, IntWritable> {
public static final byte[] CF = "cf".getBytes();
public static final byte[] ATTR1 = "attr1".getBytes();
private final IntWritable ONE = new IntWritable(1);
private Text text = new Text();
public void map(ImmutableBytesWritable row, Result value, Context context) throws IOException, InterruptedException {
String val = new String(value.getValue(CF, ATTR1));
String diff;
//instantiate Table3 and compare with val. Do i have to instantiate for each mapper?
text.set(diff);
context.write(text, ONE);
}
}
public static class MyTableReducer extends TableReducer<Text, IntWritable, ImmutableBytesWritable> {
public static final byte[] CF = "cf".getBytes();
public static final byte[] COUNT = "count".getBytes();
public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
int i = 0;
for (IntWritable val : values) {
i += val.get();
}
Put put = new Put(Bytes.toBytes(key.toString()));
put.add(CF, COUNT, Bytes.toBytes(i));
context.write(null, put);
}
}