I am trying to use an HDFS file as the source and HBase as the sink of a MapReduce job. My Hadoop cluster has the following specification:
master 192.168.4.65
slave1 192.168.4.176
slave2 192.168.4.175
slave3 192.168.4.57
slave4 192.168.4.146
The ZooKeeper nodes are at the following IP addresses:
zks1 192.168.4.60
zks2 192.168.4.61
zks3 192.168.4.66
The HBase nodes are at the following IP addresses:
hbmaster 192.168.4.59
rs1 192.168.4.69
rs2 192.168.4.110
rs3 192.168.4.45
I have copied zookeeper-3.4.6.jar to the lib folder of every node in the Hadoop cluster. Individually, HDFS and HBase work fine, but I'm unable to access HBase from MapReduce. The driver code for the MapReduce job is:
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
/**
 * Command-line driver for the "HBase Provenance Component" MapReduce job.
 *
 * <p>The job is configured with {@code TextInputFormat} over the {@code <in>} path and with
 * {@code TableMapReduceUtil.initTableReducerJob} targeting the HBase table
 * {@code Provenance}.
 *
 * <p>Usage: {@code Driver [generic options] <in> <out>}
 *
 * <p>Exit codes: {@code 0} job succeeded, {@code 1} HBase master not running or job failed,
 * {@code 2} wrong number of arguments.
 */
public class Driver {

    /** Name of the HBase table passed to {@code initTableReducerJob}. */
    private static final String OUTPUT_TABLE = "Provenance";

    /**
     * Builds the job configuration, verifies that HBase is reachable, then submits the job
     * and blocks until it finishes.
     *
     * @param args generic Hadoop options followed by {@code <in> <out>}
     * @throws IOException if configuration, the HBase check, or job submission fails
     * @throws ClassNotFoundException propagated from {@code Job.waitForCompletion}
     * @throws InterruptedException propagated from {@code Job.waitForCompletion}
     */
    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = HBaseConfiguration.create();
        conf.addResource("hbase-site.xml");
        conf.set("hbase.zookeeper.quorum", "zks1,zks2,zks3");
        conf.set("hbase.zookeeper.property.clientPort", "2181");

        // Parse generic options (-D, -conf, ...) before touching the cluster so that
        // command-line overrides also apply to the availability check below, and a bad
        // command line is rejected without opening any network connection.
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: Driver <in> <out>");
            System.exit(2);
        }

        try {
            HBaseAdmin.checkHBaseAvailable(conf);
        } catch (MasterNotRunningException e) {
            System.err.println("Master not running");
            System.exit(1);
        }
        System.out.println("connected to hbase");

        Job job = new Job(conf, "HBase Provenance Component");
        job.setJarByClass(Driver.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setMapperClass(HBaseMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        System.out.println("Before");
        // initTableReducerJob installs the reducer class and the job's output format
        // itself, so neither is set separately here (an earlier
        // setOutputFormatClass(TextOutputFormat.class) was simply overwritten by this call).
        TableMapReduceUtil.initTableReducerJob(OUTPUT_TABLE, HBaseReducer.class, job);

        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        // NOTE(review): with the table output format installed above, this file output
        // path is presumably never written to - confirm, and if so drop the <out> argument.
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        System.out.println("After");

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
The job hangs at this point:
Warning: $HADOOP_HOME is deprecated.
15/05/27 11:46:35 INFO zookeeper.RecoverableZooKeeper: The identifier of this process is 6608@master
15/05/27 11:46:35 INFO zookeeper.ZooKeeper: Client environment:zookeeper.version=3.4.6-1569965, built on 02/20/2014 09:09 GMT
15/05/27 11:46:35 INFO zookeeper.ZooKeeper: Client environment:host.name=master
15/05/27 11:46:35 INFO zookeeper.ZooKeeper: Client environment:java.version=1.7.0_79
15/05/27 11:46:35 INFO zookeeper.ZooKeeper: Client environment:java.vendor=Oracle Corporation
15/05/27 11:46:35 INFO zookeeper.ZooKeeper: Client environment:java.home=/usr/lib/jvm/java-7-openjdk-amd64/jre
15/05/27 11:46:35 INFO zookeeper.ZooKeeper: Client environment:java.class.path=/usr/local/hadoop/libexec/../conf:/usr/lib/jvm/java-7-openjdk-amd64/lib/tools.jar:/usr/local/hadoop/libexec/..:/usr/local/hadoop/libexec/../hadoop-core-1.2.1.jar:/usr/local/hadoop/libexec/../lib/asm-3.2.jar:/usr/local/hadoop/libexec/../lib/aspectjrt-1.6.11.jar:/usr/local/hadoop/libexec/../lib/aspectjtools-1.6.11.jar:/usr/local/hadoop/libexec/../lib/commons-beanutils-1.7.0.jar:/usr/local/hadoop/libexec/../lib/commons-beanutils-core-1.8.0.jar:/usr/local/hadoop/libexec/../lib/commons-cli-1.2.jar:/usr/local/hadoop/libexec/../lib/commons-codec-1.4.jar:/usr/local/hadoop/libexec/../lib/commons-collections-3.2.1.jar:/usr/local/hadoop/libexec/../lib/commons-configuration-1.6.jar:/usr/local/hadoop/libexec/../lib/commons-daemon-1.0.1.jar:/usr/local/hadoop/libexec/../lib/commons-digester-1.8.jar:/usr/local/hadoop/libexec/../lib/commons-el-1.0.jar:/usr/local/hadoop/libexec/../lib/commons-httpclient-3.0.1.jar:/usr/local/hadoop/libexec/../lib/commons-io-2.1.jar:/usr/local/hadoop/libexec/../lib/commons-lang-2.4.jar:/usr/local/hadoop/libexec/../lib/commons-logging-1.1.1.jar:/usr/local/hadoop/libexec/../lib/commons-logging-api-1.0.4.jar:/usr/local/hadoop/libexec/../lib/commons-math-2.1.jar:/usr/local/hadoop/libexec/../lib/commons-net-3.1.jar:/usr/local/hadoop/libexec/../lib/core-3.1.1.jar:/usr/local/hadoop/libexec/../lib/guava-18.0.jar:/usr/local/hadoop/libexec/../lib/hadoop-capacity-scheduler-1.2.1.jar:/usr/local/hadoop/libexec/../lib/hadoop-fairscheduler-1.2.1.jar:/usr/local/hadoop/libexec/../lib/hadoop-thriftfs-1.2.1.jar:/usr/local/hadoop/libexec/../lib/hbase-0.94.24.jar:/usr/local/hadoop/libexec/../lib/hsqldb-1.8.0.10.jar:/usr/local/hadoop/libexec/../lib/jackson-core-asl-1.8.8.jar:/usr/local/hadoop/libexec/../lib/jackson-mapper-asl-1.8.8.jar:/usr/local/hadoop/libexec/../lib/jasper-compiler-5.5.12.jar:/usr/local/hadoop/libexec/../lib/jasper-runtime-5.5.12.jar:/usr/local/hadoop/libexec/../lib/jdeb-0.8.ja
r:/usr/local/hadoop/libexec/../lib/jersey-core-1.8.jar:/usr/local/hadoop/libexec/../lib/jersey-json-1.8.jar:/usr/local/hadoop/libexec/../lib/jersey-server-1.8.jar:/usr/local/hadoop/libexec/../lib/jets3t-0.6.1.jar:/usr/local/hadoop/libexec/../lib/jetty-6.1.26.jar:/usr/local/hadoop/libexec/../lib/jetty-util-6.1.26.jar:/usr/local/hadoop/libexec/../lib/jsch-0.1.42.jar:/usr/local/hadoop/libexec/../lib/junit-4.5.jar:/usr/local/hadoop/libexec/../lib/kfs-0.2.2.jar:/usr/local/hadoop/libexec/../lib/log4j-1.2.15.jar:/usr/local/hadoop/libexec/../lib/mockito-all-1.8.5.jar:/usr/local/hadoop/libexec/../lib/oro-2.0.8.jar:/usr/local/hadoop/libexec/../lib/protobuf-java-2.4.0a.jar:/usr/local/hadoop/libexec/../lib/servlet-api-2.5-20081211.jar:/usr/local/hadoop/libexec/../lib/slf4j-api-1.4.3.jar:/usr/local/hadoop/libexec/../lib/slf4j-log4j12-1.4.3.jar:/usr/local/hadoop/libexec/../lib/xmlenc-0.52.jar:/usr/local/hadoop/libexec/../lib/zookeeper-3.4.6.jar:/usr/local/hadoop/libexec/../lib/jsp-2.1/jsp-2.1.jar:/usr/local/hadoop/libexec/../lib/jsp-2.1/jsp-api-2.1.jar
15/05/27 11:46:35 INFO zookeeper.ZooKeeper: Client environment:java.library.path=/usr/local/hadoop/libexec/../lib/native/Linux-amd64-64
15/05/27 11:46:35 INFO zookeeper.ZooKeeper: Client environment:java.io.tmpdir=/tmp
15/05/27 11:46:35 INFO zookeeper.ZooKeeper: Client environment:java.compiler=<NA>
15/05/27 11:46:35 INFO zookeeper.ZooKeeper: Client environment:os.name=Linux
15/05/27 11:46:35 INFO zookeeper.ZooKeeper: Client environment:os.arch=amd64
15/05/27 11:46:35 INFO zookeeper.ZooKeeper: Client environment:os.version=3.5.0-60-generic
15/05/27 11:46:35 INFO zookeeper.ZooKeeper: Client environment:user.name=hduser
15/05/27 11:46:35 INFO zookeeper.ZooKeeper: Client environment:user.home=/home/hduser
15/05/27 11:46:35 INFO zookeeper.ZooKeeper: Client environment:user.dir=/usr/local/hadoop
15/05/27 11:46:35 INFO zookeeper.ZooKeeper: Initiating client connection, connectString=zks2:2181,zks1:2181,zks3:2181 sessionTimeout=180000 watcher=hconnection
15/05/27 11:46:35 INFO zookeeper.ClientCnxn: Opening socket connection to server zks3/192.168.4.66:2181. Will not attempt to authenticate using SASL (unknown error)
15/05/27 11:46:35 INFO zookeeper.ClientCnxn: Socket connection established to zks3/192.168.4.66:2181, initiating session
15/05/27 11:46:35 INFO zookeeper.ClientCnxn: Session establishment complete on server zks3/192.168.4.66:2181, sessionid = 0x34d94005a530003, negotiated timeout = 60000
15/05/27 11:46:35 INFO client.HConnectionManager$HConnectionImplementation: Closed zookeeper sessionid=0x34d94005a530003
15/05/27 11:46:35 INFO zookeeper.ZooKeeper: Session: 0x34d94005a530003 closed
15/05/27 11:46:35 INFO zookeeper.ClientCnxn: EventThread shut down
connected to hbase
Before
After
15/05/27 11:46:38 INFO zookeeper.RecoverableZooKeeper: The identifier of this process is 6608@master
15/05/27 11:46:38 INFO zookeeper.ZooKeeper: Initiating client connection, connectString=zks2:2181,zks1:2181,zks3:2181 sessionTimeout=180000 watcher=hconnection
15/05/27 11:46:38 INFO zookeeper.ClientCnxn: Opening socket connection to server zks2/192.168.4.61:2181. Will not attempt to authenticate using SASL (unknown error)
15/05/27 11:46:38 INFO zookeeper.ClientCnxn: Socket connection established to zks2/192.168.4.61:2181, initiating session
15/05/27 11:46:38 INFO zookeeper.ClientCnxn: Session establishment complete on server zks2/192.168.4.61:2181, sessionid = 0x24d93f965730002, negotiated timeout = 60000
Please suggest where I am going wrong.