On a single machine I have installed Hadoop 2.7.3, HBase 1.2.6, and Spark 2.1.1. From a Spark application I'm trying to read data from HBase, but I always get a timeout error. The code is in Scala:
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.mapreduce.TableInputFormat

object SparkHbaseTest {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("Spark HBase Test")
    val sc = new SparkContext(sparkConf)

    // HBaseConfiguration.create() picks up hbase-site.xml from the classpath.
    val conf = HBaseConfiguration.create()
    //conf.set("hbase.zookeeper.quorum", "localhost")
    //conf.set("hbase.zookeeper.property.clientPort", "2181")
    //conf.set("zookeeper.znode.parent", "/hbase")

    // Scan a bounded row-key range of the resource_usage table.
    conf.set(TableInputFormat.INPUT_TABLE, "resource_usage")
    conf.set(TableInputFormat.SCAN_ROW_START, "8111c675d890620dfdc33b792d2152ef_1496080860")
    conf.set(TableInputFormat.SCAN_ROW_STOP, "8111c675d890620dfdc33b792d2152ef_1496083860")

    val hBaseRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
      classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
      classOf[org.apache.hadoop.hbase.client.Result])

    val count = hBaseRDD.count()
    println(s"RDD COUNT: $count")
    sc.stop()
  }
}
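For now I only call count(); once the scan works, consuming the (ImmutableBytesWritable, Result) pairs would look roughly like this (just a sketch that decodes and prints the first few row keys):

// Sketch: decode the row keys on the executors, collect only strings.
import org.apache.hadoop.hbase.util.Bytes
hBaseRDD.map { case (key, _) => Bytes.toString(key.copyBytes()) }
  .take(5)
  .foreach(println)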
But whenever I run the application, I get this exception:
17/07/01 05:02:37 INFO ClientCnxn: Opening socket connection to server localhost/127.0.0.1:2181. Will not attempt to authenticate using SASL (unknown error)
17/07/01 05:02:37 INFO ClientCnxn: Socket connection established to localhost/127.0.0.1:2181, initiating session
17/07/01 05:02:37 INFO ClientCnxn: Session establishment complete on server localhost/127.0.0.1:2181, sessionid = 0x15cfbe24a1f0006, negotiated timeout = 90000
17/07/01 05:02:37 INFO RegionSizeCalculator: Calculating region sizes for table "resource_usage".
17/07/01 05:02:37 INFO CoarseGrainedSchedulerBackend$DriverEndpoint: Registered executor NettyRpcEndpointRef(null) (192.168.88.13:58576) with ID 0
17/07/01 05:02:37 INFO BlockManagerMasterEndpoint: Registering block manager 192.168.88.13:39253 with 366.3 MB RAM, BlockManagerId(0, 192.168.88.13, 39253, None)
17/07/01 05:03:15 INFO RpcRetryingCaller: Call exception, tries=10, retries=35, started=38510 ms ago, cancelled=false, msg=row 'resource_usage,,00000000000000' on table 'hbase:meta' at region=hbase:meta,,1.1588230740, hostname=ubuntu-17,16201,1498871531458, seqNum=0
17/07/01 05:03:25 INFO RpcRetryingCaller: Call exception, tries=11, retries=35, started=48566 ms ago, cancelled=false, msg=row 'resource_usage,,00000000000000' on table 'hbase:meta' at region=hbase:meta,,1.1588230740, hostname=ubuntu-17,16201,1498871531458, seqNum=0
17/07/01 05:03:25 INFO ConnectionManager$HConnectionImplementation: Closing zookeeper sessionid=0x15cfbe24a1f0006
17/07/01 05:03:25 INFO ClientCnxn: EventThread shut down
17/07/01 05:03:25 INFO ZooKeeper: Session: 0x15cfbe24a1f0006 closed
Exception in thread "main" org.apache.hadoop.hbase.client.RetriesExhaustedException: Failed after attempts=36, exceptions:
Sat Jul 01 05:03:25 EEST 2017, null, java.net.SocketTimeoutException: callTimeout=60000, callDuration=68657: row 'resource_usage,,00000000000000' on table 'hbase:meta' at region=hbase:meta,,1.1588230740, hostname=ubuntu-17,16201,1498871531458, seqNum=0
at org.apache.hadoop.hbase.client.RpcRetryingCallerWithReadReplicas.throwEnrichedException(RpcRetryingCallerWithReadReplicas.java:276)
at org.apache.hadoop.hbase.client.ScannerCallableWithReplicas.call(ScannerCallableWithReplicas.java:210)
at org.apache.hadoop.hbase.client.ScannerCallableWithReplicas.call(ScannerCallableWithReplicas.java:60)
at org.apache.hadoop.hbase.client.RpcRetryingCaller.callWithoutRetries(RpcRetryingCaller.java:210)
at org.apache.hadoop.hbase.client.ClientScanner.call(ClientScanner.java:327)
at org.apache.hadoop.hbase.client.ClientScanner.nextScanner(ClientScanner.java:302)
at org.apache.hadoop.hbase.client.ClientScanner.initializeScannerInConstruction(ClientScanner.java:167)
at org.apache.hadoop.hbase.client.ClientScanner.<init>(ClientScanner.java:162)
at org.apache.hadoop.hbase.client.HTable.getScanner(HTable.java:797)
at org.apache.hadoop.hbase.client.MetaScanner.metaScan(MetaScanner.java:193)
at org.apache.hadoop.hbase.client.MetaScanner.metaScan(MetaScanner.java:89)
at org.apache.hadoop.hbase.client.MetaScanner.allTableRegions(MetaScanner.java:324)
at org.apache.hadoop.hbase.client.HRegionLocator.getAllRegionLocations(HRegionLocator.java:89)
at org.apache.hadoop.hbase.util.RegionSizeCalculator.init(RegionSizeCalculator.java:94)
at org.apache.hadoop.hbase.util.RegionSizeCalculator.<init>(RegionSizeCalculator.java:81)
at org.apache.hadoop.hbase.mapreduce.TableInputFormatBase.getSplits(TableInputFormatBase.java:256)
at org.apache.hadoop.hbase.mapreduce.TableInputFormat.getSplits(TableInputFormat.java:239)
at org.apache.spark.rdd.NewHadoopRDD.getPartitions(NewHadoopRDD.scala:125)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1965)
at org.apache.spark.rdd.RDD.count(RDD.scala:1158)
at SparkHbaseTest$.main(SparkHbaseTest.scala:41)
at SparkHbaseTest.main(SparkHbaseTest.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:743)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:187)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:212)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:126)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.net.SocketTimeoutException: callTimeout=60000, callDuration=68657: row 'resource_usage,,00000000000000' on table 'hbase:meta' at region=hbase:meta,,1.1588230740, hostname=ubuntu-17,16201,1498871531458, seqNum=0
at org.apache.hadoop.hbase.client.RpcRetryingCaller.callWithRetries(RpcRetryingCaller.java:169)
at org.apache.hadoop.hbase.client.ResultBoundedCompletionService$QueueingFuture.run(ResultBoundedCompletionService.java:65)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.io.IOException: com.google.protobuf.ServiceException: java.lang.NoClassDefFoundError: com/yammer/metrics/core/Gauge
at org.apache.hadoop.hbase.protobuf.ProtobufUtil.getRemoteException(ProtobufUtil.java:332)
at org.apache.hadoop.hbase.client.ScannerCallable.openScanner(ScannerCallable.java:408)
at org.apache.hadoop.hbase.client.ScannerCallable.call(ScannerCallable.java:204)
at org.apache.hadoop.hbase.client.ScannerCallable.call(ScannerCallable.java:65)
at org.apache.hadoop.hbase.client.RpcRetryingCaller.callWithoutRetries(RpcRetryingCaller.java:210)
at org.apache.hadoop.hbase.client.ScannerCallableWithReplicas$RetryingRPC.call(ScannerCallableWithReplicas.java:364)
at org.apache.hadoop.hbase.client.ScannerCallableWithReplicas$RetryingRPC.call(ScannerCallableWithReplicas.java:338)
at org.apache.hadoop.hbase.client.RpcRetryingCaller.callWithRetries(RpcRetryingCaller.java:136)
... 4 more
Caused by: com.google.protobuf.ServiceException: java.lang.NoClassDefFoundError: com/yammer/metrics/core/Gauge
at org.apache.hadoop.hbase.ipc.AbstractRpcClient.callBlockingMethod(AbstractRpcClient.java:240)
at org.apache.hadoop.hbase.ipc.AbstractRpcClient$BlockingRpcChannelImplementation.callBlockingMethod(AbstractRpcClient.java:336)
at org.apache.hadoop.hbase.protobuf.generated.ClientProtos$ClientService$BlockingStub.scan(ClientProtos.java:34094)
at org.apache.hadoop.hbase.client.ScannerCallable.openScanner(ScannerCallable.java:400)
... 10 more
Caused by: java.lang.NoClassDefFoundError: com/yammer/metrics/core/Gauge
at org.apache.hadoop.hbase.ipc.AbstractRpcClient.callBlockingMethod(AbstractRpcClient.java:225)
... 13 more
Caused by: java.lang.ClassNotFoundException: com.yammer.metrics.core.Gauge
at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:335)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
... 14 more
I have seen this issue mentioned elsewhere, but I couldn't solve it by adding ${HBase_Home}/conf to HADOOP_CLASSPATH:
export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:${HBase_Home}/conf/
- Spark Hbase connection issue: https://community.hortonworks.com/questions/66756/spark-hbase-connection-issue.html
- https://community.cloudera.com/t5/Advanced-Analytics-Apache-Spark/java-net-SocketTimeoutException-on-table-hbase-meta-at-region/td-p/45074
- https://github.com/hortonworks-spark/shc/issues/23
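None of the suggestions there helped. A plain HBase client check along these lines (just a sketch against the 1.2.x client API, run with ${HBase_Home}/conf on the classpath) should at least isolate Spark from the problem:

import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.client.ConnectionFactory

object HBaseClientCheck {
  def main(args: Array[String]): Unit = {
    // Picks up hbase-site.xml from the classpath, same as the Spark job.
    val conf = HBaseConfiguration.create()
    val connection = ConnectionFactory.createConnection(conf)
    try {
      // If this prints true, ZooKeeper and the hbase:meta lookup both work.
      println(connection.getAdmin.tableExists(TableName.valueOf("resource_usage")))
    } finally {
      connection.close()
    }
  }
}

If that times out as well, the problem is in HBase/ZooKeeper itself rather than in the Spark classpath. For completeness, here are my configuration files: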
hadoop/core-site.xml
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://localhost:9000</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/home/user/hadoop-data</value>
  </property>
</configuration>
hadoop/hdfs-site.xml
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
  <property>
    <name>dfs.namenode.datanode.registration.ip-hostname-check</name>
    <value>false</value>
  </property>
</configuration>
hbase/hbase-site.xml
<configuration>
  <property>
    <name>hbase.cluster.distributed</name>
    <value>true</value>
  </property>
  <property>
    <name>hbase.rootdir</name>
    <value>hdfs://localhost:9000/hbase</value>
  </property>
  <property>
    <name>hbase.zookeeper.property.dataDir</name>
    <value>/home/user/zookeeper-data</value>
  </property>
  <property>
    <name>hbase.zookeeper.quorum</name>
    <value>localhost</value>
  </property>
  <property>
    <name>hbase.zookeeper.property.clientPort</name>
    <value>2181</value>
  </property>
  <property>
    <name>zookeeper.znode.parent</name>
    <value>/hbase</value>
  </property>
</configuration>
hbase/hbase-env.sh
# Set environment variables here.
# Extra Java runtime options.
# Below are what we set by default. May only work with SUN JVM.
# For more on why as well as other possible settings,
# see http://wiki.apache.org/hadoop/PerformanceTuning
export HBASE_OPTS="-XX:+UseConcMarkSweepGC"
# Tell HBase whether it should manage its own instance of ZooKeeper or not.
export HBASE_MANAGES_ZK=true
spark/spark-defaults.conf
spark.driver.extraClassPath :${Hadoop_Home}/hadoop-2.7.3/share/hadoop/common/hadoop-common-2.7.3.jar:${HBase_Home}/lib/hbase-common-1.2.6.jar:${HBase_Home}/lib/hbase-server-1.2.6.jar:${HBase_Home}/lib/hbase-client-1.2.6.jar:${HBase_Home}/lib/hbase-protocol-1.2.6.jar:${HBase_Home}/conf/
spark.executor.extraClassPath :${Hadoop_Home}/hadoop-2.7.3/share/hadoop/common/hadoop-common-2.7.3.jar:${HBase_Home}/lib/hbase-common-1.2.6.jar:${HBase_Home}/lib/hbase-server-1.2.6.jar:${HBase_Home}/lib/hbase-client-1.2.6.jar:${HBase_Home}/lib/hbase-protocol-1.2.6.jar:${HBase_Home}/conf/
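One thing I notice when re-reading the trace: the final cause is ClassNotFoundException: com.yammer.metrics.core.Gauge, and the classpath entries above cover the HBase common/server/client/protocol jars but not ${HBase_Home}/lib/metrics-core-2.2.0.jar, which (assuming the stock HBase 1.2.x lib layout) is the jar that normally provides that class. Presumably both lines would need it appended, e.g.:
spark.executor.extraClassPath ...:${HBase_Home}/lib/metrics-core-2.2.0.jar
Is the missing metrics-core jar likely the whole story here, or is something else wrong in this setup?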