
I am trying to migrate code from Spark 1.6 / Scala 2.10 to Spark 2.4 / Scala 2.11, but I cannot get it to compile. The dependency versions, a minimal example, and the compilation error are shown below.

// Dependencies
, "org.apache.spark" %% "spark-core" % "2.4.0"
, "org.apache.spark" %% "spark-sql"  % "2.4.0"
, "org.apache.hbase" % "hbase-server" % "1.2.0-cdh5.14.4"
, "org.apache.hbase" % "hbase-common" % "1.2.0-cdh5.14.4"
, "org.apache.hbase" % "hbase-spark"  % "1.2.0-cdh5.14.4"

// Minimal example
package spark2.hbase
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.spark.SparkContext
import org.apache.spark.sql.SparkSession
object ConnectToHBase {
  def main(args: Array[String]): Unit = {
    // Build a local SparkSession and grab its SparkContext.
    implicit val spark: SparkSession = SparkSession.builder
      .appName("Connect to HBase from Spark 2")
      .config("spark.master", "local")
      .getOrCreate()
    implicit val sc: SparkContext = spark.sparkContext

    // Constructing the HBaseContext is where the compilation error below surfaces.
    val hbaseConf = HBaseConfiguration.create()
    val hbaseContext = new HBaseContext(sc, hbaseConf)
  }
}

// Compilation error
[error] missing or invalid dependency detected while loading class file 'HBaseContext.class'.
[error] Could not access type Logging in package org.apache.spark,
[error] because it (or its dependencies) are missing. Check your build definition for
[error] missing or conflicting dependencies. (Re-run with `-Ylog-classpath` to see the problematic classpath.)
[error] A full rebuild may help if 'HBaseContext.class' was compiled against an incompatible version of org.apache.spark.

1 Answer


This works:

lazy val sparkVer = "2.4.0-cdh6.2.0"
lazy val hbaseVer = "2.1.0"
libraryDependencies ++= Seq(
    "org.apache.spark" %% "spark-core"      % sparkVer
  , "org.apache.spark" %% "spark-sql"       % sparkVer
  , "org.apache.spark" %% "spark-streaming" % sparkVer
  , "org.apache.hbase" % "hbase-common"     % hbaseVer
  , "org.apache.hbase" % "hbase-client"     % hbaseVer
  , "org.apache.hbase.connectors.spark" % "hbase-spark" % "1.0.0"
)
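
One note: the Cloudera-built "-cdh" artifacts are not published to Maven Central, so the build also needs Cloudera's repository as a resolver. A minimal sketch (the URL below is Cloudera's public repository; adjust it if you go through an internal mirror):

// Cloudera-built artifacts such as "2.4.0-cdh6.2.0" are hosted in Cloudera's repo, not Maven Central
resolvers += "Cloudera repository" at "https://repository.cloudera.com/artifactory/cloudera-repos/"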

The essential changes are moving to Cloudera CDH 6 (not 5) and taking "hbase-spark" from the new org.apache.hbase.connectors.spark coordinates. The CDH 5 build of hbase-spark is compiled against Spark 1.x and still references org.apache.spark.Logging, a class that was removed in Spark 2; that is exactly what the compilation error above complains about.
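
With these coordinates the minimal example from the question compiles unchanged, because the connector still provides org.apache.hadoop.hbase.spark.HBaseContext. As a rough sketch of actually using it, assuming a hypothetical table "test_table" with column family "cf" and qualifier "q":

import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.sql.SparkSession

object HBaseSparkSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder
      .appName("hbase-spark sketch")
      .config("spark.master", "local")
      .getOrCreate()
    val sc = spark.sparkContext

    // Same constructor that failed to compile against the CDH 5 artifact.
    val hbaseContext = new HBaseContext(sc, HBaseConfiguration.create())

    // Write a couple of rows into the hypothetical table "test_table", column family "cf".
    val rows = sc.parallelize(Seq(("row1", "value1"), ("row2", "value2")))
    hbaseContext.bulkPut[(String, String)](
      rows,
      TableName.valueOf("test_table"),
      { case (key, value) =>
        new Put(Bytes.toBytes(key))
          .addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q"), Bytes.toBytes(value))
      }
    )

    spark.stop()
  }
}

bulkPut applies the supplied record-to-Put function on the executors, so the writes happen in parallel rather than through a single HBase client on the driver.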