I have the following SBT files:
.
-- root
-- plugins.sbt
-- build.sbt
With plugins.sbt containing the following:
addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.12.0")
And build.sbt containing the following:
import sbt.Keys._

resolvers in ThisBuild ++= Seq(
  "Apache Development Snapshot Repository" at "https://repository.apache.org/content/repositories/snapshots/",
  Resolver.sonatypeRepo("public")
)

name := "flink-experiment"

lazy val commonSettings = Seq(
  organization := "my.organisation",
  version := "0.1.0-SNAPSHOT"
)

val flinkVersion = "1.1.0"
val sparkVersion = "2.0.0"
val kafkaVersion = "0.8.2.1"

val hadoopDependencies = Seq(
  "org.apache.avro" % "avro" % "1.7.7" % "provided",
  "org.apache.avro" % "avro-mapred" % "1.7.7" % "provided"
)

val flinkDependencies = Seq(
  "org.apache.flink" %% "flink-scala" % flinkVersion % "provided",
  "org.apache.flink" %% "flink-streaming-scala" % flinkVersion % "provided",
  "org.apache.flink" %% "flink-connector-kafka-0.8" % flinkVersion exclude("org.apache.kafka", "kafka_${scala.binary.version}")
)

val sparkDependencies = Seq(
  "org.apache.spark" %% "spark-core" % sparkVersion % "provided",
  "org.apache.spark" %% "spark-streaming" % sparkVersion % "provided",
  "org.apache.spark" %% "spark-streaming-kafka-0-8" % sparkVersion exclude("org.apache.kafka", "kafka_${scala.binary.version}")
)
val kafkaDependencies = Seq(
  "org.apache.kafka" %% "kafka" % kafkaVersion
)
val toolDependencies = Seq(
  "com.github.scopt" %% "scopt" % "3.5.0"
)

val testDependencies = Seq(
  "org.scalactic" %% "scalactic" % "2.2.6",
  "org.scalatest" %% "scalatest" % "2.2.6" % "test"
)

lazy val root = (project in file(".")).
  settings(commonSettings: _*).
  settings(
    libraryDependencies ++= hadoopDependencies,
    libraryDependencies ++= flinkDependencies,
    libraryDependencies ++= sparkDependencies,
    libraryDependencies ++= kafkaDependencies,
    libraryDependencies ++= toolDependencies,
    libraryDependencies ++= testDependencies
  ).
  enablePlugins(AssemblyPlugin)

run in Compile <<= Defaults.runTask(fullClasspath in Compile, mainClass in (Compile, run), runner in (Compile, run))

mainClass in assembly := Some("my.organization.experiment.Experiment")

assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false)
Running sbt clean assembly now sadly fails with the following error:
[error] (root/*:assembly) deduplicate: different file contents found in the following:
[error] /home/kevin/.ivy2/cache/org.apache.spark/spark-streaming-kafka-0-8_2.10/jars/spark-streaming-kafka-0-8_2.10-2.0.0.jar:org/apache/spark/unused/UnusedStubClass.class
[error] /home/kevin/.ivy2/cache/org.apache.spark/spark-tags_2.10/jars/spark-tags_2.10-2.0.0.jar:org/apache/spark/unused/UnusedStubClass.class
[error] /home/kevin/.ivy2/cache/org.spark-project.spark/unused/jars/unused-1.0.0.jar:org/apache/spark/unused/UnusedStubClass.class
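From the sbt-assembly README I gather that duplicates like this are normally handled with a merge strategy, so I assume something along these lines might make the error go away (just a sketch of the documented assemblyMergeStrategy pattern; the UnusedStubClass match is my guess, and I'm not sure whether first or discard is the right choice here):

assemblyMergeStrategy in assembly := {
  // Keep one copy of the stub class reported as a duplicate above
  // (MergeStrategy.discard would drop it from the fat jar instead).
  case PathList("org", "apache", "spark", "unused", "UnusedStubClass.class") => MergeStrategy.first
  // Delegate everything else to the default merge strategy.
  case x =>
    val oldStrategy = (assemblyMergeStrategy in assembly).value
    oldStrategy(x)
}

But I don't know whether papering over the duplicate like that is safe, or whether my dependency setup itself is wrong.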
How can I fix this?