I am copying and pasting the exact Spark MLlib LDA example from here: http://spark.apache.org/docs/latest/mllib-clustering.html#latent-dirichlet-allocation-lda
I am trying the Scala sample code, but I get the following errors when I try to save and load the LDA model:
- on the line before the last line (ldaModel.save(sc, "myLDAModel")):
value save is not a member of org.apache.spark.mllib.clustering.DistributedLDAModel
- on the last line (val sameModel = DistributedLDAModel.load(sc, "myLDAModel")):
not found: value DistributedLDAModel
Here is the code. I am using SBT to create my Scala project skeleton and to pull in the libraries, and I then import the project into Eclipse (Mars) for editing. I am using spark-core 1.5.0, spark-mllib 1.3.1, and Scala 2.11.7.
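For reference, my build.sbt declares the dependencies roughly as follows (reconstructed here, so the exact file may differ slightly; the project name and version are placeholders):

name := "sample_SBT"

version := "1.0"

scalaVersion := "2.11.7"

libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core"  % "1.5.0",
  "org.apache.spark" %% "spark-mllib" % "1.3.1"
)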
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.SparkConf
import org.apache.spark.mllib.clustering.{LDA, DistributedLDAModel}
import org.apache.spark.mllib.linalg.Vectors
object sample {
  def main(args: Array[String]) {
    val conf = new SparkConf().setAppName("sample_SBT").setMaster("local[2]")
    val sc = new SparkContext(conf)

    // Load and parse the data
    val data = sc.textFile("data/mllib/sample_lda_data.txt")
    val parsedData = data.map(s => Vectors.dense(s.trim.split(' ').map(_.toDouble)))
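    // (Each line of sample_lda_data.txt, which ships with the Spark distribution,
    // is one document: a space-separated vector of term counts over the vocabulary,
    // e.g. a line like "1 2 6 0 2 3 1 1 0 0 3".)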
    // Index documents with unique IDs
    val corpus = parsedData.zipWithIndex.map(_.swap).cache()

    // Cluster the documents into three topics using LDA
    val ldaModel = new LDA().setK(3).run(corpus)

    // Output topics. Each is a distribution over words (matching word count vectors)
    println("Learned topics (as distributions over vocab of " + ldaModel.vocabSize + " words):")
    val topics = ldaModel.topicsMatrix
    for (topic <- Range(0, 3)) {
      print("Topic " + topic + ":")
      for (word <- Range(0, ldaModel.vocabSize)) { print(" " + topics(word, topic)) }
      println()
    }

    // Save and load model.
    ldaModel.save(sc, "myLDAModel")
    val sameModel = DistributedLDAModel.load(sc, "myLDAModel")
  }
}