This is my code
import org.apache.spark.SparkContext..
def main(args: Array[String]): Unit = {
val conf = new sparkConf().setMaster("local").setAppname("My app")
val sc = new SparkContext(conf_
val inputfile = "D:/test.txt"
val inputData = sc.textFile(inputFile)
val DupleRawData = inputData.map(_.split("\\<\\>").toList)
.map(s => (s(8),s(18)))
.map(s => (s, 1))
.reduceByKey(_ + _)
val UserShopCount = DupleRawData.groupBy(s => s._1._1)
.map(s => (s._1, s._2.toList.sortBy(z => z._2).reverse))
val ResultSet = UserShopCount.map(s => (s._1, s._2.take(1000).map(z => z._1._2, z._2))))
ResultSet.foreach(println)
//(aaa,List((100,4), (200,4), (300,3), (800,1)))
//(bbb,List((100,6), (400,5), (500,4)))
//(ccc,List((300,7), (400,6), (700,3)))
// here now I reach..
}
and this is the result I'm getting:
(aaa,List((100,4), (200,4), (300,3), (800,1)))
(bbb,List((100,6), (400,5), (500,4)))
(ccc,List((300,7), (400,6), (700,3)))
I want to final result set RDD is
// val ResultSet: org.apache.spark.rdd.RDD[(String, List[(String, Int)])]
(aaa, List(200,4), (800,1)) // because key of bbb and ccc except 100,300
(bbb, List((500,4)) // because aaa and ccc key except 100,400
(ccc, List((700,3)) // because aaa and bbb key except 300,400
please give me a solution or advice...sincerely