0
votes

I have two dataframes in spark/scala in which i have some common column like salary,bonus,increment etc. i need to compare these two dataframes's columns and anything changes like in first dataframe salary is 3000 and in second dataframe salary is 5000 then i need to insert 5000-3000=2000 in new dataframe as salary, and if in first dataframe salary is 5000 and in second dataframe salary is 3000 then i need to insert 5000+3000=8000 in new dataframe as salary, and if salary is same in both the dataframe then need to insert from second dataframe.

    val columns = df1.schema.fields.map(_.salary)
    val salaryDifferences = columns.map(col => df1.select(col).except(df2.select(col)))
    salaryDifferences.map(diff => {if(diff.count > 0) diff.show})

I tried above query but its giving column and value where any difference is there i need to also check if diff is negative or positive and based to that i need to perform logic.can anyone please give me a hint how can i implement this and insert record in 3rd dataframe,

1

1 Answers

1
votes

Join the Dataframes and use nested when and otherwise clause. Also find comments in the code

import org.apache.spark.sql.functions._

object SalaryDiff {

  def main(args: Array[String]): Unit = {
    val spark = Constant.getSparkSess
    import spark.implicits._

    val df1 = List(("1", "5000"), ("2", "3000"), ("3", "5000")).toDF("id", "salary") // First dataframe

    val df2 = List(("1", "3000"), ("2", "5000"), ("3", "5000")).toDF("id", "salary") // Second dataframe

    val df3 = df1   // Is your 3rd tables
      .join(
        df2
        , df1("id") === df2("id")    // Join both dataframes on id column
      ).withColumn("finalSalary", when(df1("salary") < df2("salary"), df2("salary") - df1("salary")) // 5000-3000=2000 check
      .otherwise(
      when(df1("salary") > df2("salary"), df1("salary") + df2("salary"))  // 5000+3000=8000  check
        .otherwise(df2("salary"))))    // insert from second dataframe
      .drop(df1("salary"))
      .drop(df2("salary"))
      .withColumnRenamed("finalSalary","salary")
      .show()

  }

}