I am trying to run a Python script from Visual Studio Code on a Databricks cluster using Databricks Connect. The JAR files for the Apache Spark connector for SQL Server & Azure SQL have been installed on the Databricks cluster.
However, I am getting the following error:
py4j.protocol.Py4JJavaError: An error occurred while calling o42.save. : java.sql.SQLException: No suitable driver
from pyspark.sql import SparkSession, DataFrame
from db_connection import fetch_db_config
from pyspark.context import SparkContext
from pyspark import sql
from IPython.display import display
## Fetch data from a Delta table into a DataFrame
def fetch_data_from_delta(
    spark: SparkSession,
    sql: sql
):
    """Read a 10,000-row sample of the lending-club loans Delta table.

    Populates the module-level global ``df`` (consumed later by
    ``load_df_to_db``) and also returns it for convenience.

    Args:
        spark: Active SparkSession used to run the SQL query.
        sql: The ``pyspark.sql`` module (passed in by ``run``; not used here).

    Returns:
        The resulting Spark DataFrame.
    """
    global df
    query = "select * from lending_club_acc_loans limit 10000"
    df = spark.sql(query)
    # newdf = df.repartition(8)
    return df
def load_df_to_db():
    """Write the global ``df`` to SQL Server via the MS Spark connector.

    Relies on ``fetch_data_from_delta`` having populated the module-level
    global ``df`` first, and on ``fetch_db_config`` for the JDBC settings.
    Failures are reported best-effort to stdout rather than re-raised.
    """
    jdbchostname, jdbcurl, jdbcusername, jdbcpassword, connection_properties = fetch_db_config()
    try:
        df.write \
            .format("com.microsoft.sqlserver.jdbc.spark") \
            .mode("overwrite") \
            .option("url", jdbcurl) \
            .option("dbtable", "lending_club_acc_loans") \
            .option("user", jdbcusername) \
            .option("password", jdbcpassword) \
            .option("driver", "com.microsoft.sqlserver.jdbc.SQLServerDriver") \
            .option("tableLock", "true") \
            .option("batchsize", "500") \
            .option("reliabilityLevel", "BEST_EFFORT") \
            .save()
    except Exception as error:
        # Spark surfaces JVM-side failures as Py4JJavaError, which is not a
        # ValueError — the original `except ValueError` could never fire.
        print("Connector write failed", error)
def run():
    """Entry point: fetch sample rows from Delta, then push them to SQL Server."""
    # NOTE(review): `spark.driver.extraClassPath` is only honored when the JVM
    # starts; getOrCreate() returns any already-running session unchanged, so
    # under Databricks Connect this config is likely a no-op — the connector
    # jar must be installed on the cluster itself. TODO confirm.
    jar_path = 'dbfs:/FileStore/jars/18df20ee_ae05_45d0_ba41_89502b080b51-spark_mssql_connector_2_12_3_0_1_0_0_alpha-a6e1c.jar'
    spark = SparkSession.builder.config('spark.driver.extraClassPath', jar_path).getOrCreate()
    fetch_data_from_delta(spark, sql)
    load_df_to_db()


if __name__ == "__main__":
    run()
Can someone help me figure out the issue?