I am getting a strange error while saving a DataFrame to AWS S3:
df.coalesce(1).write.mode(SaveMode.Overwrite)
.json(s"s3://myawsacc/results/")
From spark-shell I was able to write data to the same location, and it works:
spark.sparkContext.parallelize(1 to 4).toDF.write.mode(SaveMode.Overwrite)
.format("com.databricks.spark.csv")
.save(s"s3://myawsacc/results/")
My question is: why does this work from spark-shell but fail via spark-submit? Is there any logic, property, or configuration that explains the difference?
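One thing I noticed in the trace below is that com.amazon.ws.emr.hadoop.fs.s3n.S3Credentials reads credentials from the Hadoop configuration, so maybe the difference is in which Hadoop properties each environment sees. As an illustration (the property values are placeholders; I believe the same settings can also be passed to spark-submit as --conf spark.hadoop.<property>=<value>), they could be set explicitly like this:

val hadoopConf = spark.sparkContext.hadoopConfiguration
// placeholders, only to show where S3 credential properties would go
hadoopConf.set("fs.s3.awsAccessKeyId", "<access-key>")
hadoopConf.set("fs.s3.awsSecretAccessKey", "<secret-key>")

The full stack trace from the spark-submit run: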
Exception in thread "main" java.lang.ExceptionInInitializerError
    at com.amazon.ws.emr.hadoop.fs.s3n.S3Credentials.initialize(S3Credentials.java:45)
    at com.amazon.ws.emr.hadoop.fs.HadoopConfigurationAWSCredentialsProvider.<init>(HadoopConfigurationAWSCredentialsProvider.java:26)
    at com.amazon.ws.emr.hadoop.fs.guice.DefaultAWSCredentialsProviderFactory.getAwsCredentialsProviderChain(DefaultAWSCredentialsProviderFactory.java:44)
    at com.amazon.ws.emr.hadoop.fs.guice.DefaultAWSCredentialsProviderFactory.getAwsCredentialsProvider(DefaultAWSCredentialsProviderFactory.java:28)
    at com.amazon.ws.emr.hadoop.fs.guice.EmrFSProdModule.getAwsCredentialsProvider(EmrFSProdModule.java:70)
    at com.amazon.ws.emr.hadoop.fs.guice.EmrFSProdModule.createS3Configuration(EmrFSProdModule.java:86)
    at com.amazon.ws.emr.hadoop.fs.guice.EmrFSProdModule.createAmazonS3LiteClient(EmrFSProdModule.java:80)
    at com.amazon.ws.emr.hadoop.fs.guice.EmrFSProdModule.createAmazonS3Lite(EmrFSProdModule.java:120)
    at com.amazon.ws.emr.hadoop.fs.guice.EmrFSBaseModule.provideAmazonS3Lite(EmrFSBaseModule.java:99)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at com.amazon.ws.emr.hadoop.fs.shaded.com.google.inject.internal.ProviderMethod.get(ProviderMethod.java:104)
    at com.amazon.ws.emr.hadoop.fs.shaded.com.google.inject.internal.InternalFactoryToProviderAdapter.get(InternalFactoryToProviderAdapter.java:40)
    at com.amazon.ws.emr.hadoop.fs.shaded.com.google.inject.internal.ProviderToInternalFactoryAdapter$1.call(ProviderToInternalFactoryAdapter.java:46)
    at com.amazon.ws.emr.hadoop.fs.shaded.com.google.inject.internal.InjectorImpl.callInContext(InjectorImpl.java:1031)
    at com.amazon.ws.emr.hadoop.fs.shaded.com.google.inject.internal.ProviderToInternalFactoryAdapter.get(ProviderToInternalFactoryAdapter.java:40)
    at com.amazon.ws.emr.hadoop.fs.shaded.com.google.inject.Scopes$1$1.get(Scopes.java:65)
    at com.amazon.ws.emr.hadoop.fs.shaded.com.google.inject.internal.InternalFactoryToProviderAdapter.get(InternalFactoryToProviderAdapter.java:40)
    at com.amazon.ws.emr.hadoop.fs.shaded.com.google.inject.internal.SingleFieldInjector.inject(SingleFieldInjector.java:53)
    at com.amazon.ws.emr.hadoop.fs.shaded.com.google.inject.internal.MembersInjectorImpl.injectMembers(MembersInjectorImpl.java:110)
    at com.amazon.ws.emr.hadoop.fs.shaded.com.google.inject.internal.ConstructorInjector.construct(ConstructorInjector.java:94)
    at com.amazon.ws.emr.hadoop.fs.shaded.com.google.inject.internal.ConstructorBindingImpl$Factory.get(ConstructorBindingImpl.java:254)
    at com.amazon.ws.emr.hadoop.fs.shaded.com.google.inject.internal.FactoryProxy.get(FactoryProxy.java:54)
    at com.amazon.ws.emr.hadoop.fs.shaded.com.google.inject.internal.InjectorImpl$4$1.call(InjectorImpl.java:978)
    at com.amazon.ws.emr.hadoop.fs.shaded.com.google.inject.internal.InjectorImpl.callInContext(InjectorImpl.java:1024)
    at com.amazon.ws.emr.hadoop.fs.shaded.com.google.inject.internal.InjectorImpl$4.get(InjectorImpl.java:974)
    at com.amazon.ws.emr.hadoop.fs.shaded.com.google.inject.internal.InjectorImpl.getInstance(InjectorImpl.java:1009)
    at com.amazon.ws.emr.hadoop.fs.EmrFileSystem.initialize(EmrFileSystem.java:103)
    at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2717)
    at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:93)
    at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2751)
    at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2733)
    at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:377)
    at org.apache.hadoop.fs.Path.getFileSystem(Path.java:295)
    at org.apache.spark.sql.execution.datasources.DataSource.writeInFileFormat(DataSource.scala:394)
    at org.apache.spark.sql.execution.datasources.DataSource.write(DataSource.scala:471)
    at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:50)
    at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:58)
    at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:56)
    at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:74)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116)
    at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:92)
    at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:92)
    at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:609)
    at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:233)
    at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:217)
    at org.apache.spark.sql.DataFrameWriter.json(DataFrameWriter.scala:487)
    at com.org.ComparatorUtil$.writeLogNError(ComparatorUtil.scala:245)
    at com.org.ComparatorUtil$.writeToJson(ComparatorUtil.scala:161)
    at com.org.comparator.SnowFlakeTableComparator$.mainExecutor(SnowFlakeTableComparator.scala:98)
    at com.org.executor.myclass$$anonfun$main$4$$anonfun$apply$1.apply(myclass.scala:232)
    at com.org.executor.myclass$$anonfun$main$4$$anonfun$apply$1.apply(myclass.scala:153)
    at scala.collection.Iterator$class.foreach(Iterator.scala:893)
    at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
    at scala.collection.IterableLike$class.foreach(IterableLike.scala:72)
    at scala.collection.AbstractIterable.foreach(Iterable.scala:54)
    at com.org.executor.myclass$$anonfun$main$4.apply(myclass.scala:153)
    at com.org.executor.myclass$$anonfun$main$4.apply(myclass.scala:134)
    at scala.collection.immutable.List.foreach(List.scala:381)
    at com.org.executor.myclass$.main(myclass.scala:134)
    at com.org.executor.myclass.main(myclass.scala)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:775)
    at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:180)
    at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:205)
    at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:119)
    at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.IllegalArgumentException: URI is not absolute
    at java.net.URI.toURL(URI.java:1088)
    at org.apache.hadoop.fs.http.AbstractHttpFileSystem.open(AbstractHttpFileSystem.java:60)
    at org.apache.hadoop.fs.http.HttpFileSystem.open(HttpFileSystem.java:23)
    at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:773)
    at org.apache.hadoop.fs.FsUrlConnection.connect(FsUrlConnection.java:50)
    at org.apache.hadoop.fs.FsUrlConnection.getInputStream(FsUrlConnection.java:59)
    at java.net.URL.openStream(URL.java:1045)
    at com.amazon.ws.emr.hadoop.fs.shaded.com.fasterxml.jackson.core.JsonFactory._optimizedStreamFromURL(JsonFactory.java:1479)
    at com.amazon.ws.emr.hadoop.fs.shaded.com.fasterxml.jackson.core.JsonFactory.createParser(JsonFactory.java:779)
    at com.amazon.ws.emr.hadoop.fs.shaded.com.fasterxml.jackson.databind.ObjectMapper.readValue(ObjectMapper.java:2679)
    at com.amazon.ws.emr.hadoop.fs.util.PlatformInfo.getClusterIdFromConfigurationEndpoint(PlatformInfo.java:39)
    at com.amazon.ws.emr.hadoop.fs.util.PlatformInfo.getJobFlowId(PlatformInfo.java:53)
    at com.amazon.ws.emr.hadoop.fs.util.EmrFsUtils.getJobFlowId(EmrFsUtils.java:384)
    at com.amazon.ws.emr.hadoop.fs.util.EmrFsUtils.<clinit>(EmrFsUtils.java:60)
    ... 77 more
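Update: to compare the two environments, here is a quick check (a sketch, using the same bucket path as above) that can be run both in spark-shell and from the submitted jar, to see which Hadoop FileSystem implementation resolves the s3:// scheme in each case; on EMR I would expect com.amazon.ws.emr.hadoop.fs.EmrFileSystem in both:

import java.net.URI
import org.apache.hadoop.fs.FileSystem

// prints the FileSystem class Hadoop resolves for the s3:// scheme
val fs = FileSystem.get(new URI("s3://myawsacc/results/"), spark.sparkContext.hadoopConfiguration)
println(fs.getClass.getName)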