I have a PySpark DataFrame with a string column in the format YYYYMMDD, and I am attempting to convert it into a date column (the final result should be an ISO 8601 date, e.g. 2019-05-30). The field is named deadline and is formatted as follows:
deadline
20190530
I tried the following solutions:
from pyspark.sql.functions import unix_timestamp, col
from pyspark.sql.types import TimestampType
from pyspark.sql.types import StringType
from pyspark.sql.functions import from_unixtime
from pyspark.sql.types import DateType
df.select(to_date(df.deadline).alias('dt')).show()
df.withColumn('new_date',to_date(unix_timestamp(df.deadline, 'YYYYMMDD').cast('timestamp'))).show()
orders_concat.select(unix_timestamp(orders_concat.deadline, 'YYYYMMDD')).show()
df.select(unix_timestamp(df.ts_string, 'yyyy/MM/dd HH:mm:ss').cast(TimestampType()).alias("timestamp")).show()
df.select(unix_timestamp(df.deadline, 'yyyy/MM/dd HH:mm:ss').cast(TimestampType()).alias("timestamp")).show()
df.select(to_date(cast(unix_timestamp('deadline', 'YYYYMMDD').alias('timestamp').show()
ndf = df.withColumn('_1', df['deadline'].cast(DateType()))
df2 = df.select('deadline', from_unixtime(unix_timestamp('deadline', 'YYYYMMDD')).alias('date'))
I always get null values.
Does anyone have any suggestions?