I have a pyspark dataframe
a = [
(0.31, .3, .4, .6, 0.4),
(.01, .2, .92, .4, .47),
(.3, .1, .05, .2, .82),
(.4, .4, .3, .6, .15),
]
b = ["column1", "column2", "column3", "column4", "column5"]
df = spark.createDataFrame(a, b)
Now I want to create a new column based on the condition below:
df.withColumn('new_column' ,(norm.ppf(F.col('column1')) - norm.ppf(F.col('column1') * F.col('column1'))) / (1 - F.col('column2')) ** 0.5)
but it's giving an error. Please help!
Update: I have corrected the column names.
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-38-8dfe7d50be84> in <module>
----> 1 df.withColumn('new_column' ,(norm.ppf(F.col('PD')) - norm.ppf(F.col('PD') * F.col('PD'))) / (1 - F.col('rho_start')) ** 0.5)
~/anaconda3/envs/python3/lib/python3.6/site-packages/scipy/stats/_distn_infrastructure.py in ppf(self, q, *args, **kwds)
1995 args = tuple(map(asarray, args))
1996 cond0 = self._argcheck(*args) & (scale > 0) & (loc == loc)
-> 1997 cond1 = (0 < q) & (q < 1)
1998 cond2 = cond0 & (q == 0)
1999 cond3 = cond0 & (q == 1)
~/anaconda3/envs/python3/lib/python3.6/site-packages/pyspark/sql/column.py in __nonzero__(self)
633
634 def __nonzero__(self):
--> 635 raise ValueError("Cannot convert column into bool: please use '&' for 'and', '|' for 'or', "
636 "'~' for 'not' when building DataFrame boolean expressions.")
637 __bool__ = __nonzero__
ValueError: Cannot convert column into bool: please use '&' for 'and', '|' for 'or', '~' for 'not' when building DataFrame boolean expressions.
Is `column1` actually the column `PD`, and `column2` the column `rho_start`? – Michael Szczesny