from unittest import TestCase
from pyspark.sql import SparkSession
from lib.utils import load_survey_df, count_by_country
class UtilsTestCase(TestCase):
@classmethod
def create_testing_pyspark_session(cls):
return SparkSession.builder.master('local[2]').appName('my - local - testing - pyspark - context').getOrCreate()
@classmethod
def SetUpClass(cls) -> None:
cls.spark = cls.create_testing_pyspark_session()
def test_datafile_loading(self):
sample_df = load_survey_df(self.spark, "data/sample.csv")
result_count = sample_df.count()
self.assertEqual(result_count, 9, "Record count should be 9")
def test_country_count(self):
sample_df = load_survey_df(self.spark, "data/sample.csv")
count_list = count_by_country(sample_df).collect()
count_dict = dict()
for row in count_list:
count_dict[row["Country"]] = row["count"]
self.assertEqual(count_dict["United States"], 4, "Count for United States should be 4")
self.assertEqual(count_dict["Canada"], 2, "Count for United States should be 2")
self.assertEqual(count_dict["United Kingdom"], 1, "Count for United States should be 1")
Hi All, Can you please tell me what's wrong with this code? I am getting below error: Error Traceback (most recent call last): File "C:\Users\abc\AppData\Local\Continuum\anaconda3\lib\unittest\case.py", line 59, in testPartExecutor yield File "C:\Users\abc\AppData\Local\Continuum\anaconda3\lib\unittest\case.py", line 628, in run testMethod() File "C:\Users\abc\PycharmProjects\HelloSpark\lib\test_utils.py", line 17, in test_datafile_loading sample_df = load_survey_df(self.spark, "data/sample.csv") AttributeError: 'UtilsTestCase' object has no attribute 'spark'
setUpClass
, notSetUpClass
. – MrBean Bremen