import pandas as pd
from sklearn.preprocessing import LabelEncoder
pd.set_option('display.max_columns', 500)
df=pd.read_csv("https://media-doselect.s3.amazonaws.com/generic/831JKKEkW7kqd5M4evNva9LyB/insurance_grouped.csv")
le = LabelEncoder()#use this encoder to encod
df.BMI_group = le.fit_transform(df.BMI_group.values)
print(df.head())
Expected output - age sex bmi children smoker region charges BMI_group 0 19 0 27.900 0 1 3 16884.92400 2 1 18 1 33.770 1 0 2 1725.55230 1 2 28 1 33.000 3 0 2 4449.46200 1 3 33 1 22.705 0 0 1 21984.47061 0 4 32 1 28.880 0 0 1 3866.85520 2
Error message - TypeError Traceback (most recent call last) ~\Anaconda3\lib\site-packages\sklearn\preprocessing\label.py in _encode(values, uniques, encode) 104 try: --> 105 res = _encode_python(values, uniques, encode) 106 except TypeError:
~\Anaconda3\lib\site-packages\sklearn\preprocessing\label.py in _encode_python(values, uniques, encode) 58 if uniques is None: ---> 59 uniques = sorted(set(values)) 60 uniques = np.array(uniques, dtype=values.dtype)
TypeError: '<' not supported between instances of 'str' and 'float'
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last) in 4 #df=pd.read_csv("https://media-doselect.s3.amazonaws.com/generic/831JKKEkW7kqd5M4evNva9LyB/insurance_grouped.csv") 5 le = LabelEncoder()#use this encoder to encod ----> 6 df.BMI_group = le.fit_transform(df.BMI_group.values) 7 df.BMI_group
~\Anaconda3\lib\site-packages\sklearn\preprocessing\label.py in fit_transform(self, y) 234 """ 235 y = column_or_1d(y, warn=True) --> 236 self.classes_, y = _encode(y, encode=True) 237 return y 238
~\Anaconda3\lib\site-packages\sklearn\preprocessing\label.py in _encode(values, uniques, encode) 105 res = _encode_python(values, uniques, encode) 106 except TypeError: --> 107 raise TypeError("argument must be a string or number") 108 return res 109 else:
TypeError: argument must be a string or number