I've been working on a LightGBM model that predicts the probability of an event. I scale my data with a min-max scaler, save the scaler, and train the model on the scaled data.
Then, in real time, I load the saved model and scaler and try to predict the probability for a new entry. For some reason, I'm getting a negative probability.
Here's the code:
# Model Vars
# Hyper-parameters that createLGBM() copies into the LightGBM params dict.
learning_rate = 0.005
boosting_type = 'gbdt'
# 'binary' trains with log loss, so Booster.predict() returns probabilities
# in [0, 1]. A regression objective such as 'rmse' returns raw, unbounded
# values, which is how a "negative probability" can show up.
# NOTE(review): 'binary' requires the label column to contain only 0 and 1 --
# confirm against the dataset.
objective = 'binary'
# Only LightGBM built-in metric names belong here; 'balanced_accuracy_score'
# is a scikit-learn function name, not a LightGBM metric.
metric = ['binary_logloss', 'rmse', 'auc']
sub_feature = 0.5721  # LightGBM alias of feature_fraction
num_leaves = 3000
min_data = 500  # LightGBM alias of min_data_in_leaf
max_depth = 22
max_bin = 12000
def createLGBM():
    """Train a LightGBM model on the min-max-scaled CSV data and save it.

    Reads the dataset, scales every column with a ``MinMaxScaler`` (which is
    dumped to ``"Saved_Scaler"``), trains on columns 0-7 with column 8 as the
    label, scores a 25% hold-out split at a 0.5 threshold, and writes the
    booster to a text file whose name embeds that accuracy.

    Returns:
        str: File name of the saved model, ``"lgb-model_<accuracy>.txt"``.
    """
    # Read dataset. Raw string: '\F' in a normal literal is an invalid escape
    # sequence (a warning today, an error in a future Python).
    dataset = pd.read_csv(r'C:\FullDatasetNoArson.csv')
    x = dataset.values  # returns a numpy array

    # Normalize Data
    # NOTE: the scaler is fitted on ALL columns, label column included, so
    # anything later passed to its transform() must have the same column count.
    min_max_scaler = preprocessing.MinMaxScaler()
    x_scaled = min_max_scaler.fit_transform(x)

    # Save scaler
    joblib.dump(min_max_scaler, "Saved_Scaler")

    # First eight columns are the features, the ninth is the label.
    X = x_scaled[:, 0:8]
    y = x_scaled[:, 8]

    # Initiate LGBM
    x_train, x_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=0)
    d_train = lgb.Dataset(x_train, label=y_train)
    params = {
        'task': 'train',
        'learning_rate': learning_rate,
        'boosting_type': boosting_type,
        'objective': objective,
        'metric': metric,
        'sub_feature': sub_feature,
        'num_leaves': num_leaves,
        'min_data': min_data,
        'max_depth': max_depth,
        'max_bin': max_bin,
        'num_threads': 7,
        'is_training_metric': True,
        'verbose': 1,
    }
    clf = lgb.train(params, d_train, 2000, keep_training_booster=True)

    # Prediction
    # NOTE(review): these scores are probabilities only when `objective` is a
    # classification objective (e.g. 'binary'); a regression objective yields
    # unbounded values that can fall outside [0, 1].
    scores = clf.predict(x_test)

    # convert into binary values, threshold at .5 (vectorised, same 0./1. floats)
    y_pred = (scores >= .5).astype(float)

    # Accuracy (scikit-learn's documented argument order is y_true, y_pred)
    accuracy = accuracy_score(y_test, y_pred)

    model_path = "lgb-model_" + str(accuracy) + ".txt"
    clf.save_model(model_path)
    return model_path
def predict(data, model_file='lgb-model_0.7906763418553157.txt',
            scaler_file="Saved_Scaler"):
    """Scale new entries with the saved scaler and score them with the model.

    Args:
        data: One entry (1-D sequence) or several entries (2-D, one row per
            entry). Rows may hold only the feature columns or as many columns
            as the scaler was fitted on; only the first eight scaled columns
            are fed to the model.
        model_file: Path of the LightGBM model text file to load.
        scaler_file: Path of the joblib-dumped scaler to load.

    Returns:
        list: One model output per input row.
    """
    data = np.asarray(data)
    print(data)
    # Accept a single entry given as a flat list; transform() needs 2-D input.
    data = np.atleast_2d(data)

    # joblib.load unpickles the file -- only load scaler files you produced.
    min_max_scaler = joblib.load(scaler_file)

    # The scaler may have been fitted on more columns than the caller has
    # (e.g. features + label). MinMaxScaler scales each column independently,
    # so zero-padding the missing trailing columns does not change the scaled
    # feature values; the padding is sliced off again below.
    fitted_cols = min_max_scaler.scale_.shape[0]
    if data.shape[1] < fitted_cols:
        filler = np.zeros((data.shape[0], fitted_cols - data.shape[1]))
        data = np.hstack([data, filler])

    data = min_max_scaler.transform(data)
    data = data[:, 0:8]

    model = lgb.Booster(model_file=model_file)
    print(data)
    # NOTE(review): the output is a probability only if the model was trained
    # with a classification objective (e.g. 'binary'); a model trained with a
    # regression objective returns unbounded values, possibly negative.
    pred = model.predict(data)
    return pred.tolist()