from time import time from sklearn.metrics import f1_score
def train_classifier(clf, X_train, y_train): ''' Fits a classifier to the training data. '''
# Start the clock, train the classifier, then stop the clock
start = time()
clf.fit(X_train, y_train)
end = time()
# Print the results
print("Trained model in {:.4f} seconds".format(end - start))
def predict_labels(clf, features, target): ''' Makes predictions using a fit classifier based on F1 score. '''
# Start the clock, make predictions, then stop the clock
start = time()
y_pred = clf.predict(features)
end = time()
# Print and return results
print("Made predictions in {:.4f} seconds.".format(end - start))
return f1_score(target, y_pred, pos_label='H'), sum(target == y_pred) / float(len(y_pred))
def train_predict(clf, X_train, y_train, X_test, y_test): ''' Train and predict using a classifer based on F1 score. '''
# Indicate the classifier and the training set size
print("Training a {} using a training set size of {}. . .".format(clf.__class__.__name__, len(X_train)))
# Train the classifier
train_classifier(clf, X_train, y_train)
# Print the results of prediction for both training and testing
f1, acc = predict_labels(clf, X_train, y_train)
print(f1, acc)
print("F1 score and accuracy score for training set: {:.4f} , {:.4f}.".format(f1 , acc))
f1, acc = predict_labels(clf, X_test, y_test)
print("F1 score and accuracy score for test set: {:.4f} , {:.4f}.".format(f1 , acc))
clf_A = LogisticRegression(random_state = 42) train_predict(clf_A, X_train, y_train, X_test, y_test) print('')