I'm trying to run my classifier but I get this error
import pandas
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.svm import SVC
from sklearn import cross_validation
from sklearn.metrics import confusion_matrix
from sklearn.multiclass import OneVsOneClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support as score
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
dataset = pd.read_csv('all_topics_limpo.csv', encoding = 'utf-8')
data = pandas.get_dummies(dataset['verbatim_corrige'])
labels = dataset['label']
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size = 0.2, random_state = 0)
count_vector = CountVectorizer()
tfidf = TfidfTransformer()
classifier = OneVsOneClassifier(SVC(kernel = 'linear', random_state = 100))
#classifier = LogisticRegression()
train_counts = count_vector.fit_transform(X_train)
train_tfidf = tfidf.fit_transform(train_counts)
classifier.fit(X_train, y_train)
test_counts = count_vector.transform(X_test)
test_tfidf = tfidf.transform(test_counts)
predicted = classifier.predict(test_tfidf)
predicted = classifier.predict(X_test)
print("confusion matrix")
print(confusion_matrix(y_test, predicted, labels = labels))
print("F-score")
print(f1_score(y_test, predicted))
print(precision_score(y_test, predicted))
print(recall_score(y_test, predicted))
print("cross validation")
test_counts = count_vector.fit_transform(data)
test_tfidf = tfidf.fit_transform(test_counts)
scores = cross_validation.cross_val_score(classifier, test_tfidf, labels, cv = 10)
print(scores)
print("Accuracy: {} +/- {}".format(scores.mean(), scores.std() * 2))
My output error:
ValueError: cannot use sparse input in 'SVC' trained on dense data
I can not execute my code because of this problem and I am not understanding anything of what is happening.
all output error
Traceback (most recent call last):
File "classification.py", line 42, in predicted = classifier.predict(test_tfidf)
File "/usr/lib/python3/dist-packages/sklearn/multiclass.py", line 584, in predict Y = self.decision_function(X)
File "/usr/lib/python3/dist-packages/sklearn/multiclass.py", line 614, in decision_function for est, Xi in zip(self.estimators_, Xs)]).T
File "/usr/lib/python3/dist-packages/sklearn/multiclass.py", line 614, in for est, Xi in zip(self.estimators_, Xs)]).T
File "/usr/lib/python3/dist-packages/sklearn/svm/base.py", line 548, in predict y = super(BaseSVC, self).predict(X)
File "/usr/lib/python3/dist-packages/sklearn/svm/base.py", line 308, in predict X = self._validate_for_predict(X)
File "/usr/lib/python3/dist-packages/sklearn/svm/base.py", line 448, in _validate_for_predict % type(self).name)
ValueError: cannot use sparse input in 'SVC' trained on dense data
predicted
2 times in different ways)? Please include he full error trace. – desertnautclassifier.predict(test_tfidf)
; check my answer below... – desertnaut