I'm currently using the following code to train an SVM to classify (Iceberg Challenge), I need to display the accuracy of the model but I'm facing problems.
import numpy as np
import pandas as pd
import cv2
from sklearn import svm
from sklearn import preprocessing
from sklearn.preprocessing import normalize
from sklearn.metrics import log_loss
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import VotingClassifier
from sklearn.neighbors import KNeighborsClassifier
import pdb
from subprocess import check_output
import matplotlib.pyplot as plt
import numpy
from sklearn import metrics
print(check_output(["ls", "/content/drive/MyDrive/iceberg"]).decode("utf8"))
trainDataFilePath = "/content/drive/MyDrive/iceberg/train.json"
testDataFilePath = "/content/drive/MyDrive/iceberg/test.json"
trainDataFrame = pd.read_json('/content/drive/MyDrive/iceberg/train.json')
testDataFrame = pd.read_json('/content/drive/MyDrive/iceberg/test.json')
def getHogDescriptor(image,binNumber = 16):
gx = cv2.Sobel(image, cv2.CV_32F, 1, 0)
gy = cv2.Sobel(image, cv2.CV_32F, 0, 1)
mag, ang = cv2.cartToPolar(gx, gy)
bins = np.int32(binNumber*ang/(2*np.pi))
bin_cells = bins[:10,:10], bins[10:,:10], bins[:10,10:], bins[10:,10:]
mag_cells = mag[:10,:10], mag[10:,:10], mag[:10,10:], mag[10:,10:]
hists = [np.bincount(b.ravel(), m.ravel(), binNumber) for b, m in zip(bin_cells, mag_cells)]
hist = np.hstack(hists)
hist = np.array(hist,dtype=np.float32)
return hist
def makeRGBImageFromHnV(bandHH,bandHV):
b = np.divide(bandHH, bandHV, out=np.zeros_like(bandHH), where=(bandHV!=0))
rgb = np.dstack((bandHH.astype(np.float32), bandHV.astype(np.float32),b.astype(np.uint16)))
return rgb
def getMeanImageFromImageDataFrame(trainDataFrame):
meanImage = np.zeros(shape =(75,75,3),dtype = np.float32)
for currentImage in trainDataFrame["fullImage"]:
meanImage = meanImage + currentImage
meanImage = meanImage/len(trainDataFrame)
return meanImage.astype(np.float32)
def getStandardDeviationFromImageDataFrame(trainDataFrame,meanImage):
stdImage = np.zeros(shape =(75,75,3),dtype = np.float32)
for currentImage in trainDataFrame["fullImage"]:
stdImage = stdImage + (currentImage - meanImage)
stdImage = stdImage/len(trainDataFrame)
return stdImage.astype(np.float32)
def normalizedImageParamFromDataFrame(trainDataFrame):
meanImageData = getMeanImageFromImageDataFrame(trainDataFrame)
stdImageData = getStandardDeviationFromImageDataFrame(trainDataFrame,meanImageData)
return meanImageData,stdImageData
def normalizeSingleImage(currentImage,meanImageData,stdImageData):
normalizedImage = (currentImage - meanImageData)/stdImageData
return normalizedImage
def transformImageDataFrame(inputDataFrame,meanImageData,stdImageData):
inputDataFrame["fullImageNormalized"] = [normalizeSingleImage(inputDataFrame["fullImage"][i],meanImageData,stdImageData) for i in range(0,len(inputDataFrame["fullImage"]))]
return inputDataFrame
def normalizeImageUsingOpenCV(currentImage):
norm_image = cv2.normalize(currentImage,currentImage, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)
return norm_image
def getValuesfromDB(bandDB):
currentband = np.array(bandDB).reshape(75,75)
actualValue = 10**(currentband/10)
return actualValue
def getImageFromBandDataFrame(dataFrame):
dataFrame["valueBand1"] = [getValuesfromDB(dataFrame["band_1"][i]) for i in range(0,len(dataFrame["band_1"]))]
dataFrame["valueBand2"] = [getValuesfromDB(dataFrame["band_2"][i]) for i in range(0,len(dataFrame["band_2"]))]
dataFrame["fullImage"] = [makeRGBImageFromHnV(dataFrame["valueBand1"][i],dataFrame["valueBand2"][i]) for i in range(0,len(dataFrame["band_1"]))]
dataFrame["fullImageNormalized"] = [normalizeImageUsingOpenCV(dataFrame["fullImage"][i]) for i in range(0,len(dataFrame["fullImage"]))]
return dataFrame
def bootstrapAndEqualizeTheData(inputDataFrame):
noOfIceBergData = len(inputDataFrame[inputDataFrame.is_iceberg == 1])
totalData = len(inputDataFrame)
print("Tornando os dados de iceberg e de barcos como iguais através de bootstrap")
randomSamplesForIceBerg = inputDataFrame[inputDataFrame.is_iceberg == 1].sample((totalData - (2*noOfIceBergData)))
inputDataFrame = pd.concat([inputDataFrame, randomSamplesForIceBerg], ignore_index=True)
return inputDataFrame
def addFeatureDataFrame(dataFrame):
dataFrame["hogFeature"] = [getHogDescriptor(dataFrame["fullImageNormalized"][i]) for i in range(0,len(dataFrame["fullImageNormalized"]))]
return dataFrame
def getFeatureFromDataFrame(dataFrame,isTestData=0):
featureDataVector = []
responseVector = []
featureDataVector = np.array(featureDataVector).reshape(-1,64)
for i in range(0,len(dataFrame)):
currentFeature = dataFrame["hogFeature"][i].tolist()
if(isTestData is 0):
currentResponse = dataFrame["is_iceberg"][i].tolist()
else:
currentResponse = 2
currentFeature = np.array(currentFeature[0:64]).reshape(-1,64)
currentResponse = int(currentResponse)
if(i == 0):
featureDataVector = currentFeature
responseVector.append(currentResponse)
else:
featureDataVector = np.vstack((featureDataVector,currentFeature))
responseVector.append(currentResponse)
return featureDataVector,responseVector
print("Adicionando novas colunas ao Dataframe")
print("Normalizando o Dataframe")
trainDataFrame = getImageFromBandDataFrame(trainDataFrame)
trainDataFrame = bootstrapAndEqualizeTheData(trainDataFrame)
trainDataFrame = addFeatureDataFrame(trainDataFrame)
print("Calculando os vetores de atributos")
trainFeatureData , trainResponseData = getFeatureFromDataFrame(trainDataFrame)
print("Feito")
print(str(trainFeatureData.shape))
print("Inserindo no modelo")
clf = svm.SVC(gamma=0.001,C=100,kernel='rbf',probability=True)
clf.fit(trainFeatureData,trainResponseData)
print("Feito")
print("getting TestData features...")
print("Normalizando o Dataframe")
testDataFrame = getImageFromBandDataFrame(testDataFrame)
testDataFrame = addFeatureDataFrame(testDataFrame)
testFeatureData , _ = getFeatureFromDataFrame(testDataFrame,1)
print("Feito")
print(str(testFeatureData.shape))
test_predictions = clf.predict_proba(testFeatureData)
trainPredictions = clf.predict_proba(trainFeatureData)
print("Predição concluída")
print("A perda do algoritmo foi de: "+str(log_loss(trainResponseData,trainPredictions[:,1])))
print("A perda do algoritmo foi de: "+str(accuracy_score(trainResponseData,trainPredictions)))
pred_df = testDataFrame[['id']].copy()
pred_df['is_iceberg'] = test_predictions[:,1]
pred_df.to_csv('predictions.csv', index = False)
Here's the error message: ValueError: Classification metrics can't handle a mix of binary and continuous-multioutput targets
Is there any way or code that I can use to be able to display my model's acurracy?