How to get the reshape() function to work on 2D vectors

Question

I've reshaped a feature vector and still got this error:

ValueError: Expected 2D array, got 1D array instead: array=[].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

I've used reshape before the prediction like

features = features.reshape(1, -1)

But no luck at all.

This is the code I have

import cv2
import numpy as np
import os
import glob
import mahotas as mt
from sklearn.svm import LinearSVC

# function to extract haralick textures from an image
def extract_features(image):
    """Return the mean Haralick texture vector of `image` as a single row.

    `mt.features.haralick` is evaluated on the image, its result is averaged
    along axis 0 and the averaged vector is reshaped to shape (1, -1), so the
    caller always receives a 2D array with exactly one row.
    """
    # haralick texture features (per the original note: 4 types of adjacency)
    haralick = mt.features.haralick(image)

    # collapse axis 0 by averaging, then present the result as one row
    averaged = haralick.mean(axis = 0)
    return averaged.reshape(1, -1)

# load the training dataset
# every entry returned by os.listdir(train_path) is used below both as a
# sub-path to search for images and as the label of the images found there
train_path  = "C:/dataset/train"
train_names = os.listdir(train_path)

# empty list to hold feature vectors and train labels
train_features = []
train_labels   = []

# loop over the training dataset
print ("[STATUS] Started extracting haralick textures..")
for train_name in train_names:
    cur_path = train_path + "/" + train_name
    cur_label = train_name
    # 1-based counter, used only in the progress message
    i = 1

    # only *.jpg files directly inside cur_path are matched; when the glob
    # matches nothing the body never runs and nothing is appended
    for file in glob.glob(cur_path + "/*.jpg"):
        print ("Processing Image - {} in {}".format(i, cur_label))
        # read the training image
        image = cv2.imread(file)

        # convert the image to grayscale
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # extract haralick texture from the image
        # (extract_features already returns an array reshaped to (1, -1))
        features = extract_features(gray)

        # append the feature vector and label
        # NOTE(review): as written, the trailing [0] subscripts the value
        # returned by list.append() rather than the reshaped array
        train_features.append(features.reshape(1, -1))[0]
        train_labels.append(cur_label)

        # show loop update
        i += 1

# have a look at the size of our feature vector and labels
print ("Training features: {}".format(np.array(train_features).shape))
print ("Training labels: {}".format(np.array(train_labels).shape))

# create the classifier
print ("[STATUS] Creating the classifier..")
clf_svm = LinearSVC(random_state = 9)

# fit the training data and labels
# NOTE(review): if no image was appended above, train_features is still the
# empty list here
print ("[STATUS] Fitting data/label to model..")
clf_svm.fit(train_features, train_labels)

# loop over the test images
# unlike the training set, test images are read directly from test_path
test_path = "C:/dataset/test"
for file in glob.glob(test_path + "/*.jpg"): 
    # read the input image
    image = cv2.imread(file)

    # convert to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # extract haralick texture from the image
    # (already shaped (1, -1), i.e. a single sample)
    features = extract_features(gray)

    # evaluate the model and predict label
    prediction = clf_svm.predict(features)

    # show the label
    # NOTE(review): prediction is passed to putText exactly as returned by
    # predict() - confirm it is of a type putText accepts as text
    cv2.putText(image, prediction, (20,30), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0,255,255), 3)
    print ("Prediction - {}".format(prediction))

    # display the output image
    # waitKey(0) is called after every image before the loop continues
    cv2.imshow("Test_Image", image)
    cv2.waitKey(0)

I don't know if I'm using reshape() incorrectly or I'm missing something.

ValueError: Expected 2D array, got 1D array instead: array=[]. Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

Anubhav Singh Anubhav Singh · Accepted Answer · 2019-07-31T23:15:46

Consider following points:

You are getting the above error because train_features is [] (an empty list) at the line clf_svm.fit(train_features, train_labels). It should contain at least one sample. This happens because train_path points to a folder that contains only image files, whereas the code above assumes that train_path points to a folder containing at least one subfolder per class (and no loose image files):
```
train 
   - class1_folder[class11.jpg, class12.jpg, ...]
   - class2_folder[class21.jpg, class22.jpg, ...]
   - and so on ...
```
Here, your class names of the training data will be [class1, class2, ...]
Correct line train_features.append(features.reshape(1, -1))[0] to train_features.append(features.reshape(1, -1)[0])
The output of clf_svm.predict(features) is a NumPy array, so replace prediction with str(prediction) in the cv2.putText call. You can also use prediction[0] instead.

Try below code:

import cv2
import numpy as np
import os
import glob
import mahotas as mt
from sklearn.svm import LinearSVC

# function to extract haralick textures from an image
def extract_features(image):
    """Compute a one-row Haralick texture descriptor for `image`.

    The Haralick features returned by `mt.features.haralick` are averaged
    over axis 0, and the resulting vector is returned reshaped to (1, -1),
    i.e. as a 2D array holding a single sample.
    """
    # haralick texture features (per the original note: 4 types of adjacency),
    # averaged over axis 0 and returned as a single-row 2D array
    return mt.features.haralick(image).mean(axis = 0).reshape(1, -1)

# load the training dataset
# expected layout: train_path/<class_name>/*.jpg - one sub-folder per class,
# the sub-folder name is used as the label of every image inside it
train_path  = "C:\\dataset\\train"
train_names = os.listdir(train_path)

# empty list to hold feature vectors and train labels
train_features = []
train_labels   = []

# loop over the training dataset
print ("[STATUS] Started extracting haralick textures..")
for train_name in train_names:
    # os.path.join avoids hand-written separators (the previous "\*.jpg"
    # literal relied on an invalid escape sequence)
    cur_path = os.path.join(train_path, train_name)
    print(cur_path)

    # loose files lying directly in train_path are not class folders
    if not os.path.isdir(cur_path):
        continue

    cur_label = train_name

    # i is a 1-based counter used only for the progress message
    for i, image_file in enumerate(glob.glob(os.path.join(cur_path, "*.jpg")), start=1):
        print ("Processing Image - {} in {}".format(i, cur_label))

        # read the training image; cv2.imread returns None when it cannot
        # decode the file, which would otherwise crash cvtColor below
        image = cv2.imread(image_file)
        if image is None:
            print ("[WARNING] Could not read image, skipping - {}".format(image_file))
            continue

        # convert the image to grayscale
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # extract haralick texture from the image (shape (1, n))
        features = extract_features(gray)

        # append the feature vector as a flat 1D row, plus its label, so that
        # train_features ends up as a list of n_samples rows
        train_features.append(features.reshape(1, -1)[0])
        train_labels.append(cur_label)

# have a look at the size of our feature vector and labels
print ("Training features: {}".format(np.array(train_features).shape))
print ("Training labels: {}".format(np.array(train_labels).shape))

# fail early with an actionable message instead of the opaque
# "Expected 2D array, got 1D array instead: array=[]" raised by fit()
if not train_features:
    raise ValueError(
        "No training images found under {!r}: expected one sub-folder per "
        "class, each containing *.jpg files".format(train_path)
    )

# create the classifier
print ("[STATUS] Creating the classifier..")
clf_svm = LinearSVC(random_state = 9)

# fit the training data and labels
print ("[STATUS] Fitting data/label to model..")
# debug aid: dump the collected feature rows before fitting
print(train_features)
clf_svm.fit(train_features, train_labels)

# loop over the test images (read directly from test_path, no sub-folders)
test_path = "C:\\dataset\\test"
for image_file in glob.glob(os.path.join(test_path, "*.jpg")):
    # read the input image, skipping files OpenCV cannot decode
    image = cv2.imread(image_file)
    if image is None:
        print ("[WARNING] Could not read image, skipping - {}".format(image_file))
        continue

    # convert to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # extract haralick texture from the image; already shaped (1, n),
    # i.e. a single sample, which is what predict() expects
    features = extract_features(gray)

    # evaluate the model and predict label
    prediction = clf_svm.predict(features)

    # show the label: predict() returns an array with one entry per sample,
    # so draw its single element rather than the array's repr
    label = str(prediction[0])
    cv2.putText(image, label, (20,30), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0,255,255), 3)
    print ("Prediction - {}".format(prediction))

    # display the output image and wait for a key press before continuing
    cv2.imshow("Test_Image", image)
    cv2.waitKey(0)
cv2.destroyAllWindows()

How to get the reshape() function to work on 2D vectors

1 Answers