0
votes

I present to your attention a script for training a convolutional neural network for recognizing a road network from winter satellite images. I have a problem at the stage of assigning weights using sample_weight and I have no idea how to solve it yet, as I am still only delving into the subject of convolutional neural networks. I look forward to your help and advice. Thank you!

My code:

from Simple_multiclass_unet_model import multiclass_unet_model
from tensorflow.keras.utils import normalize
import os
import glob
import cv2
import numpy as np
import random
from matplotlib import pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from keras.metrics import MeanIoU
from tensorflow.keras.models import load_model
from sklearn.utils.class_weight import compute_sample_weight

n_classes = 2                                      # Количество классов для сегментации
train_images = []                                  # Представляем информацию о тренировочных изображениях в виде списка


for directory_path in glob.glob('D:/Practise/Learning/Augmented_images/'):
    for img_path in glob.glob(os.path.join(directory_path, "*.tif")):
        img_bgr = cv2.imread(img_path, cv2.IMREAD_COLOR|cv2.IMREAD_ANYDEPTH)
        img = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
        train_images.append(img)

train_images = np.array(train_images)               # Конвертируем список в массив для обучения
print('1...')

train_masks = []                                    # Представляем информацию о масках в виде списка
for directory_path in glob.glob('D:/Practise/Learning/Augmented_masks/'):
    for mask_path in glob.glob(os.path.join(directory_path, "*.tif")):
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE|cv2.IMREAD_ANYDEPTH)
        train_masks.append(mask)

train_masks = np.array(train_masks)                 # Конвертируем список в массив для обучения
print('2...')

labelencoder = LabelEncoder()                       # Начинаем кодировать метки, имея дело с многомерным массивом, поэтому необходимо его сгладить, кодировать и изменить форму
n, h, w = train_masks.shape                         # Определяем параметры массива
train_masks_reshaped = train_masks.reshape(-1, 1)   # Раскладываем массив в один столбец
train_masks_reshaped_encoded = labelencoder.fit_transform(train_masks_reshaped)     # Присваиваем метки для значений
train_masks_encoded_original_shape = train_masks_reshaped_encoded.reshape(n, h, w)  # Возвращаем массив в исходное состояние
np.unique(train_masks_encoded_original_shape)       # Находим уникальные значения массива
print('3...')

train_images = normalize(train_images, axis=1)      # Нормализуем массив вдоль оси 1
train_masks_input = np.expand_dims(train_masks_encoded_original_shape, axis=3)      # Увеличиваем количество осей массива масок до 3 // Скорее всего тут надо будет вместо 3 поставить 2

# Создаем подмножество данных для быстрого тестирования, отбираем 10% на тестирование и оставшиеся на обучение
X_train, X_test, y_train, y_test = train_test_split(train_images, train_masks_input, test_size=0.10, random_state=0)
print('Значения классов в наборе данных: ', np.unique(y_train))                     # 0 - это фон

train_masks_cat = to_categorical(y_train, num_classes=n_classes)
y_train_cat = train_masks_cat.reshape((y_train.shape[0], y_train.shape[1], y_train.shape[2], n_classes))

test_masks_cat = to_categorical(y_test, num_classes=n_classes)
y_test_cat = test_masks_cat.reshape((y_test.shape[0], y_test.shape[1], y_test.shape[2], n_classes))

sample_weights = compute_sample_weight('balanced', np.unique(train_masks_reshaped_encoded),
                                                  train_masks_reshaped_encoded)
# sample_weights = {i:sample_weights[i] for i in range(2)}
print('Веса классов:', sample_weights)

IMG_HEIGHT = X_train.shape[1]
IMG_WIDTH = X_train.shape[2]
IMG_CHANNELS = X_train.shape[3]

def get_model():
    return multiclass_unet_model(n_classes=n_classes, IMG_HEIGHT=IMG_HEIGHT, IMG_WIDTH=IMG_WIDTH,
                                 IMG_CHANNELS=IMG_CHANNELS)

model = get_model()
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'], sample_weight_mode = 'temporal')
model.summary()

history = model.fit(X_train, y_train_cat,
                    batch_size=16,
                    verbose=1,
                    epochs=10,
                    validation_data=(X_test, y_test_cat),
                    sample_weight=sample_weights,
                    shuffle=False)
model.save('CNN.hdf5')

Error:

Traceback (most recent call last):
  File "D:/Practise/Scripts/4_Multiclass_unet_sandstone.py", line 84, in <module>
    shuffle=False)
  File "C:\Users\User\anaconda3\envs\python_3_6\lib\site-packages\keras\engine\training.py", line 1148, in fit
    steps_per_execution=self._steps_per_execution)
  File "C:\Users\User\anaconda3\envs\python_3_6\lib\site-packages\keras\engine\data_adapter.py", line 1383, in get_data_handler
    return DataHandler(*args, **kwargs)
  File "C:\Users\User\anaconda3\envs\python_3_6\lib\site-packages\keras\engine\data_adapter.py", line 1150, in __init__
    model=model)
  File "C:\Users\User\anaconda3\envs\python_3_6\lib\site-packages\keras\engine\data_adapter.py", line 241, in __init__
    _check_data_cardinality(inputs)
  File "C:\Users\User\anaconda3\envs\python_3_6\lib\site-packages\keras\engine\data_adapter.py", line 1649, in _check_data_cardinality
    raise ValueError(msg)
ValueError: Data cardinality is ambiguous:
  x sizes: 10216
  y sizes: 10216
  sample_weight sizes: 2
Make sure all arrays contain the same number of samples.

Process finished with exit code 1
I'm using here 11352 images 128x128 pixels with 3 channels. - Arthur