Below is a script for training a convolutional neural network to recognize the road network in winter satellite images. I'm stuck at the stage of assigning weights via sample_weight and don't yet see how to solve it, since I'm only starting to dig into convolutional neural networks. I'd appreciate any help and advice. Thank you!
My code:
from Simple_multiclass_unet_model import multiclass_unet_model
from tensorflow.keras.utils import normalize
import os
import glob
import cv2
import numpy as np
import random
from matplotlib import pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from keras.metrics import MeanIoU
from tensorflow.keras.models import load_model
from sklearn.utils.class_weight import compute_sample_weight
n_classes = 2  # Number of segmentation classes
train_images = []  # Collect the training images in a list
for directory_path in glob.glob('D:/Practise/Learning/Augmented_images/'):
    for img_path in glob.glob(os.path.join(directory_path, "*.tif")):
        img_bgr = cv2.imread(img_path, cv2.IMREAD_COLOR | cv2.IMREAD_ANYDEPTH)
        img = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
        train_images.append(img)
train_images = np.array(train_images)  # Convert the list to an array for training
print('1...')
train_masks = []  # Collect the masks in a list
for directory_path in glob.glob('D:/Practise/Learning/Augmented_masks/'):
    for mask_path in glob.glob(os.path.join(directory_path, "*.tif")):
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE | cv2.IMREAD_ANYDEPTH)
        train_masks.append(mask)
train_masks = np.array(train_masks)  # Convert the list to an array for training
print('2...')
labelencoder = LabelEncoder()  # Encode the labels; since the array is multidimensional, flatten it, encode it, and reshape it back
n, h, w = train_masks.shape  # Read off the array dimensions
train_masks_reshaped = train_masks.reshape(-1, 1)  # Unroll the array into a single column
train_masks_reshaped_encoded = labelencoder.fit_transform(train_masks_reshaped)  # Assign labels to the values
train_masks_encoded_original_shape = train_masks_reshaped_encoded.reshape(n, h, w)  # Restore the array to its original shape
np.unique(train_masks_encoded_original_shape)  # Inspect the unique values of the array
print('3...')
train_images = normalize(train_images, axis=1)  # Normalize the array along axis 1
train_masks_input = np.expand_dims(train_masks_encoded_original_shape, axis=3)  # Add an extra axis to the mask array // Most likely 2 should go here instead of 3
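# As far as I understand it, axis=3 appends a channel axis, turning the (n, h, w)
# mask array into (n, h, w, 1), which is the layout the to_categorical reshaping
# below assumes -- so 3 may actually be right here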
# Create a data subset for quick testing: 10% for testing, the rest for training
X_train, X_test, y_train, y_test = train_test_split(train_images, train_masks_input, test_size=0.10, random_state=0)
print('Class values in the dataset: ', np.unique(y_train))  # 0 is the background
train_masks_cat = to_categorical(y_train, num_classes=n_classes)
y_train_cat = train_masks_cat.reshape((y_train.shape[0], y_train.shape[1], y_train.shape[2], n_classes))
test_masks_cat = to_categorical(y_test, num_classes=n_classes)
y_test_cat = test_masks_cat.reshape((y_test.shape[0], y_test.shape[1], y_test.shape[2], n_classes))
sample_weights = compute_sample_weight('balanced', np.unique(train_masks_reshaped_encoded),
                                       train_masks_reshaped_encoded)
# sample_weights = {i: sample_weights[i] for i in range(2)}
print('Class weights:', sample_weights)
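# NOTE (my suspicion so far): compute_sample_weight treats its second argument as y,
# so passing np.unique(...) yields one weight per class (2 values) rather than one
# weight per training sample -- which would explain "sample_weight sizes: 2" in the
# traceback below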
IMG_HEIGHT = X_train.shape[1]
IMG_WIDTH = X_train.shape[2]
IMG_CHANNELS = X_train.shape[3]
def get_model():
    return multiclass_unet_model(n_classes=n_classes, IMG_HEIGHT=IMG_HEIGHT, IMG_WIDTH=IMG_WIDTH,
                                 IMG_CHANNELS=IMG_CHANNELS)
model = get_model()
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'],
              sample_weight_mode='temporal')
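# If I read the Keras docs correctly, sample_weight_mode='temporal' makes fit()
# expect a 2D sample_weight of shape (num_samples, num_timesteps), which is not
# what I computed above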
model.summary()
history = model.fit(X_train, y_train_cat,
                    batch_size=16,
                    verbose=1,
                    epochs=10,
                    validation_data=(X_test, y_test_cat),
                    sample_weight=sample_weights,
                    shuffle=False)
model.save('CNN.hdf5')
Error:
Traceback (most recent call last):
  File "D:/Practise/Scripts/4_Multiclass_unet_sandstone.py", line 84, in <module>
    shuffle=False)
  File "C:\Users\User\anaconda3\envs\python_3_6\lib\site-packages\keras\engine\training.py", line 1148, in fit
    steps_per_execution=self._steps_per_execution)
  File "C:\Users\User\anaconda3\envs\python_3_6\lib\site-packages\keras\engine\data_adapter.py", line 1383, in get_data_handler
    return DataHandler(*args, **kwargs)
  File "C:\Users\User\anaconda3\envs\python_3_6\lib\site-packages\keras\engine\data_adapter.py", line 1150, in __init__
    model=model)
  File "C:\Users\User\anaconda3\envs\python_3_6\lib\site-packages\keras\engine\data_adapter.py", line 241, in __init__
    _check_data_cardinality(inputs)
  File "C:\Users\User\anaconda3\envs\python_3_6\lib\site-packages\keras\engine\data_adapter.py", line 1649, in _check_data_cardinality
    raise ValueError(msg)
ValueError: Data cardinality is ambiguous:
  x sizes: 10216
  y sizes: 10216
  sample_weight sizes: 2
Make sure all arrays contain the same number of samples.
Process finished with exit code 1
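To show where the mismatched size of 2 comes from, here is a minimal sketch I put together (the shapes are invented for the example; my real data is 10216 training images, as in the traceback). It only demonstrates the sizes compute_sample_weight returns, not a fix:

import numpy as np
from sklearn.utils.class_weight import compute_sample_weight

# Hypothetical small stand-in for my mask data: 8 masks of 4x4 pixels
n, h, w = 8, 4, 4
masks_flat = np.random.randint(0, 2, size=n * h * w)

# What my script effectively does: the unique labels are passed as y, so the
# result has one weight per class (as far as I can tell, the third argument in
# my script only acts as a subsample index and does not change the output length)
per_class = compute_sample_weight('balanced', np.unique(masks_flat))
print(per_class.shape)  # (2,) -- the size Keras rejects

# Passing the flattened masks themselves as y gives one weight per pixel,
# but that is n*h*w values, which still does not match the number of images
per_pixel = compute_sample_weight('balanced', masks_flat)
print(per_pixel.shape)  # (128,) == n * h * w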