
I want to create a sklearn pipeline that consists of two steps:

  1. Custom transformer function
  2. Keras classification model

This is my data set (of course, I'm providing a reduced subset to show the data format):

x_train

array([[[0.45977011, 0.16666667, 0.18373494, ..., 0.33333333,
         0.71317829, 0.7246617 ],
        [0.6091954 , 0.25      , 0.28313253, ..., 0.33333333,
         0.66666667, 0.73101353],
        [0.25287356, 0.75      , 0.34337349, ..., 0.16666667,
         0.62790698, 0.62137531],
        ...,
        [0.6091954 , 0.58333333, 0.20481928, ..., 0.33333333,
         0.62015504, 0.65009666],
        [0.41954023, 0.91666667, 0.30722892, ..., 0.33333333,
         0.71317829, 0.76719138],
        [0.31609195, 0.41666667, 0.46987952, ..., 0.33333333,
         0.5503876 , 0.71306269]],

       [[0.6091954 , 0.25      , 0.28313253, ..., 0.33333333,
         0.66666667, 0.73101353],
        [0.25287356, 0.75      , 0.34337349, ..., 0.16666667,
         0.62790698, 0.62137531],
        [0.54022989, 0.5       , 0.34337349, ..., 0.33333333,
         0.57364341, 0.66238608],
        ...,
        [0.41954023, 0.91666667, 0.30722892, ..., 0.33333333,
         0.71317829, 0.76719138],
        [0.31609195, 0.41666667, 0.46987952, ..., 0.33333333,
         0.5503876 , 0.71306269],
        [0.44252874, 0.75      , 0.48192771, ..., 0.41666667,
         0.62015504, 0.65023474]],

       [[0.25287356, 0.75      , 0.34337349, ..., 0.16666667,
         0.62790698, 0.62137531],
        [0.54022989, 0.5       , 0.34337349, ..., 0.33333333,
         0.57364341, 0.66238608],
        [0.3908046 , 0.33333333, 0.34939759, ..., 0.41666667,
         0.58914729, 0.70450152],
        ...,
        [0.31609195, 0.41666667, 0.46987952, ..., 0.33333333,
         0.5503876 , 0.71306269],
        [0.44252874, 0.75      , 0.48192771, ..., 0.41666667,
         0.62015504, 0.65023474],
        [0.60344828, 0.41666667, 0.46686747, ..., 0.25      ,
         0.66666667, 0.61391881]]])

y_train

array([[1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.]], dtype=float32)

And this is my current code:

import numpy as np
from scipy.spatial.distance import pdist, squareform
from keras.wrappers.scikit_learn import KerasClassifier
import tensorflow as tf
from tensorflow.keras.models import *
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import *
from tensorflow.keras.utils import *
from tensorflow.keras.callbacks import *
from sklearn.pipeline import Pipeline

# Custom transformer
class Transformer():

    def transform(self, x):
        x_img = np.apply_along_axis(self.rec_plot, 1, x).astype('float16')
        return x_img
    
    def rec_plot(s, eps=0.10, steps=10):
        d = pdist(s[:,None])
        d = np.floor(d/eps)
        d[d>steps] = steps
        Z = squareform(d)
        return Z
    
    def fit(self, x, y=None):
        return x

def create_model():
    model = Sequential()

    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(50, 50, 17)))
    model.add(Conv2D(32, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Flatten())
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(3, activation='softmax'))

    #sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    print(model.summary())
    
    return model

transformer = Transformer()
clf = KerasClassifier(build_fn=create_model, verbose=0)

blackbox_model = Pipeline([('transform', transformer),('clf',clf)])
blackbox_model.fit(x_train, y_train)

When I run this code on my dataset, I get the following error:

AttributeError: 'numpy.ndarray' object has no attribute 'transform'

It seems to be related to the data format (which is why I shared the data format above), but I'm not sure how to fix it.


1 Answer


Your Transformer.fit() method should return self, not x. Because the class does not define fit_transform(), the pipeline falls back to calling transformer.fit(X, y).transform(X); since fit() returns the input array, the pipeline ends up calling .transform() on a numpy array, which is exactly the AttributeError you see.
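
For reference, here is a minimal sketch of the corrected class (untested, keeping your structure); rec_plot is made a static method here, because calling the bound method self.rec_plot would otherwise pass the instance as s:

import numpy as np
from scipy.spatial.distance import pdist, squareform

class Transformer:

    @staticmethod
    def rec_plot(s, eps=0.10, steps=10):
        # Recurrence plot of a 1-D series: pairwise distances, quantized by eps and capped at steps
        d = pdist(s[:, None])
        d = np.floor(d / eps)
        d[d > steps] = steps
        return squareform(d)

    def fit(self, x, y=None):
        # Stateless: nothing to learn, but return self so the pipeline can chain .transform()
        return self

    def transform(self, x):
        # Convert each series in the batch to its recurrence-plot image
        return np.apply_along_axis(self.rec_plot, 1, x).astype('float16')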

Because your Transformer object is stateless, it might be easier to use sklearn.preprocessing.FunctionTransformer. You can instantiate that class with your transform function. Something like the following (untested):

import numpy as np
import sklearn.preprocessing
from scipy.spatial.distance import pdist, squareform

def _rec_plot(s, eps=0.10, steps=10):
    # Recurrence plot of a 1-D series: pairwise distances, quantized by eps and capped at steps
    d = pdist(s[:,None])
    d = np.floor(d/eps)
    d[d>steps] = steps
    Z = squareform(d)
    return Z

def fun(x, y=None):
    # Apply the recurrence plot to every series in the batch
    return np.apply_along_axis(_rec_plot, 1, x).astype('float16')

transformer = sklearn.preprocessing.FunctionTransformer(func=fun)
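
You can then plug this transformer into the pipeline exactly as in your question (a sketch, reusing the clf KerasClassifier you already defined):

from sklearn.pipeline import Pipeline

# Same pipeline as before, now with the stateless FunctionTransformer
blackbox_model = Pipeline([('transform', transformer), ('clf', clf)])
blackbox_model.fit(x_train, y_train)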

I also suggest not using the from module import * syntax, because it can pollute your namespace. When I first read your question, I wondered whether the problem was clashing function names caused by all of the wildcard imports.
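
For example, importing only the names that create_model actually uses keeps the namespace explicit (this list is an assumption based on the layers in your snippet):

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense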