0
votes
import pandas as pd
import numpy as np
from numpy import array
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from sklearn.preprocessing import MinMaxScaler

# split a multivariate sequence into samples
def split_sequences(sequences, n_steps, n_test):
    X, y = list(), list()

    for i in range(0,len(sequences),100):
        # find the end of this pattern
        end_ix = i + n_steps
        # check if we are beyond the dataset
        if i!=0 and end_ix > len(sequences):
            break

        sequences[i:end_ix,0]=np.insert(np.diff(sequences[i:end_ix,0]),0,0)
        # gather input and output parts of the pattern
        seq_x, seq_y = sequences[i:end_ix-n_test], sequences[end_ix-n_test:end_ix]

        X.append(seq_x)
        y.append(seq_y)
    return array(X), array(y)

df = pd.read_csv('time-series-19-covid-combined.csv')
df = df.drop(['Lat','Long'], axis = 1)
df.columns = ['day','country', 'territory','confirmed','recovered','deaths']

data=df[df.country.isin(['Australia','Costa Rica','Greece','Hungary','Israel'])][['confirmed','recovered','deaths']]

is_brazil =  (df['country']=='Brazil')

data2=df[(is_brazil)][['confirmed','recovered','deaths']]
date=df[(is_brazil)][['day','confirmed']]
date.day = pd.to_datetime(date.day,format='%Y%m%d', errors='ignore')
date.set_index('day', inplace=True)

n_features = data.shape[1] # this is number of parallel inputs
n_timesteps = date.shape[0] # this is number of timesteps
n_test = int(n_timesteps*0.25)

X, Y = split_sequences(data.values, n_timesteps, n_test)

#normalization#####################################################
alld=np.concatenate((X,Y),1)
alld=alld.reshape(alld.shape[0]*alld.shape[1],alld.shape[2])



scaler = MinMaxScaler()
scaler.fit(alld)
X=[scaler.transform(x) for x in X]
y=[scaler.transform(y) for y in Y]

X=np.array(X)
y=np.array(y)[:,:,0]

# define model
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(n_timesteps - n_test, n_features)))
model.add(Dense(y.shape[1]))
model.compile(optimizer='adam', loss='mse')

# fit model
model.fit(X, y, epochs=200, verbose=1)

# evaluation

data2x=data2
truth = data2

data2x.values[0:len(data2x),0]=np.insert(np.diff(data2x.values[0:len(data2x),0]),0,0)
data2x=scaler.transform(data2x)

X_test = np.expand_dims(data2x, axis=0)

yhat = model.predict(X_test[:,-n_timesteps + n_test:,:], verbose=0)

print (data2x[-n_timesteps + n_test:,0], yhat)

actual_predictions = scaler.inverse_transform(np.tile(yhat, (1, 1, 3))[0])[:,0]

Sizes and values:

X: array of float-64 (16,108,3)

X_test: array of float-64 (1,144,3)

Y: array of float-64 (16,36,3)

alld: array of float-64 (2304,3)

data: Dataframe (1728,3)

data2: Dataframe (144,3)

data2x: array of float-64 (144,3)

date: Dataframe (144,1)

df: Dataframe (38448,6)

is_brazil: Series (38448,)

n_features: 3 (int)

n_test: 36 (int)

n_timesteps: 144 (int)

truth: Dataframe (144,3)

y: Array of float-64 (16,36)

yhat: Array of float-32 (1,36)

What I intend to do on my project is to train an LSTM with data from confirmed cases, recovered patients and deaths from a certain set of countries and try to predict the number of cases in another country. For example: training the LSTM with data from Australia, Costa Rica, Greece, Hungary and Israel and trying to predict the number of cases in Brazil.

Found original code here and tried to code it using Keras, but in the last line of code above, when I try to reverse normalization, I'm having the error: ValueError: operands could not be broadcast together with shapes (1,108) (3,) (1,108)

I have no clue of what can be done to solve this. Searched in other threads, but with no success. Any solution will be greatly appreciated.

Best regards,

Higo.

1

1 Answers

0
votes

Solved.

I added a missing reshape line before:

lstm_out=lstm_out.reshape(1,144,1).cpu().data.numpy()