What you need to do in order to predict future values with RNNs is to provide data as sequences. Something like this:
[0 1 2] --> [3]
[1 2 3] --> [4]
[2 3 4] --> [5]
[3 4 5] --> [6]
[4 5 6] --> [7]
RNNs learn the structure of sequences, and therefore need a specific three-dimensional input shape:
(n_samples, time_steps, n_features)
For instance, the time steps could be 7 if you use every day of the last week.
How can I create a dataset for RNNs?
tf.keras.preprocessing.timeseries_dataset_from_array
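You need to provide this function with (a) the present values (the inputs) and (b) the future values (the targets, i.e. the same series shifted forward by `seq_length` steps). Here, `seq_length`
is the number of past time steps in each input window.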
import tensorflow as tf
seq_length = 3  # number of past time steps in every input window

series = tf.range(25)
x = series[:-seq_length]  # inputs: drop the tail that has no target left
y = series[seq_length:]   # targets: y[i] is the value right after x[i:i + seq_length]

ds = tf.keras.preprocessing.timeseries_dataset_from_array(
    data=x,
    targets=y,
    sequence_length=seq_length,
    batch_size=1,
)

# Show the first five (window, target) pairs.
for window, target in ds.take(5):
    print(tf.squeeze(window).numpy(), '-->', target.numpy())
[0 1 2] --> [3]
[1 2 3] --> [4]
[2 3 4] --> [5]
[3 4 5] --> [6]
[4 5 6] --> [7]
You can also do the same for multiple variables:
import tensorflow as tf
seq_length = 3  # number of past time steps in every input window

# Two feature columns with 25 time steps each: a counter and a slowly rising ramp.
counter = tf.reshape(tf.range(25, dtype=tf.float32), (-1, 1))
ramp = tf.reshape(tf.linspace(0., .24, 25), (-1, 1))
features = tf.concat([counter, ramp], axis=-1)

x = features[:-seq_length]  # inputs
y = features[seq_length:]   # targets: the row right after each input window

ds = tf.keras.preprocessing.timeseries_dataset_from_array(
    data=x,
    targets=y,
    sequence_length=seq_length,
    batch_size=1,
)

# Show the first five (window, target) pairs.
for window, target in ds.take(5):
    print(tf.squeeze(window).numpy(), '-->', tf.squeeze(target).numpy())

# Small recurrent model; the final layer has one output per feature column.
model = tf.keras.Sequential()
model.add(tf.keras.layers.LSTM(8))
model.add(tf.keras.layers.Dense(8, activation='relu'))
model.add(tf.keras.layers.Dense(2))

model.compile(loss='mae', optimizer='adam')
history = model.fit(ds)
[[0. 0. ]
[1. 0.01]
[2. 0.02]] --> [3. 0.03]
[[1. 0.01]
[2. 0.02]
[3. 0.03]] --> [4. 0.04]
[[2. 0.02]
[3. 0.03]
[4. 0.04]] --> [5. 0.05]
[[3. 0.03]
[4. 0.04]
[5. 0.05]] --> [6. 0.06]
[[4. 0.04]
[5. 0.05]
[6. 0.06]] --> [7. 0.07]
- A custom function such as `univariate_data` below:
import tensorflow as tf
import numpy as np
# Toy univariate series: the integers 0..24.
x = np.arange(25)
def univariate_data(dataset, start_index, end_index, history_size, target_size):
    """Cut a series into (history window, following values) training pairs.

    Args:
        dataset: Series to window. It is indexed with an integer array and
            each window is reshaped to ``(history_size, 1)``, so a 1-D NumPy
            array is expected.
        start_index: Index of the first element of the first history window.
        end_index: Exclusive upper bound for the position right after a
            history window. ``None`` means ``len(dataset) - target_size``.
        history_size: Number of past values in every window.
        target_size: Number of values taken as the label after each window.

    Returns:
        A ``(data, labels)`` tuple of NumPy arrays. ``data[k]`` is a window of
        shape ``(history_size, 1)`` and ``labels[k]`` holds the
        ``target_size`` values that follow it.
    """
    first = start_index + history_size
    last = len(dataset) - target_size if end_index is None else end_index
    positions = range(first, last)

    # Each position marks the element right after a history window.
    windows = [
        np.reshape(dataset[np.arange(pos - history_size, pos)], (history_size, 1))
        for pos in positions
    ]
    targets = [dataset[pos:pos + target_size] for pos in positions]
    return np.array(windows), np.array(targets)
# history_size=3 and target_size=2: every window of three past values is
# paired with the two values that follow it.
present_values, future_values = univariate_data(x, 0, 9, 3, 2)
for present, next_val in zip(present_values, future_values):
    print(tf.squeeze(present).numpy(), '-->', tf.squeeze(next_val).numpy())
[0 1 2] --> [3 4]
[1 2 3] --> [4 5]
[2 3 4] --> [5 6]
[3 4 5] --> [6 7]
[4 5 6] --> [7 8]
[5 6 7] --> [8 9]
And now for multiple variables:
import tensorflow as tf
import numpy as np
history_size = 3  # number of past time steps in every window

# Two feature columns with 25 time steps each: a counter and a ramp from 0 to 0.24.
features = np.column_stack([np.arange(25), np.linspace(0., .24, 25)])

x = features[:-history_size]  # inputs: the last `history_size` rows are dropped
y = features[history_size:]   # targets: the same rows shifted forward by `history_size`
def multivariate_data(dataset, target, start_index, end_index, history_size,
                      target_size, step, single_step=False):
    """Cut a multi-feature series into (history window, label) training pairs.

    Args:
        dataset: Array the history windows are read from; it is indexed along
            its first axis.
        target: Array the labels are read from, indexed with the same
            positions as ``dataset``.
        start_index: Index of the first row of the first history window.
        end_index: Exclusive upper bound for the position right after a
            history window. ``None`` means ``len(dataset) - target_size``.
        history_size: Span, in rows, covered by every history window.
        target_size: Label length, or the label offset when ``single_step``
            is true.
        step: Stride between the rows sampled inside a window.
        single_step: If true, the label is the single entry
            ``target[pos + target_size]``; otherwise it is the slice
            ``target[pos:pos + target_size]``.

    Returns:
        A ``(data, labels)`` tuple of NumPy arrays, one entry per window.
    """
    first = start_index + history_size
    last = len(dataset) - target_size if end_index is None else end_index

    windows, targets = [], []
    for pos in range(first, last):
        # `pos` is the row right after the window; the window takes every
        # `step`-th row from the `history_size` rows before it.
        windows.append(dataset[range(pos - history_size, pos, step)])
        targets.append(
            target[pos + target_size] if single_step
            else target[pos:pos + target_size]
        )
    return np.array(windows), np.array(targets)
# NOTE: `y` was already shifted forward by `history_size` rows when it was
# built, and multivariate_data() reads each label at the position right after
# its window. Every label printed here therefore lies `history_size` steps
# past the end of its window (window [0, 1, 2] is paired with 6, not 3).
present_values, future_values = multivariate_data(
    x, y, 0, 8, history_size, 1, 1)

for window, label in zip(present_values, future_values):
    shown_window = tf.squeeze(window).numpy()
    shown_label = tf.squeeze(label).numpy()
    print(shown_window, '-->', shown_label)
[[0. 0. ]
[1. 0.01]
[2. 0.02]] --> [6. 0.06]
[[1. 0.01]
[2. 0.02]
[3. 0.03]] --> [7. 0.07]
[[2. 0.02]
[3. 0.03]
[4. 0.04]] --> [8. 0.08]
[[3. 0.03]
[4. 0.04]
[5. 0.05]] --> [9. 0.09]
[[4. 0.04]
[5. 0.05]
[6. 0.06]] --> [10. 0.1]
tf.data.Dataset.window
import tensorflow as tf
import numpy as np
history_size = 3  # past steps in every sample
lookahead = 2     # future steps in every sample
window_size = history_size + lookahead

x = tf.range(8)

ds = (
    tf.data.Dataset.from_tensor_slices(x)
    # Sliding windows that advance one element at a time; incomplete
    # windows at the end are dropped.
    .window(window_size, shift=1, drop_remainder=True)
    # Batch each window to its full length so it becomes a single tensor.
    .flat_map(lambda chunk: chunk.batch(window_size))
    # Split every window into (past values, future values).
    .map(lambda chunk: (chunk[:-lookahead], chunk[-lookahead:]))
)

for past, future in ds:
    print(past.numpy(), '-->', future.numpy())
[0 1 2] --> [3 4]
[1 2 3] --> [4 5]
[2 3 4] --> [5 6]
[3 4 5] --> [6 7]
With multiple variables:
import tensorflow as tf
import numpy as np
history_size = 3  # past steps in every sample
lookahead = 2     # future steps in every sample
window_size = history_size + lookahead

# Two feature columns with 20 time steps each: a counter and a ramp from 0 to 0.19.
counter = tf.reshape(tf.range(20, dtype=tf.float32), (-1, 1))
ramp = tf.reshape(tf.linspace(0., .19, 20), (-1, 1))
x = tf.concat([counter, ramp], axis=-1)

ds = (
    tf.data.Dataset.from_tensor_slices(x)
    # Sliding windows that advance one row at a time; incomplete windows
    # at the end are dropped.
    .window(window_size, shift=1, drop_remainder=True)
    # Batch each window to its full length so it becomes a single tensor.
    .flat_map(lambda chunk: chunk.batch(window_size))
    # Split every window into (past rows, future rows).
    .map(lambda chunk: (chunk[:-lookahead], chunk[-lookahead:]))
)

for past, future in ds.take(8):
    shown_past = tf.squeeze(np.round(past, 2)).numpy()
    shown_future = tf.squeeze(np.round(future, 2)).numpy()
    print(shown_past, '-->', shown_future)
    print()
[[0. 0. ]
[1. 0.01]
[2. 0.02]] --> [[3. 0.03]
[4. 0.04]]
[[1. 0.01]
[2. 0.02]
[3. 0.03]] --> [[4. 0.04]
[5. 0.05]]
[[2. 0.02]
[3. 0.03]
[4. 0.04]] --> [[5. 0.05]
[6. 0.06]]
[[3. 0.03]
[4. 0.04]
[5. 0.05]] --> [[6. 0.06]
[7. 0.07]]
[[4. 0.04]
[5. 0.05]
[6. 0.06]] --> [[7. 0.07]
[8. 0.08]]
[[5. 0.05]
[6. 0.06]
[7. 0.07]] --> [[8. 0.08]
[9. 0.09]]