It's a bit difficult to answer the question without a minimal, reproducible example, but here's my take.
sklearn's MLPClassifier has a batch_size parameter that defaults to 'auto', which resolves to min(200, n_samples). When you set verbose=True on your MLPClassifier, you will see that your first example (two consecutive partial_fit calls) runs two iterations, while the second example (a single call) runs only one, i.e. the second partial_fit call continues training and improves on the result of the first call. Within each iteration, the data passed to that call is split into minibatches of batch_size samples.
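Here's a minimal sketch of that comparison, just to show what the verbose output looks like (this is not the code from the question, the sizes are made up):

from sklearn.datasets import make_moons
from sklearn.neural_network import MLPClassifier

X, y = make_moons(n_samples=1000, noise=0.3, random_state=1)

mlp = MLPClassifier(verbose=True, random_state=1)
mlp.partial_fit(X, y, classes=[0, 1])  # prints "Iteration 1, loss = ..."
mlp.partial_fit(X, y, classes=[0, 1])  # prints "Iteration 2, loss = ...", typically lower

mlp = MLPClassifier(verbose=True, random_state=1)
mlp.partial_fit(X, y, classes=[0, 1])  # a single call stops after "Iteration 1"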
When you have a small sample size (< 5000 in the snippet below), the default batch_size is too big relative to the sample size. Reducing it to 100 gives better results for both approaches, and then there is no difference between the two consecutive calls and the single-call approach. This artifact disappears with higher sample numbers (> 10^6).
from sklearn.datasets import make_moons
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
def get_mlp(resize_batch, n):
    mlp = MLPClassifier(verbose=True, random_state=random_state)
    if resize_batch:
        # override the default batch_size='auto' (= min(200, n_samples))
        mlp.batch_size = min(n // 2, 100)
    return mlp
n_samples = [10**2, 10**3, 5*10**3, 10**4, 10**5, 10**6, 10**7]
batch_resize = [False, True]
random_state = 1
results = list()
for n in n_samples:
    x = make_moons(n_samples=n, noise=0.3, random_state=random_state)
    X = StandardScaler().fit_transform(x[0])
    results.append([n])
    for resize in batch_resize:
        # one partial_fit call on the full sample
        mlp = get_mlp(resize, n)
        mlp.partial_fit(X, x[1], [0, 1])
        results[-1].append([mlp.score(X, x[1]), 0, resize])

        # two consecutive partial_fit calls, one per half of the sample
        mlp = get_mlp(resize, n)
        for i in range(2):
            train_start = i * n // 2
            train_stop = (i + 1) * n // 2
            mlp.partial_fit(X[train_start:train_stop], x[1][train_start:train_stop], [0, 1])
        results[-1].append([mlp.score(X, x[1]), 1, resize])
ns = [r[0] for r in results]
colors = ['red', 'green', 'blue', 'black']
labels = ['one call, batch=auto', 'two calls, batch=auto', 'one call, batch=100', 'two calls, batch=100']
fig, ax = plt.subplots()
handles = list()
for k in range(4):
    # plot all four score curves on the single axes created above
    handles.append(plt.plot(ns, [r[k + 1][0] for r in results], c=colors[k], label=labels[k])[0])
plt.xscale('log')
plt.legend(handles=handles, loc=2)
plt.show()
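As far as I can tell, the fitted estimator's n_iter_ and loss_curve_ attributes (the latter exists for the sgd/adam solvers) accumulate across partial_fit calls, so you can also check the iteration counts without parsing the verbose output. A quick sketch with a made-up sample size:

# n_iter_ counts completed iterations, loss_curve_ stores one loss value per iteration
moons = make_moons(n_samples=1000, noise=0.3, random_state=random_state)
X_check = StandardScaler().fit_transform(moons[0])
y_check = moons[1]

mlp = MLPClassifier(random_state=random_state)
mlp.partial_fit(X_check, y_check, [0, 1])
print(mlp.n_iter_, len(mlp.loss_curve_))    # 1 iteration, 1 loss value

mlp = MLPClassifier(random_state=random_state)
mlp.partial_fit(X_check[:500], y_check[:500], [0, 1])
mlp.partial_fit(X_check[500:], y_check[500:], [0, 1])
print(mlp.n_iter_, len(mlp.loss_curve_))    # 2 iterations, 2 loss values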