max_epoch = 20
C1 = np.array([[0., -0.8], [1.5, 0.8]])
C2 = np.array([[1., -0.7], [2., 0.7]])
gauss1 = np.dot(np.random.randn(200, 2) + np.array([5, 3]), C1)
gauss2 = np.dot(np.random.randn(200, 2) + np.array([1.5, 0]), C2)
X = np.vstack([gauss1, gauss2])
y = np.r_[np.ones(200), np.zeros(200)]
fig, ax = plt.subplots()
fig.set_size_inches(20,10)
ax.scatter(X[:,0], X[:,1], c=y)
x_min, x_max = X[:, 0].min(), X[:, 0].max()
y_min, y_max = X[:, 1].min(), X[:, 1].max()
model = MySGDClassifier(alpha=.01, max_epoch=max_epoch, penalty = 'none', C = 100)
model.fit(X, y)
print("theta = ", model.theta)
t0 = model.theta.item(0)
t1 = model.theta.item(1)
t2 = model.theta.item(2)
x_ = np.array([x_min, x_max])
y_ = -(x_ * t0 + t2) / t1
ax.plot(x_, y_)
ax.set_xlim(x_min - 1, x_max + 1)
ax.set_ylim(y_min - 1, y_max + 1)
plt.show()
gradients = xi.T.dot(sigma(xi.dot(self.theta)) - yi)
а усреднять по количеству примеров?