I learned linear regression, so I decided to apply it to this real-estate dataset: https://archive.ics.uci.edu/ml/datasets/Real+estate+valuation+data+set
I use gradient descent to compute my weights, but they do not converge to the optimum. If I set the learning rate to 0.01 with 100 iterations, I get nan and inf values; and if I set the learning rate to something like 0.0001, it either does not converge or I still get nan and inf values.
import openpyxl
from pathlib import Path
import numpy as np
# Path to the Excel workbook holding the real-estate valuation dataset.
# NOTE(review): hard-coded absolute path — only works on this machine.
file = Path(r'C:\Users\Cata\Desktop', 'lini.xlsx')
# NOTE(review): `file` is rebound from a Path to a Workbook object here;
# two distinct names would be clearer.
file = openpyxl.load_workbook(file)
# The active (first) worksheet; rows are read from it below.
fila = file.active
def Loss(x, w, y):
    """Return the half mean-squared-error cost of predicting x·w against y.

    Computes (1 / 2m) * Σ (x·w − y)², the standard linear-regression cost.
    """
    errors = np.dot(x, w) - y
    return np.sum(errors ** 2) / (2 * len(y))
def dLoss(x, w, y):
    """Return the gradient of Loss w.r.t. w: (1/m) * Xᵀ(Xw − y)."""
    errors = np.dot(x, w) - y
    return np.dot(np.transpose(x), errors) / len(y)
def Gradient(x, y, learning=0.000001, iterations=1000):
    """Fit linear-regression weights by batch gradient descent.

    Parameters
    ----------
    x : array-like, shape (m, k)
        Design matrix (first column expected to be the intercept 1s).
    y : array-like, shape (m,)
        Target values.
    learning : float
        Learning rate (step size).
    iterations : int
        Number of full-batch update steps.

    Returns
    -------
    numpy.ndarray of shape (k,) — the learned weights.
    """
    x = np.asarray(x, dtype=float)  # callers may pass a list of lists
    y = np.asarray(y, dtype=float)
    m = len(y)
    # Bug fix: weight count was hard-coded to 7; derive it from the
    # number of columns so any feature count works.
    w = np.random.rand(x.shape[1])
    for _ in range(iterations):
        # Batch gradient of the half-MSE cost: (1/m) * Xᵀ(Xw − y).
        gradient = np.dot(x.T, np.dot(x, w) - y) / m
        w = w - learning * gradient
    return w
x = []
y = []
# Build the design matrix: a leading 1 for the intercept plus the six
# feature columns (cells 1..6); column 7 is the target price.
for row in fila.iter_rows(min_row=2, max_row=400):
    acm = [1]
    for i in range(1, 7):
        acm.append(float(row[i].value))
    x.append(acm)
    y.append(float(row[7].value))
x = np.array(x)  # was left as a plain list of lists; make it a real matrix
y = np.array(y)
# NOTE(review): the raw features span very different scales (transaction
# year ~2013, distances up to thousands of meters, lat/long ~25/121), which
# is the usual cause of gradient descent blowing up to nan/inf at any
# usable learning rate. Standardizing the feature columns, e.g.
# (x - x.mean(axis=0)) / x.std(axis=0) for columns 1..6, should let a much
# larger learning rate converge — confirm against this dataset.
sa = Gradient(x, y)
print(sa)
# A single hand-picked sample (intercept + six features) to sanity-check
# the learned weights.
test = [1, 2013.500, 6.5, 90.45606, 9, 24.97433, 121.5431]
print(test)
print(np.dot(test, sa))
Also, I get this error:
RuntimeWarning: invalid value encountered in subtract
w = w - learning*dLoss(x,w,y)