Hi so I'm trying to learn python and machine learning using the wisconsin breast cancer dataset, and I'm trying to code logistic regression from scratch but keep getting an attribute error ('AttributeError: 'DataFrame' object has no attribute 'target') in the first parts of my code (Y = data.target), for which I can't find the solution online. I know the rest of it probably does not work with the data either, I have taken it from different things I've seen online, but I'm currently just trying to solve one problem at a time to understand how logistic regression works. I'd really appreciate any kind of help.
Here's my code:
data = pd.read_csv("C:\\Users\Hannah\Desktop\Research Project\data.csv",header=0)
X = data.values[:, :30]
Y = data.target['diagnosis']
X = X[:500,:]
Y = Y[:500]
def sigmoid(z):
s= 1/(1 + np.exp(-z))
return s
def propagate(w, b, X, Y):
m = X.shape[1]
A = sigmoid(np.dot(w.T,X)+b)
cost = -1/m * np.sum(Y * np.log(A) + (1-Y) * (np.log(1-A)))
dz= (1/m)*(A - Y)
dw = np.dot(X, dz.T)
db = np.sum(dz)
cost = np.squeeze(cost)
grads = {"dw": dw,
"db": db}
return grads, cost
def optimize(w, b, X, Y, num_iterations, learning_rate, print_cost = False):
costs = []
for i in range(num_iterations):
m = X.shape[1]
grads,cost = propagate(w, b, X, Y)
b = b - learning_rate*grads["db"]
w = w - learning_rate*grads["dw"]
if i % 100 == 0:
costs.append(cost)
if print_cost and i % 100 == 0:
print ("Cost after iteration %i: %f" %(i, cost))
params = {"w": w,
"b": b}
return params, grads, costs
def predict(w, b, X):
m = X.shape[1]
Y_prediction = np.zeros((1,m))
w = w.reshape(X.shape[0], 1)
A = sigmoid(np.dot(w.T,X)+ b)
for i in range(A.shape[1]):
x_exp = np.exp(A)
x_sum = np.sum(x_exp,axis=1,keepdims=True)
s = np.divide(x_exp,x_sum)
Y_prediction = 1. * (A > 0.5)
return Y_prediction
def model(X_train, Y_train, X_test, Y_test, num_iterations = 2000, learning_rate = 0.5, print_cost = False):
w, b = initialize_with_zeros(X_train.shape[0])
print("learning rate:",learning_rate)
parameters, grads, costs = optimize(w, b, X_train, Y_train, num_iterations, learning_rate, print_cost = False)
w = parameters["w"]
b = parameters["b"]
Y_prediction_train = predict(w,b,X_train)
Y_prediction_test = predict(w,b,X_test)
d = {"costs": costs,
"Y_prediction_test": Y_prediction_test,
"Y_prediction_train" : Y_prediction_train,
"w" : w,
"b" : b,
"learning_rate" : learning_rate,
"num_iterations": num_iterations}
return d
d = model(train_set_x, train_set_y, test_set_x, test_set_y, num_iterations = 2000, learning_rate = 0.005, print_cost = True)