0
votes

Hi, I'm trying to learn Python and machine learning using the Wisconsin breast cancer dataset. I'm trying to code logistic regression from scratch, but I keep getting an attribute error (`AttributeError: 'DataFrame' object has no attribute 'target'`) in the first part of my code (`Y = data.target['diagnosis']`), and I can't find a solution online. I know the rest of the code probably does not work with the data either, since I have pieced it together from different examples I've seen online, but for now I'm just trying to solve one problem at a time so that I understand how logistic regression works. I'd really appreciate any kind of help.

Here's my code:

# Load the dataset; header=0 makes the first CSV row the column names.
# A raw string keeps the Windows backslashes literal instead of relying on
# invalid escape sequences such as "\H" or "\D" (same resulting path).
data = pd.read_csv(r"C:\Users\Hannah\Desktop\Research Project\data.csv", header=0)

# NOTE(review): this takes the first 30 columns as features — confirm that the
# slice does not include the "diagnosis" label (or an id column).
X = data.values[:, :30]
# A DataFrame has no `.target` attribute; the label column is selected by
# name. `.values` yields a NumPy array, matching how X is extracted above.
# NOTE(review): if "diagnosis" holds text labels they presumably need to be
# mapped to 0/1 before being used in the cost computation — verify.
Y = data["diagnosis"].values

# Keep only the first 500 rows of features and labels.
X = X[:500, :]
Y = Y[:500]

def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)) of ``z``.

    ``z`` is passed through ``np.exp``, so it may be a scalar or a NumPy
    array; arrays are processed element-wise.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator

def propagate(w, b, X, Y):
    """Compute the cross-entropy cost and its gradients for one pass.

    The example count is read from ``X.shape[1]``.
    NOTE(review): this assumes X is laid out as (n_features, n_examples) and
    that w is a column vector of matching height — confirm against callers.

    Returns:
        A tuple ``(grads, cost)`` where ``grads`` is a dict with the keys
        ``"dw"`` and ``"db"`` and ``cost`` is the squeezed cost value.
    """
    n_examples = X.shape[1]

    # Forward pass: activations of the linear model w.T X + b.
    activations = sigmoid(np.dot(w.T, X) + b)
    log_terms = Y * np.log(activations) + (1 - Y) * (np.log(1 - activations))
    cost = np.squeeze(-1/n_examples * np.sum(log_terms))

    # Backward pass: the prediction error is pre-scaled by 1/m, so the
    # gradients below need no further averaging.
    scaled_error = (1/n_examples) * (activations - Y)
    gradients = {
        "dw": np.dot(X, scaled_error.T),
        "db": np.sum(scaled_error),
    }

    return gradients, cost


def optimize(w, b, X, Y, num_iterations, learning_rate, print_cost = False):
    """Fit ``w`` and ``b`` with plain gradient descent.

    Each iteration calls ``propagate`` for the current cost and gradients and
    then steps both parameters against their gradient.

    Args:
        w, b: initial parameters, forwarded to ``propagate``.
        X, Y: training inputs and labels, forwarded to ``propagate``.
        num_iterations: number of gradient-descent steps to run.
        learning_rate: step size applied to both gradients.
        print_cost: when true, print the cost every 100 iterations.

    Returns:
        ``(params, grads, costs)``: ``params`` is a dict with the final
        ``"w"`` and ``"b"``; ``grads`` is the gradient dict from the last
        iteration (``None`` when ``num_iterations`` is 0); ``costs`` holds the
        cost recorded every 100 iterations.
    """
    costs = []
    # Defined up front so the return below works even with zero iterations.
    grads = None

    for i in range(num_iterations):
        grads, cost = propagate(w, b, X, Y)

        b = b - learning_rate * grads["db"]
        w = w - learning_rate * grads["dw"]

        # Record (and optionally report) the cost every 100 iterations.
        if i % 100 == 0:
            costs.append(cost)
            if print_cost:
                print ("Cost after iteration %i: %f" %(i, cost))

    params = {"w": w,
              "b": b}
    return params, grads, costs



def predict(w, b, X):
    """Return 0/1 predictions for the columns of ``X``.

    ``w`` is reshaped to a column vector of height ``X.shape[0]``, the
    sigmoid activation of ``w.T X + b`` is computed, and every activation
    strictly greater than 0.5 is labelled 1.0 (all others 0.0).

    Returns:
        A float array with the same shape as the activation matrix.
    """
    w = w.reshape(X.shape[0], 1)
    A = sigmoid(np.dot(w.T, X) + b)

    # Multiplying by 1. turns the boolean mask into a 0.0/1.0 float array.
    return 1. * (A > 0.5)



def model(X_train, Y_train, X_test, Y_test, num_iterations = 2000, learning_rate = 0.5, print_cost = False):
    """Train logistic regression and predict on the train and test inputs.

    Args:
        X_train, Y_train: training inputs and labels passed to ``optimize``.
        X_test: inputs passed to ``predict`` after training.
        Y_test: accepted for interface compatibility; not used in this body.
        num_iterations: number of gradient-descent steps.
        learning_rate: gradient-descent step size.
        print_cost: forwarded to ``optimize`` to enable periodic cost output.

    Returns:
        A dict with the keys ``"costs"``, ``"Y_prediction_test"``,
        ``"Y_prediction_train"``, ``"w"``, ``"b"``, ``"learning_rate"`` and
        ``"num_iterations"``.
    """
    # NOTE(review): initialize_with_zeros is not defined in the visible
    # snippet — confirm it exists and returns the initial (w, b) pair.
    w, b = initialize_with_zeros(X_train.shape[0])

    print("learning rate:",learning_rate)

    # Forward the caller's print_cost instead of always disabling it.
    parameters, grads, costs = optimize(w, b, X_train, Y_train, num_iterations, learning_rate, print_cost = print_cost)

    w = parameters["w"]
    b = parameters["b"]

    Y_prediction_train = predict(w,b,X_train)
    Y_prediction_test = predict(w,b,X_test)
    d = {"costs": costs,
         "Y_prediction_test": Y_prediction_test, 
         "Y_prediction_train" : Y_prediction_train, 
         "w" : w, 
         "b" : b,
         "learning_rate" : learning_rate,
         "num_iterations": num_iterations}

    return d

# Train the model and collect the results dict.
# NOTE(review): train_set_x, train_set_y, test_set_x and test_set_y are not
# defined anywhere in the visible snippet — confirm they are created (e.g. by
# splitting X and Y) before this call runs.
d = model(train_set_x, train_set_y, test_set_x, test_set_y, num_iterations = 2000, learning_rate = 0.005, print_cost = True)
1

1 Answer

0
votes

As I understand it, you are using pandas to read the CSV and want to assign the values in the `diagnosis` column to the variable `Y`. In that case you do not need `target`: as the error message says, a DataFrame has no such attribute. Just `data['diagnosis']` should return what you need.