0
votes

Build multiple random forest regressors on the X_train set and Y_train labels, with the max_depth parameter taking each value from 3 to 5 and n_estimators taking each of the values 50, 100 and 200.

Evaluate each model accuracy on testing data set.

Hint: Make use of a for loop. Print the max_depth and n_estimators values of the model with the highest accuracy.

Note: Print the parameter values in the form of tuple (a, b). a refers to max_depth value and b refers to n_estimators

This is what I've tried so far:

# Load the Boston housing data and hold out a test split (fixed seed so the
# split is the same on every run).
boston = datasets.load_boston()
X_train, X_test, Y_train, Y_test = train_test_split(
    boston.data, boston.target, random_state=30
)

# Fit one 100-tree forest per depth in 3..5 and print its test-set score.
for depth in (3, 4, 5):
    rf_reg = RandomForestRegressor(max_depth=depth, n_estimators=100)
    rf_reg.fit(X_train, Y_train)  # fit() returns the estimator itself
    test_score = rf_reg.score(X_test, Y_test)
    print(test_score)

This gives me the accuracy score for the 3 models, but I am not able to fetch the parameters of the highest-scoring model individually. I can use rf_reg.get_params(), but it gives me all the parameters. I only want the max_depth and n_estimators of the model with the highest score.

7
What have you tried so far? – Anwarvic
boston= datasets.load_boston() X_train, X_test, Y_train, Y_test = train_test_split(boston.data, boston.target, random_state=30) for m in range(3,6) : rf_reg = RandomForestRegressor(n_estimators =100, max_depth=m) rf_reg = rf_reg.fit(X_train, Y_train) print(rf_reg.score(X_test,Y_test)) This gives me the accuracy score for the 3 models, but I am not able to fetch the highest accuracy's parameters individually. I can use rf_reg.get_params(), but it gives me all the parameters. I only want the max_depth and n_estimators of the highest-scoring one. – saurabh bijola
I've edited your question to include this code, if that's OK. – Anwarvic

7 Answers

3
votes
# Try every (max_depth, n_estimators) combination the task asks for and
# remember the one whose forest scores best on the held-out test split.
# Expects X_train, X_test, Y_train, Y_test and RandomForestRegressor to be
# defined/imported already.
max_reg = None  # fitted regressor with the highest test score so far
max_score = float("-inf")  # R^2 can be negative, so 0 is not a safe floor
t = ()  # (max_depth, n_estimators) of the best model so far
for m in range(3, 6):
    for c_estimators in (50, 100, 200):
        rf_reg = RandomForestRegressor(n_estimators=c_estimators, max_depth=m)
        rf_reg = rf_reg.fit(X_train, Y_train)
        # Call score(); keeping the bound method itself would make the
        # comparison below raise TypeError.
        rf_reg_score = rf_reg.score(X_test, Y_test)
        print((m, c_estimators, rf_reg_score))
        if rf_reg_score > max_score:
            max_score = rf_reg_score
            max_reg = rf_reg
            # Only updated on improvement, so after the loops it still
            # names the best combination rather than the last one tried.
            t = (m, c_estimators)
print(t)
1
votes

You will get the max score with the (5, 100) combination. Also, as per the question, you are asked to evaluate a total of 9 combinations (3 max_depth values x 3 n_estimators values).

0
votes

You can do something like this:

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor


# Load the data and hold out a test split (fixed seed for a repeatable split).
boston = datasets.load_boston()
X_train, X_test, Y_train, Y_test = train_test_split(
    boston.data, boston.target, random_state=30
)

max_reg = None  # fitted regressor with the highest test score so far
max_score = float("-inf")  # R^2 can be negative, so 0 is not a safe floor
max_params = ()  # (max_depth, n_estimators) that produced max_score
for m in range(3, 6):
    for n in (50, 100, 200):
        rf_reg = RandomForestRegressor(n_estimators=n, max_depth=m)
        rf_reg = rf_reg.fit(X_train, Y_train)
        rf_reg_score = rf_reg.score(X_test, Y_test)
        print(rf_reg_score)

        # Keep the best model seen so far together with its parameters.
        if rf_reg_score > max_score:
            max_score = rf_reg_score
            max_reg = rf_reg
            max_params = (m, n)

print("Max Score:", max_score)
# The task asks for the winning parameters as a (max_depth, n_estimators) tuple.
print(max_params)
0
votes

How do I print the parameter values in the form of a tuple (a, b), where a refers to the max_depth value and b refers to n_estimators?

0
votes

You can create an empty dictionary and store each score in it, using the tuple (max_depth, n_estimators) as the key. Then find the key with the maximum score in the dictionary, and finally print that key.

# Map every (max_depth, n_estimators) pair to its test-set score, then pick
# the key whose score is highest. Expects X_train, X_test, Y_train, Y_test
# and RandomForestRegressor to be defined/imported already.
all_scores = {}

for m in range(3, 6):
    for n in (50, 100, 200):
        rf_reg = RandomForestRegressor(n_estimators=n, max_depth=m)
        rf_reg = rf_reg.fit(X_train, Y_train)
        # Score once and reuse the value for both the log line and the dict.
        score = rf_reg.score(X_test, Y_test)
        print(m, n, score)
        all_scores[(m, n)] = score

# max() over a dict iterates its keys; key=all_scores.get ranks them by score,
# so the result is the best parameter tuple, not the score itself.
best_params = max(all_scores, key=all_scores.get)
print(best_params)
0
votes

The solution below worked for me:

#Write your code here
from sklearn.ensemble import RandomForestRegressor
import sklearn.datasets as datasets
import sklearn.model_selection as model_selection
import numpy as np

# Seed NumPy's global RNG; scikit-learn estimators fall back to it when no
# random_state is passed, so the forests below are repeatable run to run.
np.random.seed(100)
boston = datasets.load_boston()

X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
    boston.data, boston.target, random_state=30
)

# Open the report once in append mode. The context manager closes the handle
# even if a fit fails, instead of leaving one open file object per print().
with open("output.txt", "a") as out:
    print(X_train.shape, file=out)
    print(X_test.shape, file=out)

    # Baseline forest with default hyper-parameters.
    rf_Regressor = RandomForestRegressor()
    rf_reg = rf_Regressor.fit(X_train, Y_train)

    print(rf_reg.score(X_train, Y_train), file=out)
    print(rf_reg.score(X_test, Y_test), file=out)

    # Predictions for the first two test rows.
    predicted = rf_reg.predict(X_test[:2])
    print(predicted, file=out)

    # Sweep max_depth with a fixed number of trees; depths[i] and scores[i]
    # describe the same model.
    depths = []
    scores = []
    c_estimators = 100
    for x in range(2, 6):
        rf_Regressor = RandomForestRegressor(n_estimators=c_estimators, max_depth=x)
        rf_reg = rf_Regressor.fit(X_train, Y_train)
        depths.append(x)
        scores.append(rf_reg.score(X_test, Y_test))

    # Report (max_depth, n_estimators) for the highest-scoring model.
    print((depths[np.argmax(scores)], c_estimators), file=out)
-1
votes
#Write your code here
import sklearn.datasets as datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
import numpy as np

# Seed NumPy's global RNG; scikit-learn estimators fall back to it when no
# random_state is passed, so repeated runs build the same forests.
np.random.seed(100)

boston = datasets.load_boston()

X_train, X_test, Y_train, Y_test = train_test_split(
    boston.data, boston.target, random_state=30
)

print(X_train.shape)
print(X_test.shape)

# Baseline forest with default hyper-parameters.
rf_classifier = RandomForestRegressor()
rf_reg = rf_classifier.fit(X_train, Y_train)
print(rf_reg.score(X_train, Y_train))
print(rf_reg.score(X_test, Y_test))
predicted = rf_reg.predict(X_test)
print(predicted[:2])  # predictions for the first two test rows

a = [3, 4, 5]  # candidate max_depth values
b = [50, 100, 200]  # candidate n_estimators values
score = []  # test-set score of each fitted model, in grid order
params = []  # (max_depth, n_estimators) for the matching entry in score
# Evaluate all 3 x 3 combinations, not just the pairs (a[i], b[i]).
for depth in a:
    for n_trees in b:
        rf_classifier = RandomForestRegressor(n_estimators=n_trees, max_depth=depth)
        rf_reg = rf_classifier.fit(X_train, Y_train)
        score.append(rf_reg.score(X_test, Y_test))
        params.append((depth, n_trees))

# Print the parameters that actually scored highest instead of a hard-coded
# answer.
print(params[int(np.argmax(score))])