forked from jimthompson5802/gh_copilot
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrandom_forest_training.py
45 lines (32 loc) · 1.26 KB
/
random_forest_training.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import pandas as pd
# Load the dataset from 'data.csv' into a pandas DataFrame.
df = pd.read_csv('data.csv')
# Show the first 5 rows, then the last 5 rows. The results are passed to
# print() explicitly because a bare expression displays nothing in a script.
for preview in (df.head, df.tail):
    print(preview())
# Random forest regression model on the dataset
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
# Commentary: Comments following this line were generated by Copilot
# Shared seed so that both the train/test split and the forest are
# reproducible; with an unseeded split the reported MSE changes on every run.
RANDOM_STATE = 42
# split data into training (80%) and testing (20%) sets
train, test = train_test_split(df, test_size=0.2, random_state=RANDOM_STATE)
# separate the target variable from the training and testing sets
# (the data must contain a 'target' column; a KeyError is raised otherwise)
train_x = train.drop(['target'], axis=1)
train_y = train['target']
test_x = test.drop(['target'], axis=1)
test_y = test['target']
# create a random forest regressor with 100 trees
rf = RandomForestRegressor(n_estimators=100, random_state=RANDOM_STATE)
# fit the regressor on the training features and target
rf.fit(train_x, train_y)
# make predictions on the held-out test set
pred = rf.predict(test_x)
# calculate and report the mean squared error on the test set
mse = mean_squared_error(test_y, pred)
print('Mean Squared Error:', mse)
# save the fitted model to disk
import pickle
filename = 'finalized_model.sav'
# Open the file in a context manager so the handle is flushed and closed
# as soon as the dump finishes, even if pickling raises.
with open(filename, 'wb') as model_file:
    pickle.dump(rf, model_file)