1-Gridsearchcv-1
Sat 17 May 2025
# Record the interpreter/environment this notebook was run with.
# NOTE(review): pyutil is not a standard-library module — presumably a local
# helper; judging by the cell output, get_local_pyinfo() returns a string with
# the conda env name and the Python version. Confirm against its source.
import pyutil as pyu
pyu.get_local_pyinfo()
'conda env: ml312-2024; pyv: 3.12.7 | packaged by Anaconda, Inc. | (main, Oct 4 2024, 13:27:36) [GCC 11.2.0]'
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score
# Fetch the Titanic passenger table (a CSV file read directly over HTTPS)
# into a DataFrame; requires network access when the cell runs.
url = 'https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv'
data = pd.read_csv(filepath_or_buffer=url)
# Preprocessing
# Select features and target
features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']
# Take an explicit copy: the imputation below then writes to an independent
# frame instead of a selection of `data`, which is what triggers pandas'
# SettingWithCopyWarning.
X = data[features].copy()
y = data['Survived']
# Handle missing values: median for Age, most frequent value for Embarked.
# NOTE: both fill values are computed over all rows, i.e. before the
# train/test split performed later in the script.
X['Age'] = X['Age'].fillna(X['Age'].median())
X['Embarked'] = X['Embarked'].fillna(X['Embarked'].mode()[0])
# Convert categorical variables to numerical (one-hot encoding; the first
# level of each variable is dropped to avoid a redundant column)
X = pd.get_dummies(X, columns=['Sex', 'Embarked'], drop_first=True)
# Train-test split: hold out 20% of the rows, with a fixed seed so the
# partition is repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Define the hyperparameter grid (3 x 3 x 3 = 27 candidate combinations)
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [5, 10, 15],
    'min_samples_split': [2, 5, 10],
}

# Define the model whose hyperparameters are tuned
model = RandomForestClassifier(random_state=42)

# GridSearchCV: try every combination in the grid, scoring each one by
# accuracy under 5-fold cross-validation on the training rows
grid_search = GridSearchCV(model, param_grid, scoring='accuracy', cv=5)
grid_search.fit(X_train, y_train)
GridSearchCV(cv=5, estimator=RandomForestClassifier(random_state=42),
param_grid={'max_depth': [5, 10, 15],
'min_samples_split': [2, 5, 10],
'n_estimators': [100, 200, 300]},
scoring='accuracy')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render; please try loading this page with nbviewer.org.
GridSearchCV(cv=5, estimator=RandomForestClassifier(random_state=42),
param_grid={'max_depth': [5, 10, 15],
'min_samples_split': [2, 5, 10],
'n_estimators': [100, 200, 300]},
scoring='accuracy')
RandomForestClassifier(max_depth=5, random_state=42)
RandomForestClassifier(max_depth=5, random_state=42)
# Print best parameters and score.
# best_params_ is the winning combination from param_grid; best_score_ is that
# combination's mean cross-validated score (accuracy here, per the
# scoring='accuracy' setting of the search), not a test-set figure.
print("Best Parameters:", grid_search.best_params_)
print("Best Score:", grid_search.best_score_)
Best Parameters: {'max_depth': 5, 'min_samples_split': 2, 'n_estimators': 100}
Best Score: 0.8356446370530877
# Evaluate on the test set: take the estimator the search selected, predict
# the held-out rows, and report the fraction predicted correctly
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X=X_test)
print("Test Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))
Test Accuracy: 0.8156424581005587
Score: 15
Category: hyperparam-tuning