4-Genetic-Algorithm-1

Sat 17 May 2025

import pyutil as pyu
pyu.get_local_pyinfo()
'conda env: ml312-2024; pyv: 3.12.7 | packaged by Anaconda, Inc. | (main, Oct  4 2024, 13:27:36) [GCC 11.2.0]'

# !pip install deap
!pip show deap | grep "Version:"
Version: 1.4.1
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score
from deap import base, creator, tools, algorithms
# Load the Titanic dataset
url = 'https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv'
data = pd.read_csv(url)
# Preprocessing
# Select features and target
features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']
X = data[features].copy()  # copy so the imputation below doesn't write into a view of `data`
y = data['Survived']
# Handle missing values
X.loc[:, 'Age'] = X['Age'].fillna(X['Age'].median())
X.loc[:, 'Embarked'] = X['Embarked'].fillna(X['Embarked'].mode()[0])
# Convert categorical variables to numerical
X = pd.get_dummies(X, columns=['Sex', 'Embarked'], drop_first=True)
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
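A quick optional sanity check (an addition here, not part of the original run): after imputation and one-hot encoding, the feature matrix should be fully numeric with no missing values.
# Optional sanity check: imputation and encoding left no NaNs behind
assert X_train.isna().sum().sum() == 0
assert X_test.isna().sum().sum() == 0
print(X_train.dtypes)  # all columns numeric/bool after get_dummies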
# Define the Genetic Algorithm
# Create the fitness function
def evaluate(individual):
    n_estimators, max_depth, min_samples_split = individual

    # Ensure parameters are valid: crossover and mutation operate on floats
    # and can drift below the legal bounds, so coerce to int and clamp
    n_estimators = max(1, int(n_estimators))  # at least 1 tree
    max_depth = max(1, int(max_depth))  # depth of at least 1
    min_samples_split = max(2, int(min_samples_split))  # sklearn requires >= 2

    model = RandomForestClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        random_state=42
    )
    # Use 5-fold cross-validation
    scores = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy', n_jobs=-1)
    return (np.mean(scores),)  # DEAP expects fitness values as a tuple
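The fitness function can be spot-checked directly before wiring it into DEAP; the individual below is an arbitrary hand-picked example.
# Spot-check on an arbitrary individual: [n_estimators, max_depth, min_samples_split]
print(evaluate([200, 10, 4]))  # 1-tuple holding the mean 5-fold CV accuracy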
# Create the Genetic Algorithm structure
creator.create("FitnessMax", base.Fitness, weights=(1.0,))  # Maximize accuracy
creator.create("Individual", list, fitness=creator.FitnessMax)
toolbox = base.Toolbox()

# toolbox.register("attr_int", np.random.randint, 100, 500)  # n_estimators range
# toolbox.register("attr_depth", np.random.randint, 5, 20)   # max_depth range
# toolbox.register("attr_split", np.random.randint, 2, 10)   # min_samples_split range

# Register attributes for hyperparameters
toolbox.register("attr_int", np.random.randint, 100, 500)  # n_estimators range
toolbox.register("attr_depth", np.random.randint, 5, 20)   # max_depth range
toolbox.register("attr_split", np.random.randint, 2, 10)   # min_samples_split range

# Register individual and population
toolbox.register("individual", tools.initCycle, creator.Individual,
                 (toolbox.attr_int, toolbox.attr_depth, toolbox.attr_split))
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

# Register genetic operators
toolbox.register("mate", tools.cxBlend, alpha=0.5)  # Crossover
toolbox.register("mutate", tools.mutGaussian, mu=0, sigma=10, indpb=0.2)  # Mutation
toolbox.register("select", tools.selTournament, tournsize=3)  # Selection
toolbox.register("evaluate", evaluate)  # Evaluation function


# toolbox.register("individual", tools.initCycle, creator.Individual,
#                  (toolbox.attr_int, toolbox.attr_depth, toolbox.attr_split))
# toolbox.register("population", tools.initRepeat, list, toolbox.individual)
# toolbox.register("mate", tools.cxBlend, alpha=0.5)          # Crossover
# toolbox.register("mutate", tools.mutGaussian, mu=0, sigma=10, indpb=0.2)  # Mutation
# toolbox.register("select", tools.selTournament, tournsize=3)  # Selection
# toolbox.register("evaluate", evaluate)
# Genetic Algorithm execution
population = toolbox.population(n=20)  # Initial population of 20
NGEN = 10  # Number of generations
CXPB, MUTPB = 0.5, 0.2  # Crossover and mutation probabilities
for gen in range(NGEN):
    # Apply crossover and mutation to produce the offspring pool
    offspring = algorithms.varAnd(population, toolbox, cxpb=CXPB, mutpb=MUTPB)
    # Evaluate every offspring and attach its fitness
    fits = list(map(toolbox.evaluate, offspring))
    for fit, ind in zip(fits, offspring):
        ind.fitness.values = fit
    # Tournament selection forms the next generation
    population = toolbox.select(offspring, k=len(population))
    print(f"Generation {gen + 1}: Best Fitness = {max(ind.fitness.values for ind in population)}")
Generation 1: Best Fitness = (0.8300305328474342,)
Generation 2: Best Fitness = (0.832837584950261,)
Generation 3: Best Fitness = (0.832837584950261,)
Generation 4: Best Fitness = (0.832837584950261,)
Generation 5: Best Fitness = (0.832837584950261,)
Generation 6: Best Fitness = (0.832837584950261,)
Generation 7: Best Fitness = (0.8356446370530879,)
Generation 8: Best Fitness = (0.8356446370530879,)
Generation 9: Best Fitness = (0.8356446370530879,)
Generation 10: Best Fitness = (0.8356446370530879,)
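The manual loop above covers the same select/vary/evaluate cycle as DEAP's ready-made driver, algorithms.eaSimple, which also adds logging via Statistics and can track the best-ever individual with a HallOfFame. A sketch (not executed here, so the call itself is left commented out):
# Sketch: the same evolution via DEAP's built-in driver
stats = tools.Statistics(lambda ind: ind.fitness.values)
stats.register("max", np.max)
stats.register("avg", np.mean)
hof = tools.HallOfFame(1)  # tracks the single best individual ever seen

# pop, logbook = algorithms.eaSimple(
#     toolbox.population(n=20), toolbox,
#     cxpb=CXPB, mutpb=MUTPB, ngen=NGEN,
#     stats=stats, halloffame=hof, verbose=True,
# )
# print(hof[0], hof[0].fitness.values)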
# Get the best individual
best_individual = tools.selBest(population, k=1)[0]
print("Best Hyperparameters:", {
    "n_estimators": int(best_individual[0]),
    "max_depth": int(best_individual[1]),
    "min_samples_split": int(best_individual[2]),
})
Best Hyperparameters: {'n_estimators': 540, 'max_depth': 6, 'min_samples_split': 12}
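Note that the selected n_estimators (540) and min_samples_split (12) fall outside the initial sampling ranges of [100, 500) and [2, 10): cxBlend and mutGaussian are unbounded, and evaluate() only clamps the lower ends. The bounds decorator sketched above would keep genes inside their ranges if that is desired.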
# Train the model with the best hyperparameters
best_model = RandomForestClassifier(
    n_estimators=int(best_individual[0]),
    max_depth=int(best_individual[1]),
    min_samples_split=int(best_individual[2]),
    random_state=42
)
best_model.fit(X_train, y_train)
RandomForestClassifier(max_depth=6, min_samples_split=12, n_estimators=540,
                       random_state=42)
# Evaluate on the test set
y_pred = best_model.predict(X_test)
print("Test Accuracy:", accuracy_score(y_test, y_pred))
Test Accuracy: 0.8100558659217877
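For a rough baseline, the same space can be searched with scikit-learn's RandomizedSearchCV; the sketch below uses n_iter=200 to roughly match the GA's budget of 20 individuals × 10 generations = 200 model evaluations, with distributions mirroring the GA's sampling ranges (not executed here).
# Baseline sketch: random search with a comparable evaluation budget
from scipy.stats import randint
from sklearn.model_selection import RandomizedSearchCV

param_dist = {
    "n_estimators": randint(100, 500),
    "max_depth": randint(5, 20),
    "min_samples_split": randint(2, 10),
}
search = RandomizedSearchCV(
    RandomForestClassifier(random_state=42),
    param_distributions=param_dist,
    n_iter=200, cv=5, scoring="accuracy", n_jobs=-1, random_state=42,
)
# search.fit(X_train, y_train)
# print(search.best_params_, search.best_score_)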




