Validate-Classification

Sat 17 May 2025
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from deepchecks import Dataset
from deepchecks.checks import TrainTestFeatureDrift, ConfusionMatrixReport
from deepchecks.suites import full_suite

# Build a reproducible synthetic classification problem.
from sklearn.datasets import make_classification

# 1000 samples, 10 features; n_classes is left at make_classification's
# default, which yields a two-class target.
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    random_state=42,
)

# Hold out 30% of the rows for testing (fixed seed for repeatability).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# One generated column name per feature: feature_0 ... feature_9.
columns = [f"feature_{i}" for i in range(X.shape[1])]

# Tabular views of each split, with the label stored next to the features
# in a 'target' column.
train_df = pd.DataFrame(X_train, columns=columns).assign(target=y_train)
test_df = pd.DataFrame(X_test, columns=columns).assign(target=y_test)

# Baseline Random Forest. It is fitted on the raw NumPy arrays, so the
# estimator does not record any feature names.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)
RandomForestClassifier(random_state=42)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
# FeatureDrift is the replacement for TrainTestFeatureDrift, which deepchecks
# reports as deprecated (DeprecationWarning: "Please use the FeatureDrift
# check instead"). Imported here so this cell is self-contained.
from deepchecks.tabular.checks import FeatureDrift

# Wrap datasets into Deepchecks Dataset objects.
# 'target' is the label column; cat_features=[] declares every feature numeric.
dc_train = Dataset(train_df, label='target', cat_features=[])
dc_test = Dataset(test_df, label='target', cat_features=[])

# Run a single check: feature drift between the train and test splits.
feature_drift_check = FeatureDrift()
drift_result = feature_drift_check.run(dc_train, dc_test)
drift_result.show()
/tmp/ipykernel_1278507/2763849825.py:2: DeprecationWarning:

The TrainTestFeatureDrift check is deprecated and will be removed in the 0.14 version. Please use the FeatureDrift check instead




VBox(children=(HTML(value='<h4><b>Feature Drift</b></h4>'), HTML(value='<p>The TrainTestFeatureDrift check is 

# Re-wrap both splits as Deepchecks Dataset objects ('target' is the label,
# no categorical features).
dc_train, dc_test = (
    Dataset(frame, label='target', cat_features=[])
    for frame in (train_df, test_df)
)

# Confusion matrix of the fitted model on the test split.
# NOTE: `model` was fitted on a plain NumPy array while the Dataset carries
# named columns, which is why scikit-learn emits "X has feature names, but
# RandomForestClassifier was fitted without feature names" here.
confusion_matrix_check = ConfusionMatrixReport()
confusion_matrix_result = confusion_matrix_check.run(
    dc_test,
    model=model,
)
confusion_matrix_result.show()
/home/rajaraman/miniconda3/envs/ml3105/lib/python3.10/site-packages/sklearn/base.py:458: UserWarning:

X has feature names, but RandomForestClassifier was fitted without feature names

/home/rajaraman/miniconda3/envs/ml3105/lib/python3.10/site-packages/sklearn/base.py:458: UserWarning:

X has feature names, but RandomForestClassifier was fitted without feature names




VBox(children=(HTML(value='<h4><b>Confusion Matrix Report</b></h4>'), HTML(value='<p>Calculate the confusion m…
# Reference (ConfusionMatrixReport documentation): https://docs.deepchecks.com/0.18/tabular/auto_checks/model_evaluation/plot_confusion_matrix_report.html


# Feature matrices as DataFrames, so the column names travel with the data.
X_train_df, X_test_df = (
    pd.DataFrame(split, columns=columns) for split in (X_train, X_test)
)

# Refit on the named-column frame: the estimator now records the feature
# names, so predicting on DataFrames with the same columns no longer warns.
model = RandomForestClassifier(random_state=42)
model.fit(X_train_df, y_train)
RandomForestClassifier(random_state=42)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
# Wrap each split as a Deepchecks Dataset: the named feature columns plus the
# labels attached as a 'target' column (no categorical features).
dc_train = Dataset(
    X_train_df.assign(target=y_train),
    label='target',
    cat_features=[],
)
dc_test = Dataset(
    X_test_df.assign(target=y_test),
    label='target',
    cat_features=[],
)

# Confusion matrix of the refitted model on the test split.
confusion_matrix_check = ConfusionMatrixReport()
confusion_matrix_result = confusion_matrix_check.run(dc_test, model=model)
confusion_matrix_result.show()
VBox(children=(HTML(value='<h4><b>Confusion Matrix Report</b></h4>'), HTML(value='<p>Calculate the confusion m…


Score: 15

Category: deepchecks