Titanic-Deepcheck-Analysis

Sat 17 May 2025
!python --version
Python 3.10.5
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from deepchecks.tabular import Dataset
from deepchecks.tabular.suites import full_suite
# Fetch the Titanic passenger CSV from the datasciencedojo/datasets GitHub
# repository and parse it into a DataFrame.
url = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"
titanic = pd.read_csv(filepath_or_buffer=url)
# Preprocess the data
# Drop the identifier / free-text columns that are not used as features
titanic = titanic.drop(columns=['PassengerId', 'Name', 'Ticket', 'Cabin'])
# Fill missing values: the column median for Age, the constant 'S' for Embarked
titanic['Age'] = titanic['Age'].fillna(titanic['Age'].median())
titanic['Embarked'] = titanic['Embarked'].fillna('S')
# Encode categorical variables as integer codes.
# Each column gets its own LabelEncoder, kept in `encoders`, so the fitted
# classes for every column stay available afterwards (for example
# encoders['Sex'].inverse_transform(...) to map codes back to the original
# labels). Re-fitting one shared encoder would overwrite the 'Sex' mapping
# as soon as 'Embarked' is encoded.
encoders = {}
for column in ('Sex', 'Embarked'):
    # After the loop `label_encoder` is the encoder fitted on 'Embarked',
    # the same state a single shared encoder would have ended up in.
    label_encoder = LabelEncoder()
    titanic[column] = label_encoder.fit_transform(titanic[column])
    encoders[column] = label_encoder
# Separate the 'Survived' target from the feature columns, then hold out
# 30% of the rows as the test split.
y = titanic['Survived']
X = titanic.drop(labels=['Survived'], axis='columns')
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,  # fixed seed for the shuffle
)
# Train a Random Forest model
# Only random_state is set (seed 42); every other hyperparameter is left at
# the library default.
model = RandomForestClassifier(random_state=42)
# Fit on the training split only. Left as a bare trailing expression so the
# notebook echoes the fitted estimator as the cell output.
model.fit(X_train, y_train)
RandomForestClassifier(random_state=42)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
# Rebuild one frame per split with the 'Survived' label attached (aligned on
# the row index), then wrap both in Deepchecks Dataset objects that declare
# the label column and the two encoded categorical features.
columns = list(X.columns)
train_df = X_train.loc[:, columns].assign(Survived=y_train)
test_df = X_test.loc[:, columns].assign(Survived=y_test)
dc_train, dc_test = (
    Dataset(split_frame, label='Survived', cat_features=['Sex', 'Embarked'])
    for split_frame in (train_df, test_df)
)
# Build the full Deepchecks suite and run it against both wrapped splits
# together with the fitted model.
suite = full_suite()
suite_result = suite.run(
    model=model,
    train_dataset=dc_train,
    test_dataset=dc_test,
)
/home/rajaraman/miniconda3/envs/ml3105/lib/python3.10/site-packages/deepchecks/utils/abstracts/weak_segment_abstract.py:57: FutureWarning:

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value 'Other' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.

/home/rajaraman/miniconda3/envs/ml3105/lib/python3.10/site-packages/deepchecks/utils/abstracts/weak_segment_abstract.py:57: FutureWarning:

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value 'Other' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.

/home/rajaraman/miniconda3/envs/ml3105/lib/python3.10/site-packages/deepchecks/tabular/checks/train_test_validation/train_test_samples_mix.py:85: FutureWarning:

DataFrame.applymap has been deprecated. Use DataFrame.map instead.
# Save the results as a standalone HTML report
report_file = 'titanic_analysis_report.html'
# save_as_html returns the name of the file it actually wrote. Deepchecks may
# pick a different (suffixed) name when the requested file already exists, so
# report the returned path rather than the requested one.
# NOTE(review): fall back to the requested name if nothing is returned.
saved_report = suite_result.save_as_html(report_file) or report_file
print(f"Deepchecks analysis report saved to '{saved_report}'")
Deepchecks analysis report saved to 'titanic_analysis_report.html'
# Display the results in the notebook or terminal.
# In this run the output was rendered as an ipywidgets Accordion.
suite_result.show()
Accordion(children=(VBox(children=(HTML(value='\n<h1 id="summary_EKL0D0IU0T3X3W4TN4V3CMXRK">Full Suite</h1>\n<


Score: 15

Category: deepchecks