Various Machine-Learning Algorithms on the Titanic Dataset
Sources:
https://www.kaggle.com/drfrank/estonia-disaster-visualization-machine-learning
https://stackoverflow.com/questions/55240330/how-to-read-csv-file-from-github-using-pandas
https://gist.github.com/michhar/2dfd2de0d4f8727f873422c5d959fff5
https://www.kaggle.com/rajacsp/titanic-basic-analysis
# Fetch the Titanic passenger manifest from the public gist;
# the first CSV column (PassengerId) becomes the index.
import pandas as pd

url = 'https://gist.githubusercontent.com/michhar/2dfd2de0d4f8727f873422c5d959fff5/raw/fa71405126017e6a37bea592440b4bee94bf7b9e/titanic.csv'
df = pd.read_csv(url, index_col=0)
data = df  # working alias used by the rest of the notebook
data.head(5)
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||
1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S |
2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
5 | 0 | 3 | Allen, Mr. William Henry | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S |
# Keep only Survived, Sex and Age; discard every other column in place.
# (columns=... is equivalent to axis=1 in the original.)
data.drop(
    columns=['SibSp', 'Pclass', 'Name', 'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
    inplace=True,
)
data.head(5)
Survived | Sex | Age | |
---|---|---|---|
PassengerId | |||
1 | 0 | male | 22.0 |
2 | 1 | female | 38.0 |
3 | 1 | female | 26.0 |
4 | 1 | female | 35.0 |
5 | 0 | male | 35.0 |
# Count missing values per column (isna is the modern alias of isnull).
data.isna().sum()
Survived 0
Sex 0
Age 177
dtype: int64
# Impute the missing ages with the column mean (computed once, then filled).
mean_age = data['Age'].mean()
data['Age'] = data['Age'].fillna(mean_age)
data.head()
Survived | Sex | Age | |
---|---|---|---|
PassengerId | |||
1 | 0 | male | 22.0 |
2 | 1 | female | 38.0 |
3 | 1 | female | 26.0 |
4 | 1 | female | 35.0 |
5 | 0 | male | 35.0 |
# Confirm that no missing values remain after imputation.
data.isna().sum()
Survived 0
Sex 0
Age 0
dtype: int64
# One-hot encode the categorical Sex column into Sex_female / Sex_male
# indicator columns; the original Sex column is replaced.
categorical_cols = ['Sex']
data = pd.get_dummies(data, columns=categorical_cols)
data.head()
Survived | Age | Sex_female | Sex_male | |
---|---|---|---|---|
PassengerId | ||||
1 | 0 | 22.0 | 0 | 1 |
2 | 1 | 38.0 | 1 | 0 |
3 | 1 | 26.0 | 1 | 0 |
4 | 1 | 35.0 | 1 | 0 |
5 | 0 | 35.0 | 0 | 1 |
# Target vector: Survived (1 = survived, 0 = did not, per the table above).
y = data.loc[:, 'Survived']
y.head()
PassengerId
1 0
2 1
3 1
4 1
5 0
Name: Survived, dtype: int64
# Feature matrix: every column except the target.
X = data.drop(columns='Survived')
X.head()
Age | Sex_female | Sex_male | |
---|---|---|---|
PassengerId | |||
1 | 22.0 | 0 | 1 |
2 | 38.0 | 1 | 0 |
3 | 26.0 | 1 | 0 |
4 | 35.0 | 1 | 0 |
5 | 35.0 | 0 | 1 |
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

# Hold out 30% of the passengers for evaluation; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.3,
                                                    random_state=21)

# Baseline: ordinary least squares fitted on the TRAINING split only.
# BUG FIX: the model was previously fitted and scored on the full (X, y),
# so the held-out test rows leaked into training and the reported R^2
# measured nothing about generalization.
reg = LinearRegression().fit(X_train, y_train)
reg.score(X_test, y_test)  # R^2 on unseen data
reg.coef_
reg.intercept_
# reg.predict(np.array([[3, 5]]))
0.4921256890039903
1. KNN
# 1. k-nearest-neighbours classifier with sklearn defaults.
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier()
knn.fit(X_train, y_train)  # fit mutates and returns the estimator itself
knn_model = knn
knn_model
KNeighborsClassifier()
2. Logistic Regression
# 2. Logistic regression (liblinear solver, suited to small dense data).
from sklearn.linear_model import LogisticRegression

loj = LogisticRegression(solver="liblinear")
# BUG FIX: this model was fitted on the full (X, y) — including the 30%
# held-out rows — unlike every other model in this notebook, so the
# accuracy later reported against X_test was inflated by data leakage.
loj_model = loj.fit(X_train, y_train)
# loj_model
3. Support Vector Machine
# 3. Support-vector classifier with a linear kernel.
from sklearn.svm import SVC

linear_svc = SVC(kernel="linear")
svm_model = linear_svc.fit(X_train, y_train)
4. Gaussian Naive Bayes
# 4. Gaussian naive Bayes (no hyperparameters set).
from sklearn.naive_bayes import GaussianNB

nb = GaussianNB().fit(X_train, y_train)  # fit returns the estimator itself
nb_model = nb
nb_model
GaussianNB()
5. Artificial Neural Networks
# 5. Multi-layer perceptron with sklearn defaults.
from sklearn.neural_network import MLPClassifier

mlpc = MLPClassifier()
mlpc.fit(X_train, y_train)  # fit trains in place and returns the estimator
6. CART
# 6. CART decision tree with sklearn defaults.
from sklearn.tree import DecisionTreeClassifier

# fit returns the (mutated) estimator, so both names refer to one object —
# exactly as in the two-step original.
cart_model = cart = DecisionTreeClassifier().fit(X_train, y_train)
7. Random Forest Classifier
# 7. Random-forest ensemble classifier with sklearn defaults.
from sklearn.ensemble import RandomForestClassifier

forest = RandomForestClassifier()
rf_model = forest.fit(X_train, y_train)
# Score every fitted classifier on the held-out test split and print a
# small report per model.
models = [
    knn_model,
    loj_model,
    svm_model,
    nb_model,
    mlpc,
    cart_model,
    rf_model,
]

for model in models:
    model_name = type(model).__name__
    predictions = model.predict(X_test)
    score = accuracy_score(y_test, predictions)
    print("-" * 28)
    print(f"{model_name}:")
    print(f"Accuracy: {score:.4%}")
----------------------------
KNeighborsClassifier:
Accuracy: 75.3731%
----------------------------
LogisticRegression:
Accuracy: 78.7313%
----------------------------
SVC:
Accuracy: 78.7313%
----------------------------
GaussianNB:
Accuracy: 78.7313%
----------------------------
MLPClassifier:
Accuracy: 79.1045%
----------------------------
DecisionTreeClassifier:
Accuracy: 77.6119%
----------------------------
RandomForestClassifier:
Accuracy: 77.6119%