Comparing Various Machine-Learning Algorithms on the Titanic Dataset

Sources:

https://www.kaggle.com/drfrank/estonia-disaster-visualization-machine-learning

https://stackoverflow.com/questions/55240330/how-to-read-csv-file-from-github-using-pandas

https://gist.github.com/michhar/2dfd2de0d4f8727f873422c5d959fff5

https://www.kaggle.com/rajacsp/titanic-basic-analysis

import pandas as pd

# Titanic passenger manifest hosted as a raw CSV gist; the first column
# (PassengerId) becomes the DataFrame index.
url = 'https://gist.githubusercontent.com/michhar/2dfd2de0d4f8727f873422c5d959fff5/raw/fa71405126017e6a37bea592440b4bee94bf7b9e/titanic.csv'
df = pd.read_csv(url, index_col=0)

# Work on an independent copy: the original `data = df` was a plain alias,
# so the later `drop(..., inplace=True)` silently clobbered `df` as well.
data = df.copy()

data.head(5)
Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
PassengerId
1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S
2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 0 PC 17599 71.2833 C85 C
3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S
4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S
5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S
# Keep only Survived, Sex and Age; discard the rest of the manifest columns.
data.drop(
    columns=['SibSp', 'Pclass', 'Name', 'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
    inplace=True,
)
data.head(5)
Survived Sex Age
PassengerId
1 0 male 22.0
2 1 female 38.0
3 1 female 26.0
4 1 female 35.0
5 0 male 35.0
# Count missing values per column (Age is the only one with gaps).
data.isna().sum()
Survived      0
Sex           0
Age         177
dtype: int64
# Impute the missing ages with the column mean.
mean_age = data['Age'].mean()
data['Age'] = data['Age'].fillna(mean_age)
data.head()
Survived Sex Age
PassengerId
1 0 male 22.0
2 1 female 38.0
3 1 female 26.0
4 1 female 35.0
5 0 male 35.0
# Confirm the imputation removed every missing value.
data.isna().sum()
Survived    0
Sex         0
Age         0
dtype: int64
# One-hot encode Sex into Sex_female / Sex_male indicator columns.
# NOTE(review): both dummies are kept, which makes the pair perfectly
# collinear for the linear models; drop_first=True would avoid that, but
# it changes the feature matrix — confirm before switching.
data = pd.get_dummies(data, columns=['Sex'])
data.head()
Survived Age Sex_female Sex_male
PassengerId
1 0 22.0 0 1
2 1 38.0 1 0
3 1 26.0 1 0
4 1 35.0 1 0
5 0 35.0 0 1
# Target vector: Survived (1 = survived, 0 = did not).
y = data.loc[:, 'Survived']
y.head()
PassengerId
1    0
2    1
3    1
4    1
5    0
Name: Survived, dtype: int64
# Feature matrix: everything except the target column.
X = data.drop(columns='Survived')
X.head()
Age Sex_female Sex_male
PassengerId
1 22.0 0 1
2 38.0 1 0
3 26.0 1 0
4 35.0 1 0
5 35.0 0 1
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

# Hold out 30% of the rows for evaluation; random_state pins the split so
# every model below is compared on the same test set.
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                               test_size = 0.3,
                                               random_state = 21)

# Baseline: ordinary least squares on the *training* split only.
# (The original fit and scored on the full X/y, leaking the held-out rows
# into the model right after creating the split.)
reg = LinearRegression().fit(X_train, y_train)
reg.score(X_test, y_test)  # R^2 on held-out data

reg.coef_

reg.intercept_

# reg.predict(np.array([[3, 5]]))
0.4921256890039903

1. KNN

from sklearn.neighbors import KNeighborsClassifier

# Nearest-neighbour classifier with default hyperparameters,
# fit on the training split. (.fit returns the estimator itself.)
knn = KNeighborsClassifier()
knn_model = knn.fit(X_train, y_train)
knn_model
KNeighborsClassifier()

2. Logistic Regression

from sklearn.linear_model import LogisticRegression

# Logistic regression with the liblinear solver, suitable for this small,
# low-dimensional problem. Fit on the training split only: the original
# fit on the full X/y, leaking the held-out rows into the accuracy
# comparison at the end and making it inconsistent with every other model.
loj = LogisticRegression(solver = "liblinear")
loj_model = loj.fit(X_train, y_train)
# loj_model

3. Support Vector Machine

from sklearn.svm import SVC

# Linear-kernel support vector classifier, fit on the training split.
svm = SVC(kernel="linear")
svm_model = svm.fit(X_train, y_train)

4. Gaussian Naive Bayes

from sklearn.naive_bayes import GaussianNB

# Gaussian naive Bayes, fit on the training split.
nb = GaussianNB()
nb_model = nb.fit(X_train, y_train)
nb_model
GaussianNB()

5. Artificial Neural Networks

from sklearn.neural_network import MLPClassifier

# Multi-layer perceptron with default hyperparameters.
# NOTE(review): the default iteration budget may trigger a
# ConvergenceWarning on this data — confirm if it matters here.
mlpc = MLPClassifier()
mlpc = mlpc.fit(X_train, y_train)

6. CART

from sklearn.tree import DecisionTreeClassifier

# Single decision tree (CART) with default settings, fit on the
# training split. (.fit returns the estimator, so cart_model is cart.)
cart = DecisionTreeClassifier()
cart_model = cart.fit(X_train, y_train)

7. Random Forest Classifier

from sklearn.ensemble import RandomForestClassifier

rf_model = RandomForestClassifier().fit(X_train, y_train)

# All fitted classifiers, in presentation order.
models = [knn_model, loj_model, svm_model, nb_model, mlpc, cart_model, rf_model]

# Report each classifier's accuracy on the held-out split.
for clf in models:
    predictions = clf.predict(X_test)
    score = accuracy_score(y_test, predictions)
    print("-" * 28)
    print(clf.__class__.__name__ + ":")
    print("Accuracy: {:.4%}".format(score))
----------------------------
KNeighborsClassifier:
Accuracy: 75.3731%
----------------------------
LogisticRegression:
Accuracy: 78.7313%
----------------------------
SVC:
Accuracy: 78.7313%
----------------------------
GaussianNB:
Accuracy: 78.7313%
----------------------------
MLPClassifier:
Accuracy: 79.1045%
----------------------------
DecisionTreeClassifier:
Accuracy: 77.6119%
----------------------------
RandomForestClassifier:
Accuracy: 77.6119%