import pandas as pdimport numpy as npimport seaborn as snsimport torchimport torch.nn as nnimport torch.nn.functional as Ffrom torch.utils.data import DataLoaderfrom torch.utils.data import TensorDatasetimport matplotlib.pyplot as pltfrom sklearn.model_selection import train_test_splitfrom sklearn.metrics import accuracy_score, confusion_matrix, classification_reportfrom sklearn.preprocessing import StandardScaler# set default figure sizeplt.rcParams['figure.figsize'] = (15, 7.0)
# load the heart-attack dataset and preview the first few rows
csv_path = '../input/heart-attack-analysis-prediction-dataset/heart.csv'
heart_df = pd.read_csv(csv_path)
heart_df.head()
age sex cp trtbps chol fbs restecg thalachh exng oldpeak slp caa thall output
0 63 1 3 145 233 1 0 150 0 2.3 0 0 1 1
1 37 1 2 130 250 0 1 187 0 3.5 0 0 2 1
2 41 0 1 130 204 0 0 172 0 1.4 2 0 2 1
3 56 1 1 120 236 0 1 178 0 0.8 2 0 2 1
4 57 0 0 120 354 0 1 163 1 0.6 2 0 2 1
# describe the dataheart_df.describe()
age sex cp trtbps chol fbs restecg thalachh exng oldpeak slp caa thall output
count 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000
mean 54.366337 0.683168 0.966997 131.623762 246.264026 0.148515 0.528053 149.646865 0.326733 1.039604 1.399340 0.729373 2.313531 0.544554
std 9.082101 0.466011 1.032052 17.538143 51.830751 0.356198 0.525860 22.905161 0.469794 1.161075 0.616226 1.022606 0.612277 0.498835
min 29.000000 0.000000 0.000000 94.000000 126.000000 0.000000 0.000000 71.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
25% 47.500000 0.000000 0.000000 120.000000 211.000000 0.000000 0.000000 133.500000 0.000000 0.000000 1.000000 0.000000 2.000000 0.000000
50% 55.000000 1.000000 1.000000 130.000000 240.000000 0.000000 1.000000 153.000000 0.000000 0.800000 1.000000 0.000000 2.000000 1.000000
75% 61.000000 1.000000 2.000000 140.000000 274.500000 0.000000 1.000000 166.000000 1.000000 1.600000 2.000000 1.000000 3.000000 1.000000
max 77.000000 1.000000 3.000000 200.000000 564.000000 1.000000 2.000000 202.000000 1.000000 6.200000 2.000000 4.000000 3.000000 1.000000
# checking data typesheart_df.dtypes
age           int64sex           int64cp            int64trtbps        int64chol          int64fbs           int64restecg       int64thalachh      int64exng          int64oldpeak     float64slp           int64caa           int64thall         int64output        int64dtype: object
# drop duplicate rows if any.
# NOTE: drop_duplicates() returns a NEW DataFrame — the original code
# discarded the result, so duplicates were never actually removed.
heart_df = heart_df.drop_duplicates()
# check missing values per column
heart_df.isna().sum()
age         0sex         0cp          0trtbps      0chol        0fbs         0restecg     0thalachh    0exng        0oldpeak     0slp         0caa         0thall       0output      0dtype: int64
# check output column class distributionsns.countplot(x='output', data=heart_df).set_title("output Column Distribution")
Text(0.5, 1.0, 'output Column Distribution')

png

# check sex column class distributionsns.countplot(x='sex', data=heart_df).set_title("Sex Column Distribution")
Text(0.5, 1.0, 'Sex Column Distribution')

png

# box plot for output and cholestrol levelsns.boxplot(x="output",y="chol",data=heart_df)
<AxesSubplot:xlabel='output', ylabel='chol'>

png

# box plot for output and cholestrol levelsns.boxplot(x="output",y="thalachh",data=heart_df)
<AxesSubplot:xlabel='output', ylabel='thalachh'>

png

# box plot for output and cholestrol levelsns.boxplot(x="output",y="oldpeak",data=heart_df)
<AxesSubplot:xlabel='output', ylabel='oldpeak'>

png

# box plot for output and cholestrol levelsns.boxplot(x="output",y="age",data=heart_df)
<AxesSubplot:xlabel='output', ylabel='age'>

png

# count of samples at each distinct age value
ax = sns.countplot(x='age', data=heart_df)

png

# pairwise column correlations, shown as a lower-triangle heatmap
corr = heart_df.corr()
# hide the redundant upper triangle (the matrix is symmetric)
mask = np.triu(np.ones_like(corr, dtype=bool))
fig, ax = plt.subplots(figsize=(11, 9))
# diverging palette so positive/negative correlations read differently
cmap = sns.diverging_palette(230, 20, as_cmap=True)
sns.heatmap(
    corr,
    mask=mask,
    cmap=cmap,
    vmax=.3,
    center=0,
    square=True,
    linewidths=.5,
    cbar_kws={"shrink": .5},
).set_title("Columns Correlation")
Text(0.5, 1.0, 'Columns Correlation')

png

# split features/target for training
y = heart_df.output.to_numpy()
X = heart_df.drop('output', axis=1).to_numpy()
# split first, keeping the output class distribution consistent
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y)
# fit the scaler on the TRAINING data only, then apply it to both splits.
# The original code fit on the full dataset before splitting, which leaks
# test-set statistics (mean/std) into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
def df_to_tensor(df):
    """Convert a numpy array to a float32 torch tensor."""
    return torch.from_numpy(df).float()

# convert all four splits in one pass
X_traint, y_traint, X_testt, y_testt = (
    df_to_tensor(arr) for arr in (X_train, y_train, X_test, y_test)
)
# wrap the tensors in datasets, then build mini-batch loaders
batch_size = 5
train_ds = TensorDataset(X_traint, y_traint)
test_ds = TensorDataset(X_testt, y_testt)
# shuffle only the training data; evaluation order is irrelevant but fixed
train_dl = DataLoader(train_ds, batch_size, shuffle=True)
test_dl = DataLoader(test_ds, batch_size, shuffle=False)
class BinaryNetwork(nn.Module):
    """Feed-forward binary classifier: input -> 64 -> 32 -> 16 -> output.

    The final sigmoid squashes the logit into [0, 1] so the output can be
    fed straight into nn.BCELoss.
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        self.l1 = nn.Linear(input_size, 64)
        self.l2 = nn.Linear(64, 32)
        self.l3 = nn.Linear(32, 16)
        self.out = nn.Linear(16, output_size)

    def forward(self, x):
        # three hidden layers, each followed by ReLU
        for hidden in (self.l1, self.l2, self.l3):
            x = F.relu(hidden(x))
        # probability in [0, 1]
        return torch.sigmoid(self.out(x))
# instantiate the network, loss and optimiser
input_size = 13   # number of feature columns in the dataset
output_size = 1   # single probability for binary classification
model = BinaryNetwork(input_size, output_size)
# binary cross-entropy — matches the sigmoid output of the model
loss_fn = nn.BCELoss()
optim = torch.optim.Adam(model.parameters(), lr=1e-3)
model
BinaryNetwork(  (l1): Linear(in_features=13, out_features=64, bias=True)  (l2): Linear(in_features=64, out_features=32, bias=True)  (l3): Linear(in_features=32, out_features=16, bias=True)  (out): Linear(in_features=16, out_features=1, bias=True))
# training loop: accumulate the summed mini-batch loss per epoch
epochs = 100
losses = []
for epoch in range(epochs):
    epoch_loss = 0.0
    for feat, target in train_dl:
        optim.zero_grad()
        out = model(feat)
        # BCELoss needs the target shaped like the output: (batch, 1)
        loss = loss_fn(out, target.unsqueeze(1))
        epoch_loss += loss.item()
        loss.backward()
        optim.step()
    losses.append(epoch_loss)
    # report every 10 epochs. Print the accumulated epoch loss — the
    # original printed only the LAST mini-batch's loss, which is noisy
    # and not representative of the epoch.
    if epoch % 10 == 0:
        print(f"Epoch: {epoch}/{epochs}, Loss = {epoch_loss:.5f}")
Epoch: 0/100, Loss = 0.79641Epoch: 10/100, Loss = 0.03637Epoch: 20/100, Loss = 0.07704Epoch: 30/100, Loss = 0.02023Epoch: 40/100, Loss = 0.00084Epoch: 50/100, Loss = 0.00000Epoch: 60/100, Loss = 0.00001Epoch: 70/100, Loss = 0.00000Epoch: 80/100, Loss = 0.00018Epoch: 90/100, Loss = 0.00029
# plot how the training loss evolved per epoch
graph = sns.lineplot(x=list(range(epochs)), y=losses)
graph.set(title="Loss change during training", xlabel='epochs', ylabel='loss')
plt.show()

png

# evaluate on the held-out test set
model.eval()
y_pred_list = []
with torch.no_grad():
    for X, y in test_dl:
        probs = model(X)
        # threshold at 0.5 via rounding, then flatten each batch to 1-D
        y_pred_list.append(torch.round(probs).squeeze())
# confusion matrix — hstack concatenates the per-batch prediction tensors
preds = torch.hstack(y_pred_list)
cfm = confusion_matrix(y_test, preds)
# normalise to percentages of all samples for the cell annotations
sns.heatmap(cfm / np.sum(cfm), annot=True, fmt='.2%')
<AxesSubplot:>

png

# print metricsprint(classification_report(y_test, torch.hstack(y_pred_list)))
              precision    recall  f1-score   support           0       0.91      0.75      0.82        28           1       0.82      0.94      0.87        33    accuracy                           0.85        61   macro avg       0.86      0.84      0.85        61weighted avg       0.86      0.85      0.85        61