This notebook classifies credit card transactions as fraudulent or non-fraudulent. The dataset is a set of PCA features extracted from the original data in order to conceal the identities of the parties involved.

# install torchsummary!pip install -q torchsummary
WARNING: Running pip as root will break packages and permissions. You should install packages reliably by using venv: https://pip.pypa.io/warnings/venv
import torchimport torch.nn as nnimport pandas as pdimport matplotlib.pyplot as pltimport seaborn as snsfrom numpy import sumfrom sklearn.metrics import confusion_matrix, classification_reportfrom sklearn.model_selection import train_test_splitfrom sklearn.preprocessing import StandardScalerfrom torch.utils.data import TensorDataset, DataLoaderfrom tqdm import tqdmfrom torchsummary import summary# set figure sizeplt.rcParams["figure.figsize"] = (14,7)
# load the transactions table and preview its first rows
csv_path = "../input/creditcardfraud/creditcard.csv"
df = pd.read_csv(csv_path)
df.head()
Time V1 V2 V3 V4 V5 V6 V7 V8 V9 ... V21 V22 V23 V24 V25 V26 V27 V28 Amount Class
0 0.0 -1.359807 -0.072781 2.536347 1.378155 -0.338321 0.462388 0.239599 0.098698 0.363787 ... -0.018307 0.277838 -0.110474 0.066928 0.128539 -0.189115 0.133558 -0.021053 149.62 0
1 0.0 1.191857 0.266151 0.166480 0.448154 0.060018 -0.082361 -0.078803 0.085102 -0.255425 ... -0.225775 -0.638672 0.101288 -0.339846 0.167170 0.125895 -0.008983 0.014724 2.69 0
2 1.0 -1.358354 -1.340163 1.773209 0.379780 -0.503198 1.800499 0.791461 0.247676 -1.514654 ... 0.247998 0.771679 0.909412 -0.689281 -0.327642 -0.139097 -0.055353 -0.059752 378.66 0
3 1.0 -0.966272 -0.185226 1.792993 -0.863291 -0.010309 1.247203 0.237609 0.377436 -1.387024 ... -0.108300 0.005274 -0.190321 -1.175575 0.647376 -0.221929 0.062723 0.061458 123.50 0
4 2.0 -1.158233 0.877737 1.548718 0.403034 -0.407193 0.095921 0.592941 -0.270533 0.817739 ... -0.009431 0.798278 -0.137458 0.141267 -0.206010 0.502292 0.219422 0.215153 69.99 0

5 rows × 31 columns

# some stats about the data
# NOTE: df.size counts cells (rows * columns); len(df) is the number of rows,
# i.e. the number of transactions.
class_counts = df["Class"].value_counts()
print(f"Number of data points: {len(df)}")
print(f"Number of Fradulant Transactions: {class_counts[1]}")
print(f"Number of non-fradulant Transactions: {class_counts[0]}\n\n")
sns.countplot(x=df["Class"], palette="YlGnBu").set(title="Class Balance Between Transcation Types")
plt.show()
Number of data points: 8829017Number of Fradulant Transactions: 492Number of non-fradulant Transactions: 284315

png

There is a huge class imbalance in the data, which might lead to a biased model. We can mitigate this by using only as many class 0 samples as there are class 1 samples while training, or we could generate some additional sample data from the given features.

# Amount per transaction for each type
# seaborn colours the hue levels through `palette`; a matplotlib-style `cmap`
# keyword is not used for that purpose.
sns.scatterplot(
    data=df.reset_index(),
    x="index",
    y="Amount",
    hue="Class",
    palette="YlGnBu",
).set(title="Amount per transaction")
plt.show()

png

Fraudulent transactions don't tend to involve a large sum of cash per transaction. We can confirm this by calculating some statistics, such as max, min and mean, for each type of transaction.

# Amount statistics per class. Class 0 is the non-fraudulent class and class 1
# the fraudulent one, so label the printouts accordingly.
for class_id, class_name in ((0, "Not Fraudulent"), (1, "Fraudulent")):
    print(f"{class_name} transactions statistics")
    print(df.loc[df["Class"] == class_id, "Amount"].describe(), "\n\n")
Positive transactions statisticscount    284315.000000mean         88.291022std         250.105092min           0.00000025%           5.65000050%          22.00000075%          77.050000max       25691.160000Name: Amount, dtype: float64Negative transactions statisticscount     492.000000mean      122.211321std       256.683288min         0.00000025%         1.00000050%         9.25000075%       105.890000max      2125.870000Name: Amount, dtype: float64
# split data into features and target
X = df.drop("Class", axis=1)
y = df["Class"]

# split data to train and test BEFORE scaling, so that the test rows never
# contribute to the scaler's mean / standard deviation (avoids data leakage)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, stratify=y  # stratify keeps class balance
)

# scale the values of x (better training): fit on the training rows only,
# then apply the same transformation to the test rows
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# create tensor datasets: features and label values are converted to float
# tensors, then the training pair is wrapped in a TensorDataset
X_train, X_test = (torch.FloatTensor(features) for features in (X_train, X_test))
y_train, y_test = (torch.FloatTensor(labels.values) for labels in (y_train, y_test))
train_ds = TensorDataset(X_train, y_train)
# use the GPU when torch reports one, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)
cuda
# create dataloaders
batch_size = 128
# reshuffle the training samples at every epoch so the mini-batches are not
# served in the same fixed order each time
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
# Network Architecture
class FraudNet(nn.Module):
    """Fully connected classifier that emits two scores per input row.

    The network is an input Linear+ReLU stage, followed by ``num_layers``
    Linear+ReLU stages of width ``hidden_dim``, followed by a final Linear
    layer with 2 outputs.

    Args:
        input_dim: number of features in each input row.
        hidden_dim: width of every hidden layer.
        num_layers: how many hidden Linear+ReLU stages to stack.
    """

    def __init__(self, input_dim, hidden_dim, num_layers=4):
        super().__init__()
        # projection from the feature space to the hidden width
        self.input = nn.Sequential(nn.Linear(input_dim, hidden_dim), nn.ReLU())

        # configurable stack of hidden stages (Linear followed by ReLU)
        hidden_stack = []
        for _ in range(num_layers):
            hidden_stack += [nn.Linear(hidden_dim, hidden_dim), nn.ReLU()]
        self.layers = nn.ModuleList(hidden_stack)

        # final layer: one score per class
        self.fc = nn.Linear(hidden_dim, 2)

    def forward(self, x):
        """Run ``x`` through every stage in order and return the final layer's output."""
        hidden = self.input(x)
        for module in self.layers:
            hidden = module(hidden)
        logits = self.fc(hidden)
        return logits
# training function
def train_model(model, epochs, loss_fn, optimizer, dataloader=None):
    """Train ``model`` in place, showing one progress bar per epoch.

    Args:
        model: network to optimise; it is switched to training mode.
        epochs: number of full passes over the batches.
        loss_fn: callable invoked as ``loss_fn(predictions, targets)`` where
            the targets have been cast to ``long``; must return a tensor
            supporting ``backward()`` and ``item()``.
        optimizer: optimizer whose ``zero_grad()`` / ``step()`` are called
            once per batch.
        dataloader: iterable yielding ``(data, target)`` batches. When left
            as ``None`` the module-level ``train_dl`` is used, which matches
            the previous behaviour.
    """
    if dataloader is None:
        dataloader = train_dl

    model.train()
    for epoch in range(epochs):
        with tqdm(dataloader, unit="batch") as tepoch:
            # the description only depends on the epoch, so set it once per
            # bar instead of forcing a refresh on every batch
            tepoch.set_description(f"Epoch {epoch}")
            for data, target in tepoch:
                data, target = data.to(device), target.to(device)
                optimizer.zero_grad()
                preds = model(data)
                # targets are cast to long before being handed to the loss
                loss = loss_fn(preds, target.long())
                loss.backward()
                optimizer.step()
                tepoch.set_postfix(loss=loss.item())
# the network takes one value per feature column and uses the same width
# for its hidden layers
inp_size = X_train.shape[1]
model = FraudNet(inp_size, inp_size).to(device)
loss = nn.CrossEntropyLoss()
optim = torch.optim.Adam(model.parameters(), lr=1e-4)

# summarize the model layers
# the input size describes ONE sample without the batch dimension; each sample
# is a flat vector of inp_size features, not an inp_size x inp_size matrix
summary(model, (inp_size,))
----------------------------------------------------------------        Layer (type)               Output Shape         Param #================================================================            Linear-1               [-1, 30, 30]             930              ReLU-2               [-1, 30, 30]               0            Linear-3               [-1, 30, 30]             930              ReLU-4               [-1, 30, 30]               0            Linear-5               [-1, 30, 30]             930              ReLU-6               [-1, 30, 30]               0            Linear-7               [-1, 30, 30]             930              ReLU-8               [-1, 30, 30]               0            Linear-9               [-1, 30, 30]             930             ReLU-10               [-1, 30, 30]               0           Linear-11                [-1, 30, 2]              62================================================================Total params: 4,712Trainable params: 4,712Non-trainable params: 0----------------------------------------------------------------Input size (MB): 0.00Forward/backward pass size (MB): 0.07Params size (MB): 0.02Estimated Total Size (MB): 0.09----------------------------------------------------------------
# number of full passes over the training batches
epochs = 10
train_model(model=model, epochs=epochs, loss_fn=loss, optimizer=optim)
Epoch 0: 100%|██████████| 1558/1558 [00:11<00:00, 137.25batch/s, loss=0.000822]Epoch 1: 100%|██████████| 1558/1558 [00:11<00:00, 138.96batch/s, loss=0.000756]Epoch 2: 100%|██████████| 1558/1558 [00:11<00:00, 139.36batch/s, loss=0.0022]Epoch 3: 100%|██████████| 1558/1558 [00:11<00:00, 133.62batch/s, loss=0.00293]Epoch 4: 100%|██████████| 1558/1558 [00:11<00:00, 137.87batch/s, loss=0.00276]Epoch 5: 100%|██████████| 1558/1558 [00:11<00:00, 137.02batch/s, loss=0.00241]Epoch 6: 100%|██████████| 1558/1558 [00:11<00:00, 135.01batch/s, loss=0.00206]Epoch 7: 100%|██████████| 1558/1558 [00:11<00:00, 139.75batch/s, loss=0.0018]Epoch 8: 100%|██████████| 1558/1558 [00:11<00:00, 134.36batch/s, loss=0.00159]Epoch 9: 100%|██████████| 1558/1558 [00:11<00:00, 138.24batch/s, loss=0.00143]
model.eval()
# inference only: disable gradient tracking so no autograd graph is built for
# the forward pass over the whole test set
with torch.no_grad():
    preds = model(X_test.to(device)).argmax(dim=1)
print(classification_report(y_test, preds.cpu()))
              precision    recall  f1-score   support         0.0       1.00      1.00      1.00     85295         1.0       0.90      0.76      0.82       148    accuracy                           1.00     85443   macro avg       0.95      0.88      0.91     85443weighted avg       1.00      1.00      1.00     85443
class_def = {0 : "Not Fraudulent", 1 : "Fraudulent"}
cm_df = pd.DataFrame(confusion_matrix(y_test, preds.cpu())).rename(columns=class_def, index=class_def)
# Normalise every predicted-label column by its own total, so each column
# adds up to 100%. The axis is spelled out explicitly instead of relying on
# what numpy's sum() does with a DataFrame when no axis is given.
cm_df = cm_df / cm_df.sum(axis=0)
sns.heatmap(cm_df, annot=True, fmt='0.2%', cmap="YlGnBu").set(title="Confusion Matrix", xlabel="Predicted Label", ylabel="True Label")
plt.show()

png

As expected, the model has some trouble classifying fraudulent transactions. This can be addressed in multiple ways:

  • use the same amount of data for both classes
  • generate more data for fraudulent class 1
  • use a deeper network (more layers)
  • use a different network architecture
  • use other algorithms