Project In Machine Learning | Credit Card Fraud Detection¶

Import the necessary packages¶

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sys
import scipy
import sklearn
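The sys, scipy and sklearn imports are otherwise unused here; presumably they are meant for environment checks. A minimal sketch (not part of the original notebook) that puts them to work:

# Print library versions, useful for reproducibility
print('Python: {}'.format(sys.version))
print('NumPy: {}'.format(np.__version__))
print('Pandas: {}'.format(pd.__version__))
print('SciPy: {}'.format(scipy.__version__))
print('Scikit-learn: {}'.format(sklearn.__version__))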

Loading the DataSet¶

In [3]:
data = pd.read_csv("creditcard.csv")
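This assumes creditcard.csv sits next to the notebook. A slightly more defensive load (a sketch; the path is an assumption) would fail with a clear message if the file is missing:

from pathlib import Path

csv_path = Path("creditcard.csv")  # assumed location, next to the notebook
if not csv_path.exists():
    raise FileNotFoundError(f"Dataset not found at {csv_path.resolve()}")
data = pd.read_csv(csv_path)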

Explore the DataSet¶

In [4]:
data.head()
Out[4]:
Time V1 V2 V3 V4 V5 V6 V7 V8 V9 ... V21 V22 V23 V24 V25 V26 V27 V28 Amount Class
0 0.0 -1.359807 -0.072781 2.536347 1.378155 -0.338321 0.462388 0.239599 0.098698 0.363787 ... -0.018307 0.277838 -0.110474 0.066928 0.128539 -0.189115 0.133558 -0.021053 149.62 0
1 0.0 1.191857 0.266151 0.166480 0.448154 0.060018 -0.082361 -0.078803 0.085102 -0.255425 ... -0.225775 -0.638672 0.101288 -0.339846 0.167170 0.125895 -0.008983 0.014724 2.69 0
2 1.0 -1.358354 -1.340163 1.773209 0.379780 -0.503198 1.800499 0.791461 0.247676 -1.514654 ... 0.247998 0.771679 0.909412 -0.689281 -0.327642 -0.139097 -0.055353 -0.059752 378.66 0
3 1.0 -0.966272 -0.185226 1.792993 -0.863291 -0.010309 1.247203 0.237609 0.377436 -1.387024 ... -0.108300 0.005274 -0.190321 -1.175575 0.647376 -0.221929 0.062723 0.061458 123.50 0
4 2.0 -1.158233 0.877737 1.548718 0.403034 -0.407193 0.095921 0.592941 -0.270533 0.817739 ... -0.009431 0.798278 -0.137458 0.141267 -0.206010 0.502292 0.219422 0.215153 69.99 0

5 rows × 31 columns

In [5]:
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 284807 entries, 0 to 284806
Data columns (total 31 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   Time    284807 non-null  float64
 1   V1      284807 non-null  float64
 2   V2      284807 non-null  float64
 3   V3      284807 non-null  float64
 4   V4      284807 non-null  float64
 5   V5      284807 non-null  float64
 6   V6      284807 non-null  float64
 7   V7      284807 non-null  float64
 8   V8      284807 non-null  float64
 9   V9      284807 non-null  float64
 10  V10     284807 non-null  float64
 11  V11     284807 non-null  float64
 12  V12     284807 non-null  float64
 13  V13     284807 non-null  float64
 14  V14     284807 non-null  float64
 15  V15     284807 non-null  float64
 16  V16     284807 non-null  float64
 17  V17     284807 non-null  float64
 18  V18     284807 non-null  float64
 19  V19     284807 non-null  float64
 20  V20     284807 non-null  float64
 21  V21     284807 non-null  float64
 22  V22     284807 non-null  float64
 23  V23     284807 non-null  float64
 24  V24     284807 non-null  float64
 25  V25     284807 non-null  float64
 26  V26     284807 non-null  float64
 27  V27     284807 non-null  float64
 28  V28     284807 non-null  float64
 29  Amount  284807 non-null  float64
 30  Class   284807 non-null  int64  
dtypes: float64(30), int64(1)
memory usage: 67.4 MB
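info() reports 284807 non-null values in every column, so the dataset appears to have no missing values. A one-line confirmation (a sketch):

# Total number of missing values across all 31 columns; expected to print 0
print(data.isnull().sum().sum())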
In [6]:
print(data.columns)
Index(['Time', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10',
       'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20',
       'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28', 'Amount',
       'Class'],
      dtype='object')
In [7]:
print(data.shape)
(284807, 31)
In [8]:
print(data.describe())
                Time            V1            V2            V3            V4  \
count  284807.000000  2.848070e+05  2.848070e+05  2.848070e+05  2.848070e+05   
mean    94813.859575  1.165980e-15  3.416908e-16 -1.373150e-15  2.086869e-15   
std     47488.145955  1.958696e+00  1.651309e+00  1.516255e+00  1.415869e+00   
min         0.000000 -5.640751e+01 -7.271573e+01 -4.832559e+01 -5.683171e+00   
25%     54201.500000 -9.203734e-01 -5.985499e-01 -8.903648e-01 -8.486401e-01   
50%     84692.000000  1.810880e-02  6.548556e-02  1.798463e-01 -1.984653e-02   
75%    139320.500000  1.315642e+00  8.037239e-01  1.027196e+00  7.433413e-01   
max    172792.000000  2.454930e+00  2.205773e+01  9.382558e+00  1.687534e+01   

                 V5            V6            V7            V8            V9  \
count  2.848070e+05  2.848070e+05  2.848070e+05  2.848070e+05  2.848070e+05   
mean   9.604066e-16  1.490107e-15 -5.556467e-16  1.177556e-16 -2.406455e-15   
std    1.380247e+00  1.332271e+00  1.237094e+00  1.194353e+00  1.098632e+00   
min   -1.137433e+02 -2.616051e+01 -4.355724e+01 -7.321672e+01 -1.343407e+01   
25%   -6.915971e-01 -7.682956e-01 -5.540759e-01 -2.086297e-01 -6.430976e-01   
50%   -5.433583e-02 -2.741871e-01  4.010308e-02  2.235804e-02 -5.142873e-02   
75%    6.119264e-01  3.985649e-01  5.704361e-01  3.273459e-01  5.971390e-01   
max    3.480167e+01  7.330163e+01  1.205895e+02  2.000721e+01  1.559499e+01   

       ...           V21           V22           V23           V24  \
count  ...  2.848070e+05  2.848070e+05  2.848070e+05  2.848070e+05   
mean   ...  1.656562e-16 -3.444850e-16  2.578648e-16  4.471968e-15   
std    ...  7.345240e-01  7.257016e-01  6.244603e-01  6.056471e-01   
min    ... -3.483038e+01 -1.093314e+01 -4.480774e+01 -2.836627e+00   
25%    ... -2.283949e-01 -5.423504e-01 -1.618463e-01 -3.545861e-01   
50%    ... -2.945017e-02  6.781943e-03 -1.119293e-02  4.097606e-02   
75%    ...  1.863772e-01  5.285536e-01  1.476421e-01  4.395266e-01   
max    ...  2.720284e+01  1.050309e+01  2.252841e+01  4.584549e+00   

                V25           V26           V27           V28         Amount  \
count  2.848070e+05  2.848070e+05  2.848070e+05  2.848070e+05  284807.000000   
mean   5.340915e-16  1.687098e-15 -3.666453e-16 -1.220404e-16      88.349619   
std    5.212781e-01  4.822270e-01  4.036325e-01  3.300833e-01     250.120109   
min   -1.029540e+01 -2.604551e+00 -2.256568e+01 -1.543008e+01       0.000000   
25%   -3.171451e-01 -3.269839e-01 -7.083953e-02 -5.295979e-02       5.600000   
50%    1.659350e-02 -5.213911e-02  1.342146e-03  1.124383e-02      22.000000   
75%    3.507156e-01  2.409522e-01  9.104512e-02  7.827995e-02      77.165000   
max    7.519589e+00  3.517346e+00  3.161220e+01  3.384781e+01   25691.160000   

               Class  
count  284807.000000  
mean        0.001727  
std         0.041527  
min         0.000000  
25%         0.000000  
50%         0.000000  
75%         0.000000  
max         1.000000  

[8 rows x 31 columns]

Since the dataset contains 284,807 transactions, we will only examine a 10% sample of the data.

In [9]:
data = data.sample(frac = 0.1, random_state = 1)

print(data.shape)
(28481, 31)
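Note that a plain random sample does not guarantee that the 0.17% fraud rate is preserved exactly. If that matters, a stratified 10% sample keeps the class ratio intact; a sketch using sklearn's train_test_split, meant to run in place of the data.sample(...) call above on the full DataFrame (data_10pct is a hypothetical name):

from sklearn.model_selection import train_test_split

# Alternative to data.sample(frac=0.1): take 10% of the rows while
# preserving the fraud/valid class ratio exactly
_, data_10pct = train_test_split(data, test_size=0.1,
                                 stratify=data['Class'], random_state=1)
print(data_10pct['Class'].value_counts())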

Plot Histogram of each parameter¶

In [10]:
data.hist(figsize = (20, 20));

Determine number of fraud cases in Dataset¶

In [11]:
Fraud = data[data['Class'] == 1]
Valid = data[data['Class'] == 0]

outlier_fraction = len(Fraud) / float(len(Valid))
print(outlier_fraction)

print('Fraud Cases: {}'.format(len(Fraud)))
print('Valid Cases: {}'.format(len(Valid)))
0.0017234102419808666
Fraud Cases: 49
Valid Cases: 28432
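Beyond the raw counts, it can be useful to compare transaction amounts between the two groups; a quick look (a sketch reusing the Fraud and Valid frames defined above):

# Summary statistics of the transaction amount per class
print(Fraud['Amount'].describe())
print(Valid['Amount'].describe())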

Correlation matrix¶

In [12]:
corrmat = data.corr()
fig = plt.figure(figsize = (12, 9))

sns.heatmap(corrmat, vmax = .8, square = True)
Out[12]:
<AxesSubplot:>
In [13]:
corrmat
Out[13]:
Time V1 V2 V3 V4 V5 V6 V7 V8 V9 ... V21 V22 V23 V24 V25 V26 V27 V28 Amount Class
Time 1.000000 0.126475 -0.001584 -0.413547 -0.104527 0.182205 -0.060483 0.078924 -0.040474 -0.008428 ... 0.041323 0.150603 0.047941 -0.020018 -0.229491 -0.048131 -0.005541 -0.004339 -0.026969 -0.005087
V1 0.126475 1.000000 0.048796 0.015452 -0.010592 0.019888 0.006417 -0.020583 -0.003013 0.001658 ... -0.016415 0.014896 0.049447 -0.003709 0.014055 0.007203 -0.011545 0.085035 -0.262703 -0.079820
V2 -0.001584 0.048796 1.000000 0.027270 -0.022539 0.009666 -0.004411 -0.013456 0.015662 0.003456 ... -0.020127 0.021923 0.047591 -0.011386 0.011838 0.005366 -0.009611 0.084873 -0.556401 0.069598
V3 -0.413547 0.015452 0.027270 1.000000 -0.005423 0.013997 -0.006903 -0.024640 -0.025529 0.002525 ... -0.006083 0.014177 0.042603 -0.001883 0.005975 0.006869 -0.017094 0.029973 -0.225099 -0.160051
V4 -0.104527 -0.010592 -0.022539 -0.005423 1.000000 -0.003708 0.002029 0.004432 0.011659 -0.004395 ... -0.004423 -0.011251 -0.017682 0.001829 -0.009692 0.004087 0.024489 -0.024554 0.111692 0.122631
V5 0.182205 0.019888 0.009666 0.013997 -0.003708 1.000000 -0.016656 -0.037463 -0.013263 -0.008506 ... 0.002288 0.022065 0.064703 -0.007184 0.006493 0.000048 -0.027934 0.010991 -0.397437 -0.073519
V6 -0.060483 0.006417 -0.004411 -0.006903 0.002029 -0.016656 1.000000 0.006923 0.003695 -0.002762 ... 0.004490 -0.003705 -0.036726 0.001428 -0.015012 0.009938 -0.004811 -0.009772 0.213007 -0.035085
V7 0.078924 -0.020583 -0.013456 -0.024640 0.004432 -0.037463 0.006923 1.000000 -0.028291 -0.005510 ... 0.007012 -0.013871 -0.055242 0.002899 -0.016941 -0.000075 -0.012973 -0.037593 0.417814 -0.134247
V8 -0.040474 -0.003013 0.015662 -0.025529 0.011659 -0.013263 0.003695 -0.028291 1.000000 -0.018645 ... -0.005651 -0.004195 0.030092 -0.008821 0.017298 0.015385 0.008495 0.015525 -0.102221 0.024896
V9 -0.008428 0.001658 0.003456 0.002525 -0.004395 -0.008506 -0.002762 -0.005510 -0.018645 1.000000 ... 0.009462 -0.002297 0.002360 0.007441 -0.009149 -0.003652 -0.011701 -0.026290 -0.039773 -0.079962
V10 0.035939 0.010686 0.017218 -0.006955 -0.000669 0.011446 -0.003120 -0.035149 -0.017995 -0.021718 ... 0.001434 0.006397 0.010754 0.000734 -0.010712 0.006086 -0.025756 -0.017631 -0.122025 -0.191189
V11 -0.237613 0.007177 -0.002982 0.000836 0.007657 0.006286 0.001582 0.000863 -0.002562 0.000587 ... 0.009261 -0.000061 -0.004784 -0.002600 -0.001712 -0.002694 -0.004893 -0.000608 -0.000394 0.140513
V12 0.126985 -0.012290 0.000646 -0.008236 0.005942 -0.011393 -0.000219 -0.008740 -0.009387 0.002794 ... -0.016599 -0.002456 -0.000431 -0.004571 -0.003269 -0.004400 0.002477 -0.008220 -0.000972 -0.244444
V13 -0.069353 -0.018849 0.001655 0.004030 -0.011637 -0.002728 -0.001591 0.002445 0.012011 0.001733 ... 0.002637 0.004413 -0.017930 0.006788 -0.002572 -0.000929 0.005606 -0.014200 0.009694 -0.003380
V14 -0.093264 -0.001905 -0.006617 -0.016702 0.003485 0.001548 -0.007828 -0.001300 -0.001876 0.004310 ... -0.007459 0.001944 0.005999 -0.003827 0.006148 -0.004120 0.014247 0.011290 0.035498 -0.296297
V15 -0.182255 -0.012884 0.002046 0.004421 -0.000575 -0.006731 0.006407 -0.003969 -0.000946 -0.014436 ... 0.000210 0.002279 -0.013669 0.009640 -0.014448 0.004390 0.005639 -0.020654 0.000165 -0.003760
V16 0.007392 -0.015569 -0.003062 -0.012780 -0.004783 -0.014823 0.009237 -0.010928 -0.000048 0.000286 ... -0.007622 0.004838 -0.026575 0.000209 -0.017642 -0.014824 0.002792 -0.018434 0.008798 -0.175216
V17 -0.074555 -0.009644 0.003567 -0.017722 0.012553 -0.013090 0.009201 -0.017525 -0.020343 -0.007033 ... -0.015777 0.005118 0.013149 0.001230 -0.010343 0.005719 0.000143 0.007173 0.012607 -0.293225
V18 0.083959 -0.011822 -0.005922 0.001056 0.001852 0.000902 -0.002719 -0.015221 -0.013191 -0.001036 ... -0.003275 -0.011108 0.008045 -0.001235 0.004092 -0.007275 -0.002274 0.004075 0.038996 -0.098311
V19 0.019469 0.003860 0.011950 0.020282 0.001759 0.001468 -0.005205 0.002621 0.003453 -0.003865 ... 0.004207 0.003725 0.000711 -0.004049 0.003764 0.012193 -0.000812 -0.012631 -0.067748 0.025784
V20 -0.050967 -0.017883 -0.054467 0.003068 0.015348 0.005350 -0.008991 0.023011 0.013911 -0.012547 ... 0.014580 0.000622 0.064035 -0.001015 0.013898 -0.008713 -0.015707 0.074731 0.367057 0.005640
V21 0.041323 -0.016415 -0.020127 -0.006083 -0.004423 0.002288 0.004490 0.007012 -0.005651 0.009462 ... 1.000000 -0.011120 0.001835 -0.005780 0.004783 0.001042 -0.016617 0.035645 0.121948 0.037570
V22 0.150603 0.014896 0.021923 0.014177 -0.011251 0.022065 -0.003705 -0.013871 -0.004195 -0.002297 ... -0.011120 1.000000 0.015702 0.005390 -0.006371 0.008263 0.004071 0.002156 -0.095698 -0.001683
V23 0.047941 0.049447 0.047591 0.042603 -0.017682 0.064703 -0.036726 -0.055242 0.030092 0.002360 ... 0.001835 0.015702 1.000000 -0.009760 0.001382 -0.005050 -0.052343 0.011088 -0.192711 -0.005856
V24 -0.020018 -0.003709 -0.011386 -0.001883 0.001829 -0.007184 0.001428 0.002899 -0.008821 0.007441 ... -0.005780 0.005390 -0.009760 1.000000 0.001870 -0.010473 0.008380 -0.015790 0.017335 -0.003727
V25 -0.229491 0.014055 0.011838 0.005975 -0.009692 0.006493 -0.015012 -0.016941 0.017298 -0.009149 ... 0.004783 -0.006371 0.001382 0.001870 1.000000 0.000248 -0.014799 -0.024547 -0.064454 0.011958
V26 -0.048131 0.007203 0.005366 0.006869 0.004087 0.000048 0.009938 -0.000075 0.015385 -0.003652 ... 0.001042 0.008263 -0.005050 -0.010473 0.000248 1.000000 0.002964 -0.008006 -0.015768 -0.001884
V27 -0.005541 -0.011545 -0.009611 -0.017094 0.024489 -0.027934 -0.004811 -0.012973 0.008495 -0.011701 ... -0.016617 0.004071 -0.052343 0.008380 -0.014799 0.002964 1.000000 -0.007671 0.039471 0.024421
V28 -0.004339 0.085035 0.084873 0.029973 -0.024554 0.010991 -0.009772 -0.037593 0.015525 -0.026290 ... 0.035645 0.002156 0.011088 -0.015790 -0.024547 -0.008006 -0.007671 1.000000 -0.033855 0.014344
Amount -0.026969 -0.262703 -0.556401 -0.225099 0.111692 -0.397437 0.213007 0.417814 -0.102221 -0.039773 ... 0.121948 -0.095698 -0.192711 0.017335 -0.064454 -0.015768 0.039471 -0.033855 1.000000 0.012804
Class -0.005087 -0.079820 0.069598 -0.160051 0.122631 -0.073519 -0.035085 -0.134247 0.024896 -0.079962 ... 0.037570 -0.001683 -0.005856 -0.003727 0.011958 -0.001884 0.024421 0.014344 0.012804 1.000000

31 rows × 31 columns
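The full 31 × 31 matrix is hard to read at a glance; for this problem the interesting column is the correlation of each feature with Class. Sorting it (a sketch) surfaces the strongest signals, such as V14, V17 and V12 in the table above:

# Features ranked by absolute correlation with the Class label
print(corrmat['Class'].abs().sort_values(ascending=False).head(10))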

Machine Learning¶

In [14]:
# Get all the columns from the DataFrame
columns = data.columns.tolist()

# Filter the columns to remove data we do not want
columns = [c for c in columns if c not in ['Class']]

# Store the variable we'll be predicting on
target = 'Class'

X = data[columns]
Y = data[target]

# Print the shapes of X and Y
print(X.shape)
print(Y.shape)
(28481, 30)
(28481,)
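The same split can be written more compactly with DataFrame.drop; an equivalent alternative (a sketch):

# Equivalent, more idiomatic feature/target split
X = data.drop(columns=['Class'])
Y = data['Class']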
In [15]:
from sklearn.metrics import classification_report, accuracy_score
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
In [16]:
# define a random state
state = 1

# define the outlier detection methods
classifiers = {
    "Isolation Forest": IsolationForest(max_samples=len(X),
                                        contamination=outlier_fraction,
                                        random_state=state),
    "Local Outlier Factor": LocalOutlierFactor(n_neighbors=20,
                                               contamination=outlier_fraction)
}
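Note that LocalOutlierFactor, as configured here, only supports fit_predict on the data it was fitted on. To score new, unseen transactions it would need novelty=True, in which case fit and predict are called separately. A sketch of that variant (X_train and X_new are hypothetical splits):

# Hypothetical variant for scoring unseen transactions
lof_novelty = LocalOutlierFactor(n_neighbors=20,
                                 contamination=outlier_fraction,
                                 novelty=True)
# lof_novelty.fit(X_train)             # X_train: hypothetical training split
# y_new = lof_novelty.predict(X_new)   # -1 = outlier, 1 = inlier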

Fit the model¶

In [17]:
n_outliers = len(Fraud)

for i, (clf_name, clf) in enumerate(classifiers.items()):
    
    # fit data and tag outliers
    if clf_name == 'Local Outlier Factor':
        y_pred = clf.fit_predict(X)
        scores_pred = clf.negative_outlier_factor_
    else:
        clf.fit(X)
        scores_pred = clf.decision_function(X)
        y_pred = clf.predict(X)
        
    # Relabel the predictions: 0 for valid transactions, 1 for fraud
    y_pred[y_pred == 1] = 0
    y_pred[y_pred == -1] = 1
    
    n_errors = (y_pred != Y).sum()
    
    # Run classification metrics
    print('{}: {}'.format(clf_name, n_errors))
    print(accuracy_score(Y, y_pred))
    print(classification_report(Y, y_pred))
    
Isolation Forest: 71
0.99750711000316
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     28432
           1       0.28      0.29      0.28        49

    accuracy                           1.00     28481
   macro avg       0.64      0.64      0.64     28481
weighted avg       1.00      1.00      1.00     28481

Local Outlier Factor: 97
0.9965942207085425
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     28432
           1       0.02      0.02      0.02        49

    accuracy                           1.00     28481
   macro avg       0.51      0.51      0.51     28481
weighted avg       1.00      1.00      1.00     28481

The classification report summarizes the key metrics of a classification problem.

It gives precision, recall, f1-score and support for each class you are trying to find.

  • Recall means "how many elements of this class did we find, out of the total number of elements of this class?"
  • Precision means "of the elements we assigned to this class, how many were classified correctly?"
  • The f1-score is the harmonic mean of precision and recall.
  • The support is the number of occurrences of the given class in the dataset (so we have 28432 of class 0 and only 49 of class 1, which is a highly imbalanced dataset). These numbers are worked through concretely below.
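To make these definitions concrete with the Isolation Forest results above: with 49 true frauds, a recall of 0.29 implies about 14 true positives (14 / 49 ≈ 0.29); a precision of 0.28 then implies about 36 false positives (14 / 50 = 0.28); and 35 missed frauds plus 36 false alarms account for the 71 errors reported. The same quantities can be computed from a confusion matrix (a sketch; run after the loop it would describe the Local Outlier Factor predictions, so to inspect Isolation Forest it belongs inside the loop):

from sklearn.metrics import confusion_matrix

# Rows are the true class (0 = valid, 1 = fraud), columns the predicted class
tn, fp, fn, tp = confusion_matrix(Y, y_pred).ravel()
print('precision =', tp / (tp + fp))  # share of flagged transactions that are fraud
print('recall    =', tp / (tp + fn))  # share of real frauds that were flagged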


Isolation Forest achieves 99.75% accuracy with precision and recall of about 30% (0.28 and 0.29) on the fraud class, far better than Local Outlier Factor at about 2% (0.02). Statistically, this means we could detect and block roughly 30% of fraudulent transactions.
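scores_pred is computed in the loop but never used. A natural, threshold-free way to compare the two detectors would be ROC-AUC on the anomaly scores (a sketch meant to go inside the loop; in both APIs a lower score means more anomalous, hence the sign flip):

from sklearn.metrics import roc_auc_score

# Negate the scores so that larger values indicate more anomalous points
print('{} ROC-AUC: {:.3f}'.format(clf_name, roc_auc_score(Y, -scores_pred)))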
