Spaces:

saifhmb
/

Fraud-Detection-ML-App

Running

App Files Files Community

saifhmb committed on Jun 27, 2024

Commit

01cc70d

unverified ·

1 Parent(s): be24054

Create app.py

Browse files

Files changed (1) hide show

app.py +118 -0

app.py ADDED Viewed

	@@ -0,0 +1,118 @@

+# importing libraries
+from datasets import load_dataset, load_dataset_builder
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+import sklearn
+from sklearn.compose import ColumnTransformer
+from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler
+from sklearn.pipeline import Pipeline
+from sklearn.model_selection import train_test_split
+from sklearn.naive_bayes import GaussianNB
+from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, accuracy_score, precision_score, recall_score, classification_report
+import imblearn
+from imblearn.under_sampling import RandomUnderSampler
+from skops import hub_utils
+import pickle
+from skops.card import Card, metadata_from_config
+from pathlib import Path
+from tempfile import mkdtemp, mkstemp
+# Loading the dataset
+dataset_name = "saifhmb/FraudPaymentData"
+dataset = load_dataset(dataset_name, split = 'train')
+dataset = pd.DataFrame(dataset)
+dataset = dataset.dropna()
+dataset = dataset.drop(['Time_step','Transaction_Id','Sender_Id', 'Sender_Account','Bene_Id','Bene_Account'], axis = 1) #  deleting high cardinality features
+y = dataset.iloc[:, 5].values
+dataset = dataset.drop(['Label'], axis = 1)
+dataset = dataset.drop(['Sender_lob', 'Sender_Sector'], axis = 1) # delete column since there is only a single unique value for 'Sender_lob' and 'Sender_sector' is a high cardinal feature
+# Encoding the Independent Variables
+categoricalColumns = ['Sender_Country', 'Bene_Country', 'Transaction_Type']
+onehot_categorical = OneHotEncoder(handle_unknown='ignore', sparse_output= False)
+categorical_transformer = Pipeline(steps = [('onehot', onehot_categorical)])
+numericalColumns = dataset.select_dtypes(include = np.number).columns
+sc = StandardScaler()
+numerical_transformer = Pipeline(steps = [('scale', sc)])
+preprocessorForCategoricalColumns = ColumnTransformer(transformers=[('cat', categorical_transformer, categoricalColumns)], remainder ='passthrough')
+preprocessorForAllColumns = ColumnTransformer(transformers=[('cat', categorical_transformer, categoricalColumns),('num',numerical_transformer,numericalColumns)],
+                                            remainder="passthrough")
+# Spliting the datset into Training and Test set
+X = dataset
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.15, random_state = 42) # random state is 0 or 42
+# Train Naive Bayes Model using the Training set
+# Handling imbalanced dataset
+under_sampler = RandomUnderSampler()
+X_under, y_under = under_sampler.fit_resample(X_train, y_train)
+classifier = GaussianNB() # select the appropriate algorithm for the problem statement
+model = Pipeline(steps = [('preprocessorAll', preprocessorForAllColumns),('classifier', classifier)])
+model.fit(X_under, y_under)
+# Predicting the Test result
+y_pred = model.predict(X_test)
+# Making the Confusion Matrix and evaluating performance
+cm = confusion_matrix(y_pred, y_test, labels=model.classes_)
+disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=np.array(['0 - Normal', '1 - Fraudulent']))
+disp.plot()
+plt.show()
+acc = accuracy_score(y_test, y_pred)
+# Pickling the model
+pickle_out = open("model.pkl", "wb")
+pickle.dump(model, pickle_out)
+pickle_out.close()
+# Loading the model to predict on the data
+pickle_in = open('model.pkl', 'rb')
+model = pickle.load(pickle_in)
+def welcome():
+    return 'welcome all'
+# defining the function which will make the prediction using the data which the user inputs
+def prediction(Sender_Country, Bene_Country, USD_amount, Transaction_Type):
+  X = pd.DataFrame([[Sender_Country, Bene_Country, USD_amount, Transaction_Type]], columns = ['Sender_Country', 'Bene_Country', 'USD_amount', 'Transaction_Type'])
+  prediction = model.predict(X)
+  print(prediction)
+  return prediction
+# this is the main function in which we define our webpage
+def main():
+      # giving the webpage a title
+    st.title("Fraud Detection ML App")
+    st.header("Model Description", divider = "gray")
+    multi = '''This is a Gaussian Naive Bayes model trained on a synthetic dataset, containining a large variety of transaction types representing normal activities
+    as well as abnormal/fraudulent activities. The model predicts whether a transaction is normal or fraudulent.
+    For more details on the model please refer to the model card at https://huggingface.co/saifhmb/fraud-detection-model
+    '''
+    st.markdown(multi)
+    st.markdown("To determine whether a transaction is normal or fraudulent, please **ENTER** the Sender Country, Beneficiary Country, Amount in USD and Transaction Type :")
+    col1, col2 = st.columns(2)
+    with col1:
+        Sender_Country = st.text_input("Sender Country")
+    with col2:
+      Bene_Country = st.text_input("Beneficiary Country")
+    col3, col4 = st.columns(2)
+    with col3:
+      USD_amount = st.number_input("Amount in USD")
+    with col4:
+      Transaction_Type = st.text_input("Transaction Type (Please enter one of the following: make-payment, quick-payment, move-funds, pay-check)")
+    result = ""
+    if st.button("Predict"):
+        result = prediction(Sender_Country, Bene_Country, USD_amount, Transaction_Type)
+        if result == 0:
+            st.success("The output is {}".format(result) + " This is a NORMAL transaction")
+        if result == 1:
+            st.success("The output is {}".format(result) + " This is a FRAUDULENT TRANSACTION")
+if __name__=='__main__':
+    main()