# importing libraries
from datasets import load_dataset, load_dataset_builder
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sklearn
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, accuracy_score, precision_score, recall_score, classification_report
import imblearn
from imblearn.under_sampling import RandomUnderSampler
from skops import hub_utils
import pickle
from skops.card import Card, metadata_from_config
from pathlib import Path
from tempfile import mkdtemp, mkstemp
import streamlit as st  # required for the web app defined below
# Loading the dataset
dataset_name = "saifhmb/FraudPaymentData"
dataset = load_dataset(dataset_name, split = 'train')
dataset = pd.DataFrame(dataset)
dataset = dataset.dropna()
dataset = dataset.drop(['Time_step', 'Transaction_Id', 'Sender_Id', 'Sender_Account', 'Bene_Id', 'Bene_Account'], axis = 1) # dropping high-cardinality features
y = dataset['Label'].values # target: 0 = normal, 1 = fraudulent
dataset = dataset.drop(['Label'], axis = 1)
dataset = dataset.drop(['Sender_lob', 'Sender_Sector'], axis = 1) # drop 'Sender_lob' (only a single unique value) and 'Sender_Sector' (high-cardinality feature)
# Encoding the Independent Variables
categoricalColumns = ['Sender_Country', 'Bene_Country', 'Transaction_Type']
onehot_categorical = OneHotEncoder(handle_unknown = 'ignore', sparse_output = False)
categorical_transformer = Pipeline(steps = [('onehot', onehot_categorical)])
numericalColumns = dataset.select_dtypes(include = np.number).columns
sc = StandardScaler()
numerical_transformer = Pipeline(steps = [('scale', sc)])
preprocessorForCategoricalColumns = ColumnTransformer(transformers = [('cat', categorical_transformer, categoricalColumns)], remainder = 'passthrough')
preprocessorForAllColumns = ColumnTransformer(transformers = [('cat', categorical_transformer, categoricalColumns), ('num', numerical_transformer, numericalColumns)],
                                              remainder = 'passthrough')
# Splitting the dataset into Training and Test set
X = dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.15, random_state = 42) # fixed random state for reproducibility
# Train Naive Bayes Model using the Training set
# Handling the imbalanced dataset by randomly undersampling the majority (normal) class
under_sampler = RandomUnderSampler()
X_under, y_under = under_sampler.fit_resample(X_train, y_train)
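# A quick sanity check (not in the original script): confirm the undersampled training set
# is now balanced across the two classes before fitting the model.
print(pd.Series(y_under).value_counts())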
classifier = GaussianNB() # Gaussian Naive Bayes, the algorithm chosen for this problem statement
model = Pipeline(steps = [('preprocessorAll', preprocessorForAllColumns), ('classifier', classifier)])
model.fit(X_under, y_under)
# Predicting the Test set results
y_pred = model.predict(X_test)
# Making the Confusion Matrix and evaluating performance
cm = confusion_matrix(y_test, y_pred, labels = model.classes_) # (y_true, y_pred) is the expected argument order
disp = ConfusionMatrixDisplay(confusion_matrix = cm, display_labels = np.array(['0 - Normal', '1 - Fraudulent']))
disp.plot()
plt.show()
acc = accuracy_score(y_test, y_pred)
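# precision_score, recall_score and classification_report are imported above but not used;
# a minimal sketch of how they could round out the evaluation, assuming the fraud class is labelled 1:
prec = precision_score(y_test, y_pred)
rec = recall_score(y_test, y_pred)
print(f"Accuracy: {acc:.3f}, Precision: {prec:.3f}, Recall: {rec:.3f}")
print(classification_report(y_test, y_pred))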
# Pickling the model
with open("model.pkl", "wb") as pickle_out:
    pickle.dump(model, pickle_out)
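# hub_utils, Card, metadata_from_config, Path and mkdtemp are imported above but not used here;
# a hedged sketch of how they could push the pickled model and a model card to the Hub
# (repo id taken from the model card URL below; the token, requirements and task name are assumptions):
# local_repo = mkdtemp(prefix = "skops-")
# hub_utils.init(model = "model.pkl", requirements = ["scikit-learn", "imbalanced-learn"],
#                dst = local_repo, task = "tabular-classification", data = X_test)
# card = Card(model, metadata = metadata_from_config(Path(local_repo)))
# card.save(Path(local_repo) / "README.md")
# hub_utils.push(repo_id = "saifhmb/fraud-detection-model", source = local_repo, token = "<HF_TOKEN>")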
# Loading the model to predict on the data
with open('model.pkl', 'rb') as pickle_in:
    model = pickle.load(pickle_in)
def welcome():
    return 'welcome all'

# defining the function which will make the prediction using the data which the user inputs
def prediction(Sender_Country, Bene_Country, USD_amount, Transaction_Type):
    X = pd.DataFrame([[Sender_Country, Bene_Country, USD_amount, Transaction_Type]],
                     columns = ['Sender_Country', 'Bene_Country', 'USD_amount', 'Transaction_Type'])
    prediction = model.predict(X)
    print(prediction)
    return prediction
# this is the main function in which we define our webpage
def main():
    # giving the webpage a title
    st.title("Fraud Detection ML App")
    st.header("Model Description", divider = "gray")
    multi = '''This is a Gaussian Naive Bayes model trained on a synthetic dataset containing a large variety of transaction types representing normal activities
    as well as abnormal/fraudulent activities. The model predicts whether a transaction is normal or fraudulent.
    For more details on the model, please refer to the model card at https://huggingface.co/saifhmb/fraud-detection-model
    '''
    st.markdown(multi)
    st.markdown("To determine whether a transaction is normal or fraudulent, please **ENTER** the Sender Country, Beneficiary Country, Amount in USD and Transaction Type:")
    col1, col2 = st.columns(2)
    with col1:
        Sender_Country = st.text_input("Sender Country")
    with col2:
        Bene_Country = st.text_input("Beneficiary Country")
    col3, col4 = st.columns(2)
    with col3:
        USD_amount = st.number_input("Amount in USD")
    with col4:
        Transaction_Type = st.text_input("Transaction Type (Please enter one of the following: make-payment, quick-payment, move-funds, pay-check)")
    result = ""
    if st.button("Predict"):
        result = prediction(Sender_Country, Bene_Country, USD_amount, Transaction_Type)[0]  # take the scalar prediction from the returned array
        if result == 0:
            st.success("The output is {}".format(result) + ". This is a NORMAL transaction")
        if result == 1:
            st.success("The output is {}".format(result) + ". This is a FRAUDULENT transaction")

if __name__ == '__main__':
    main()
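# To run the app locally, assuming this script is saved as app.py (the usual entry point
# for a Streamlit Space): streamlit run app.py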