# app.py -- uploaded by AfshinMA (commit f67a67c, verified: "Update app.py")
import os
import joblib
import pandas as pd
import streamlit as st
from typing import Any, Dict, List
from imblearn.over_sampling import SMOTE
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Where the serialized models and the cleaned training dataset live on disk.
MODEL_DIR = "models"
DATA_DIR = "datasets"
DATA_FILE = "cleaned_transaction_dataset.csv"

# Display names of the classifiers the app compares. load_models() derives
# each model's file name from its display name (spaces removed + ".joblib").
MODEL_NAMES = ["LGBM Classifier", "CatBoost Classifier", "XGBoost Classifier"]

# The dataset is read at import time: it supplies both the training split and
# the default values shown in the sidebar inputs.
data_path = os.path.join(DATA_DIR, DATA_FILE)
df = pd.read_csv(data_path)
# Load models
def load_models(model_names: List[str]) -> Dict[str, Any]:
    """Deserialize the named models from ``MODEL_DIR``.

    Each model is read from ``<display name without spaces>.joblib``. A model
    that cannot be loaded is reported through ``st.error`` and left out of the
    result, so one bad file does not prevent the others from loading.

    Args:
        model_names: Display names of the models to load.

    Returns:
        Mapping from display name to the loaded model object, containing only
        the models that loaded successfully.
    """
    loaded: Dict[str, Any] = {}
    for model_name in model_names:
        file_name = model_name.replace(' ', '') + ".joblib"
        model_path = os.path.join(MODEL_DIR, file_name)
        try:
            estimator = joblib.load(model_path)
        except Exception as e:
            # Best effort: surface the failure in the UI and keep going.
            st.error(f"Error loading model {model_name}: {str(e)}")
        else:
            loaded[model_name] = estimator
    return loaded
models = load_models(MODEL_NAMES)

# 'FLAG' is the target column; every remaining column is used as a feature.
y = df['FLAG']
X = df.drop(columns=['FLAG'])

# Hold out 10% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=123,
)
# Prediction and metrics evaluation function
def calculate_metrics(y_true, y_pred, average_type='binary'):
    """Score predictions against the true labels.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        average_type: Forwarded as ``average`` to the recall, F1 and precision
            scorers (accuracy takes no averaging argument).

    Returns:
        Tuple ``(accuracy, recall, f1, precision)``, in that order.
    """
    accuracy = accuracy_score(y_true, y_pred)
    # The three remaining scorers share a signature, so evaluate them in one
    # pass, in the order the caller unpacks them.
    recall, f1, precision = (
        scorer(y_true, y_pred, average=average_type)
        for scorer in (recall_score, f1_score, precision_score)
    )
    return accuracy, recall, f1, precision
def load_and_predict(sample):
    """Classify ``sample`` with every loaded model and report their metrics.

    Args:
        sample: DataFrame with the row to classify. It must contain every
            feature column of ``X_train``; only the prediction for its first
            row is reported.

    Returns:
        DataFrame with one row per model and the columns ``Model``,
        ``Predicted Fraud`` ('Yes'/'No'), ``Accuracy %``, ``Recall %``,
        ``F1 %`` and ``Precision %``, sorted by accuracy (best first).
        If any step fails, the error is shown with ``st.error`` and an empty
        DataFrame is returned.
    """
    try:
        # Using StandardScaler to scale numeric features.
        # The scaler is fitted on the training features only and those same
        # statistics are then applied to the sample. Re-fitting on the sample
        # (a single row) would centre every feature on itself and hand the
        # models an all-zero vector no matter what the user entered.
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        # Select the columns in training order so the sample lines up with
        # the features the scaler was fitted on.
        sample_trans = scaler.transform(sample[X_train.columns])

        # Using SMOTE to handle class imbalance
        X_resampled, y_resampled = SMOTE(random_state=123).fit_resample(X_train_scaled, y_train)

        results = []
        for name, model in models.items():
            # NOTE(review): the metrics below are computed on the resampled
            # training data, not on the held-out X_test/y_test split --
            # confirm this is what should be shown to the user.
            y_resampled_pred = model.predict(X_resampled)
            flag_pred = model.predict(sample_trans)
            acc, rec, f1, prec = calculate_metrics(y_resampled, y_resampled_pred)
            results.append({
                'Model': name,
                'Predicted Fraud': 'Yes' if flag_pred[0] == 1 else 'No',
                'Accuracy %': acc * 100,
                'Recall %': rec * 100,
                'F1 %': f1 * 100,
                'Precision %': prec * 100
            })
        return pd.DataFrame(results).sort_values(by='Accuracy %', ascending=False)
    except Exception as e:
        # UI boundary: report the failure instead of crashing the app.
        st.error(f"An error occurred during model loading or prediction: {str(e)}")
        return pd.DataFrame()
# Streamlit UI setup
# The emoji literals below were mis-encoded (UTF-8 bytes decoded with a
# single-byte code page); they are restored to the intended characters so the
# page icon is a valid emoji and the titles render correctly.
# NOTE(review): Streamlit has documented set_page_config as needing to be the
# first Streamlit command; load_models() may call st.error before this point
# when a model fails to load -- confirm against the Streamlit version in use.
st.set_page_config(page_title="Fraud Detection Etherium Prediction App", page_icon="🕵️", layout="wide")
st.title("😎 **Fraud Detection Etherium Prediction App**")
st.subheader("Enter the following information to predict **Fraud Detection Etherium**.")
st.sidebar.title("🕵️ **Fraud Detection Parameters**")
# Input features
# Sidebar inputs are created in this order; each label is also the name of
# the dataset column whose mean is used as the widget's default value.
_FEATURE_NAMES = (
    "Avg min between sent tnx",
    "Avg min between received tnx",
    "Time difference between first and last (mins)",
    "Sent tnx",
    "Received tnx",
    "Number of created contracts",
    "Max value received",
    "Avg value received",
    "Avg value sent",
    "Total either sent",
    "Total either balance",
    "ERC20 total either received",
    "ERC20 total either sent",
    "ERC20 total either sent contract",
    "ERC20 unique sent address",
    "ERC20 unique received token name",
)

# Features entered as whole numbers; every other feature is a float input.
_INTEGER_FEATURES = frozenset({"Number of created contracts"})


def _sidebar_number_input(feature):
    """Create the sidebar number input for ``feature`` and return its value."""
    column_mean = df[feature].mean()
    if feature in _INTEGER_FEATURES:
        return st.sidebar.number_input(feature, min_value=0, value=int(column_mean))
    return st.sidebar.number_input(feature, min_value=0.0, value=float(column_mean))


input_features = {
    feature: _sidebar_number_input(feature) for feature in _FEATURE_NAMES
}
# Display predict button in main area
st.markdown("---")
if st.button(label=':rainbow[Predict Fraud]'):
    # Wrap the sidebar values in a one-row DataFrame keyed by feature name.
    input_data = pd.DataFrame([input_features])
    # Run every loaded model on the input; an empty frame signals a failure
    # that load_and_predict has already reported in the UI.
    results_df = load_and_predict(input_data)
    if not results_df.empty:
        # The header emoji was mis-encoded in the source and is restored here.
        st.write("### 😎 Prediction Results:")
        # NOTE(review): a 'Yes' (fraud predicted) cell is coloured green and
        # a 'No' cell red -- confirm this is the intended colour mapping.
        styled_df = results_df.style.map(
            lambda x: 'color: green' if x == 'Yes' else 'color: red',
            subset=['Predicted Fraud'],
        )
        st.dataframe(styled_df)
# Description Section
st.markdown("---")
st.subheader("Description")
# The model list in this text mirrors MODEL_NAMES (LGBM, CatBoost, XGBoost);
# it previously named "Gradient Boosting", which the app does not load.
st.markdown('''This Streamlit application predicts fraud in Ethereum transactions using multiple machine learning models including LGBM, CatBoost, and XGBoost classifiers.
Users can input transaction information through a user-friendly interface, which includes various fields related to transaction metrics and user activity.
> **Features:**
> - **Input Components:** Users can provide data using number inputs for transaction-related features.
> - **Data Processing:** Upon submitting the form, the app processes the input data and transforms it using a pre-trained data preprocessor.
> - It leverages SMOTE to address any class imbalance in the data.
> - **Prediction:** The app runs predictions using the loaded models and calculates performance metrics like accuracy, recall, F1 score, and precision.
> - **Results Display:** The predicted fraud status and model performance metrics are displayed in a formatted output for easy interpretation.
> **Usage:** Just fill out the information about the transaction and click "Predict Fraud" to receive insights on whether the transaction is likely to be fraudulent and how well each model performed.
> **Disclaimer:** This application is intended for educational purposes only.
''')
# Disclaimer Section
st.markdown("---")
st.subheader("Disclaimer")
# st.text renders the disclaimer as plain text with its line breaks kept.
st.text('''The fraud detection results provided by this app are for informational purposes only.
While we strive for accuracy, the predictions made by the models depend on the quality of the input data
and the model's training. Use this information at your own discretion, and do not solely rely on it for
making financial decisions. Consulting with a financial expert is recommended for critical decisions.''')