Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import matplotlib.pyplot as plt | |
| import plotly.express as px | |
| import streamlit as st | |
| from transformers import pipeline | |
# Expense-tracker Streamlit app.
#
# Flow: upload a CSV of transactions -> label each description with a
# zero-shot classifier -> chart spending by category and by month -> compare
# monthly totals against user-adjustable budgets.

# Zero-shot classification is built on natural-language-inference, so the
# checkpoint must be NLI-finetuned. 'distilbert-base-uncased' is a plain
# masked-LM checkpoint and yields meaningless labels with this pipeline.
MODEL_NAME = "facebook/bart-large-mnli"

# Columns the uploaded CSV must provide.
REQUIRED_COLUMNS = ["Description", "Amount", "Date"]

# Candidate labels offered to the classifier.
CATEGORIES = [
    "Groceries",
    "Rent",
    "Utilities",
    "Entertainment",
    "Dining",
    "Transportation",
    "Salary",
]

# Label used for rows whose description is missing or blank.
UNCATEGORIZED = "Uncategorized"

# Initial slider positions; categories absent here have no budget at all.
DEFAULT_BUDGETS = {
    "Groceries": 300,
    "Rent": 1000,
    "Utilities": 150,
    "Entertainment": 100,
    "Dining": 150,
    "Transportation": 120,
}


def missing_columns(df: pd.DataFrame, required) -> list:
    """Return the names in *required* that are not columns of *df*, in order."""
    return [name for name in required if name not in df.columns]


def categorize_descriptions(descriptions: pd.Series, _classify) -> pd.Series:
    """Map every description to a category label.

    Args:
        descriptions: Free-text transaction descriptions; may contain
            missing values.
        _classify: Callable taking one non-empty string and returning its
            label. The leading underscore tells Streamlit's cache not to
            hash this argument when the function is wrapped by
            ``st.cache_data``.

    Returns:
        A Series aligned with *descriptions* holding one label per row.
        Missing or blank descriptions get ``UNCATEGORIZED`` without calling
        ``_classify``.
    """
    cleaned = descriptions.map(lambda v: "" if pd.isna(v) else str(v).strip())
    # Classify each distinct text once; repeated merchants are common and
    # model inference is the expensive step.
    label_for = {
        text: (_classify(text) if text else UNCATEGORIZED)
        for text in cleaned.unique()
    }
    return cleaned.map(label_for)


def add_month_column(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with 'Date' as plain dates plus a 'Month' column.

    'Month' is a ``YYYY-MM`` string so it groups cleanly and serializes for
    plotting. Rows with an empty date get a missing 'Month' rather than
    raising.

    Raises:
        ValueError: If a non-empty 'Date' value cannot be parsed.
    """
    out = df.copy()
    parsed = pd.to_datetime(out["Date"])
    out["Date"] = parsed.dt.date
    out["Month"] = parsed.dt.strftime("%Y-%m")
    return out


def find_budget_overruns(df: pd.DataFrame, budgets: dict) -> pd.DataFrame:
    """Return the (Month, Category) totals that exceed the category budget.

    Budgets are monthly, so spending is summed per month and category before
    comparing; a single transaction is never compared on its own. Categories
    without an entry in *budgets* are not evaluated.

    Args:
        df: Transactions with 'Month', 'Category' and 'Amount' columns.
        budgets: Mapping of category name to monthly budget.

    Returns:
        DataFrame with columns Month, Category, Amount (monthly total) and
        Budget, one row per overrun.
    """
    totals = df.groupby(["Month", "Category"], as_index=False)["Amount"].sum()
    totals["Budget"] = totals["Category"].map(budgets)
    exceeded = totals["Budget"].notna() & (totals["Amount"] > totals["Budget"])
    return totals[exceeded].reset_index(drop=True)


def load_classifier():
    """Build the Hugging Face zero-shot classification pipeline."""
    return pipeline("zero-shot-classification", model=MODEL_NAME)


def main():
    """Render the expense-tracker page."""
    uploaded_file = st.file_uploader("Upload Expense CSV", type="csv")
    if uploaded_file is None:
        return

    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        st.error(f"Error: The CSV file could not be read ({exc}).")
        return

    # Debug aid: show which columns were found in the upload.
    st.write("Columns in the uploaded file:", df.columns)

    missing = missing_columns(df, REQUIRED_COLUMNS)
    if missing:
        st.error(
            "Error: The CSV file does not contain the required column(s): "
            + ", ".join(missing)
            + "."
        )
        return
    if not pd.api.types.is_numeric_dtype(df["Amount"]):
        st.error("Error: The 'Amount' column must contain only numbers.")
        return

    # Streamlit re-executes the script on every widget interaction; cache
    # the model and the labels so moving a slider does not reload the
    # model or re-run inference.
    classifier = st.cache_resource(load_classifier)()

    def classify(text):
        # The pipeline returns labels sorted by score; take the best one.
        return classifier(text, candidate_labels=CATEGORIES)["labels"][0]

    categorize = st.cache_data(categorize_descriptions)
    df["Category"] = categorize(df["Description"], classify)
    st.write("Categorized Data:", df.head())

    # Visualization 1: pie chart of spending by category.
    # NOTE: matplotlib pie charts require non-negative values.
    category_expenses = df.groupby("Category")["Amount"].sum()
    fig1, ax1 = plt.subplots(figsize=(8, 8))
    category_expenses.plot(
        kind="pie",
        autopct="%1.1f%%",
        startangle=90,
        colors=plt.cm.Paired.colors,
        ax=ax1,
    )
    ax1.set_title("Expense Distribution by Category")
    ax1.set_ylabel("")  # Hide the y-axis label
    st.pyplot(fig1)
    plt.close(fig1)  # pyplot keeps figures alive across reruns otherwise

    # Visualization 2: monthly spending trend (line chart).
    try:
        df = add_month_column(df)
    except (ValueError, TypeError) as exc:
        st.error(f"Error: The 'Date' column could not be parsed ({exc}).")
        return
    monthly_expenses = df.groupby("Month")["Amount"].sum()
    fig2 = px.line(
        monthly_expenses.reset_index(),
        x="Month",
        y="Amount",
        title="Monthly Expenses",
        labels={"Amount": "Amount ($)"},
    )
    st.plotly_chart(fig2)

    # Sliders for adjusting the monthly budget of each category.
    st.write("Adjust your monthly budget for each category:")
    budgets = {
        category: st.slider(
            f"Budget for {category} ($)",
            min_value=0,
            max_value=2000,
            value=default,
            step=50,
        )
        for category, default in DEFAULT_BUDGETS.items()
    }

    # Show which monthly category totals exceeded their budget.
    st.write(
        "Categories that exceeded the budget:",
        find_budget_overruns(df, budgets),
    )

    # Visualization 3: monthly spending vs. total budget (bar chart).
    # The same overall budget is used for every month for simplicity.
    comparison = monthly_expenses.to_frame("Actual").assign(
        Budget=sum(budgets.values())
    )
    fig3, ax3 = plt.subplots(figsize=(10, 6))
    comparison.plot(kind="bar", ax=ax3)
    ax3.set_title("Monthly Spending vs Budget")
    ax3.set_ylabel("Amount ($)")
    st.pyplot(fig3)
    plt.close(fig3)


# `streamlit run` executes the script with __name__ set to "__main__", so the
# page still renders; the guard only keeps plain imports free of UI side
# effects.
if __name__ == "__main__":
    main()