Spaces:
Sleeping
Sleeping
| import os | |
| import base64 | |
| import json | |
| import gradio as gr | |
| from typing_extensions import TypedDict | |
| from openai import OpenAI | |
| from langchain_openai import ChatOpenAI | |
| from langgraph.graph import StateGraph, START, END | |
| from langchain_core.messages import SystemMessage, HumanMessage | |
| from langchain_community.document_loaders import PyPDFLoader | |
| from markdown_pdf import MarkdownPdf, Section | |
| from dotenv import load_dotenv | |
# Load variables from a .env file (if any) before the environment is read below.
load_dotenv()

# Both clients are configured with the same credentials and endpoint, so the
# two environment variables are read once and shared.
_openai_connection = {
    "api_key": os.environ["OPENAI_API_KEY"],
    "base_url": os.environ["OPENAI_BASE_URL"],
}

# Raw OpenAI client: used by text_extractor for the image request.
client = OpenAI(**_openai_connection)

# LangChain chat model: used by the categorizer, verifier and formatter nodes.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0, **_openai_connection)

# Fixed inputs of this demo.
employee_name = 'John Doe'
type_of_expense = 'Restaurant'
company_policy_file_path = 'Company Policy on Expense Claims.pdf'

# The policy PDF is loaded once, at import time; verifier iterates over the
# loaded documents and reads each one's ``page_content``.
loader = PyPDFLoader(company_policy_file_path)
company_policy_document = loader.load()
class State(TypedDict):
    """State shared by the nodes of the expense-claim graph.

    The graph is invoked with ``image_path`` only; each node then returns a
    partial update holding the key(s) it produces.
    """

    # Path of the uploaded bill image (the graph input).
    image_path: str
    # Model reply containing the bill details, written by text_extractor.
    extracted_text: str
    # Model reply listing the categorized items, written by categorizer.
    categorized_text: str
    # Policy text handed to the model, written by verifier.
    relevant_company_policy: str
    # Model reply listing the items kept after the policy check, written by verifier.
    verified_text: str
    # Dict with the keys 'taxes_and_charges' and 'total_amount', written by
    # estimator and indexed by key in formatter (it is not a string).
    revised_calculation: dict
    # Markdown report text, written by formatter.
    final_output: str
def generate_data_uri(jpg_file_path):
    """Return the contents of an image file as a base64 ``data:`` URI.

    The MIME type in the URI is guessed from the file name, so a ``.jpg``
    file is labelled ``image/jpeg`` rather than always ``image/png``.

    Args:
        jpg_file_path: Path of the image file to embed. Despite the name,
            any image extension is accepted.

    Returns:
        A string of the form ``data:<mime type>;base64,<encoded bytes>``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Local import: only this helper needs the module.
    import mimetypes

    with open(jpg_file_path, 'rb') as image_file:
        image_data = image_file.read()

    # Fall back to the previously hard-coded type when the extension is
    # unknown or does not denote an image.
    mime_type, _ = mimetypes.guess_type(str(jpg_file_path))
    if mime_type is None or not mime_type.startswith('image/'):
        mime_type = 'image/png'

    # Encode the binary image data to base64
    base64_encoded_data = base64.b64encode(image_data).decode('utf-8')

    # Construct the data URI
    return f"data:{mime_type};base64,{base64_encoded_data}"
def text_extractor(state: State):
    """
    Graph node: read the bill details out of the image with OpenAI's GPT-4o mini model.

    Reads ``state['image_path']`` and returns ``{'extracted_text': <model reply>}``.
    """
    # The prompt lines are flush-left so that no code indentation ends up
    # inside the string sent to the model.
    system_instructions = """
You are an expert in extracting the text in images.
Extract the following details from the bill presented in the input.
- Date of bill
- Bill No
- Restaurant Name and Address
- Items ordered quantity and price
- Tax and Charges
- Total amount
Do not output anything except the above details in your output.
"""

    # The image is sent inline as a data URI; the user turn carries nothing else.
    bill_image_part = {
        'type': "image_url",
        "image_url": {'url': generate_data_uri(state['image_path'])},
    }
    chat_messages = [
        {'role': 'system', 'content': system_instructions},
        {'role': 'user', 'content': [bill_image_part]},
    ]

    print("I have access to the Open AI API for text extraction.")
    gr.Info("I have access to the Open AI API for text extraction. I need to format the input image in the Open AI format", duration=2)
    print("I need to format the input image in the Open AI format")

    completion = client.chat.completions.create(
        model='gpt-4o-mini',
        messages=chat_messages,
        temperature=0,
    )
    bill_text = completion.choices[0].message.content

    print("Extracted text from the input image")
    gr.Info("Extracted text from the input image", duration=2)

    return {'extracted_text': bill_text}
def categorizer(state: State):
    """Graph node: ask the chat model to assign each bill item to a category.

    Reads ``state['extracted_text']`` and returns
    ``{'categorized_text': <content of the model reply>}``.
    """
    # The prompt lines are flush-left so that no code indentation ends up
    # inside the string sent to the model.
    system_instructions = """
You are an expert accountant tasked to categorize the items ordered in the bill.
Categorize the items STRICTLY into the following categories: Alcoholic Drinks, Non-Alcoholic Drinks and Food.
Remember to categorize the items into one of the three categories only. Do not use new categories.
Present your output as a JSON with the following fields:
[{'item': '<name of the item>', 'category': '<category assigned>', 'quantity': '<quantity>', 'price': '<price>'}, ... and so on]
Do not output anything except the above fields in your JSON output.
Do not delimit the JSON with any extra tags (e.g., ``` or ```JSON).
"""

    # The same status line goes to the console and to the UI toast.
    status_line = "Categorizing items in the input text to one of: Alcoholic Drinks, Non-Alcoholic Drinks and Food."
    print(status_line)
    gr.Info(status_line, duration=1)

    reply = llm.invoke([
        SystemMessage(content=system_instructions),
        HumanMessage(content=state['extracted_text']),
    ])
    return {'categorized_text': reply.content}
def verifier(state: State):
    """Graph node: remove bill items that the company policy does not allow.

    Selects every page of the loaded policy that mentions ``type_of_expense``
    and asks the chat model to filter ``state['categorized_text']`` against
    that text. If no page mentions the expense type, the whole policy is
    used, so the policy variable is always bound.

    Returns:
        A dict with ``'verified_text'`` (content of the model reply) and
        ``'relevant_company_policy'`` (the policy text given to the model).
    """
    print("I now have to retrieve relevant sections of the company policy to exclude items that are not reimbursable.")
    gr.Info("I now have to retrieve relevant sections of the company policy to exclude items that are not reimbursable. I will use the search tool to execute this step", duration=2)
    print("I will use the search tool to execute this step.")

    # Keep all matching pages, not just the last one found.
    matching_pages = [
        document.page_content
        for document in company_policy_document
        if type_of_expense in document.page_content
    ]
    if not matching_pages:
        # No page names the expense type: fall back to the full policy.
        matching_pages = [
            document.page_content for document in company_policy_document
        ]
    relevant_company_policy = "\n\n".join(matching_pages)

    # The prompt lines are flush-left so that no code indentation ends up
    # inside the string sent to the model.
    verification_system_message = """
You are an expert accountant tasked to verify the bill details against the provided company policy.
Verify the items in the submitted bill against the company policy presented below.
Present your output in the following JSON format after removing the items inthat are not aligned with the company policy.
[{'item': '<name of the item>', 'category': '<category assigned>', 'quantity': '<quantity>', 'price': '<price>'}, ... and so on]
Do not output anything except the above details in your JSON output.
Do not delimit the JSON with any extra tags (e.g., ``` or ```JSON).
"""
    verification_prompt = [
        SystemMessage(content=verification_system_message + f"\n Company Policy: \n{relevant_company_policy}"),
        HumanMessage(content=state['categorized_text']),
    ]
    verified_text = llm.invoke(verification_prompt)

    return {'verified_text': verified_text.content, 'relevant_company_policy': relevant_company_policy}
def estimator(state: State):
    """Graph node: recompute the bill total and surcharges for the verified items.

    Parses ``state['verified_text']`` as a JSON array of items, sums
    ``quantity * price`` over them and adds the surcharges on top.

    Returns:
        ``{'revised_calculation': {'taxes_and_charges': ..., 'total_amount': ...}}``

    Raises:
        json.JSONDecodeError: If no parsable JSON array is found in the text.
        KeyError, ValueError: If an item lacks ``quantity``/``price`` or the
            values are not numeric.
    """
    print("Calculating the revised total amount and taxes")
    gr.Info("Calculating the revised total amount and taxes", duration=1)

    # Surcharges applied to the item subtotal, as fractions of it.
    # NOTE(review): this file does not say what each rate stands for.
    surcharge_rates = (0.10, 0.025, 0.025, 0.20)

    # The model is told to reply with a bare JSON array, but it may still add
    # code fences or a sentence around it; keep only the outermost [...] span.
    items_json = state['verified_text'].strip()
    array_start = items_json.find('[')
    array_end = items_json.rfind(']')
    if array_start != -1 and array_end > array_start:
        items_json = items_json[array_start:array_end + 1]

    total_bill = 0.0
    for item in json.loads(items_json):
        total_bill += float(item['quantity']) * float(item['price'])

    # Computed once, after the subtotal is final. Plain left-to-right addition
    # keeps the floating-point result of the original four-term expression.
    total_taxes_and_charges = 0.0
    for rate in surcharge_rates:
        total_taxes_and_charges += total_bill * rate

    revised_calculation = {
        'taxes_and_charges': total_taxes_and_charges,
        'total_amount': total_bill + total_taxes_and_charges,
    }
    return {'revised_calculation': revised_calculation}
def formatter(state: State):
    """Graph node: have the chat model write the expense claim report in markdown.

    Combines the module-level ``employee_name`` with ``state['extracted_text']``,
    ``state['verified_text']`` and the two amounts in
    ``state['revised_calculation']`` into one message for the model.

    Returns:
        ``{'final_output': <content of the model reply>}``
    """
    print("Formatting the output into a markdown file")
    gr.Info("Formatting the output into a markdown file", duration=1)

    # The prompt lines are flush-left so that no code indentation ends up
    # inside the strings sent to the model.
    final_output_system_message = """
You are an expert accountant tasked to generate the expense claim report.
Generate the expense claim report based on the calculated total amount to be reimbursed and other details available to you.
The details of the fields needed for the report are present in the input.
These are:
- Employee Name:
- Original Bill:
- Verified items ordered quantity and price:
- Total amount to be reimbursed:
- Tax and Charges:
Use only the details from the input to generate the report.
Present your output in the following markdown format.
# Expense Claim Report
## Employee Name: <Insert Employee Name>
## Date: <Insert Date from original bill>
## Bill No: <Insert Bill No from original bill>
## Restaurant Name and Address: <Insert Restaurant Name and Address fromm original bill>
## Items ordered quantity and price (<arrange in a table format from verified list of items>):
|Item|Quantity|Price|
...
...
### Tax and Charges: <enter the tax amount from calculated amounts>
### Total amount to be reimbursed: <enter the total from calculated amounts>
Do not output anything except the above details in your output.
Do not delimit the output with any extra tags (e.g., ```).
"""

    # Named report_input rather than input so the builtin is not shadowed.
    report_input = f"""
Employee Name: {employee_name}
---
Original Bill:
{state['extracted_text']}
---
Verified items ordered quantity and price:
{state['verified_text']}
---
Calculated amounts:
Taxes and Charges: {state['revised_calculation']['taxes_and_charges']}
Total amount to be reimbursed: {state['revised_calculation']['total_amount']}
"""

    final_output_prompt = [
        SystemMessage(content=final_output_system_message),
        HumanMessage(content=report_input),
    ]
    final_output = llm.invoke(final_output_prompt)

    return {'final_output': final_output.content}
def claim_generator(input_bill_path, progress=gr.Progress()):
    """Run the expense-claim graph on one bill image and render the report as PDF.

    Args:
        input_bill_path: Path of the uploaded bill image.
        progress: Gradio progress tracker.

    Returns:
        A tuple ``(markdown_report, pdf_path)``.

    Raises:
        gr.Error: If no image was supplied.
    """
    # Local import: only this function needs the module.
    import tempfile

    # Report a missing upload directly instead of failing later when the
    # image file is opened.
    if not input_bill_path:
        raise gr.Error("Please upload an image of the bill.")

    progress(0, desc="Starting workflow")

    # Linear pipeline: START -> text_extractor -> categorizer -> verifier
    # -> estimator -> formatter -> END.
    workflow = StateGraph(State)
    pipeline = [
        ("text_extractor", text_extractor),
        ("categorizer", categorizer),
        ("verifier", verifier),
        ("estimator", estimator),
        ("formatter", formatter),
    ]
    for node_name, node_fn in pipeline:
        workflow.add_node(node_name, node_fn)

    previous = START
    for node_name, _ in pipeline:
        workflow.add_edge(previous, node_name)
        previous = node_name
    workflow.add_edge(previous, END)

    chain = workflow.compile()
    progress(0.05)

    output = chain.invoke({'image_path': input_bill_path})
    progress(0.25)

    gr.Info("Converting the markdown file to pdf", duration=2)
    pdf = MarkdownPdf(toc_level=3)
    pdf.add_section(Section(output['final_output']))

    # Each request gets its own directory, so concurrent requests no longer
    # overwrite one shared file. The file name is unchanged.
    # NOTE: these directories are not removed here.
    output_dir = tempfile.mkdtemp(prefix="expense-claim-")
    pdf_path = os.path.join(output_dir, "expense-claim.pdf")
    pdf.save(pdf_path)

    return output['final_output'], pdf_path
# Text shown below the interface (passed as ``article``): a caption plus the
# workflow diagram image.
agentic_workflow_representation = 'The agentic workflow used to generate an expense claim document is represented below: \n <img src="https://cdn-uploads.huggingface.co/production/uploads/64118e60756b9e455c7eddd6/iqdGjUG7POKJXLItzWb-K.png">'

# Text shown under the title (passed as ``description``). The lines are
# flush-left so that no stray indentation ends up in the displayed text.
agentic_workflow_description = """
This demo represents a multi-agent collaborative workflow that generates an expense claim document based on a submitted bill.
Once a user uploads a bill to the interface, the following steps are executed:
1. Extract text from an image of the bill.
2. Categorize items in the bill to: alcoholic drinks, non-alcoholic drinks and food.
3. Based on the categories, retrieve relevant sections from the company reimbursement policy; remove items that are non reimbursable.
4. Compute the total amount that can be reimbursed (along with taxes).
5. Prepare a claim document in the company format (as a markdown document).
Each step in this workflow is executed by function-calling agents.
For example, the text extractor is an API-calling agent that uses the Open AI APIs to extract text from the bill.
Similarly, the verifier is a search agent that extracts relevant portions of the company policy based on the nature of the bill.
"""
# Single-function UI: one image in, the markdown report and a downloadable
# file out.
demo = gr.Interface(
    fn=claim_generator,
    inputs=gr.Image(type="filepath", label="Upload an image of the bill"),
    outputs=[
        gr.Markdown(label="Expense Claim Document", show_copy_button=True, container=True),
        gr.File(label="Download your claim document", show_label=True),
    ],
    title="Expense Claim Document Generator",
    description=agentic_workflow_description,
    article=agentic_workflow_representation,
    examples='images',
    cache_examples=False,
    theme=gr.themes.Base(),
    concurrency_limit=16,
)

# Stop at start-up when the login password is not configured, instead of
# launching with a None password. This matches how the OpenAI settings are
# required from the environment elsewhere in this file.
_demo_password = os.getenv('PASSWD')
if not _demo_password:
    raise RuntimeError("The PASSWD environment variable must be set to the login password.")

demo.queue()
demo.launch(auth=("johndoe", _demo_password), ssr_mode=False)