frkhan commited on
Commit
2e4cb4f
·
1 Parent(s): 483c169

-- Updated the main UI and system prompt so that the LLM can also provide data for non-product-based queries.

Browse files
Files changed (3) hide show
  1. app.py +21 -4
  2. llm_inference_service.py +3 -0
  3. requirements.txt +6 -20
app.py CHANGED
@@ -53,6 +53,21 @@ with gr.Blocks() as gradio_ui:
53
  </div>
54
  """)
55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
  with gr.Column():
58
  url_input = gr.Textbox(label="Enter URL to scrape", placeholder="https://example.com/query?search=cat+food", lines=1)
@@ -62,9 +77,11 @@ with gr.Blocks() as gradio_ui:
62
 
63
  scrape_result_textbox = gr.Textbox(label="Scrape Result", lines=20, show_copy_button=True)
64
 
65
- label_llm_section = gr.Label("Use LLM to extract information from the scraped content")
66
- gr.HTML("<hr>")
67
-
 
 
68
 
69
  with gr.Row():
70
 
@@ -83,7 +100,7 @@ with gr.Blocks() as gradio_ui:
83
  )
84
 
85
 
86
- llm_response_btn = gr.Button("Extracted Info by LLM")
87
 
88
 
89
  # LLM response output area and loader
 
53
  </div>
54
  """)
55
 
56
+ gr.HTML("""
57
+ <div style="margin-bottom: 20px; padding: 20px; border: 1px solid #ddd; border-radius: 5px;">
58
+ <h2 style="margin-top: 0;">How to Use This App</h2>
59
+ <p>This app combines web scraping with the power of Large Language Models (LLMs) to extract specific information from web pages. Here's how it works:</p>
60
+ <ol>
61
+ <li><strong>Enter a URL:</strong> Provide the URL of the web page you want to analyze.</li>
62
+ <li><strong>Define Your Query:</strong> Specify the exact information you're looking for (e.g., product name, price, customer ratings).</li>
63
+ <li><strong>Scrape the Web Page:</strong> Click the "Scrape with FireCrawl" button to extract the content of the page.</li>
64
+ <li><strong>Select Model & Provider:</strong> Choose the LLM model you want to use for information extraction.</li>
65
+ <li><strong>Extract Info by LLM:</strong> Click the "Extract Info by LLM" button to get the information based on your query.</li>
66
+ </ol>
67
+ <p><strong>What makes this different from a regular web scraper?</strong> Traditional web scrapers require pre-programming to extract product data for each specific website. These scrapers are brittle and can break if the website's design changes. This app uses LLMs to <em>understand</em> your query and extract only the relevant information, saving you time and effort and removing the need for constant maintenance.</p>
68
+ </div>
69
+ """)
70
+
71
 
72
  with gr.Column():
73
  url_input = gr.Textbox(label="Enter URL to scrape", placeholder="https://example.com/query?search=cat+food", lines=1)
 
77
 
78
  scrape_result_textbox = gr.Textbox(label="Scrape Result", lines=20, show_copy_button=True)
79
 
80
+ gr.HTML("<hr style='margin-top:10px; margin-bottom:10px;'>")
81
+ gr.Markdown("### 🧠 LLM Extraction")
82
+ gr.Markdown("Use a language model to extract structured information from the scraped content.")
83
+ gr.HTML("<hr style='margin-top:10px; margin-bottom:10px;'>")
84
+
85
 
86
  with gr.Row():
87
 
 
100
  )
101
 
102
 
103
+ llm_response_btn = gr.Button("Extract Info by LLM")
104
 
105
 
106
  # LLM response output area and loader
llm_inference_service.py CHANGED
@@ -19,6 +19,9 @@ def extract_page_info_by_llm(user_query: str, scraped_markdown_content: str, mod
19
 
20
  If user asks for JSON format, please provide the answer in JSON format only.
21
 
 
 
 
22
  If you do not find or know the answer, do not hallucinate, do not try to generate fake answers.
23
  If no Context is given, simply state "No relevant information found to answer your question."
24
 
 
19
 
20
  If user asks for JSON format, please provide the answer in JSON format only.
21
 
22
+ User will mostly request you to extract product information but can also ask you to extract other information from the content.
23
+ So always read the user query carefully and extract information accordingly.
24
+
25
  If you do not find or know the answer, do not hallucinate, do not try to generate fake answers.
26
  If no Context is given, simply state "No relevant information found to answer your question."
27
 
requirements.txt CHANGED
@@ -1,21 +1,7 @@
1
- # gradio==5.46.1
2
- # langchain==0.3.27
3
- # langchain-community==0.3.29
4
- # chromadb==1.1.0
5
- # PyMuPDF==1.26.4
6
- # langchain-google-genai==2.1.12
7
- # langchain-nvidia-ai-endpoints==0.3.18
8
- # dotenv==0.9.9
9
-
10
-
11
-
12
- gradio
13
- requests
14
- # python-dotenv
15
- dotenv
16
-
17
- firecrawl-py
18
  langchain-community
19
- langchain-google-genai
20
- langchain-nvidia-ai-endpoints
21
- # groq
 
1
+ gradio==5.46.1
2
+ requests==2.32.5
3
+ dotenv==0.9.9
4
+ firecrawl-py==4.3.6
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  langchain-community
6
+ langchain-google-genai==2.1.12
7
+ langchain-nvidia-ai-endpoints==0.3.18