Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -214,42 +214,62 @@ async def web_search_and_extract(
|
|
| 214 |
|
| 215 |
@app.get("/api/adv_web_search")
|
| 216 |
async def adv_web_search(
|
| 217 |
-
|
| 218 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 219 |
):
|
| 220 |
-
"""
|
|
|
|
|
|
|
| 221 |
try:
|
| 222 |
with WEBS() as webs:
|
| 223 |
-
# Perform
|
| 224 |
-
search_results = webs.text(keywords=
|
| 225 |
-
|
| 226 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 227 |
for result in search_results:
|
| 228 |
if 'href' in result:
|
| 229 |
link = result['href']
|
| 230 |
try:
|
| 231 |
-
response = requests.get(link, headers={"User-Agent": "Mozilla/5.0
|
| 232 |
response.raise_for_status()
|
| 233 |
visible_text = extract_text_from_webpage(response.text)
|
| 234 |
-
if len(visible_text) >
|
| 235 |
-
visible_text = visible_text[:
|
| 236 |
-
|
| 237 |
except requests.exceptions.RequestException as e:
|
| 238 |
print(f"Error fetching or processing {link}: {e}")
|
| 239 |
-
extracted_results.append({"link": link, "text": None})
|
| 240 |
else:
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
raise HTTPException(status_code=500, detail=f"Error during LLM chat: {e}")
|
| 251 |
|
|
|
|
|
|
|
| 252 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 253 |
@app.get("/api/website_summarizer")
|
| 254 |
async def website_summarizer(url: str):
|
| 255 |
"""Summarizes the content of a given URL using a chat model."""
|
|
|
|
| 214 |
|
| 215 |
# Upper bound, in seconds, on each page download so that a single
# unresponsive host cannot stall the whole request.
_PAGE_FETCH_TIMEOUT = 10


@app.get("/api/adv_web_search")
async def adv_web_search(
    q: str,
    model: str = "gpt-3.5",
    max_results: int = 3,
    timelimit: Optional[str] = None,
    safesearch: str = "moderate",
    region: str = "wt-wt",
    backend: str = "api",
    max_chars: int = 6000,
    system_prompt: Optional[str] = Query(None, description="Optional custom system prompt"),
):
    """Answer a query with an LLM, using freshly fetched web pages as context.

    The handler runs a text search for ``q``, downloads every result that
    carries an ``href``, extracts the visible text of each page, and hands
    the combined text to the chat model together with the query.

    Args:
        q: The user's query; used both as the search keywords and in the
            LLM prompt.
        model: Model name passed to ``LLM``.
        max_results: Forwarded to the search call as ``max_results``.
        timelimit: Forwarded to the search call as ``timelimit``.
        safesearch: Forwarded to the search call as ``safesearch``.
        region: Forwarded to the search call as ``region``.
        backend: Forwarded to the search call as ``backend``.
        max_chars: Per-page cap on extracted text; longer text is cut to
            this length and suffixed with ``"..."``.
        system_prompt: Optional system message placed before the user
            message.

    Returns:
        A ``JSONResponse`` whose body is ``{"llm_response": ...}``.

    Raises:
        HTTPException: Status 500 if the search, the prompt construction or
            the LLM call raises. Failures while fetching an individual page
            are only logged and that page is skipped.
    """
    try:
        # 1. Perform the web search and, 2., pull text from each result URL.
        page_sections = []
        with WEBS() as webs:
            search_results = webs.text(
                keywords=q,
                region=region,
                safesearch=safesearch,
                timelimit=timelimit,
                backend=backend,
                max_results=max_results,
            )

            for result in search_results:
                if 'href' not in result:
                    # Results without a link have nothing to fetch.
                    continue
                link = result['href']
                try:
                    # NOTE(review): requests.get is a synchronous call inside
                    # an async handler; the timeout at least bounds how long
                    # each download can take.
                    response = requests.get(
                        link,
                        headers={"User-Agent": "Mozilla/5.0"},
                        timeout=_PAGE_FETCH_TIMEOUT,
                    )
                    response.raise_for_status()
                    visible_text = extract_text_from_webpage(response.text)
                    if len(visible_text) > max_chars:
                        visible_text = visible_text[:max_chars] + "..."
                    page_sections.append(
                        f"## Content from: {link}\n\n{visible_text}\n\n"
                    )
                except requests.exceptions.RequestException as e:
                    # Best effort: one bad page must not fail the whole search.
                    print(f"Error fetching or processing {link}: {e}")

        extracted_text = "".join(page_sections)

        # 3. Construct the prompt for the LLM
        llm_prompt = f"Query by user: {q} , Answerthe query asked by user using these search results, Which contains real time data, Try to give best response: {extracted_text}"

        # 4. Get the LLM's response using LLM class (similar to /api/llm)
        messages = [{"role": "user", "content": llm_prompt}]
        if system_prompt:
            messages.insert(0, {"role": "system", "content": system_prompt})

        llm = LLM(model=model)
        llm_response = llm.chat(messages=messages)

        # 5. Return the results
        return JSONResponse(content=jsonable_encoder({"llm_response": llm_response}))

    except Exception as e:
        # Top-level boundary: report any failure as a 500, keeping the cause.
        raise HTTPException(status_code=500, detail=f"Error during advanced search: {e}") from e
|
| 271 |
+
|
| 272 |
+
|
| 273 |
@app.get("/api/website_summarizer")
|
| 274 |
async def website_summarizer(url: str):
|
| 275 |
"""Summarizes the content of a given URL using a chat model."""
|