Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -108,7 +108,7 @@ class HTML_TO_MARKDOWN_CONVERTER:
|
|
| 108 |
return f"\n\n\n\n"
|
| 109 |
return inner_md
|
| 110 |
|
| 111 |
-
async def perform_web_browse(query: str, browser_name: str, search_engine_name: str):
|
| 112 |
browser_key = browser_name.lower()
|
| 113 |
if "playwright" not in PLAYWRIGHT_STATE:
|
| 114 |
PLAYWRIGHT_STATE["playwright"] = await async_playwright().start()
|
|
@@ -126,11 +126,12 @@ async def perform_web_browse(query: str, browser_name: str, search_engine_name:
|
|
| 126 |
|
| 127 |
browser_instance = PLAYWRIGHT_STATE[browser_key]
|
| 128 |
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
|
|
|
| 134 |
url_template = SEARCH_ENGINES.get(search_engine_name)
|
| 135 |
if not url_template:
|
| 136 |
return {"status": "error", "query": query, "error_message": f"Invalid search engine: '{search_engine_name}'."}
|
|
@@ -159,7 +160,7 @@ async def perform_web_browse(query: str, browser_name: str, search_engine_name:
|
|
| 159 |
markdown_text = converter.convert()
|
| 160 |
status_code = response.status if response else 0
|
| 161 |
|
| 162 |
-
return {"status": "success", "query": query, "final_url": final_url, "page_title": title, "http_status": status_code, "proxy_used": proxy_server_used, "markdown_content": markdown_text}
|
| 163 |
except Exception as e:
|
| 164 |
error_message = str(e).splitlines()[0]
|
| 165 |
if "Timeout" in error_message:
|
|
@@ -173,16 +174,17 @@ with gr.Blocks(title="Web Browse API", theme=gr.themes.Soft()) as demo:
|
|
| 173 |
gr.Markdown("# Web Browse API")
|
| 174 |
gr.Markdown(f"This interface exposes a stateless API endpoint (`/api/web_browse`) to fetch and parse web content. {REVOLVER.count()} proxies loaded.")
|
| 175 |
|
| 176 |
-
|
|
|
|
| 177 |
|
| 178 |
with gr.Row():
|
| 179 |
browser_input = gr.Dropdown(label="Browser", choices=["firefox", "chromium", "webkit"], value="firefox", scale=1)
|
| 180 |
-
search_engine_input = gr.Dropdown(label="Search Engine (
|
| 181 |
|
| 182 |
submit_button = gr.Button("Browse", variant="primary")
|
| 183 |
output_json = gr.JSON(label="API Result")
|
| 184 |
|
| 185 |
-
submit_button.click(fn=perform_web_browse, inputs=[query_input, browser_input, search_engine_input], outputs=output_json, api_name="web_browse")
|
| 186 |
|
| 187 |
if __name__ == "__main__":
|
| 188 |
demo.launch()
|
|
|
|
| 108 |
return f"\n\n\n\n"
|
| 109 |
return inner_md
|
| 110 |
|
| 111 |
+
async def perform_web_browse(action: str, query: str, browser_name: str, search_engine_name: str):
|
| 112 |
browser_key = browser_name.lower()
|
| 113 |
if "playwright" not in PLAYWRIGHT_STATE:
|
| 114 |
PLAYWRIGHT_STATE["playwright"] = await async_playwright().start()
|
|
|
|
| 126 |
|
| 127 |
browser_instance = PLAYWRIGHT_STATE[browser_key]
|
| 128 |
|
| 129 |
+
if action == "Scrape URL":
|
| 130 |
+
if not query.startswith(('http://', 'https://')):
|
| 131 |
+
url = f"http://{query}"
|
| 132 |
+
else:
|
| 133 |
+
url = query
|
| 134 |
+
else: # action == "Search"
|
| 135 |
url_template = SEARCH_ENGINES.get(search_engine_name)
|
| 136 |
if not url_template:
|
| 137 |
return {"status": "error", "query": query, "error_message": f"Invalid search engine: '{search_engine_name}'."}
|
|
|
|
| 160 |
markdown_text = converter.convert()
|
| 161 |
status_code = response.status if response else 0
|
| 162 |
|
| 163 |
+
return {"status": "success", "query": query, "action": action, "final_url": final_url, "page_title": title, "http_status": status_code, "proxy_used": proxy_server_used, "markdown_content": markdown_text}
|
| 164 |
except Exception as e:
|
| 165 |
error_message = str(e).splitlines()[0]
|
| 166 |
if "Timeout" in error_message:
|
|
|
|
| 174 |
gr.Markdown("# Web Browse API")
|
| 175 |
gr.Markdown(f"This interface exposes a stateless API endpoint (`/api/web_browse`) to fetch and parse web content. {REVOLVER.count()} proxies loaded.")
|
| 176 |
|
| 177 |
+
action_input = gr.Radio(label="Action", choices=["Search", "Scrape URL"], value="Search")
|
| 178 |
+
query_input = gr.Textbox(label="Query or URL", placeholder="e.g., 'best cat food' or 'www.wikipedia.org'")
|
| 179 |
|
| 180 |
with gr.Row():
|
| 181 |
browser_input = gr.Dropdown(label="Browser", choices=["firefox", "chromium", "webkit"], value="firefox", scale=1)
|
| 182 |
+
search_engine_input = gr.Dropdown(label="Search Engine (if action is Search)", choices=sorted(list(SEARCH_ENGINES.keys())), value="DuckDuckGo", scale=2)
|
| 183 |
|
| 184 |
submit_button = gr.Button("Browse", variant="primary")
|
| 185 |
output_json = gr.JSON(label="API Result")
|
| 186 |
|
| 187 |
+
submit_button.click(fn=perform_web_browse, inputs=[action_input, query_input, browser_input, search_engine_input], outputs=output_json, api_name="web_browse")
|
| 188 |
|
| 189 |
if __name__ == "__main__":
|
| 190 |
demo.launch()
|