Spaces:
Sleeping
Sleeping
Update helper_functions_api.py
Browse files- helper_functions_api.py +22 -7
helper_functions_api.py
CHANGED
|
@@ -221,13 +221,28 @@ def process_content(data_format, url, query):
|
|
| 221 |
return rephrased_content, url
|
| 222 |
return "", url
|
| 223 |
|
| 224 |
-
def fetch_and_extract_content(
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 231 |
|
| 232 |
return all_text_with_urls
|
| 233 |
|
|
|
|
| 221 |
return rephrased_content, url
|
| 222 |
return "", url
|
| 223 |
|
| 224 |
+
def fetch_and_extract_content(
    data_format: str, query: str, urls: List[str], num_refrences: int = 8
) -> List[Tuple[str | None, str]]:
    """Fetch and process URLs concurrently until enough references are collected.

    Submits ``process_content`` for batches of URLs to a thread pool, keeps
    only results whose extracted text is non-empty, and tops up with further
    URLs from ``urls`` until ``num_refrences`` successful extractions are
    gathered or the candidate list is exhausted.

    Args:
        data_format: Format hint forwarded to ``process_content``.
        query: Query string forwarded to ``process_content``.
        urls: Candidate URLs, tried in order.
        num_refrences: Target number of successful extractions (default 8).
            (Spelling kept as-is: the parameter name is part of the public
            interface.)

    Returns:
        A list of ``(content, url)`` tuples whose content is non-empty; may
        hold fewer than ``num_refrences`` entries if too many URLs fail.

    Raises:
        Whatever ``process_content`` raises: a failing future re-raises its
        exception when its result is read.
    """
    all_text_with_urls: List[Tuple[str | None, str]] = []
    start_url = 0
    # `<` rather than `!=`: guards against an infinite loop should the
    # collected count ever overshoot the target.
    while len(all_text_with_urls) < num_refrences and start_url < len(urls):
        # Request exactly as many URLs as we are still short of the target.
        end_url = start_url + (num_refrences - len(all_text_with_urls))
        urls_subset = urls[start_url:end_url]
        # The while-condition guarantees urls_subset is non-empty, so
        # max_workers >= 1.
        with ThreadPoolExecutor(max_workers=len(urls_subset)) as executor:
            future_to_url = {
                executor.submit(process_content, data_format, url, query): url
                for url in urls_subset
            }
            for future in as_completed(future_to_url):
                # Read each result exactly once (the original evaluated
                # future.result() twice: once to filter, once to collect).
                result = future.result()
                if result[0] != "":
                    all_text_with_urls.append(result)
        start_url = end_url
    return all_text_with_urls
|
| 248 |
|