Spaces:
Paused
Paused
:zap: [Enhance] Loop over multiple conditions when extracting the abstract
Browse files
documents/query_results_extractor.py
CHANGED
|
@@ -21,10 +21,18 @@ class QueryResultsExtractor:
|
|
| 21 |
url = result.find("a")["href"]
|
| 22 |
title = result.find("h3").text.strip()
|
| 23 |
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
logger.mesg(
|
| 29 |
f"{title}\n" f" - {site}\n" f" - {url}\n" f" - {abstract}\n" f"\n"
|
| 30 |
)
|
|
|
|
| 21 |
url = result.find("a")["href"]
|
| 22 |
title = result.find("h3").text.strip()
|
| 23 |
|
| 24 |
+
abstract_element_conditions = [
|
| 25 |
+
{"data-sncf": "1"},
|
| 26 |
+
{"class_": "ITZIwc"},
|
| 27 |
+
]
|
| 28 |
+
for condition in abstract_element_conditions:
|
| 29 |
+
abstract_element = result.find("div", condition)
|
| 30 |
+
if abstract_element is not None:
|
| 31 |
+
abstract = abstract_element.text.strip()
|
| 32 |
+
break
|
| 33 |
+
else:
|
| 34 |
+
abstract = ""
|
| 35 |
+
|
| 36 |
logger.mesg(
|
| 37 |
f"{title}\n" f" - {site}\n" f" - {url}\n" f" - {abstract}\n" f"\n"
|
| 38 |
)
|