Spaces:
Paused
:gem: [Feature] SearchResultsExtractor: related questions
Browse files
documents/search_results_extractor.py
CHANGED
|
@@ -27,17 +27,21 @@ class SearchResultsExtractor:
|
|
| 27 |
print(
|
| 28 |
f"{title}\n" f" - {site}\n" f" - {link}\n" f" - {abstract}\n" f"\n"
|
| 29 |
)
|
|
|
|
| 30 |
|
| 31 |
def extract_related_questions(self):
|
| 32 |
-
|
| 33 |
-
|
|
|
|
|
|
|
|
|
|
| 34 |
print(question)
|
| 35 |
-
|
| 36 |
-
# print(question.find("a").text)
|
| 37 |
|
| 38 |
def extract(self, html_path):
|
| 39 |
self.load_html(html_path)
|
| 40 |
self.extract_search_results()
|
|
|
|
| 41 |
|
| 42 |
|
| 43 |
if __name__ == "__main__":
|
|
|
|
| 27 |
print(
|
| 28 |
f"{title}\n" f" - {site}\n" f" - {link}\n" f" - {abstract}\n" f"\n"
|
| 29 |
)
|
| 30 |
+
print(len(search_result_elements))
|
| 31 |
|
| 32 |
def extract_related_questions(self):
|
| 33 |
+
related_question_elements = self.soup.find_all(
|
| 34 |
+
"div", class_="related-question-pair"
|
| 35 |
+
)
|
| 36 |
+
for question_element in related_question_elements:
|
| 37 |
+
question = question_element.find("span").text.strip()
|
| 38 |
print(question)
|
| 39 |
+
print(len(related_question_elements))
|
|
|
|
| 40 |
|
| 41 |
def extract(self, html_path):
|
| 42 |
self.load_html(html_path)
|
| 43 |
self.extract_search_results()
|
| 44 |
+
self.extract_related_questions()
|
| 45 |
|
| 46 |
|
| 47 |
if __name__ == "__main__":
|