Spaces:
Paused
Paused
:zap: [Enhance] FilepathConverter: Add optional `parent` parameter to `__init__`
Browse files
networks/filepath_converter.py
CHANGED
|
@@ -34,8 +34,9 @@ WINDOWS_INVALID_FILE_PATH_NAMES = [
|
|
| 34 |
|
| 35 |
|
| 36 |
class FilepathConverter:
|
| 37 |
-
def __init__(self):
|
| 38 |
self.output_root = Path(__file__).parents[1] / "files"
|
|
|
|
| 39 |
|
| 40 |
def preprocess(self, input_string):
|
| 41 |
return input_string
|
|
@@ -63,6 +64,7 @@ class FilepathConverter:
|
|
| 63 |
filename = self.validate(filename)
|
| 64 |
filename = self.append_extension(filename)
|
| 65 |
|
|
|
|
| 66 |
if parent:
|
| 67 |
filepath = self.output_root / parent / filename
|
| 68 |
else:
|
|
@@ -75,8 +77,8 @@ class FilepathConverter:
|
|
| 75 |
|
| 76 |
|
| 77 |
class UrlToFilepathConverter(FilepathConverter):
|
| 78 |
-
def __init__(self):
|
| 79 |
-
super().__init__()
|
| 80 |
self.output_root = self.output_root / "urls"
|
| 81 |
|
| 82 |
def preprocess(self, url):
|
|
@@ -85,8 +87,8 @@ class UrlToFilepathConverter(FilepathConverter):
|
|
| 85 |
|
| 86 |
|
| 87 |
class QueryToFilepathConverter(FilepathConverter):
|
| 88 |
-
def __init__(self):
|
| 89 |
-
super().__init__()
|
| 90 |
self.output_root = self.output_root / "queries"
|
| 91 |
|
| 92 |
|
|
@@ -100,5 +102,5 @@ if __name__ == "__main__":
|
|
| 100 |
"https://stackoverflow.com/questions/295135/turn-a-string-into-a-valid-filename"
|
| 101 |
)
|
| 102 |
|
| 103 |
-
url_converter = UrlToFilepathConverter()
|
| 104 |
-
print(url_converter.convert(url
|
|
|
|
| 34 |
|
| 35 |
|
| 36 |
class FilepathConverter:
|
| 37 |
+
def __init__(self, parent: str = None):
|
| 38 |
self.output_root = Path(__file__).parents[1] / "files"
|
| 39 |
+
self.parent = parent
|
| 40 |
|
| 41 |
def preprocess(self, input_string):
|
| 42 |
return input_string
|
|
|
|
| 64 |
filename = self.validate(filename)
|
| 65 |
filename = self.append_extension(filename)
|
| 66 |
|
| 67 |
+
parent = parent or self.parent
|
| 68 |
if parent:
|
| 69 |
filepath = self.output_root / parent / filename
|
| 70 |
else:
|
|
|
|
| 77 |
|
| 78 |
|
| 79 |
class UrlToFilepathConverter(FilepathConverter):
|
| 80 |
+
def __init__(self, parent: str = None):
|
| 81 |
+
super().__init__(parent)
|
| 82 |
self.output_root = self.output_root / "urls"
|
| 83 |
|
| 84 |
def preprocess(self, url):
|
|
|
|
| 87 |
|
| 88 |
|
| 89 |
class QueryToFilepathConverter(FilepathConverter):
|
| 90 |
+
def __init__(self, parent: str = None):
|
| 91 |
+
super().__init__(parent)
|
| 92 |
self.output_root = self.output_root / "queries"
|
| 93 |
|
| 94 |
|
|
|
|
| 102 |
"https://stackoverflow.com/questions/295135/turn-a-string-into-a-valid-filename"
|
| 103 |
)
|
| 104 |
|
| 105 |
+
url_converter = UrlToFilepathConverter(parent=query)
|
| 106 |
+
print(url_converter.convert(url))
|
networks/google_searcher.py
CHANGED
|
@@ -2,6 +2,7 @@ import requests
|
|
| 2 |
from pathlib import Path
|
| 3 |
from utils.enver import enver
|
| 4 |
from utils.logger import logger
|
|
|
|
| 5 |
|
| 6 |
|
| 7 |
class GoogleSearcher:
|
|
@@ -10,7 +11,7 @@ class GoogleSearcher:
|
|
| 10 |
self.url = "https://www.google.com/search"
|
| 11 |
self.enver = enver
|
| 12 |
self.enver.set_envs(proxies=True)
|
| 13 |
-
self.
|
| 14 |
|
| 15 |
def send_request(self, result_num=10):
|
| 16 |
logger.note(f"Searching: [{self.query}]")
|
|
@@ -27,12 +28,11 @@ class GoogleSearcher:
|
|
| 27 |
)
|
| 28 |
|
| 29 |
def save_response(self):
|
| 30 |
-
|
| 31 |
-
if not self.
|
| 32 |
-
self.
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
with open(output_path, "wb") as wf:
|
| 36 |
wf.write(self.request_response.content)
|
| 37 |
|
| 38 |
def search(self, query):
|
|
|
|
| 2 |
from pathlib import Path
|
| 3 |
from utils.enver import enver
|
| 4 |
from utils.logger import logger
|
| 5 |
+
from networks.filepath_converter import QueryToFilepathConverter
|
| 6 |
|
| 7 |
|
| 8 |
class GoogleSearcher:
|
|
|
|
| 11 |
self.url = "https://www.google.com/search"
|
| 12 |
self.enver = enver
|
| 13 |
self.enver.set_envs(proxies=True)
|
| 14 |
+
self.filepath_converter = QueryToFilepathConverter()
|
| 15 |
|
| 16 |
def send_request(self, result_num=10):
|
| 17 |
logger.note(f"Searching: [{self.query}]")
|
|
|
|
| 28 |
)
|
| 29 |
|
| 30 |
def save_response(self):
|
| 31 |
+
self.output_path = self.filepath_converter.convert(self.query)
|
| 32 |
+
if not self.output_path.exists():
|
| 33 |
+
self.output_path.parent.mkdir(parents=True, exist_ok=True)
|
| 34 |
+
logger.note(f"Saving to: [{self.output_path}]")
|
| 35 |
+
with open(self.output_path, "wb") as wf:
|
|
|
|
| 36 |
wf.write(self.request_response.content)
|
| 37 |
|
| 38 |
def search(self, query):
|