Spaces:
Runtime error
Runtime error
Slight optimizations
Browse files
- app/data.py +7 -7
- app/model.py +1 -1
- justfile +2 -0
app/data.py
CHANGED
|
@@ -93,19 +93,19 @@ def load_amazonreviews(merge: bool = True) -> tuple[list[str], list[int]]:
|
|
| 93 |
raise FileNotFoundError(msg)
|
| 94 |
|
| 95 |
# Load the datasets
|
|
|
|
| 96 |
with bz2.BZ2File(AMAZONREVIEWS_PATH[1]) as train_file:
|
| 97 |
-
|
| 98 |
|
| 99 |
-
test_data = []
|
| 100 |
if merge:
|
| 101 |
with bz2.BZ2File(AMAZONREVIEWS_PATH[0]) as test_file:
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
# Merge the datasets
|
| 105 |
-
data = train_data + test_data
|
| 106 |
|
| 107 |
# Split the data into labels and text
|
| 108 |
-
labels, texts = zip(*(line.split(" ", 1) for line in data))
|
|
|
|
|
|
|
|
|
|
| 109 |
|
| 110 |
# Map sentiment values
|
| 111 |
sentiments = [int(label.split("__label__")[1]) - 1 for label in labels]
|
|
|
|
| 93 |
raise FileNotFoundError(msg)
|
| 94 |
|
| 95 |
# Load the datasets
|
| 96 |
+
dataset = []
|
| 97 |
with bz2.BZ2File(AMAZONREVIEWS_PATH[1]) as train_file:
|
| 98 |
+
dataset.extend([line.decode("utf-8") for line in train_file])
|
| 99 |
|
|
|
|
| 100 |
if merge:
|
| 101 |
with bz2.BZ2File(AMAZONREVIEWS_PATH[0]) as test_file:
|
| 102 |
+
dataset.extend([line.decode("utf-8") for line in test_file])
|
|
|
|
|
|
|
|
|
|
| 103 |
|
| 104 |
# Split the data into labels and text
|
| 105 |
+
labels, texts = zip(*(line.split(" ", 1) for line in dataset)) # NOTE: Occasionally OOM
|
| 106 |
+
|
| 107 |
+
# Free up memory
|
| 108 |
+
del dataset
|
| 109 |
|
| 110 |
# Map sentiment values
|
| 111 |
sentiments = [int(label.split("__label__")[1]) - 1 for label in labels]
|
app/model.py
CHANGED
|
@@ -121,7 +121,7 @@ def create_model(
|
|
| 121 |
token_pattern=None,
|
| 122 |
),
|
| 123 |
),
|
| 124 |
-
("classifier", LogisticRegression(max_iter=1000,
|
| 125 |
],
|
| 126 |
memory=Memory(CACHE_DIR, verbose=0),
|
| 127 |
verbose=verbose,
|
|
|
|
| 121 |
token_pattern=None,
|
| 122 |
),
|
| 123 |
),
|
| 124 |
+
("classifier", LogisticRegression(max_iter=1000, random_state=seed)),
|
| 125 |
],
|
| 126 |
memory=Memory(CACHE_DIR, verbose=0),
|
| 127 |
verbose=verbose,
|
justfile
CHANGED
|
@@ -13,9 +13,11 @@
|
|
| 13 |
@install-dev:
|
| 14 |
poetry self add poetry-plugin-export
|
| 15 |
poetry install
|
|
|
|
| 16 |
|
| 17 |
@requirements:
|
| 18 |
poetry export -f requirements.txt --output requirements.txt --without dev
|
|
|
|
| 19 |
|
| 20 |
[no-exit-message]
|
| 21 |
@app *ARGS:
|
|
|
|
| 13 |
@install-dev:
|
| 14 |
poetry self add poetry-plugin-export
|
| 15 |
poetry install
|
| 16 |
+
poetry run spacy download en_core_web_sm
|
| 17 |
|
| 18 |
@requirements:
|
| 19 |
poetry export -f requirements.txt --output requirements.txt --without dev
|
| 20 |
+
poetry export -f requirements.txt --output requirements-dev.txt
|
| 21 |
|
| 22 |
[no-exit-message]
|
| 23 |
@app *ARGS:
|