Spaces:
Runtime error
Runtime error
Try to fix pickle error
Browse files
app.py
CHANGED
|
@@ -137,7 +137,7 @@ def calculate_embeddings(docs):
|
|
| 137 |
|
| 138 |
|
| 139 |
@spaces.GPU
|
| 140 |
-
def fit_model(
|
| 141 |
new_model = BERTopic(
|
| 142 |
"english",
|
| 143 |
# Sub-models
|
|
@@ -154,15 +154,7 @@ def fit_model(base_model, docs, embeddings):
|
|
| 154 |
logging.debug("Fitting new model")
|
| 155 |
new_model.fit(docs, embeddings)
|
| 156 |
logging.debug("End fitting new model")
|
| 157 |
-
|
| 158 |
-
if base_model is None:
|
| 159 |
-
return new_model, new_model
|
| 160 |
-
|
| 161 |
-
updated_model = BERTopic.merge_models([base_model, new_model])
|
| 162 |
-
nr_new_topics = len(set(updated_model.topics_)) - len(set(base_model.topics_))
|
| 163 |
-
new_topics = list(updated_model.topic_labels_.values())[-nr_new_topics:]
|
| 164 |
-
logging.info(f"The following topics are newly found: {new_topics}")
|
| 165 |
-
return updated_model, new_model
|
| 166 |
|
| 167 |
|
| 168 |
def generate_topics(dataset, config, split, column, nested_column):
|
|
@@ -188,7 +180,18 @@ def generate_topics(dataset, config, split, column, nested_column):
|
|
| 188 |
)
|
| 189 |
|
| 190 |
embeddings = calculate_embeddings(docs)
|
| 191 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 192 |
|
| 193 |
repr_model_topics = {
|
| 194 |
key: label[0][0].split("\n")[0]
|
|
|
|
| 137 |
|
| 138 |
|
| 139 |
@spaces.GPU
|
| 140 |
+
def fit_model(docs, embeddings):
|
| 141 |
new_model = BERTopic(
|
| 142 |
"english",
|
| 143 |
# Sub-models
|
|
|
|
| 154 |
logging.debug("Fitting new model")
|
| 155 |
new_model.fit(docs, embeddings)
|
| 156 |
logging.debug("End fitting new model")
|
| 157 |
+
return new_model
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 158 |
|
| 159 |
|
| 160 |
def generate_topics(dataset, config, split, column, nested_column):
|
|
|
|
| 180 |
)
|
| 181 |
|
| 182 |
embeddings = calculate_embeddings(docs)
|
| 183 |
+
new_model = fit_model(docs, embeddings)
|
| 184 |
+
|
| 185 |
+
if base_model is None:
|
| 186 |
+
base_model = new_model
|
| 187 |
+
else:
|
| 188 |
+
updated_model = BERTopic.merge_models([base_model, new_model])
|
| 189 |
+
nr_new_topics = len(set(updated_model.topics_)) - len(
|
| 190 |
+
set(base_model.topics_)
|
| 191 |
+
)
|
| 192 |
+
new_topics = list(updated_model.topic_labels_.values())[-nr_new_topics:]
|
| 193 |
+
logging.info(f"The following topics are newly found: {new_topics}")
|
| 194 |
+
base_model = updated_model
|
| 195 |
|
| 196 |
repr_model_topics = {
|
| 197 |
key: label[0][0].split("\n")[0]
|