Spaces:
Runtime error
Runtime error
Try to fix pickling error
Browse files
app.py
CHANGED
|
@@ -110,6 +110,8 @@ reduce_umap_model = UMAP(
|
|
| 110 |
random_state=42,
|
| 111 |
)
|
| 112 |
|
|
|
|
|
|
|
| 113 |
|
| 114 |
def get_parquet_urls(dataset, config, split):
|
| 115 |
parquet_files = session.get(
|
|
@@ -138,6 +140,8 @@ def calculate_embeddings(docs):
|
|
| 138 |
|
| 139 |
@spaces.GPU
|
| 140 |
def fit_model(docs, embeddings):
|
|
|
|
|
|
|
| 141 |
new_model = BERTopic(
|
| 142 |
"english",
|
| 143 |
# Sub-models
|
|
@@ -151,10 +155,13 @@ def fit_model(docs, embeddings):
|
|
| 151 |
verbose=True,
|
| 152 |
min_topic_size=15, # TODO: Should this value be coherent with N_NEIGHBORS?
|
| 153 |
)
|
| 154 |
-
logging.
|
| 155 |
new_model.fit(docs, embeddings)
|
| 156 |
-
logging.
|
| 157 |
-
|
|
|
|
|
|
|
|
|
|
| 158 |
|
| 159 |
|
| 160 |
def generate_topics(dataset, config, split, column, nested_column):
|
|
@@ -180,12 +187,12 @@ def generate_topics(dataset, config, split, column, nested_column):
|
|
| 180 |
)
|
| 181 |
|
| 182 |
embeddings = calculate_embeddings(docs)
|
| 183 |
-
|
| 184 |
|
| 185 |
if base_model is None:
|
| 186 |
-
base_model =
|
| 187 |
else:
|
| 188 |
-
updated_model = BERTopic.merge_models([base_model,
|
| 189 |
nr_new_topics = len(set(updated_model.topics_)) - len(
|
| 190 |
set(base_model.topics_)
|
| 191 |
)
|
|
|
|
| 110 |
random_state=42,
|
| 111 |
)
|
| 112 |
|
| 113 |
+
global_topic_model = None
|
| 114 |
+
|
| 115 |
|
| 116 |
def get_parquet_urls(dataset, config, split):
|
| 117 |
parquet_files = session.get(
|
|
|
|
| 140 |
|
| 141 |
@spaces.GPU
|
| 142 |
def fit_model(docs, embeddings):
|
| 143 |
+
global global_topic_model
|
| 144 |
+
|
| 145 |
new_model = BERTopic(
|
| 146 |
"english",
|
| 147 |
# Sub-models
|
|
|
|
| 155 |
verbose=True,
|
| 156 |
min_topic_size=15, # TODO: Should this value be coherent with N_NEIGHBORS?
|
| 157 |
)
|
| 158 |
+
logging.info("Fitting new model")
|
| 159 |
new_model.fit(docs, embeddings)
|
| 160 |
+
logging.info("End fitting new model")
|
| 161 |
+
|
| 162 |
+
global_topic_model = new_model
|
| 163 |
+
|
| 164 |
+
logging.info("Global model updated")
|
| 165 |
|
| 166 |
|
| 167 |
def generate_topics(dataset, config, split, column, nested_column):
|
|
|
|
| 187 |
)
|
| 188 |
|
| 189 |
embeddings = calculate_embeddings(docs)
|
| 190 |
+
fit_model(docs, embeddings)
|
| 191 |
|
| 192 |
if base_model is None:
|
| 193 |
+
base_model = global_topic_model
|
| 194 |
else:
|
| 195 |
+
updated_model = BERTopic.merge_models([base_model, global_topic_model])
|
| 196 |
nr_new_topics = len(set(updated_model.topics_)) - len(
|
| 197 |
set(base_model.topics_)
|
| 198 |
)
|