Spaces:
Runtime error
Move spaces.GPU to generate_topics
Browse files
app.py
CHANGED
|
@@ -15,8 +15,6 @@ from bertopic import BERTopic
|
|
| 15 |
from bertopic.representation import KeyBERTInspired
|
| 16 |
from bertopic.representation import TextGeneration
|
| 17 |
|
| 18 |
-
from cuml.manifold import UMAP
|
| 19 |
-
from cuml.cluster import HDBSCAN
|
| 20 |
|
| 21 |
from huggingface_hub import HfApi, SpaceCard
|
| 22 |
from sklearn.feature_extraction.text import CountVectorizer
|
|
@@ -126,7 +124,7 @@ def get_docs_from_parquet(parquet_urls, column, offset, limit):
|
|
| 126 |
return df[column].tolist()
|
| 127 |
|
| 128 |
|
| 129 |
-
@spaces.GPU
|
| 130 |
def calculate_embeddings(docs):
|
| 131 |
return sentence_model.encode(docs, show_progress_bar=True, batch_size=32)
|
| 132 |
|
|
@@ -137,8 +135,11 @@ def calculate_n_neighbors_and_components(n_rows):
|
|
| 137 |
return n_neighbors, n_components
|
| 138 |
|
| 139 |
|
| 140 |
-
@spaces.GPU
|
| 141 |
def fit_model(docs, embeddings, n_neighbors, n_components):
|
|
|
|
|
|
|
|
|
|
| 142 |
umap_model = UMAP(
|
| 143 |
n_neighbors=n_neighbors,
|
| 144 |
n_components=n_components,
|
|
@@ -234,7 +235,10 @@ datasets:
|
|
| 234 |
return repo_id
|
| 235 |
|
| 236 |
|
|
|
|
| 237 |
def generate_topics(dataset, config, split, column, nested_column, plot_type):
|
|
|
|
|
|
|
| 238 |
logging.info(
|
| 239 |
f"Generating topics for {dataset} with config {config} {split} {column} {nested_column}"
|
| 240 |
)
|
|
|
|
| 15 |
from bertopic.representation import KeyBERTInspired
|
| 16 |
from bertopic.representation import TextGeneration
|
| 17 |
|
|
|
|
|
|
|
| 18 |
|
| 19 |
from huggingface_hub import HfApi, SpaceCard
|
| 20 |
from sklearn.feature_extraction.text import CountVectorizer
|
|
|
|
| 124 |
return df[column].tolist()
|
| 125 |
|
| 126 |
|
| 127 |
+
# @spaces.GPU
|
| 128 |
def calculate_embeddings(docs):
|
| 129 |
return sentence_model.encode(docs, show_progress_bar=True, batch_size=32)
|
| 130 |
|
|
|
|
| 135 |
return n_neighbors, n_components
|
| 136 |
|
| 137 |
|
| 138 |
+
# @spaces.GPU
|
| 139 |
def fit_model(docs, embeddings, n_neighbors, n_components):
|
| 140 |
+
from cuml.manifold import UMAP
|
| 141 |
+
from cuml.cluster import HDBSCAN
|
| 142 |
+
|
| 143 |
umap_model = UMAP(
|
| 144 |
n_neighbors=n_neighbors,
|
| 145 |
n_components=n_components,
|
|
|
|
| 235 |
return repo_id
|
| 236 |
|
| 237 |
|
| 238 |
+
@spaces.GPU(duration=600)
|
| 239 |
def generate_topics(dataset, config, split, column, nested_column, plot_type):
|
| 240 |
+
from cuml.manifold import UMAP
|
| 241 |
+
|
| 242 |
logging.info(
|
| 243 |
f"Generating topics for {dataset} with config {config} {split} {column} {nested_column}"
|
| 244 |
)
|