Spaces:
Runtime error
Runtime error
Try to fix cuML error
Browse files
app.py
CHANGED
|
@@ -23,13 +23,21 @@ from gradio_huggingfacehub_search import HuggingfaceHubSearch
|
|
| 23 |
from bertopic import BERTopic
|
| 24 |
from bertopic.representation import KeyBERTInspired
|
| 25 |
from bertopic.representation import TextGeneration
|
| 26 |
-
from cuml.manifold import UMAP
|
| 27 |
-
from cuml.cluster import HDBSCAN
|
| 28 |
from huggingface_hub import HfApi
|
| 29 |
from sklearn.feature_extraction.text import CountVectorizer
|
| 30 |
from sentence_transformers import SentenceTransformer
|
| 31 |
from prompts import REPRESENTATION_PROMPT
|
| 32 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
"""
|
| 34 |
TODOs:
|
| 35 |
- Improve representation layer (Try with llamacpp or TextGeneration)
|
|
@@ -127,11 +135,6 @@ def get_docs_from_parquet(parquet_urls, column, offset, limit):
|
|
| 127 |
return df[column].tolist()
|
| 128 |
|
| 129 |
|
| 130 |
-
@spaces.GPU
|
| 131 |
-
def calculate_embeddings(docs):
|
| 132 |
-
return sentence_model.encode(docs, show_progress_bar=True, batch_size=32)
|
| 133 |
-
|
| 134 |
-
|
| 135 |
def calculate_n_neighbors_and_components(n_rows):
|
| 136 |
n_neighbors = min(max(n_rows // 20, 15), 100)
|
| 137 |
n_components = 10 if n_rows > 1000 else 5 # Higher components for larger datasets
|
|
|
|
| 23 |
from bertopic import BERTopic
|
| 24 |
from bertopic.representation import KeyBERTInspired
|
| 25 |
from bertopic.representation import TextGeneration
|
|
|
|
|
|
|
| 26 |
from huggingface_hub import HfApi
|
| 27 |
from sklearn.feature_extraction.text import CountVectorizer
|
| 28 |
from sentence_transformers import SentenceTransformer
|
| 29 |
from prompts import REPRESENTATION_PROMPT
|
| 30 |
|
| 31 |
+
|
| 32 |
+
@spaces.GPU
|
| 33 |
+
def calculate_embeddings(docs):
|
| 34 |
+
return sentence_model.encode(docs, show_progress_bar=True, batch_size=32)
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
from cuml.manifold import UMAP
|
| 38 |
+
from cuml.cluster import HDBSCAN
|
| 39 |
+
|
| 40 |
+
|
| 41 |
"""
|
| 42 |
TODOs:
|
| 43 |
- Improve representation layer (Try with llamacpp or TextGeneration)
|
|
|
|
| 135 |
return df[column].tolist()
|
| 136 |
|
| 137 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
def calculate_n_neighbors_and_components(n_rows):
|
| 139 |
n_neighbors = min(max(n_rows // 20, 15), 100)
|
| 140 |
n_components = 10 if n_rows > 1000 else 5 # Higher components for larger datasets
|