Spaces:
Runtime error
Runtime error
Try to fix cuML not finding the CUDA library?
Browse files
- app.py +7 -10
- requirements.txt +3 -1
app.py
CHANGED
|
@@ -23,21 +23,13 @@ from gradio_huggingfacehub_search import HuggingfaceHubSearch
|
|
| 23 |
from bertopic import BERTopic
|
| 24 |
from bertopic.representation import KeyBERTInspired
|
| 25 |
from bertopic.representation import TextGeneration
|
|
|
|
|
|
|
| 26 |
from huggingface_hub import HfApi
|
| 27 |
from sklearn.feature_extraction.text import CountVectorizer
|
| 28 |
from sentence_transformers import SentenceTransformer
|
| 29 |
from prompts import REPRESENTATION_PROMPT
|
| 30 |
|
| 31 |
-
|
| 32 |
-
@spaces.GPU
|
| 33 |
-
def calculate_embeddings(docs):
|
| 34 |
-
return sentence_model.encode(docs, show_progress_bar=True, batch_size=32)
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
from cuml.manifold import UMAP
|
| 38 |
-
from cuml.cluster import HDBSCAN
|
| 39 |
-
|
| 40 |
-
|
| 41 |
"""
|
| 42 |
TODOs:
|
| 43 |
- Improve representation layer (Try with llamacpp or TextGeneration)
|
|
@@ -135,6 +127,11 @@ def get_docs_from_parquet(parquet_urls, column, offset, limit):
|
|
| 135 |
return df[column].tolist()
|
| 136 |
|
| 137 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
def calculate_n_neighbors_and_components(n_rows):
|
| 139 |
n_neighbors = min(max(n_rows // 20, 15), 100)
|
| 140 |
n_components = 10 if n_rows > 1000 else 5 # Higher components for larger datasets
|
|
|
|
| 23 |
from bertopic import BERTopic
|
| 24 |
from bertopic.representation import KeyBERTInspired
|
| 25 |
from bertopic.representation import TextGeneration
|
| 26 |
+
from cuml.manifold import UMAP
|
| 27 |
+
from cuml.cluster import HDBSCAN
|
| 28 |
from huggingface_hub import HfApi
|
| 29 |
from sklearn.feature_extraction.text import CountVectorizer
|
| 30 |
from sentence_transformers import SentenceTransformer
|
| 31 |
from prompts import REPRESENTATION_PROMPT
|
| 32 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
"""
|
| 34 |
TODOs:
|
| 35 |
- Improve representation layer (Try with llamacpp or TextGeneration)
|
|
|
|
| 127 |
return df[column].tolist()
|
| 128 |
|
| 129 |
|
| 130 |
+
@spaces.GPU
|
| 131 |
+
def calculate_embeddings(docs):
|
| 132 |
+
return sentence_model.encode(docs, show_progress_bar=True, batch_size=32)
|
| 133 |
+
|
| 134 |
+
|
| 135 |
def calculate_n_neighbors_and_components(n_rows):
|
| 136 |
n_neighbors = min(max(n_rows // 20, 15), 100)
|
| 137 |
n_components = 10 if n_rows > 1000 else 5 # Higher components for larger datasets
|
requirements.txt
CHANGED
|
@@ -1,3 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
| 1 |
gradio_huggingfacehub_search==0.0.7
|
| 2 |
duckdb
|
| 3 |
accelerate
|
|
@@ -7,7 +10,6 @@ bitsandbytes
|
|
| 7 |
datamapplot==0.3.0
|
| 8 |
bertopic
|
| 9 |
pandas
|
| 10 |
-
torch
|
| 11 |
numpy
|
| 12 |
python-dotenv
|
| 13 |
kaleido
|
|
|
|
| 1 |
+
spaces
|
| 2 |
+
gradio
|
| 3 |
+
torch
|
| 4 |
gradio_huggingfacehub_search==0.0.7
|
| 5 |
duckdb
|
| 6 |
accelerate
|
|
|
|
| 10 |
datamapplot==0.3.0
|
| 11 |
bertopic
|
| 12 |
pandas
|
|
|
|
| 13 |
numpy
|
| 14 |
python-dotenv
|
| 15 |
kaleido
|