Spaces:
Runtime error
Runtime error
Removing stop words, but just for English
Browse files
app.py
CHANGED
|
@@ -22,6 +22,7 @@ from transformers import (
|
|
| 22 |
from prompts import system_prompt, example_prompt, main_prompt
|
| 23 |
from umap import UMAP
|
| 24 |
from hdbscan import HDBSCAN
|
|
|
|
| 25 |
|
| 26 |
# from cuml.cluster import HDBSCAN
|
| 27 |
# from cuml.manifold import UMAP
|
|
@@ -36,7 +37,7 @@ session = requests.Session()
|
|
| 36 |
sentence_model = SentenceTransformer("all-MiniLM-L6-v2")
|
| 37 |
keybert = KeyBERTInspired()
|
| 38 |
mmr = MaximalMarginalRelevance(diversity=0.3)
|
| 39 |
-
|
| 40 |
|
| 41 |
model_id = "meta-llama/Llama-2-7b-chat-hf"
|
| 42 |
device = f"cuda:{cuda.current_device()}" if cuda.is_available() else "cpu"
|
|
@@ -125,6 +126,7 @@ def fit_model(base_model, docs, embeddings):
|
|
| 125 |
umap_model=umap_model,
|
| 126 |
hdbscan_model=hdbscan_model,
|
| 127 |
representation_model=representation_model,
|
|
|
|
| 128 |
# Hyperparameters
|
| 129 |
top_n_words=10,
|
| 130 |
verbose=True,
|
|
|
|
| 22 |
from prompts import system_prompt, example_prompt, main_prompt
|
| 23 |
from umap import UMAP
|
| 24 |
from hdbscan import HDBSCAN
|
| 25 |
+
from sklearn.feature_extraction.text import CountVectorizer
|
| 26 |
|
| 27 |
# from cuml.cluster import HDBSCAN
|
| 28 |
# from cuml.manifold import UMAP
|
|
|
|
| 37 |
sentence_model = SentenceTransformer("all-MiniLM-L6-v2")
|
| 38 |
keybert = KeyBERTInspired()
|
| 39 |
mmr = MaximalMarginalRelevance(diversity=0.3)
|
| 40 |
+
vectorizer_model = CountVectorizer(stop_words="english")
|
| 41 |
|
| 42 |
model_id = "meta-llama/Llama-2-7b-chat-hf"
|
| 43 |
device = f"cuda:{cuda.current_device()}" if cuda.is_available() else "cpu"
|
|
|
|
| 126 |
umap_model=umap_model,
|
| 127 |
hdbscan_model=hdbscan_model,
|
| 128 |
representation_model=representation_model,
|
| 129 |
+
vectorizer_model=vectorizer_model,
|
| 130 |
# Hyperparameters
|
| 131 |
top_n_words=10,
|
| 132 |
verbose=True,
|