Spaces:
Running
Running
chore: update estimate tokens
Browse files
lightweight_embeddings/service.py
CHANGED
|
@@ -369,12 +369,13 @@ class EmbeddingsService:
|
|
| 369 |
}
|
| 370 |
|
| 371 |
def estimate_tokens(self, input_data: Union[str, List[str]]) -> int:
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
|
|
|
| 378 |
|
| 379 |
@staticmethod
|
| 380 |
def softmax(scores: np.ndarray) -> np.ndarray:
|
|
|
|
| 369 |
}
|
| 370 |
|
| 371 |
def estimate_tokens(self, input_data: Union[str, List[str]]) -> int:
    """
    Estimate token count using the model's tokenizer.

    The input is normalised through ``self._validate_text_input`` and then
    tokenized by the text model selected via ``self.config.text_model_type``.

    Args:
        input_data: A single string or a list of strings.

    Returns:
        Total number of tokens across all input texts.
    """
    texts = self._validate_text_input(input_data)
    model = self.text_models[self.config.text_model_type]
    tokenized = model.tokenize(texts)

    # NOTE(review): assumes ``model.tokenize`` returns a mapping with
    # 'input_ids' and, optionally, 'attention_mask' (as sentence-transformers
    # does) -- confirm against the concrete model class.
    # Batch tokenizers typically pad every row to the longest sequence, so
    # len(row) would count padding as tokens. The attention mask marks only
    # the real positions, so prefer it when it is available.
    if "attention_mask" in tokenized:
        return sum(int(sum(row)) for row in tokenized["attention_mask"])

    # Fallback: no mask provided, count ids per text as-is.
    return sum(len(ids) for ids in tokenized["input_ids"])
|
| 379 |
|
| 380 |
@staticmethod
|
| 381 |
def softmax(scores: np.ndarray) -> np.ndarray:
|