Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -334,6 +334,8 @@ EXTERNAL_MODELS = [
|
|
| 334 |
"st-polish-paraphrase-from-mpnet",
|
| 335 |
"text2vec-base-chinese",
|
| 336 |
"text2vec-large-chinese",
|
|
|
|
|
|
|
| 337 |
"text-embedding-ada-002",
|
| 338 |
"text-similarity-ada-001",
|
| 339 |
"text-similarity-babbage-001",
|
|
@@ -414,17 +416,19 @@ EXTERNAL_MODEL_TO_LINK = {
|
|
| 414 |
"st-polish-paraphrase-from-mpnet": "https://huggingface.co/sdadas/st-polish-paraphrase-from-mpnet",
|
| 415 |
"text2vec-base-chinese": "https://huggingface.co/shibing624/text2vec-base-chinese",
|
| 416 |
"text2vec-large-chinese": "https://huggingface.co/GanymedeNil/text2vec-large-chinese",
|
| 417 |
-
"text-embedding-
|
| 418 |
-
"text-
|
| 419 |
-
"text-
|
| 420 |
-
"text-similarity-
|
| 421 |
-
"text-similarity-
|
| 422 |
-
"text-
|
| 423 |
-
"text-
|
| 424 |
-
"text-search-ada-001": "https://
|
| 425 |
-
"text-search-
|
| 426 |
-
"text-search-
|
| 427 |
-
"text-search-
|
|
|
|
|
|
|
| 428 |
"titan-embed-text-v1": "https://docs.aws.amazon.com/bedrock/latest/userguide/embeddings.html",
|
| 429 |
"unsup-simcse-bert-base-uncased": "https://huggingface.co/princeton-nlp/unsup-simcse-bert-base-uncased",
|
| 430 |
"use-cmlm-multilingual": "https://huggingface.co/sentence-transformers/use-cmlm-multilingual",
|
|
@@ -494,6 +498,8 @@ EXTERNAL_MODEL_TO_DIM = {
|
|
| 494 |
"st-polish-paraphrase-from-mpnet": 768,
|
| 495 |
"text2vec-base-chinese": 768,
|
| 496 |
"text2vec-large-chinese": 1024,
|
|
|
|
|
|
|
| 497 |
"text-embedding-ada-002": 1536,
|
| 498 |
"text-similarity-ada-001": 1024,
|
| 499 |
"text-similarity-babbage-001": 2048,
|
|
@@ -574,6 +580,8 @@ EXTERNAL_MODEL_TO_SEQLEN = {
|
|
| 574 |
"st-polish-paraphrase-from-mpnet": 514,
|
| 575 |
"text2vec-base-chinese": 512,
|
| 576 |
"text2vec-large-chinese": 512,
|
|
|
|
|
|
|
| 577 |
"text-embedding-ada-002": 8191,
|
| 578 |
"text-similarity-ada-001": 2046,
|
| 579 |
"text-similarity-babbage-001": 2046,
|
|
@@ -943,7 +951,11 @@ def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_
|
|
| 943 |
# Model & at least one result
|
| 944 |
if len(out) > 1:
|
| 945 |
if add_emb_dim:
|
| 946 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 947 |
df_list.append(out)
|
| 948 |
df = pd.DataFrame(df_list)
|
| 949 |
# If there are any models that are the same, merge them
|
|
|
|
| 334 |
"st-polish-paraphrase-from-mpnet",
|
| 335 |
"text2vec-base-chinese",
|
| 336 |
"text2vec-large-chinese",
|
| 337 |
+
"text-embedding-3-small",
|
| 338 |
+
"text-embedding-3-large",
|
| 339 |
"text-embedding-ada-002",
|
| 340 |
"text-similarity-ada-001",
|
| 341 |
"text-similarity-babbage-001",
|
|
|
|
| 416 |
"st-polish-paraphrase-from-mpnet": "https://huggingface.co/sdadas/st-polish-paraphrase-from-mpnet",
|
| 417 |
"text2vec-base-chinese": "https://huggingface.co/shibing624/text2vec-base-chinese",
|
| 418 |
"text2vec-large-chinese": "https://huggingface.co/GanymedeNil/text2vec-large-chinese",
|
| 419 |
+
"text-embedding-3-small": "https://openai.com/blog/new-embedding-models-and-api-updates",
|
| 420 |
+
"text-embedding-3-large": "https://openai.com/blog/new-embedding-models-and-api-updates",
|
| 421 |
+
"text-embedding-ada-002": "https://openai.com/blog/new-and-improved-embedding-model",
|
| 422 |
+
"text-similarity-ada-001": "https://openai.com/blog/introducing-text-and-code-embeddings",
|
| 423 |
+
"text-similarity-babbage-001": "https://openai.com/blog/introducing-text-and-code-embeddings",
|
| 424 |
+
"text-similarity-curie-001": "https://openai.com/blog/introducing-text-and-code-embeddings",
|
| 425 |
+
"text-similarity-davinci-001": "https://openai.com/blog/introducing-text-and-code-embeddings",
|
| 426 |
+
"text-search-ada-doc-001": "https://openai.com/blog/introducing-text-and-code-embeddings",
|
| 427 |
+
"text-search-ada-query-001": "https://openai.com/blog/introducing-text-and-code-embeddings",
|
| 428 |
+
"text-search-ada-001": "https://openai.com/blog/introducing-text-and-code-embeddings",
|
| 429 |
+
"text-search-curie-001": "https://openai.com/blog/introducing-text-and-code-embeddings",
|
| 430 |
+
"text-search-babbage-001": "https://openai.com/blog/introducing-text-and-code-embeddings",
|
| 431 |
+
"text-search-davinci-001": "https://openai.com/blog/introducing-text-and-code-embeddings",
|
| 432 |
"titan-embed-text-v1": "https://docs.aws.amazon.com/bedrock/latest/userguide/embeddings.html",
|
| 433 |
"unsup-simcse-bert-base-uncased": "https://huggingface.co/princeton-nlp/unsup-simcse-bert-base-uncased",
|
| 434 |
"use-cmlm-multilingual": "https://huggingface.co/sentence-transformers/use-cmlm-multilingual",
|
|
|
|
| 498 |
"st-polish-paraphrase-from-mpnet": 768,
|
| 499 |
"text2vec-base-chinese": 768,
|
| 500 |
"text2vec-large-chinese": 1024,
|
| 501 |
+
"text-embedding-3-large": 3072,
|
| 502 |
+
"text-embedding-3-small": 1536,
|
| 503 |
"text-embedding-ada-002": 1536,
|
| 504 |
"text-similarity-ada-001": 1024,
|
| 505 |
"text-similarity-babbage-001": 2048,
|
|
|
|
| 580 |
"st-polish-paraphrase-from-mpnet": 514,
|
| 581 |
"text2vec-base-chinese": 512,
|
| 582 |
"text2vec-large-chinese": 512,
|
| 583 |
+
"text-embedding-3-large": 8191,
|
| 584 |
+
"text-embedding-3-small": 8191,
|
| 585 |
"text-embedding-ada-002": 8191,
|
| 586 |
"text-similarity-ada-001": 2046,
|
| 587 |
"text-similarity-babbage-001": 2046,
|
|
|
|
| 951 |
# Model & at least one result
|
| 952 |
if len(out) > 1:
|
| 953 |
if add_emb_dim:
|
| 954 |
+
try:
|
| 955 |
+
# get_dim_seq_size fails on gated repos; for those models we keep only the scores and leave out the dimension / sequence length / size fields
|
| 956 |
+
out["Embedding Dimensions"], out["Sequence Length"], out["Model Size (GB)"] = get_dim_seq_size(model)
|
| 957 |
+
except:
|
| 958 |
+
pass
|
| 959 |
df_list.append(out)
|
| 960 |
df = pd.DataFrame(df_list)
|
| 961 |
# If there are any models that are the same, merge them
|