	add_followir_tab (#102)
- add instruction following (cf7ddc6fa21dbe43cd4c5f806946100e72ba8b72)
- update (0d0563c24895d35047cbd4018d9f86afb7e8a239)
- merge in main (aeb9d6091824165ded58f9a5f3b230a4434986b5)
- minor cleanup (b5c28bdf082216a290351c60305b599a952c7a73)
- add bi-encoder button (77cc9e7a65257c5af5784bb60a3dac2073e7fe05)
- EXTERNAL_MODEL_RESULTS.json +0 -0
- app.py +39 -7
- config.yaml +25 -0
- model_meta.yaml +134 -0
    	
EXTERNAL_MODEL_RESULTS.json
CHANGED

The diff for this file is too large to render. See raw diff.
    	
app.py
CHANGED

@@ -17,6 +17,11 @@ TASKS_CONFIG = LEADERBOARD_CONFIG["tasks"]
 BOARDS_CONFIG = LEADERBOARD_CONFIG["boards"]
 
 TASKS = list(TASKS_CONFIG.keys())
+PRETTY_NAMES = {
+    "InstructionRetrieval": "Retrieval w/Instructions",
+    "PairClassification": "Pair Classification",
+    "BitextMining": "Bitext Mining",
+}
 
 TASK_TO_METRIC = {k:v["metric"] for k,v in TASKS_CONFIG.items()}
 
@@ -34,18 +39,30 @@ EXTERNAL_MODEL_TO_DIM = {k: v["dim"] for k,v in MODEL_META["model_meta"].items()
 EXTERNAL_MODEL_TO_SEQLEN = {k: v["seq_len"] for k,v in MODEL_META["model_meta"].items() if v.get("seq_len", False)}
 EXTERNAL_MODEL_TO_SIZE = {k: v["size"] for k,v in MODEL_META["model_meta"].items() if v.get("size", False)}
 PROPRIETARY_MODELS = {k for k,v in MODEL_META["model_meta"].items() if v.get("is_proprietary", False)}
+TASK_DESCRIPTIONS = {k: v["task_description"] for k,v in TASKS_CONFIG.items()}
+TASK_DESCRIPTIONS["Overall"] = "Overall performance across MTEB tasks."
 SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS = {k for k,v in MODEL_META["model_meta"].items() if v.get("is_sentence_transformers_compatible", False)}
 MODELS_TO_SKIP = MODEL_META["models_to_skip"]
+CROSS_ENCODERS = MODEL_META["cross_encoders"]
+BI_ENCODERS = [k for k, _ in MODEL_META["model_meta"].items() if k not in CROSS_ENCODERS + ["bm25"]]
 
 PROPRIETARY_MODELS = {
     make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, f"https://huggingface.co/spaces/{REPO_ID}"))
     for model in PROPRIETARY_MODELS
 }
-
 SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS = {
     make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, f"https://huggingface.co/spaces/{REPO_ID}"))
     for model in SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS
 }
+CROSS_ENCODERS = {
+    make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, f"https://huggingface.co/spaces/{REPO_ID}"))
+    for model in CROSS_ENCODERS
+}
+BI_ENCODERS = {
+    make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, f"https://huggingface.co/spaces/{REPO_ID}"))
+    for model in BI_ENCODERS
+}
+
 
 TASK_TO_TASK_TYPE = {task_category: [] for task_category in TASKS}
 for board_config in BOARDS_CONFIG.values():
@@ -164,7 +181,13 @@ def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_
     # Initialize list to models that we cannot fetch metadata from
     df_list = []
     for model in EXTERNAL_MODEL_RESULTS:
-        results_list = [
+        results_list = []
+        for task in tasks:
+            # Not all models have InstructionRetrieval, other new tasks
+            if task not in EXTERNAL_MODEL_RESULTS[model]:
+                continue
+            results_list += EXTERNAL_MODEL_RESULTS[model][task][task_to_metric[task]]
+
         if len(datasets) > 0:
             res = {k: v for d in results_list for k, v in d.items() if (k == "Model") or any([x in k for x in datasets])}
         elif langs:
@@ -383,7 +406,10 @@ for task in TASKS:
     data[task] = {"metric": TASKS_CONFIG[task]["metric_description"], "data": []}
 
 for board, board_config in BOARDS_CONFIG.items():
-
+    init_name = board_config["title"]
+    if init_name in PRETTY_NAMES:
+        init_name = PRETTY_NAMES[init_name]
+    board_pretty_name = f"{init_name} leaderboard"
     acronym = board_config.get("acronym", None)
     board_icon = board_config.get("icon", None)
     if board_icon is None:
@@ -439,7 +465,7 @@ function(goalUrlObject) {
 def update_url_task(event: gr.SelectData, current_task_language: dict, language_per_task: dict):
     current_task_language["task"] = event.target.id
     # Either use the cached language for this task or the 1st language
-    current_task_language["language"] = language_per_task.get(event.target.id, event.target.children[
+    current_task_language["language"] = language_per_task.get(event.target.id, event.target.children[1].children[0].id)
     return current_task_language, language_per_task
 
 def update_url_language(event: gr.SelectData, current_task_language: dict, language_per_task: dict):
@@ -461,6 +487,8 @@ MODEL_TYPES = [
     "Open",
     "Proprietary",
     "Sentence Transformers",
+    "Cross-Encoders",
+    "Bi-Encoders"
 ]
 
 def filter_data(search_query, model_types, model_sizes, *full_dataframes):
@@ -484,6 +512,10 @@ def filter_data(search_query, model_types, model_sizes, *full_dataframes):
                     masks.append(df["Model"].isin(PROPRIETARY_MODELS))
                 elif model_type == "Sentence Transformers":
                     masks.append(df["Model"].isin(SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS))
+                elif model_type == "Cross-Encoders":
+                    masks.append(df["Model"].isin(CROSS_ENCODERS))
+                elif model_type == "Bi-Encoders":
+                    masks.append(df["Model"].isin(BI_ENCODERS))
             if masks:
                 df = df[reduce(lambda a, b: a | b, masks)]
             else:
@@ -535,16 +567,16 @@ with gr.Blocks(css=css) as block:
     with gr.Tabs() as outer_tabs:
         # Store the tabs for updating them on load based on URL parameters
         tabs.append(outer_tabs)
-
        for task, task_values in data.items():
             metric = task_values["metric"]
             task_tab_id = task.lower().replace(" ", "-")
 
             # Overall, Bitext Mining, Classification, etc.
-
+            pretty_task_name = task if task not in PRETTY_NAMES.keys() else PRETTY_NAMES[task]
+            with gr.Tab(pretty_task_name, id=task_tab_id) as task_tab:
                 # For updating the 'task' in the URL
                 task_tab.select(update_url_task, [current_task_language, language_per_task], [current_task_language, language_per_task]).then(None, [current_task_language], [], js=set_window_url_params)
-
+                gr.Markdown(TASK_DESCRIPTIONS[task])
                 with gr.Tabs() as task_tabs:
                     # Store the task tabs for updating them on load based on URL parameters
                     tabs.append(task_tabs)
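The new "Cross-Encoders" / "Bi-Encoders" options in MODEL_TYPES work the same way as the existing ones: each selected type contributes an isin() mask and the masks are OR-ed together in filter_data(). Below is a minimal, self-contained sketch of that masking pattern; the model names and scores are placeholders, and the behaviour when nothing is selected is an assumption of this sketch, not taken from app.py.

from functools import reduce

import pandas as pd

# Placeholder groups standing in for the sets derived from MODEL_META above.
CROSS_ENCODERS = {"FollowIR-7B", "monot5-base-msmarco-10k"}
BI_ENCODERS = {"e5-base-v2", "tart-dual-contriever-msmarco"}

# Dummy frame; the scores are made up and only illustrate the filtering.
df = pd.DataFrame({
    "Model": ["FollowIR-7B", "e5-base-v2", "bm25", "monot5-base-msmarco-10k"],
    "Score": [1.0, 2.0, 3.0, 4.0],
})

def filter_by_model_type(df: pd.DataFrame, model_types: list) -> pd.DataFrame:
    masks = []
    for model_type in model_types:
        if model_type == "Cross-Encoders":
            masks.append(df["Model"].isin(CROSS_ENCODERS))
        elif model_type == "Bi-Encoders":
            masks.append(df["Model"].isin(BI_ENCODERS))
    # Keep rows matching any selected type; with no selection, return df
    # unchanged (an assumption for this sketch).
    return df[reduce(lambda a, b: a | b, masks)] if masks else df

print(filter_by_model_type(df, ["Cross-Encoders"]))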
    	
config.yaml
CHANGED

@@ -7,34 +7,47 @@ tasks:
     icon: "🎌"
     metric: f1
     metric_description: "[F1](https://huggingface.co/spaces/evaluate-metric/f1)"
+    task_description: "Bitext mining is the task of finding parallel sentences in two languages."
   Classification:
     icon: "❤️"
     metric: accuracy
     metric_description: "[Accuracy](https://huggingface.co/spaces/evaluate-metric/accuracy)"
+    task_description: "Classification is the task of assigning a label to a text."
   Clustering:
     icon: "✨"
     metric: v_measure
     metric_description: "Validity Measure (v_measure)"
+    task_description: "Clustering is the task of grouping similar documents together."
   PairClassification:
     icon: "🎭"
     metric: cos_sim_ap
     metric_description: "Average Precision based on Cosine Similarities (cos_sim_ap)"
+    task_description: "Pair classification is the task of determining whether two texts are similar."
   Reranking:
     icon: "🥈"
     metric: map
     metric_description: "Mean Average Precision (MAP)"
+    task_description: "Reranking is the task of reordering a list of documents to improve relevance."
   Retrieval:
     icon: "🔎"
     metric: ndcg_at_10
     metric_description: "Normalized Discounted Cumulative Gain @ k (ndcg_at_10)"
+    task_description: "Retrieval is the task of finding relevant documents for a query."
   STS:
     icon: "🤖"
     metric: cos_sim_spearman
     metric_description: "Spearman correlation based on cosine similarity"
+    task_description: "Semantic Textual Similarity is the task of determining how similar two texts are."
   Summarization:
     icon: "📜"
     metric: cos_sim_spearman
     metric_description: "Spearman correlation based on cosine similarity"
+    task_description: "Summarization is the task of generating a summary of a text."
+  InstructionRetrieval:
+    icon: "🔎📋"
+    metric: "p-MRR"
+    metric_description: "paired mean reciprocal rank"
+    task_description: "Retrieval w/Instructions is the task of finding relevant documents for a query that has detailed instructions."
 boards:
   en:
     title: English
@@ -250,6 +263,18 @@ boards:
       - MassiveIntentClassification (nb)
       - MassiveScenarioClassification (nb)
      - ScalaNbClassification
+  instructions:
+    title: English
+    language_long: "English"
+    has_overall: false
+    acronym: null
+    icon: null
+    credits: "[Orion Weller, FollowIR](https://arxiv.org/abs/2403.15246)"
+    tasks:
+      InstructionRetrieval:
+      - Robust04InstructionRetrieval
+      - News21InstructionRetrieval
+      - Core17InstructionRetrieval
   law:
     title: Law
     language_long: "English, German, Chinese"
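Every task entry now carries a task_description, which app.py reads into TASK_DESCRIPTIONS and renders with gr.Markdown under the corresponding tab. A small runnable sketch of that loading step, assuming PyYAML and using a trimmed inline stand-in for config.yaml rather than the real file:

import yaml  # assumes PyYAML is installed

# Trimmed stand-in for config.yaml; only two tasks are shown.
CONFIG_SNIPPET = """
tasks:
  Retrieval:
    icon: "🔎"
    metric: ndcg_at_10
    metric_description: "Normalized Discounted Cumulative Gain @ k (ndcg_at_10)"
    task_description: "Retrieval is the task of finding relevant documents for a query."
  InstructionRetrieval:
    icon: "🔎📋"
    metric: "p-MRR"
    metric_description: "paired mean reciprocal rank"
    task_description: "Retrieval w/Instructions is the task of finding relevant documents for a query that has detailed instructions."
"""

TASKS_CONFIG = yaml.safe_load(CONFIG_SNIPPET)["tasks"]
TASK_TO_METRIC = {k: v["metric"] for k, v in TASKS_CONFIG.items()}
TASK_DESCRIPTIONS = {k: v["task_description"] for k, v in TASKS_CONFIG.items()}
TASK_DESCRIPTIONS["Overall"] = "Overall performance across MTEB tasks."

# Because the comprehension indexes v["task_description"] directly, every task
# in config.yaml needs the new field; that is why this hunk adds it to all
# existing tasks, not only to InstructionRetrieval.
print(TASK_TO_METRIC["InstructionRetrieval"])      # p-MRR
print(TASK_DESCRIPTIONS["InstructionRetrieval"])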
    	
model_meta.yaml
CHANGED

@@ -47,6 +47,20 @@ model_meta:
     is_external: true
     is_proprietary: false
     is_sentence_transformers_compatible: true
+  FollowIR-7B:
+    link: https://huggingface.co/jhu-clsp/FollowIR-7B
+    seq_len: 4096
+    size: 7240
+    is_external: true
+    is_propietary: false
+    is_sentence_transformer_compatible: false
+  GritLM-7B:
+    link: https://huggingface.co/GritLM/GritLM-7B
+    seq_len: 4096
+    size: 7240
+    is_external: true
+    is_propietary: false
+    is_sentence_transformer_compatible: false
   LASER2:
     link: https://github.com/facebookresearch/LASER
     seq_len: N/A
@@ -263,6 +277,12 @@ model_meta:
     is_external: true
     is_proprietary: false
     is_sentence_transformers_compatible: true
+  bm25:
+    link: https://en.wikipedia.org/wiki/Okapi_BM25
+    size: 0
+    is_external: true
+    is_proprietary: false
+    is_sentence_transformers_compatible: false
   camembert-base:
     link: https://huggingface.co/almanach/camembert-base
     seq_len: 512
@@ -359,6 +379,14 @@ model_meta:
     is_external: true
     is_proprietary: false
     is_sentence_transformers_compatible: true
+  e5-base-v2:
+    link: https://huggingface.co/intfloat/e5-base-v2
+    seq_len: 512
+    size: 110
+    dim: 768
+    is_external: true
+    is_proprietary: false
+    is_sentence_transformers_compatible: true
   e5-base:
     link: https://huggingface.co/intfloat/e5-base
     seq_len: 512
@@ -367,6 +395,14 @@ model_meta:
     is_external: true
     is_proprietary: false
     is_sentence_transformers_compatible: true
+  e5-large-v2:
+    link: https://huggingface.co/intfloat/e5-large-v2
+    seq_len: 512
+    size: 335
+    dim: 1024
+    is_external: true
+    is_proprietary: false
+    is_sentence_transformers_compatible: true
   e5-large:
     link: https://huggingface.co/intfloat/e5-large
     seq_len: 512
@@ -407,6 +443,22 @@ model_meta:
     is_external: true
     is_proprietary: false
     is_sentence_transformers_compatible: true
+  flan-t5-base:
+    link: https://huggingface.co/google/flan-t5-base
+    seq_len: 512
+    size: 220
+    dim: -1
+    is_external: true
+    is_proprietary: false
+    is_sentence_transformers_compatible: true
+  flan-t5-large:
+    link: https://huggingface.co/google/flan-t5-large
+    seq_len: 512
+    size: 770
+    dim: -1
+    is_external: true
+    is_proprietary: false
+    is_sentence_transformers_compatible: true
   flaubert_base_cased:
     link: https://huggingface.co/flaubert/flaubert_base_cased
     seq_len: 512
@@ -535,6 +587,22 @@ model_meta:
     is_external: true
     is_proprietary: false
     is_sentence_transformers_compatible: true
+  instructor-base:
+    link: https://huggingface.co/hkunlp/instructor-base
+    seq_len: N/A
+    size: 110
+    dim: 768
+    is_external: true
+    is_proprietary: false
+    is_sentence_transformers_compatible: true
+  instructor-xl:
+    link: https://huggingface.co/hkunlp/instructor-xl
+    seq_len: N/A
+    size: 1241
+    dim: 768
+    is_external: true
+    is_proprietary: false
+    is_sentence_transformers_compatible: true
   komninos:
     link: https://huggingface.co/sentence-transformers/average_word_embeddings_komninos
     seq_len: N/A
@@ -543,6 +611,14 @@ model_meta:
     is_external: true
     is_proprietary: false
     is_sentence_transformers_compatible: true
+  llama-2-7b-chat:
+    link: https://huggingface.co/meta-llama/Llama-2-7b-chat-hf
+    seq_len: 4096
+    size: 7000
+    dim: -1
+    is_external: true
+    is_proprietary: false
+    is_sentence_transformers_compatible: false
   luotuo-bert-medium:
     link: https://huggingface.co/silk-road/luotuo-bert-medium
     seq_len: 512
@@ -567,6 +643,14 @@ model_meta:
     is_external: true
     is_proprietary: false
     is_sentence_transformers_compatible: true
+  mistral-7b-instruct-v0.2:
+    link: https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2
+    seq_len: 4096
+    size: 7240
+    dim: -1
+    is_external: true
+    is_proprietary: false
+    is_sentence_transformers_compatible: false
   mistral-embed:
     link: https://docs.mistral.ai/guides/embeddings
     seq_len: null
@@ -575,6 +659,30 @@ model_meta:
     is_external: true
     is_proprietary: true
     is_sentence_transformers_compatible: false
+  monobert-large-msmarco:
+    link: https://huggingface.co/castorini/monobert-large-msmarco
+    seq_len: 512
+    size: 770
+    dim: -1
+    is_external: true
+    is_proprietary: false
+    is_sentence_transformers_compatible: false
+  monot5-3b-msmarco-10k:
+    link: https://huggingface.co/castorini/monot5-3b-msmarco-10k
+    seq_len: 512
+    size: 2480
+    dim: -1
+    is_external: true
+    is_proprietary: false
+    is_sentence_transformers_compatible: false
+  monot5-base-msmarco-10k:
+    link: https://huggingface.co/castorini/monot5-base-msmarco-10k
+    seq_len: 512
+    size: 220
+    dim: -1
+    is_external: true
+    is_proprietary: false
+    is_sentence_transformers_compatible: false
   msmarco-bert-co-condensor:
     link: https://huggingface.co/sentence-transformers/msmarco-bert-co-condensor
     seq_len: 512
@@ -903,6 +1011,22 @@ model_meta:
     is_external: true
     is_proprietary: true
     is_sentence_transformers_compatible: false
+  tart-dual-contriever-msmarco:
+    link: https://huggingface.co/orionweller/tart-dual-contriever-msmarco
+    seq_len: 512
+    size: 110
+    dim: 768
+    is_external: true
+    is_proprietary: false
+    is_sentence_transformers_compatible: false
+  tart-full-flan-t5-xl:
+    link: https://huggingface.co/facebook/tart-full-flan-t5-xl
+    seq_len: 512
+    size: 2480
+    dim: -1
+    is_external: true
+    is_proprietary: false
+    is_sentence_transformers_compatible: false
   text2vec-base-chinese:
     link: https://huggingface.co/shibing624/text2vec-base-chinese
     seq_len: 512
@@ -1184,3 +1308,13 @@ models_to_skip:
 - michaelfeil/ct2fast-gte-large
 - gizmo-ai/Cohere-embed-multilingual-v3.0
 - McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp-unsup-simcse
+cross_encoders:
+- FollowIR-7B
+- flan-t5-base
+- flan-t5-large
+- monobert-large-msmarco
+- monot5-3b-msmarco-10k
+- monot5-base-msmarco-10k
+- llama-2-7b-chat
+- mistral-7b-instruct-v0.2
+- tart-full-flan-t5-xl
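The new top-level cross_encoders list is what the CROSS_ENCODERS / BI_ENCODERS constants added in app.py are derived from, with bm25 excluded from both groups. A small self-contained sketch of that derivation, assuming PyYAML and using a trimmed inline stand-in for model_meta.yaml:

import yaml  # assumes PyYAML is installed

# Trimmed stand-in for model_meta.yaml; only three entries are shown.
META_SNIPPET = """
model_meta:
  FollowIR-7B:
    link: https://huggingface.co/jhu-clsp/FollowIR-7B
    seq_len: 4096
    size: 7240
  bm25:
    link: https://en.wikipedia.org/wiki/Okapi_BM25
    size: 0
  e5-base-v2:
    link: https://huggingface.co/intfloat/e5-base-v2
    seq_len: 512
    size: 110
cross_encoders:
- FollowIR-7B
"""

MODEL_META = yaml.safe_load(META_SNIPPET)
CROSS_ENCODERS = MODEL_META["cross_encoders"]
# Everything that is not an explicit cross-encoder (and not bm25) counts as a
# bi-encoder, mirroring the BI_ENCODERS comprehension added to app.py.
BI_ENCODERS = [k for k, _ in MODEL_META["model_meta"].items() if k not in CROSS_ENCODERS + ["bm25"]]

print(CROSS_ENCODERS)  # ['FollowIR-7B']
print(BI_ENCODERS)     # ['e5-base-v2']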
 
			
