Upload folder using huggingface_hub
Files changed:
- README.md (+7, -2)
- evaluation_scripts/eval_bright_short.sh (+49, -0)
    	
README.md  CHANGED

@@ -165,7 +165,7 @@ print(scores.cpu().tolist())
 
 ## Evaluation
 
-BGE-Reasoner-Embed-Qwen3-8B-0923 exhibits strong performance in reasoning-intensive retrieval tasks, as demonstrated by its results (nDCG@10 = 37.1 using original query) on the BRIGHT benchmark.
+BGE-Reasoner-Embed-Qwen3-8B-0923 exhibits strong performance in reasoning-intensive retrieval tasks, as demonstrated by its results (nDCG@10 = 37.1 using original query) on the BRIGHT benchmark. You can reproduce the evaluation results using [this script](https://huggingface.co/BAAI/bge-reasoner-embed-qwen3-8b-0923/tree/main/evaluation_scripts/eval_bright_short.sh) (see also the corresponding script in [FlagEmbedding](https://github.com/FlagOpen/FlagEmbedding/blob/master/examples/evaluation/bright/eval_bright_short.sh)).
 
 <img src="./imgs/bright-performance.png" alt="BRIGHT Performance" style="zoom:200%;" />
 
@@ -194,5 +194,10 @@ Note:
 If you find this repository useful, please consider giving a star :star: and citation
 
 ```
-
+@article{chen2025reasonembed,
+  title={ReasonEmbed: Enhanced Text Embeddings for Reasoning-Intensive Document Retrieval},
+  author={Chen, Jianlyu and Lan, Junwei and Li, Chaofan and Lian, Defu and Liu, Zheng},
+  journal={arXiv preprint arXiv:2510.08252},
+  year={2025}
+}
 ```
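The reproduction pointer added above refers to the script committed below. As a rough end-to-end sketch (not part of this commit; it assumes a FlagEmbedding installation that provides the `FlagEmbedding.evaluation.bright` entry point the script invokes, and the 8-GPU `--devices` layout configured there):

```
# Hedged reproduction sketch, not from the commit itself. Assumes the PyPI
# FlagEmbedding release ships the BRIGHT evaluation module and that 8 CUDA
# devices are available; otherwise edit --devices in the script first.
pip install -U FlagEmbedding huggingface_hub
huggingface-cli download BAAI/bge-reasoner-embed-qwen3-8b-0923 \
    evaluation_scripts/eval_bright_short.sh --local-dir .
bash evaluation_scripts/eval_bright_short.sh
```
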
    	
evaluation_scripts/eval_bright_short.sh  ADDED

# Use the default Hugging Face hub cache if HF_HUB_CACHE is not already set.
if [ -z "$HF_HUB_CACHE" ]; then
    export HF_HUB_CACHE="$HOME/.cache/huggingface/hub"
fi

# full datasets
dataset_names="biology earth_science economics psychology robotics stackoverflow sustainable_living leetcode pony aops theoremqa_questions theoremqa_theorems"

# Embedder settings: BGE-Reasoner-Embed-Qwen3-8B-0923 with last-token pooling,
# 8192-token query/passage windows, run on 8 GPUs (cuda:0 through cuda:7).
model_args="\
    --embedder_name_or_path BAAI/bge-reasoner-embed-qwen3-8b-0923 \
    --embedder_model_class decoder-only-base \
    --query_instruction_format_for_retrieval 'Instruct: {}\nQuery: {}' \
    --pooling_method last_token \
    --devices cuda:0 cuda:1 cuda:2 cuda:3 cuda:4 cuda:5 cuda:6 cuda:7 \
    --cache_dir $HF_HUB_CACHE \
    --embedder_batch_size 8 \
    --embedder_query_max_length 8192 \
    --embedder_passage_max_length 8192 \
"

# Query splits to evaluate: "examples" (original queries) and "gpt4_reason" (GPT-4 reasoning-style queries).
split_list=("examples" "gpt4_reason")

for split in "${split_list[@]}"; do
    eval_args="\
        --task_type short \
        --use_special_instructions True \
        --eval_name bright_short \
        --dataset_dir ./bright_short/data \
        --dataset_names $dataset_names \
        --splits $split \
        --corpus_embd_save_dir ./bright_short/corpus_embd \
        --output_dir ./bright_short/search_results/$split \
        --search_top_k 2000 \
        --cache_path $HF_HUB_CACHE \
        --overwrite False \
        --k_values 1 10 100 \
        --eval_output_method markdown \
        --eval_output_path ./bright_short/eval_results_$split.md \
        --eval_metrics ndcg_at_10 recall_at_10 recall_at_100 \
    "

    cmd="python -m FlagEmbedding.evaluation.bright \
        $eval_args \
        $model_args \
    "

    echo $cmd
    eval $cmd

done
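
Given the `--output_dir`, `--eval_output_method markdown`, and `--eval_output_path` arguments above, each loop iteration writes its ranked search results under `./bright_short/search_results/<split>` and a per-split markdown metrics table (nDCG@10, Recall@10, Recall@100). Once a run completes, the summaries can be inspected with, for example:

```
# Per-split metric summaries written by the script (paths set via --eval_output_path).
cat ./bright_short/eval_results_examples.md
cat ./bright_short/eval_results_gpt4_reason.md
```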
