lvkaokao committed · 653f44e
Parent(s): dca5dbd

support fp32/fp16/bf16 eval.

Files changed:
- app.py +1 -1
- src/display/utils.py +18 -4
- src/leaderboard/read_evals.py +2 -2
- src/submission/check_validity.py +21 -3
- src/submission/submit.py +32 -4
app.py CHANGED

@@ -572,7 +572,7 @@ with demo:
                     base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)",
                                                          visible=not IS_PUBLIC)
                     compute_type = gr.Dropdown(
-                        choices=[i.value.name for i in ComputeDtype],
+                        choices=[i.value.name for i in ComputeDtype if i.value.name != "All"],
                         label="Compute dtype",
                         multiselect=False,
                         value="float16",
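The one-line change above keeps the "All" option, a sentinel that is meaningful only as a leaderboard filter, out of the submission form's dropdown. A minimal sketch of the pattern, with a simplified stand-in for the repo's ComputeDtype enum (member list here is illustrative, not the repo's full set):

    from dataclasses import dataclass
    from enum import Enum

    @dataclass
    class ModelDetails:
        name: str

    class ComputeDtype(Enum):
        all = ModelDetails("All")       # sentinel used only for filtering the leaderboard
        fp16 = ModelDetails("float16")
        bf16 = ModelDetails("bfloat16")
        fp32 = ModelDetails("float32")

    # The comprehension from the diff: drop the sentinel before building the dropdown.
    choices = [i.value.name for i in ComputeDtype if i.value.name != "All"]
    print(choices)  # ['float16', 'bfloat16', 'float32']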
    	
src/display/utils.py CHANGED

@@ -242,6 +242,9 @@ class WeightDtype(Enum):
     int4 = ModelDetails("int4")
     nf4 = ModelDetails("nf4")
     fp4 = ModelDetails("fp4")
+    fp16 = ModelDetails("float16")
+    bf16 = ModelDetails("bfloat16")
+    fp32 = ModelDetails("float32")
 
     Unknown = ModelDetails("?")
 
@@ -260,6 +263,12 @@ class WeightDtype(Enum):
             return WeightDtype.fp4
         if weight_dtype in ["All"]:
             return WeightDtype.all
+        if weight_dtype in ["float16"]:
+            return WeightDtype.fp16
+        if weight_dtype in ["bfloat16"]:
+            return WeightDtype.bf16
+        if weight_dtype in ["float32"]:
+            return WeightDtype.fp32
         return WeightDtype.Unknown
 
 class ComputeDtype(Enum):
@@ -317,8 +326,9 @@ class Precision(Enum):
     qt_2bit = ModelDetails("2bit")
     qt_3bit = ModelDetails("3bit")
     qt_4bit = ModelDetails("4bit")
-
-
+    qt_8bit = ModelDetails("8bit")
+    qt_16bit = ModelDetails("16bit")
+    qt_32bit = ModelDetails("32bit")
     Unknown = ModelDetails("?")
 
     def from_str(precision):
@@ -332,8 +342,12 @@ class Precision(Enum):
             return Precision.qt_3bit
         if precision in ["4bit"]:
             return Precision.qt_4bit
-
-
+        if precision in ["8bit"]:
+            return Precision.qt_8bit
+        if precision in ["16bit"]:
+            return Precision.qt_16bit
+        if precision in ["32bit"]:
+            return Precision.qt_32bit
         return Precision.Unknown
 
 
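With the new members, a full-precision checkpoint maps to a real weight dtype and a 16bit/32bit precision bucket instead of falling through to Unknown. A sketch of the round-trip, using a simplified stand-in enum and a table-driven equivalent of the `if ... in [...]` chains added above:

    from dataclasses import dataclass
    from enum import Enum

    @dataclass
    class ModelDetails:
        name: str

    class WeightDtype(Enum):
        # subset of the repo's members, for illustration
        int4 = ModelDetails("int4")
        fp16 = ModelDetails("float16")
        bf16 = ModelDetails("bfloat16")
        fp32 = ModelDetails("float32")
        Unknown = ModelDetails("?")

        @staticmethod
        def from_str(weight_dtype):
            # table-driven equivalent of the chained `if weight_dtype in [...]` checks
            for member in WeightDtype:
                if weight_dtype == member.value.name:
                    return member
            return WeightDtype.Unknown

    assert WeightDtype.from_str("bfloat16") is WeightDtype.bf16
    assert WeightDtype.from_str("float64") is WeightDtype.Unknown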
    	
src/leaderboard/read_evals.py CHANGED

@@ -56,7 +56,7 @@ class EvalResult:
 
         # Precision
         precision = Precision.from_str(config.get("precision", "4bit"))
-        quant_type = QuantType.from_str(config.get("quant_type", "GPTQ"))
+        quant_type = QuantType.from_str(str(config.get("quant_type", "GPTQ")))
         weight_dtype = WeightDtype.from_str(data["task_info"].get("weight_dtype", "int4"))
         compute_dtype = ComputeDtype.from_str(data["task_info"].get("compute_dtype", "bfloat16"))
         # double_quant = data["quantization_config"].get("bnb_4bit_use_double_quant", False)
@@ -209,7 +209,7 @@ def get_request_file_for_model(requests_path, model_name,
             if (
                 req_content["status"] in ["Finished"]
                 and req_content["precision"] == precision.split(".")[-1]
-                and req_content["quant_type"] == quant_type
+                and str(req_content["quant_type"]) == quant_type
                 and req_content["weight_dtype"] == weight_dtype.split(".")[-1]
                 and req_content["compute_dtype"] == compute_dtype.split(".")[-1]
             ):
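Both str() coercions guard against quant_type being None, which becomes possible once full-precision submissions are accepted (see submit.py below): a request file written with quant_type = None round-trips through JSON as null. A small sketch of why the bare comparison would stop matching, using a hypothetical request record:

    import json

    # Hypothetical request record as stored after this commit:
    # a full-precision submission carries quant_type = None, i.e. JSON null.
    req_content = json.loads('{"quant_type": null, "precision": "16bit"}')

    quant_type = "None"  # the string form the caller now passes around

    # Bare comparison: None == "None" -> False, so the request never matches.
    print(req_content["quant_type"] == quant_type)       # False
    # Coerced comparison, as in the diff: "None" == "None" -> True.
    print(str(req_content["quant_type"]) == quant_type)  # True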
    	
src/submission/check_validity.py CHANGED

@@ -69,13 +69,27 @@ def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_rem
             return True, "uses a gated model.", None
         return False, f"was not found or misconfigured on the hub! Error raised was {e.args[0]}", None
 
+
 def get_model_size(model_info: ModelInfo, precision: str):
     size_pattern = re.compile(r"(\d+\.)?\d+(b|m)")
     safetensors = None
     try:
         safetensors = get_safetensors_metadata(model_info.id)
+        num_parameters = 0
+        mem = 0
+        for key in safetensors.parameter_count:
+            if key in ["F16", "BF16"]:
+                mem += safetensors.parameter_count[key] * 2
+            else:
+                mem += safetensors.parameter_count[key] * 4
+
+            num_parameters += safetensors.parameter_count[key]
+
+        params_b = round(num_parameters / 1e9, 2)
+        size_gb = round(mem / 1e9, 2)
+        return params_b, size_gb
     except Exception as e:
-        print(e)
+        print(str(e))
 
     if safetensors is not None:
         model_size = round(sum(safetensors.parameter_count.values()) / 1e9, 3)
@@ -87,9 +101,13 @@ def get_model_size(model_info: ModelInfo, precision: str):
         except AttributeError as e:
             return 0  # Unknown model sizes are indicated as 0, see NUMERIC_INTERVALS in app.py
 
-    size_factor = 8 if (precision == "GPTQ" or "gptq" in model_info.id.lower()) else 1
+    # size_factor = 8 if (precision == "GPTQ" or "gptq" in model_info.id.lower()) else 1
     # model_size = size_factor * model_size
-
+    if precision == "16bit":
+        size_gb = model_size * 2
+    else:
+        size_gb = model_size * 4
+    return model_size, size_gb
 
 KNOWN_SIZE_FACTOR = {
     "gptq": {"4bit": 8, "8bit": 4, "2bit": 8, "3bit": 12},
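get_model_size() now returns a (parameters, size) pair: the parameter count in billions plus an estimated checkpoint size in GB, counting 2 bytes per parameter for 16-bit dtypes and 4 bytes otherwise. A minimal sketch of the metadata path, with a hypothetical parameter_count mapping shaped like the one get_safetensors_metadata() exposes (dtype name -> parameter count):

    # Hypothetical metadata for a ~6.7B bf16 checkpoint.
    parameter_count = {"BF16": 6_738_415_616}

    num_parameters = 0
    mem = 0
    for key, count in parameter_count.items():
        # 2 bytes/param for 16-bit dtypes, otherwise assume 4 bytes
        mem += count * (2 if key in ["F16", "BF16"] else 4)
        num_parameters += count

    params_b = round(num_parameters / 1e9, 2)  # parameters, in billions
    size_gb = round(mem / 1e9, 2)              # estimated size, in GB
    print(params_b, size_gb)  # 6.74 13.48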
    	
src/submission/submit.py CHANGED

@@ -157,11 +157,36 @@ def add_new_eval(
         weight_dtype = "int2"
 
     if quant_type is None or quant_type == "":
-        return styled_error("Please select a quantization model like GPTQ, AWQ etc.")
-
-    model_params, model_size = get_quantized_model_parameters_memory(model_info,
+        # return styled_error("Please select a quantization model like GPTQ, AWQ etc.")
+        # for eval fp32/fp16/bf16
+        quant_type = None
+
+        if quant_type is None:
+            weight_dtype = str(getattr(model_config, "torch_dtype", "float16"))
+            if weight_dtype in ["torch.float16", "float16"]:
+                weight_dtype = "float16"
+                precision = "16bit"
+            elif weight_dtype in ["torch.bfloat16", "bfloat16"]:
+                weight_dtype = "bfloat16"
+                precision = "16bit"
+            elif weight_dtype in ["torch.float32", "float32"]:
+                weight_dtype = "float32"
+                precision = "32bit"
+            else:
+                weight_dtype = "?"
+                precision = "?"
+            model_type = "original"
+            model_params, model_size = get_model_size(model_info=model_info, precision=precision)
+        else:
+            model_params, model_size = get_quantized_model_parameters_memory(model_info,
         quant_method=quant_type.lower(),
         bits=precision)
+            model_type = "quantization"
+    else:
+        model_params, model_size = get_quantized_model_parameters_memory(model_info,
+                quant_method=quant_type.lower(),
+                bits=precision)
+        model_type = "quantization"
 
     if quant_type == "llama.cpp":
         hardware = "cpu"
@@ -170,6 +195,9 @@ def add_new_eval(
     else:
         hardware = "gpu"
 
+    if compute_dtype == "?":
+        compute_dtype = "float16"
+
     eval_entry = {
         "model": model,
         "revision": revision,
@@ -187,7 +215,7 @@ def add_new_eval(
         "hardware": hardware,
         "status": "Pending",
         "submitted_time": current_time,
-        "model_type": 
+        "model_type": model_type,
         "job_id": -1,
         "job_start_time": None,
         "scripts": script
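The new branch infers weight_dtype and precision for full-precision submissions from the model config's torch_dtype (falling back to "?" for anything unexpected), tags them model_type = "original", and leaves the quantized path with model_type = "quantization". A sketch of that mapping written as a lookup table; `cfg` is a hypothetical stand-in for the loaded model_config:

    class cfg:                    # hypothetical stand-in for model_config
        torch_dtype = "bfloat16"  # AutoConfig may also hold e.g. torch.bfloat16

    DTYPE_TO_PRECISION = {
        "torch.float16":  ("float16", "16bit"),
        "float16":        ("float16", "16bit"),
        "torch.bfloat16": ("bfloat16", "16bit"),
        "bfloat16":       ("bfloat16", "16bit"),
        "torch.float32":  ("float32", "32bit"),
        "float32":        ("float32", "32bit"),
    }

    weight_dtype, precision = DTYPE_TO_PRECISION.get(
        str(getattr(cfg, "torch_dtype", "float16")), ("?", "?"))
    print(weight_dtype, precision)  # bfloat16 16bit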
