import json
import os
from datetime import datetime, timezone

from huggingface_hub import ModelCard, snapshot_download

from src.display.formatting import styled_error, styled_message, styled_warning
from src.envs import (
    API,
    EVAL_REQUESTS_PATH,
    DYNAMIC_INFO_PATH,
    DYNAMIC_INFO_FILE_PATH,
    DYNAMIC_INFO_REPO,
    H4_TOKEN,
    QUEUE_REPO,
    RATE_LIMIT_PERIOD,
    RATE_LIMIT_QUOTA,
    REPO,
    GIT_REQUESTS_PATH,
    GIT_STATUS_PATH,
)
from src.leaderboard.filter_models import DO_NOT_SUBMIT_MODELS
from src.submission.check_validity import (
    already_submitted_models,
    check_model_card,
    get_model_size,
    is_model_on_hub,
    is_gguf_on_hub,
    user_submission_permission,
    get_model_tags,
)

REQUESTED_MODELS = None
USERS_TO_SUBMISSION_DATES = None


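# Handle a new evaluation request: validate the submitted model, build a pending
# eval-request entry, and commit it to the requests/status git repositories.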
def add_new_eval(
    model: str,
    revision: str,
    private: bool,
    precision: str = "4bit",
    weight_dtype: str = "int4",
    compute_dtype: str = "float16",
    gguf_ftype: str = "*Q4_0.gguf",
):
    global REQUESTED_MODELS
    global USERS_TO_SUBMISSION_DATES
    if not REQUESTED_MODELS:
        REQUESTED_MODELS, USERS_TO_SUBMISSION_DATES = already_submitted_models(GIT_STATUS_PATH)

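    # Split "org/model" into the submitting user/org and the bare model name.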
    quant_type = None
    user_name = ""
    model_path = model
    if "/" in model:
        user_name = model.split("/")[0]
        model_path = model.split("/")[1]

    precision = precision.split(" ")[0]
    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

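    # Rate-limit submissions per user (RATE_LIMIT_QUOTA submissions within RATE_LIMIT_PERIOD).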
    if user_name != "":
        user_can_submit, error_msg = user_submission_permission(
            user_name, USERS_TO_SUBMISSION_DATES, RATE_LIMIT_PERIOD, RATE_LIMIT_QUOTA
        )
        if not user_can_submit:
            return styled_error(error_msg)

    if model in DO_NOT_SUBMIT_MODELS:
        return styled_warning("The model authors have requested that this model not be submitted to the leaderboard.")

    if revision == "":
        revision = "main"

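    # Verify the model exists on the Hugging Face Hub, either as a regular
    # transformers repo or as a GGUF file matching the requested ftype.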
    architecture = "?"
    downloads = 0
    created_at = ""
    gguf_on_hub, error, gguf_files, new_gguf_ftype = is_gguf_on_hub(repo_id=model, filename=gguf_ftype)
    if new_gguf_ftype is not None:
        gguf_ftype = new_gguf_ftype

    model_on_hub, error, model_config = is_model_on_hub(model_name=model, revision=revision, test_tokenizer=True)

    if (not model_on_hub or model_config is None) and (not gguf_on_hub or gguf_files is None):
        return styled_error(f'Model "{model}" {error}')

    if model_config is not None:
        architectures = getattr(model_config, "architectures", None)
        if architectures:
            architecture = ";".join(architectures)
        downloads = getattr(model_config, "downloads", 0)
        created_at = getattr(model_config, "created_at", "")
        quantization_config = getattr(model_config, "quantization_config", None)

    if gguf_files is not None:
        architectures = ""
        downloads = 0
        created_at = ""
        quantization_config = None
        quant_type = "llama.cpp"

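    # Fetch repository metadata (size, likes, license card data) from the Hub API.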
    try:
        model_info = API.model_info(repo_id=model, revision=revision)
    except Exception:
        return styled_error("Could not get your model information. Please make sure it is filled in correctly.")

    model_size = get_model_size(model_info=model_info, precision=precision)

    try:
        if model_info.cardData is None:
            license = "unknown"
        else:
            license = model_info.cardData.get("license", "unknown")
    except Exception:
        return styled_error("Please select a license for your model.")

    modelcard_OK, error_msg, model_card = check_model_card(model)

    """
    if not modelcard_OK:
        return styled_error(error_msg)
    """

    tags = get_model_tags(model_card, model)

    print("Adding new eval")

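    # Infer the quantization method, precision, and target hardware from the
    # model's quantization_config (bitsandbytes, GPTQ, AWQ); GGUF repos were
    # already tagged as llama.cpp above.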
    script = "ITREX"
    hardware = "cpu"
    precision = "4bit"
    if quantization_config is not None:
        quant_method = quantization_config.get("quant_method", None)
        if "bnb_4bit_quant_type" in quantization_config:
            quant_method = "bitsandbytes"
            quant_type = "bitsandbytes"
            hardware = "gpu"
            if quantization_config.get("load_in_4bit", True):
                precision = "4bit"
            if quantization_config.get("load_in_8bit", True):
                precision = "8bit"
        if quant_method == "gptq":
            hardware = "cpu"
            quant_type = "GPTQ"
            precision = f"{quantization_config.get('bits', 4)}bit"
        if quant_method == "awq":
            hardware = "gpu"
            quant_type = "AWQ"
            precision = f"{quantization_config.get('bits', 4)}bit"

    if quant_type is None or quant_type == "":
        return styled_error("Please select a quantization method such as GPTQ, AWQ, etc.")

    # Guard: keep model_params defined for precisions other than 4-/8-bit
    # (e.g. a 3-bit GPTQ config), so the request dict below never hits a NameError.
    model_params = model_size
    if precision in ["4bit", "8bit"]:
        model_params = model_size * 8

        if precision == "4bit":
            model_size = model_params * 0.5

        if precision == "8bit":
            model_size = model_params

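    # GGUF (llama.cpp) submissions run the llama_cpp script on CPU; everything
    # else keeps the ITREX script and is evaluated on GPU.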
    if quant_type == "llama.cpp":
        hardware = "cpu"
        script = "llama_cpp"
        tags = "llama.cpp"
    else:
        hardware = "gpu"

    if hardware == "gpu" and compute_dtype == "bfloat16":
        compute_dtype = "float16"

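    # Assemble the pending evaluation request that gets written to the queue.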
    eval_entry = {
        "model": model,
        "revision": revision,
        "private": private,
        "params": model_size,
        "architectures": architecture,
        "quant_type": quant_type,
        "precision": precision,
        "model_params": model_params,
        "model_size": model_size,
        "weight_dtype": weight_dtype,
        "compute_dtype": compute_dtype,
        "gguf_ftype": gguf_ftype,
        "hardware": hardware,
        "status": "Pending",
        "submitted_time": current_time,
        "model_type": "quantization",
        "job_id": -1,
        "job_start_time": None,
        "scripts": script,
    }

    supplementary_info = {
        "likes": model_info.likes,
        "license": license,
        "still_on_hub": True,
        "tags": tags,
        "downloads": downloads,
        "created_at": created_at,
    }
    print(eval_entry)
    print(supplementary_info)

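    # Write the request and status JSON files under the user's folder in the
    # local checkouts of the requests/status repositories.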
    print("Creating eval file")
    OUT_DIR = f"{GIT_REQUESTS_PATH}/{user_name}"
    os.makedirs(OUT_DIR, exist_ok=True)
    req_out_path = f"{OUT_DIR}/{model_path}_eval_request_{private}_{quant_type}_{precision}_{weight_dtype}_{compute_dtype}.json"
    req_git_path = "/".join(req_out_path.split("/")[1:])

    print("Creating status file")
    OUT_DIR = f"{GIT_STATUS_PATH}/{user_name}"
    os.makedirs(OUT_DIR, exist_ok=True)
    sta_out_path = f"{OUT_DIR}/{model_path}_eval_request_{private}_{quant_type}_{precision}_{weight_dtype}_{compute_dtype}.json"
    sta_git_path = "/".join(sta_out_path.split("/")[1:])

    print("Uploading eval file")

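    # Drop any stale "requests" entries from the git index before re-adding the
    # new files (the GitPython equivalent of `git rm -r --cached requests`).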
    REPO.index.remove("requests", False, r=True)

    with open(req_out_path, "w") as f:
        f.write(json.dumps(eval_entry, indent=4))
    with open(sta_out_path, "w") as f:
        f.write(json.dumps(eval_entry, indent=4))

    branch = REPO.active_branch.name
    REPO.index.add([req_git_path, sta_git_path])
    commit = REPO.index.commit(f"Add {model} to eval requests/status.")

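    # Pull any commits that landed in the meantime, then push the new request.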
    REPO.remotes.origin.pull(branch)
    REPO.remotes.origin.push(branch)

    return styled_message(
        "Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the model to show in the PENDING list."
    )