Spaces:

ggml-org
/

gguf-my-lora

Running on CPU Upgrade

App Files Files Community

ngxson HF Staff committed on Jan 9

Commit

114dc33

verified ·

1 Parent(s): 674f636

add --base-model-id

Browse files

Files changed (1) hide show

app.py +19 -3

app.py CHANGED Viewed

@@ -14,9 +14,13 @@ from apscheduler.schedulers.background import BackgroundScheduler
 CONVERSION_SCRIPT = "convert_lora_to_gguf.py"
-def process_model(peft_model_id: str, q_method: str, private_repo, oauth_token: gr.OAuthToken | None):
     if oauth_token is None or oauth_token.token is None:
         raise gr.Error("You must be logged in to use GGUF-my-lora")
     model_name = peft_model_id.split('/')[-1]
     gguf_output_name = f"{model_name}-{q_method.lower()}.gguf"
@@ -62,7 +66,7 @@ def process_model(peft_model_id: str, q_method: str, private_repo, oauth_token:
                 if not os.path.exists(adapter_config_dir):
                     raise Exception('adapter_config.json not found. Please ensure the selected repo is a PEFT LoRA model.<br/><br/>If you are converting a model (not a LoRA adapter), please use <a href="https://huggingface.co/spaces/ggml-org/gguf-my-repo" target="_blank" style="text-decoration:underline">GGUF-my-repo</a> instead.')
-                result = subprocess.run([
                     "python",
                     f"llama.cpp/{CONVERSION_SCRIPT}",
                     local_dir,
@@ -70,7 +74,12 @@ def process_model(peft_model_id: str, q_method: str, private_repo, oauth_token:
                     q_method.lower(),
                     "--outfile",
                     gguf_output_path,
-                ], shell=False, capture_output=True)
                 print(result)
                 if result.returncode != 0:
                     raise Exception(f"Error converting to GGUF {q_method}: {result.stderr}")
@@ -148,6 +157,12 @@ with gr.Blocks(css=css) as demo:
         search_type="model",
     )
     q_method = gr.Dropdown(
         ["F32", "F16", "Q8_0"],
         label="Quantization Method",
@@ -167,6 +182,7 @@ with gr.Blocks(css=css) as demo:
         fn=process_model,
         inputs=[
             peft_model_id,
             q_method,
             private_repo,
         ],

 CONVERSION_SCRIPT = "convert_lora_to_gguf.py"
+def process_model(peft_model_id: str, base_model_id: str, q_method: str, private_repo, oauth_token: gr.OAuthToken | None):
     if oauth_token is None or oauth_token.token is None:
         raise gr.Error("You must be logged in to use GGUF-my-lora")
+    # validate the oauth token
+    whoami(oauth_token.token)
     model_name = peft_model_id.split('/')[-1]
     gguf_output_name = f"{model_name}-{q_method.lower()}.gguf"
                 if not os.path.exists(adapter_config_dir):
                     raise Exception('adapter_config.json not found. Please ensure the selected repo is a PEFT LoRA model.<br/><br/>If you are converting a model (not a LoRA adapter), please use <a href="https://huggingface.co/spaces/ggml-org/gguf-my-repo" target="_blank" style="text-decoration:underline">GGUF-my-repo</a> instead.')
+                cmd = [
                     "python",
                     f"llama.cpp/{CONVERSION_SCRIPT}",
                     local_dir,
                     q_method.lower(),
                     "--outfile",
                     gguf_output_path,
+                ]
+                if base_model_id:
+                    cmd.extend(["--base-model-id", base_model_id])
+                print("cmd", cmd)
+                result = subprocess.run(cmd, shell=False, capture_output=True)
                 print(result)
                 if result.returncode != 0:
                     raise Exception(f"Error converting to GGUF {q_method}: {result.stderr}")
         search_type="model",
     )
+    base_model_id = HuggingfaceHubSearch(
+        label="Base model repository (Optional)",
+        placeholder="If empty, we will use the value from adapter_config.json",
+        search_type="model",
+    )
     q_method = gr.Dropdown(
         ["F32", "F16", "Q8_0"],
         label="Quantization Method",
         fn=process_model,
         inputs=[
             peft_model_id,
+            base_model_id,
             q_method,
             private_repo,
         ],