Quantize-HF-Models

Running

App Files Community

KBaba7 committed on Feb 7

Commit

3ec5df4

verified ·

1 Parent(s): 2846a5f

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -13

app.py CHANGED Viewed

@@ -3,8 +3,6 @@ import subprocess
 import streamlit as st
 from huggingface_hub import snapshot_download
-import subprocess
 def check_directory_path(directory_name: str) -> str:
     if os.path.exists(directory_name):
         path = os.path.abspath(directory_name)
@@ -16,9 +14,9 @@ QUANT_TYPES = [
     "Q5_K_M", "Q5_K_S", "Q6_K"
 ]
-model_dir_path=check_directory_path("llama.cpp")
-def download_model(hf_model_name, output_dir="models"):
     """
     Downloads a Hugging Face model and saves it locally.
     """
@@ -35,8 +33,8 @@ def convert_to_gguf(model_dir, output_file):
     os.makedirs(os.path.dirname(output_file), exist_ok=True)
     st.write(model_dir_path)
     cmd = [
-    "python3", "/app/llama.cpp/convert_hf_to_gguf.py", model_dir,
-    "--outtype", "f16", "--outfile", output_file
     ]
     process = subprocess.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
     if process.returncode == 0:
@@ -54,10 +52,10 @@ def quantize_llama(model_path, quantized_output_path, quant_type):
     subprocess.run(["chmod", "+x", quantize_path], check=True)
     cmd = [
-    "/app/llama.cpp/build/bin/llama-quantize",
-    model_path,
-    quantized_output_path,
-    quant_type
     ]
     process = subprocess.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
@@ -71,7 +69,7 @@ def automate_llama_quantization(hf_model_name, quant_type):
     """
     Orchestrates the entire quantization process.
     """
-    output_dir = "models"
     gguf_file = os.path.join(output_dir, f"{hf_model_name.replace('/', '_')}.gguf")
     quantized_file = gguf_file.replace(".gguf", f"-{quant_type}.gguf")
@@ -107,5 +105,4 @@ if start_button:
         quantized_model_path = automate_llama_quantization(hf_model_name, quant_type)
     if quantized_model_path:
         with open(quantized_model_path, "rb") as f:
-            st.download_button("⬇️ Download Quantized Model", f, file_name=os.path.basename(quantized_model_path))

 import streamlit as st
 from huggingface_hub import snapshot_download
 def check_directory_path(directory_name: str) -> str:
     if os.path.exists(directory_name):
         path = os.path.abspath(directory_name)
     "Q5_K_M", "Q5_K_S", "Q6_K"
 ]
+model_dir_path = check_directory_path("/app/llama.cpp")
+def download_model(hf_model_name, output_dir="/tmp/models"):
     """
     Downloads a Hugging Face model and saves it locally.
     """
     os.makedirs(os.path.dirname(output_file), exist_ok=True)
     st.write(model_dir_path)
     cmd = [
+        "python3", "/app/llama.cpp/convert_hf_to_gguf.py", model_dir,
+        "--outtype", "f16", "--outfile", output_file
     ]
     process = subprocess.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
     if process.returncode == 0:
     subprocess.run(["chmod", "+x", quantize_path], check=True)
     cmd = [
+        "/app/llama.cpp/build/bin/llama-quantize",
+        model_path,
+        quantized_output_path,
+        quant_type
     ]
     process = subprocess.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
     """
     Orchestrates the entire quantization process.
     """
+    output_dir = "/tmp/models"
     gguf_file = os.path.join(output_dir, f"{hf_model_name.replace('/', '_')}.gguf")
     quantized_file = gguf_file.replace(".gguf", f"-{quant_type}.gguf")
         quantized_model_path = automate_llama_quantization(hf_model_name, quant_type)
     if quantized_model_path:
         with open(quantized_model_path, "rb") as f:
+            st.download_button("⬇️ Download Quantized Model", f, file_name=os.path.basename(quantized_model_path))