Spaces: Running on A10G
app: clear trailing whitespace
app.py CHANGED
@@ -26,12 +26,12 @@ def script_to_use(model_id, api):
 def process_model(model_id, q_method, hf_token, private_repo):
     model_name = model_id.split('/')[-1]
     fp16 = f"{model_name}/{model_name.lower()}.fp16.bin"
-    
+
     try:
         api = HfApi(token=hf_token)
 
         dl_pattern = ["*.md", "*.json", "*.model"]
-        
+
         pattern = (
             "*.safetensors"
             if any(
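The `pattern` expression continues past the end of this hunk. As a minimal sketch of how such a safetensors-versus-bin selection can be written, assuming the repo's file list comes from `HfApi.list_repo_files` (the helper name and exact condition are assumptions, not app.py's actual code):

```python
from huggingface_hub import HfApi

def pick_weight_pattern(model_id: str, api: HfApi) -> str:
    # Hypothetical helper: prefer safetensors shards when the repo has any,
    # otherwise fall back to PyTorch *.bin weights.
    files = api.list_repo_files(model_id)
    has_safetensors = any(f.endswith(".safetensors") for f in files)
    return "*.safetensors" if has_safetensors else "*.bin"
```

The chosen pattern would then be appended to `dl_pattern` so `snapshot_download` pulls the weights alongside the `*.md`, `*.json`, and `*.model` files.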
@@ -48,7 +48,7 @@ def process_model(model_id, q_method, hf_token, private_repo):
 
         snapshot_download(repo_id=model_id, local_dir=model_name, local_dir_use_symlinks=False, token=hf_token, allow_patterns=dl_pattern)
         print("Model downloaded successfully!")
-        
+
         conversion_script = script_to_use(model_id, api)
         fp16_conversion = f"python llama.cpp/{conversion_script} {model_name} --outtype f16 --outfile {fp16}"
         result = subprocess.run(fp16_conversion, shell=True, capture_output=True)
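The hunk ends right after the fp16 conversion; the quantisation call itself falls outside the diff. A sketch of the step that typically follows in llama.cpp-based pipelines, with the output filename and binary path as assumptions rather than app.py's actual lines:

```python
import subprocess

# Hypothetical continuation: turn the fp16 file into the requested quant.
qtype = f"{model_name}/{model_name.lower()}.{q_method.lower()}.gguf"  # assumed naming
quantise_cmd = f"./llama.cpp/quantize {fp16} {qtype} {q_method}"
result = subprocess.run(quantise_cmd, shell=True, capture_output=True)
if result.returncode != 0:
    raise Exception(f"Error quantising: {result.stderr}")
```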
@@ -90,13 +90,13 @@ def process_model(model_id, q_method, hf_token, private_repo):
 Invoke the llama.cpp server or the CLI.
 
 CLI:
-
+
 ```bash
 llama-cli --hf-repo {new_repo_id} --model {qtype.split("/")[-1]} -p "The meaning to life and the universe is "
 ```
 
 Server:
-
+
 ```bash
 llama-server --hf-repo {new_repo_id} --model {qtype.split("/")[-1]} -c 2048
 ```
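These `llama-cli` and `llama-server` snippets are part of the model-card template that app.py writes into the generated repo; `{new_repo_id}` and `{qtype}` are interpolated per run. For a quick local check of a finished quant, the file can also be fetched programmatically; a sketch assuming `hf_hub_download` and hypothetical repo/file names:

```python
import subprocess
from huggingface_hub import hf_hub_download

# Hypothetical example names; substitute the repo the Space created for you.
gguf = hf_hub_download(repo_id="your-user/SomeModel-Q4_K_M-GGUF",
                       filename="somemodel.q4_k_m.gguf")
subprocess.run(["llama-cli", "-m", gguf,
                "-p", "The meaning to life and the universe is "])
```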
@@ -139,22 +139,22 @@ def process_model(model_id, q_method, hf_token, private_repo):
 
 # Create Gradio interface
 iface = gr.Interface(
-    fn=process_model,
+    fn=process_model,
     inputs=[
         gr.Textbox(
-            lines=1,
+            lines=1,
             label="Hub Model ID",
             info="Model repo ID",
         ),
         gr.Dropdown(
-            ["Q2_K", "Q3_K_S", "Q3_K_M", "Q3_K_L", "Q4_0", "Q4_K_S", "Q4_K_M", "Q5_0", "Q5_K_S", "Q5_K_M", "Q6_K", "Q8_0"],
-            label="Quantization Method",
+            ["Q2_K", "Q3_K_S", "Q3_K_M", "Q3_K_L", "Q4_0", "Q4_K_S", "Q4_K_M", "Q5_0", "Q5_K_S", "Q5_K_M", "Q6_K", "Q8_0"],
+            label="Quantization Method",
             info="GGML quantisation type",
             value="Q4_K_M",
             filterable=False
         ),
         gr.Textbox(
-            lines=1,
+            lines=1,
             label="HF Write Token",
             info="https://hf.co/settings/token",
             type="password",
@@ -164,7 +164,7 @@ iface = gr.Interface(
             label="Private Repo",
             info="Create a private repo under your username."
         )
-    ],
+    ],
     outputs=[
         gr.Markdown(label="output"),
         gr.Image(show_label=False),
@@ -172,8 +172,7 @@ iface = gr.Interface(
     title="Create your own GGUF Quants, blazingly fast ⚡!",
     description="The space takes an HF repo as an input, quantises it and creates a Public repo containing the selected quant under your HF user namespace. You need to specify a write token obtained in https://hf.co/settings/tokens.",
     article="<p>Find your write token at <a href='https://huggingface.co/settings/tokens' target='_blank'>token settings</a></p>",
-
 )
 
 # Launch the interface
-iface.launch(debug=True)
+iface.launch(debug=True)
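The remaining hunks touch only whitespace in the `gr.Interface` block. As a stripped-down, runnable sketch of the same construction with a stub in place of the real pipeline (illustrative, not app.py verbatim):

```python
import gradio as gr

def process_model(model_id, q_method, hf_token, private_repo):
    # Stub standing in for the real download/convert/quantise/upload pipeline.
    return f"Would quantise {model_id} to {q_method} (private={private_repo})."

# Q4_K_M is the preselected default; as a rough rule, lower Q-numbers trade
# quality for size, and Q8_0 stays closest to fp16 quality at the largest size.
quant_methods = ["Q2_K", "Q3_K_S", "Q3_K_M", "Q3_K_L", "Q4_0", "Q4_K_S",
                 "Q4_K_M", "Q5_0", "Q5_K_S", "Q5_K_M", "Q6_K", "Q8_0"]

iface = gr.Interface(
    fn=process_model,
    inputs=[
        gr.Textbox(lines=1, label="Hub Model ID", info="Model repo ID"),
        gr.Dropdown(quant_methods, label="Quantization Method",
                    info="GGML quantisation type", value="Q4_K_M", filterable=False),
        gr.Textbox(lines=1, label="HF Write Token", type="password",
                   info="https://hf.co/settings/token"),
        gr.Checkbox(label="Private Repo",
                    info="Create a private repo under your username."),
    ],
    outputs=gr.Markdown(label="output"),
    title="Create your own GGUF Quants, blazingly fast ⚡!",
)

iface.launch(debug=True)
```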