Update app.py (#26)
Commit ce1edeed17a361c0bec40bf031e53132750c0ded

app.py CHANGED
@@ -23,14 +23,30 @@ def script_to_use(model_id, api):
     arch = arch[0]
     return "convert.py" if arch in LLAMA_LIKE_ARCHS else "convert-hf-to-gguf.py"

-def process_model(model_id, q_method, hf_token):
+def process_model(model_id, q_method, hf_token, private_repo):
     model_name = model_id.split('/')[-1]
     fp16 = f"{model_name}/{model_name.lower()}.fp16.bin"

     try:
         api = HfApi(token=hf_token)

-        snapshot_download(repo_id=model_id, local_dir=model_name, local_dir_use_symlinks=False)
+        dl_pattern = ["*.md", "*.json", "*.model"]
+
+        pattern = (
+            "*.safetensors"
+            if any(
+                file.path.endswith(".safetensors")
+                for file in api.list_repo_tree(
+                    repo_id=model_id,
+                    recursive=True,
+                )
+            )
+            else "*.bin"
+        )
+
+        dl_pattern += pattern
+
+        snapshot_download(repo_id=model_id, local_dir=model_name, local_dir_use_symlinks=False, token=hf_token, allow_patterns=dl_pattern)
         print("Model downloaded successfully!")

         conversion_script = script_to_use(model_id, api)
@@ -49,7 +65,7 @@ def process_model(model_id, q_method, hf_token):
         print("Quantised successfully!")

         # Create empty repo
-        new_repo_url = api.create_repo(repo_id=f"{model_name}-{q_method}-GGUF", exist_ok=True)
+        new_repo_url = api.create_repo(repo_id=f"{model_name}-{q_method}-GGUF", exist_ok=True, private=private_repo)
         new_repo_id = new_repo_url.repo_id
         print("Repo created successfully!", new_repo_url)

@@ -58,6 +74,7 @@ def process_model(model_id, q_method, hf_token):
         except:
             card = ModelCard("")
         card.data.tags = ["llama-cpp"] if card.data.tags is None else card.data.tags + ["llama-cpp"]
+        card.data.tags += ["gguf-my-repo"]
         card.text = dedent(
             f"""
             # {new_repo_id}
@@ -84,7 +101,7 @@ def process_model(model_id, q_method, hf_token):
             llama-server --hf-repo {new_repo_id} --model {qtype.split("/")[-1]} -c 2048
             ```

-            Note: You can also use this checkpoint directly through the [usage steps](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#usage) listed in the
+            Note: You can also use this checkpoint directly through the [usage steps](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#usage) listed in the Llama.cpp repo as well.

             ```
             git clone https://github.com/ggerganov/llama.cpp && cd llama.cpp && make && ./main -m {qtype.split("/")[-1]} -n 128
@@ -138,6 +155,11 @@ iface = gr.Interface(
             label="HF Write Token",
             info="https://hf.co/settings/token",
             type="password",
+        ),
+        gr.Checkbox(
+            value=False,
+            label="Private Repo",
+            info="Create a private repo under your username."
         )
     ],
     outputs=[
@@ -145,7 +167,7 @@ iface = gr.Interface(
         gr.Image(show_label=False),
     ],
     title="Create your own GGUF Quants, blazingly fast ⚡!",
-    description="The space takes
+    description="The space takes an HF repo as an input, quantises it and creates a Public repo containing the selected quant under your HF user namespace. You need to specify a write token obtained in https://hf.co/settings/tokens.",
     article="<p>Find your write token at <a href='https://huggingface.co/settings/tokens' target='_blank'>token settings</a></p>",

 )
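The main behavioural change above is the download filter: the app now inspects the source repo's file tree, prefers `.safetensors` weights when they exist, and passes the resulting patterns to `snapshot_download` so only matching files are fetched. A minimal standalone sketch of that logic, assuming `huggingface_hub` is installed and using a placeholder model id; note the sketch wraps the pattern in a one-element list before extending, since `list += str` appends each character of the string:

```
from huggingface_hub import HfApi, snapshot_download

api = HfApi()  # pass token="hf_..." for private or gated source repos
model_id = "mistralai/Mistral-7B-v0.1"  # placeholder

# Always fetch the card, configs and tokenizer files.
dl_pattern = ["*.md", "*.json", "*.model"]

# Prefer safetensors weights if the repo ships them, else fall back to .bin.
has_safetensors = any(
    f.path.endswith(".safetensors")
    for f in api.list_repo_tree(repo_id=model_id, recursive=True)
)
# Wrap the pattern in a list: `dl_pattern += "*.bin"` would append the
# characters '*', '.', 'b', 'i', 'n' instead, and a bare '*' matches everything.
dl_pattern += ["*.safetensors" if has_safetensors else "*.bin"]

snapshot_download(repo_id=model_id, local_dir="model", allow_patterns=dl_pattern)
```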
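The new `private_repo` flag flows straight into repo creation. A small sketch of that call, with a placeholder name standing in for the app's `{model_name}-{q_method}-GGUF` pattern:

```
from huggingface_hub import HfApi

api = HfApi(token="hf_...")  # write token, as collected by the form

new_repo_url = api.create_repo(
    repo_id="Mistral-7B-v0.1-Q4_K_M-GGUF",  # placeholder quant repo name
    exist_ok=True,   # reuse the repo if it already exists
    private=True,    # value supplied by the new "Private Repo" checkbox
)
print(new_repo_url.repo_id)  # fully qualified, e.g. "user/Mistral-7B-v0.1-Q4_K_M-GGUF"
```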
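The tagging hunk appends a `gguf-my-repo` tag after the existing `llama-cpp` handling. Only the `except` fallback is visible in the diff, so the `ModelCard.load` call below is inferred from context; the `or []` form is equivalent to the app's explicit `None` check:

```
from huggingface_hub import ModelCard

try:
    card = ModelCard.load(model_id)  # reuse the source repo's card (inferred)
except Exception:
    card = ModelCard("")  # the app uses a bare except: with the same fallback

# Equivalent to the diff's None check, plus the new tag.
card.data.tags = (card.data.tags or []) + ["llama-cpp", "gguf-my-repo"]
```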
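On the UI side, the checkbox is appended as a fourth input, and `gr.Interface` passes inputs to the function positionally, which is how its value lands in the new `private_repo` parameter of `process_model`. A trimmed sketch of that wiring; the first three components are simplified placeholders, not the app's exact widgets:

```
import gradio as gr

iface = gr.Interface(
    fn=process_model,  # the function patched above
    inputs=[
        gr.Textbox(label="Hub Model ID"),                     # -> model_id
        gr.Dropdown(["Q4_K_M", "Q5_K_M"], label="Quant"),     # -> q_method
        gr.Textbox(label="HF Write Token", type="password"),  # -> hf_token
        gr.Checkbox(
            value=False,
            label="Private Repo",
            info="Create a private repo under your username.",
        ),                                                    # -> private_repo
    ],
    outputs=gr.Markdown(),  # placeholder; the app also renders an image
)
```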