improve description
README.md CHANGED

@@ -1,5 +1,5 @@
 ---
-title:
+title: NNCF quantization
 emoji: 🦀
 colorFrom: pink
 colorTo: blue
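For context: this is the Space's README front matter, which configures the Space card on the Hub; `title` sets the display name shown for the Space, and `emoji`, `colorFrom`, and `colorTo` style its thumbnail.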
app.py CHANGED

@@ -69,9 +69,10 @@ def process_model(
         raise ValueError("Export of Seq2Seq models is currently disabled.")

     auto_model_class = _HEAD_TO_AUTOMODELS[task]
-    pattern = r"(.*)?openvino(.*)?\_model.xml"
     ov_files = _find_files_matching_pattern(
-        model_id,
+        model_id,
+        pattern=r"(.*)?openvino(.*)?\_model.xml",
+        use_auth_token=oauth_token.token,
     )
     export = len(ov_files) == 0
     quantization_config = OVWeightQuantizationConfig(bits=8 if dtype == "int8" else 4)
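For context, `export` decides whether the checkpoint still needs conversion to the OpenVINO format (it is set when no `openvino_model.xml` is found in the repo), and `quantization_config` is presumably passed to the resolved auto-model class further down. A minimal standalone sketch of that optimum-intel flow, assuming a text-generation model (the model id and output directory are placeholders, not taken from the Space):

```python
from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig

# 8-bit weight-only quantization, matching the Space's int8 branch
quantization_config = OVWeightQuantizationConfig(bits=8)

# export=True converts the original checkpoint to OpenVINO IR on the fly;
# NNCF compresses the weights according to quantization_config.
model = OVModelForCausalLM.from_pretrained(
    "gpt2",  # placeholder model id
    export=True,
    quantization_config=quantization_config,
)

# Writes openvino_model.xml / openvino_model.bin, the files the
# _find_files_matching_pattern check above looks for.
model.save_pretrained("gpt2-openvino-8bit")  # placeholder output dir
```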
@@ -119,10 +120,8 @@ def process_model(
         card.data.base_model = model_id
         card.text = dedent(
             f"""
-            This model
+            This model is a quantized version of [`{model_id}`](https://huggingface.co/{model_id}) and was exported to the OpenVINO format using [optimum-intel](https://github.com/huggingface/optimum-intel) via the [nncf-quantization](https://huggingface.co/spaces/echarlaix/nncf-quantization) space.

-            Refer to the [original model card](https://huggingface.co/{model_id}) for more details on the model.
-
             First make sure you have optimum-intel installed:

             ```bash
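The card's usage section is cut off here at the opening bash fence. A hedged sketch of what loading such an exported repo typically looks like with optimum-intel, again assuming a text-generation model (the repo id is a placeholder):

```python
from transformers import AutoTokenizer
from optimum.intel import OVModelForCausalLM

repo_id = "username/my-model"  # placeholder: the repo this Space pushed to

# No export needed: the pushed repo already contains the OpenVINO IR.
model = OVModelForCausalLM.from_pretrained(repo_id)
tokenizer = AutoTokenizer.from_pretrained(repo_id)

inputs = tokenizer("Hello, my name is", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```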
@@ -147,52 +146,51 @@ def process_model(
             path_in_repo="README.md",
             repo_id=new_repo_id,
         )
-        return f"
+        return f"This model was successfully quantized, find it under your repo {new_repo_url}'"
     finally:
         shutil.rmtree(folder, ignore_errors=True)


+model_id = HuggingfaceHubSearch(
+    label="Hub Model ID",
+    placeholder="Search for model id on the hub",
+    search_type="model",
+)
+dtype = gr.Dropdown(
+    ["int8", "int4"],
+    value="int8",
+    label="Precision data types",
+    filterable=False,
+    visible=True,
+)
+private_repo = gr.Checkbox(
+    value=False,
+    label="Private Repo",
+    info="Create a private repo under your username",
+)
+task = gr.Textbox(
+    value="auto",
+    label="Task : can be left to auto, will be automatically inferred",
+)
+interface = gr.Interface(
+    fn=process_model,
+    inputs=[
+        model_id,
+        dtype,
+        private_repo,
+        task,
+    ],
+    outputs=[
+        gr.Markdown(label="output"),
+    ],
+    title="Quantize your model with NNCF",
+    description="The space takes a model, converts it to the OpenVINO format and applies NNCF weight only quantization. The resulting model will then be pushed on the Hub under your HF user namespace",
+    api_name=False,
+)
+
 with gr.Blocks() as demo:
     gr.Markdown("You must be logged in to use this space")
     gr.LoginButton(min_width=250)
-
-    model_id = HuggingfaceHubSearch(
-        label="Hub Model ID",
-        placeholder="Search for model id on the hub",
-        search_type="model",
-    )
-    dtype = gr.Dropdown(
-        ["int8", "int4"],
-        value="int8",
-        label="Precision data types",
-        filterable=False,
-        visible=True,
-    )
-    private_repo = gr.Checkbox(
-        value=False,
-        label="Private Repo",
-        info="Create a private repo under your username",
-    )
-    task = gr.Textbox(
-        value="auto",
-        label="Task : can be left to auto, will be automatically inferred",
-    )
-    interface = gr.Interface(
-        fn=process_model,
-        inputs=[
-            model_id,
-            dtype,
-            private_repo,
-            task,
-        ],
-        outputs=[
-            gr.Markdown(label="output"),
-        ],
-        title="Quantize your model with OpenVINO NNCF ⚡!",
-        description="The space takes an HF repo as an input, quantize it and export it to OpenVINO, then push it to a repo under your HF user namespace.",
-        api_name=False,
-    )
-
     interface.render()

 demo.launch()
|