rephrase description

app.py CHANGED
@@ -95,11 +95,13 @@ def quantize_model(
     calibration_dataset = None

     is_int8 = dtype == "int8"
-    if library_name == "diffusers":
-        quant_method = "hybrid"
-    elif not is_int8 and calibration_dataset is not None:
+    # if library_name == "diffusers":
+    #     quant_method = "hybrid"
+    if not is_int8 and calibration_dataset is not None:
         quant_method = "awq"
     else:
+        if calibration_dataset is not None:
+            print("Default quantization was selected, calibration dataset won't be used")
         quant_method = "default"

     quantization_config = OVWeightQuantizationConfig(
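The hunk above disables the diffusers-specific "hybrid" path and routes every request to either AWQ (sub-8-bit with a calibration dataset) or default weight-only quantization. As a rough sketch of how that choice feeds into optimum-intel; the model id, dataset, and parameter values below are illustrative assumptions, not taken from this Space:

```python
# Sketch: weight-only quantization via optimum-intel; all values below are
# illustrative assumptions, not the Space's actual arguments.
from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig

# "awq" only applies below 8 bits and needs a calibration dataset;
# otherwise the Space falls back to "default", as the hunk above shows.
quantization_config = OVWeightQuantizationConfig(
    bits=4,
    quant_method="awq",
    dataset="wikitext2",  # calibration dataset used by AWQ
    ratio=0.8,            # share of layers quantized to 4-bit, the rest stay 8-bit
)

ov_model = OVModelForCausalLM.from_pretrained(
    "HuggingFaceTB/SmolLM2-135M-Instruct",  # hypothetical input model
    export=True,                            # convert to OpenVINO IR while loading
    quantization_config=quantization_config,
)
ov_model.save_pretrained("smollm2-135m-int4-awq")
```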
@@ -112,7 +114,7 @@ def quantize_model(

         api = HfApi(token=oauth_token.token)
         if api.repo_exists(new_repo_id) and not overwritte:
-            return f"Model {new_repo_id} already exist, please set overwritte=True to push on an existing
+            return f"Model {new_repo_id} already exist, please set overwritte=True to push on an existing repository"

         with TemporaryDirectory() as d:
             folder = os.path.join(d, repo_folder_name(repo_id=model_id, repo_type="models"))
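A minimal sketch of the guard this hunk completes; the token and repo id are placeholders:

```python
# Sketch of the overwrite guard; token and repo id are placeholders.
from huggingface_hub import HfApi

api = HfApi(token="hf_xxx")          # the Space gets this from the OAuth login
new_repo_id = "user/model-openvino"  # hypothetical target repository
overwritte = False                   # mirrors the Space's (misspelled) flag

if api.repo_exists(new_repo_id) and not overwritte:
    raise SystemExit(f"Model {new_repo_id} already exists, enable overwrite to push anyway")
```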
@@ -130,7 +132,7 @@ def quantize_model(
                 ov_model.save_pretrained(folder)
                 new_repo_url = api.create_repo(repo_id=new_repo_id, exist_ok=True, private=private_repo)
                 new_repo_id = new_repo_url.repo_id
-                print("
+                print("Repository created successfully!", new_repo_url)

                 folder = Path(folder)
                 for dir_name in (
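For reference, `create_repo` returns a `RepoUrl` object, which explains the reassignment of `new_repo_id` just before the added log line; the ids below are placeholders:

```python
# Sketch: create_repo returns a RepoUrl (a str subclass); its .repo_id is the
# canonical "namespace/name", hence the reassignment above. Ids are placeholders.
from huggingface_hub import HfApi

api = HfApi(token="hf_xxx")
new_repo_url = api.create_repo(repo_id="model-openvino", exist_ok=True, private=False)
new_repo_id = new_repo_url.repo_id  # e.g. "user/model-openvino"
print("Repository created successfully!", new_repo_url)
```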
@@ -169,9 +171,9 @@ def quantize_model(
                 card.data.base_model = model_id
                 card.text = dedent(
                     f"""
-                    This model is a quantized version of [`{model_id}`](https://huggingface.co/{model_id}) and
+                    This model is a quantized version of [`{model_id}`](https://huggingface.co/{model_id}) and is converted to the OpenVINO format. This model was obtained via the [nncf-quantization](https://huggingface.co/spaces/echarlaix/nncf-quantization) space with [optimum-intel](https://github.com/huggingface/optimum-intel).

-                    First make sure you have optimum-intel installed:
+                    First make sure you have `optimum-intel` installed:

                     ```bash
                     pip install optimum[openvino]
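The generated card tells users to install `optimum[openvino]` and load the pushed repo. A hedged example of what that usage looks like; the task-specific class and repo id are assumptions, since the class depends on the model's task:

```python
# Example usage matching the generated card's instructions; the task-specific
# class (OVModelForCausalLM) and repo id are assumptions for illustration.
from transformers import AutoTokenizer
from optimum.intel import OVModelForCausalLM

repo_id = "user/model-openvino"  # placeholder for the pushed repository
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = OVModelForCausalLM.from_pretrained(repo_id)

inputs = tokenizer("Hello, OpenVINO!", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```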
@@ -195,16 +197,16 @@ def quantize_model(
                     path_in_repo="README.md",
                     repo_id=new_repo_id,
                 )
-                return f"This model was successfully quantized, find it under your
+                return f"This model was successfully quantized, find it under your repository {new_repo_url}"
             finally:
                 shutil.rmtree(folder, ignore_errors=True)
     except Exception as e:
         return f"### Error: {e}"

 DESCRIPTION = """
-This Space uses [Optimum Intel](https://
+This Space uses [Optimum Intel](https://github.com/huggingface/optimum-intel) to automatically apply NNCF [Weight Only Quantization](https://huggingface.co/docs/optimum/main/en/intel/openvino/optimization) (WOQ) on your model and convert it to the [OpenVINO format](https://docs.openvino.ai/2024/documentation/openvino-ir-format.html) if not already.

-
+After conversion, a repository will be pushed under your namespace with the resulting model.

 The list of the supported architectures can be found in the [documentation](https://huggingface.co/docs/optimum/main/en/intel/openvino/models)
 """
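Around this hunk the Space renders the model card and pushes it with `upload_file`. A rough sketch of that flow, with placeholder ids and paths:

```python
# Sketch of the card render-and-push flow; ids and paths are placeholders.
from huggingface_hub import HfApi, ModelCard

api = HfApi(token="hf_xxx")
card = ModelCard.load("user/source-model")  # start from the original model's card
card.data.base_model = "user/source-model"
card.save("README.md")
api.upload_file(
    path_or_fileobj="README.md",
    path_in_repo="README.md",
    repo_id="user/model-openvino",
)
```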
@@ -215,9 +217,9 @@ model_id = HuggingfaceHubSearch(
     search_type="model",
 )
 dtype = gr.Dropdown(
-    ["
-    value="
-    label="
+    ["8-bit", "4-bit"],
+    value="8-bit",
+    label="Weights precision",
     filterable=False,
     visible=True,
 )
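Note that `quantize_model` still compares `dtype` against `"int8"` while the dropdown now shows `"8-bit"`/`"4-bit"`, so the display value is presumably normalized somewhere outside this diff. A purely hypothetical sketch of such a mapping:

```python
# Hypothetical normalization; not shown in this diff, assumed to exist since
# quantize_model still checks dtype == "int8".
PRECISION_ALIASES = {"8-bit": "int8", "4-bit": "int4"}

def normalize_dtype(label: str) -> str:
    return PRECISION_ALIASES.get(label, label)

assert normalize_dtype("8-bit") == "int8"
```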
@@ -255,13 +257,13 @@ ratio = gr.Slider(
 )
 private_repo = gr.Checkbox(
     value=False,
-    label="Private
-    info="Create a private
+    label="Private repository",
+    info="Create a private repository instead of a public one",
 )
 overwritte = gr.Checkbox(
     value=False,
-    label="Overwrite
-    info="Enable pushing files on existing
+    label="Overwrite repository content",
+    info="Enable pushing files on existing repositories, potentially overwriting existing files",
 )
 interface = gr.Interface(
     fn=quantize_model,
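A trimmed-down sketch of how these components plug into the Interface; the input list here is abbreviated relative to the Space's full signature:

```python
# Minimal, self-contained sketch of the wiring; the real Space passes more
# inputs (model search box, calibration dataset, ratio slider, OAuth token, ...).
import gradio as gr

def quantize_model(model_id, dtype, private_repo, overwritte):
    return f"Would quantize {model_id} to {dtype} (private={private_repo}, overwrite={overwritte})"

interface = gr.Interface(
    fn=quantize_model,
    inputs=[
        gr.Textbox(label="Model ID"),
        gr.Dropdown(["8-bit", "4-bit"], value="8-bit", label="Weights precision"),
        gr.Checkbox(value=False, label="Private repository"),
        gr.Checkbox(value=False, label="Overwrite repository content"),
    ],
    outputs=gr.Markdown(label="Result"),
)

if __name__ == "__main__":
    interface.launch()
```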