improve description
README.md CHANGED

@@ -1,5 +1,5 @@
 ---
-title:
+title: NNCF quantization
 emoji: 🦀
 colorFrom: pink
 colorTo: blue
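For context: this is the Space's README front matter, which configures the Space card on the Hub; `title` sets the display name shown for the Space, and `emoji`, `colorFrom`, and `colorTo` style its thumbnail.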
app.py CHANGED

@@ -69,9 +69,10 @@ def process_model(
         raise ValueError("Export of Seq2Seq models is currently disabled.")

     auto_model_class = _HEAD_TO_AUTOMODELS[task]
-    pattern = r"(.*)?openvino(.*)?\_model.xml"
     ov_files = _find_files_matching_pattern(
-        model_id,
+        model_id,
+        pattern=r"(.*)?openvino(.*)?\_model.xml",
+        use_auth_token=oauth_token.token,
     )
     export = len(ov_files) == 0
     quantization_config = OVWeightQuantizationConfig(bits=8 if dtype == "int8" else 4)
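For context, `export` decides whether the checkpoint still needs conversion to the OpenVINO format (it is set when no `openvino_model.xml` is found in the repo), and `quantization_config` is presumably passed to the resolved auto-model class further down. A minimal standalone sketch of that optimum-intel flow, assuming a text-generation model (the model id and output directory are placeholders, not taken from the Space):

```python
from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig

# 8-bit weight-only quantization, matching the Space's int8 branch
quantization_config = OVWeightQuantizationConfig(bits=8)

# export=True converts the original checkpoint to OpenVINO IR on the fly;
# NNCF compresses the weights according to quantization_config.
model = OVModelForCausalLM.from_pretrained(
    "gpt2",  # placeholder model id
    export=True,
    quantization_config=quantization_config,
)

# Writes openvino_model.xml / openvino_model.bin, the files the
# _find_files_matching_pattern check above looks for.
model.save_pretrained("gpt2-openvino-8bit")  # placeholder output dir
```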
@@ -119,10 +120,8 @@ def process_model(
         card.data.base_model = model_id
         card.text = dedent(
             f"""
-            This model
+            This model is a quantized version of [`{model_id}`](https://huggingface.co/{model_id}) and was exported to the OpenVINO format using [optimum-intel](https://github.com/huggingface/optimum-intel) via the [nncf-quantization](https://huggingface.co/spaces/echarlaix/nncf-quantization) space.

-            Refer to the [original model card](https://huggingface.co/{model_id}) for more details on the model.
-
             First make sure you have optimum-intel installed:

             ```bash
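The card's usage section is cut off here at the opening bash fence. A hedged sketch of what loading such an exported repo typically looks like with optimum-intel, again assuming a text-generation model (the repo id is a placeholder):

```python
from transformers import AutoTokenizer
from optimum.intel import OVModelForCausalLM

repo_id = "username/my-model"  # placeholder: the repo this Space pushed to

# No export needed: the pushed repo already contains the OpenVINO IR.
model = OVModelForCausalLM.from_pretrained(repo_id)
tokenizer = AutoTokenizer.from_pretrained(repo_id)

inputs = tokenizer("Hello, my name is", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```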
@@ -147,52 +146,51 @@ def process_model(
             path_in_repo="README.md",
             repo_id=new_repo_id,
         )
-        return f"
+        return f"This model was successfully quantized, find it under your repo {new_repo_url}'"
     finally:
         shutil.rmtree(folder, ignore_errors=True)


+model_id = HuggingfaceHubSearch(
+    label="Hub Model ID",
+    placeholder="Search for model id on the hub",
+    search_type="model",
+)
+dtype = gr.Dropdown(
+    ["int8", "int4"],
+    value="int8",
+    label="Precision data types",
+    filterable=False,
+    visible=True,
+)
+private_repo = gr.Checkbox(
+    value=False,
+    label="Private Repo",
+    info="Create a private repo under your username",
+)
+task = gr.Textbox(
+    value="auto",
+    label="Task : can be left to auto, will be automatically inferred",
+)
+interface = gr.Interface(
+    fn=process_model,
+    inputs=[
+        model_id,
+        dtype,
+        private_repo,
+        task,
+    ],
+    outputs=[
+        gr.Markdown(label="output"),
+    ],
+    title="Quantize your model with NNCF",
+    description="The space takes a model, converts it to the OpenVINO format and applies NNCF weight only quantization. The resulting model will then be pushed on the Hub under your HF user namespace",
+    api_name=False,
+)
+
 with gr.Blocks() as demo:
     gr.Markdown("You must be logged in to use this space")
     gr.LoginButton(min_width=250)
-
-    model_id = HuggingfaceHubSearch(
-        label="Hub Model ID",
-        placeholder="Search for model id on the hub",
-        search_type="model",
-    )
-    dtype = gr.Dropdown(
-        ["int8", "int4"],
-        value="int8",
-        label="Precision data types",
-        filterable=False,
-        visible=True,
-    )
-    private_repo = gr.Checkbox(
-        value=False,
-        label="Private Repo",
-        info="Create a private repo under your username",
-    )
-    task = gr.Textbox(
-        value="auto",
-        label="Task : can be left to auto, will be automatically inferred",
-    )
-    interface = gr.Interface(
-        fn=process_model,
-        inputs=[
-            model_id,
-            dtype,
-            private_repo,
-            task,
-        ],
-        outputs=[
-            gr.Markdown(label="output"),
-        ],
-        title="Quantize your model with OpenVINO NNCF ⚡!",
-        description="The space takes an HF repo as an input, quantize it and export it to OpenVINO, then push it to a repo under your HF user namespace.",
-        api_name=False,
-    )
-
     interface.render()

 demo.launch()
|