Spaces:

OpenVINO
/

nncf-quantization

Running

App Files Files Community

nncf-quantization / app.py

echarlaix HF Staff

remove task

93d09b5 over 1 year ago

raw

history blame

6.41 kB

	import os
	import shutil
	import gradio as gr
	from huggingface_hub import HfApi, whoami, ModelCard
	from gradio_huggingfacehub_search import HuggingfaceHubSearch
	from textwrap import dedent


	from tempfile import TemporaryDirectory

	from huggingface_hub.file_download import repo_folder_name
	from optimum.exporters.tasks import TasksManager
	from optimum.intel.utils.constant import _TASK_ALIASES
	from optimum.intel.openvino.utils import _HEAD_TO_AUTOMODELS
	from optimum.exporters import TasksManager

	from optimum.intel.utils.modeling_utils import _find_files_matching_pattern
	from optimum.intel import (
	OVModelForAudioClassification,
	OVModelForCausalLM,
	OVModelForFeatureExtraction,
	OVModelForImageClassification,
	OVModelForMaskedLM,
	OVModelForQuestionAnswering,
	OVModelForSeq2SeqLM,
	OVModelForSequenceClassification,
	OVModelForTokenClassification,
	OVStableDiffusionPipeline,
	OVStableDiffusionXLPipeline,
	OVLatentConsistencyModelPipeline,
	OVModelForPix2Struct,
	OVWeightQuantizationConfig,
	)

	def process_model(
	    model_id: str,
	    dtype: str,
	    private_repo: bool,
	    oauth_token: gr.OAuthToken,
	) -> str:
	    """Quantize a Hub model with OpenVINO/NNCF and push it to the user's namespace.

	    The task is always inferred automatically from ``model_id`` (there is no
	    task input). The model is exported to OpenVINO unless the source repo
	    already contains an OpenVINO ``*.xml`` model, weight-quantized, saved to a
	    temporary folder, and uploaded file by file to
	    ``<username>/<model_name>-openvino-<dtype>`` together with a generated
	    model card.

	    Args:
	        model_id: Hub repository id of the model to quantize.
	        dtype: ``"int8"`` selects 8-bit weights; any other value selects 4-bit.
	        private_repo: Whether the destination repository is created private.
	        oauth_token: OAuth token of the logged-in user; used for every Hub call.

	    Returns:
	        A human-readable success message containing the new repository URL.

	    Raises:
	        ValueError: If the user is not logged in, no model was given, the task
	            cannot be inferred or is unsupported, or no matching OpenVINO
	            model class is available in this module.
	        RuntimeError: If uploading one of the exported files fails.
	    """
	    if oauth_token.token is None:
	        raise ValueError("You must be logged in to use this space")
	    if not model_id:
	        # Fail early with a clear message instead of a confusing downstream error.
	        raise ValueError("Please select a model from the Hub first")

	    model_name = model_id.split("/")[-1]
	    username = whoami(oauth_token.token)["name"]
	    new_repo_id = f"{username}/{model_name}-openvino-{dtype}"

	    # The task is not user-selectable: always start from "auto" and infer it.
	    task = TasksManager.map_from_synonym("auto")
	    if task == "auto":
	        try:
	            task = TasksManager.infer_task_from_model(model_id)
	        except Exception as e:
	            raise ValueError(
	                "The task could not be automatically inferred. "
	                f"Please pass explicitely the task with the relevant task from {', '.join(TasksManager.get_all_tasks())}. {e}"
	            ) from e

	    task = _TASK_ALIASES.get(task, task)
	    if task not in _HEAD_TO_AUTOMODELS:
	        supported_tasks = ", ".join(map(str, _HEAD_TO_AUTOMODELS))
	        raise ValueError(
	            f"The task '{task}' is not supported, only {supported_tasks} tasks are supported"
	        )

	    if task == "text2text-generation":
	        raise ValueError("Export of Seq2Seq models is currently disabled.")

	    # `auto_model_class` is the class *name*; it is also interpolated into the
	    # model card below. Resolve the actual class among the names imported by
	    # this module rather than evaluating the string as code.
	    auto_model_class = _HEAD_TO_AUTOMODELS[task]
	    ov_model_cls = globals().get(auto_model_class)
	    if ov_model_cls is None:
	        raise ValueError(
	            f"The model class '{auto_model_class}' for task '{task}' is not available in this space"
	        )

	    ov_files = _find_files_matching_pattern(
	        model_id,
	        pattern=r"(.)?openvino(.)?\_model.xml",
	        use_auth_token=oauth_token.token,
	    )
	    # Only convert to OpenVINO when the repo does not already ship an IR model.
	    export = not ov_files
	    quantization_config = OVWeightQuantizationConfig(bits=8 if dtype == "int8" else 4)
	    api = HfApi(token=oauth_token.token)

	    with TemporaryDirectory() as d:
	        folder = os.path.join(d, repo_folder_name(repo_id=model_id, repo_type="models"))
	        os.makedirs(folder)
	        try:
	            api.snapshot_download(repo_id=model_id, local_dir=folder, allow_patterns=["*.json"])

	            # NOTE(review): the user's token is not forwarded here, so private or
	            # gated source models presumably cannot be loaded -- confirm intent.
	            ov_model = ov_model_cls.from_pretrained(
	                model_id, export=export, quantization_config=quantization_config
	            )
	            ov_model.save_pretrained(folder)

	            new_repo_url = api.create_repo(
	                repo_id=new_repo_id, exist_ok=True, private=private_repo
	            )
	            new_repo_id = new_repo_url.repo_id
	            print("Repo created successfully!", new_repo_url)

	            # Only top-level regular files are uploaded; sub-directories are skipped.
	            file_names = [
	                f for f in os.listdir(folder) if os.path.isfile(os.path.join(folder, f))
	            ]

	            for file in file_names:
	                file_path = os.path.join(folder, file)
	                try:
	                    api.upload_file(
	                        path_or_fileobj=file_path,
	                        path_in_repo=file,
	                        repo_id=new_repo_id,
	                    )
	                except Exception as e:
	                    raise RuntimeError(f"Error uploading file {file_path}: {e}") from e

	            # Best effort: reuse the source model card when it can be loaded,
	            # otherwise start from an empty one.
	            try:
	                card = ModelCard.load(model_id, token=oauth_token.token)
	            except Exception:
	                card = ModelCard("")

	            if card.data.tags is None:
	                card.data.tags = []
	            card.data.tags.append("openvino")
	            card.data.base_model = model_id
	            card.text = dedent(
	                f"""
	                This model is a quantized version of [`{model_id}`](https://huggingface.co/{model_id}) and was exported to the OpenVINO format using [optimum-intel](https://github.com/huggingface/optimum-intel) via the [nncf-quantization](https://huggingface.co/spaces/echarlaix/nncf-quantization) space.

	                First make sure you have optimum-intel installed:

	                ```bash
	                pip install optimum[openvino]
	                ```

	                To load your model you can do as follows:

	                ```python
	                from optimum.intel import {auto_model_class}

	                model_id = "{new_repo_id}"
	                model = {auto_model_class}.from_pretrained(model_id)
	                ```
	                """
	            )
	            # The card is written after the upload loop, so it is uploaded once.
	            card_path = os.path.join(folder, "README.md")
	            card.save(card_path)

	            api.upload_file(
	                path_or_fileobj=card_path,
	                path_in_repo="README.md",
	                repo_id=new_repo_id,
	            )
	            return f"This model was successfully quantized, find it under your repo {new_repo_url}"
	        finally:
	            # Free the exported weights right away, before the context manager exits.
	            shutil.rmtree(folder, ignore_errors=True)


	# --- Input widgets -----------------------------------------------------------
	# Search box used to pick the source model on the Hub.
	model_id = HuggingfaceHubSearch(
	    search_type="model",
	    label="Hub Model ID",
	    placeholder="Search for model id on the hub",
	)

	# Target weight precision offered to the user; int8 is preselected.
	dtype = gr.Dropdown(
	    choices=["int8", "int4"],
	    label="Precision data types",
	    value="int8",
	    visible=True,
	    filterable=False,
	)

	# Lets the user decide whether the destination repository is private.
	private_repo = gr.Checkbox(
	    label="Private Repo",
	    info="Create a private repo under your username",
	    value=False,
	)

	# --- Interface ---------------------------------------------------------------
	_interface_title = "Quantize your model with NNCF"
	_interface_description = "The space takes a model, converts it to the OpenVINO format and applies NNCF weight only quantization. The resulting model will then be pushed on the Hub under your HF user namespace"
	_result_markdown = gr.Markdown(label="output")

	# The three widgets above are passed to `process_model` in this order.
	interface = gr.Interface(
	    fn=process_model,
	    inputs=[model_id, dtype, private_repo],
	    outputs=[_result_markdown],
	    title=_interface_title,
	    description=_interface_description,
	    api_name=False,
	)

	# --- Page layout -------------------------------------------------------------
	# A login notice and button are shown above the rendered interface.
	with gr.Blocks() as demo:
	    gr.Markdown("You must be logged in to use this space")
	    gr.LoginButton(min_width=250)
	    interface.render()

	demo.launch()