Model_Cards_Writing_Tool

Sleeping

Model_Cards_Writing_Tool / extract_code.py

Ezi Ozoani

live push

2d5ffb9 about 3 years ago

14.6 kB

	#!/usr/bin/env python3

	import re

	"""
	Extracts code snippets from the contents of "Libraries.ts".
	(Note that the contents of "Libraries.ts" are embedded in this script
	as the module-level string `file`, so no separate file is read.)
	"""

	# Declared up front and rebound further down to the embedded Libraries.ts
	# text that read_file() searches.
	file = None

	def read_file(library: str, model_name: str) -> str:
	    """Return the code snippet for ``library`` from the embedded source text.

	    The module-level string ``file`` is searched for the first occurrence
	    of ``const <library>``. The snippet is the text between the first
	    backtick after that point and the first "`;" after that point, with
	    every ``${model.id}`` placeholder replaced by ``model_name``.

	    ``library`` is matched literally and as a prefix of the constant name,
	    so a name such as "espnet" selects whichever ``const espnet...``
	    definition appears first in the text.

	    Args:
	        library: Name (or leading part of the name) of the TypeScript
	            snippet constant to look up.
	        model_name: Value substituted for each ``${model.id}`` placeholder.

	    Returns:
	        The extracted snippet, or the unmodified ``file`` text when no
	        matching constant or no backtick-delimited template is found.
	    """
	    text = file

	    # re.escape keeps regex metacharacters in the name from being
	    # interpreted as pattern syntax.
	    found = re.search('const ' + re.escape(library) + '.*', text, re.DOTALL)
	    if found is None:
	        # re.search returns None on no match; calling .group() on it would
	        # raise AttributeError, so fall back to the full text instead.
	        return text

	    tail = found.group()
	    start = tail.find('`')
	    end = tail.find('`;')
	    if start == -1 or end == -1:
	        # No template literal follows the matched constant.
	        return text

	    return tail[start + 1:end].replace('${model.id}', model_name)

	# Embedded copy of the TypeScript source "Libraries.ts". read_file() searches
	# this text for `const <library>` and extracts the backtick-delimited
	# template that follows.
	# NOTE(review): the `\|` sequences below are not recognised Python escape
	# sequences, so the backslash stays in the string — confirm whether they are
	# intentional or a copy/paste artifact.
	# NOTE(review): the adapter_transformers snippet contains `?.{model.id}`,
	# which does not look like valid TypeScript — verify against the upstream file.
	file = """
	import type { ModelData } from "./Types";
	/**
	* Add your new library here.
	*/
	export enum ModelLibrary {
	"adapter-transformers" = "Adapter Transformers",
	"allennlp" = "allenNLP",
	"asteroid" = "Asteroid",
	"diffusers" = "Diffusers",
	"espnet" = "ESPnet",
	"fairseq" = "Fairseq",
	"flair" = "Flair",
	"keras" = "Keras",
	"nemo" = "NeMo",
	"pyannote-audio" = "pyannote.audio",
	"sentence-transformers" = "Sentence Transformers",
	"sklearn" = "Scikit-learn",
	"spacy" = "spaCy",
	"speechbrain" = "speechbrain",
	"tensorflowtts" = "TensorFlowTTS",
	"timm" = "Timm",
	"fastai" = "fastai",
	"transformers" = "Transformers",
	"stanza" = "Stanza",
	"fasttext" = "fastText",
	"stable-baselines3" = "Stable-Baselines3",
	"ml-agents" = "ML-Agents",
	}

	export const ALL_MODEL_LIBRARY_KEYS = Object.keys(ModelLibrary) as (keyof typeof ModelLibrary)[];


	/**
	* Elements configurable by a model library.
	*/
	export interface LibraryUiElement {
	/**
	* Name displayed on the main
	* call-to-action button on the model page.
	*/
	btnLabel: string;
	/**
	* Repo name
	*/
	repoName: string;
	/**
	* URL to library's repo
	*/
	repoUrl: string;
	/**
	* Code snippet displayed on model page
	*/
	snippet: (model: ModelData) => string;
	}

	function nameWithoutNamespace(modelId: string): string {
	const splitted = modelId.split("/");
	return splitted.length === 1 ? splitted[0] : splitted[1];
	}

	//#region snippets

	const adapter_transformers = (model: ModelData) =>
	`from transformers import ${model.config?.adapter_transformers?.model_class}

	model = ${model.config?.adapter_transformers?.model_class}.from_pretrained("${model.config?.adapter_transformers?.{model.id}}")
	model.load_adapter("${model.id}", source="hf")`;

	const allennlpUnknown = (model: ModelData) =>
	`import allennlp_models
	from allennlp.predictors.predictor import Predictor

	predictor = Predictor.from_path("hf://${model.id}")`;

	const allennlpQuestionAnswering = (model: ModelData) =>
	`import allennlp_models
	from allennlp.predictors.predictor import Predictor

	predictor = Predictor.from_path("hf://${model.id}")
	predictor_input = {"passage": "My name is Wolfgang and I live in Berlin", "question": "Where do I live?"}
	predictions = predictor.predict_json(predictor_input)`;

	const allennlp = (model: ModelData) => {
	if (model.tags?.includes("question-answering")) {
	return allennlpQuestionAnswering(model);
	}
	return allennlpUnknown(model);
	};

	const asteroid = (model: ModelData) =>
	`from asteroid.models import BaseModel

	model = BaseModel.from_pretrained("${model.id}")`;

	const diffusers = (model: ModelData) =>
	`from diffusers import DiffusionPipeline

	pipeline = DiffusionPipeline.from_pretrained("${model.id}"${model.private ? ", use_auth_token=True" : ""})`;

	const espnetTTS = (model: ModelData) =>
	`from espnet2.bin.tts_inference import Text2Speech

	model = Text2Speech.from_pretrained("${model.id}")

	speech, *_ = model("text to generate speech from")`;

	const espnetASR = (model: ModelData) =>
	`from espnet2.bin.asr_inference import Speech2Text

	model = Speech2Text.from_pretrained(
	"${model.id}"
	)

	speech, rate = soundfile.read("speech.wav")
	text, *_ = model(speech)`;

	const espnetUnknown = () =>
	`unknown model type (must be text-to-speech or automatic-speech-recognition)`;

	const espnet = (model: ModelData) => {
	if (model.tags?.includes("text-to-speech")) {
	return espnetTTS(model);
	} else if (model.tags?.includes("automatic-speech-recognition")) {
	return espnetASR(model);
	}
	return espnetUnknown();
	};

	const fairseq = (model: ModelData) =>
	`from fairseq.checkpoint_utils import load_model_ensemble_and_task_from_hf_hub

	models, cfg, task = load_model_ensemble_and_task_from_hf_hub(
	"${model.id}"
	)`;


	const flair = (model: ModelData) =>
	`from flair.models import SequenceTagger

	tagger = SequenceTagger.load("${model.id}")`;

	const keras = (model: ModelData) =>
	`from huggingface_hub import from_pretrained_keras

	model = from_pretrained_keras("${model.id}")
	`;

	const pyannote_audio_pipeline = (model: ModelData) =>
	`from pyannote.audio import Pipeline

	pipeline = Pipeline.from_pretrained("${model.id}")

	# inference on the whole file
	pipeline("file.wav")

	# inference on an excerpt
	from pyannote.core import Segment
	excerpt = Segment(start=2.0, end=5.0)

	from pyannote.audio import Audio
	waveform, sample_rate = Audio().crop("file.wav", excerpt)
	pipeline({"waveform": waveform, "sample_rate": sample_rate})`;

	const pyannote_audio_model = (model: ModelData) =>
	`from pyannote.audio import Model, Inference

	model = Model.from_pretrained("${model.id}")
	inference = Inference(model)

	# inference on the whole file
	inference("file.wav")

	# inference on an excerpt
	from pyannote.core import Segment
	excerpt = Segment(start=2.0, end=5.0)
	inference.crop("file.wav", excerpt)`;

	const pyannote_audio = (model: ModelData) => {
	if (model.tags?.includes("pyannote-audio-pipeline")) {
	return pyannote_audio_pipeline(model);
	}
	return pyannote_audio_model(model);
	};

	const tensorflowttsTextToMel = (model: ModelData) =>
	`from tensorflow_tts.inference import AutoProcessor, TFAutoModel

	processor = AutoProcessor.from_pretrained("${model.id}")
	model = TFAutoModel.from_pretrained("${model.id}")
	`;

	const tensorflowttsMelToWav = (model: ModelData) =>
	`from tensorflow_tts.inference import TFAutoModel

	model = TFAutoModel.from_pretrained("${model.id}")
	audios = model.inference(mels)
	`;

	const tensorflowttsUnknown = (model: ModelData) =>
	`from tensorflow_tts.inference import TFAutoModel

	model = TFAutoModel.from_pretrained("${model.id}")
	`;

	const tensorflowtts = (model: ModelData) => {
	if (model.tags?.includes("text-to-mel")) {
	return tensorflowttsTextToMel(model);
	} else if (model.tags?.includes("mel-to-wav")) {
	return tensorflowttsMelToWav(model);
	}
	return tensorflowttsUnknown(model);
	};

	const timm = (model: ModelData) =>
	`import timm

	model = timm.create_model("hf_hub:${model.id}", pretrained=True)`;

	const sklearn = (model: ModelData) =>
	`from huggingface_hub import hf_hub_download
	import joblib

	model = joblib.load(
	hf_hub_download("${model.id}", "sklearn_model.joblib")
	)`;

	const fastai = (model: ModelData) =>
	`from huggingface_hub import from_pretrained_fastai

	learn = from_pretrained_fastai("${model.id}")`;

	const sentenceTransformers = (model: ModelData) =>
	`from sentence_transformers import SentenceTransformer

	model = SentenceTransformer("${model.id}")`;

	const spacy = (model: ModelData) =>
	`!pip install https://huggingface.co/${model.id}/resolve/main/${nameWithoutNamespace(model.id)}-any-py3-none-any.whl

	# Using spacy.load().
	import spacy
	nlp = spacy.load("${nameWithoutNamespace(model.id)}")

	# Importing as module.
	import ${nameWithoutNamespace(model.id)}
	nlp = ${nameWithoutNamespace(model.id)}.load()`;

	const stanza = (model: ModelData) =>
	`import stanza

	stanza.download("${nameWithoutNamespace(model.id).replace("stanza-", "")}")
	nlp = stanza.Pipeline("${nameWithoutNamespace(model.id).replace("stanza-", "")}")`;


	const speechBrainMethod = (speechbrainInterface: string) => {
	switch (speechbrainInterface) {
	case "EncoderClassifier":
	return "classify_file";
	case "EncoderDecoderASR":
	case "EncoderASR":
	return "transcribe_file";
	case "SpectralMaskEnhancement":
	return "enhance_file";
	case "SepformerSeparation":
	return "separate_file";
	default:
	return undefined;
	}
	};

	const speechbrain = (model: ModelData) => {
	const speechbrainInterface = model.config?.speechbrain?.interface;
	if (speechbrainInterface === undefined) {
	return `# interface not specified in config.json`;
	}

	const speechbrainMethod = speechBrainMethod(speechbrainInterface);
	if (speechbrainMethod === undefined) {
	return `# interface in config.json invalid`;
	}

	return `from speechbrain.pretrained import ${speechbrainInterface}
	model = ${speechbrainInterface}.from_hparams(
	"${model.id}"
	)
	model.${speechbrainMethod}("file.wav")`;
	};

	const transformers = (model: ModelData) => {
	const info = model.transformersInfo;
	if (!info) {
	return `# ⚠️ Type of model unknown`;
	}
	if (info.processor) {
	const varName = info.processor === "AutoTokenizer" ? "tokenizer"
	: info.processor === "AutoFeatureExtractor" ? "extractor"
	: "processor"
	;
	return [
	`from transformers import ${info.processor}, ${info.auto_model}`,
	"",
	`${varName} = ${info.processor}.from_pretrained("${model.id}"${model.private ? ", use_auth_token=True" : ""})`,
	"",
	`model = ${info.auto_model}.from_pretrained("${model.id}"${model.private ? ", use_auth_token=True" : ""})`,
	].join("\n");
	} else {
	return [
	`from transformers import ${info.auto_model}`,
	"",
	`model = ${info.auto_model}.from_pretrained("${model.id}"${model.private ? ", use_auth_token=True" : ""})`,
	].join("\n");
	}
	};

	const fasttext = (model: ModelData) =>
	`from huggingface_hub import hf_hub_download
	import fasttext

	model = fasttext.load_model(hf_hub_download("${model.id}", "model.bin"))`;

	const stableBaselines3 = (model: ModelData) =>
	`from huggingface_sb3 import load_from_hub
	checkpoint = load_from_hub(
	repo_id="${model.id}",
	filename="{MODEL FILENAME}.zip",
	)`;

	const nemoDomainResolver = (domain: string, model: ModelData): string \| undefined => {
	const modelName = `${nameWithoutNamespace(model.id)}.nemo`;

	switch (domain) {
	case "ASR":
	return `import nemo.collections.asr as nemo_asr
	asr_model = nemo_asr.models.ASRModel.from_pretrained("${model.id}")

	transcriptions = asr_model.transcribe(["file.wav"])`;
	default:
	return undefined;
	}
	};

	const mlAgents = (model: ModelData) =>
	`mlagents-load-from-hf --repo-id="${model.id}" --local-dir="./downloads"`;

	const nemo = (model: ModelData) => {
	let command: string \| undefined = undefined;
	// Resolve the tag to a nemo domain/sub-domain
	if (model.tags?.includes("automatic-speech-recognition")) {
	command = nemoDomainResolver("ASR", model);
	}

	return command ?? `# tag did not correspond to a valid NeMo domain.`;
	};

	//#endregion



	export const MODEL_LIBRARIES_UI_ELEMENTS: { [key in keyof typeof ModelLibrary]?: LibraryUiElement } = {
	// ^^ TODO(remove the optional ? marker when Stanza snippet is available)
	"adapter-transformers": {
	btnLabel: "Adapter Transformers",
	repoName: "adapter-transformers",
	repoUrl: "https://github.com/Adapter-Hub/adapter-transformers",
	snippet: adapter_transformers,
	},
	"allennlp": {
	btnLabel: "AllenNLP",
	repoName: "AllenNLP",
	repoUrl: "https://github.com/allenai/allennlp",
	snippet: allennlp,
	},
	"asteroid": {
	btnLabel: "Asteroid",
	repoName: "Asteroid",
	repoUrl: "https://github.com/asteroid-team/asteroid",
	snippet: asteroid,
	},
	"diffusers": {
	btnLabel: "Diffusers",
	repoName: "🤗/diffusers",
	repoUrl: "https://github.com/huggingface/diffusers",
	snippet: diffusers,
	},
	"espnet": {
	btnLabel: "ESPnet",
	repoName: "ESPnet",
	repoUrl: "https://github.com/espnet/espnet",
	snippet: espnet,
	},
	"fairseq": {
	btnLabel: "Fairseq",
	repoName: "fairseq",
	repoUrl: "https://github.com/pytorch/fairseq",
	snippet: fairseq,
	},
	"flair": {
	btnLabel: "Flair",
	repoName: "Flair",
	repoUrl: "https://github.com/flairNLP/flair",
	snippet: flair,
	},
	"keras": {
	btnLabel: "Keras",
	repoName: "Keras",
	repoUrl: "https://github.com/keras-team/keras",
	snippet: keras,
	},
	"nemo": {
	btnLabel: "NeMo",
	repoName: "NeMo",
	repoUrl: "https://github.com/NVIDIA/NeMo",
	snippet: nemo,
	},
	"pyannote-audio": {
	btnLabel: "pyannote.audio",
	repoName: "pyannote-audio",
	repoUrl: "https://github.com/pyannote/pyannote-audio",
	snippet: pyannote_audio,
	},
	"sentence-transformers": {
	btnLabel: "sentence-transformers",
	repoName: "sentence-transformers",
	repoUrl: "https://github.com/UKPLab/sentence-transformers",
	snippet: sentenceTransformers,
	},
	"sklearn": {
	btnLabel: "Scikit-learn",
	repoName: "Scikit-learn",
	repoUrl: "https://github.com/scikit-learn/scikit-learn",
	snippet: sklearn,
	},
	"fastai": {
	btnLabel: "fastai",
	repoName: "fastai",
	repoUrl: "https://github.com/fastai/fastai",
	snippet: fastai,
	},
	"spacy": {
	btnLabel: "spaCy",
	repoName: "spaCy",
	repoUrl: "https://github.com/explosion/spaCy",
	snippet: spacy,
	},
	"speechbrain": {
	btnLabel: "speechbrain",
	repoName: "speechbrain",
	repoUrl: "https://github.com/speechbrain/speechbrain",
	snippet: speechbrain,
	},
	"stanza": {
	btnLabel: "Stanza",
	repoName: "stanza",
	repoUrl: "https://github.com/stanfordnlp/stanza",
	snippet: stanza,
	},
	"tensorflowtts": {
	btnLabel: "TensorFlowTTS",
	repoName: "TensorFlowTTS",
	repoUrl: "https://github.com/TensorSpeech/TensorFlowTTS",
	snippet: tensorflowtts,
	},
	"timm": {
	btnLabel: "timm",
	repoName: "pytorch-image-models",
	repoUrl: "https://github.com/rwightman/pytorch-image-models",
	snippet: timm,
	},
	"transformers": {
	btnLabel: "Transformers",
	repoName: "🤗/transformers",
	repoUrl: "https://github.com/huggingface/transformers",
	snippet: transformers,
	},
	"fasttext": {
	btnLabel: "fastText",
	repoName: "fastText",
	repoUrl: "https://fasttext.cc/",
	snippet: fasttext,
	},
	"stable-baselines3": {
	btnLabel: "stable-baselines3",
	repoName: "stable-baselines3",
	repoUrl: "https://github.com/huggingface/huggingface_sb3",
	snippet: stableBaselines3,
	},
	"ml-agents": {
	btnLabel: "ml-agents",
	repoName: "ml-agents",
	repoUrl: "https://github.com/huggingface/ml-agents",
	snippet: mlAgents,
	},
	} as const;
	"""


	if __name__ == '__main__':
	    import sys

	    # Command-line usage: extract_code.py [library [model_name]].
	    # Missing arguments fall back to the demo values, so running the script
	    # with no arguments prints the same snippet as before.
	    cli_args = sys.argv[1:]
	    library_name = cli_args[0] if len(cli_args) > 0 else "keras"
	    model_name = cli_args[1] if len(cli_args) > 1 else "Distillgpt2"
	    print(read_file(library_name, model_name))