Spaces:

fishaudio
/

openaudio-s1-mini

Running on L4

App Files Files Community

openaudio-s1-mini / tools /run_webui.py

Stardust-minus

Upload folder using huggingface_hub

a26769d verified 5 months ago

raw

history blame contribute delete

3.29 kB

	import os
	from argparse import ArgumentParser
	from pathlib import Path

	import pyrootutils
	import torch
	from loguru import logger

	pyrootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)

	from fish_speech.inference_engine import TTSInferenceEngine
	from fish_speech.models.dac.inference import load_model as load_decoder_model
	from fish_speech.models.text2semantic.inference import launch_thread_safe_queue
	from fish_speech.utils.schema import ServeTTSRequest
	from tools.webui import build_app
	from tools.webui.inference import get_inference_wrapper

	# Make einx happy
	os.environ["EINX_FILTER_TRACEBACK"] = "false"


	def parse_args():
	parser = ArgumentParser()
	parser.add_argument(
	"--llama-checkpoint-path",
	type=Path,
	default="checkpoints/openaudio-s1-mini",
	)
	parser.add_argument(
	"--decoder-checkpoint-path",
	type=Path,
	default="checkpoints/openaudio-s1-mini/codec.pth",
	)
	parser.add_argument("--decoder-config-name", type=str, default="modded_dac_vq")
	parser.add_argument("--device", type=str, default="cuda")
	parser.add_argument("--half", action="store_true")
	parser.add_argument("--compile", action="store_true")
	parser.add_argument("--max-gradio-length", type=int, default=0)
	parser.add_argument("--theme", type=str, default="light")

	return parser.parse_args()


	if __name__ == "__main__":
	args = parse_args()
	args.precision = torch.half if args.half else torch.bfloat16

	# Check if MPS or CUDA is available
	if torch.backends.mps.is_available():
	args.device = "mps"
	logger.info("mps is available, running on mps.")
	elif not torch.cuda.is_available():
	logger.info("CUDA is not available, running on CPU.")
	args.device = "cpu"

	logger.info("Loading Llama model...")
	llama_queue = launch_thread_safe_queue(
	checkpoint_path=args.llama_checkpoint_path,
	device=args.device,
	precision=args.precision,
	compile=args.compile,
	)

	logger.info("Loading VQ-GAN model...")
	decoder_model = load_decoder_model(
	config_name=args.decoder_config_name,
	checkpoint_path=args.decoder_checkpoint_path,
	device=args.device,
	)

	logger.info("Decoder model loaded, warming up...")

	# Create the inference engine
	inference_engine = TTSInferenceEngine(
	llama_queue=llama_queue,
	decoder_model=decoder_model,
	compile=args.compile,
	precision=args.precision,
	)

	# Dry run to check if the model is loaded correctly and avoid the first-time latency
	list(
	inference_engine.inference(
	ServeTTSRequest(
	text="Hello world.",
	references=[],
	reference_id=None,
	max_new_tokens=1024,
	chunk_length=200,
	top_p=0.7,
	repetition_penalty=1.5,
	temperature=0.7,
	format="wav",
	)
	)
	)

	logger.info("Warming up done, launching the web UI...")

	# Get the inference function with the immutable arguments
	inference_fct = get_inference_wrapper(inference_engine)

	app = build_app(inference_fct, args.theme)
	app.launch(show_api=True)