"""Round-trip an audio file through XCodec2: encode it to VQ codes, then decode.

Reads ``test.flac``, prints the codes returned by ``encode_code`` and writes
the waveform returned by ``decode_code`` to ``reconstructed.wav``.
"""
import torch
import soundfile as sf
from transformers import AutoConfig
from modeling_xcodec2 import XCodec2Model

# Local checkpoint directory, or the model's repository name on Hugging Face.
model_path = "/data/zheny/xcodec2"

model = XCodec2Model.from_pretrained(model_path)
# Switch to evaluation mode and move the model to the GPU (a CUDA device is
# required; there is no CPU fallback here).
model.eval().cuda()

# Load the audio clip to encode.
wav, sr = sf.read("test.flac")
# soundfile returns a (frames, channels) array for multi-channel files, which
# would turn the tensor below into [1, time, channels]. Average the channels so
# the input is always a single mono track; mono files are left untouched.
if wav.ndim > 1:
    wav = wav.mean(axis=1)
# NOTE(review): the clip is passed at its native sample rate and the output is
# written with the same `sr` -- confirm this matches the rate the model expects.
wav_tensor = torch.from_numpy(wav).float().unsqueeze(0)  # [1, time]

# Inference only: no autograd graph is needed for encoding/decoding.
with torch.no_grad():
    vq_code = model.encode_code(input_waveform=wav_tensor)
    print(vq_code)
    # Bring the result back to the CPU so it can be converted to NumPy.
    recon_wav = model.decode_code(vq_code).cpu()

# Index [0, 0, :] selects the first item / first channel of the decoder output
# (presumably shaped [batch, channel, time] -- TODO confirm).
sf.write("reconstructed.wav", recon_wav[0, 0, :].numpy(), sr)