diff --git a/src/voxcpm/cli.py b/src/voxcpm/cli.py index d5ed57e..f6d40d6 100644 --- a/src/voxcpm/cli.py +++ b/src/voxcpm/cli.py @@ -11,11 +11,6 @@ import os import sys from pathlib import Path -import soundfile as sf - -from voxcpm.core import VoxCPM - - DEFAULT_HF_MODEL_ID = "openbmb/VoxCPM2" # ----------------------------- @@ -173,7 +168,9 @@ def validate_batch_args(args, parser): # ----------------------------- -def load_model(args) -> VoxCPM: +def load_model(args): + from voxcpm.core import VoxCPM + print("Loading VoxCPM model...", file=sys.stderr) zipenhancer_path = getattr(args, "zipenhancer_path", None) or os.environ.get( @@ -266,6 +263,8 @@ def _run_single(args, parser, *, text: str, output: str, prompt_text: str | None and (args.prompt_audio is not None or args.reference_audio is not None), ) + import soundfile as sf + sf.write(str(output_path), audio_array, model.tts_model.sample_rate) duration = len(audio_array) / model.tts_model.sample_rate @@ -307,6 +306,8 @@ def cmd_validate(args, parser): def cmd_batch(args, parser): + import soundfile as sf + input_file = require_file_exists(args.input, parser, "input file") output_dir = Path(args.output_dir) output_dir.mkdir(parents=True, exist_ok=True)