From dd7b78f2c07e1fe4dfdaa8df4fd535b18ad90c33 Mon Sep 17 00:00:00 2001 From: supermario_leo Date: Sat, 25 Apr 2026 05:09:23 +0800 Subject: [PATCH] refactor(cli): defer soundfile and voxcpm.core imports to inference commands Move `import soundfile as sf` and `from voxcpm.core import VoxCPM` from module-level into the functions that require model inference (load_model, _run_single, cmd_batch), so `voxcpm validate` can run without loading the model/inference stack. --- src/voxcpm/cli.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/voxcpm/cli.py b/src/voxcpm/cli.py index d5ed57e..f6d40d6 100644 --- a/src/voxcpm/cli.py +++ b/src/voxcpm/cli.py @@ -11,11 +11,6 @@ import os import sys from pathlib import Path -import soundfile as sf - -from voxcpm.core import VoxCPM - - DEFAULT_HF_MODEL_ID = "openbmb/VoxCPM2" # ----------------------------- @@ -173,7 +168,9 @@ def validate_batch_args(args, parser): # ----------------------------- -def load_model(args) -> VoxCPM: +def load_model(args): + from voxcpm.core import VoxCPM + print("Loading VoxCPM model...", file=sys.stderr) zipenhancer_path = getattr(args, "zipenhancer_path", None) or os.environ.get( @@ -266,6 +263,8 @@ def _run_single(args, parser, *, text: str, output: str, prompt_text: str | None and (args.prompt_audio is not None or args.reference_audio is not None), ) + import soundfile as sf + sf.write(str(output_path), audio_array, model.tts_model.sample_rate) duration = len(audio_array) / model.tts_model.sample_rate @@ -307,6 +306,8 @@ def cmd_validate(args, parser): def cmd_batch(args, parser): + import soundfile as sf + input_file = require_file_exists(args.input, parser, "input file") output_dir = Path(args.output_dir) output_dir.mkdir(parents=True, exist_ok=True)