surport voxcpm2 cli
This commit is contained in:
+375
-104
@@ -2,17 +2,22 @@
|
||||
"""
|
||||
VoxCPM Command Line Interface
|
||||
|
||||
Unified CLI for voice cloning, direct TTS synthesis, and batch processing.
|
||||
VoxCPM2-first CLI for voice design, cloning, and batch processing.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import soundfile as sf
|
||||
|
||||
from voxcpm.core import VoxCPM
|
||||
|
||||
|
||||
DEFAULT_HF_MODEL_ID = "openbmb/VoxCPM2"
|
||||
|
||||
# -----------------------------
|
||||
# Validators
|
||||
# -----------------------------
|
||||
@@ -25,6 +30,13 @@ def validate_file_exists(file_path: str, file_type: str = "file") -> Path:
|
||||
return path
|
||||
|
||||
|
||||
def require_file_exists(file_path: str, parser, file_type: str = "file") -> Path:
|
||||
try:
|
||||
return validate_file_exists(file_path, file_type)
|
||||
except FileNotFoundError as exc:
|
||||
parser.error(str(exc))
|
||||
|
||||
|
||||
def validate_output_path(output_path: str) -> Path:
|
||||
path = Path(output_path)
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
@@ -49,6 +61,113 @@ def validate_ranges(args, parser):
|
||||
parser.error("--lora-dropout must be between 0.0 and 1.0")
|
||||
|
||||
|
||||
def warn_legacy_mode():
|
||||
print(
|
||||
"Warning: legacy root CLI arguments are deprecated. Prefer `voxcpm design|clone|batch ...`.",
|
||||
file=sys.stderr,
|
||||
)
|
||||
|
||||
|
||||
def build_final_text(text: str, control: str | None) -> str:
|
||||
control = (control or "").strip()
|
||||
return f"({control}){text}" if control else text
|
||||
|
||||
|
||||
def resolve_prompt_text(args, parser) -> str | None:
|
||||
prompt_text = getattr(args, "prompt_text", None)
|
||||
prompt_file = getattr(args, "prompt_file", None)
|
||||
|
||||
if prompt_text and prompt_file:
|
||||
parser.error("Use either --prompt-text or --prompt-file, not both.")
|
||||
|
||||
if prompt_file:
|
||||
prompt_path = require_file_exists(prompt_file, parser, "prompt text file")
|
||||
return prompt_path.read_text(encoding="utf-8").strip()
|
||||
|
||||
if prompt_text:
|
||||
return prompt_text.strip()
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def detect_model_architecture(args) -> str | None:
|
||||
model_location = getattr(args, "model_path", None) or getattr(
|
||||
args, "hf_model_id", None
|
||||
)
|
||||
if not model_location:
|
||||
return None
|
||||
|
||||
if os.path.isdir(model_location):
|
||||
config_path = Path(model_location) / "config.json"
|
||||
if not config_path.exists():
|
||||
return None
|
||||
|
||||
with open(config_path, "r", encoding="utf-8") as f:
|
||||
return json.load(f).get("architecture", "voxcpm").lower()
|
||||
|
||||
model_hint = str(model_location).lower()
|
||||
if "voxcpm2" in model_hint:
|
||||
return "voxcpm2"
|
||||
if (
|
||||
"voxcpm1.5" in model_hint
|
||||
or "voxcpm-1.5" in model_hint
|
||||
or "voxcpm_1.5" in model_hint
|
||||
):
|
||||
return "voxcpm"
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def validate_prompt_related_args(args, parser, prompt_text: str | None):
|
||||
if prompt_text and not args.prompt_audio:
|
||||
parser.error("--prompt-text/--prompt-file requires --prompt-audio.")
|
||||
|
||||
if args.prompt_audio and not prompt_text:
|
||||
parser.error("--prompt-audio requires --prompt-text or --prompt-file.")
|
||||
|
||||
if args.control and prompt_text:
|
||||
parser.error(
|
||||
"--control cannot be used together with --prompt-text or --prompt-file."
|
||||
)
|
||||
|
||||
|
||||
def validate_reference_support(args, parser):
|
||||
if not getattr(args, "reference_audio", None):
|
||||
return
|
||||
|
||||
arch = detect_model_architecture(args)
|
||||
if arch == "voxcpm":
|
||||
parser.error("--reference-audio is only supported with VoxCPM2 models.")
|
||||
|
||||
|
||||
def validate_design_args(args, parser):
|
||||
prompt_text = resolve_prompt_text(args, parser)
|
||||
if args.prompt_audio or args.reference_audio or prompt_text:
|
||||
parser.error(
|
||||
"`design` does not accept prompt/reference audio. Use `clone` instead."
|
||||
)
|
||||
|
||||
|
||||
def validate_clone_args(args, parser):
|
||||
prompt_text = resolve_prompt_text(args, parser)
|
||||
validate_prompt_related_args(args, parser, prompt_text)
|
||||
validate_reference_support(args, parser)
|
||||
|
||||
if not args.prompt_audio and not args.reference_audio:
|
||||
parser.error(
|
||||
"`clone` requires --reference-audio, or --prompt-audio with --prompt-text/--prompt-file."
|
||||
)
|
||||
|
||||
return prompt_text
|
||||
|
||||
|
||||
def validate_batch_args(args, parser):
|
||||
prompt_text = resolve_prompt_text(args, parser)
|
||||
validate_prompt_related_args(args, parser, prompt_text)
|
||||
validate_reference_support(args, parser)
|
||||
return prompt_text
|
||||
|
||||
|
||||
# -----------------------------
|
||||
# Model loading
|
||||
# -----------------------------
|
||||
@@ -57,7 +176,9 @@ def validate_ranges(args, parser):
|
||||
def load_model(args) -> VoxCPM:
|
||||
print("Loading VoxCPM model...", file=sys.stderr)
|
||||
|
||||
zipenhancer_path = getattr(args, "zipenhancer_path", None) or os.environ.get("ZIPENHANCER_MODEL_PATH", None)
|
||||
zipenhancer_path = getattr(args, "zipenhancer_path", None) or os.environ.get(
|
||||
"ZIPENHANCER_MODEL_PATH", None
|
||||
)
|
||||
|
||||
# Build LoRA config if provided
|
||||
lora_config = None
|
||||
@@ -87,6 +208,7 @@ def load_model(args) -> VoxCPM:
|
||||
voxcpm_model_path=args.model_path,
|
||||
zipenhancer_model_path=zipenhancer_path,
|
||||
enable_denoiser=not args.no_denoiser,
|
||||
optimize=not args.no_optimize,
|
||||
lora_config=lora_config,
|
||||
lora_weights_path=lora_weights_path,
|
||||
)
|
||||
@@ -104,6 +226,7 @@ def load_model(args) -> VoxCPM:
|
||||
zipenhancer_model_id=zipenhancer_path,
|
||||
cache_dir=args.cache_dir,
|
||||
local_files_only=args.local_files_only,
|
||||
optimize=not args.no_optimize,
|
||||
lora_config=lora_config,
|
||||
lora_weights_path=lora_weights_path,
|
||||
)
|
||||
@@ -119,32 +242,26 @@ def load_model(args) -> VoxCPM:
|
||||
# -----------------------------
|
||||
|
||||
|
||||
def cmd_clone(args):
|
||||
if not args.text:
|
||||
sys.exit("Error: Please provide --text for synthesis")
|
||||
|
||||
has_prompt = args.prompt_audio and args.prompt_text
|
||||
has_ref = args.reference_audio is not None
|
||||
if not has_prompt and not has_ref:
|
||||
sys.exit("Error: Voice cloning requires --prompt-audio + --prompt-text, or --reference-audio, or both")
|
||||
def _run_single(args, parser, *, text: str, output: str, prompt_text: str | None):
|
||||
output_path = validate_output_path(output)
|
||||
|
||||
if args.prompt_audio:
|
||||
validate_file_exists(args.prompt_audio, "prompt audio file")
|
||||
require_file_exists(args.prompt_audio, parser, "prompt audio file")
|
||||
if args.reference_audio:
|
||||
validate_file_exists(args.reference_audio, "reference audio file")
|
||||
output_path = validate_output_path(args.output)
|
||||
require_file_exists(args.reference_audio, parser, "reference audio file")
|
||||
|
||||
model = load_model(args)
|
||||
|
||||
audio_array = model.generate(
|
||||
text=args.text,
|
||||
prompt_wav_path=args.prompt_audio if has_prompt else None,
|
||||
prompt_text=args.prompt_text if has_prompt else None,
|
||||
text=text,
|
||||
prompt_wav_path=args.prompt_audio,
|
||||
prompt_text=prompt_text,
|
||||
reference_wav_path=args.reference_audio,
|
||||
cfg_value=args.cfg_value,
|
||||
inference_timesteps=args.inference_timesteps,
|
||||
normalize=args.normalize,
|
||||
denoise=args.denoise,
|
||||
denoise=args.denoise
|
||||
and (args.prompt_audio is not None or args.reference_audio is not None),
|
||||
)
|
||||
|
||||
sf.write(str(output_path), audio_array, model.tts_model.sample_rate)
|
||||
@@ -153,31 +270,24 @@ def cmd_clone(args):
|
||||
print(f"Saved audio to: {output_path} ({duration:.2f}s)", file=sys.stderr)
|
||||
|
||||
|
||||
def cmd_synthesize(args):
|
||||
if not args.text:
|
||||
sys.exit("Error: Please provide --text for synthesis")
|
||||
|
||||
output_path = validate_output_path(args.output)
|
||||
model = load_model(args)
|
||||
|
||||
audio_array = model.generate(
|
||||
text=args.text,
|
||||
prompt_wav_path=None,
|
||||
prompt_text=None,
|
||||
cfg_value=args.cfg_value,
|
||||
inference_timesteps=args.inference_timesteps,
|
||||
normalize=args.normalize,
|
||||
denoise=False,
|
||||
def cmd_design(args, parser):
|
||||
validate_design_args(args, parser)
|
||||
final_text = build_final_text(args.text, args.control)
|
||||
return _run_single(
|
||||
args, parser, text=final_text, output=args.output, prompt_text=None
|
||||
)
|
||||
|
||||
sf.write(str(output_path), audio_array, model.tts_model.sample_rate)
|
||||
|
||||
duration = len(audio_array) / model.tts_model.sample_rate
|
||||
print(f"Saved audio to: {output_path} ({duration:.2f}s)", file=sys.stderr)
|
||||
def cmd_clone(args, parser):
|
||||
prompt_text = validate_clone_args(args, parser)
|
||||
final_text = build_final_text(args.text, args.control)
|
||||
return _run_single(
|
||||
args, parser, text=final_text, output=args.output, prompt_text=prompt_text
|
||||
)
|
||||
|
||||
|
||||
def cmd_batch(args):
|
||||
input_file = validate_file_exists(args.input, "input file")
|
||||
def cmd_batch(args, parser):
|
||||
input_file = require_file_exists(args.input, parser, "input file")
|
||||
output_dir = Path(args.output_dir)
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
@@ -187,29 +297,36 @@ def cmd_batch(args):
|
||||
if not texts:
|
||||
sys.exit("Error: Input file is empty")
|
||||
|
||||
prompt_text = validate_batch_args(args, parser)
|
||||
model = load_model(args)
|
||||
|
||||
prompt_audio_path = None
|
||||
if args.prompt_audio:
|
||||
prompt_audio_path = str(validate_file_exists(args.prompt_audio, "prompt audio file"))
|
||||
prompt_audio_path = str(
|
||||
require_file_exists(args.prompt_audio, parser, "prompt audio file")
|
||||
)
|
||||
|
||||
reference_audio_path = None
|
||||
if args.reference_audio:
|
||||
reference_audio_path = str(validate_file_exists(args.reference_audio, "reference audio file"))
|
||||
reference_audio_path = str(
|
||||
require_file_exists(args.reference_audio, parser, "reference audio file")
|
||||
)
|
||||
|
||||
success_count = 0
|
||||
|
||||
for i, text in enumerate(texts, 1):
|
||||
try:
|
||||
final_text = build_final_text(text, args.control)
|
||||
audio_array = model.generate(
|
||||
text=text,
|
||||
text=final_text,
|
||||
prompt_wav_path=prompt_audio_path,
|
||||
prompt_text=args.prompt_text,
|
||||
prompt_text=prompt_text,
|
||||
reference_wav_path=reference_audio_path,
|
||||
cfg_value=args.cfg_value,
|
||||
inference_timesteps=args.inference_timesteps,
|
||||
normalize=args.normalize,
|
||||
denoise=args.denoise and (prompt_audio_path is not None or reference_audio_path is not None),
|
||||
denoise=args.denoise
|
||||
and (prompt_audio_path is not None or reference_audio_path is not None),
|
||||
)
|
||||
|
||||
output_file = output_dir / f"output_{i:03d}.wav"
|
||||
@@ -230,97 +347,251 @@ def cmd_batch(args):
|
||||
# -----------------------------
|
||||
|
||||
|
||||
def _build_unified_parser():
|
||||
def _add_common_generation_args(parser):
|
||||
parser.add_argument("--text", "-t", help="Text to synthesize")
|
||||
parser.add_argument(
|
||||
"--control",
|
||||
type=str,
|
||||
help="Control instruction for VoxCPM2 voice design/cloning",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--cfg-value",
|
||||
type=float,
|
||||
default=2.0,
|
||||
help="CFG guidance scale (float, recommended 0.5–5.0, default: 2.0)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--inference-timesteps",
|
||||
type=int,
|
||||
default=10,
|
||||
help="Inference steps (int, 1–100, default: 10)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--normalize", action="store_true", help="Enable text normalization"
|
||||
)
|
||||
|
||||
|
||||
def _add_prompt_reference_args(parser):
|
||||
parser.add_argument(
|
||||
"--prompt-audio",
|
||||
"-pa",
|
||||
help="Prompt audio file path (continuation mode, requires --prompt-text or --prompt-file)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--prompt-text", "-pt", help="Text corresponding to the prompt audio"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--prompt-file", type=str, help="Text file corresponding to the prompt audio"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--reference-audio",
|
||||
"-ra",
|
||||
help="Reference audio for voice cloning (VoxCPM2 only)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--denoise",
|
||||
action="store_true",
|
||||
help="Enable prompt/reference speech enhancement",
|
||||
)
|
||||
|
||||
|
||||
def _add_model_args(parser):
|
||||
parser.add_argument("--model-path", type=str, help="Local VoxCPM model path")
|
||||
parser.add_argument(
|
||||
"--hf-model-id",
|
||||
type=str,
|
||||
default=DEFAULT_HF_MODEL_ID,
|
||||
help=f"Hugging Face repo id (default: {DEFAULT_HF_MODEL_ID})",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--cache-dir", type=str, help="Cache directory for Hub downloads"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--local-files-only", action="store_true", help="Disable network access"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--no-denoiser", action="store_true", help="Disable denoiser model loading"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--no-optimize",
|
||||
action="store_true",
|
||||
help="Disable model optimization during loading",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--zipenhancer-path",
|
||||
type=str,
|
||||
help="ZipEnhancer model id or local path (or env ZIPENHANCER_MODEL_PATH)",
|
||||
)
|
||||
|
||||
|
||||
def _add_lora_args(parser):
|
||||
parser.add_argument("--lora-path", type=str, help="Path to LoRA weights")
|
||||
parser.add_argument(
|
||||
"--lora-r", type=int, default=32, help="LoRA rank (positive int, default: 32)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--lora-alpha",
|
||||
type=int,
|
||||
default=16,
|
||||
help="LoRA alpha (positive int, default: 16)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--lora-dropout",
|
||||
type=float,
|
||||
default=0.0,
|
||||
help="LoRA dropout rate (0.0–1.0, default: 0.0)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--lora-disable-lm", action="store_true", help="Disable LoRA on LM layers"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--lora-disable-dit", action="store_true", help="Disable LoRA on DiT layers"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--lora-enable-proj",
|
||||
action="store_true",
|
||||
help="Enable LoRA on projection layers",
|
||||
)
|
||||
|
||||
|
||||
def _build_parser():
|
||||
parser = argparse.ArgumentParser(
|
||||
description="VoxCPM CLI - voice cloning, direct TTS, and batch processing",
|
||||
description="VoxCPM CLI - VoxCPM2-first voice design, cloning, and batch processing",
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
epilog="""
|
||||
Examples:
|
||||
voxcpm --text "Hello world" --output out.wav
|
||||
voxcpm --text "Hello" --prompt-audio ref.wav --prompt-text "hi" --output out.wav --denoise
|
||||
voxcpm --input texts.txt --output-dir ./outs
|
||||
voxcpm design --text "Hello world" --output out.wav
|
||||
voxcpm design --text "Hello world" --control "warm female voice" --output out.wav
|
||||
voxcpm clone --text "Hello" --reference-audio ref.wav --output out.wav
|
||||
voxcpm batch --input texts.txt --output-dir ./outs --reference-audio ref.wav
|
||||
""",
|
||||
)
|
||||
|
||||
# Mode selection
|
||||
subparsers = parser.add_subparsers(dest="command")
|
||||
|
||||
design_parser = subparsers.add_parser(
|
||||
"design", help="Generate speech with VoxCPM2-first voice design"
|
||||
)
|
||||
_add_common_generation_args(design_parser)
|
||||
_add_prompt_reference_args(design_parser)
|
||||
_add_model_args(design_parser)
|
||||
_add_lora_args(design_parser)
|
||||
design_parser.add_argument(
|
||||
"--output", "-o", required=True, help="Output audio file path"
|
||||
)
|
||||
|
||||
clone_parser = subparsers.add_parser(
|
||||
"clone", help="Clone a voice with reference/prompt audio"
|
||||
)
|
||||
_add_common_generation_args(clone_parser)
|
||||
_add_prompt_reference_args(clone_parser)
|
||||
_add_model_args(clone_parser)
|
||||
_add_lora_args(clone_parser)
|
||||
clone_parser.add_argument(
|
||||
"--output", "-o", required=True, help="Output audio file path"
|
||||
)
|
||||
|
||||
batch_parser = subparsers.add_parser(
|
||||
"batch", help="Batch-generate one line per output file"
|
||||
)
|
||||
batch_parser.add_argument(
|
||||
"--input", "-i", required=True, help="Input text file (one text per line)"
|
||||
)
|
||||
batch_parser.add_argument(
|
||||
"--output-dir", "-od", required=True, help="Output directory"
|
||||
)
|
||||
batch_parser.add_argument(
|
||||
"--control",
|
||||
type=str,
|
||||
help="Control instruction for VoxCPM2 voice design/cloning",
|
||||
)
|
||||
_add_prompt_reference_args(batch_parser)
|
||||
batch_parser.add_argument(
|
||||
"--cfg-value",
|
||||
type=float,
|
||||
default=2.0,
|
||||
help="CFG guidance scale (float, recommended 0.5–5.0, default: 2.0)",
|
||||
)
|
||||
batch_parser.add_argument(
|
||||
"--inference-timesteps",
|
||||
type=int,
|
||||
default=10,
|
||||
help="Inference steps (int, 1–100, default: 10)",
|
||||
)
|
||||
batch_parser.add_argument(
|
||||
"--normalize", action="store_true", help="Enable text normalization"
|
||||
)
|
||||
_add_model_args(batch_parser)
|
||||
_add_lora_args(batch_parser)
|
||||
|
||||
# Legacy root arguments
|
||||
parser.add_argument("--input", "-i", help="Input text file (batch mode only)")
|
||||
parser.add_argument("--output-dir", "-od", help="Output directory (batch mode only)")
|
||||
parser.add_argument("--text", "-t", help="Text to synthesize (single or clone mode)")
|
||||
parser.add_argument("--output", "-o", help="Output audio file path (single or clone mode)")
|
||||
|
||||
# Prompt / Reference
|
||||
parser.add_argument(
|
||||
"--prompt-audio", "-pa", help="Prompt audio file path (continuation mode, requires --prompt-text)"
|
||||
"--output-dir", "-od", help="Output directory (batch mode only)"
|
||||
)
|
||||
parser.add_argument("--prompt-text", "-pt", help="Text corresponding to the prompt audio")
|
||||
_add_common_generation_args(parser)
|
||||
parser.add_argument(
|
||||
"--reference-audio", "-ra", help="Reference audio for voice cloning (isolated mode, VoxCPM2 only)"
|
||||
"--output", "-o", help="Output audio file path (single or clone mode)"
|
||||
)
|
||||
parser.add_argument("--denoise", action="store_true", help="Enable prompt/reference speech enhancement")
|
||||
|
||||
# Generation parameters
|
||||
parser.add_argument(
|
||||
"--cfg-value", type=float, default=2.0, help="CFG guidance scale (float, recommended 0.5–5.0, default: 2.0)"
|
||||
)
|
||||
parser.add_argument("--inference-timesteps", type=int, default=10, help="Inference steps (int, 1–100, default: 10)")
|
||||
parser.add_argument("--normalize", action="store_true", help="Enable text normalization")
|
||||
|
||||
# Model loading
|
||||
parser.add_argument("--model-path", type=str, help="Local VoxCPM model path")
|
||||
parser.add_argument(
|
||||
"--hf-model-id", type=str, default="openbmb/VoxCPM1.5", help="Hugging Face repo id (default: openbmb/VoxCPM1.5)"
|
||||
)
|
||||
parser.add_argument("--cache-dir", type=str, help="Cache directory for Hub downloads")
|
||||
parser.add_argument("--local-files-only", action="store_true", help="Disable network access")
|
||||
parser.add_argument("--no-denoiser", action="store_true", help="Disable denoiser model loading")
|
||||
parser.add_argument(
|
||||
"--zipenhancer-path", type=str, help="ZipEnhancer model id or local path (or env ZIPENHANCER_MODEL_PATH)"
|
||||
)
|
||||
|
||||
# LoRA
|
||||
parser.add_argument("--lora-path", type=str, help="Path to LoRA weights")
|
||||
parser.add_argument("--lora-r", type=int, default=32, help="LoRA rank (positive int, default: 32)")
|
||||
parser.add_argument("--lora-alpha", type=int, default=16, help="LoRA alpha (positive int, default: 16)")
|
||||
parser.add_argument("--lora-dropout", type=float, default=0.0, help="LoRA dropout rate (0.0–1.0, default: 0.0)")
|
||||
parser.add_argument("--lora-disable-lm", action="store_true", help="Disable LoRA on LM layers")
|
||||
parser.add_argument("--lora-disable-dit", action="store_true", help="Disable LoRA on DiT layers")
|
||||
parser.add_argument("--lora-enable-proj", action="store_true", help="Enable LoRA on projection layers")
|
||||
_add_prompt_reference_args(parser)
|
||||
_add_model_args(parser)
|
||||
_add_lora_args(parser)
|
||||
|
||||
return parser
|
||||
|
||||
|
||||
def _dispatch_legacy(args, parser):
|
||||
warn_legacy_mode()
|
||||
|
||||
if args.input and args.text:
|
||||
parser.error(
|
||||
"Use either batch mode (--input) or single mode (--text), not both."
|
||||
)
|
||||
|
||||
if args.input:
|
||||
if not args.output_dir:
|
||||
parser.error("Batch mode requires --output-dir")
|
||||
return cmd_batch(args, parser)
|
||||
|
||||
if not args.text or not args.output:
|
||||
parser.error("Single-sample legacy mode requires --text and --output")
|
||||
|
||||
if (
|
||||
args.prompt_audio
|
||||
or args.prompt_text
|
||||
or args.prompt_file
|
||||
or args.reference_audio
|
||||
):
|
||||
return cmd_clone(args, parser)
|
||||
|
||||
return cmd_design(args, parser)
|
||||
|
||||
|
||||
# -----------------------------
|
||||
# Entrypoint
|
||||
# -----------------------------
|
||||
|
||||
|
||||
def main():
|
||||
parser = _build_unified_parser()
|
||||
parser = _build_parser()
|
||||
args = parser.parse_args()
|
||||
|
||||
# Validate ranges
|
||||
validate_ranges(args, parser)
|
||||
|
||||
# Mode conflict checks
|
||||
if args.input and args.text:
|
||||
parser.error("Use either batch mode (--input) or single mode (--text), not both.")
|
||||
if args.command == "design":
|
||||
if not args.text:
|
||||
parser.error("`design` requires --text")
|
||||
return cmd_design(args, parser)
|
||||
|
||||
# Batch mode
|
||||
if args.input:
|
||||
if not args.output_dir:
|
||||
parser.error("Batch mode requires --output-dir")
|
||||
return cmd_batch(args)
|
||||
if args.command == "clone":
|
||||
if not args.text or not args.output:
|
||||
parser.error("`clone` requires --text and --output")
|
||||
return cmd_clone(args, parser)
|
||||
|
||||
# Single mode
|
||||
if not args.text or not args.output:
|
||||
parser.error("Single-sample mode requires --text and --output")
|
||||
if args.command == "batch":
|
||||
return cmd_batch(args, parser)
|
||||
|
||||
# Clone mode (prompt continuation, reference isolation, or both)
|
||||
if args.prompt_audio or args.prompt_text or args.reference_audio:
|
||||
return cmd_clone(args)
|
||||
|
||||
# Direct synthesis
|
||||
return cmd_synthesize(args)
|
||||
return _dispatch_legacy(args, parser)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user