support voxcpm2 cli

This commit is contained in:
刘鑫
2026-04-01 21:15:55 +08:00
parent 42c428164c
commit addee2c550
8 changed files with 1642 additions and 375 deletions
+49 -24
View File
@@ -126,47 +126,72 @@ print("saved: output_streaming.wav")
After installation, the entry point is `voxcpm` (or use `python -m voxcpm.cli`).
```bash
# 1) Direct synthesis (single text)
voxcpm --text "VoxCPM is an innovative end-to-end TTS model from ModelBest, designed to generate highly expressive speech." --output out.wav
# 1) Voice design (VoxCPM2-first)
voxcpm design \
--text "VoxCPM is an innovative end-to-end TTS model from ModelBest, designed to generate highly expressive speech." \
--output out.wav
# 2) Voice cloning (reference audio + transcript)
voxcpm --text "VoxCPM is an innovative end-to-end TTS model from ModelBest, designed to generate highly expressive speech." \
# 2) Voice design with control instruction
voxcpm design \
--text "VoxCPM is an innovative end-to-end TTS model from ModelBest, designed to generate highly expressive speech." \
--control "Young female voice, warm and gentle, slightly smiling" \
--output out.wav
# 3) Voice cloning (reference audio only, VoxCPM2)
voxcpm clone \
--text "VoxCPM is an innovative end-to-end TTS model from ModelBest, designed to generate highly expressive speech." \
--reference-audio path/to/voice.wav \
--output out.wav
# 4) Hi-Fi / advanced cloning (prompt audio + transcript)
voxcpm clone \
--text "VoxCPM is an innovative end-to-end TTS model from ModelBest, designed to generate highly expressive speech." \
--prompt-audio path/to/voice.wav \
--prompt-text "reference transcript" \
--output out.wav \
# --denoise
--output out.wav
# (Optional) Voice cloning (reference audio + transcript file)
voxcpm --text "VoxCPM is an innovative end-to-end TTS model from ModelBest, designed to generate highly expressive speech." \
# 5) Prompt transcript from file
voxcpm clone \
--text "VoxCPM is an innovative end-to-end TTS model from ModelBest, designed to generate highly expressive speech." \
--prompt-audio path/to/voice.wav \
--prompt-file "/path/to/text-file" \
--output out.wav \
# --denoise
--output out.wav
# 3) Batch processing (one text per line)
voxcpm --input examples/input.txt --output-dir outs
# (optional) Batch + cloning
voxcpm --input examples/input.txt --output-dir outs \
# 6) Advanced cloning: prompt + reference together
voxcpm clone \
--text "VoxCPM is an innovative end-to-end TTS model from ModelBest, designed to generate highly expressive speech." \
--prompt-audio path/to/voice.wav \
--prompt-text "reference transcript" \
# --denoise
--reference-audio path/to/voice.wav \
--output out.wav \
--denoise
# 4) Inference parameters (quality/speed)
voxcpm --text "..." --output out.wav \
# 7) Batch processing (one text per line)
voxcpm batch --input examples/input.txt --output-dir outs
# 8) Batch + cloning
voxcpm batch --input examples/input.txt --output-dir outs \
--reference-audio path/to/voice.wav
# 9) Inference parameters (quality/speed)
voxcpm design --text "..." --output out.wav \
--cfg-value 2.0 --inference-timesteps 10 --normalize
# 5) Model loading
# 10) Model loading
# Prefer local path
voxcpm --text "..." --output out.wav --model-path /path/to/VoxCPM_model_dir
voxcpm design --text "..." --output out.wav --model-path /path/to/VoxCPM_model_dir
# Or from Hugging Face (auto download/cache)
voxcpm --text "..." --output out.wav \
--hf-model-id openbmb/VoxCPM1.5 --cache-dir ~/.cache/huggingface --local-files-only
voxcpm design --text "..." --output out.wav \
--hf-model-id openbmb/VoxCPM2 --cache-dir ~/.cache/huggingface --local-files-only
# 6) Denoiser control
voxcpm --text "..." --output out.wav \
# 11) Denoiser control
voxcpm clone --text "..." --output out.wav --reference-audio path/to/voice.wav \
--no-denoiser --zipenhancer-path iic/speech_zipenhancer_ans_multiloss_16k_base
# 7) Help
# 12) Legacy root arguments still work but are deprecated
voxcpm --text "..." --output out.wav
# 13) Help
voxcpm --help
python -m voxcpm.cli --help
```