Fix VoxCPM2 training sample_rate: 48000 -> 16000 (to match the sample rate the AudioVAE encoder expects)

Made-with: Cursor
This commit is contained in:
刘鑫
2026-04-07 22:57:42 +08:00
parent da700f264e
commit 46cfce0c97
4 changed files with 53 additions and 2 deletions
+8
View File
@@ -46,6 +46,7 @@ def train(
train_manifest: str,
val_manifest: str = "",
sample_rate: int = 16_000,
out_sample_rate: int = 0, # accepted from YAML for documentation; not used in training
batch_size: int = 1,
grad_accum_steps: int = 1,
num_workers: int = 2,
@@ -68,6 +69,7 @@ def train(
distribute: bool = False, # If True, save hf_model_id as base_model; otherwise save pretrained_path
):
_ = config_path
_ = out_sample_rate
# Validate distribution options
if lora is not None and distribute and not hf_model_id:
@@ -98,6 +100,12 @@ def train(
)
tokenizer = base_model.text_tokenizer
expected_sr = base_model.audio_vae.sample_rate
assert sample_rate == expected_sr, (
f"sample_rate mismatch: config says {sample_rate}, but the AudioVAE encoder expects {expected_sr}. "
f"Please set sample_rate: {expected_sr} in your training config. "
)
train_ds, val_ds = load_audio_text_datasets(
train_manifest=train_manifest,
val_manifest=val_manifest,