fix VoxCPM2 training sample_rate: 48000 -> 16000 (match AudioVAE encoder)

Made-with: Cursor
2026-04-07 22:57:42 +08:00
parent da700f264e
commit 46cfce0c97
4 changed files with 53 additions and 2 deletions
@@ -46,6 +46,7 @@ def train(
    train_manifest: str,
    val_manifest: str = "",
    sample_rate: int = 16_000,
+    out_sample_rate: int = 0,  # declared in YAML configs for documentation only; accepted here but not used in training
    batch_size: int = 1,
    grad_accum_steps: int = 1,
    num_workers: int = 2,
@@ -68,6 +69,7 @@ def train(
    distribute: bool = False,  # If True, save hf_model_id as base_model; otherwise save pretrained_path
 ):
    _ = config_path
+    _ = out_sample_rate

    # Validate distribution options
    if lora is not None and distribute and not hf_model_id:
@@ -98,6 +100,12 @@ def train(
    )
    tokenizer = base_model.text_tokenizer

+    expected_sr = base_model.audio_vae.sample_rate
+    assert sample_rate == expected_sr, (
+        f"sample_rate mismatch: config says {sample_rate}, but the AudioVAE encoder expects {expected_sr}. "
+        f"Please set sample_rate: {expected_sr} in your training config. "
+    )
+
    train_ds, val_ds = load_audio_text_datasets(
        train_manifest=train_manifest,
        val_manifest=val_manifest,