Modify lora inference api

2025-12-05 22:22:13 +08:00
parent b1f7593ae0
commit 400f47a516
5 changed files with 265 additions and 139 deletions
@@ -2,9 +2,9 @@ import os
 import re
 import tempfile
 import numpy as np
-from typing import Generator
+from typing import Generator, Optional
 from huggingface_hub import snapshot_download
-from .model.voxcpm import VoxCPMModel
+from .model.voxcpm import VoxCPMModel, LoRAConfig

 class VoxCPM:
    def __init__(self,
@@ -12,6 +12,8 @@ class VoxCPM:
            zipenhancer_model_path : str = "iic/speech_zipenhancer_ans_multiloss_16k_base",
            enable_denoiser : bool = True,
            optimize: bool = True,
+            lora_config: Optional[LoRAConfig] = None,
+            lora_weights_path: Optional[str] = None,
        ):
        """Initialize VoxCPM TTS pipeline.

@@ -23,9 +25,30 @@ class VoxCPM:
                id or local path. If None, denoiser will not be initialized.
            enable_denoiser: Whether to initialize the denoiser pipeline.
            optimize: Whether to optimize the model with torch.compile. True by default, but can be disabled for debugging.
+            lora_config: LoRA configuration for fine-tuning. If lora_weights_path is 
+                provided without lora_config, a default config will be created.
+            lora_weights_path: Path to pre-trained LoRA weights (.pth file or directory
+                containing lora_weights.ckpt). If provided, LoRA weights will be loaded.
        """
        print(f"voxcpm_model_path: {voxcpm_model_path}, zipenhancer_model_path: {zipenhancer_model_path}, enable_denoiser: {enable_denoiser}")
-        self.tts_model = VoxCPMModel.from_local(voxcpm_model_path, optimize=optimize)
+        
+        # If lora_weights_path is provided but no lora_config, create a default one
+        if lora_weights_path is not None and lora_config is None:
+            lora_config = LoRAConfig(
+                enable_lm=True,
+                enable_dit=True,
+                enable_proj=False,
+            )
+            print(f"Auto-created default LoRAConfig for loading weights from: {lora_weights_path}")
+        
+        self.tts_model = VoxCPMModel.from_local(voxcpm_model_path, optimize=optimize, lora_config=lora_config)
+        
+        # Load LoRA weights if path is provided
+        if lora_weights_path is not None:
+            print(f"Loading LoRA weights from: {lora_weights_path}")
+            loaded_keys, skipped_keys = self.tts_model.load_lora_weights(lora_weights_path)
+            print(f"Loaded {len(loaded_keys)} LoRA parameters, skipped {len(skipped_keys)}")
+        
        self.text_normalizer = None
        if enable_denoiser and zipenhancer_model_path is not None:
            from .zipenhancer import ZipEnhancer
@@ -46,6 +69,8 @@ class VoxCPM:
            cache_dir: str = None,
            local_files_only: bool = False,
            optimize: bool = True,
+            lora_config: Optional[LoRAConfig] = None,
+            lora_weights_path: Optional[str] = None,
            **kwargs,
        ):
        """Instantiate ``VoxCPM`` from a Hugging Face Hub snapshot.
@@ -59,6 +84,12 @@ class VoxCPM:
            cache_dir: Custom cache directory for the snapshot.
            local_files_only: If True, only use local files and do not attempt
                to download.
+            lora_config: LoRA configuration for fine-tuning. If lora_weights_path is 
+                provided without lora_config, a default config will be created with
+                enable_lm=True and enable_dit=True.
+            lora_weights_path: Path to pre-trained LoRA weights (.pth file or directory
+                containing lora_weights.ckpt). If provided, LoRA weights will be loaded
+                after model initialization.
        Kwargs:
            Additional keyword arguments passed to the ``VoxCPM`` constructor.

@@ -90,6 +121,8 @@ class VoxCPM:
            zipenhancer_model_path=zipenhancer_model_id if load_denoiser else None,
            enable_denoiser=load_denoiser,
            optimize=optimize,
+            lora_config=lora_config,
+            lora_weights_path=lora_weights_path,
            **kwargs,
        )

@@ -196,4 +229,52 @@ class VoxCPM:
                try:
                    os.unlink(temp_prompt_wav_path)
                except OSError:
-                    pass  
+                    pass
+
+    # ------------------------------------------------------------------ #
+    # LoRA Interface (delegated to VoxCPMModel)
+    # ------------------------------------------------------------------ #
+    def load_lora(self, lora_weights_path: str) -> tuple:
+        """Load LoRA weights from a checkpoint into the wrapped TTS model.
+
+        Thin wrapper over ``self.tts_model.load_lora_weights`` that first checks
+        the model carries a LoRA config. Unlike the constructor, this method does
+        not auto-create a default ``LoRAConfig`` when none is present.
+
+        Args:
+            lora_weights_path: Path to LoRA weights (.pth file or directory
+                containing lora_weights.ckpt).
+
+        Returns:
+            tuple: (loaded_keys, skipped_keys) - lists of loaded and skipped parameter names.
+
+        Raises:
+            RuntimeError: If model was not initialized with LoRA config.
+        """
+        # Fail early with an actionable message rather than delegating to the
+        # model when it has no LoRA config.
+        if self.tts_model.lora_config is None:
+            raise RuntimeError(
+                "Cannot load LoRA weights: model was not initialized with LoRA config. "
+                "Please reinitialize with lora_config or lora_weights_path parameter."
+            )
+        return self.tts_model.load_lora_weights(lora_weights_path)
+
+    def unload_lora(self) -> None:
+        """Unload LoRA by delegating to ``self.tts_model.reset_lora_weights()``.
+
+        Per the model API this resets all LoRA weights to their initial state
+        (effectively disabling LoRA); the reset itself is implemented in
+        ``VoxCPMModel`` and is not visible from this class.
+        """
+        # NOTE(review): unlike load_lora, there is no check that the model was
+        # built with a LoRA config - confirm reset_lora_weights tolerates that.
+        self.tts_model.reset_lora_weights()
+    
+    def set_lora_enabled(self, enabled: bool) -> None:
+        """Enable or disable LoRA layers without unloading weights.
+
+        Delegates to ``self.tts_model.set_lora_enabled``. The ``lora_enabled``
+        property of this class does not read this toggle; it only reports
+        whether the model has a LoRA config.
+
+        Args:
+            enabled: If True, LoRA layers are active; if False, only base model is used.
+        """
+        self.tts_model.set_lora_enabled(enabled)
+    
+    def get_lora_state_dict(self) -> dict:
+        """Get current LoRA parameters state dict.
+
+        The value is passed through unchanged from
+        ``self.tts_model.get_lora_state_dict()``.
+
+        Returns:
+            dict: State dict of LoRA parameters (expected to hold the lora_A /
+                lora_B entries; the exact contents are defined by the model
+                class - TODO confirm).
+        """
+        return self.tts_model.get_lora_state_dict()
+    
+    @property
+    def lora_enabled(self) -> bool:
+        """Check if LoRA is currently configured.
+
+        True when ``self.tts_model.lora_config`` is not None. This reports that
+        a LoRA config is present, not whether the LoRA layers are currently
+        switched on via ``set_lora_enabled``.
+        """
+        return self.tts_model.lora_config is not None