fix: complete shared generator cleanup coverage
Move generator close handling into a shared utility and wire the core generation pipeline through it, so that partially consumed prompt-cache generators are cleaned up consistently across both model variants and the public VoxCPM wrapper.

Made-with: Cursor
This commit is contained in:
@@ -3,6 +3,16 @@ import torch
|
||||
from transformers import PreTrainedTokenizer
|
||||
|
||||
|
||||
# Ref: https://github.com/OpenBMB/VoxCPM/issues/256#issuecomment-4235252732
|
||||
# Explicitly close partially-consumed generators so inference_mode cleanup
|
||||
# does not get deferred to Python's GC/finalizer path.
|
||||
def next_and_close(gen):
    """Return the next item from ``gen`` and then close it immediately.

    The iterator is closed in a ``finally`` clause, so cleanup code inside a
    partially consumed generator (its own ``finally`` blocks and context
    managers) runs right away instead of being deferred until the object is
    garbage-collected.

    Args:
        gen: A generator, or any iterator. Objects that do not expose a
            ``close()`` method are simply left alone after the item is read.

    Returns:
        The first item produced by ``next(gen)``.

    Raises:
        StopIteration: If ``gen`` is already exhausted. ``close()`` is still
            invoked in that case.
    """
    try:
        return next(gen)
    finally:
        # Plain iterators (e.g. ``iter(list)``) have no ``close()``; calling it
        # unconditionally would raise AttributeError from the ``finally`` and
        # discard the value that was just fetched.
        close = getattr(gen, "close", None)
        if close is not None:
            close()
|
||||
|
||||
|
||||
def mask_multichar_chinese_tokens(tokenizer: PreTrainedTokenizer):
|
||||
"""Create a tokenizer wrapper that converts multi-character Chinese tokens to single characters.
|
||||
|
||||
|
||||
Reference in New Issue
Block a user