diff --git a/src/f5_tts/infer/infer_cli.py b/src/f5_tts/infer/infer_cli.py index 1d9b319..570834b 100644 --- a/src/f5_tts/infer/infer_cli.py +++ b/src/f5_tts/infer/infer_cli.py @@ -161,6 +161,8 @@ def main_process(ref_audio, ref_text, text_gen, model_obj, remove_silence, speed chunks = re.split(reg1, text_gen) reg2 = r"\[(\w+)\]" for text in chunks: + if not text.strip(): + continue match = re.match(reg2, text) if match: voice = match[1]