Update WAV File Naming and Dependencies 📝🔊 (#1091)

* Update infer_cli.py

* Update pyproject.toml

* formalized

---------

Co-authored-by: SWivid <swivid@qq.com>
This commit is contained in:
Danh Tran
2025-06-24 22:23:00 +07:00
committed by GitHub
parent ac42286d04
commit ebbd7bd91f
2 changed files with 15 additions and 0 deletions

View File

@@ -38,6 +38,7 @@ dependencies = [
"tqdm>=4.65.0",
"transformers",
"transformers_stream_generator",
"unidecode",
"vocos",
"wandb",
"x_transformers>=1.31.14",

View File

@@ -12,6 +12,7 @@ import tomli
from cached_path import cached_path
from hydra.utils import get_class
from omegaconf import OmegaConf
from unidecode import unidecode
from f5_tts.infer.utils_infer import (
cfg_strength,
@@ -112,6 +113,11 @@ parser.add_argument(
action="store_true",
help="To save each audio chunks during inference",
)
# Opt-out switch for ASCII-transliterated chunk file names.
# NOTE: with action="store_false" the parsed value `args.no_legacy_text` is True
# when the flag is absent and False when `--no_legacy_text` is passed, i.e. the
# attribute reads as "use legacy text" despite its name.
parser.add_argument(
"--no_legacy_text",
action="store_false",
help="Not to use lossy ASCII transliterations of unicode text in saved file names.",
)
parser.add_argument(
"--remove_silence",
action="store_true",
@@ -197,6 +203,12 @@ output_file = args.output_file or config.get(
)
save_chunk = args.save_chunk or config.get("save_chunk", False)
# `--no_legacy_text` is a store_false arg: args.no_legacy_text is True unless the
# flag was passed. Legacy (ASCII-transliterated) file names stay enabled only when
# neither the CLI flag nor the config's "no_legacy_text" key asks to disable them.
# (Using `or` here made the config key ineffective without the flag and inverted
# its meaning with the flag.)
use_legacy_text = args.no_legacy_text and not config.get("no_legacy_text", False)
if save_chunk and use_legacy_text:
    # Only relevant when chunks are written to disk, since that is where the
    # transliterated text ends up in the .wav file name.
    print(
        "\nWarning to --save_chunk: lossy ASCII transliterations of unicode text for legacy (.wav) file names, --no_legacy_text to disable.\n"
    )
remove_silence = args.remove_silence or config.get("remove_silence", False)
load_vocoder_from_local = args.load_vocoder_from_local or config.get("load_vocoder_from_local", False)
@@ -344,6 +356,8 @@ def main():
if save_chunk:
if len(gen_text_) > 200:
gen_text_ = gen_text_[:200] + " ... "
if use_legacy_text:
gen_text_ = unidecode(gen_text_)
sf.write(
os.path.join(output_chunk_dir, f"{len(generated_audio_segments) - 1}_{gen_text_}.wav"),
audio_segment,