From 02d59131c40db3c7e1cf134ed99aed0fa0c32443 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 30 Oct 2024 14:25:16 +0200 Subject: [PATCH 1/3] fix when none tts_api --- src/f5_tts/train/finetune_gradio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/f5_tts/train/finetune_gradio.py b/src/f5_tts/train/finetune_gradio.py index fe835b2..007dad8 100644 --- a/src/f5_tts/train/finetune_gradio.py +++ b/src/f5_tts/train/finetune_gradio.py @@ -1216,7 +1216,7 @@ def infer(project, file_checkpoint, exp_name, ref_text, ref_audio, gen_text, nfe else: device_test = None - if last_checkpoint != file_checkpoint or last_device != device_test or last_ema != use_ema: + if last_checkpoint != file_checkpoint or last_device != device_test or last_ema != use_ema or tts_api is None: if last_checkpoint != file_checkpoint: last_checkpoint = file_checkpoint From 3dd59b8cdfb8f23ea5bf2d98acb6048292f40977 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 30 Oct 2024 14:26:13 +0200 Subject: [PATCH 2/3] when ref_text empty automatic transcribing --- src/f5_tts/api.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/f5_tts/api.py b/src/f5_tts/api.py index 41fc667..4dac62f 100644 --- a/src/f5_tts/api.py +++ b/src/f5_tts/api.py @@ -15,6 +15,9 @@ from f5_tts.infer.utils_infer import ( infer_process, remove_silence_for_generated_wav, save_spectrogram, + preprocess_ref_audio_text, + target_sample_rate, + hop_length, ) @@ -31,10 +34,8 @@ class F5TTS: ): # Initialize parameters self.final_wave = None - self.target_sample_rate = 24000 - self.n_mel_channels = 100 - self.hop_length = 256 - self.target_rms = 0.1 + self.target_sample_rate = target_sample_rate + self.hop_length = hop_length self.seed = -1 # Set device @@ -97,6 +98,10 @@ class F5TTS: seed = random.randint(0, sys.maxsize) seed_everything(seed) self.seed = seed + + if ref_text == "": + ref_file, ref_text = preprocess_ref_audio_text(ref_file, ref_text, device=self.device) + wav, sr, spect = infer_process( ref_file, ref_text, From 6cbb548f9c3cb5139225759a25911e678147e64e Mon Sep 17 00:00:00 2001 From: Yushen CHEN <45333109+SWivid@users.noreply.github.com> Date: Thu, 31 Oct 2024 11:39:23 +0800 Subject: [PATCH 3/3] Update api.py --- src/f5_tts/api.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/f5_tts/api.py b/src/f5_tts/api.py index 4dac62f..823067d 100644 --- a/src/f5_tts/api.py +++ b/src/f5_tts/api.py @@ -99,8 +99,7 @@ class F5TTS: seed_everything(seed) self.seed = seed - if ref_text == "": - ref_file, ref_text = preprocess_ref_audio_text(ref_file, ref_text, device=self.device) + ref_file, ref_text = preprocess_ref_audio_text(ref_file, ref_text, device=self.device) wav, sr, spect = infer_process( ref_file,