From a83e76411089fb5b8d2d035f72331da3ca332c5c Mon Sep 17 00:00:00 2001 From: Rino Date: Mon, 4 Nov 2024 15:46:00 +0700 Subject: [PATCH 1/4] Update socket.py [edit] adjusting mel_spec_type on load_model use case --- src/f5_tts/socket.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/f5_tts/socket.py b/src/f5_tts/socket.py index 183f24e..22f7e7a 100644 --- a/src/f5_tts/socket.py +++ b/src/f5_tts/socket.py @@ -19,10 +19,14 @@ class TTSStreamingProcessor: # Load the model using the provided checkpoint and vocab files self.model = load_model( - DiT, - dict(dim=1024, depth=22, heads=16, ff_mult=2, text_dim=512, conv_layers=4), - ckpt_file, - vocab_file, + model_cls=DiT, + model_cfg=dict(dim=1024, depth=22, heads=16, ff_mult=2, text_dim=512, conv_layers=4), + ckpt_path=ckpt_file, + mel_spec_type="vocos", # or "bigvgan" depending on vocoder + vocab_file=vocab_file, + ode_method="euler", + use_ema=True, + device=self.device ).to(self.device, dtype=dtype) # Load the vocoder From c129dd7ba49e07f3500f65b9c86dc743f17261d3 Mon Sep 17 00:00:00 2001 From: Rino Date: Mon, 4 Nov 2024 15:48:09 +0700 Subject: [PATCH 2/4] Rename socket.py to socket_server.py [bug fix] due to circular import, can't use socket as file name --- src/f5_tts/{socket.py => socket_server.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/f5_tts/{socket.py => socket_server.py} (100%) diff --git a/src/f5_tts/socket.py b/src/f5_tts/socket_server.py similarity index 100% rename from src/f5_tts/socket.py rename to src/f5_tts/socket_server.py From 24cfa9ecb9a9f2237d3ee20739c146ee82fe106f Mon Sep 17 00:00:00 2001 From: Rino Date: Mon, 4 Nov 2024 15:50:15 +0700 Subject: [PATCH 3/4] Update README.md --- src/f5_tts/infer/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/f5_tts/infer/README.md b/src/f5_tts/infer/README.md index e6f6533..24600b6 100644 --- a/src/f5_tts/infer/README.md +++ b/src/f5_tts/infer/README.md @@ -119,7 +119,7 @@ python src/f5_tts/infer/speech_edit.py To communicate with socket server you need to run ```bash -python src/f5_tts/socket.py +python src/f5_tts/socket_server.py ```
From c1c20ed009a4761f21a41a5cbecaefa9c83ef29c Mon Sep 17 00:00:00 2001 From: Yushen CHEN <45333109+SWivid@users.noreply.github.com> Date: Mon, 4 Nov 2024 17:11:29 +0800 Subject: [PATCH 4/4] Update socket_server.py, to pass format check --- src/f5_tts/socket_server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/f5_tts/socket_server.py b/src/f5_tts/socket_server.py index 22f7e7a..cc5dc3e 100644 --- a/src/f5_tts/socket_server.py +++ b/src/f5_tts/socket_server.py @@ -26,7 +26,7 @@ class TTSStreamingProcessor: vocab_file=vocab_file, ode_method="euler", use_ema=True, - device=self.device + device=self.device, ).to(self.device, dtype=dtype) # Load the vocoder