diff --git a/src/f5_tts/infer/README.md b/src/f5_tts/infer/README.md index e6f6533..24600b6 100644 --- a/src/f5_tts/infer/README.md +++ b/src/f5_tts/infer/README.md @@ -119,7 +119,7 @@ python src/f5_tts/infer/speech_edit.py To communicate with socket server you need to run ```bash -python src/f5_tts/socket.py +python src/f5_tts/socket_server.py ```
diff --git a/src/f5_tts/socket.py b/src/f5_tts/socket_server.py similarity index 93% rename from src/f5_tts/socket.py rename to src/f5_tts/socket_server.py index 183f24e..cc5dc3e 100644 --- a/src/f5_tts/socket.py +++ b/src/f5_tts/socket_server.py @@ -19,10 +19,14 @@ class TTSStreamingProcessor: # Load the model using the provided checkpoint and vocab files self.model = load_model( - DiT, - dict(dim=1024, depth=22, heads=16, ff_mult=2, text_dim=512, conv_layers=4), - ckpt_file, - vocab_file, + model_cls=DiT, + model_cfg=dict(dim=1024, depth=22, heads=16, ff_mult=2, text_dim=512, conv_layers=4), + ckpt_path=ckpt_file, + mel_spec_type="vocos", # or "bigvgan" depending on vocoder + vocab_file=vocab_file, + ode_method="euler", + use_ema=True, + device=self.device, ).to(self.device, dtype=dtype) # Load the vocoder