From 6667d6f5015415289b3b6bd5202592c6b86f059f Mon Sep 17 00:00:00 2001
From: J <114445157+Jxspa@users.noreply.github.com>
Date: Tue, 29 Oct 2024 10:59:42 +0000
Subject: [PATCH] Add optional text chat function

---
 src/f5_tts/infer/infer_gradio.py | 55 ++++++++++++++++++++++++++------
 1 file changed, 45 insertions(+), 10 deletions(-)

diff --git a/src/f5_tts/infer/infer_gradio.py b/src/f5_tts/infer/infer_gradio.py
index ef6a19e..85d2550 100644
--- a/src/f5_tts/infer/infer_gradio.py
+++ b/src/f5_tts/infer/infer_gradio.py
@@ -540,15 +540,19 @@ Have a conversation with an AI using your reference voice!
     chatbot_interface = gr.Chatbot(label="Conversation")
 
     with gr.Row():
-        with gr.Column():
-            audio_output_chat = gr.Audio(autoplay=True)
         with gr.Column():
             audio_input_chat = gr.Microphone(
                 label="Speak your message",
                 type="filepath",
             )
-
-    clear_btn_chat = gr.Button("Clear Conversation")
+            audio_output_chat = gr.Audio(autoplay=True)
+        with gr.Column():
+            text_input_chat = gr.Textbox(
+                label="Type your message",
+                lines=1,
+            )
+            send_btn_chat = gr.Button("Send")
+            clear_btn_chat = gr.Button("Clear Conversation")
 
     conversation_state = gr.State(
         value=[
@@ -561,13 +565,14 @@ Have a conversation with an AI using your reference voice!
 
     # Modify process_audio_input to use model and tokenizer from state
     @gpu_decorator
-    def process_audio_input(audio_path, history, conv_state):
-        """Handle audio input from user"""
-        if not audio_path:
+    def process_audio_input(audio_path, text, history, conv_state):
+        """Handle audio or text input from user"""
+
+        if not audio_path and not text.strip():
             return history, conv_state, ""
 
-        text = ""
-        text = preprocess_ref_audio_text(audio_path, text, clip_short=False)[1]
+        if audio_path:
+            text = preprocess_ref_audio_text(audio_path, text)[1]
 
         if not text.strip():
             return history, conv_state, ""
@@ -621,7 +626,7 @@ Have a conversation with an AI using your reference voice!
     # Handle audio input
     audio_input_chat.stop_recording(
         process_audio_input,
-        inputs=[audio_input_chat, chatbot_interface, conversation_state],
+        inputs=[audio_input_chat, text_input_chat, chatbot_interface, conversation_state],
        outputs=[chatbot_interface, conversation_state],
     ).then(
         generate_audio_response,
@@ -633,6 +638,36 @@ Have a conversation with an AI using your reference voice!
         audio_input_chat,
     )
 
+    # Handle text input
+    text_input_chat.submit(
+        process_audio_input,
+        inputs=[audio_input_chat, text_input_chat, chatbot_interface, conversation_state],
+        outputs=[chatbot_interface, conversation_state],
+    ).then(
+        generate_audio_response,
+        inputs=[chatbot_interface, ref_audio_chat, ref_text_chat, model_choice_chat, remove_silence_chat],
+        outputs=[audio_output_chat],
+    ).then(
+        lambda: None,
+        None,
+        text_input_chat,
+    )
+
+    # Handle send button
+    send_btn_chat.click(
+        process_audio_input,
+        inputs=[audio_input_chat, text_input_chat, chatbot_interface, conversation_state],
+        outputs=[chatbot_interface, conversation_state],
+    ).then(
+        generate_audio_response,
+        inputs=[chatbot_interface, ref_audio_chat, ref_text_chat, model_choice_chat, remove_silence_chat],
+        outputs=[audio_output_chat],
+    ).then(
+        lambda: None,
+        None,
+        text_input_chat,
+    )
+
     # Handle clear button
     clear_btn_chat.click(
         clear_conversation,
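
The sketch below isolates the interaction pattern this patch relies on: one handler shared by the microphone's stop_recording event, the textbox's submit event, and the Send button's click event, with a recorded message taking precedence over typed text. It is a minimal stand-alone illustration, not code from the patch: the component and function names (mic, box, send, reply, handle_message, demo) are made up for the example, and the handler only echoes the message instead of calling the LLM or preprocess_ref_audio_text.

    import gradio as gr

    def handle_message(audio_path, text):
        # Same precedence rule as process_audio_input in the patch:
        # ignore empty input, and let a recording override typed text.
        if not audio_path and not text.strip():
            return "", text
        if audio_path:
            text = f"[transcript of {audio_path}]"  # stand-in for preprocess_ref_audio_text
        return f"echo: {text}", ""  # reply, then clear the textbox

    with gr.Blocks() as demo:
        mic = gr.Microphone(label="Speak your message", type="filepath")
        box = gr.Textbox(label="Type your message", lines=1)
        send = gr.Button("Send")
        reply = gr.Textbox(label="Reply")

        # One handler, three triggers, mirroring the patch's
        # stop_recording / submit / click chains.
        for trigger in (mic.stop_recording, box.submit, send.click):
            trigger(handle_message, inputs=[mic, box], outputs=[reply, box])

    demo.launch()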