diff --git a/pyproject.toml b/pyproject.toml
index ceade41..8f1db43 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "f5-tts"
-version = "1.1.11"
+version = "1.1.12"
 description = "F5-TTS: A Fairytaler that Fakes Fluent and Faithful Speech with Flow Matching"
 readme = "README.md"
 license = {text = "MIT License"}
diff --git a/src/f5_tts/infer/infer_gradio.py b/src/f5_tts/infer/infer_gradio.py
index d82f482..93b2c6e 100644
--- a/src/f5_tts/infer/infer_gradio.py
+++ b/src/f5_tts/infer/infer_gradio.py
@@ -816,7 +816,9 @@ Have a conversation with an AI using your reference voice!
                 lines=2,
             )
 
-    chatbot_interface = gr.Chatbot(label="Conversation", type="messages")
+    chatbot_interface = gr.Chatbot(
+        label="Conversation"
+    )  # type="messages" hard-coded and no need to pass in since gradio 6.0
 
     with gr.Row():
         with gr.Column():
@@ -853,6 +855,10 @@ Have a conversation with an AI using your reference voice!
     @gpu_decorator
     def generate_text_response(conv_state, system_prompt):
         """Generate text response from AI"""
+        for single_state in conv_state:
+            if isinstance(single_state["content"], list):
+                assert len(single_state["content"]) == 1 and single_state["content"][0]["type"] == "text"
+                single_state["content"] = single_state["content"][0]["text"]
         system_prompt_state = [{"role": "system", "content": system_prompt}]
         response = chat_model_inference(system_prompt_state + conv_state, chat_model_state, chat_tokenizer_state)
 
@@ -866,7 +872,7 @@ Have a conversation with an AI using your reference voice!
         if not conv_state or not ref_audio:
             return None, ref_text, seed_input
 
-        last_ai_response = conv_state[-1]["content"]
+        last_ai_response = conv_state[-1]["content"][0]["text"]
         if not last_ai_response or conv_state[-1]["role"] != "assistant":
             return None, ref_text, seed_input
 
@@ -1108,7 +1114,6 @@ def main(port, host, share, api, root_path, inbrowser):
         server_name=host,
         server_port=port,
         share=share,
-        show_api=api,
         root_path=root_path,
         inbrowser=inbrowser,
     )
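
For context on the infer_gradio.py changes (a sketch, not part of the patch): since Gradio 6.0 the Chatbot component always uses the "messages" history format, where a message's "content" may arrive as a structured list of parts rather than a plain string. The normalization the patch applies, assuming each message carries exactly one text part, boils down to:

    # Assumed Gradio >= 6 message shapes (illustrative only):
    #   structured: {"role": "assistant", "content": [{"type": "text", "text": "Hi!"}]}
    #   plain:      {"role": "assistant", "content": "Hi!"}
    def normalize_message(message: dict) -> dict:
        content = message["content"]
        if isinstance(content, list):
            # Mirrors the patch's assumption: exactly one text part per message.
            assert len(content) == 1 and content[0]["type"] == "text"
            message["content"] = content[0]["text"]
        return message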