From 4b3cd13382c69bd3e9cc1899cefe416f744a8383 Mon Sep 17 00:00:00 2001
From: SWivid <swivid@qq.com>
Date: Thu, 3 Apr 2025 15:04:42 +0800
Subject: [PATCH] Move Runtime section before Inference in README.md

---
 README.md | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/README.md b/README.md
index a5f2a6a..9204f32 100644
--- a/README.md
+++ b/README.md
@@ -107,6 +107,19 @@ docker container run --rm -it --gpus=all --mount 'type=volume,source=f5-tts,targ
 docker container run --rm -it --gpus=all --mount 'type=volume,source=f5-tts,target=/root/.cache/huggingface/hub/' -p 7860:7860 ghcr.io/swivid/f5-tts:main f5-tts_infer-gradio --host 0.0.0.0
 ```
 
+### Runtime
+
+Deployment solution with Triton and TensorRT-LLM.
+
+#### Benchmark Results
+Decoding on a single L20 GPU, using 26 different `prompt_audio` and `target_text` pairs.
+
+| Model | Concurrency | Avg Latency    | RTF   |
+|-------|-------------|----------------|-------|
+| F5-TTS Base (Vocos) | 1     | 253 ms | 0.0394|
+
+See [detailed instructions](src/f5_tts/runtime/triton_trtllm/README.md) for more information.
+
 
 ## Inference
 
@@ -179,19 +192,6 @@ f5-tts_infer-cli -c custom.toml
 f5-tts_infer-cli -c src/f5_tts/infer/examples/multi/story.toml
 ```
 
-### 3. Runtime
-
-Deployment solution with Triton and TensorRT-LLM.
-
-#### Benchmark Results
-Decoding on a single L20 GPU, using 26 different prompt_audio & target_text pairs.
-
-| Model | Concurrency | Avg Latency    | RTF   | 
-|-------|-------------|----------------|-------|
-| F5-TTS Base (Vocos) | 1     | 253 ms | 0.0394|
-
-See [detailed instructions](src/f5_tts/runtime/triton_trtllm/README.md) for more information.
-
 
 ## Training