From 9ae46c8360303417489d2c1071f29972cd8ab171 Mon Sep 17 00:00:00 2001
From: SWivid
Date: Fri, 28 Nov 2025 13:08:07 +0000
Subject: [PATCH] Replace jieba pkg with rjieba - a jieba-rs Python binding

---
 pyproject.toml                                        | 4 ++--
 src/f5_tts/model/utils.py                             | 8 ++------
 src/f5_tts/runtime/triton_trtllm/Dockerfile.server    | 2 +-
 .../triton_trtllm/model_repo_f5_tts/f5_tts/1/model.py | 4 ++--
 src/f5_tts/train/datasets/prepare_emilia.py           | 2 +-
 src/f5_tts/train/datasets/prepare_wenetspeech4tts.py  | 2 +-
 6 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 7e7ed45..a602db3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "f5-tts"
-version = "1.1.9"
+version = "1.1.10"
 description = "F5-TTS: A Fairytaler that Fakes Fluent and Faithful Speech with Flow Matching"
 readme = "README.md"
 license = {text = "MIT License"}
@@ -22,13 +22,13 @@ dependencies = [
     "ema_pytorch>=0.5.2",
     "gradio>=5.0.0",
     "hydra-core>=1.3.0",
-    "jieba",
     "librosa",
     "matplotlib",
     "numpy<=1.26.4; python_version<='3.10'",
     "pydantic<=2.10.6",
     "pydub",
     "pypinyin",
+    "rjieba",
     "safetensors",
     "soundfile",
     "tomli",
diff --git a/src/f5_tts/model/utils.py b/src/f5_tts/model/utils.py
index cd5b3a0..ff34956 100644
--- a/src/f5_tts/model/utils.py
+++ b/src/f5_tts/model/utils.py
@@ -7,7 +7,7 @@ import random
 from collections import defaultdict
 from importlib.resources import files
 
-import jieba
+import rjieba
 import torch
 from pypinyin import Style, lazy_pinyin
 from torch.nn.utils.rnn import pad_sequence
@@ -146,10 +146,6 @@ def get_tokenizer(dataset_name, tokenizer: str = "pinyin"):
 
 
 def convert_char_to_pinyin(text_list, polyphone=True):
-    if jieba.dt.initialized is False:
-        jieba.default_logger.setLevel(50)  # CRITICAL
-        jieba.initialize()
-
     final_text_list = []
     custom_trans = str.maketrans(
         {";": ",", "“": '"', "”": '"', "‘": "'", "’": "'"}
@@ -163,7 +159,7 @@
     for text in text_list:
         char_list = []
         text = text.translate(custom_trans)
-        for seg in jieba.cut(text):
+        for seg in rjieba.cut(text):
             seg_byte_len = len(bytes(seg, "UTF-8"))
             if seg_byte_len == len(seg):  # if pure alphabets and symbols
                 if char_list and seg_byte_len > 1 and char_list[-1] not in " :'\"":
diff --git a/src/f5_tts/runtime/triton_trtllm/Dockerfile.server b/src/f5_tts/runtime/triton_trtllm/Dockerfile.server
index 861e266..dd176a5 100644
--- a/src/f5_tts/runtime/triton_trtllm/Dockerfile.server
+++ b/src/f5_tts/runtime/triton_trtllm/Dockerfile.server
@@ -1,3 +1,3 @@
 FROM nvcr.io/nvidia/tritonserver:24.12-py3
-RUN pip install tritonclient[grpc] tensorrt-llm==0.16.0 torchaudio==2.5.1 jieba pypinyin librosa vocos
+RUN pip install tritonclient[grpc] tensorrt-llm==0.16.0 torchaudio==2.5.1 rjieba pypinyin librosa vocos
 WORKDIR /workspace
\ No newline at end of file
diff --git a/src/f5_tts/runtime/triton_trtllm/model_repo_f5_tts/f5_tts/1/model.py b/src/f5_tts/runtime/triton_trtllm/model_repo_f5_tts/f5_tts/1/model.py
index b1115a3..0001937 100644
--- a/src/f5_tts/runtime/triton_trtllm/model_repo_f5_tts/f5_tts/1/model.py
+++ b/src/f5_tts/runtime/triton_trtllm/model_repo_f5_tts/f5_tts/1/model.py
@@ -26,7 +26,7 @@
 import json
 import os
 
-import jieba
+import rjieba
 import torch
 import torchaudio
 import triton_python_backend_utils as pb_utils
@@ -66,7 +66,7 @@ def convert_char_to_pinyin(reference_target_texts_list, polyphone=True):
     for text in reference_target_texts_list:
         char_list = []
         text = text.translate(custom_trans)
-        for seg in jieba.cut(text):
+        for seg in rjieba.cut(text):
             seg_byte_len = len(bytes(seg, "UTF-8"))
             if seg_byte_len == len(seg):  # if pure alphabets and symbols
                 if char_list and seg_byte_len > 1 and char_list[-1] not in " :'\"":
diff --git a/src/f5_tts/train/datasets/prepare_emilia.py b/src/f5_tts/train/datasets/prepare_emilia.py
index 7c6b805..1f92fb8 100644
--- a/src/f5_tts/train/datasets/prepare_emilia.py
+++ b/src/f5_tts/train/datasets/prepare_emilia.py
@@ -225,5 +225,5 @@ if __name__ == "__main__":
 
     # bad zh asr cnt 230435 (samples)
     # bad eh asr cnt 37217 (samples)
-    # vocab size may be slightly different due to jieba tokenizer and pypinyin (e.g. way of polyphoneme)
+    # vocab size may be slightly different due to rjieba tokenizer and pypinyin (e.g. way of polyphoneme)
     # please be careful if using pretrained model, make sure the vocab.txt is same
diff --git a/src/f5_tts/train/datasets/prepare_wenetspeech4tts.py b/src/f5_tts/train/datasets/prepare_wenetspeech4tts.py
index 6498421..a598966 100644
--- a/src/f5_tts/train/datasets/prepare_wenetspeech4tts.py
+++ b/src/f5_tts/train/datasets/prepare_wenetspeech4tts.py
@@ -122,5 +122,5 @@ if __name__ == "__main__":
 
     # - - 1459 (polyphone)
     # char vocab size 5264 5219 5042
-    # vocab size may be slightly different due to jieba tokenizer and pypinyin (e.g. way of polyphoneme)
+    # vocab size may be slightly different due to rjieba tokenizer and pypinyin (e.g. way of polyphoneme)
     # please be careful if using pretrained model, make sure the vocab.txt is same
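
Note on the swapped-in tokenizer (not part of the patch itself): rjieba, the
jieba-rs binding, is expected to return a list of segment strings from cut()
and needs no explicit initialization, which is why the jieba.dt.initialized
guard in convert_char_to_pinyin could simply be deleted rather than ported.
A minimal sketch of the call this patch relies on, assuming rjieba is
installed from PyPI; exact segment boundaries may differ slightly from the
pure-Python jieba, which is what the vocab-size caveats above warn about:

    import rjieba

    # The Rust-backed tokenizer loads its dictionary on first use; there is
    # no equivalent of jieba.initialize() or jieba.default_logger to silence.
    # rjieba.cut returns segment strings, so the existing
    # `for seg in rjieba.cut(text)` loops work unchanged.
    segments = rjieba.cut("这是一段混合文本, with some English.")
    print(segments)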