frankslin · frankslin · Jan 18, 2026 · Jan 18, 2026 · Jan 18, 2026 · Jan 18, 2026
diff --git a/BUILD.bazel b/BUILD.bazel
@@ -14,6 +14,7 @@ cc_library(
         "//data/config",
         "//data/dictionary:binary_dictionaries",
         "//data/dictionary:text_dictionaries",
+        "//data/jieba_dict",
     ],
     strip_include_prefix = "src",
     deps = [

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -29,6 +29,7 @@ option(BUILD_SHARED_LIBS "Build opencc as shared library" ON)
 option(ENABLE_GTEST "Build all tests." OFF)
 option(ENABLE_BENCHMARK "Build benchmark tests." OFF)
 option(ENABLE_DARTS "Build DartsDict (ocd format)." ON)
+option(ENABLE_JIEBA "Build JiebaSegmentation (experimental)." OFF)
 option(BUILD_PYTHON "Build python library" OFF)
 option(USE_SYSTEM_DARTS "Use system version of Darts" OFF)
 option(USE_SYSTEM_GOOGLE_BENCHMARK "Use system version of Google Benchmark" OFF)
@@ -208,6 +209,12 @@ if (ENABLE_DARTS)
   )
 endif()
 
+if (ENABLE_JIEBA)
+  add_definitions(
+    -DENABLE_JIEBA
+  )
+endif()
+
 
 ######## Dependencies
 

diff --git a/data/config/s2twp_jieba.json b/data/config/s2twp_jieba.json
@@ -0,0 +1,31 @@
+{
+  "name": "Simplified Chinese to Traditional Chinese (Taiwan standard, with phrases, Jieba Segmentation - Experimental)",
+  "segmentation": {
+    "type": "jieba",
+    "dict_path": "jieba_dict/jieba.dict.utf8",
+    "model_path": "jieba_dict/hmm_model.utf8",
+    "user_dict_path": "jieba_dict/user.dict.utf8"
+  },
+  "conversion_chain": [{
+    "dict": {
+      "type": "group",
+      "dicts": [{
+        "type": "ocd2",
+        "file": "STPhrases.ocd2"
+      }, {
+        "type": "ocd2",
+        "file": "STCharacters.ocd2"
+      }]
+    }
+  }, {
+    "dict": {
+      "type": "ocd2",
+      "file": "TWPhrases.ocd2"
+    }
+  }, {
+    "dict": {
+      "type": "ocd2",
+      "file": "TWVariants.ocd2"
+    }
+  }]
+}
diff --git a/data/config/tw2sp_jieba.json b/data/config/tw2sp_jieba.json
@@ -0,0 +1,35 @@
+{
+  "name": "Traditional Chinese (Taiwan standard) to Simplified Chinese (with phrases, Jieba Segmentation - Experimental)",
+  "segmentation": {
+    "type": "jieba",
+    "dict_path": "jieba_dict/jieba.dict.utf8",
+    "model_path": "jieba_dict/hmm_model.utf8",
+    "user_dict_path": "jieba_dict/user.dict.utf8"
+  },
+  "conversion_chain": [{
+    "dict": {
+      "type": "group",
+      "dicts": [{
+        "type": "ocd2",
+        "file": "TWPhrasesRev.ocd2"
+      }, {
+        "type": "ocd2",
+        "file": "TWVariantsRevPhrases.ocd2"
+      }, {
+        "type": "ocd2",
+        "file": "TWVariantsRev.ocd2"
+      }]
+    }
+  }, {
+    "dict": {
+      "type": "group",
+      "dicts": [{
+        "type": "ocd2",
+        "file": "TSPhrases.ocd2"
+      }, {
+        "type": "ocd2",
+        "file": "TSCharacters.ocd2"
+      }]
+    }
+  }]
+}
diff --git a/data/jieba_dict/BUILD.bazel b/data/jieba_dict/BUILD.bazel
@@ -0,0 +1,6 @@
+package(default_visibility = ["//visibility:public"])
+
+filegroup(
+    name = "jieba_dict",
+    srcs = glob(["*.utf8", "README.md"]),
+)
diff --git a/data/jieba_dict/README.md b/data/jieba_dict/README.md
@@ -0,0 +1,45 @@
+# Jieba 分词词典
+
+此目录包含 Jieba 中文分词所需的词典文件，来源于 [libcppjieba](https://github.com/yanyiwu/libcppjieba)。
+
+## 文件说明
+
+- **jieba.dict.utf8** (4.9 MB) - 主词典文件，包含词语及其词频
+- **hmm_model.utf8** (508 KB) - 隐马尔可夫模型（HMM）文件，用于识别未登录词
+- **user.dict.utf8** (33 B) - 用户自定义词典（可选）
+
+## 许可证
+
+这些词典文件继承自 jieba 项目，遵循 MIT 许可证。
+
+## 使用方式
+
+在 OpenCC 配置文件的 `segmentation` 段中指定这些词典的路径。IDF 和停用词数据
+会从 `deps/libcppjieba/dict/` 自动解析，无需复制到此目录。配置示例如下：
+
+```json
+{
+  "segmentation": {
+    "type": "jieba",
+    "dict_path": "jieba_dict/jieba.dict.utf8",
+    "model_path": "jieba_dict/hmm_model.utf8",
+    "user_dict_path": "jieba_dict/user.dict.utf8"
+  }
+}
+```
+
+## 自定义用户词典
+
+您可以编辑 `user.dict.utf8` 添加自定义词语，每行的格式为（各字段以空格分隔）：
+
+```
+词语 词频 词性
+```
+
+例如：
+```
+云计算 5 n
+机器学习 8 n
+```
+
+每行一个词语；词频和词性为可选字段，可以省略。
diff --git a/data/jieba_dict/hmm_model.utf8 b/data/jieba_dict/hmm_model.utf8