diff --git a/data/dictionary/DictionaryTest.cpp b/data/dictionary/DictionaryTest.cpp index 93d62ca8..69cbf1ec 100644 --- a/data/dictionary/DictionaryTest.cpp +++ b/data/dictionary/DictionaryTest.cpp @@ -135,30 +135,38 @@ TEST_F(DictionaryRunfilesTest, TWPhrasesReverseMapping) { return map; }; - LexiconPtr twPhrases = loadLexicon(twPhrasesFile); - LexiconPtr twPhrasesRev = loadLexicon(twPhrasesRevFile); - ASSERT_NE(twPhrases, nullptr); - ASSERT_NE(twPhrasesRev, nullptr); - - auto twMap = buildMap(twPhrases); - auto twRevMap = buildMap(twPhrasesRev); - - for (const auto& entry : twMap) { - const std::string& key = entry.first; - for (const auto& value : entry.second) { - auto it = twRevMap.find(value); - EXPECT_TRUE(it != twRevMap.end() && it->second.count(key) > 0) - << "Missing reverse mapping: " << key << " -> " << value; + try { + LexiconPtr twPhrases = loadLexicon(twPhrasesFile); + LexiconPtr twPhrasesRev = loadLexicon(twPhrasesRevFile); + ASSERT_NE(twPhrases, nullptr); + ASSERT_NE(twPhrasesRev, nullptr); + + auto twMap = buildMap(twPhrases); + auto twRevMap = buildMap(twPhrasesRev); + + for (const auto& entry : twMap) { + const std::string& key = entry.first; + for (const auto& value : entry.second) { + auto it = twRevMap.find(value); + EXPECT_TRUE(it != twRevMap.end() && it->second.count(key) > 0) + << "Missing reverse mapping: " << key << " -> " << value; + } } - } - for (const auto& entry : twRevMap) { - const std::string& key = entry.first; - for (const auto& value : entry.second) { - auto it = twMap.find(value); - EXPECT_TRUE(it != twMap.end() && it->second.count(key) > 0) - << "Missing reverse mapping: " << key << " -> " << value; + for (const auto& entry : twRevMap) { + const std::string& key = entry.first; + for (const auto& value : entry.second) { + auto it = twMap.find(value); + EXPECT_TRUE(it != twMap.end() && it->second.count(key) > 0) + << "Missing reverse mapping: " << key << " -> " << value; + } } + } catch (const Exception& ex) { + FAIL() << 
"Exception: " << ex.what(); + } catch (const std::exception& ex) { + FAIL() << "std::exception: " << ex.what(); + } catch (...) { + FAIL() << "Unknown exception thrown during reverse mapping check."; } } diff --git a/data/dictionary/HKVariants.txt b/data/dictionary/HKVariants.txt index e0f68813..37d77a2a 100644 --- a/data/dictionary/HKVariants.txt +++ b/data/dictionary/HKVariants.txt @@ -1,3 +1,10 @@ +# Open Chinese Convert (OpenCC) Dictionary +# File: HKVariants.txt +# Format: key value(s) (values separated by spaces) +# License: Apache-2.0 (see LICENSE) +# Source: https://github.com/ByVoid/OpenCC +# Used in configs: s2hk.json, t2hk.json + 僞 偽 兌 兑 叄 叁 diff --git a/data/dictionary/HKVariantsRevPhrases.txt b/data/dictionary/HKVariantsRevPhrases.txt index 3f03fd89..5256bd05 100644 --- a/data/dictionary/HKVariantsRevPhrases.txt +++ b/data/dictionary/HKVariantsRevPhrases.txt @@ -1,3 +1,10 @@ +# Open Chinese Convert (OpenCC) Dictionary +# File: HKVariantsRevPhrases.txt +# Format: key value(s) (values separated by spaces) +# License: Apache-2.0 (see LICENSE) +# Source: https://github.com/ByVoid/OpenCC +# Used in configs: hk2s.json, hk2t.json + 一口吃個 一口喫個 一口吃成 一口喫成 一家三口 一家三口 diff --git a/data/dictionary/JPShinjitaiCharacters.txt b/data/dictionary/JPShinjitaiCharacters.txt index 30220aa3..beaa192a 100644 --- a/data/dictionary/JPShinjitaiCharacters.txt +++ b/data/dictionary/JPShinjitaiCharacters.txt @@ -1,3 +1,10 @@ +# Open Chinese Convert (OpenCC) Dictionary +# File: JPShinjitaiCharacters.txt +# Format: key value(s) (values separated by spaces) +# License: Apache-2.0 (see LICENSE) +# Source: https://github.com/ByVoid/OpenCC +# Used in configs: jp2t.json + 両 兩 輛 弁 辨 辯 瓣 辦 弁 御 御 禦 diff --git a/data/dictionary/JPShinjitaiPhrases.txt b/data/dictionary/JPShinjitaiPhrases.txt index 3a85c886..8fcbb9e7 100644 --- a/data/dictionary/JPShinjitaiPhrases.txt +++ b/data/dictionary/JPShinjitaiPhrases.txt @@ -1,3 +1,10 @@ +# Open Chinese Convert (OpenCC) Dictionary +# File: 
JPShinjitaiPhrases.txt +# Format: key value(s) (values separated by spaces) +# License: Apache-2.0 (see LICENSE) +# Source: https://github.com/ByVoid/OpenCC +# Used in configs: jp2t.json + 一獲千金 一攫千金 丁寧 叮嚀 丁重 鄭重 diff --git a/data/dictionary/JPVariants.txt b/data/dictionary/JPVariants.txt index 3f90b90d..a9cfa000 100644 --- a/data/dictionary/JPVariants.txt +++ b/data/dictionary/JPVariants.txt @@ -1,3 +1,10 @@ +# Open Chinese Convert (OpenCC) Dictionary +# File: JPVariants.txt +# Format: key value(s) (values separated by spaces) +# License: Apache-2.0 (see LICENSE) +# Source: https://github.com/ByVoid/OpenCC +# Used in configs: t2jp.json + 乘 乗 亂 乱 亙 亘 diff --git a/data/dictionary/STCharacters.txt b/data/dictionary/STCharacters.txt index 7347645a..90604775 100644 --- a/data/dictionary/STCharacters.txt +++ b/data/dictionary/STCharacters.txt @@ -1,3 +1,10 @@ +# Open Chinese Convert (OpenCC) Dictionary +# File: STCharacters.txt +# Format: key value(s) (values separated by spaces) +# License: Apache-2.0 (see LICENSE) +# Source: https://github.com/ByVoid/OpenCC +# Used in configs: s2hk.json, s2t.json, s2tw.json, s2twp.json + 㐷 傌 㐹 㑶 㐹 㐽 偑 diff --git a/data/dictionary/STPhrases.txt b/data/dictionary/STPhrases.txt index 21aa4ccd..b92e2273 100644 --- a/data/dictionary/STPhrases.txt +++ b/data/dictionary/STPhrases.txt @@ -1,3 +1,10 @@ +# Open Chinese Convert (OpenCC) Dictionary +# File: STPhrases.txt +# Format: key value(s) (values separated by spaces) +# License: Apache-2.0 (see LICENSE) +# Source: https://github.com/ByVoid/OpenCC +# Used in configs: s2hk.json, s2t.json, s2tw.json, s2twp.json + 㓦划 㓦劃 一丝不挂 一絲不掛 一了心愿 一了心願 diff --git a/data/dictionary/TSCharacters.txt b/data/dictionary/TSCharacters.txt index a2365145..31361395 100644 --- a/data/dictionary/TSCharacters.txt +++ b/data/dictionary/TSCharacters.txt @@ -1,3 +1,10 @@ +# Open Chinese Convert (OpenCC) Dictionary +# File: TSCharacters.txt +# Format: key value(s) (values separated by spaces) +# License: Apache-2.0 (see 
LICENSE) +# Source: https://github.com/ByVoid/OpenCC +# Used in configs: hk2s.json, t2s.json, tw2s.json, tw2sp.json + 㑮 𫝈 㑯 㑔 㑳 㑇 diff --git a/data/dictionary/TSPhrases.txt b/data/dictionary/TSPhrases.txt index 792a1cad..7d13948d 100644 --- a/data/dictionary/TSPhrases.txt +++ b/data/dictionary/TSPhrases.txt @@ -1,3 +1,10 @@ +# Open Chinese Convert (OpenCC) Dictionary +# File: TSPhrases.txt +# Format: key value(s) (values separated by spaces) +# License: Apache-2.0 (see LICENSE) +# Source: https://github.com/ByVoid/OpenCC +# Used in configs: hk2s.json, t2s.json, tw2s.json, tw2sp.json + 一目瞭然 一目了然 上鍊 上链 不瞭解 不了解 diff --git a/data/dictionary/TWPhrases.txt b/data/dictionary/TWPhrases.txt index be6ac7a3..9b0a7613 100644 --- a/data/dictionary/TWPhrases.txt +++ b/data/dictionary/TWPhrases.txt @@ -1,3 +1,10 @@ +# Open Chinese Convert (OpenCC) Dictionary +# File: TWPhrases.txt +# Format: key value(s) (values separated by spaces) +# License: Apache-2.0 (see LICENSE) +# Source: https://github.com/ByVoid/OpenCC +# Used in configs: s2twp.json (via TWPhrases.ocd2) + PN結 PN接面 SQL注入 SQL隱碼攻擊 SQL注入攻擊 SQL隱碼攻擊 diff --git a/data/dictionary/TWPhrasesRev.txt b/data/dictionary/TWPhrasesRev.txt index c8a3d19a..820a9140 100644 --- a/data/dictionary/TWPhrasesRev.txt +++ b/data/dictionary/TWPhrasesRev.txt @@ -1,3 +1,10 @@ +# Open Chinese Convert (OpenCC) Dictionary +# File: TWPhrasesRev.txt +# Format: key value(s) (values separated by spaces) +# License: Apache-2.0 (see LICENSE) +# Source: https://github.com/ByVoid/OpenCC +# Used in configs: tw2sp.json (via TWPhrasesRev.ocd2) + PN接面 PN結 SQL隱碼攻擊 SQL注入 SQL注入攻擊 三極體 三極管 diff --git a/data/dictionary/TWVariants.txt b/data/dictionary/TWVariants.txt index 023a0687..cadffb17 100644 --- a/data/dictionary/TWVariants.txt +++ b/data/dictionary/TWVariants.txt @@ -1,3 +1,10 @@ +# Open Chinese Convert (OpenCC) Dictionary +# File: TWVariants.txt +# Format: key value(s) (values separated by spaces) +# License: Apache-2.0 (see LICENSE) +# Source: 
https://github.com/ByVoid/OpenCC +# Used in configs: s2tw.json, s2twp.json, t2tw.json + 僞 偽 啓 啟 喫 吃 diff --git a/data/dictionary/TWVariantsRevPhrases.txt b/data/dictionary/TWVariantsRevPhrases.txt index ec94209d..05c774d9 100644 --- a/data/dictionary/TWVariantsRevPhrases.txt +++ b/data/dictionary/TWVariantsRevPhrases.txt @@ -1,3 +1,10 @@ +# Open Chinese Convert (OpenCC) Dictionary +# File: TWVariantsRevPhrases.txt +# Format: key value(s) (values separated by spaces) +# License: Apache-2.0 (see LICENSE) +# Source: https://github.com/ByVoid/OpenCC +# Used in configs: tw2s.json, tw2sp.json, tw2t.json + 一口吃個 一口喫個 一口吃成 一口喫成 一家三口 一家三口 diff --git a/data/scripts/common.py b/data/scripts/common.py index addd3c02..83a7d440 100644 --- a/data/scripts/common.py +++ b/data/scripts/common.py @@ -6,26 +6,174 @@ def sort_items(input_filename, output_filename): input_file = codecs.open(input_filename, "r", encoding="utf-8") - dic = {} - - for line in input_file: - if len(line) == 0 or line == '\n': - continue - try: - key, value = line.split("\t") - except ValueError: - print(line) - while value[-1] == "\n" or value[-1] == "\r": - value = value[:-1] - dic[key] = value + lines = [line.rstrip("\r\n") for line in input_file] input_file.close() + def line_type(line): + if line == "" or line.strip() == "": + return "empty" + if line.startswith("#"): + return "comment" + if "\t" in line: + return "entry" + raise ValueError("Invalid dictionary line: " + line) + + parsed = [] + for line in lines: + parsed.append({"type": line_type(line), "content": line}) + + entry_lines = [i for i, p in enumerate(parsed) if p["type"] == "entry"] + if not entry_lines: + header_blocks = [] + current = [] + for p in parsed: + if p["type"] == "comment": + current.append(p["content"]) + elif p["type"] == "empty": + if current: + header_blocks.append(list(current)) + current = [] + if current: + header_blocks.append(list(current)) + + output_file = open(output_filename, "wb") + for idx, block in 
enumerate(header_blocks): + for line in block: + output_file.write((line + "\n").encode("utf-8")) + if idx < len(header_blocks) - 1: + output_file.write(b"\n") + if header_blocks: + output_file.write(b"\n") + output_file.close() + return + + first_entry = entry_lines[0] + last_entry = entry_lines[-1] + + header_end = -1 + for i in range(first_entry - 1, -1, -1): + if parsed[i]["type"] == "empty": + header_end = i + break + + header_blocks = [] + current = [] + for i in range(0, header_end + 1): + if parsed[i]["type"] == "comment": + current.append(parsed[i]["content"]) + elif parsed[i]["type"] == "empty": + if current: + header_blocks.append(list(current)) + current = [] + if current: + header_blocks.append(list(current)) + + footer_blocks = [] + current = [] + for i in range(last_entry + 1, len(parsed)): + if parsed[i]["type"] == "comment": + current.append(parsed[i]["content"]) + elif parsed[i]["type"] == "empty": + if current: + footer_blocks.append(list(current)) + current = [] + if current: + footer_blocks.append(list(current)) + + annotated_entries = [] + floating_blocks = [] + current = [] + entry_index = 0 + for i in range(header_end + 1, last_entry + 1): + p = parsed[i] + if p["type"] == "comment": + current.append(p["content"]) + continue + if p["type"] == "empty": + if current: + floating_blocks.append({"anchor": entry_index, "lines": list(current)}) + current = [] + continue + if p["type"] == "entry": + attached = None + if current: + has_empty = False + for j in range(i - 1, -1, -1): + if parsed[j]["type"] == "entry": + break + if parsed[j]["type"] == "empty": + has_empty = True + break + if has_empty: + floating_blocks.append({"anchor": entry_index, "lines": list(current)}) + else: + attached = list(current) + current = [] + + key, value = p["content"].split("\t", 1) + annotated_entries.append( + { + "key": key, + "value": value, + "attached": attached, + "original_index": entry_index, + } + ) + entry_index += 1 + + if current: + 
floating_blocks.append({"anchor": entry_index, "lines": list(current)}) + + annotated_entries.sort(key=lambda e: e["key"]) + index_map = {e["original_index"]: i for i, e in enumerate(annotated_entries)} + for block in floating_blocks: + if block["anchor"] in index_map: + block["anchor"] = index_map[block["anchor"]] + else: + block["anchor"] = len(annotated_entries) + + floating_by_anchor = {} + for block in floating_blocks: + floating_by_anchor.setdefault(block["anchor"], []).append(block["lines"]) + output_file = open(output_filename, "wb") - for key in sorted(dic.keys()): - line = key + "\t" + dic[key] + "\n" - output_file.write(line.encode('utf-8')) + for idx, block in enumerate(header_blocks): + for line in block: + output_file.write((line + "\n").encode("utf-8")) + if idx < len(header_blocks) - 1: + output_file.write(b"\n") + if header_blocks and annotated_entries: + output_file.write(b"\n") + + for i, entry in enumerate(annotated_entries): + for block in floating_by_anchor.get(i, []): + output_file.write(b"\n") + for line in block: + output_file.write((line + "\n").encode("utf-8")) + output_file.write(b"\n") + + if entry["attached"]: + for line in entry["attached"]: + output_file.write((line + "\n").encode("utf-8")) + output_file.write( + (entry["key"] + "\t" + entry["value"] + "\n").encode("utf-8") + ) + + for block in floating_by_anchor.get(len(annotated_entries), []): + output_file.write(b"\n") + for line in block: + output_file.write((line + "\n").encode("utf-8")) + + if footer_blocks: + if annotated_entries: + output_file.write(b"\n") + for idx, block in enumerate(footer_blocks): + for line in block: + output_file.write((line + "\n").encode("utf-8")) + if idx < len(footer_blocks) - 1: + output_file.write(b"\n") output_file.close() @@ -35,7 +183,8 @@ def reverse_items(input_filename, output_filename): dic = {} for line in input_file: - if len(line) == 0: + stripped = line.strip() + if not stripped or stripped.startswith("#"): continue key, value = 
line.split("\t") while value[-1] == "\n" or value[-1] == "\r": @@ -62,7 +211,8 @@ def reverse_items(input_filename, output_filename): def find_target_items(input_filename, keyword): input_file = codecs.open(input_filename, "r", encoding="utf-8") for line in input_file: - if len(line) == 0: + stripped = line.strip() + if not stripped or stripped.startswith("#"): continue key, value = line.split("\t") while value[-1] == "\n" or value[-1] == "\r": diff --git a/src/BUILD.bazel b/src/BUILD.bazel index e1e5f24d..ddc60b5f 100644 --- a/src/BUILD.bazel +++ b/src/BUILD.bazel @@ -275,6 +275,16 @@ cc_library( ], ) +cc_test( + name = "lexicon_annotation_test", + srcs = ["LexiconAnnotationTest.cpp"], + deps = [ + ":text_dict", + ":text_dict_test_base", + "@googletest//:gtest_main", + ], +) + cc_library( name = "marisa_dict", srcs = ["MarisaDict.cpp"], @@ -322,7 +332,10 @@ cc_library( name = "phrase_extract", srcs = ["PhraseExtract.cpp"], hdrs = ["PhraseExtract.hpp"], - visibility = ["//src/tools:__pkg__"], + visibility = [ + "//src:__pkg__", + "//src/tools:__pkg__", + ], deps = [ ":common", ":marisa_dict", @@ -330,6 +343,17 @@ cc_library( ], ) +cc_test( + name = "phrase_extract_test", + srcs = ["PhraseExtractTest.cpp"], + deps = [ + ":phrase_extract", + ":test_utils", + ":test_utils_utf8", + "@googletest//:gtest_main", + ], +) + pybind_extension( name = "opencc_clib", srcs = ["py_opencc.cpp"], @@ -470,6 +494,16 @@ cc_library( ], ) +cc_test( + name = "utf8_string_slice_test", + srcs = ["UTF8StringSliceTest.cpp"], + deps = [ + ":test_utils", + ":utf8_string_slice", + "@googletest//:gtest_main", + ], +) + cc_library( name = "utf8_util", srcs = ["UTF8Util.cpp"], diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 7768c89d..227e6c65 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -72,6 +72,7 @@ set(UNITTESTS ConversionChainTest ConversionTest DictGroupTest + LexiconAnnotationTest MarisaDictTest MaxMatchSegmentationTest PhraseExtractTest diff --git 
a/src/DictConverter.cpp b/src/DictConverter.cpp index 40e46d1f..d9953f83 100644 --- a/src/DictConverter.cpp +++ b/src/DictConverter.cpp @@ -17,8 +17,11 @@ */ #include "DictConverter.hpp" +#include "Exception.hpp" +#include "Lexicon.hpp" #include "MarisaDict.hpp" #include "TextDict.hpp" +#include "UTF8Util.hpp" #ifdef ENABLE_DARTS #include "DartsDict.hpp" @@ -29,7 +32,19 @@ using namespace opencc; DictPtr LoadDictionary(const std::string& format, const std::string& inputFileName) { if (format == "text") { - return SerializableDict::NewFromFile(inputFileName); + FILE* fp = +#ifdef _MSC_VER + _wfopen(UTF8Util::GetPlatformString(inputFileName).c_str(), L"r") +#else + fopen(UTF8Util::GetPlatformString(inputFileName).c_str(), "r") +#endif + ; + if (!fp) { + throw FileNotFound(inputFileName); + } + DictPtr dict = TextDict::NewFromFile(fp); + fclose(fp); + return dict; } else if (format == "ocd") { #ifdef ENABLE_DARTS return SerializableDict::NewFromFile(inputFileName); diff --git a/src/Lexicon.cpp b/src/Lexicon.cpp index cfb215c4..4429edf7 100644 --- a/src/Lexicon.cpp +++ b/src/Lexicon.cpp @@ -81,8 +81,13 @@ LexiconPtr Lexicon::ParseLexiconFromFile(FILE* fp) { char buff[ENTRY_BUFF_SIZE]; LexiconPtr lexicon(new Lexicon); UTF8Util::SkipUtf8Bom(fp); + size_t lineNum = 1; while (fgets(buff, ENTRY_BUFF_SIZE, fp)) { + if (*buff == '#') { + lineNum++; + continue; + } DictEntry* entry = ParseKeyValues(buff, lineNum); if (entry != nullptr) { lexicon->Add(entry); diff --git a/src/LexiconAnnotationTest.cpp b/src/LexiconAnnotationTest.cpp new file mode 100644 index 00000000..9a985b29 --- /dev/null +++ b/src/LexiconAnnotationTest.cpp @@ -0,0 +1,179 @@ +/* + * Open Chinese Convert (OpenCC) LexiconAnnotationTest + * + * Copyright 2026 Frank Lin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Lexicon.hpp" +#include "SerializableDict.hpp" +#include "TestUtils.hpp" +#include "TestUtilsUTF8.hpp" +#include "TextDict.hpp" + +namespace opencc { + +class LexiconAnnotationTest : public ::testing::Test { +protected: + const std::string testFileName = "test_annotation_dict.txt"; + + void TearDown() override { remove(testFileName.c_str()); } +}; + +TEST_F(LexiconAnnotationTest, ParseCommentLines) { + FILE* fp = fopen(testFileName.c_str(), "w"); + fprintf(fp, "# This is a header comment\n"); + fprintf(fp, "# Line 2 of header\n"); + fprintf(fp, "\n"); + fprintf(fp, "A\tB\n"); + fprintf(fp, "C\tD\n"); + fclose(fp); + + FILE* readFp = fopen(testFileName.c_str(), "r"); + const TextDictPtr& dict = TextDict::NewFromFile(readFp); + fclose(readFp); + EXPECT_EQ(dict->GetLexicon()->Length(), 2); +} + +TEST_F(LexiconAnnotationTest, ParseAttachedComment) { + FILE* fp = fopen(testFileName.c_str(), "w"); + fprintf(fp, "# Header\n"); + fprintf(fp, "\n"); + fprintf(fp, "# Comment for A\n"); + fprintf(fp, "A\tB\n"); + fprintf(fp, "C\tD\n"); + fclose(fp); + + FILE* readFp = fopen(testFileName.c_str(), "r"); + const TextDictPtr& dict = TextDict::NewFromFile(readFp); + fclose(readFp); + EXPECT_EQ(dict->GetLexicon()->Length(), 2); +} + +TEST_F(LexiconAnnotationTest, ParseFloatingComment) { + FILE* fp = fopen(testFileName.c_str(), "w"); + fprintf(fp, "A\tB\n"); + fprintf(fp, "\n"); + fprintf(fp, "# This is a floating comment\n"); + fprintf(fp, "\n"); + fprintf(fp, "C\tD\n"); + fclose(fp); + + FILE* readFp = fopen(testFileName.c_str(), "r"); + const 
TextDictPtr& dict = TextDict::NewFromFile(readFp); + fclose(readFp); + EXPECT_EQ(dict->GetLexicon()->Length(), 2); +} + +TEST_F(LexiconAnnotationTest, ParseFooterComment) { + FILE* fp = fopen(testFileName.c_str(), "w"); + fprintf(fp, "A\tB\n"); + fprintf(fp, "C\tD\n"); + fprintf(fp, "\n"); + fprintf(fp, "# Footer comment\n"); + fprintf(fp, "# Line 2 of footer\n"); + fclose(fp); + + FILE* readFp = fopen(testFileName.c_str(), "r"); + const TextDictPtr& dict = TextDict::NewFromFile(readFp); + fclose(readFp); + EXPECT_EQ(dict->GetLexicon()->Length(), 2); +} + +TEST_F(LexiconAnnotationTest, SerializeIgnoresComments) { + FILE* fp = fopen(testFileName.c_str(), "w"); + fprintf(fp, "# Header\n"); + fprintf(fp, "\n"); + fprintf(fp, "# Comment for B\n"); + fprintf(fp, "B\tBB\n"); + fprintf(fp, "A\tAA\n"); + fprintf(fp, "\n"); + fprintf(fp, "# Footer\n"); + fclose(fp); + + FILE* readFp = fopen(testFileName.c_str(), "r"); + const TextDictPtr& dict = TextDict::NewFromFile(readFp); + fclose(readFp); + + // Serialize back + const std::string outputFileName = "test_annotation_dict_output.txt"; + FILE* outFp = fopen(outputFileName.c_str(), "w"); + dict->SerializeToFile(outFp); + fclose(outFp); + + // Read back and verify + FILE* outputFp = fopen(outputFileName.c_str(), "r"); + char buff[1024]; + std::vector<std::string> lines; + while (fgets(buff, sizeof(buff), outputFp)) { + std::string line(buff); + while (!line.empty() && (line.back() == '\n' || line.back() == '\r')) { + line.pop_back(); + } + lines.push_back(line); + } + fclose(outputFp); + remove(outputFileName.c_str()); + + EXPECT_EQ(lines.size(), 2); + EXPECT_EQ(lines[0], "A\tAA"); + EXPECT_EQ(lines[1], "B\tBB"); +} + +TEST_F(LexiconAnnotationTest, SortIgnoresComments) { + FILE* fp = fopen(testFileName.c_str(), "w"); + fprintf(fp, "# Header\n"); + fprintf(fp, "\n"); + fprintf(fp, "# Comment for C\n"); + fprintf(fp, "C\tCC\n"); + fprintf(fp, "# Comment for A\n"); + fprintf(fp, "A\tAA\n"); + fprintf(fp, "B\tBB\n"); + fclose(fp); + + FILE* 
readFp = fopen(testFileName.c_str(), "r"); + const TextDictPtr& dict = TextDict::NewFromFile(readFp); + fclose(readFp); + + EXPECT_EQ(dict->GetLexicon()->Length(), 3); +} + +TEST_F(LexiconAnnotationTest, DefaultBehaviorIgnoresComments) { + FILE* fp = fopen(testFileName.c_str(), "w"); + fprintf(fp, "A\tB\n"); + fprintf(fp, "C\tD\n"); + fclose(fp); + + // Default behavior should ignore comments + FILE* readFp = fopen(testFileName.c_str(), "r"); + const TextDictPtr& dict = TextDict::NewFromFile(readFp); + fclose(readFp); + + EXPECT_EQ(dict->GetLexicon()->Length(), 2); +} + +TEST_F(LexiconAnnotationTest, DefaultBehaviorAcceptsCommentLines) { + FILE* fp = fopen(testFileName.c_str(), "w"); + fprintf(fp, "# This is a comment\n"); + fprintf(fp, "A\tB\n"); + fclose(fp); + + FILE* readFp = fopen(testFileName.c_str(), "r"); + const TextDictPtr& dict = TextDict::NewFromFile(readFp); + fclose(readFp); + + EXPECT_EQ(dict->GetLexicon()->Length(), 1); +} + +} // namespace opencc