From 388f0d7a78757fd0ca02768096c446df4e0d026d Mon Sep 17 00:00:00 2001 From: obo Date: Thu, 8 Jan 2026 14:05:05 +0800 Subject: [PATCH 1/6] feat: add support for Ideographic Variation Sequences (IVS) in TrueType fonts --- .../com/lowagie/text/pdf/FontDetails.java | 102 ++++++++++++++++++ .../com/lowagie/text/pdf/TrueTypeFont.java | 89 +++++++++++++++ .../lowagie/text/pdf/TrueTypeFontUnicode.java | 31 +++++- 3 files changed, 221 insertions(+), 1 deletion(-) diff --git a/openpdf/src/main/java/com/lowagie/text/pdf/FontDetails.java b/openpdf/src/main/java/com/lowagie/text/pdf/FontDetails.java index 4e8e0d68e..a71c5d9be 100755 --- a/openpdf/src/main/java/com/lowagie/text/pdf/FontDetails.java +++ b/openpdf/src/main/java/com/lowagie/text/pdf/FontDetails.java @@ -54,6 +54,7 @@ import com.lowagie.text.Utilities; import java.awt.font.GlyphVector; import java.io.UnsupportedEncodingException; +import java.util.Arrays; import java.util.HashMap; /** @@ -222,6 +223,70 @@ byte[] convertToBytes(String text, TextRenderingOptions options) { b = s.getBytes(CJKFont.CJK_ENCODING); } else { + //ivs font handler,Simply judge whether it is IVS font or not + if (mayContainIVS(text)) { + glyph = new char[len * 2]; + for (int k = 0; k < len; ) { + int baseCp; + int charCount; + if (k < len - 1 && Character.isHighSurrogate(text.charAt(k)) + && Character.isLowSurrogate(text.charAt(k + 1))) { + baseCp = Character.toCodePoint(text.charAt(k), text.charAt(k + 1)); + charCount = 2; + } else { + baseCp = text.charAt(k); + charCount = 1; + } + + int vsCp = -1; + int vsCharCount = 0; + int nextIndex = k + charCount; + + if (nextIndex < len) { + int potentialVs = text.charAt(nextIndex); + if (isVariationSelector(potentialVs)) { + vsCp = potentialVs; + vsCharCount = 1; + } + else if (nextIndex < len - 1 + && Character.isHighSurrogate(text.charAt(nextIndex)) + && Character.isLowSurrogate(text.charAt(nextIndex + 1))) { + int potentialVsPair = Character.toCodePoint( + text.charAt(nextIndex), text.charAt(nextIndex + 1)); + if (isVariationSelector(potentialVsPair)) { + vsCp = potentialVsPair; + vsCharCount = 2; + } + } + } + + if (vsCp != -1) { + int[] format14Metrics = this.ttu.getFormat14MetricsTT(baseCp, vsCp); + if (format14Metrics != null) { + int gl = format14Metrics[0]; + if (!this.longTag.containsKey(gl)) { + this.longTag.put(gl, new int[]{gl, format14Metrics[1], baseCp, vsCp}); + } + glyph[i++] = (char) gl; + k += charCount + vsCharCount; + continue; + } + } + metrics = this.ttu.getMetricsTT(baseCp); + if (metrics != null) { + int gl = metrics[0]; + if (!this.longTag.containsKey(gl)) { + this.longTag.put(gl, new int[]{gl, metrics[1], baseCp}); + } + glyph[i++] = (char) gl; + } + + k += charCount; + } + glyph = Arrays.copyOfRange(glyph, 0, i); + b = convertCharsToBytes(glyph); + break; + } String fileName = ((TrueTypeFontUnicode) getBaseFont()).fileName; if (options.isGlyphSubstitutionEnabled() && FopGlyphProcessor.isFopSupported() && (fileName != null && fileName.length() > 0 @@ -241,6 +306,43 @@ byte[] convertToBytes(String text, TextRenderingOptions options) { return b; } + private static boolean isVariationSelector(int codePoint) { + return (codePoint >= 0xFE00 && codePoint <= 0xFE0F) + || (codePoint >= 0xE0100 && codePoint <= 0xE01EF); + } + + /** + * Quickly determine whether the text may contain IVS (to decide whether to use the IVS dedicated path) + * Note: This means "may contain," not "must contain"—err on the side of caution to avoid omissions + */ + private static boolean mayContainIVS(String text) { + if (text == null) return false; + + for (int i = 0; i < text.length(); i++) { + char c = text.charAt(i); + + if (c >= '\uFE00' && c <= '\uFE0F') { + return true; + } + + if (c >= '\udb40' && c <= '\udb43') { + return true; + } + } + return false; + } + + private byte[] convertCharsToBytes(char[] chars) { + byte[] result = new byte[chars.length * 2]; + + for (int i = 0; i < chars.length; ++i) { + result[2 * i] = (byte) (chars[i] / 256); + result[2 * i + 1] = (byte) (chars[i] % 256); + } + + return result; + } + private byte[] convertToBytesWithGlyphs(String text) throws UnsupportedEncodingException { int len = text.length(); int[] metrics = null; diff --git a/openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFont.java b/openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFont.java index 64100b556..2327a783f 100644 --- a/openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFont.java +++ b/openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFont.java @@ -206,6 +206,8 @@ class TrueTypeFont extends BaseFont { protected HashMap cmapExt; + protected HashMap cmap05; + /** * The map containing the kerning information. It represents the content of table 'kern'. The key is an * Integer where the top 16 bits are the glyph number for the first character and the lower 16 bits @@ -797,6 +799,7 @@ void readCMaps() throws DocumentException, IOException { int map31 = 0; int map30 = 0; int mapExt = 0; + int map05 = 0; for (int k = 0; k < num_tables; ++k) { int platId = rf.readUnsignedShort(); int platSpecId = rf.readUnsignedShort(); @@ -808,6 +811,8 @@ void readCMaps() throws DocumentException, IOException { map31 = offset; } else if (platId == 3 && platSpecId == 10) { mapExt = offset; + } else if (platId == 0 && platSpecId == 5) { + map05 = offset; } if (platId == 1 && platSpecId == 0) { map10 = offset; @@ -860,6 +865,87 @@ void readCMaps() throws DocumentException, IOException { break; } } + if (map05 > 0) { + int format14Location = table_location[0] + map05; + this.rf.seek((long) format14Location); + int format = this.rf.readUnsignedShort(); + if (format == 14) { + this.cmap05 = this.readFormat14(format14Location); + } + } + } + + HashMap readFormat14(int format14Location) throws IOException { + HashMap result = new HashMap<>(); + long startPosition = this.rf.getFilePointer() - 4; // 回退到读取 byteLength 之前的位置 + + int byteLength = this.rf.readInt(); + int numVarSelectorRecords = this.rf.readInt(); + + if (numVarSelectorRecords < 0 || numVarSelectorRecords > 10000) { + throw new IOException("Invalid numVarSelectorRecords: " + numVarSelectorRecords); + } + + Map nonDefaultOffsetMap = new HashMap<>(); + + for (int i = 0; i < numVarSelectorRecords; ++i) { + byte[] input = new byte[3]; + this.rf.read(input); + int selectorUnicodeValue = this.byte2int(input, 3); + int defaultUVSOffset = this.rf.readInt(); + int nonDefaultUVSOffset = this.rf.readInt(); + + if (nonDefaultUVSOffset > 0) { // 只处理非零偏移 + nonDefaultOffsetMap.put(selectorUnicodeValue, nonDefaultUVSOffset); + } + } + + for (Map.Entry entry : nonDefaultOffsetMap.entrySet()) { + Integer selectorUnicodeValue = entry.getKey(); + int nonDefaultUVSOffset = entry.getValue(); + + this.rf.seek((long) (format14Location + nonDefaultUVSOffset)); + int mappingNums = this.rf.readInt(); + + if (mappingNums < 0 || mappingNums > 10000) { + // invalid mapping + continue; + } + + for (int i = 0; i < mappingNums; ++i) { + byte[] input = new byte[3]; + this.rf.read(input); + int unicodeValue = this.byte2int(input, 3); + int glyphId = this.rf.readUnsignedShort(); + result.put(unicodeValue + "_" + selectorUnicodeValue, + new int[]{glyphId, this.getGlyphWidth(glyphId)}); + } + } + return result; + } + + /** + * 将大端序(Big-Endian)的字节数组转换为无符号整数(最多4字节) + * @param data 输入字节数组 + * @param n 读取字节数(1~4) + * @return 对应的非负整数值 + */ + public int byte2int(byte[] data, int n) { + if (data == null || n <= 0 || n > 4 || data.length < n) { + return 0; + } + int result = 0; + for (int i = 0; i < n; i++) { + result = (result << 8) | (data[i] & 0xFF); // & 0xFF 确保无符号 + } + return result; + } + + public int[] getFormat14MetricsTT(int char1, int char2) { + if (this.cmap05 != null) { + return this.cmap05.get(char1 + "_" + char2); + } + return null; } HashMap readFormat12() throws IOException { @@ -1419,6 +1505,9 @@ public int[] getMetricsTT(int c) { if (cmap10 != null) { return cmap10.get(c); } + if (cmap05 != null) { + return cmap05.get(c); + } return null; } diff --git a/openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFontUnicode.java b/openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFontUnicode.java index 125133939..b8366bcc7 100755 --- a/openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFontUnicode.java +++ b/openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFontUnicode.java @@ -268,7 +268,13 @@ private PdfStream getToUnicode(int[][] metrics) { --size; int[] metric = metrics[k]; String fromTo = toHex(metric[0]); - buf.append(fromTo).append(fromTo).append(toHex(metric[2])).append('\n'); + String hexString; + if (metric.length == 4) { + hexString = toHex(metric[2], metric[3]); + } else { + hexString = toHex(metric[2]); + } + buf.append(fromTo).append(fromTo).append(hexString).append('\n'); } buf.append( "endbfrange\n" + @@ -585,4 +591,27 @@ public int[] getCharBBox(int c) { return bboxes[m[0]]; } + private String toHex(int char1, int char2) { + String hex1; + int high; + int low; + if (char1 < 65536) { + hex1 = toHex4(char1); + } else { + char1 -= 65536; + high = char1 / 1024 + '\ud800'; + low = char1 % 1024 + '\udc00'; + hex1 = toHex4(high) + toHex4(low); + } + String hex2; + if (char2 < 65536) { + hex2 = toHex4(char2); + } else { + char2 -= 65536; + high = char2 / 1024 + '\ud800'; + low = char2 % 1024 + '\udc00'; + hex2 = toHex4(high) + toHex4(low); + } + return "[<" + hex1 + hex2 + ">]"; + } } From adddcf5ab792425c6ca5abeac75b9ff4cdd7aa73 Mon Sep 17 00:00:00 2001 From: obo Date: Thu, 8 Jan 2026 14:36:15 +0800 Subject: [PATCH 2/6] code review --- .../com/lowagie/text/pdf/FontDetails.java | 178 ++++++++++++------ .../com/lowagie/text/pdf/TrueTypeFont.java | 17 +- 2 files changed, 123 insertions(+), 72 deletions(-) diff --git a/openpdf/src/main/java/com/lowagie/text/pdf/FontDetails.java b/openpdf/src/main/java/com/lowagie/text/pdf/FontDetails.java index a71c5d9be..49d2ef901 100755 --- a/openpdf/src/main/java/com/lowagie/text/pdf/FontDetails.java +++ b/openpdf/src/main/java/com/lowagie/text/pdf/FontDetails.java @@ -225,66 +225,7 @@ byte[] convertToBytes(String text, TextRenderingOptions options) { } else { //ivs font handler,Simply judge whether it is IVS font or not if (mayContainIVS(text)) { - glyph = new char[len * 2]; - for (int k = 0; k < len; ) { - int baseCp; - int charCount; - if (k < len - 1 && Character.isHighSurrogate(text.charAt(k)) - && Character.isLowSurrogate(text.charAt(k + 1))) { - baseCp = Character.toCodePoint(text.charAt(k), text.charAt(k + 1)); - charCount = 2; - } else { - baseCp = text.charAt(k); - charCount = 1; - } - - int vsCp = -1; - int vsCharCount = 0; - int nextIndex = k + charCount; - - if (nextIndex < len) { - int potentialVs = text.charAt(nextIndex); - if (isVariationSelector(potentialVs)) { - vsCp = potentialVs; - vsCharCount = 1; - } - else if (nextIndex < len - 1 - && Character.isHighSurrogate(text.charAt(nextIndex)) - && Character.isLowSurrogate(text.charAt(nextIndex + 1))) { - int potentialVsPair = Character.toCodePoint( - text.charAt(nextIndex), text.charAt(nextIndex + 1)); - if (isVariationSelector(potentialVsPair)) { - vsCp = potentialVsPair; - vsCharCount = 2; - } - } - } - - if (vsCp != -1) { - int[] format14Metrics = this.ttu.getFormat14MetricsTT(baseCp, vsCp); - if (format14Metrics != null) { - int gl = format14Metrics[0]; - if (!this.longTag.containsKey(gl)) { - this.longTag.put(gl, new int[]{gl, format14Metrics[1], baseCp, vsCp}); - } - glyph[i++] = (char) gl; - k += charCount + vsCharCount; - continue; - } - } - metrics = this.ttu.getMetricsTT(baseCp); - if (metrics != null) { - int gl = metrics[0]; - if (!this.longTag.containsKey(gl)) { - this.longTag.put(gl, new int[]{gl, metrics[1], baseCp}); - } - glyph[i++] = (char) gl; - } - - k += charCount; - } - glyph = Arrays.copyOfRange(glyph, 0, i); - b = convertCharsToBytes(glyph); + b = handleIvsText(text, len, i); break; } String fileName = ((TrueTypeFontUnicode) getBaseFont()).fileName; @@ -482,4 +423,121 @@ public boolean isSubset() { public void setSubset(boolean subset) { this.subset = subset; } + + /** + * handle ivs text + */ + private byte[] handleIvsText(String text, int len, int startIndex) { + char[] glyph = new char[len * 2]; + int glyphIndex = startIndex; + int k = 0; + + while (k < len) { + CodePointInfo baseChar = parseCodePoint(text, k, len); + CodePointInfo vsChar = parseVariationSelector(text, k + baseChar.charCount, len); + int skipCount = baseChar.charCount; + if (vsChar != null) { + glyphIndex = addIvsGlyph(baseChar.codePoint, vsChar.codePoint, glyph, glyphIndex); + skipCount += vsChar.charCount; + } else { + glyphIndex = addDefaultGlyph(baseChar.codePoint, glyph, glyphIndex); + } + k += skipCount; + } + + glyph = Arrays.copyOfRange(glyph, 0, glyphIndex); + return convertCharsToBytes(glyph); + } + + private CodePointInfo parseCodePoint(String text, int index, int len) { + if (index < len - 1 + && Character.isHighSurrogate(text.charAt(index)) + && Character.isLowSurrogate(text.charAt(index + 1))) { + // Surrogate pair + int codePoint = Character.toCodePoint(text.charAt(index), text.charAt(index + 1)); + return new CodePointInfo(codePoint, 2); + } else { + // BMP + return new CodePointInfo(text.charAt(index), 1); + } + } + + private CodePointInfo parseVariationSelector(String text, int index, int len) { + if (index >= len) { + return null; + } + + char currentChar = text.charAt(index); + + // single char IVS + if (isVariationSelector(currentChar)) { + return new CodePointInfo(currentChar, 1); + } + + // surrogate pair IVS + if (index < len - 1 + && Character.isHighSurrogate(currentChar) + && Character.isLowSurrogate(text.charAt(index + 1))) { + int codePoint = Character.toCodePoint(currentChar, text.charAt(index + 1)); + if (isVariationSelector(codePoint)) { + return new CodePointInfo(codePoint, 2); + } + } + + return null; + } + + private int addIvsGlyph(int baseCp, int vsCp, char[] glyph, int glyphIndex) { + int[] format14Metrics = this.ttu.getFormat14MetricsTT(baseCp, vsCp); + + if (format14Metrics != null) { + int glyphId = format14Metrics[0]; + cacheGlyphMetrics(glyphId, format14Metrics[1], baseCp, vsCp); + glyph[glyphIndex++] = (char) glyphId; + return glyphIndex; + } + + // fallback + return addDefaultGlyph(baseCp, glyph, glyphIndex); + } + + private int addDefaultGlyph(int codePoint, char[] glyph, int glyphIndex) { + int[] metrics = this.ttu.getMetricsTT(codePoint); + + if (metrics != null) { + int glyphId = metrics[0]; + cacheGlyphMetrics(glyphId, metrics[1], codePoint); + glyph[glyphIndex++] = (char) glyphId; + } + + return glyphIndex; + } + + /** + * cache IVS glyph metrics info + */ + private void cacheGlyphMetrics(int glyphId, int width, int baseCp) { + if (!this.longTag.containsKey(glyphId)) { + this.longTag.put(glyphId, new int[]{glyphId, width, baseCp}); + } + } + + /** + * cache IVS glyph metrics info + */ + private void cacheGlyphMetrics(int glyphId, int width, int baseCp, int vsCp) { + if (!this.longTag.containsKey(glyphId)) { + this.longTag.put(glyphId, new int[]{glyphId, width, baseCp, vsCp}); + } + } + + private static class CodePointInfo { + final int codePoint; + final int charCount; + + CodePointInfo(int codePoint, int charCount) { + this.codePoint = codePoint; + this.charCount = charCount; + } + } } diff --git a/openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFont.java b/openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFont.java index 2327a783f..058edf160 100644 --- a/openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFont.java +++ b/openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFont.java @@ -877,9 +877,8 @@ void readCMaps() throws DocumentException, IOException { HashMap readFormat14(int format14Location) throws IOException { HashMap result = new HashMap<>(); - long startPosition = this.rf.getFilePointer() - 4; // 回退到读取 byteLength 之前的位置 - - int byteLength = this.rf.readInt(); + int startPosition = this.rf.getFilePointer() - 4; //reopen + int byteLength = this.rf.readInt(); // byteLength int numVarSelectorRecords = this.rf.readInt(); if (numVarSelectorRecords < 0 || numVarSelectorRecords > 10000) { @@ -895,7 +894,7 @@ HashMap readFormat14(int format14Location) throws IOException { int defaultUVSOffset = this.rf.readInt(); int nonDefaultUVSOffset = this.rf.readInt(); - if (nonDefaultUVSOffset > 0) { // 只处理非零偏移 + if (nonDefaultUVSOffset > 0) { nonDefaultOffsetMap.put(selectorUnicodeValue, nonDefaultUVSOffset); } } @@ -924,19 +923,13 @@ HashMap readFormat14(int format14Location) throws IOException { return result; } - /** - * 将大端序(Big-Endian)的字节数组转换为无符号整数(最多4字节) - * @param data 输入字节数组 - * @param n 读取字节数(1~4) - * @return 对应的非负整数值 - */ public int byte2int(byte[] data, int n) { if (data == null || n <= 0 || n > 4 || data.length < n) { return 0; } int result = 0; for (int i = 0; i < n; i++) { - result = (result << 8) | (data[i] & 0xFF); // & 0xFF 确保无符号 + result = (result << 8) | (data[i] & 0xFF); } return result; } @@ -945,7 +938,7 @@ public int[] getFormat14MetricsTT(int char1, int char2) { if (this.cmap05 != null) { return this.cmap05.get(char1 + "_" + char2); } - return null; + return new int[]{-1, -1}; } HashMap readFormat12() throws IOException { From b8d81849ad2bf2a1f4c980bd0cf76a922389c72c Mon Sep 17 00:00:00 2001 From: obo Date: Thu, 8 Jan 2026 14:47:36 +0800 Subject: [PATCH 3/6] code review2 --- .../src/main/java/com/lowagie/text/pdf/FontDetails.java | 7 ++++--- .../src/main/java/com/lowagie/text/pdf/TrueTypeFont.java | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/openpdf/src/main/java/com/lowagie/text/pdf/FontDetails.java b/openpdf/src/main/java/com/lowagie/text/pdf/FontDetails.java index 49d2ef901..01ce01a44 100755 --- a/openpdf/src/main/java/com/lowagie/text/pdf/FontDetails.java +++ b/openpdf/src/main/java/com/lowagie/text/pdf/FontDetails.java @@ -493,8 +493,8 @@ private int addIvsGlyph(int baseCp, int vsCp, char[] glyph, int glyphIndex) { if (format14Metrics != null) { int glyphId = format14Metrics[0]; cacheGlyphMetrics(glyphId, format14Metrics[1], baseCp, vsCp); - glyph[glyphIndex++] = (char) glyphId; - return glyphIndex; + glyph[glyphIndex] = (char) glyphId; + return glyphIndex + 1; } // fallback @@ -507,7 +507,8 @@ private int addDefaultGlyph(int codePoint, char[] glyph, int glyphIndex) { if (metrics != null) { int glyphId = metrics[0]; cacheGlyphMetrics(glyphId, metrics[1], codePoint); - glyph[glyphIndex++] = (char) glyphId; + glyph[glyphIndex] = (char) glyphId; + return glyphIndex + 1; } return glyphIndex; diff --git a/openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFont.java b/openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFont.java index 058edf160..4a86d56a4 100644 --- a/openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFont.java +++ b/openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFont.java @@ -878,7 +878,7 @@ void readCMaps() throws DocumentException, IOException { HashMap readFormat14(int format14Location) throws IOException { HashMap result = new HashMap<>(); int startPosition = this.rf.getFilePointer() - 4; //reopen - int byteLength = this.rf.readInt(); // byteLength + this.rf.readInt(); // byteLength,unused but need to read int numVarSelectorRecords = this.rf.readInt(); if (numVarSelectorRecords < 0 || numVarSelectorRecords > 10000) { From 4a8eded905fa27d84c3f9c59a3b3ae19af0e3f85 Mon Sep 17 00:00:00 2001 From: obo Date: Fri, 9 Jan 2026 12:04:18 +0800 Subject: [PATCH 4/6] refactoring to reduce code complexity --- .../com/lowagie/text/pdf/FontDetails.java | 185 ++++++++++++------ .../com/lowagie/text/pdf/TrueTypeFont.java | 4 +- 2 files changed, 127 insertions(+), 62 deletions(-) diff --git a/openpdf/src/main/java/com/lowagie/text/pdf/FontDetails.java b/openpdf/src/main/java/com/lowagie/text/pdf/FontDetails.java index 01ce01a44..f27e3f458 100755 --- a/openpdf/src/main/java/com/lowagie/text/pdf/FontDetails.java +++ b/openpdf/src/main/java/com/lowagie/text/pdf/FontDetails.java @@ -173,78 +173,143 @@ BaseFont getBaseFont() { * encoding and the characters used are stored. * * @param text the text to convert + * @param options rendering options * @return the conversion */ byte[] convertToBytes(String text, TextRenderingOptions options) { - byte[] b = null; switch (fontType) { case BaseFont.FONT_TYPE_T3: - return baseFont.convertToBytes(text); + return convertType3Font(text); + case BaseFont.FONT_TYPE_T1: - case BaseFont.FONT_TYPE_TT: { - b = baseFont.convertToBytes(text); - int len = b.length; - for (byte b1 : b) { - shortTag[b1 & 0xff] = 1; - } - break; + case BaseFont.FONT_TYPE_TT: + return convertType1OrTrueTypeFont(text); + + case BaseFont.FONT_TYPE_CJK: + return convertCjkFont(text); + + case BaseFont.FONT_TYPE_DOCUMENT: + return convertDocumentFont(text); + + case BaseFont.FONT_TYPE_TTUNI: + return convertTrueTypeUnicodeFont(text, options); + + default: + return null; + } + } + + // Converts Type 3 font text to bytes + private byte[] convertType3Font(String text) { + return baseFont.convertToBytes(text); + } + + // Converts Type 1 or TrueType font text to bytes + private byte[] convertType1OrTrueTypeFont(String text) { + byte[] bytes = baseFont.convertToBytes(text); + recordUsedCharacters(bytes); + return bytes; + } + + // Records characters that have been used + private void recordUsedCharacters(byte[] bytes) { + for (byte b : bytes) { + shortTag[b & 0xff] = 1; + } + } + + // Converts CJK font text to bytes + private byte[] convertCjkFont(String text) { + recordCjkCharacters(text); + return baseFont.convertToBytes(text); + } + + // Records CJK characters that have been used + private void recordCjkCharacters(String text) { + for (int i = 0; i < text.length(); i++) { + int cidCode = cjkFont.getCidCode(text.charAt(i)); + cjkTag.put(cidCode, 0); + } + } + + // Converts document font text to bytes + private byte[] convertDocumentFont(String text) { + return baseFont.convertToBytes(text); + } + + // Converts TrueType Unicode font text to bytes + private byte[] convertTrueTypeUnicodeFont(String text, TextRenderingOptions options) { + try { + if (symbolic) { + return convertSymbolicFont(text); } - case BaseFont.FONT_TYPE_CJK: { - int len = text.length(); - for (int k = 0; k < len; ++k) { - cjkTag.put(cjkFont.getCidCode(text.charAt(k)), 0); - } - b = baseFont.convertToBytes(text); - break; + + // Handle IVS (Ideographic Variation Sequence) fonts + if (mayContainIVS(text)) { + return handleIvsText(text, text.length(), 0); } - case BaseFont.FONT_TYPE_DOCUMENT: { - b = baseFont.convertToBytes(text); - break; + + // Use Fop glyph processor if applicable + if (shouldUseFopGlyphProcessor(options)) { + String fileName = ((TrueTypeFontUnicode) getBaseFont()).fileName; + return FopGlyphProcessor.convertToBytesWithGlyphs( + ttu, text, fileName, longTag, options.getDocumentLanguage() + ); } - case BaseFont.FONT_TYPE_TTUNI: { - try { - int len = text.length(); - int[] metrics = null; - char[] glyph = new char[len]; - int i = 0; - if (symbolic) { - b = PdfEncodings.convertToBytes(text, "symboltt"); - len = b.length; - for (int k = 0; k < len; ++k) { - metrics = ttu.getMetricsTT(b[k] & 0xff); - if (metrics == null) { - continue; - } - longTag.put(metrics[0], - new int[]{metrics[0], metrics[1], ttu.getUnicodeDifferences(b[k] & 0xff)}); - glyph[i++] = (char) metrics[0]; - } - String s = new String(glyph, 0, i); - b = s.getBytes(CJKFont.CJK_ENCODING); - } else { - //ivs font handler,Simply judge whether it is IVS font or not - if (mayContainIVS(text)) { - b = handleIvsText(text, len, i); - break; - } - String fileName = ((TrueTypeFontUnicode) getBaseFont()).fileName; - if (options.isGlyphSubstitutionEnabled() && FopGlyphProcessor.isFopSupported() - && (fileName != null && fileName.length() > 0 - && (fileName.contains(".ttf") || fileName.contains(".TTF")))) { - return FopGlyphProcessor.convertToBytesWithGlyphs(ttu, text, fileName, longTag, - options.getDocumentLanguage()); - } else { - return convertToBytesWithGlyphs(text); - } - } - } catch (UnsupportedEncodingException e) { - throw new ExceptionConverter(e); - } - break; + // Default glyph conversion + return convertToBytesWithGlyphs(text); + } catch (UnsupportedEncodingException e) { + throw new ExceptionConverter(e); + } + } + + // Converts symbolic font text to bytes + private byte[] convertSymbolicFont(String text) throws UnsupportedEncodingException { + byte[] symbolBytes = PdfEncodings.convertToBytes(text, "symboltt"); + char[] glyphCodes = extractGlyphCodes(symbolBytes); + String glyphString = new String(glyphCodes); + return glyphString.getBytes(CJKFont.CJK_ENCODING); + } + + // Extracts glyph codes from symbol bytes and records metrics + private char[] extractGlyphCodes(byte[] symbolBytes) { + char[] glyphCodes = new char[symbolBytes.length]; + int glyphCount = 0; + + for (byte b : symbolBytes) { + int[] metrics = ttu.getMetricsTT(b & 0xff); + if (metrics == null) { + continue; } + + int glyphCode = metrics[0]; + int width = metrics[1]; + int unicodeDiff = ttu.getUnicodeDifferences(b & 0xff); + + longTag.put(glyphCode, new int[]{glyphCode, width, unicodeDiff}); + glyphCodes[glyphCount++] = (char) glyphCode; } - return b; + + return java.util.Arrays.copyOf(glyphCodes, glyphCount); + } + + // Determines whether to use Fop glyph processor + private boolean shouldUseFopGlyphProcessor(TextRenderingOptions options) { + if (!options.isGlyphSubstitutionEnabled() || !FopGlyphProcessor.isFopSupported()) { + return false; + } + + String fileName = ((TrueTypeFontUnicode) getBaseFont()).fileName; + return fileName != null + && !fileName.isEmpty() + && isTrueTypeFile(fileName); + } + + // Checks if the file is a TrueType font file + private boolean isTrueTypeFile(String fileName) { + String lowerFileName = fileName.toLowerCase(); + return lowerFileName.endsWith(".ttf"); } private static boolean isVariationSelector(int codePoint) { diff --git a/openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFont.java b/openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFont.java index 4a86d56a4..f3e25f35d 100644 --- a/openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFont.java +++ b/openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFont.java @@ -877,7 +877,7 @@ void readCMaps() throws DocumentException, IOException { HashMap readFormat14(int format14Location) throws IOException { HashMap result = new HashMap<>(); - int startPosition = this.rf.getFilePointer() - 4; //reopen + this.rf.getFilePointer(); //reopen this.rf.readInt(); // byteLength,unused but need to read int numVarSelectorRecords = this.rf.readInt(); @@ -891,7 +891,7 @@ HashMap readFormat14(int format14Location) throws IOException { byte[] input = new byte[3]; this.rf.read(input); int selectorUnicodeValue = this.byte2int(input, 3); - int defaultUVSOffset = this.rf.readInt(); + this.rf.readInt(); // defaultUVSOffset int nonDefaultUVSOffset = this.rf.readInt(); if (nonDefaultUVSOffset > 0) { From 4c5c487d169689239c7a82cc1ab75b42487bb651 Mon Sep 17 00:00:00 2001 From: obo Date: Fri, 9 Jan 2026 13:45:50 +0800 Subject: [PATCH 5/6] refactor --- .../com/lowagie/text/pdf/FontDetails.java | 2 +- .../com/lowagie/text/pdf/TrueTypeFont.java | 246 ++++++++++++------ 2 files changed, 173 insertions(+), 75 deletions(-) diff --git a/openpdf/src/main/java/com/lowagie/text/pdf/FontDetails.java b/openpdf/src/main/java/com/lowagie/text/pdf/FontDetails.java index f27e3f458..191506a0d 100755 --- a/openpdf/src/main/java/com/lowagie/text/pdf/FontDetails.java +++ b/openpdf/src/main/java/com/lowagie/text/pdf/FontDetails.java @@ -195,7 +195,7 @@ byte[] convertToBytes(String text, TextRenderingOptions options) { return convertTrueTypeUnicodeFont(text, options); default: - return null; + return convertType3Font(text); } } diff --git a/openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFont.java b/openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFont.java index f3e25f35d..a2844bb1e 100644 --- a/openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFont.java +++ b/openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFont.java @@ -786,95 +786,193 @@ private void readBbox() throws DocumentException, IOException { * @throws IOException the font file could not be read */ void readCMaps() throws DocumentException, IOException { - int[] table_location = tables.get("cmap"); - if (table_location == null) { + int[] tableLocation = getTableLocation("cmap"); + + // Seek to cmap table and read number of subtables + rf.seek(tableLocation[0]); + rf.skipBytes(2); + int numTables = rf.readUnsignedShort(); + + // Scan all subtables to find the mappings we need + CMapOffsets offsets = scanCMapTables(tableLocation[0], numTables); + + // Read each cmap based on the offsets found + readCMap10(tableLocation[0], offsets.map10); + readCMap31(tableLocation[0], offsets.map31); + readCMap30(tableLocation[0], offsets.map30); + readCMapExt(tableLocation[0], offsets.mapExt); + readCMap05(tableLocation[0], offsets.map05); + } + + /** + * Gets the location information for the specified table + */ + private int[] getTableLocation(String tableName) throws DocumentException { + int[] location = tables.get(tableName); + if (location == null) { throw new DocumentException( - MessageLocalization.getComposedMessage("table.1.does.not.exist.in.2", "cmap", fileName + style)); + MessageLocalization.getComposedMessage("table.1.does.not.exist.in.2", tableName, fileName + style) + ); } - rf.seek(table_location[0]); - rf.skipBytes(2); - int num_tables = rf.readUnsignedShort(); + return location; + } + + /** + * Scans all cmap subtables and collects the mapping offsets we need + */ + private CMapOffsets scanCMapTables(int baseOffset, int numTables) throws IOException { + CMapOffsets offsets = new CMapOffsets(); fontSpecific = false; - int map10 = 0; - int map31 = 0; - int map30 = 0; - int mapExt = 0; - int map05 = 0; - for (int k = 0; k < num_tables; ++k) { - int platId = rf.readUnsignedShort(); - int platSpecId = rf.readUnsignedShort(); + + for (int i = 0; i < numTables; i++) { + int platformId = rf.readUnsignedShort(); + int platformSpecificId = rf.readUnsignedShort(); int offset = rf.readInt(); - if (platId == 3 && platSpecId == 0) { + + processTableEntry(platformId, platformSpecificId, offset, offsets); + } + + return offsets; + } + + /** + * Processes a single cmap table entry + */ + private void processTableEntry(int platformId, int platformSpecificId, int offset, CMapOffsets offsets) { + // Platform 3 (Windows) + if (platformId == 3) { + if (platformSpecificId == 0) { + // Symbol font fontSpecific = true; - map30 = offset; - } else if (platId == 3 && platSpecId == 1) { - map31 = offset; - } else if (platId == 3 && platSpecId == 10) { - mapExt = offset; - } else if (platId == 0 && platSpecId == 5) { - map05 = offset; - } - if (platId == 1 && platSpecId == 0) { - map10 = offset; + offsets.map30 = offset; + } else if (platformSpecificId == 1) { + // Unicode BMP + offsets.map31 = offset; + } else if (platformSpecificId == 10) { + // Unicode Full Repertoire + offsets.mapExt = offset; } } - if (map10 > 0) { - rf.seek(table_location[0] + map10); - int format = rf.readUnsignedShort(); - switch (format) { - case 0: - cmap10 = readFormat0(); - break; - case 4: - cmap10 = readFormat4(); - break; - case 6: - cmap10 = readFormat6(); - break; - } + // Platform 1 (Macintosh) + else if (platformId == 1 && platformSpecificId == 0) { + offsets.map10 = offset; } - if (map31 > 0) { - rf.seek(table_location[0] + map31); - int format = rf.readUnsignedShort(); - if (format == 4) { - cmap31 = readFormat4(); - } + // Platform 0 (Unicode) + else if (platformId == 0 && platformSpecificId == 5) { + offsets.map05 = offset; } - if (map30 > 0) { - rf.seek(table_location[0] + map30); - int format = rf.readUnsignedShort(); - if (format == 4) { + } + + /** + * Reads map 1.0 (Macintosh Roman) + */ + private void readCMap10(int baseOffset, int offset) throws IOException { + if (offset <= 0) { + return; + } + + rf.seek(baseOffset + offset); + int format = rf.readUnsignedShort(); + + switch (format) { + case 0: + cmap10 = readFormat0(); + break; + case 4: cmap10 = readFormat4(); - } + break; + case 6: + cmap10 = readFormat6(); + break; } - if (mapExt > 0) { - rf.seek(table_location[0] + mapExt); - int format = rf.readUnsignedShort(); - switch (format) { - case 0: - cmapExt = readFormat0(); - break; - case 4: - cmapExt = readFormat4(); - break; - case 6: - cmapExt = readFormat6(); - break; - case 12: - cmapExt = readFormat12(); - break; - } + } + + /** + * Reads map 3.1 (Windows Unicode BMP) + */ + private void readCMap31(int baseOffset, int offset) throws IOException { + if (offset <= 0) { + return; } - if (map05 > 0) { - int format14Location = table_location[0] + map05; - this.rf.seek((long) format14Location); - int format = this.rf.readUnsignedShort(); - if (format == 14) { - this.cmap05 = this.readFormat14(format14Location); - } + + rf.seek(baseOffset + offset); + int format = rf.readUnsignedShort(); + + if (format == 4) { + cmap31 = readFormat4(); } } + /** + * Reads map 3.0 (Windows Symbol) + */ + private void readCMap30(int baseOffset, int offset) throws IOException { + if (offset <= 0) { + return; + } + + rf.seek(baseOffset + offset); + int format = rf.readUnsignedShort(); + + if (format == 4) { + cmap10 = readFormat4(); + } + } + + /** + * Reads extended map (Windows Unicode Full Repertoire) + */ + private void readCMapExt(int baseOffset, int offset) throws IOException { + if (offset <= 0) { + return; + } + + rf.seek(baseOffset + offset); + int format = rf.readUnsignedShort(); + + switch (format) { + case 0: + cmapExt = readFormat0(); + break; + case 4: + cmapExt = readFormat4(); + break; + case 6: + cmapExt = readFormat6(); + break; + case 12: + cmapExt = readFormat12(); + break; + } + } + + /** + * Reads map 0.5 (Unicode Variation Sequences) + */ + private void readCMap05(int baseOffset, int offset) throws IOException { + if (offset <= 0) { + return; + } + + rf.seek(baseOffset + offset); + int format = rf.readUnsignedShort(); + + if (format == 14) { + cmap05 = readFormat14(baseOffset + offset); + } + } + + /** + * Container class for CMap offsets + */ + private static class CMapOffsets { + int map10 = 0; // Macintosh Roman + int map31 = 0; // Windows Unicode BMP + int map30 = 0; // Windows Symbol + int mapExt = 0; // Windows Unicode Full Repertoire + int map05 = 0; // Unicode Variation Sequences + } + HashMap readFormat14(int format14Location) throws IOException { HashMap result = new HashMap<>(); this.rf.getFilePointer(); //reopen From 124b0b35596203732c76176ff185e18ea5847da8 Mon Sep 17 00:00:00 2001 From: obo Date: Fri, 9 Jan 2026 13:53:20 +0800 Subject: [PATCH 6/6] Remove this unused method parameter "baseOffset". --- openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFont.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFont.java b/openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFont.java index a2844bb1e..7e4120bbf 100644 --- a/openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFont.java +++ b/openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFont.java @@ -794,7 +794,7 @@ void readCMaps() throws DocumentException, IOException { int numTables = rf.readUnsignedShort(); // Scan all subtables to find the mappings we need - CMapOffsets offsets = scanCMapTables(tableLocation[0], numTables); + CMapOffsets offsets = scanCMapTables(numTables); // Read each cmap based on the offsets found readCMap10(tableLocation[0], offsets.map10); @@ -820,7 +820,7 @@ private int[] getTableLocation(String tableName) throws DocumentException { /** * Scans all cmap subtables and collects the mapping offsets we need */ - private CMapOffsets scanCMapTables(int baseOffset, int numTables) throws IOException { + private CMapOffsets scanCMapTables(int numTables) throws IOException { CMapOffsets offsets = new CMapOffsets(); fontSpecific = false;