diff --git a/openpdf/src/main/java/com/lowagie/text/pdf/FontDetails.java b/openpdf/src/main/java/com/lowagie/text/pdf/FontDetails.java index 4e8e0d68e..191506a0d 100755 --- a/openpdf/src/main/java/com/lowagie/text/pdf/FontDetails.java +++ b/openpdf/src/main/java/com/lowagie/text/pdf/FontDetails.java @@ -54,6 +54,7 @@ import com.lowagie.text.Utilities; import java.awt.font.GlyphVector; import java.io.UnsupportedEncodingException; +import java.util.Arrays; import java.util.HashMap; /** @@ -172,73 +173,180 @@ BaseFont getBaseFont() { * encoding and the characters used are stored. * * @param text the text to convert + * @param options rendering options * @return the conversion */ byte[] convertToBytes(String text, TextRenderingOptions options) { - byte[] b = null; switch (fontType) { case BaseFont.FONT_TYPE_T3: - return baseFont.convertToBytes(text); + return convertType3Font(text); + case BaseFont.FONT_TYPE_T1: - case BaseFont.FONT_TYPE_TT: { - b = baseFont.convertToBytes(text); - int len = b.length; - for (byte b1 : b) { - shortTag[b1 & 0xff] = 1; - } - break; + case BaseFont.FONT_TYPE_TT: + return convertType1OrTrueTypeFont(text); + + case BaseFont.FONT_TYPE_CJK: + return convertCjkFont(text); + + case BaseFont.FONT_TYPE_DOCUMENT: + return convertDocumentFont(text); + + case BaseFont.FONT_TYPE_TTUNI: + return convertTrueTypeUnicodeFont(text, options); + + default: + return convertType3Font(text); + } + } + + // Converts Type 3 font text to bytes + private byte[] convertType3Font(String text) { + return baseFont.convertToBytes(text); + } + + // Converts Type 1 or TrueType font text to bytes + private byte[] convertType1OrTrueTypeFont(String text) { + byte[] bytes = baseFont.convertToBytes(text); + recordUsedCharacters(bytes); + return bytes; + } + + // Records characters that have been used + private void recordUsedCharacters(byte[] bytes) { + for (byte b : bytes) { + shortTag[b & 0xff] = 1; + } + } + + // Converts CJK font text to bytes + private byte[] convertCjkFont(String text) { + recordCjkCharacters(text); + return baseFont.convertToBytes(text); + } + + // Records CJK characters that have been used + private void recordCjkCharacters(String text) { + for (int i = 0; i < text.length(); i++) { + int cidCode = cjkFont.getCidCode(text.charAt(i)); + cjkTag.put(cidCode, 0); + } + } + + // Converts document font text to bytes + private byte[] convertDocumentFont(String text) { + return baseFont.convertToBytes(text); + } + + // Converts TrueType Unicode font text to bytes + private byte[] convertTrueTypeUnicodeFont(String text, TextRenderingOptions options) { + try { + if (symbolic) { + return convertSymbolicFont(text); } - case BaseFont.FONT_TYPE_CJK: { - int len = text.length(); - for (int k = 0; k < len; ++k) { - cjkTag.put(cjkFont.getCidCode(text.charAt(k)), 0); - } - b = baseFont.convertToBytes(text); - break; + + // Handle IVS (Ideographic Variation Sequence) fonts + if (mayContainIVS(text)) { + return handleIvsText(text, text.length(), 0); } - case BaseFont.FONT_TYPE_DOCUMENT: { - b = baseFont.convertToBytes(text); - break; + + // Use Fop glyph processor if applicable + if (shouldUseFopGlyphProcessor(options)) { + String fileName = ((TrueTypeFontUnicode) getBaseFont()).fileName; + return FopGlyphProcessor.convertToBytesWithGlyphs( + ttu, text, fileName, longTag, options.getDocumentLanguage() + ); } - case BaseFont.FONT_TYPE_TTUNI: { - try { - int len = text.length(); - int[] metrics = null; - char[] glyph = new char[len]; - int i = 0; - if (symbolic) { - b = PdfEncodings.convertToBytes(text, "symboltt"); - len = b.length; - for (int k = 0; k < len; ++k) { - metrics = ttu.getMetricsTT(b[k] & 0xff); - if (metrics == null) { - continue; - } - longTag.put(metrics[0], - new int[]{metrics[0], metrics[1], ttu.getUnicodeDifferences(b[k] & 0xff)}); - glyph[i++] = (char) metrics[0]; - } - String s = new String(glyph, 0, i); - b = s.getBytes(CJKFont.CJK_ENCODING); - - } else { - String fileName = ((TrueTypeFontUnicode) getBaseFont()).fileName; - if (options.isGlyphSubstitutionEnabled() && FopGlyphProcessor.isFopSupported() - && (fileName != null && fileName.length() > 0 - && (fileName.contains(".ttf") || fileName.contains(".TTF")))) { - return FopGlyphProcessor.convertToBytesWithGlyphs(ttu, text, fileName, longTag, - options.getDocumentLanguage()); - } else { - return convertToBytesWithGlyphs(text); - } - } - } catch (UnsupportedEncodingException e) { - throw new ExceptionConverter(e); - } - break; + + // Default glyph conversion + return convertToBytesWithGlyphs(text); + } catch (UnsupportedEncodingException e) { + throw new ExceptionConverter(e); + } + } + + // Converts symbolic font text to bytes + private byte[] convertSymbolicFont(String text) throws UnsupportedEncodingException { + byte[] symbolBytes = PdfEncodings.convertToBytes(text, "symboltt"); + char[] glyphCodes = extractGlyphCodes(symbolBytes); + String glyphString = new String(glyphCodes); + return glyphString.getBytes(CJKFont.CJK_ENCODING); + } + + // Extracts glyph codes from symbol bytes and records metrics + private char[] extractGlyphCodes(byte[] symbolBytes) { + char[] glyphCodes = new char[symbolBytes.length]; + int glyphCount = 0; + + for (byte b : symbolBytes) { + int[] metrics = ttu.getMetricsTT(b & 0xff); + if (metrics == null) { + continue; } + + int glyphCode = metrics[0]; + int width = metrics[1]; + int unicodeDiff = ttu.getUnicodeDifferences(b & 0xff); + + longTag.put(glyphCode, new int[]{glyphCode, width, unicodeDiff}); + glyphCodes[glyphCount++] = (char) glyphCode; } - return b; + + return java.util.Arrays.copyOf(glyphCodes, glyphCount); + } + + // Determines whether to use Fop glyph processor + private boolean shouldUseFopGlyphProcessor(TextRenderingOptions options) { + if (!options.isGlyphSubstitutionEnabled() || !FopGlyphProcessor.isFopSupported()) { + return false; + } + + String fileName = ((TrueTypeFontUnicode) getBaseFont()).fileName; + return fileName != null + && !fileName.isEmpty() + && isTrueTypeFile(fileName); + } + + // Checks if the file is a TrueType font file + private boolean isTrueTypeFile(String fileName) { + String lowerFileName = fileName.toLowerCase(); + return lowerFileName.endsWith(".ttf"); + } + + private static boolean isVariationSelector(int codePoint) { + return (codePoint >= 0xFE00 && codePoint <= 0xFE0F) + || (codePoint >= 0xE0100 && codePoint <= 0xE01EF); + } + + /** + * Quickly determine whether the text may contain IVS (to decide whether to use the IVS dedicated path) + * Note: This means "may contain," not "must contain"—err on the side of caution to avoid omissions + */ + private static boolean mayContainIVS(String text) { + if (text == null) return false; + + for (int i = 0; i < text.length(); i++) { + char c = text.charAt(i); + + if (c >= '\uFE00' && c <= '\uFE0F') { + return true; + } + + if (c >= '\udb40' && c <= '\udb43') { + return true; + } + } + return false; + } + + private byte[] convertCharsToBytes(char[] chars) { + byte[] result = new byte[chars.length * 2]; + + for (int i = 0; i < chars.length; ++i) { + result[2 * i] = (byte) (chars[i] / 256); + result[2 * i + 1] = (byte) (chars[i] % 256); + } + + return result; } private byte[] convertToBytesWithGlyphs(String text) throws UnsupportedEncodingException { @@ -380,4 +488,122 @@ public boolean isSubset() { public void setSubset(boolean subset) { this.subset = subset; } + + /** + * handle ivs text + */ + private byte[] handleIvsText(String text, int len, int startIndex) { + char[] glyph = new char[len * 2]; + int glyphIndex = startIndex; + int k = 0; + + while (k < len) { + CodePointInfo baseChar = parseCodePoint(text, k, len); + CodePointInfo vsChar = parseVariationSelector(text, k + baseChar.charCount, len); + int skipCount = baseChar.charCount; + if (vsChar != null) { + glyphIndex = addIvsGlyph(baseChar.codePoint, vsChar.codePoint, glyph, glyphIndex); + skipCount += vsChar.charCount; + } else { + glyphIndex = addDefaultGlyph(baseChar.codePoint, glyph, glyphIndex); + } + k += skipCount; + } + + glyph = Arrays.copyOfRange(glyph, 0, glyphIndex); + return convertCharsToBytes(glyph); + } + + private CodePointInfo parseCodePoint(String text, int index, int len) { + if (index < len - 1 + && Character.isHighSurrogate(text.charAt(index)) + && Character.isLowSurrogate(text.charAt(index + 1))) { + // Surrogate pair + int codePoint = Character.toCodePoint(text.charAt(index), text.charAt(index + 1)); + return new CodePointInfo(codePoint, 2); + } else { + // BMP + return new CodePointInfo(text.charAt(index), 1); + } + } + + private CodePointInfo parseVariationSelector(String text, int index, int len) { + if (index >= len) { + return null; + } + + char currentChar = text.charAt(index); + + // single char IVS + if (isVariationSelector(currentChar)) { + return new CodePointInfo(currentChar, 1); + } + + // surrogate pair IVS + if (index < len - 1 + && Character.isHighSurrogate(currentChar) + && Character.isLowSurrogate(text.charAt(index + 1))) { + int codePoint = Character.toCodePoint(currentChar, text.charAt(index + 1)); + if (isVariationSelector(codePoint)) { + return new CodePointInfo(codePoint, 2); + } + } + + return null; + } + + private int addIvsGlyph(int baseCp, int vsCp, char[] glyph, int glyphIndex) { + int[] format14Metrics = this.ttu.getFormat14MetricsTT(baseCp, vsCp); + + if (format14Metrics != null) { + int glyphId = format14Metrics[0]; + cacheGlyphMetrics(glyphId, format14Metrics[1], baseCp, vsCp); + glyph[glyphIndex] = (char) glyphId; + return glyphIndex + 1; + } + + // fallback + return addDefaultGlyph(baseCp, glyph, glyphIndex); + } + + private int addDefaultGlyph(int codePoint, char[] glyph, int glyphIndex) { + int[] metrics = this.ttu.getMetricsTT(codePoint); + + if (metrics != null) { + int glyphId = metrics[0]; + cacheGlyphMetrics(glyphId, metrics[1], codePoint); + glyph[glyphIndex] = (char) glyphId; + return glyphIndex + 1; + } + + return glyphIndex; + } + + /** + * cache IVS glyph metrics info + */ + private void cacheGlyphMetrics(int glyphId, int width, int baseCp) { + if (!this.longTag.containsKey(glyphId)) { + this.longTag.put(glyphId, new int[]{glyphId, width, baseCp}); + } + } + + /** + * cache IVS glyph metrics info + */ + private void cacheGlyphMetrics(int glyphId, int width, int baseCp, int vsCp) { + if (!this.longTag.containsKey(glyphId)) { + this.longTag.put(glyphId, new int[]{glyphId, width, baseCp, vsCp}); + } + } + + private static class CodePointInfo { + final int codePoint; + final int charCount; + + CodePointInfo(int codePoint, int charCount) { + this.codePoint = codePoint; + this.charCount = charCount; + } + } } diff --git a/openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFont.java b/openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFont.java index 64100b556..7e4120bbf 100644 --- a/openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFont.java +++ b/openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFont.java @@ -206,6 +206,8 @@ class TrueTypeFont extends BaseFont { protected HashMap cmapExt; + protected HashMap cmap05; + /** * The map containing the kerning information. It represents the content of table 'kern'. The key is an * Integer where the top 16 bits are the glyph number for the first character and the lower 16 bits @@ -784,82 +786,257 @@ private void readBbox() throws DocumentException, IOException { * @throws IOException the font file could not be read */ void readCMaps() throws DocumentException, IOException { - int[] table_location = tables.get("cmap"); - if (table_location == null) { + int[] tableLocation = getTableLocation("cmap"); + + // Seek to cmap table and read number of subtables + rf.seek(tableLocation[0]); + rf.skipBytes(2); + int numTables = rf.readUnsignedShort(); + + // Scan all subtables to find the mappings we need + CMapOffsets offsets = scanCMapTables(numTables); + + // Read each cmap based on the offsets found + readCMap10(tableLocation[0], offsets.map10); + readCMap31(tableLocation[0], offsets.map31); + readCMap30(tableLocation[0], offsets.map30); + readCMapExt(tableLocation[0], offsets.mapExt); + readCMap05(tableLocation[0], offsets.map05); + } + + /** + * Gets the location information for the specified table + */ + private int[] getTableLocation(String tableName) throws DocumentException { + int[] location = tables.get(tableName); + if (location == null) { throw new DocumentException( - MessageLocalization.getComposedMessage("table.1.does.not.exist.in.2", "cmap", fileName + style)); + MessageLocalization.getComposedMessage("table.1.does.not.exist.in.2", tableName, fileName + style) + ); } - rf.seek(table_location[0]); - rf.skipBytes(2); - int num_tables = rf.readUnsignedShort(); + return location; + } + + /** + * Scans all cmap subtables and collects the mapping offsets we need + */ + private CMapOffsets scanCMapTables(int numTables) throws IOException { + CMapOffsets offsets = new CMapOffsets(); fontSpecific = false; - int map10 = 0; - int map31 = 0; - int map30 = 0; - int mapExt = 0; - for (int k = 0; k < num_tables; ++k) { - int platId = rf.readUnsignedShort(); - int platSpecId = rf.readUnsignedShort(); + + for (int i = 0; i < numTables; i++) { + int platformId = rf.readUnsignedShort(); + int platformSpecificId = rf.readUnsignedShort(); int offset = rf.readInt(); - if (platId == 3 && platSpecId == 0) { + + processTableEntry(platformId, platformSpecificId, offset, offsets); + } + + return offsets; + } + + /** + * Processes a single cmap table entry + */ + private void processTableEntry(int platformId, int platformSpecificId, int offset, CMapOffsets offsets) { + // Platform 3 (Windows) + if (platformId == 3) { + if (platformSpecificId == 0) { + // Symbol font fontSpecific = true; - map30 = offset; - } else if (platId == 3 && platSpecId == 1) { - map31 = offset; - } else if (platId == 3 && platSpecId == 10) { - mapExt = offset; - } - if (platId == 1 && platSpecId == 0) { - map10 = offset; + offsets.map30 = offset; + } else if (platformSpecificId == 1) { + // Unicode BMP + offsets.map31 = offset; + } else if (platformSpecificId == 10) { + // Unicode Full Repertoire + offsets.mapExt = offset; } } - if (map10 > 0) { - rf.seek(table_location[0] + map10); - int format = rf.readUnsignedShort(); - switch (format) { - case 0: - cmap10 = readFormat0(); - break; - case 4: - cmap10 = readFormat4(); - break; - case 6: - cmap10 = readFormat6(); - break; - } + // Platform 1 (Macintosh) + else if (platformId == 1 && platformSpecificId == 0) { + offsets.map10 = offset; } - if (map31 > 0) { - rf.seek(table_location[0] + map31); - int format = rf.readUnsignedShort(); - if (format == 4) { - cmap31 = readFormat4(); - } + // Platform 0 (Unicode) + else if (platformId == 0 && platformSpecificId == 5) { + offsets.map05 = offset; } - if (map30 > 0) { - rf.seek(table_location[0] + map30); - int format = rf.readUnsignedShort(); - if (format == 4) { + } + + /** + * Reads map 1.0 (Macintosh Roman) + */ + private void readCMap10(int baseOffset, int offset) throws IOException { + if (offset <= 0) { + return; + } + + rf.seek(baseOffset + offset); + int format = rf.readUnsignedShort(); + + switch (format) { + case 0: + cmap10 = readFormat0(); + break; + case 4: cmap10 = readFormat4(); + break; + case 6: + cmap10 = readFormat6(); + break; + } + } + + /** + * Reads map 3.1 (Windows Unicode BMP) + */ + private void readCMap31(int baseOffset, int offset) throws IOException { + if (offset <= 0) { + return; + } + + rf.seek(baseOffset + offset); + int format = rf.readUnsignedShort(); + + if (format == 4) { + cmap31 = readFormat4(); + } + } + + /** + * Reads map 3.0 (Windows Symbol) + */ + private void readCMap30(int baseOffset, int offset) throws IOException { + if (offset <= 0) { + return; + } + + rf.seek(baseOffset + offset); + int format = rf.readUnsignedShort(); + + if (format == 4) { + cmap10 = readFormat4(); + } + } + + /** + * Reads extended map (Windows Unicode Full Repertoire) + */ + private void readCMapExt(int baseOffset, int offset) throws IOException { + if (offset <= 0) { + return; + } + + rf.seek(baseOffset + offset); + int format = rf.readUnsignedShort(); + + switch (format) { + case 0: + cmapExt = readFormat0(); + break; + case 4: + cmapExt = readFormat4(); + break; + case 6: + cmapExt = readFormat6(); + break; + case 12: + cmapExt = readFormat12(); + break; + } + } + + /** + * Reads map 0.5 (Unicode Variation Sequences) + */ + private void readCMap05(int baseOffset, int offset) throws IOException { + if (offset <= 0) { + return; + } + + rf.seek(baseOffset + offset); + int format = rf.readUnsignedShort(); + + if (format == 14) { + cmap05 = readFormat14(baseOffset + offset); + } + } + + /** + * Container class for CMap offsets + */ + private static class CMapOffsets { + int map10 = 0; // Macintosh Roman + int map31 = 0; // Windows Unicode BMP + int map30 = 0; // Windows Symbol + int mapExt = 0; // Windows Unicode Full Repertoire + int map05 = 0; // Unicode Variation Sequences + } + + HashMap readFormat14(int format14Location) throws IOException { + HashMap result = new HashMap<>(); + this.rf.getFilePointer(); //reopen + this.rf.readInt(); // byteLength,unused but need to read + int numVarSelectorRecords = this.rf.readInt(); + + if (numVarSelectorRecords < 0 || numVarSelectorRecords > 10000) { + throw new IOException("Invalid numVarSelectorRecords: " + numVarSelectorRecords); + } + + Map nonDefaultOffsetMap = new HashMap<>(); + + for (int i = 0; i < numVarSelectorRecords; ++i) { + byte[] input = new byte[3]; + this.rf.read(input); + int selectorUnicodeValue = this.byte2int(input, 3); + this.rf.readInt(); // defaultUVSOffset + int nonDefaultUVSOffset = this.rf.readInt(); + + if (nonDefaultUVSOffset > 0) { + nonDefaultOffsetMap.put(selectorUnicodeValue, nonDefaultUVSOffset); } } - if (mapExt > 0) { - rf.seek(table_location[0] + mapExt); - int format = rf.readUnsignedShort(); - switch (format) { - case 0: - cmapExt = readFormat0(); - break; - case 4: - cmapExt = readFormat4(); - break; - case 6: - cmapExt = readFormat6(); - break; - case 12: - cmapExt = readFormat12(); - break; + + for (Map.Entry entry : nonDefaultOffsetMap.entrySet()) { + Integer selectorUnicodeValue = entry.getKey(); + int nonDefaultUVSOffset = entry.getValue(); + + this.rf.seek((long) (format14Location + nonDefaultUVSOffset)); + int mappingNums = this.rf.readInt(); + + if (mappingNums < 0 || mappingNums > 10000) { + // invalid mapping + continue; + } + + for (int i = 0; i < mappingNums; ++i) { + byte[] input = new byte[3]; + this.rf.read(input); + int unicodeValue = this.byte2int(input, 3); + int glyphId = this.rf.readUnsignedShort(); + result.put(unicodeValue + "_" + selectorUnicodeValue, + new int[]{glyphId, this.getGlyphWidth(glyphId)}); } } + return result; + } + + public int byte2int(byte[] data, int n) { + if (data == null || n <= 0 || n > 4 || data.length < n) { + return 0; + } + int result = 0; + for (int i = 0; i < n; i++) { + result = (result << 8) | (data[i] & 0xFF); + } + return result; + } + + public int[] getFormat14MetricsTT(int char1, int char2) { + if (this.cmap05 != null) { + return this.cmap05.get(char1 + "_" + char2); + } + return new int[]{-1, -1}; } HashMap readFormat12() throws IOException { @@ -1419,6 +1596,9 @@ public int[] getMetricsTT(int c) { if (cmap10 != null) { return cmap10.get(c); } + if (cmap05 != null) { + return cmap05.get(c); + } return null; } diff --git a/openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFontUnicode.java b/openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFontUnicode.java index 125133939..b8366bcc7 100755 --- a/openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFontUnicode.java +++ b/openpdf/src/main/java/com/lowagie/text/pdf/TrueTypeFontUnicode.java @@ -268,7 +268,13 @@ private PdfStream getToUnicode(int[][] metrics) { --size; int[] metric = metrics[k]; String fromTo = toHex(metric[0]); - buf.append(fromTo).append(fromTo).append(toHex(metric[2])).append('\n'); + String hexString; + if (metric.length == 4) { + hexString = toHex(metric[2], metric[3]); + } else { + hexString = toHex(metric[2]); + } + buf.append(fromTo).append(fromTo).append(hexString).append('\n'); } buf.append( "endbfrange\n" + @@ -585,4 +591,27 @@ public int[] getCharBBox(int c) { return bboxes[m[0]]; } + private String toHex(int char1, int char2) { + String hex1; + int high; + int low; + if (char1 < 65536) { + hex1 = toHex4(char1); + } else { + char1 -= 65536; + high = char1 / 1024 + '\ud800'; + low = char1 % 1024 + '\udc00'; + hex1 = toHex4(high) + toHex4(low); + } + String hex2; + if (char2 < 65536) { + hex2 = toHex4(char2); + } else { + char2 -= 65536; + high = char2 / 1024 + '\ud800'; + low = char2 % 1024 + '\udc00'; + hex2 = toHex4(high) + toHex4(low); + } + return "[<" + hex1 + hex2 + ">]"; + } }