From 554fd8c5195424bdbcabf5de30fdc183aba391bd Mon Sep 17 00:00:00 2001 From: upstream source tree Date: Sun, 15 Mar 2015 20:14:05 -0400 Subject: obtained gcc-4.6.4.tar.bz2 from upstream website; verified gcc-4.6.4.tar.bz2.sig; imported gcc-4.6.4 source tree from verified upstream tarball. downloading a git-generated archive based on the 'upstream' tag should provide you with a source tree that is binary identical to the one extracted from the above tarball. if you have obtained the source via the command 'git clone', however, do note that line-endings of files in your working directory might differ from line-endings of the respective files in the upstream repository. --- .../gnu/java/awt/font/opentype/CharGlyphMap.java | 1027 ++++++++++++++++++++ 1 file changed, 1027 insertions(+) create mode 100644 libjava/classpath/gnu/java/awt/font/opentype/CharGlyphMap.java (limited to 'libjava/classpath/gnu/java/awt/font/opentype/CharGlyphMap.java') diff --git a/libjava/classpath/gnu/java/awt/font/opentype/CharGlyphMap.java b/libjava/classpath/gnu/java/awt/font/opentype/CharGlyphMap.java new file mode 100644 index 000000000..8529f7e47 --- /dev/null +++ b/libjava/classpath/gnu/java/awt/font/opentype/CharGlyphMap.java @@ -0,0 +1,1027 @@ +/* CharGlyphMap.java -- Manages the 'cmap' table of TrueType fonts + Copyright (C) 2006 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + + +package gnu.java.awt.font.opentype; + +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.ShortBuffer; +import java.nio.IntBuffer; + + +/** + * A mapping from Unicode codepoints to glyphs. This mapping + * does not perform any re-ordering or decomposition, so it + * is not everything that is needed to support Unicode. + * + *

This class manages the cmap table of + * OpenType and TrueType fonts. + * + * @see + * the cmap part of Adobe’ OpenType Specification + * + * @see + * the cmap section of Apple’s TrueType Reference + * Manual + * + * @author Sascha Brawer (brawer@dandelis.ch) + */ +public abstract class CharGlyphMap +{ + private static final int PLATFORM_UNICODE = 0; + private static final int PLATFORM_MACINTOSH = 1; + private static final int PLATFORM_MICROSOFT = 3; + + + /** + * Determines the glyph index for a given Unicode codepoint. Users + * should be aware that the character-to-glyph mapping not not + * everything that is needed for full Unicode support. For example, + * the cmap table is not able to synthesize accented + * glyphs from the canonical decomposition sequence, even if the + * font would contain a glyph for the composed form. + * + * @param ucs4 the Unicode codepoint in UCS-4 encoding. Surrogates + * (U+D800 to U+DFFF) cannot be passed, they must be mapped to + * UCS-4 first. + * + * @return the glyph index, or 0 if the font does not contain + * a glyph for this codepoint. + */ + public abstract int getGlyph(int ucs4); + + + /** + * Reads a CharGlyphMap from an OpenType or TrueType cmap + * table. The current implementation works as follows: + * + *

  1. If the font has a type 4 cmap for the Unicode platform + * (encoding 0, 1, 2, 3 or 4), or a type 4 cmap for the Microsoft + * platform (encodings 1 or 10), that table is used to map Unicode + * codepoints to glyphs. Most recent fonts, both for Macintosh and + * Windows, should provide such a table.
  2. + * + *
  3. Otherwise, if the font has any type 0 cmap for the Macintosh + * platform, a Unicode-to-glyph mapping is synthesized from certain + * type 0 cmaps. The current implementation collects mappings from + * Roman, Icelandic, Turkish, Croatian, Romanian, Eastern European, + * Cyrillic, Greek, Hebrew, Arabic and Farsi cmaps.
  4. .
+ * + * @param buf a buffer whose position is right at the start + * of the entire cmap table, and whose limit + * is at its end. + * + * @return a concrete subclass of CharGlyphMap + * that performs the mapping. + * + * @see the cmap part of Adobe’ OpenType Specification + * + * @see the cmap section of Apple’s TrueType Reference + * Manual + */ + public static CharGlyphMap forTable(ByteBuffer buf) + { + boolean hasType0 = false; + int start4 = -1, platform4 = 0, encoding4 = 0; + int start12 = -1, platform12 = 0, encoding12 = 0; + int version; + int numTables; + int tableStart = buf.position(); + int limit = buf.limit(); + int format, platform, language, encoding, length, offset; + + version = buf.getChar(); + if (version != 0) + return null; + + numTables = buf.getChar(); + for (int i = 0; i < numTables; i++) + { + buf.limit(limit).position(tableStart + 4 + i * 8); + platform = buf.getChar(); + encoding = buf.getChar(); + offset = tableStart + buf.getInt(); + + buf.position(offset); + format = buf.getChar(); + + switch (format) + { + case 0: + hasType0 = true; + break; + + case 4: + length = buf.getChar(); + language = buf.getChar(); + if ((start4 == -1) + && Type4.isSupported(platform, language, encoding)) + { + start4 = offset; + platform4 = platform; + encoding4 = encoding; + } + break; + + case 12: + if ((start12 == -1) && Type12.isSupported(platform, encoding)) + { + start12 = offset; + platform12 = platform; + encoding12 = encoding; + } + break; + } + } + + + if (start12 >= 0) + { + try + { + buf.limit(limit).position(start12); + return new Type12(buf, platform12, encoding12); + } + catch (Exception ex) + { + ex.printStackTrace(); + } + } + + if (start4 >= 0) + { + try + { + buf.limit(limit).position(start4); + return Type4.readTable(buf, platform4, encoding4); + } + catch (Exception ex) + { + } + } + + if (hasType0) + { + try + { + buf.limit(limit).position(tableStart); + return new Type0(buf); + } + catch (Exception ex) + { + } + } + + return new Dummy(); + } + + + /** + * A dummy mapping that maps anything to the undefined glyph. + * Used if no other cmap is understood in a font. + * + * @author Sascha Brawer (brawer@dandelis.ch) + */ + private static final class Dummy + extends CharGlyphMap + { + public int getGlyph(int ucs4) + { + return 0; + } + } + + + /** + * A mapping from Unicode code points to glyph IDs through CMAP Type + * 0 tables. These tables have serious limitations: Only the first + * 256 glyphs can be addressed, and the source of the mapping is not + * Unicode, but an encoding used on the Macintosh. + * + *

However, some fonts have only a Type 0 cmap. In this case, we + * process all the Type 0 tables we understand, and establish + * a reversed glyph-to-Unicode mapping. When a glyph is requested + * for a given Unicode character, we perform a linear search on the + * reversed table to find the glyph which maps to the requested + * character. While not blazingly fast, this gives a reasonable + * fallback for old fonts. + * + * @author Sascha Brawer (brawer@dandelis.ch) + */ + private static final class Type0 + extends CharGlyphMap + { + /** + * An array whose i-th element indicates the + * Unicode code point of glyph i in the font. + */ + private char[] glyphToUCS2 = new char[256]; + + + /** + * A String whose charAt(i) is the Unicode character + * that corresponds to the codepoint i + 127 in the + * MacOS Arabic encoding. + * + * @see the Unicode mapping table for the MacOS Arabic encoding + */ + private static final String UPPER_ARABIC + = "\u007e\u0000\u00c4\u00a0\u00c7\u00c9\u00d1\u00d6\u00dc\u00e1" + + "\u00e0\u00e2\u00e4\u06ba\u00ab\u00e7\u00e9\u00e8\u00ea\u00eb" + + "\u00ed\u2026\u00ee\u00ef\u00f1\u00f3\u00bb\u00f4\u00f6\u00f7" + + "\u00fa\u00f9\u00fb\u00fc\u0020\u0021\"\u0023\u0024\u066a" + + "\u0026\u0027\u0028\u0029\u002a\u002b\u060c\u002d\u002e\u002f" + + "\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669" + + "\u003a\u061b\u003c\u003d\u003e\u061f\u274a\u0621\u0622\u0623" + + "\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d" + + "\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637" + + "\u0638\u0639\u063a\u005b\\\u005d\u005e\u005f\u0640\u0641" + + "\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a\u064b" + + "\u064c\u064d\u064e\u064f\u0650\u0651\u0652\u067e\u0679\u0686" + + "\u06d5\u06a4\u06af\u0688\u0691\u007b\u007c\u007d\u0698\u06d2"; + + + /** + * A String whose charAt(i) is the Unicode character + * that corresponds to the codepoint i + 127 in the + * MacOS East European Roman encoding. + * + * @see the Unicode mapping table for the MacOS Central European + * encoding + */ + private static final String UPPER_EAST_EUROPEAN_ROMAN + = "\u007e\u0000\u00c4\u0100\u0101\u00c9\u0104\u00d6\u00dc\u00e1" + + "\u0105\u010c\u00e4\u010d\u0106\u0107\u00e9\u0179\u017a\u010e" + + "\u00ed\u010f\u0112\u0113\u0116\u00f3\u0117\u00f4\u00f6\u00f5" + + "\u00fa\u011a\u011b\u00fc\u2020\u00b0\u0118\u00a3\u00a7\u2022" + + "\u00b6\u00df\u00ae\u00a9\u2122\u0119\u00a8\u2260\u0123\u012e" + + "\u012f\u012a\u2264\u2265\u012b\u0136\u2202\u2211\u0142\u013b" + + "\u013c\u013d\u013e\u0139\u013a\u0145\u0146\u0143\u00ac\u221a" + + "\u0144\u0147\u2206\u00ab\u00bb\u2026\u00a0\u0148\u0150\u00d5" + + "\u0151\u014c\u2013\u2014\u201c\u201d\u2018\u2019\u00f7\u25ca" + + "\u014d\u0154\u0155\u0158\u2039\u203a\u0159\u0156\u0157\u0160" + + "\u201a\u201e\u0161\u015a\u015b\u00c1\u0164\u0165\u00cd\u017d" + + "\u017e\u016a\u00d3\u00d4\u016b\u016e\u00da\u016f\u0170\u0171" + + "\u0172\u0173\u00dd\u00fd\u0137\u017b\u0141\u017c\u0122\u02c7"; + + + /** + * A String whose charAt(i) is the Unicode character + * that corresponds to the codepoint i + 127 in the + * MacOS Roman encoding for the Croatian language. + * + * @see the Unicode mapping table for the MacOS Croatian encoding + */ + private static final String UPPER_CROATIAN + = "\u007e\u0000\u00c4\u00c5\u00c7\u00c9\u00d1\u00d6\u00dc\u00e1" + + "\u00e0\u00e2\u00e4\u00e3\u00e5\u00e7\u00e9\u00e8\u00ea\u00eb" + + "\u00ed\u00ec\u00ee\u00ef\u00f1\u00f3\u00f2\u00f4\u00f6\u00f5" + + "\u00fa\u00f9\u00fb\u00fc\u2020\u00b0\u00a2\u00a3\u00a7\u2022" + + "\u00b6\u00df\u00ae\u0160\u2122\u00b4\u00a8\u2260\u017d\u00d8" + + "\u221e\u00b1\u2264\u2265\u2206\u00b5\u2202\u2211\u220f\u0161" + + "\u222b\u00aa\u00ba\u03a9\u017e\u00f8\u00bf\u00a1\u00ac\u221a" + + "\u0192\u2248\u0106\u00ab\u010c\u2026\u00a0\u00c0\u00c3\u00d5" + + "\u0152\u0153\u0110\u2014\u201c\u201d\u2018\u2019\u00f7\u25ca" + + "\uf8ff\u00a9\u2044\u20ac\u2039\u203a\u00c6\u00bb\u2013\u00b7" + + "\u201a\u201e\u2030\u00c2\u0107\u00c1\u010d\u00c8\u00cd\u00ce" + + "\u00cf\u00cc\u00d3\u00d4\u0111\u00d2\u00da\u00db\u00d9\u0131" + + "\u02c6\u02dc\u00af\u03c0\u00cb\u02da\u00b8\u00ca\u00e6\u02c7"; + + + /** + * A String whose charAt(i) is the Unicode character + * that corresponds to the codepoint i + 127 in the + * MacOS Cyrillic encoding. + * + * @see the Unicode mapping table for the MacOS Cyrillic encoding + */ + private static final String UPPER_CYRILLIC + = "\u007e\u0000\u0410\u0411\u0412\u0413\u0414\u0415\u0416\u0417" + + "\u0418\u0419\u041a\u041b\u041c\u041d\u041e\u041f\u0420\u0421" + + "\u0422\u0423\u0424\u0425\u0426\u0427\u0428\u0429\u042a\u042b" + + "\u042c\u042d\u042e\u042f\u2020\u00b0\u0490\u00a3\u00a7\u2022" + + "\u00b6\u0406\u00ae\u00a9\u2122\u0402\u0452\u2260\u0403\u0453" + + "\u221e\u00b1\u2264\u2265\u0456\u00b5\u0491\u0408\u0404\u0454" + + "\u0407\u0457\u0409\u0459\u040a\u045a\u0458\u0405\u00ac\u221a" + + "\u0192\u2248\u2206\u00ab\u00bb\u2026\u00a0\u040b\u045b\u040c" + + "\u045c\u0455\u2013\u2014\u201c\u201d\u2018\u2019\u00f7\u201e" + + "\u040e\u045e\u040f\u045f\u2116\u0401\u0451\u044f\u0430\u0431" + + "\u0432\u0433\u0434\u0435\u0436\u0437\u0438\u0439\u043a\u043b" + + "\u043c\u043d\u043e\u043f\u0440\u0441\u0442\u0443\u0444\u0445" + + "\u0446\u0447\u0448\u0449\u044a\u044b\u044c\u044d\u044e\u20ac"; + + + /** + * A String whose charAt(i) is the Unicode character + * that corresponds to the codepoint i + 127 in the + * MacOS Arabic encoding with the Farsi language. + * + * @see the Unicode mapping table for the MacOS Farsi encoding + */ + private static final String UPPER_FARSI + = "\u007e\u0000\u00c4\u00a0\u00c7\u00c9\u00d1\u00d6\u00dc\u00e1" + + "\u00e0\u00e2\u00e4\u06ba\u00ab\u00e7\u00e9\u00e8\u00ea\u00eb" + + "\u00ed\u2026\u00ee\u00ef\u00f1\u00f3\u00bb\u00f4\u00f6\u00f7" + + "\u00fa\u00f9\u00fb\u00fc\u0020\u0021\"\u0023\u0024\u066a" + + "\u0026\u0027\u0028\u0029\u002a\u002b\u060c\u002d\u002e\u002f" + + "\u06f0\u06f1\u06f2\u06f3\u06f4\u06f5\u06f6\u06f7\u06f8\u06f9" + + "\u003a\u061b\u003c\u003d\u003e\u061f\u274a\u0621\u0622\u0623" + + "\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d" + + "\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637" + + "\u0638\u0639\u063a\u005b\\\u005d\u005e\u005f\u0640\u0641" + + "\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a\u064b" + + "\u064c\u064d\u064e\u064f\u0650\u0651\u0652\u067e\u0679\u0686" + + "\u06d5\u06a4\u06af\u0688\u0691\u007b\u007c\u007d\u0698\u06d2"; + + + /** + * A String whose charAt(i) is the Unicode character + * that corresponds to the codepoint i + 127 in the + * MacOS Greek encoding. + * + * @see the Unicode mapping table for the MacOS Greek encoding + */ + private static final String UPPER_GREEK + = "\u007e\u0000\u00c4\u00b9\u00b2\u00c9\u00b3\u00d6\u00dc\u0385" + + "\u00e0\u00e2\u00e4\u0384\u00a8\u00e7\u00e9\u00e8\u00ea\u00eb" + + "\u00a3\u2122\u00ee\u00ef\u2022\u00bd\u2030\u00f4\u00f6\u00a6" + + "\u20ac\u00f9\u00fb\u00fc\u2020\u0393\u0394\u0398\u039b\u039e" + + "\u03a0\u00df\u00ae\u00a9\u03a3\u03aa\u00a7\u2260\u00b0\u00b7" + + "\u0391\u00b1\u2264\u2265\u00a5\u0392\u0395\u0396\u0397\u0399" + + "\u039a\u039c\u03a6\u03ab\u03a8\u03a9\u03ac\u039d\u00ac\u039f" + + "\u03a1\u2248\u03a4\u00ab\u00bb\u2026\u00a0\u03a5\u03a7\u0386" + + "\u0388\u0153\u2013\u2015\u201c\u201d\u2018\u2019\u00f7\u0389" + + "\u038a\u038c\u038e\u03ad\u03ae\u03af\u03cc\u038f\u03cd\u03b1" + + "\u03b2\u03c8\u03b4\u03b5\u03c6\u03b3\u03b7\u03b9\u03be\u03ba" + + "\u03bb\u03bc\u03bd\u03bf\u03c0\u03ce\u03c1\u03c3\u03c4\u03b8" + + "\u03c9\u03c2\u03c7\u03c5\u03b6\u03ca\u03cb\u0390\u03b0\u00ad"; + + + /** + * A String whose charAt(i) is the Unicode character + * that corresponds to the codepoint i + 127 in the + * MacOS Hebrew encoding. + * + *

The codepoint 0x81 (HEBREW LIGATURE YIDDISH YOD YOD PATAH) + * has no composed Unicode equivalent, but is expressed as the + * sequence U+05F2 U+05B7 in Unicode. A similar situation exists + * with the codepoint 0xC0 (HEBREW LIGATURE LAMED HOLAM), which + * MacOS converts to U+F86A U+05DC U+05B9. To correctly deal + * with these sequences, we probably should synthesize a ligature + * table if a Hebrew font only provides a Type 0 CMAP. + * + * @see the Unicode mapping table for the MacOS Hebrew encoding + */ + private static final String UPPER_HEBREW + = "\u007e\u0000\u00c4\u0000\u00c7\u00c9\u00d1\u00d6\u00dc\u00e1" + + "\u00e0\u00e2\u00e4\u00e3\u00e5\u00e7\u00e9\u00e8\u00ea\u00eb" + + "\u00ed\u00ec\u00ee\u00ef\u00f1\u00f3\u00f2\u00f4\u00f6\u00f5" + + "\u00fa\u00f9\u00fb\u00fc\u0020\u0021\"\u0023\u0024\u0025" + + "\u20aa\u0027\u0029\u0028\u002a\u002b\u002c\u002d\u002e\u002f" + + "\u0030\u0031\u0032\u0033\u0034\u0035\u0036\u0037\u0038\u0039" + + "\u003a\u003b\u003c\u003d\u003e\u003f\u0000\u201e\uf89b\uf89c" + + "\uf89d\uf89e\u05bc\ufb4b\ufb35\u2026\u00a0\u05b8\u05b7\u05b5" + + "\u05b6\u05b4\u2013\u2014\u201c\u201d\u2018\u2019\ufb2a\ufb2b" + + "\u05bf\u05b0\u05b2\u05b1\u05bb\u05b9\u0000\u05b3\u05d0\u05d1" + + "\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7\u05d8\u05d9\u05da\u05db" + + "\u05dc\u05dd\u05de\u05df\u05e0\u05e1\u05e2\u05e3\u05e4\u05e5" + + "\u05e6\u05e7\u05e8\u05e9\u05ea\u007d\u005d\u007b\u005b\u007c"; + + + /** + * A String whose charAt(i) is the Unicode character + * that corresponds to the codepoint i + 127 in the + * MacOS Roman encoding with the Icelandic language. + * + * @see the Unicode mapping table for the MacOS Icelandic encoding + */ + private static final String UPPER_ICELANDIC + = "\u007e\u0000\u00c4\u00c5\u00c7\u00c9\u00d1\u00d6\u00dc\u00e1" + + "\u00e0\u00e2\u00e4\u00e3\u00e5\u00e7\u00e9\u00e8\u00ea\u00eb" + + "\u00ed\u00ec\u00ee\u00ef\u00f1\u00f3\u00f2\u00f4\u00f6\u00f5" + + "\u00fa\u00f9\u00fb\u00fc\u00dd\u00b0\u00a2\u00a3\u00a7\u2022" + + "\u00b6\u00df\u00ae\u00a9\u2122\u00b4\u00a8\u2260\u00c6\u00d8" + + "\u221e\u00b1\u2264\u2265\u00a5\u00b5\u2202\u2211\u220f\u03c0" + + "\u222b\u00aa\u00ba\u03a9\u00e6\u00f8\u00bf\u00a1\u00ac\u221a" + + "\u0192\u2248\u2206\u00ab\u00bb\u2026\u00a0\u00c0\u00c3\u00d5" + + "\u0152\u0153\u2013\u2014\u201c\u201d\u2018\u2019\u00f7\u25ca" + + "\u00ff\u0178\u2044\u20ac\u00d0\u00f0\u00de\u00fe\u00fd\u00b7" + + "\u201a\u201e\u2030\u00c2\u00ca\u00c1\u00cb\u00c8\u00cd\u00ce" + + "\u00cf\u00cc\u00d3\u00d4\uf8ff\u00d2\u00da\u00db\u00d9\u0131" + + "\u02c6\u02dc\u00af\u02d8\u02d9\u02da\u00b8\u02dd\u02db\u02c7"; + + + /** + * A String whose charAt(i) is the Unicode character + * that corresponds to the codepoint i + 127 in the + * MacOS Roman encoding for most languages. Exceptions include + * Croatian, Icelandic, Romanian, and Turkish. + * + * @see the Unicode mapping table for the MacOS Roman encoding + */ + private static final String UPPER_ROMAN + = "\u007e\u0000\u00c4\u00c5\u00c7\u00c9\u00d1\u00d6\u00dc\u00e1" + + "\u00e0\u00e2\u00e4\u00e3\u00e5\u00e7\u00e9\u00e8\u00ea\u00eb" + + "\u00ed\u00ec\u00ee\u00ef\u00f1\u00f3\u00f2\u00f4\u00f6\u00f5" + + "\u00fa\u00f9\u00fb\u00fc\u2020\u00b0\u00a2\u00a3\u00a7\u2022" + + "\u00b6\u00df\u00ae\u00a9\u2122\u00b4\u00a8\u2260\u00c6\u00d8" + + "\u221e\u00b1\u2264\u2265\u00a5\u00b5\u2202\u2211\u220f\u03c0" + + "\u222b\u00aa\u00ba\u03a9\u00e6\u00f8\u00bf\u00a1\u00ac\u221a" + + "\u0192\u2248\u2206\u00ab\u00bb\u2026\u00a0\u00c0\u00c3\u00d5" + + "\u0152\u0153\u2013\u2014\u201c\u201d\u2018\u2019\u00f7\u25ca" + + "\u00ff\u0178\u2044\u20ac\u2039\u203a\ufb01\ufb02\u2021\u00b7" + + "\u201a\u201e\u2030\u00c2\u00ca\u00c1\u00cb\u00c8\u00cd\u00ce" + + "\u00cf\u00cc\u00d3\u00d4\uf8ff\u00d2\u00da\u00db\u00d9\u0131" + + "\u02c6\u02dc\u00af\u02d8\u02d9\u02da\u00b8\u02dd\u02db\u02c7"; + + + /** + * A String whose charAt(i) is the Unicode character + * that corresponds to the codepoint i + 127 in the + * MacOS Roman encoding with the Romanian language. + * + * @see the Unicode mapping table for the MacOS Romanian encoding + */ + private static final String UPPER_ROMANIAN + = "\u007e\u0000\u00c4\u00c5\u00c7\u00c9\u00d1\u00d6\u00dc\u00e1" + + "\u00e0\u00e2\u00e4\u00e3\u00e5\u00e7\u00e9\u00e8\u00ea\u00eb" + + "\u00ed\u00ec\u00ee\u00ef\u00f1\u00f3\u00f2\u00f4\u00f6\u00f5" + + "\u00fa\u00f9\u00fb\u00fc\u2020\u00b0\u00a2\u00a3\u00a7\u2022" + + "\u00b6\u00df\u00ae\u00a9\u2122\u00b4\u00a8\u2260\u0102\u0218" + + "\u221e\u00b1\u2264\u2265\u00a5\u00b5\u2202\u2211\u220f\u03c0" + + "\u222b\u00aa\u00ba\u03a9\u0103\u0219\u00bf\u00a1\u00ac\u221a" + + "\u0192\u2248\u2206\u00ab\u00bb\u2026\u00a0\u00c0\u00c3\u00d5" + + "\u0152\u0153\u2013\u2014\u201c\u201d\u2018\u2019\u00f7\u25ca" + + "\u00ff\u0178\u2044\u20ac\u2039\u203a\u021a\u021b\u2021\u00b7" + + "\u201a\u201e\u2030\u00c2\u00ca\u00c1\u00cb\u00c8\u00cd\u00ce" + + "\u00cf\u00cc\u00d3\u00d4\uf8ff\u00d2\u00da\u00db\u00d9\u0131" + + "\u02c6\u02dc\u00af\u02d8\u02d9\u02da\u00b8\u02dd\u02db\u02c7"; + + + /** + * A String whose charAt(i) is the Unicode character + * that corresponds to the codepoint i + 127 in the + * MacOS Roman encoding with the Turkish language. + * + * @see the Unicode mapping table for the MacOS Turkish encoding + */ + private static final String UPPER_TURKISH + = "\u007e\u0000\u00c4\u00c5\u00c7\u00c9\u00d1\u00d6\u00dc\u00e1" + + "\u00e0\u00e2\u00e4\u00e3\u00e5\u00e7\u00e9\u00e8\u00ea\u00eb" + + "\u00ed\u00ec\u00ee\u00ef\u00f1\u00f3\u00f2\u00f4\u00f6\u00f5" + + "\u00fa\u00f9\u00fb\u00fc\u2020\u00b0\u00a2\u00a3\u00a7\u2022" + + "\u00b6\u00df\u00ae\u00a9\u2122\u00b4\u00a8\u2260\u00c6\u00d8" + + "\u221e\u00b1\u2264\u2265\u00a5\u00b5\u2202\u2211\u220f\u03c0" + + "\u222b\u00aa\u00ba\u03a9\u00e6\u00f8\u00bf\u00a1\u00ac\u221a" + + "\u0192\u2248\u2206\u00ab\u00bb\u2026\u00a0\u00c0\u00c3\u00d5" + + "\u0152\u0153\u2013\u2014\u201c\u201d\u2018\u2019\u00f7\u25ca" + + "\u00ff\u0178\u011e\u011f\u0130\u0131\u015e\u015f\u2021\u00b7" + + "\u201a\u201e\u2030\u00c2\u00ca\u00c1\u00cb\u00c8\u00cd\u00ce" + + "\u00cf\u00cc\u00d3\u00d4\uf8ff\u00d2\u00da\u00db\u00d9\uf8a0" + + "\u02c6\u02dc\u00af\u02d8\u02d9\u02da\u00b8\u02dd\u02db\u02c7"; + + + /** + * Constructs a CharGlyphMap.Type0 from all type 0 cmaps provided + * by the font. The implementation is able to fuse multiple type + * 0 cmaps, such as the MacRoman, Turkish, Icelandic and Croatian + * encoding, into a single map from Unicode characters to glyph + * indices. + * + * @param buf a ByteBuffer whose position is right at the + * beginning of the entire cmap table of the font (not + * at some subtable). + */ + public Type0(ByteBuffer buf) + { + int numTables; + int tableStart = buf.position(); + int limit = buf.limit(); + + /* The CMAP version must be 0. */ + if (buf.getChar() != 0) + throw new IllegalStateException(); + + numTables = buf.getChar(); + for (int i = 0; i < numTables; i++) + { + buf.limit(limit).position(tableStart + 4 + i * 8); + int platform = buf.getChar(); + int encoding = buf.getChar(); + int offset = tableStart + buf.getInt(); + + buf.position(offset); + int format = buf.getChar(); + int length = buf.getChar(); + buf.limit(offset + length); + int language = buf.getChar(); + + if (format == 0) + readSingleTable(buf, platform, language, encoding); + } + } + + + /** + * Processes a CMAP Type 0 table whose platform, encoding and + * language are already known. + * + * @param buf the buffer to read the table from, positioned + * right after the language tag. + */ + private void readSingleTable(ByteBuffer buf, + int platform, int language, + int encoding) + { + String upper = getUpper129(platform, encoding, language); + if (upper == null) + return; + + /* Skip the MacOS codepoints [0 .. 31] because they do not + * correspond to any Unicode codepoint. + */ + buf.position(buf.position() + 32); + + /* Irrespective of script and language, the MacOS codepoints + * [32 .. 126] correspond to the same Unicode codepoint. + */ + for (int i = 32; i < 126; i++) + glyphToUCS2[buf.get() & 0xff] = (char) i; + + for (int i = 127; i < 256; i++) + glyphToUCS2[buf.get() & 0xff] = upper.charAt(i - 127); + + /* Glyph 0 is always the undefined character, which has + * no codepoint in Unicode. + */ + glyphToUCS2[0] = 0; + } + + + /** + * Determines the glyph index for a given Unicode codepoint. + * + * @param ucs4 the Unicode codepoint in UCS-4 encoding. + * + * @return the glyph index, or 0 if the font does not contain + * a glyph for this codepoint. + */ + public int getGlyph(int ucs4) + { + /* This linear search is not exactly super fast. However, + * only really ancient fonts have only a type 0 cmap, + * so it should not hurt in very many cases. If it shows + * to be a performance problem, one could do a binary search + * on a 256-entry table sorted by Unicode codepoint. The + * matching index of that table could then be used to look + * up the glyph ID at that position. + */ + for (int i = 0; i < 256; i++) + if (glyphToUCS2[i] == ucs4) + return i; + return 0; + } + + + /** + * Returns a String whose charAt(i) is the Unicode + * character that corresponds to the codepoint i + + * 127 in the encoding specified by the platform, script + * and language tag of a Type 0 CMAP. + * + * @param language the language tag in the cmap subtable. For the + * Macintosh platform, this is 0 to indicate language-neutral + * encoding, or the MacOS language code plus one. The + * Apple documentation does not mention that one needs to be + * added, but the Adobe OpenType specification does. + * + * @return a String for mapping the top 129 characters to + * UCS-2. If platform is not 1 + * (indicating Macintosh), or if the combination of + * script and language is not + * recognized, null will be returned. + */ + private static String getUpper129(int platform, int script, int language) + { + if (platform != PLATFORM_MACINTOSH) + return null; + + switch (script) + { + case 0: /* smRoman */ + if (language == /* langIcelandic+1 */ 16) + return UPPER_ICELANDIC; + else if (language == /* langTurkish+1 */ 18) + return UPPER_TURKISH; + else if (language == /* langCroatian+1 */ 19) + return UPPER_CROATIAN; + else if (language == /* langRomanian+1 */ 38) + return UPPER_ROMANIAN; + else if (language == /* language-neutral */ 0) + return UPPER_ROMAN; + else + return null; + + case 4: /* smArabic */ + if (language == /* langFarsi+1 */ 32) + return UPPER_FARSI; + else + return UPPER_ARABIC; + + case 5: /* smHebrew */ + return UPPER_HEBREW; + + case 6: /* smGreek */ + return UPPER_GREEK; + + case 7: /* smCyrillic */ + return UPPER_CYRILLIC; + + case 29: /* smSlavic == smEastEurRoman */ + return UPPER_EAST_EUROPEAN_ROMAN; + } + + return null; + } + } + + + /** + * A mapping from Unicode code points to glyph IDs through CMAP Type + * 4 tables. These tables are able to map two-byte encoded text + * to glyph IDs, such as Unicode Basic Multilingual Plane which + * contains U+0000 .. U+FFFE without surrogates. + * + * @author Sascha Brawer (brawer@dandelis.ch) + */ + private static final class Type4 + extends CharGlyphMap + { + /** + * Determines whether this implementation supports a combination + * of platform, language and encoding is supported for a type 4 + * cmap table. + * + *

Currently, we support the following combinations: + * + *

+ * + *

Most recent Macintosh fonts provide a type 4 + * cmap for Unicode. Microsoft recommends providing a + * type 4 cmap for encoding 1 of the Microsoft + * platform. The implementation of GNU Classpath supports both + * variants. + * + *

Not supported are ShiftJIS, Big5, Wansung, Johab, and other + * non-Unicode encodings. Text can easily be converted to Unicode + * using the java.nio.charset package. + */ + static boolean isSupported(int platform, int language, int encoding) + { + switch (platform) + { + case PLATFORM_UNICODE: + return (encoding >= 0) && (encoding <= 4); + + case PLATFORM_MICROSOFT: + return (encoding == /* Basic Multilingual Plane */ 1) + || (encoding == /* Full Unicode */ 10); + } + + return false; + } + + + /** + * Processes a CMAP Type 4 table whose platform, encoding and + * language are already known. We understand the Unicode platform + * with encodings 0, 1, 2, 3 and 4, and the Microsoft platform + * with encodings 1 (Unicode BMP) and 10 (UCS-4). + * + * @param buf the buffer to read the table from, positioned at + * its beginning. + * + * @return a Type4 table, or null if the combination + * of platform and encoding is not understood. + */ + static Type4 readTable(ByteBuffer buf, + int platform, int encoding) + { + int tableStart = buf.position(); + char format = buf.getChar(); + int length = buf.getChar(); + int language = buf.getChar(); + + if ((format != 4) || !isSupported(platform, language, encoding)) + throw new IllegalArgumentException(); + + buf.limit(tableStart + length); + + int segCountX2 = buf.getChar(); + int segCount = segCountX2 / 2; + int searchRange = buf.getChar(); + int entrySelector = buf.getChar(); + int rangeShift = buf.getChar(); + + CharBuffer endCode, startCode, idRangeOffset_glyphID; + ShortBuffer idDelta; + + int pos = buf.position(); + endCode = buf.asCharBuffer(); + pos += segCountX2 + /* reservedPad */ 2; + + buf.position(pos); + startCode = buf.asCharBuffer(); + pos += segCountX2; + + buf.position(pos); + idDelta = buf.asShortBuffer(); + pos += segCountX2; + + buf.position(pos); + idRangeOffset_glyphID = buf.asCharBuffer(); + + endCode.limit(segCount); + startCode.limit(segCount); + idDelta.limit(segCount); + idRangeOffset_glyphID.limit((buf.limit() - pos) / 2); + + return new Type4(segCount, + endCode, startCode, idDelta, + idRangeOffset_glyphID); + } + + + private CharBuffer lastChar; + private CharBuffer firstChar; + private ShortBuffer idDelta; + private CharBuffer rangeID; + private int numSegments; + + private Type4(int numSegments, + CharBuffer lastChar, CharBuffer firstChar, + ShortBuffer idDelta, CharBuffer rangeID) + { + this.numSegments = numSegments; + this.lastChar = lastChar; + this.firstChar = firstChar; + this.idDelta = idDelta; + this.rangeID = rangeID; + } + + + /** + * Determines the glyph index for a given Unicode codepoint. + * + * @param ucs4 the Unicode codepoint in UCS-4 encoding. + * + * @return the glyph index, or 0 if the font does not contain + * a glyph for this codepoint. + */ + public int getGlyph(int ucs4) + { + char c, segStart; + int segment, idRangeOffset; + + if (ucs4 > 0xffff) + return 0; + + c = (char) ucs4; + segment = find(c); + segStart = firstChar.get(segment); + if ((c < segStart) || (c > lastChar.get(segment))) + return 0; + + /* + * System.out.println("seg " + segment + * + ", range=" + (int) rangeID[segment] + * + ", delta=" + delta[segment]); + */ + + idRangeOffset = rangeID.get(segment); + if (idRangeOffset == 0) + return (int) (char) (((int) c) + idDelta.get(segment)); + int result = rangeID.get((idRangeOffset >> 1) + + (c - segStart) + segment); + if (result == 0) + return 0; + return (int) (char) (result + idDelta.get(segment)); + } + + + private int find(char c) + { + int min, max, mid; + + min = 0; + max = numSegments - 1; + mid = max >> 1; + + while (min < max) + { + // System.out.println("(" + min + "," + max + ") " + mid); + char val = lastChar.get(mid); + if (val == c) + break; + else if (val < c) + min = mid + 1; + else if (val > c) + max = mid; + mid = (min + max) >> 1; + } + + return mid; + } + } + + + /** + * A mapping from Unicode code points to glyph IDs through CMAP Type + * 12 tables. These tables are able to map four-byte encoded text + * to glyph IDs, such as Unicode UCS-4. + * + * @author Sascha Brawer (brawer@dandelis.ch) + */ + private static final class Type12 + extends CharGlyphMap + { + int numGroups; + IntBuffer data; + + + /** + * Determines whether this implementation supports a combination + * of platform and encoding for a type 12 cmap table. + * + *

Currently, we support the following combinations: + * + *

+ */ + static boolean isSupported(int platform, int encoding) + { + switch (platform) + { + case PLATFORM_UNICODE: + return (encoding >= 0) && (encoding <= 4); + + case PLATFORM_MICROSOFT: + return (encoding == /* Basic Multilingual Plane */ 1) + || (encoding == /* Full Unicode */ 10); + } + + return false; + } + + + /** + * Constructs a cmap type 12 table whose platform and + * encoding are already known. We understand the Unicode platform + * with encodings 0, 1, 2, 3 and 4, and the Microsoft platform + * with encodings 1 (Unicode BMP) and 10 (UCS-4). + * + * @param buf the buffer to read the table from, positioned at + * its beginning. + */ + Type12(ByteBuffer buf, int platform, int encoding) + { + int tableStart = buf.position(); + int format = buf.getChar(); + if ((format != 12) || !isSupported(platform, encoding)) + throw new IllegalStateException(); + + buf.getChar(); // skip reserved field + buf.limit(tableStart + buf.getInt()); + int language = buf.getInt(); + numGroups = buf.getInt(); + data = buf.asIntBuffer(); + } + + + /** + * Determines the glyph index for a given Unicode codepoint. Users + * should be aware that the character-to-glyph mapping not not + * everything that is needed for full Unicode support. For example, + * the cmap table is not able to synthesize accented + * glyphs from the canonical decomposition sequence, even if the + * font would contain a glyph for the composed form. + * + * @param ucs4 the Unicode codepoint in UCS-4 encoding. Surrogates + * (U+D800 to U+DFFF) cannot be passed, they must be mapped to + * UCS-4 first. + * + * @return the glyph index, or 0 if the font does not contain + * a glyph for this codepoint. + */ + public int getGlyph(int ucs4) + { + int min, max, mid, startCharCode, endCharCode; + + min = 0; + max = numGroups - 1; + mid = max >> 1; + do + { + startCharCode = data.get(3 * mid); + endCharCode = data.get(3 * mid + 1); + + + /* + System.out.println("group " + mid + " (U+" + + Integer.toHexString(startCharCode) + + " .. U+" + Integer.toHexString(endCharCode) + + "): glyph " + (int) data.get(mid*3+2)); + */ + + if ((startCharCode <= ucs4) && (ucs4 <= endCharCode)) + return ucs4 + - startCharCode + + /* startGlyphID */ data.get(mid * 3 + 2); + + if (endCharCode < ucs4) + min = mid + 1; + else + max = mid; + mid = (min + max) >> 1; + } + while (min < max); + + startCharCode = data.get(3 * mid); + endCharCode = data.get(3 * mid + 1); + if ((startCharCode <= ucs4) && (ucs4 <= endCharCode)) + return ucs4 + - startCharCode + + /* startGlyphID */ data.get(mid * 3 + 2); + + return 0; + } + } +} -- cgit v1.2.3