diff options
author | upstream source tree <ports@midipix.org> | 2015-03-15 20:14:05 -0400 |
---|---|---|
committer | upstream source tree <ports@midipix.org> | 2015-03-15 20:14:05 -0400 |
commit | 554fd8c5195424bdbcabf5de30fdc183aba391bd (patch) | |
tree | 976dc5ab7fddf506dadce60ae936f43f58787092 /libjava/classpath/gnu/xml/stream/UnicodeReader.java | |
download | cbb-gcc-4.6.4-554fd8c5195424bdbcabf5de30fdc183aba391bd.tar.bz2 cbb-gcc-4.6.4-554fd8c5195424bdbcabf5de30fdc183aba391bd.tar.xz |
obtained gcc-4.6.4.tar.bz2 from upstream website;upstream
verified gcc-4.6.4.tar.bz2.sig;
imported gcc-4.6.4 source tree from verified upstream tarball.
downloading a git-generated archive based on the 'upstream' tag
should provide you with a source tree that is binary identical
to the one extracted from the above tarball.
if you have obtained the source via the command 'git clone',
however, do note that line-endings of files in your working
directory might differ from line-endings of the respective
files in the upstream repository.
Diffstat (limited to 'libjava/classpath/gnu/xml/stream/UnicodeReader.java')
-rw-r--r-- | libjava/classpath/gnu/xml/stream/UnicodeReader.java | 205 |
1 files changed, 205 insertions, 0 deletions
diff --git a/libjava/classpath/gnu/xml/stream/UnicodeReader.java b/libjava/classpath/gnu/xml/stream/UnicodeReader.java new file mode 100644 index 000000000..aa2a95422 --- /dev/null +++ b/libjava/classpath/gnu/xml/stream/UnicodeReader.java @@ -0,0 +1,205 @@ +/* UnicodeReader.java -- + Copyright (C) 2005 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + +package gnu.xml.stream; + +import java.io.IOException; +import java.io.Reader; + +/** + * A reader that converts UTF-16 characters to Unicode code points. + * + * @author <a href='mailto:dog@gnu.org'>Chris Burdess</a> + */ +public class UnicodeReader +{ + + final Reader in; + + UnicodeReader(Reader in) + { + this.in = in; + } + + public void mark(int limit) + throws IOException + { + in.mark(limit * 2); + } + + public void reset() + throws IOException + { + in.reset(); + } + + public int read() + throws IOException + { + int ret = in.read(); + if (ret == -1) + return ret; + if (ret >= 0xd800 && ret < 0xdc00) + { + // Unicode surrogate? + int low = in.read(); + if (low >= 0xdc00 && low < 0xe000) + ret = Character.toCodePoint((char) ret, (char) low); + else + throw new IOException("unpaired surrogate: U+" + + Integer.toHexString(ret)); + } + else if (ret >= 0xdc00 && ret < 0xe000) + throw new IOException("unpaired surrogate: U+" + + Integer.toHexString(ret)); + return ret; + } + + public int read(int[] buf, int off, int len) + throws IOException + { + if (len == 0) + return 0; + char[] b2 = new char[len]; + int ret = in.read(b2, 0, len); + if (ret <= 0) + return ret; + int l = ret - 1; + int i = 0, j = off; + for (; i < l; i++) + { + char c = b2[i]; + if (c >= 0xd800 && c < 0xdc00) + { + // Unicode surrogate? + char d = b2[i + 1]; + if (d >= 0xdc00 && d < 0xe000) + { + buf[j++] = Character.toCodePoint(c, d); + i++; + continue; + } + else + throw new IOException("unpaired surrogate: U+" + + Integer.toHexString(c)); + } + else if (c >= 0xdc00 && c < 0xe000) + throw new IOException("unpaired surrogate: U+" + + Integer.toHexString(c)); + buf[j++] = (int) c; + } + if (i == l) + { + // last char + char c = b2[l]; + if (c >= 0xd800 && c < 0xdc00) + { + int low = in.read(); + if (low >= 0xdc00 && low < 0xe000) + { + buf[j++] = Character.toCodePoint(c, (char) low); + return j; + } + else + throw new IOException("unpaired surrogate: U+" + + Integer.toHexString(c)); + } + else if (c >= 0xdc00 && c < 0xe000) + throw new IOException("unpaired surrogate: U+" + + Integer.toHexString(c)); + buf[j++] = (int) c; + } + return j; + } + + public void close() + throws IOException + { + in.close(); + } + + /** + * Returns the specified UTF-16 char array as an array of Unicode code + * points. + */ + public static int[] toCodePointArray(String text) + throws IOException + { + char[] b2 = text.toCharArray(); + int[] buf = new int[b2.length]; + if (b2.length > 0) + { + int l = b2.length - 1; + int i = 0, j = 0; + for (; i < l; i++) + { + char c = b2[i]; + if (c >= 0xd800 && c < 0xdc00) + { + // Unicode surrogate? + char d = b2[i + 1]; + if (d >= 0xdc00 && d < 0xe000) + { + buf[j++] = Character.toCodePoint(c, d); + i++; + continue; + } + else + throw new IOException("unpaired surrogate: U+" + + Integer.toHexString(c)); + } + else if (c >= 0xdc00 && c < 0xe000) + throw new IOException("unpaired surrogate: U+" + + Integer.toHexString(c)); + buf[j++] = (int) c; + } + if (i == l) + { + // last char + buf[j++] = (int) b2[l]; + if (j < buf.length) + { + int[] buf2 = new int[j]; + System.arraycopy(buf, 0, buf2, 0, j); + buf = buf2; + } + } + } + return buf; + } + +} |