summaryrefslogtreecommitdiff
path: root/libjava/classpath/gnu/xml/stream/UnicodeReader.java
diff options
context:
space:
mode:
authorupstream source tree <ports@midipix.org>2015-03-15 20:14:05 -0400
committerupstream source tree <ports@midipix.org>2015-03-15 20:14:05 -0400
commit554fd8c5195424bdbcabf5de30fdc183aba391bd (patch)
tree976dc5ab7fddf506dadce60ae936f43f58787092 /libjava/classpath/gnu/xml/stream/UnicodeReader.java
downloadcbb-gcc-4.6.4-554fd8c5195424bdbcabf5de30fdc183aba391bd.tar.bz2
cbb-gcc-4.6.4-554fd8c5195424bdbcabf5de30fdc183aba391bd.tar.xz
obtained gcc-4.6.4.tar.bz2 from upstream website;upstream
verified gcc-4.6.4.tar.bz2.sig; imported gcc-4.6.4 source tree from verified upstream tarball. downloading a git-generated archive based on the 'upstream' tag should provide you with a source tree that is binary identical to the one extracted from the above tarball. if you have obtained the source via the command 'git clone', however, do note that line-endings of files in your working directory might differ from line-endings of the respective files in the upstream repository.
Diffstat (limited to 'libjava/classpath/gnu/xml/stream/UnicodeReader.java')
-rw-r--r--libjava/classpath/gnu/xml/stream/UnicodeReader.java205
1 files changed, 205 insertions, 0 deletions
diff --git a/libjava/classpath/gnu/xml/stream/UnicodeReader.java b/libjava/classpath/gnu/xml/stream/UnicodeReader.java
new file mode 100644
index 000000000..aa2a95422
--- /dev/null
+++ b/libjava/classpath/gnu/xml/stream/UnicodeReader.java
@@ -0,0 +1,205 @@
+/* UnicodeReader.java --
+ Copyright (C) 2005 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+package gnu.xml.stream;
+
+import java.io.IOException;
+import java.io.Reader;
+
+/**
+ * A reader that converts UTF-16 characters to Unicode code points.
+ *
+ * @author <a href='mailto:dog@gnu.org'>Chris Burdess</a>
+ */
+public class UnicodeReader
+{
+
+ final Reader in;
+
+ UnicodeReader(Reader in)
+ {
+ this.in = in;
+ }
+
+ public void mark(int limit)
+ throws IOException
+ {
+ in.mark(limit * 2);
+ }
+
+ public void reset()
+ throws IOException
+ {
+ in.reset();
+ }
+
+ public int read()
+ throws IOException
+ {
+ int ret = in.read();
+ if (ret == -1)
+ return ret;
+ if (ret >= 0xd800 && ret < 0xdc00)
+ {
+ // Unicode surrogate?
+ int low = in.read();
+ if (low >= 0xdc00 && low < 0xe000)
+ ret = Character.toCodePoint((char) ret, (char) low);
+ else
+ throw new IOException("unpaired surrogate: U+" +
+ Integer.toHexString(ret));
+ }
+ else if (ret >= 0xdc00 && ret < 0xe000)
+ throw new IOException("unpaired surrogate: U+" +
+ Integer.toHexString(ret));
+ return ret;
+ }
+
+ public int read(int[] buf, int off, int len)
+ throws IOException
+ {
+ if (len == 0)
+ return 0;
+ char[] b2 = new char[len];
+ int ret = in.read(b2, 0, len);
+ if (ret <= 0)
+ return ret;
+ int l = ret - 1;
+ int i = 0, j = off;
+ for (; i < l; i++)
+ {
+ char c = b2[i];
+ if (c >= 0xd800 && c < 0xdc00)
+ {
+ // Unicode surrogate?
+ char d = b2[i + 1];
+ if (d >= 0xdc00 && d < 0xe000)
+ {
+ buf[j++] = Character.toCodePoint(c, d);
+ i++;
+ continue;
+ }
+ else
+ throw new IOException("unpaired surrogate: U+" +
+ Integer.toHexString(c));
+ }
+ else if (c >= 0xdc00 && c < 0xe000)
+ throw new IOException("unpaired surrogate: U+" +
+ Integer.toHexString(c));
+ buf[j++] = (int) c;
+ }
+ if (i == l)
+ {
+ // last char
+ char c = b2[l];
+ if (c >= 0xd800 && c < 0xdc00)
+ {
+ int low = in.read();
+ if (low >= 0xdc00 && low < 0xe000)
+ {
+ buf[j++] = Character.toCodePoint(c, (char) low);
+ return j;
+ }
+ else
+ throw new IOException("unpaired surrogate: U+" +
+ Integer.toHexString(c));
+ }
+ else if (c >= 0xdc00 && c < 0xe000)
+ throw new IOException("unpaired surrogate: U+" +
+ Integer.toHexString(c));
+ buf[j++] = (int) c;
+ }
+ return j;
+ }
+
+ public void close()
+ throws IOException
+ {
+ in.close();
+ }
+
+ /**
+ * Returns the specified UTF-16 char array as an array of Unicode code
+ * points.
+ */
+ public static int[] toCodePointArray(String text)
+ throws IOException
+ {
+ char[] b2 = text.toCharArray();
+ int[] buf = new int[b2.length];
+ if (b2.length > 0)
+ {
+ int l = b2.length - 1;
+ int i = 0, j = 0;
+ for (; i < l; i++)
+ {
+ char c = b2[i];
+ if (c >= 0xd800 && c < 0xdc00)
+ {
+ // Unicode surrogate?
+ char d = b2[i + 1];
+ if (d >= 0xdc00 && d < 0xe000)
+ {
+ buf[j++] = Character.toCodePoint(c, d);
+ i++;
+ continue;
+ }
+ else
+ throw new IOException("unpaired surrogate: U+" +
+ Integer.toHexString(c));
+ }
+ else if (c >= 0xdc00 && c < 0xe000)
+ throw new IOException("unpaired surrogate: U+" +
+ Integer.toHexString(c));
+ buf[j++] = (int) c;
+ }
+ if (i == l)
+ {
+ // last char
+ buf[j++] = (int) b2[l];
+ if (j < buf.length)
+ {
+ int[] buf2 = new int[j];
+ System.arraycopy(buf, 0, buf2, 0, j);
+ buf = buf2;
+ }
+ }
+ }
+ return buf;
+ }
+
+}