summaryrefslogtreecommitdiff
path: root/libjava/classpath/gnu/java/nio/charset/UTF_16Decoder.java
diff options
context:
space:
mode:
Diffstat (limited to 'libjava/classpath/gnu/java/nio/charset/UTF_16Decoder.java')
-rw-r--r--libjava/classpath/gnu/java/nio/charset/UTF_16Decoder.java167
1 files changed, 167 insertions, 0 deletions
diff --git a/libjava/classpath/gnu/java/nio/charset/UTF_16Decoder.java b/libjava/classpath/gnu/java/nio/charset/UTF_16Decoder.java
new file mode 100644
index 000000000..e078a6919
--- /dev/null
+++ b/libjava/classpath/gnu/java/nio/charset/UTF_16Decoder.java
@@ -0,0 +1,167 @@
+/* UTF_16Decoder.java --
+ Copyright (C) 2002, 2005 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+package gnu.java.nio.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CoderResult;
+
+/**
+ * Decoder for UTF-16, UTF-15LE, and UTF-16BE.
+ *
+ * @author Jesse Rosenstock
+ */
+final class UTF_16Decoder extends CharsetDecoder
+{
+ // byte orders
+ static final int BIG_ENDIAN = 0;
+ static final int LITTLE_ENDIAN = 1;
+ static final int UNKNOWN_ENDIAN = 2;
+ static final int MAYBE_BIG_ENDIAN = 3;
+ static final int MAYBE_LITTLE_ENDIAN = 4;
+
+ private static final char BYTE_ORDER_MARK = 0xFEFF;
+ private static final char REVERSED_BYTE_ORDER_MARK = 0xFFFE;
+
+ private final int originalByteOrder;
+ private int byteOrder;
+
+ UTF_16Decoder (Charset cs, int byteOrder)
+ {
+ super (cs, 0.5f, 1.0f);
+ this.originalByteOrder = byteOrder;
+ this.byteOrder = byteOrder;
+ }
+
+ protected CoderResult decodeLoop (ByteBuffer in, CharBuffer out)
+ {
+ // TODO: Optimize this in the case in.hasArray() / out.hasArray()
+
+ int inPos = in.position ();
+ try
+ {
+ while (in.remaining () >= 2)
+ {
+ byte b1 = in.get ();
+ byte b2 = in.get ();
+
+ // handle byte order mark
+ if (byteOrder == UNKNOWN_ENDIAN ||
+ byteOrder == MAYBE_BIG_ENDIAN ||
+ byteOrder == MAYBE_LITTLE_ENDIAN)
+ {
+ char c = (char) (((b1 & 0xFF) << 8) | (b2 & 0xFF));
+ if (c == BYTE_ORDER_MARK)
+ {
+ if (byteOrder == MAYBE_LITTLE_ENDIAN)
+ {
+ return CoderResult.malformedForLength (2);
+ }
+ byteOrder = BIG_ENDIAN;
+ inPos += 2;
+ continue;
+ }
+ else if (c == REVERSED_BYTE_ORDER_MARK)
+ {
+ if (byteOrder == MAYBE_BIG_ENDIAN)
+ {
+ return CoderResult.malformedForLength (2);
+ }
+ byteOrder = LITTLE_ENDIAN;
+ inPos += 2;
+ continue;
+ }
+ else
+ {
+ // assume big or little endian, do not consume bytes,
+ // continue with normal processing
+ byteOrder = (byteOrder == MAYBE_LITTLE_ENDIAN ?
+ LITTLE_ENDIAN : BIG_ENDIAN);
+ }
+ }
+
+ // FIXME: Change so you only do a single comparison here.
+ char c = (byteOrder == BIG_ENDIAN
+ ? (char) (((b1 & 0xFF) << 8) | (b2 & 0xFF))
+ : (char) (((b2 & 0xFF) << 8) | (b1 & 0xFF)));
+
+ if (0xD800 <= c && c <= 0xDFFF)
+ {
+ // c is a surrogate
+
+ // make sure c is a high surrogate
+ if (c > 0xDBFF)
+ return CoderResult.malformedForLength (2);
+ if (in.remaining () < 2)
+ return CoderResult.UNDERFLOW;
+ byte b3 = in.get ();
+ byte b4 = in.get ();
+ char d = (byteOrder == BIG_ENDIAN
+ ? (char) (((b3 & 0xFF) << 8) | (b4 & 0xFF))
+ : (char) (((b4 & 0xFF) << 8) | (b3 & 0xFF)));
+ // make sure d is a low surrogate
+ if (d < 0xDC00 || d > 0xDFFF)
+ return CoderResult.malformedForLength (2);
+ out.put (c);
+ out.put (d);
+ inPos += 4;
+ }
+ else
+ {
+ if (!out.hasRemaining ())
+ return CoderResult.UNDERFLOW;
+ out.put (c);
+ inPos += 2;
+ }
+ }
+
+ return CoderResult.UNDERFLOW;
+ }
+ finally
+ {
+ in.position (inPos);
+ }
+ }
+
+ protected void implReset ()
+ {
+ byteOrder = originalByteOrder;
+ }
+}