From 554fd8c5195424bdbcabf5de30fdc183aba391bd Mon Sep 17 00:00:00 2001
From: upstream source tree <ports@midipix.org>
Date: Sun, 15 Mar 2015 20:14:05 -0400
Subject: obtained gcc-4.6.4.tar.bz2 from upstream website; verified
 gcc-4.6.4.tar.bz2.sig; imported gcc-4.6.4 source tree from verified upstream
 tarball.

downloading a git-generated archive based on the 'upstream' tag
should provide you with a source tree that is binary identical
to the one extracted from the above tarball.

if you have obtained the source via the command 'git clone',
however, do note that line-endings of files in your working
directory might differ from line-endings of the respective
files in the upstream repository.
---
 libjava/gnu/gcj/convert/Output_UTF8.java | 146 +++++++++++++++++++++++++++++++
 1 file changed, 146 insertions(+)
 create mode 100644 libjava/gnu/gcj/convert/Output_UTF8.java

(limited to 'libjava/gnu/gcj/convert/Output_UTF8.java')

diff --git a/libjava/gnu/gcj/convert/Output_UTF8.java b/libjava/gnu/gcj/convert/Output_UTF8.java
new file mode 100644
index 000000000..e550a7f0a
--- /dev/null
+++ b/libjava/gnu/gcj/convert/Output_UTF8.java
@@ -0,0 +1,146 @@
+/* Copyright (C) 1999, 2000, 2003, 2006  Free Software Foundation
+
+   This file is part of libgcj.
+
+This software is copyrighted work licensed under the terms of the
+Libgcj License.  Please consult the file "LIBGCJ_LICENSE" for
+details.  */
+
+package gnu.gcj.convert;
+
+/**
+ * Convert Unicode to UTF8.
+ * @author Per Bothner <bothner@cygnus.com>
+ * @date Match 1999.
+ */
+
+public class Output_UTF8 extends UnicodeToBytes
+{
+  public String getName() { return "UTF8"; }
+
+  /** True if a surrogate pair should be emitted as a single UTF8 sequence.
+   * Otherwise, a surrogate pair is treated as two separate characters.
+   * Also, '\0' is emitted as {0} if true, and as {0xC0,0x80} if false. */
+  public boolean standardUTF8 = true;
+
+  // Saves the previous char if it was a high-surrogate.
+  char hi_part;
+  // Value of incomplete character.
+  int value;
+  // Number of continuation bytes still to emit.
+  int bytes_todo;
+
+  public int write (char[] inbuffer, int inpos, int inlength)
+  {
+    int start_pos = inpos;
+    int avail = buf.length - count;
+    for (;;)
+      {
+	if (avail == 0 || (inlength == 0 && bytes_todo == 0 && hi_part == 0))
+	  break;
+	// The algorithm is made more complicated because we want to write
+	// at least one byte in the output buffer, if there is room for
+	// that byte, and at least one input character is available.
+	// This makes the code more robust, since client code will
+	// always "make progress", even in the complicated cases,
+	// where the output buffer only has room for only *part* of a
+	// multi-byte sequence, or the input char buffer only has half
+	// of a surrogate pair (when standardUTF8 is set), or both.
+
+	// Handle continuation characters we did not have room for before.
+	if (bytes_todo > 0)
+	  {
+	    do
+	      {
+		bytes_todo--;
+		buf[count++] = (byte)
+		  (((value >> (bytes_todo * 6)) & 0x3F) | 0x80);
+		avail--;
+	      }
+	    while (bytes_todo > 0 && avail > 0);
+	    continue;
+	  }
+
+	// Handle a high surrogate at the end of the input stream.
+	if (inlength == 0 && hi_part != 0)
+	  {
+	    buf[count++] = (byte) (0xE0 | (hi_part >> 12));
+	    value = hi_part;
+	    hi_part = 0;
+	    avail--;
+	    bytes_todo = 2;
+	    continue;
+	  }
+
+	char ch = inbuffer[inpos++];
+	inlength--;
+
+	if (hi_part != 0 && (ch <= 0xDBFF || ch > 0xDFFF))
+	  {
+	    // If the previous character was a high surrogate, and we
+	    // don't now have a low surrogate, we print the high
+	    // surrogate as an isolated character.
+	    --inpos;
+	    ++inlength;
+	    buf[count++] = (byte) (0xE0 | (hi_part >> 12));
+	    value = hi_part;
+	    hi_part = 0;
+	    avail--;
+	    bytes_todo = 2;
+	  }
+	else if (hi_part == 0 && ch >= 0xDC00 && ch <= 0xDFFF)
+	  {
+	    // If this character is a low surrogate and we didn't
+	    // previously see a high surrogate, we do the same thing
+	    // as above.
+	    buf[count++] = (byte) (0xE0 | (ch >> 12));
+	    value = ch;
+	    avail--;
+	    bytes_todo = 2;
+	  }
+	else if (ch < 128 && (ch != 0 || standardUTF8))
+	  {
+	    avail--;
+	    buf[count++] = (byte) ch;
+	  }
+	else if (ch <= 0x07FF)
+	  {
+	    buf[count++] = (byte) (0xC0 | (ch >> 6));
+	    avail--;
+	    value = ch;
+	    bytes_todo = 1;
+	  }
+	else if (ch >= 0xD800 && ch <= 0xDFFF && standardUTF8)
+	  {
+	    if (ch <= 0xDBFF)  // High surrogates
+	      {
+		// Just save the high surrogate until the next
+		// character comes along.
+		hi_part = ch;
+	      }
+	    else // Low surrogates
+	      {
+		value = (hi_part - 0xD800) * 0x400 + (ch - 0xDC00) + 0x10000;
+		buf[count++] = (byte) (0xF0 | (value >> 18));
+		avail--;
+		bytes_todo = 3;
+		hi_part = 0;
+	      }
+	  }
+	else
+	  {
+	    buf[count++] = (byte) (0xE0 | (ch >> 12));
+	    value = ch;
+	    avail--;
+	    bytes_todo = 2;
+	  }
+      }
+    return inpos - start_pos;
+  }
+
+  public boolean havePendingBytes()
+  {
+    return bytes_todo > 0 || hi_part != 0;
+  }
+
+}
-- 
cgit v1.2.3