diff options
author | upstream source tree <ports@midipix.org> | 2015-03-15 20:14:05 -0400 |
---|---|---|
committer | upstream source tree <ports@midipix.org> | 2015-03-15 20:14:05 -0400 |
commit | 554fd8c5195424bdbcabf5de30fdc183aba391bd (patch) | |
tree | 976dc5ab7fddf506dadce60ae936f43f58787092 /gcc/ada/a-suewst.adb | |
download | cbb-gcc-4.6.4-15d2061ac0796199866debe9ac87130894b0cdd3.tar.bz2 cbb-gcc-4.6.4-15d2061ac0796199866debe9ac87130894b0cdd3.tar.xz |
obtained gcc-4.6.4.tar.bz2 from upstream website;upstream
verified gcc-4.6.4.tar.bz2.sig;
imported gcc-4.6.4 source tree from verified upstream tarball.
downloading a git-generated archive based on the 'upstream' tag
should provide you with a source tree that is binary identical
to the one extracted from the above tarball.
if you have obtained the source via the command 'git clone',
however, do note that line-endings of files in your working
directory might differ from line-endings of the respective
files in the upstream repository.
Diffstat (limited to 'gcc/ada/a-suewst.adb')
-rwxr-xr-x | gcc/ada/a-suewst.adb | 370 |
1 files changed, 370 insertions, 0 deletions
diff --git a/gcc/ada/a-suewst.adb b/gcc/ada/a-suewst.adb new file mode 100755 index 000000000..67f39ec2d --- /dev/null +++ b/gcc/ada/a-suewst.adb @@ -0,0 +1,370 @@ +------------------------------------------------------------------------------ +-- -- +-- GNAT RUN-TIME COMPONENTS -- +-- -- +-- ADA.STRINGS.UTF_ENCODING.WIDE_STRINGS -- +-- -- +-- B o d y -- +-- -- +-- Copyright (C) 2010, Free Software Foundation, Inc. -- +-- -- +-- GNAT is free software; you can redistribute it and/or modify it under -- +-- terms of the GNU General Public License as published by the Free Soft- -- +-- ware Foundation; either version 3, or (at your option) any later ver- -- +-- sion. GNAT is distributed in the hope that it will be useful, but WITH- -- +-- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -- +-- or FITNESS FOR A PARTICULAR PURPOSE. -- +-- -- +-- As a special exception under Section 7 of GPL version 3, you are granted -- +-- additional permissions described in the GCC Runtime Library Exception, -- +-- version 3.1, as published by the Free Software Foundation. -- +-- -- +-- You should have received a copy of the GNU General Public License and -- +-- a copy of the GCC Runtime Library Exception along with this program; -- +-- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -- +-- <http://www.gnu.org/licenses/>. -- +-- -- +-- GNAT was originally developed by the GNAT team at New York University. -- +-- Extensive contributions were provided by Ada Core Technologies Inc. -- +-- -- +------------------------------------------------------------------------------ + +package body Ada.Strings.UTF_Encoding.Wide_Strings is + use Interfaces; + + ------------ + -- Decode -- + ------------ + + -- Decode UTF-8/UTF-16BE/UTF-16LE input to Wide_String + + function Decode + (Item : UTF_String; + Input_Scheme : Encoding_Scheme) return Wide_String + is + begin + if Input_Scheme = UTF_8 then + return Decode (Item); + else + return Decode (To_UTF_16 (Item, Input_Scheme)); + end if; + end Decode; + + -- Decode UTF-8 input to Wide_String + + function Decode (Item : UTF_8_String) return Wide_String is + Result : Wide_String (1 .. Item'Length); + -- Result string (worst case is same length as input) + + Len : Natural := 0; + -- Length of result stored so far + + Iptr : Natural; + -- Input Item pointer + + C : Unsigned_8; + R : Unsigned_16; + + procedure Get_Continuation; + -- Reads a continuation byte of the form 10xxxxxx, shifts R left + -- by 6 bits, and or's in the xxxxxx to the low order 6 bits. On + -- return Ptr is incremented. Raises exception if continuation + -- byte does not exist or is invalid. + + ---------------------- + -- Get_Continuation -- + ---------------------- + + procedure Get_Continuation is + begin + if Iptr > Item'Last then + Raise_Encoding_Error (Iptr - 1); + + else + C := To_Unsigned_8 (Item (Iptr)); + Iptr := Iptr + 1; + + if C not in 2#10_000000# .. 2#10_111111# then + Raise_Encoding_Error (Iptr - 1); + else + R := Shift_Left (R, 6) or Unsigned_16 (C and 2#00_111111#); + end if; + end if; + end Get_Continuation; + + -- Start of processing for Decode + + begin + Iptr := Item'First; + + -- Skip BOM at start + + if Item'Length >= 3 + and then Item (Iptr .. Iptr + 2) = BOM_8 + then + Iptr := Iptr + 3; + + -- Error if bad BOM + + elsif Item'Length >= 2 + and then (Item (Iptr .. Iptr + 1) = BOM_16BE + or else + Item (Iptr .. Iptr + 1) = BOM_16LE) + then + Raise_Encoding_Error (Iptr); + end if; + + while Iptr <= Item'Last loop + C := To_Unsigned_8 (Item (Iptr)); + Iptr := Iptr + 1; + + -- Codes in the range 16#00# - 16#7F# are represented as + -- 0xxxxxxx + + if C <= 16#7F# then + R := Unsigned_16 (C); + + -- No initial code can be of the form 10xxxxxx. Such codes are used + -- only for continuations. + + elsif C <= 2#10_111111# then + Raise_Encoding_Error (Iptr - 1); + + -- Codes in the range 16#80# - 16#7FF# are represented as + -- 110yyyxx 10xxxxxx + + elsif C <= 2#110_11111# then + R := Unsigned_16 (C and 2#000_11111#); + Get_Continuation; + + -- Codes in the range 16#800# - 16#FFFF# are represented as + -- 1110yyyy 10yyyyxx 10xxxxxx + + elsif C <= 2#1110_1111# then + R := Unsigned_16 (C and 2#0000_1111#); + Get_Continuation; + Get_Continuation; + + -- Codes in the range 16#10000# - 16#10FFFF# are represented as + -- 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx + + -- Such codes are out of range for Wide_String output + + else + Raise_Encoding_Error (Iptr - 1); + end if; + + Len := Len + 1; + Result (Len) := Wide_Character'Val (R); + end loop; + + return Result (1 .. Len); + end Decode; + + -- Decode UTF-16 input to Wide_String + + function Decode (Item : UTF_16_Wide_String) return Wide_String is + Result : Wide_String (1 .. Item'Length); + -- Result is same length as input (possibly minus 1 if BOM present) + + Len : Natural := 0; + -- Length of result + + Iptr : Natural; + -- Index of next Item element + + C : Unsigned_16; + + begin + -- Skip UTF-16 BOM at start + + Iptr := Item'First; + + if Item'Length > 0 and then Item (Iptr) = BOM_16 (1) then + Iptr := Iptr + 1; + end if; + + -- Loop through input characters + + while Iptr <= Item'Last loop + C := To_Unsigned_16 (Item (Iptr)); + Iptr := Iptr + 1; + + -- Codes in the range 16#0000#..16#D7FF# or 16#E000#..16#FFFD# + -- represent their own value. + + if C <= 16#D7FF# or else C in 16#E000# .. 16#FFFD# then + Len := Len + 1; + Result (Len) := Wide_Character'Val (C); + + -- Codes in the range 16#D800#..16#DBFF# represent the first of the + -- two surrogates used to encode the range 16#01_000#..16#10_FFFF". + -- Such codes are out of range for 16-bit output. + + -- The case of input in the range 16#DC00#..16#DFFF# must never + -- occur, since it means we have a second surrogate character with + -- no corresponding first surrogate. + + -- Codes in the range 16#FFFE# .. 16#FFFF# are also invalid since + -- they conflict with codes used for BOM values. + + -- Thus all remaining codes are invalid + + else + Raise_Encoding_Error (Iptr - 1); + end if; + end loop; + + return Result (1 .. Len); + end Decode; + + ------------ + -- Encode -- + ------------ + + -- Encode Wide_String in UTF-8, UTF-16BE or UTF-16LE + + function Encode + (Item : Wide_String; + Output_Scheme : Encoding_Scheme; + Output_BOM : Boolean := False) return UTF_String + is + begin + -- Case of UTF_8 + + if Output_Scheme = UTF_8 then + return Encode (Item, Output_BOM); + + -- Case of UTF_16LE or UTF_16BE, use UTF-16 intermediary + + else + return From_UTF_16 (UTF_16_Wide_String'(Encode (Item)), + Output_Scheme, Output_BOM); + end if; + end Encode; + + -- Encode Wide_String in UTF-8 + + function Encode + (Item : Wide_String; + Output_BOM : Boolean := False) return UTF_8_String + is + Result : UTF_8_String (1 .. 3 * Item'Length + 3); + -- Worst case is three bytes per input byte + space for BOM + + Len : Natural; + -- Number of output codes stored in Result + + C : Unsigned_16; + -- Single input character + + procedure Store (C : Unsigned_16); + pragma Inline (Store); + -- Store one output code, C is in the range 0 .. 255 + + ----------- + -- Store -- + ----------- + + procedure Store (C : Unsigned_16) is + begin + Len := Len + 1; + Result (Len) := Character'Val (C); + end Store; + + -- Start of processing for UTF8_Encode + + begin + -- Output BOM if required + + if Output_BOM then + Result (1 .. 3) := BOM_8; + Len := 3; + else + Len := 0; + end if; + + -- Loop through characters of input + + for J in Item'Range loop + C := To_Unsigned_16 (Item (J)); + + -- Codes in the range 16#00# - 16#7F# are represented as + -- 0xxxxxxx + + if C <= 16#7F# then + Store (C); + + -- Codes in the range 16#80# - 16#7FF# are represented as + -- 110yyyxx 10xxxxxx + + elsif C <= 16#7FF# then + Store (2#110_00000# or Shift_Right (C, 6)); + Store (2#10_000000# or (C and 2#00_111111#)); + + -- Codes in the range 16#800# - 16#FFFF# are represented as + -- 1110yyyy 10yyyyxx 10xxxxxx + + else + Store (2#1110_0000# or Shift_Right (C, 12)); + Store (2#10_000000# or + Shift_Right (C and 2#111111_000000#, 6)); + Store (2#10_000000# or (C and 2#00_111111#)); + end if; + end loop; + + return Result (1 .. Len); + end Encode; + + -- Encode Wide_String in UTF-16 + + function Encode + (Item : Wide_String; + Output_BOM : Boolean := False) return UTF_16_Wide_String + is + Result : UTF_16_Wide_String + (1 .. Item'Length + Boolean'Pos (Output_BOM)); + -- Output is same length as input + possible BOM + + Len : Integer; + -- Length of output string + + C : Unsigned_16; + + begin + -- Output BOM if required + + if Output_BOM then + Result (1) := BOM_16 (1); + Len := 1; + else + Len := 0; + end if; + + -- Loop through input characters encoding them + + for Iptr in Item'Range loop + C := To_Unsigned_16 (Item (Iptr)); + + -- Codes in the range 16#0000#..16#D7FF# or 16#E000#..16#FFFD# are + -- output unchanged. + + if C <= 16#D7FF# or else C in 16#E000# .. 16#FFFD# then + Len := Len + 1; + Result (Len) := Wide_Character'Val (C); + + -- Codes in the range 16#D800#..16#DFFF# should never appear in the + -- input, since no valid Unicode characters are in this range (which + -- would conflict with the UTF-16 surrogate encodings). Similarly + -- codes in the range 16#FFFE#..16#FFFF conflict with BOM codes. + -- Thus all remaining codes are illegal. + + else + Raise_Encoding_Error (Iptr); + end if; + end loop; + + return Result; + end Encode; + +end Ada.Strings.UTF_Encoding.Wide_Strings; |