From 554fd8c5195424bdbcabf5de30fdc183aba391bd Mon Sep 17 00:00:00 2001 From: upstream source tree Date: Sun, 15 Mar 2015 20:14:05 -0400 Subject: obtained gcc-4.6.4.tar.bz2 from upstream website; verified gcc-4.6.4.tar.bz2.sig; imported gcc-4.6.4 source tree from verified upstream tarball. downloading a git-generated archive based on the 'upstream' tag should provide you with a source tree that is binary identical to the one extracted from the above tarball. if you have obtained the source via the command 'git clone', however, do note that line-endings of files in your working directory might differ from line-endings of the respective files in the upstream repository. --- gcc/ada/s-wchcnv.adb | 468 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 468 insertions(+) create mode 100644 gcc/ada/s-wchcnv.adb (limited to 'gcc/ada/s-wchcnv.adb') diff --git a/gcc/ada/s-wchcnv.adb b/gcc/ada/s-wchcnv.adb new file mode 100644 index 000000000..893232e60 --- /dev/null +++ b/gcc/ada/s-wchcnv.adb @@ -0,0 +1,468 @@ +------------------------------------------------------------------------------ +-- -- +-- GNAT RUN-TIME COMPONENTS -- +-- -- +-- S Y S T E M . W C H _ C N V -- +-- -- +-- B o d y -- +-- -- +-- Copyright (C) 1992-2009, Free Software Foundation, Inc. -- +-- -- +-- GNAT is free software; you can redistribute it and/or modify it under -- +-- terms of the GNU General Public License as published by the Free Soft- -- +-- ware Foundation; either version 3, or (at your option) any later ver- -- +-- sion. GNAT is distributed in the hope that it will be useful, but WITH- -- +-- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -- +-- or FITNESS FOR A PARTICULAR PURPOSE. -- +-- -- +-- As a special exception under Section 7 of GPL version 3, you are granted -- +-- additional permissions described in the GCC Runtime Library Exception, -- +-- version 3.1, as published by the Free Software Foundation. -- +-- -- +-- You should have received a copy of the GNU General Public License and -- +-- a copy of the GCC Runtime Library Exception along with this program; -- +-- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -- +-- . -- +-- -- +-- GNAT was originally developed by the GNAT team at New York University. -- +-- Extensive contributions were provided by Ada Core Technologies Inc. -- +-- -- +------------------------------------------------------------------------------ + +pragma Compiler_Unit; + +with Interfaces; use Interfaces; +with System.WCh_Con; use System.WCh_Con; +with System.WCh_JIS; use System.WCh_JIS; + +package body System.WCh_Cnv is + + ----------------------------- + -- Char_Sequence_To_UTF_32 -- + ----------------------------- + + function Char_Sequence_To_UTF_32 + (C : Character; + EM : System.WCh_Con.WC_Encoding_Method) return UTF_32_Code + is + B1 : Unsigned_32; + C1 : Character; + U : Unsigned_32; + W : Unsigned_32; + + procedure Get_Hex (N : Character); + -- If N is a hex character, then set B1 to 16 * B1 + character N. + -- Raise Constraint_Error if character N is not a hex character. + + procedure Get_UTF_Byte; + pragma Inline (Get_UTF_Byte); + -- Used to interpret a 2#10xxxxxx# continuation byte in UTF-8 mode. + -- Reads a byte, and raises CE if the first two bits are not 10. + -- Otherwise shifts W 6 bits left and or's in the 6 xxxxxx bits. + + ------------- + -- Get_Hex -- + ------------- + + procedure Get_Hex (N : Character) is + B2 : constant Unsigned_32 := Character'Pos (N); + begin + if B2 in Character'Pos ('0') .. Character'Pos ('9') then + B1 := B1 * 16 + B2 - Character'Pos ('0'); + elsif B2 in Character'Pos ('A') .. Character'Pos ('F') then + B1 := B1 * 16 + B2 - (Character'Pos ('A') - 10); + elsif B2 in Character'Pos ('a') .. Character'Pos ('f') then + B1 := B1 * 16 + B2 - (Character'Pos ('a') - 10); + else + raise Constraint_Error; + end if; + end Get_Hex; + + ------------------ + -- Get_UTF_Byte -- + ------------------ + + procedure Get_UTF_Byte is + begin + U := Unsigned_32 (Character'Pos (In_Char)); + + if (U and 2#11000000#) /= 2#10_000000# then + raise Constraint_Error; + end if; + + W := Shift_Left (W, 6) or (U and 2#00111111#); + end Get_UTF_Byte; + + -- Start of processing for Char_Sequence_To_Wide + + begin + case EM is + + when WCEM_Hex => + if C /= ASCII.ESC then + return Character'Pos (C); + + else + B1 := 0; + Get_Hex (In_Char); + Get_Hex (In_Char); + Get_Hex (In_Char); + Get_Hex (In_Char); + + return UTF_32_Code (B1); + end if; + + when WCEM_Upper => + if C > ASCII.DEL then + return 256 * Character'Pos (C) + Character'Pos (In_Char); + else + return Character'Pos (C); + end if; + + when WCEM_Shift_JIS => + if C > ASCII.DEL then + return Wide_Character'Pos (Shift_JIS_To_JIS (C, In_Char)); + else + return Character'Pos (C); + end if; + + when WCEM_EUC => + if C > ASCII.DEL then + return Wide_Character'Pos (EUC_To_JIS (C, In_Char)); + else + return Character'Pos (C); + end if; + + when WCEM_UTF8 => + + -- Note: for details of UTF8 encoding see RFC 3629 + + U := Unsigned_32 (Character'Pos (C)); + + -- 16#00_0000#-16#00_007F#: 0xxxxxxx + + if (U and 2#10000000#) = 2#00000000# then + return Character'Pos (C); + + -- 16#00_0080#-16#00_07FF#: 110xxxxx 10xxxxxx + + elsif (U and 2#11100000#) = 2#110_00000# then + W := U and 2#00011111#; + Get_UTF_Byte; + return UTF_32_Code (W); + + -- 16#00_0800#-16#00_ffff#: 1110xxxx 10xxxxxx 10xxxxxx + + elsif (U and 2#11110000#) = 2#1110_0000# then + W := U and 2#00001111#; + Get_UTF_Byte; + Get_UTF_Byte; + return UTF_32_Code (W); + + -- 16#01_0000#-16#10_FFFF#: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + + elsif (U and 2#11111000#) = 2#11110_000# then + W := U and 2#00000111#; + + for K in 1 .. 3 loop + Get_UTF_Byte; + end loop; + + return UTF_32_Code (W); + + -- 16#0020_0000#-16#03FF_FFFF#: 111110xx 10xxxxxx 10xxxxxx + -- 10xxxxxx 10xxxxxx + + elsif (U and 2#11111100#) = 2#111110_00# then + W := U and 2#00000011#; + + for K in 1 .. 4 loop + Get_UTF_Byte; + end loop; + + return UTF_32_Code (W); + + -- 16#0400_0000#-16#7FFF_FFFF#: 1111110x 10xxxxxx 10xxxxxx + -- 10xxxxxx 10xxxxxx 10xxxxxx + + elsif (U and 2#11111110#) = 2#1111110_0# then + W := U and 2#00000001#; + + for K in 1 .. 5 loop + Get_UTF_Byte; + end loop; + + return UTF_32_Code (W); + + else + raise Constraint_Error; + end if; + + when WCEM_Brackets => + if C /= '[' then + return Character'Pos (C); + end if; + + if In_Char /= '"' then + raise Constraint_Error; + end if; + + B1 := 0; + Get_Hex (In_Char); + Get_Hex (In_Char); + + C1 := In_Char; + + if C1 /= '"' then + Get_Hex (C1); + Get_Hex (In_Char); + + C1 := In_Char; + + if C1 /= '"' then + Get_Hex (C1); + Get_Hex (In_Char); + + C1 := In_Char; + + if C1 /= '"' then + Get_Hex (C1); + Get_Hex (In_Char); + + if B1 > Unsigned_32 (UTF_32_Code'Last) then + raise Constraint_Error; + end if; + + if In_Char /= '"' then + raise Constraint_Error; + end if; + end if; + end if; + end if; + + if In_Char /= ']' then + raise Constraint_Error; + end if; + + return UTF_32_Code (B1); + + end case; + end Char_Sequence_To_UTF_32; + + -------------------------------- + -- Char_Sequence_To_Wide_Char -- + -------------------------------- + + function Char_Sequence_To_Wide_Char + (C : Character; + EM : System.WCh_Con.WC_Encoding_Method) return Wide_Character + is + function Char_Sequence_To_UTF is new Char_Sequence_To_UTF_32 (In_Char); + + U : constant UTF_32_Code := Char_Sequence_To_UTF (C, EM); + + begin + if U > 16#FFFF# then + raise Constraint_Error; + else + return Wide_Character'Val (U); + end if; + end Char_Sequence_To_Wide_Char; + + ----------------------------- + -- UTF_32_To_Char_Sequence -- + ----------------------------- + + procedure UTF_32_To_Char_Sequence + (Val : UTF_32_Code; + EM : System.WCh_Con.WC_Encoding_Method) + is + Hexc : constant array (UTF_32_Code range 0 .. 15) of Character := + "0123456789ABCDEF"; + + C1, C2 : Character; + U : Unsigned_32; + + begin + -- Raise CE for invalid UTF_32_Code + + if not Val'Valid then + raise Constraint_Error; + end if; + + -- Processing depends on encoding mode + + case EM is + + when WCEM_Hex => + if Val < 256 then + Out_Char (Character'Val (Val)); + elsif Val <= 16#FFFF# then + Out_Char (ASCII.ESC); + Out_Char (Hexc (Val / (16**3))); + Out_Char (Hexc ((Val / (16**2)) mod 16)); + Out_Char (Hexc ((Val / 16) mod 16)); + Out_Char (Hexc (Val mod 16)); + else + raise Constraint_Error; + end if; + + when WCEM_Upper => + if Val < 128 then + Out_Char (Character'Val (Val)); + elsif Val < 16#8000# or else Val > 16#FFFF# then + raise Constraint_Error; + else + Out_Char (Character'Val (Val / 256)); + Out_Char (Character'Val (Val mod 256)); + end if; + + when WCEM_Shift_JIS => + if Val < 128 then + Out_Char (Character'Val (Val)); + elsif Val <= 16#FFFF# then + JIS_To_Shift_JIS (Wide_Character'Val (Val), C1, C2); + Out_Char (C1); + Out_Char (C2); + else + raise Constraint_Error; + end if; + + when WCEM_EUC => + if Val < 128 then + Out_Char (Character'Val (Val)); + elsif Val <= 16#FFFF# then + JIS_To_EUC (Wide_Character'Val (Val), C1, C2); + Out_Char (C1); + Out_Char (C2); + else + raise Constraint_Error; + end if; + + when WCEM_UTF8 => + + -- Note: for details of UTF8 encoding see RFC 3629 + + U := Unsigned_32 (Val); + + -- 16#00_0000#-16#00_007F#: 0xxxxxxx + + if U <= 16#00_007F# then + Out_Char (Character'Val (U)); + + -- 16#00_0080#-16#00_07FF#: 110xxxxx 10xxxxxx + + elsif U <= 16#00_07FF# then + Out_Char (Character'Val (2#11000000# or Shift_Right (U, 6))); + Out_Char (Character'Val (2#10000000# or (U and 2#00111111#))); + + -- 16#00_0800#-16#00_FFFF#: 1110xxxx 10xxxxxx 10xxxxxx + + elsif U <= 16#00_FFFF# then + Out_Char (Character'Val (2#11100000# or Shift_Right (U, 12))); + Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 6) + and 2#00111111#))); + Out_Char (Character'Val (2#10000000# or (U and 2#00111111#))); + + -- 16#01_0000#-16#10_FFFF#: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + + elsif U <= 16#10_FFFF# then + Out_Char (Character'Val (2#11110000# or Shift_Right (U, 18))); + Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 12) + and 2#00111111#))); + Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 6) + and 2#00111111#))); + Out_Char (Character'Val (2#10000000# or (U and 2#00111111#))); + + -- 16#0020_0000#-16#03FF_FFFF#: 111110xx 10xxxxxx 10xxxxxx + -- 10xxxxxx 10xxxxxx + + elsif U <= 16#03FF_FFFF# then + Out_Char (Character'Val (2#11111000# or Shift_Right (U, 24))); + Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 18) + and 2#00111111#))); + Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 12) + and 2#00111111#))); + Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 6) + and 2#00111111#))); + Out_Char (Character'Val (2#10000000# or (U and 2#00111111#))); + + -- 16#0400_0000#-16#7FFF_FFFF#: 1111110x 10xxxxxx 10xxxxxx + -- 10xxxxxx 10xxxxxx 10xxxxxx + + elsif U <= 16#7FFF_FFFF# then + Out_Char (Character'Val (2#11111100# or Shift_Right (U, 30))); + Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 24) + and 2#00111111#))); + Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 18) + and 2#00111111#))); + Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 12) + and 2#00111111#))); + Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 6) + and 2#00111111#))); + Out_Char (Character'Val (2#10000000# or (U and 2#00111111#))); + + else + raise Constraint_Error; + end if; + + when WCEM_Brackets => + + -- Values in the range 0-255 are directly output. Note that there + -- is some issue with [ (16#5B#] since this will cause confusion + -- if the resulting string is interpreted using brackets encoding. + + -- One possibility would be to always output [ as ["5B"] but in + -- practice this is undesirable, since for example normal use of + -- Wide_Text_IO for output (much more common than input), really + -- does want to be able to say something like + + -- Put_Line ("Start of output [first run]"); + + -- and have it come out as intended, rather than contaminated by + -- a ["5B"] sequence in place of the left bracket. + + if Val < 256 then + Out_Char (Character'Val (Val)); + + -- Otherwise use brackets notation for vales greater than 255 + + else + Out_Char ('['); + Out_Char ('"'); + + if Val > 16#FFFF# then + if Val > 16#00FF_FFFF# then + Out_Char (Hexc (Val / 16 ** 7)); + Out_Char (Hexc ((Val / 16 ** 6) mod 16)); + end if; + + Out_Char (Hexc ((Val / 16 ** 5) mod 16)); + Out_Char (Hexc ((Val / 16 ** 4) mod 16)); + end if; + + Out_Char (Hexc ((Val / 16 ** 3) mod 16)); + Out_Char (Hexc ((Val / 16 ** 2) mod 16)); + Out_Char (Hexc ((Val / 16) mod 16)); + Out_Char (Hexc (Val mod 16)); + + Out_Char ('"'); + Out_Char (']'); + end if; + end case; + end UTF_32_To_Char_Sequence; + + -------------------------------- + -- Wide_Char_To_Char_Sequence -- + -------------------------------- + + procedure Wide_Char_To_Char_Sequence + (WC : Wide_Character; + EM : System.WCh_Con.WC_Encoding_Method) + is + procedure UTF_To_Char_Sequence is new UTF_32_To_Char_Sequence (Out_Char); + begin + UTF_To_Char_Sequence (Wide_Character'Pos (WC), EM); + end Wide_Char_To_Char_Sequence; + +end System.WCh_Cnv; -- cgit v1.2.3