diff options
author | charlet <charlet@138bc75d-0d04-0410-961f-82ee72b054a4> | 2007-12-13 10:20:52 +0000 |
---|---|---|
committer | charlet <charlet@138bc75d-0d04-0410-961f-82ee72b054a4> | 2007-12-13 10:20:52 +0000 |
commit | d03308df6e185f691bb854d38e1e3b3a8198ce1a (patch) | |
tree | 1c8e06d9a692908c2062d3287c702c32b03ea3e4 /gcc/ada/s-wchcon.ads | |
parent | 51f168eb1338af75aff54518b52f8dd577014060 (diff) | |
download | gcc-d03308df6e185f691bb854d38e1e3b3a8198ce1a.tar.gz |
2007-12-06 Robert Dewar <dewar@adacore.com>
* a-textio.adb, a-textio.ads:
Extensive changes to private part for wide character encoding
* a-witeio.ads, a-witeio.adb, a-ztexio.ads, a-ztexio.adb
(Look_Ahead): Fix mishandling of encoded sequences
Move declaration of Wch_Con to private part (should not be visible)
* ali.adb (Scan_ALI): Set default encoding method to brackets instead of
UTF-8. Probably this is never used, but if it is, brackets is
clearly correct.
* bindgen.adb (Get_WC_Encoding): New procedure to properly handle
setting wide character encoding for no main program case and when
encoding is specified using -W?
Initialize stack limit of environment task if stack limit method of
stack checking is enabled.
(Gen_Adainit_Ada): Use Get_WC_Encoding to output encoding method
(Gen_Adainit_C): Use Get_WC_Encoding to output encoding method
(Get_Main_Unit_Name): New function.
(Gen_Adainit_Ada): Add call to main program for .NET when needed.
(Gen_Output_File): Set Bind_Main_Program to True for .NET
* bindusg.adb: Add line for -Wx switch
* s-wchcon.adb, s-wchcon.ads: (Is_Start_Of_Encoding): New function
Add comments
Add new useful constant WC_Longest_Sequences
* switch-b.adb: Clean up handling of -Wx switch
For -gnatWx, set Wide_Character_Encoding_Method_Specified
* switch-c.adb: -gnatg activates warning on assertion errors
For -gnatWx, set Wide_Character_Encoding_Method_Specified
* s-wchcon.adb: (Is_Start_Of_Encoding): New function
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@130817 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/ada/s-wchcon.ads')
-rw-r--r-- | gcc/ada/s-wchcon.ads | 47 |
1 files changed, 36 insertions, 11 deletions
diff --git a/gcc/ada/s-wchcon.ads b/gcc/ada/s-wchcon.ads
index 8607c19e951..af0eb7006a3 100644
--- a/gcc/ada/s-wchcon.ads
+++ b/gcc/ada/s-wchcon.ads
@@ -106,8 +106,8 @@ package System.WCh_Con is
    --  sequence ESC a b c d (five characters, where abcd are ASCII hex
    --  characters, using upper case for letters). This method is easy
    --  to deal with in external environments that do not support wide
-   --  characters, and covers the whole BMP. This is the default encoding
-   --  method.
+   --  characters, and covers the whole 16-bit BMP. Codes larger than
+   --  16#FFFF# are not representable using this encoding method.

    WCEM_Upper : constant WC_Encoding_Method := 2;
    --  The wide character with encoding 16#abcd#, where the upper bit is on
@@ -115,7 +115,8 @@ package System.WCh_Con is
    --  16#cd#. The second byte may never be a format control character, but
    --  is not required to be in the upper half. This method can be also used
    --  for shift-JIS or EUC where the internal coding matches the external
-   --  coding.
+   --  coding. Codes larger than 16#FFFF# are not representable using this
+   --  encoding method.

    WCEM_Shift_JIS : constant WC_Encoding_Method := 3;
    --  A wide character is represented by a two character sequence 16#ab#
@@ -123,19 +124,21 @@ package System.WCh_Con is
    --  as described above. The internal character code is the corresponding
    --  JIS character according to the standard algorithm for Shift-JIS
    --  conversion. See the body of package System.JIS_Conversions for
-   --  further details.
+   --  further details. Codes larger than 16#FFFF are not representable
+   --  using this encoding method.

    WCEM_EUC : constant WC_Encoding_Method := 4;
    --  A wide character is represented by a two character sequence 16#ab# and
    --  16#cd#, with both characters being in the upper half set. The internal
    --  character code is the corresponding JIS character according to the EUC
    --  encoding algorithm. See the body of package System.JIS_Conversions for
-   --  further details.
+   --  further details. Codes larger than 16#FFFF# are not representable using
+   --  this encoding method.

    WCEM_UTF8 : constant WC_Encoding_Method := 5;
-   --  An ISO 10646-1 BMP/Unicode wide character is represented in
-   --  UCS Transformation Format 8 (UTF-8) as defined in Annex R of ISO
-   --  10646-1/Am.2. Depending on the character value, a Unicode character
+   --  An ISO 10646-1 BMP/Unicode wide character is represented in UCS
+   --  Transformation Format 8 (UTF-8), as defined in Annex R of ISO
+   --  10646-1/Am.2. Depending on the character value, a Unicode character
    --  is represented as the one to six byte sequence.
    --
    --  16#0000_0000#-16#0000_007f#: 2#0xxxxxxx#
@@ -151,7 +154,8 @@ package System.WCh_Con is
    --  where the xxx bits correspond to the left-padded bits of the
    --  16-bit character value. Note that all lower half ASCII characters
    --  are represented as ASCII bytes and all upper half characters and
-   --  other wide characters are represented as sequences of upper-half.
+   --  other wide characters are represented as sequences of upper-half. This
+   --  encoding method can represent the entire range of Wide_Wide_Character.

    WCEM_Brackets : constant WC_Encoding_Method := 6;
    --  A wide character is represented using one of the following sequences:
@@ -161,7 +165,10 @@ package System.WCh_Con is
    --  ["xxxxxx"]
    --  ["xxxxxxxx"]
    --
-   --  where xx are hexadecimal digits representing the character code.
+   --  where xx are hexadecimal digits representing the character code. This
+   --  encoding method can represent the entire range of Wide_Wide_Character
+   --  but in the general case results in ambiguous representations (there is
+   --  no ambiguity in Ada sources, since the above sequences are illegal Ada).

    WC_Encoding_Letters : constant array (WC_Encoding_Method) of Character :=
      (WCEM_Hex => 'h',
@@ -183,10 +190,20 @@ package System.WCh_Con is
    --  Encoding methods using an upper half character (16#80#..16#FF) at
    --  the start of the sequence.

-   WC_Longest_Sequence : constant := 10;
+   WC_Longest_Sequence : constant := 12;
    --  The longest number of characters that can be used for a wide character
    --  or wide wide character sequence for any of the active encoding methods.

+   WC_Longest_Sequences : constant array (WC_Encoding_Method) of Natural :=
+     (WCEM_Hex => 5,
+      WCEM_Upper => 2,
+      WCEM_Shift_JIS => 2,
+      WCEM_EUC => 2,
+      WCEM_UTF8 => 6,
+      WCEM_Brackets => 12);
+   --  The longest number of characters that can be used for a wide character
+   --  or wide wide character sequence using the given encoding method.
+
    function Get_WC_Encoding_Method (C : Character) return WC_Encoding_Method;
    --  Given a character C, returns corresponding encoding method (see array
    --  WC_Encoding_Letters above). Raises Constraint_Error if not in list.
@@ -196,4 +213,12 @@ package System.WCh_Con is
    --  utf8, brackets, return the corresponding encoding method. Raises
    --  Constraint_Error if not in list.

+   function Is_Start_Of_Encoding
+     (C : Character;
+      EM : WC_Encoding_Method) return Boolean;
+   pragma Inline (Is_Start_Of_Encoding);
+   --  Returns True if the Character C is the start of a multi-character
+   --  encoding sequence for the given encoding method EM. If EM is set to
+   --  WCEM_Brackets, this function always returns False.
+
 end System.WCh_Con;