2013-10-14 Robert Dewar <dewar@adacore.com>

* einfo.ads, einfo.adb (Default_Aspect_Component_Value): Is on base type only. * exp_aggr.adb (Expand_Array_Aggregate): Handle proper initialization of <> component. * exp_ch3.adb, exp_tss.adb: Minor reformatting * sem_ch13.adb (Default_Aspect_Component_Value, Default_Aspect_Value): Is on base type only. * sinfo.ads: Minor comment revision. 2013-10-14 Robert Dewar <dewar@adacore.com> * g-decstr.adb (Decode_Wide_Wide_Character): Fix failure to detect invalid sequences where longer than necessary sequences are used for encoding. (Validate_Wide_Character): Call Decode_Wide_Character to get the above validations. (Validate_Wide_Wide_Character): Same fix * g-decstr.ads: Add documentation making it clear that the UTF-8 implementation here recognizes all valid UTF-8 sequences, rather than the well-formed subset corresponding to characters defined in Unicode. (Next_Wide_Character): Remove comment about this being more efficient than Decode_Wide_Character (because this is no longer the case). (Prev_Wide_Character): Add note that valid encoding is assumed. 2013-10-14 Robert Dewar <dewar@adacore.com> * a-wichha.adb (Character_Set_Version): New function. * a-wichha.ads: Remove comments for pragma Pure (final RM has this). (Character_Set_Version): New function. * gnat_rm.texi: Update doc. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@203527 138bc75d-0d04-0410-961f-82ee72b054a4
author: charlet <charlet@138bc75d-0d04-0410-961f-82ee72b054a4> 2013-10-14 12:45:14 +0000
committer: charlet <charlet@138bc75d-0d04-0410-961f-82ee72b054a4> 2013-10-14 12:45:14 +0000
commit: f3d70f087cb9cf6b2772fbbd3ba301ae3daa5480 (patch)
tree: 66eac7cd5ac22d91d26fdd7aca350b7e25d39cd7 /gcc/ada/g-decstr.ads
parent: bd9c8f692a3680eabe0ed899f03ef97125b7f1c6 (diff)
download: gcc-f3d70f087cb9cf6b2772fbbd3ba301ae3daa5480.tar.gz
1 files changed, 19 insertions, 4 deletions
diff --git a/gcc/ada/g-decstr.ads b/gcc/ada/g-decstr.ads
index e4d7b7f1633..d59f10dcb20 100644
--- a/gcc/ada/g-decstr.ads
+++ b/gcc/ada/g-decstr.ads
@@ -6,7 +6,7 @@
 --                                                                          --
 --                                 S p e c                                  --
 --                                                                          --
---                       Copyright (C) 2007-2010, AdaCore                   --
+--                     Copyright (C) 2007-2013, AdaCore                     --
 --                                                                          --
 -- GNAT is free software;  you can  redistribute it  and/or modify it under --
 -- terms of the  GNU General Public License as published  by the Free Soft- --
@@ -47,6 +47,17 @@
 --  does not make any assumptions about the character coding. See also the
 --  packages Ada.Wide_[Wide_]Characters.Unicode for unicode specific functions.
 
+--  In particular, in the case of UTF-8, all valid UTF-8 encodings, as listed
+--  in table 3.6 of the Unicode Standard, version 6.2.0, are recognized as
+--  legitimate. This includes the full range 16#0000_0000# .. 16#03FF_FFFF#.
+--  It also includes codes in the range 16#D800# .. 16#DFFF#. These codes all
+--  have UTF-8 encoding sequences that are well-defined (e.g. the encoding for
+--  16#D800# is ED A0 80). But these codes do not correspond to defined Unicode
+--  characters and are thus considered to be "not well-formed" (see table 3.7
+--  of the Unicode Standard). If you need to exclude these codes, you must do
+--  that manually, e.g. use Decode_Wide_Character/Decode_Wide_String and check
+--  that the resulting code(s) are not in this range.
+
 --  Note on the use of brackets encoding (WCEM_Brackets). The brackets encoding
 --  method is ambiguous in the context of this package, since there is no way
 --  to tell if ["1234"] is eight unencoded characters or one encoded character.
@@ -86,7 +97,6 @@ package GNAT.Decode_String is
    --  will be raised.
 
    function Decode_Wide_Wide_String (S : String) return Wide_Wide_String;
-   pragma Inline (Decode_Wide_Wide_String);
    --  Same as above function but for Wide_Wide_String output
 
    procedure Decode_Wide_Wide_String
@@ -124,16 +134,17 @@ package GNAT.Decode_String is
      (Input  : String;
       Ptr    : in out Natural;
       Result : out Wide_Wide_Character);
+   pragma Inline (Decode_Wide_Wide_Character);
    --  Same as above procedure but with Wide_Wide_Character input
 
    procedure Next_Wide_Character (Input : String; Ptr : in out Natural);
+   pragma Inline (Next_Wide_Character);
    --  This procedure examines the input string starting at Input (Ptr), and
    --  advances Ptr past one character in the encoded string, so that on return
    --  Ptr points to the next encoded character. Constraint_Error is raised if
    --  an invalid encoding is encountered, or the end of the string is reached
    --  or if Ptr is less than String'First on entry, or if the character
-   --  skipped is not a valid Wide_Character code. This call may be more
-   --  efficient than calling Decode_Wide_Character and discarding the result.
+   --  skipped is not a valid Wide_Character code.
 
    procedure Prev_Wide_Character (Input : String; Ptr : in out Natural);
    --  This procedure is similar to Next_Encoded_Character except that it moves
@@ -149,8 +160,12 @@ package GNAT.Decode_String is
    --  WCEM_Brackets). For all other encodings, we work by starting at the
    --  beginning of the string and moving forward till Ptr is reached, which
    --  is correct but slow.
+   --
+   --  Note: this routine assumes that the sequence prior to Ptr is correctly
+   --  encoded; it does not have a defined behavior if this is not the case.
 
    procedure Next_Wide_Wide_Character (Input : String; Ptr : in out Natural);
+   pragma Inline (Next_Wide_Wide_Character);
    --  Similar to Next_Wide_Character except that codes skipped must be valid
    --  Wide_Wide_Character codes.
author	charlet <charlet@138bc75d-0d04-0410-961f-82ee72b054a4>	2013-10-14 12:45:14 +0000
committer	charlet <charlet@138bc75d-0d04-0410-961f-82ee72b054a4>	2013-10-14 12:45:14 +0000
commit	f3d70f087cb9cf6b2772fbbd3ba301ae3daa5480 (patch)
tree	66eac7cd5ac22d91d26fdd7aca350b7e25d39cd7 /gcc/ada/g-decstr.ads
parent	bd9c8f692a3680eabe0ed899f03ef97125b7f1c6 (diff)
download	gcc-f3d70f087cb9cf6b2772fbbd3ba301ae3daa5480.tar.gz