summaryrefslogtreecommitdiff
path: root/gcc/ada/g-decstr.ads
diff options
context:
space:
mode:
authorcharlet <charlet@138bc75d-0d04-0410-961f-82ee72b054a4>2013-10-14 12:45:14 +0000
committercharlet <charlet@138bc75d-0d04-0410-961f-82ee72b054a4>2013-10-14 12:45:14 +0000
commitf3d70f087cb9cf6b2772fbbd3ba301ae3daa5480 (patch)
tree66eac7cd5ac22d91d26fdd7aca350b7e25d39cd7 /gcc/ada/g-decstr.ads
parentbd9c8f692a3680eabe0ed899f03ef97125b7f1c6 (diff)
downloadgcc-f3d70f087cb9cf6b2772fbbd3ba301ae3daa5480.tar.gz
2013-10-14 Robert Dewar <dewar@adacore.com>
* einfo.ads, einfo.adb (Default_Aspect_Component_Value): Is on base type only. * exp_aggr.adb (Expand_Array_Aggregate): Handle proper initialization of <> component. * exp_ch3.adb, exp_tss.adb: Minor reformatting. * sem_ch13.adb (Default_Aspect_Component_Value, Default_Aspect_Value): Is on base type only. * sinfo.ads: Minor comment revision. 2013-10-14 Robert Dewar <dewar@adacore.com> * g-decstr.adb (Decode_Wide_Wide_Character): Fix failure to detect invalid sequences where longer-than-necessary sequences are used for encoding. (Validate_Wide_Character): Call Decode_Wide_Character to get the above validations. (Validate_Wide_Wide_Character): Same fix. * g-decstr.ads: Add documentation making it clear that the UTF-8 implementation here recognizes all valid UTF-8 sequences, rather than the well-formed subset corresponding to characters defined in Unicode. (Next_Wide_Character): Remove comment about this being more efficient than Decode_Wide_Character (because this is no longer the case). (Prev_Wide_Character): Add note that valid encoding is assumed. 2013-10-14 Robert Dewar <dewar@adacore.com> * a-wichha.adb (Character_Set_Version): New function. * a-wichha.ads: Remove comments for pragma Pure (final RM has this). (Character_Set_Version): New function. * gnat_rm.texi: Update doc. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@203527 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/ada/g-decstr.ads')
-rw-r--r--gcc/ada/g-decstr.ads23
1 files changed, 19 insertions, 4 deletions
diff --git a/gcc/ada/g-decstr.ads b/gcc/ada/g-decstr.ads
index e4d7b7f1633..d59f10dcb20 100644
--- a/gcc/ada/g-decstr.ads
+++ b/gcc/ada/g-decstr.ads
@@ -6,7 +6,7 @@
-- --
-- S p e c --
-- --
--- Copyright (C) 2007-2010, AdaCore --
+-- Copyright (C) 2007-2013, AdaCore --
-- --
-- GNAT is free software; you can redistribute it and/or modify it under --
-- terms of the GNU General Public License as published by the Free Soft- --
@@ -47,6 +47,17 @@
-- does not make any assumptions about the character coding. See also the
-- packages Ada.Wide_[Wide_]Characters.Unicode for unicode specific functions.
+-- In particular, in the case of UTF-8, all valid UTF-8 encodings, as listed
+-- in table 3.6 of the Unicode Standard, version 6.2.0, are recognized as
+-- legitimate. This includes the full range 16#0000_0000# .. 16#03FF_FFFF#,
+-- and in particular codes in the range 16#D800# .. 16#DFFF#. These codes all
+-- have UTF-8 encoding sequences that are well-defined (e.g. the encoding for
+-- 16#D800# is ED A0 80). But these codes do not correspond to defined Unicode
+-- characters and are thus considered to be "not well-formed" (see table 3.7
+-- of the Unicode Standard). If you need to exclude these codes, you must do
+-- that manually, e.g. use Decode_Wide_Character/Decode_Wide_String and check
+-- that the resulting code(s) are not in this range.
+
-- Note on the use of brackets encoding (WCEM_Brackets). The brackets encoding
-- method is ambiguous in the context of this package, since there is no way
-- to tell if ["1234"] is eight unencoded characters or one encoded character.
@@ -86,7 +97,6 @@ package GNAT.Decode_String is
-- will be raised.
function Decode_Wide_Wide_String (S : String) return Wide_Wide_String;
- pragma Inline (Decode_Wide_Wide_String);
-- Same as above function but for Wide_Wide_String output
procedure Decode_Wide_Wide_String
@@ -124,16 +134,17 @@ package GNAT.Decode_String is
(Input : String;
Ptr : in out Natural;
Result : out Wide_Wide_Character);
+ pragma Inline (Decode_Wide_Wide_Character);
-- Same as above procedure but with Wide_Wide_Character output
procedure Next_Wide_Character (Input : String; Ptr : in out Natural);
+ pragma Inline (Next_Wide_Character);
-- This procedure examines the input string starting at Input (Ptr), and
-- advances Ptr past one character in the encoded string, so that on return
-- Ptr points to the next encoded character. Constraint_Error is raised if
-- an invalid encoding is encountered, or the end of the string is reached
-- or if Ptr is less than Input'First on entry, or if the character
- -- skipped is not a valid Wide_Character code. This call may be more
- -- efficient than calling Decode_Wide_Character and discarding the result.
+ -- skipped is not a valid Wide_Character code.
procedure Prev_Wide_Character (Input : String; Ptr : in out Natural);
-- This procedure is similar to Next_Wide_Character except that it moves
@@ -149,8 +160,12 @@ package GNAT.Decode_String is
-- WCEM_Brackets). For all other encodings, we work by starting at the
-- beginning of the string and moving forward till Ptr is reached, which
-- is correct but slow.
+ --
+ -- Note: this routine assumes that the sequence prior to Ptr is correctly
+ -- encoded; its behavior is undefined if this is not the case.
procedure Next_Wide_Wide_Character (Input : String; Ptr : in out Natural);
+ pragma Inline (Next_Wide_Wide_Character);
-- Similar to Next_Wide_Character except that codes skipped must be valid
-- Wide_Wide_Character codes.