diff options
author | charlet <charlet@138bc75d-0d04-0410-961f-82ee72b054a4> | 2009-07-23 12:27:37 +0000 |
---|---|---|
committer | charlet <charlet@138bc75d-0d04-0410-961f-82ee72b054a4> | 2009-07-23 12:27:37 +0000 |
commit | 56e7611e96e9f52c328c393613503a227998e22f (patch) | |
tree | d72eed2b844820d6ec143ac42455485d1db1d8a8 /gcc/ada/s-regexp.adb | |
parent | 871bfb01e5cdc94c6de73e9a291d85f2ea969aed (diff) | |
download | gcc-56e7611e96e9f52c328c393613503a227998e22f.tar.gz |
2009-07-23 Yannick Moy <moy@adacore.com>
* s-regexp.adb (Check_Well_Formed_Pattern): Called before compiling the
pattern.
(Raise_Exception_If_No_More_Chars): Remove extra blank in exception
string.
(Raise_Exception): Ditto.
2009-07-23 Olivier Hainque <hainque@adacore.com>
* g-sse.ads: Simplify comment.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@150000 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/ada/s-regexp.adb')
-rwxr-xr-x | gcc/ada/s-regexp.adb | 280 |
1 files changed, 278 insertions, 2 deletions
diff --git a/gcc/ada/s-regexp.adb b/gcc/ada/s-regexp.adb index 48ebd4428fc..02d0a99f492 100755 --- a/gcc/ada/s-regexp.adb +++ b/gcc/ada/s-regexp.adb @@ -129,6 +129,14 @@ package body System.Regexp is -- Number of significant characters in the regular expression. -- This total does not include special operators, such as *, (, ... + procedure Check_Well_Formed_Pattern; + -- Check that the pattern to compile is well-formed, so that + -- subsequent code can rely on this without performing each time + -- the checks to avoid accessing the pattern outside its bounds. + -- Except that, not all well-formedness rules are checked. + -- In particular, the rules about special characters not being + -- treated as regular characters are not checked. + procedure Create_Mapping; -- Creates a mapping between characters in the regexp and columns -- in the tables representing the regexp. Test that the regexp is @@ -180,6 +188,270 @@ package body System.Regexp is pragma No_Return (Raise_Exception); -- Raise an exception, indicating an error at character Index in S + ------------------------------- + -- Check_Well_Formed_Pattern -- + ------------------------------- + + procedure Check_Well_Formed_Pattern is + + J : Integer := S'First; + Past_Elmt : Boolean := False; + -- Set to True everywhere an elmt has been parsed, if Glob=False, + -- meaning there can be now an occurence of '*', '+' and '?'. + Past_Term : Boolean := False; + -- Set to True everywhere a term has been parsed, if Glob=False, + -- meaning there can be now an occurence of '|'. + Parenthesis_Level : Integer := 0; + Curly_Level : Integer := 0; + Last_Open : Integer := S'First - 1; + -- The last occurence of an opening parenthesis, if Glob=False, + -- or the last occurence of an opening curly brace, if Glob=True. + + procedure Raise_Exception_If_No_More_Chars (K : Integer := 0); + + -------------------------------------- + -- Raise_Exception_If_No_More_Chars -- + -------------------------------------- + + procedure Raise_Exception_If_No_More_Chars (K : Integer := 0) is + begin + if J + K > S'Last then + Raise_Exception + ("Ill-formed pattern while parsing", J); + end if; + end Raise_Exception_If_No_More_Chars; + + -- Start of processing for Check_Well_Formed_Pattern + + begin + while J <= S'Last loop + case S (J) is + when Open_Bracket => + J := J + 1; + Raise_Exception_If_No_More_Chars; + + if not Glob then + if S (J) = '^' then + J := J + 1; + Raise_Exception_If_No_More_Chars; + end if; + end if; + + -- The first character never has a special meaning + + if S (J) = ']' or else S (J) = '-' then + J := J + 1; + Raise_Exception_If_No_More_Chars; + end if; + + -- The set of characters cannot be empty + + if S (J) = ']' then + Raise_Exception + ("Set of characters cannot be empty in regular " + & "expression", J); + end if; + + declare + Possible_Range_Start : Boolean := True; + -- Set to True everywhere a range character '-' + -- can occur. + begin + loop + exit when S (J) = Close_Bracket; + + -- The current character should be followed by + -- a closing bracket. + + Raise_Exception_If_No_More_Chars (1); + + if S (J) = '-' + and then S (J + 1) /= Close_Bracket + then + if not Possible_Range_Start then + Raise_Exception + ("No mix of ranges is allowed in " + & "regular expression", J); + end if; + + J := J + 1; + Raise_Exception_If_No_More_Chars; + + -- Range cannot be followed by '-' character, + -- except as last character in the set. + + Possible_Range_Start := False; + else + Possible_Range_Start := True; + end if; + + if S (J) = '\' then + J := J + 1; + Raise_Exception_If_No_More_Chars; + end if; + + J := J + 1; + end loop; + end; + + -- A closing bracket can end an elmt or term + + Past_Elmt := True; + Past_Term := True; + + when Close_Bracket => + -- A close bracket must follow a open_bracket, + -- and cannot be found alone on the line. + + Raise_Exception + ("Incorrect character ']' in regular expression", J); + + when '\' => + if J < S'Last then + J := J + 1; + + -- Any character can be an elmt or a term + + Past_Elmt := True; + Past_Term := True; + else + -- \ not allowed at the end of the regexp + + Raise_Exception + ("Incorrect character '\' in regular expression", J); + end if; + + when Open_Paren => + if not Glob then + Parenthesis_Level := Parenthesis_Level + 1; + Last_Open := J; + + -- An open parenthesis does not end an elmt or term + + Past_Elmt := False; + Past_Term := False; + end if; + + when Close_Paren => + if not Glob then + Parenthesis_Level := Parenthesis_Level - 1; + + if Parenthesis_Level < 0 then + Raise_Exception + ("')' is not associated with '(' in regular " + & "expression", J); + end if; + + if J = Last_Open + 1 then + Raise_Exception + ("Empty parentheses not allowed in regular " + & "expression", J); + end if; + + if not Past_Term then + Raise_Exception + ("Closing parenthesis not allowed here in regular " + & "expression", J); + end if; + + -- A closing parenthesis can end an elmt or term + + Past_Elmt := True; + Past_Term := True; + end if; + + when '{' => + if Glob then + Curly_Level := Curly_Level + 1; + Last_Open := J; + else + -- Any character can be an elmt or a term + + Past_Elmt := True; + Past_Term := True; + end if; + + -- No need to check for ',' as the code always accepts them + + when '}' => + if Glob then + Curly_Level := Curly_Level - 1; + + if Curly_Level < 0 then + Raise_Exception + ("'}' is not associated with '{' in regular " + & "expression", J); + end if; + + if J = Last_Open + 1 then + Raise_Exception + ("Empty curly braces not allowed in regular " + & "expression", J); + end if; + else + -- Any character can be an elmt or a term + + Past_Elmt := True; + Past_Term := True; + end if; + + when '*' | '?' | '+' => + if not Glob then + -- These operators must apply to an elmt sub-expression, + -- and cannot be found if one has not just been parsed. + + if not Past_Elmt then + Raise_Exception + ("'*', '+' and '?' operators must be " + & "applied to an element in regular expression", J); + end if; + + Past_Elmt := False; + Past_Term := True; + end if; + + when '|' => + if not Glob then + -- This operator must apply to a term sub-expression, + -- and cannot be found if one has not just been parsed. + + if not Past_Term then + Raise_Exception + ("'|' operator must be " + & "applied to a term in regular expression", J); + end if; + + Past_Elmt := False; + Past_Term := False; + end if; + + when others => + if not Glob then + -- Any character can be an elmt or a term + + Past_Elmt := True; + Past_Term := True; + end if; + end case; + + J := J + 1; + end loop; + + -- A closing parenthesis must follow an open parenthesis + + if Parenthesis_Level /= 0 then + Raise_Exception + ("'(' must always be associated with a ')'", J); + end if; + + -- A closing curly brace must follow an open curly brace + + if Curly_Level /= 0 then + Raise_Exception + ("'{' must always be associated with a '}'", J); + end if; + end Check_Well_Formed_Pattern; + -------------------- -- Create_Mapping -- -------------------- @@ -1224,7 +1496,7 @@ package body System.Regexp is procedure Raise_Exception (M : String; Index : Integer) is begin - raise Error_In_Regexp with M & " at offset " & Index'Img; + raise Error_In_Regexp with M & " at offset" & Index'Img; end Raise_Exception; -- Start of processing for Compile @@ -1247,12 +1519,16 @@ package body System.Regexp is System.Case_Util.To_Lower (S); end if; + -- Check the pattern is well-formed before any treatment + + Check_Well_Formed_Pattern; + Create_Mapping; -- Creates the primary table declare - Table : Regexp_Array_Access; + Table : Regexp_Array_Access; Num_States : State_Index; Start_State : State_Index; End_State : State_Index; |