summaryrefslogtreecommitdiff
path: root/gcc/ada/s-regexp.adb
diff options
context:
space:
mode:
authorcharlet <charlet@138bc75d-0d04-0410-961f-82ee72b054a4>2009-07-23 12:27:37 +0000
committercharlet <charlet@138bc75d-0d04-0410-961f-82ee72b054a4>2009-07-23 12:27:37 +0000
commit56e7611e96e9f52c328c393613503a227998e22f (patch)
treed72eed2b844820d6ec143ac42455485d1db1d8a8 /gcc/ada/s-regexp.adb
parent871bfb01e5cdc94c6de73e9a291d85f2ea969aed (diff)
downloadgcc-56e7611e96e9f52c328c393613503a227998e22f.tar.gz
2009-07-23 Yannick Moy <moy@adacore.com>
* s-regexp.adb (Check_Well_Formed_Pattern): Called before compiling the pattern. (Raise_Exception_If_No_More_Chars): Remove extra blank in exception string. (Raise_Exception): Ditto. 2009-07-23 Olivier Hainque <hainque@adacore.com> * g-sse.ads: Simplify comment. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@150000 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/ada/s-regexp.adb')
-rwxr-xr-xgcc/ada/s-regexp.adb280
1 files changed, 278 insertions, 2 deletions
diff --git a/gcc/ada/s-regexp.adb b/gcc/ada/s-regexp.adb
index 48ebd4428fc..02d0a99f492 100755
--- a/gcc/ada/s-regexp.adb
+++ b/gcc/ada/s-regexp.adb
@@ -129,6 +129,14 @@ package body System.Regexp is
-- Number of significant characters in the regular expression.
-- This total does not include special operators, such as *, (, ...
+ procedure Check_Well_Formed_Pattern;
+ -- Check that the pattern to compile is well-formed, so that
+ -- subsequent code can rely on this without performing each time
+ -- the checks to avoid accessing the pattern outside its bounds.
+ -- Except that, not all well-formedness rules are checked.
+ -- In particular, the rules about special characters not being
+ -- treated as regular characters are not checked.
+
procedure Create_Mapping;
-- Creates a mapping between characters in the regexp and columns
-- in the tables representing the regexp. Test that the regexp is
@@ -180,6 +188,270 @@ package body System.Regexp is
pragma No_Return (Raise_Exception);
-- Raise an exception, indicating an error at character Index in S
+ -------------------------------
+ -- Check_Well_Formed_Pattern --
+ -------------------------------
+
+ procedure Check_Well_Formed_Pattern is
+
+ J : Integer := S'First;
+ Past_Elmt : Boolean := False;
+ -- Set to True everywhere an elmt has been parsed, if Glob=False,
+ -- meaning there can be now an occurence of '*', '+' and '?'.
+ Past_Term : Boolean := False;
+ -- Set to True everywhere a term has been parsed, if Glob=False,
+ -- meaning there can be now an occurence of '|'.
+ Parenthesis_Level : Integer := 0;
+ Curly_Level : Integer := 0;
+ Last_Open : Integer := S'First - 1;
+ -- The last occurence of an opening parenthesis, if Glob=False,
+ -- or the last occurence of an opening curly brace, if Glob=True.
+
+ procedure Raise_Exception_If_No_More_Chars (K : Integer := 0);
+
+ --------------------------------------
+ -- Raise_Exception_If_No_More_Chars --
+ --------------------------------------
+
+ procedure Raise_Exception_If_No_More_Chars (K : Integer := 0) is
+ begin
+ if J + K > S'Last then
+ Raise_Exception
+ ("Ill-formed pattern while parsing", J);
+ end if;
+ end Raise_Exception_If_No_More_Chars;
+
+ -- Start of processing for Check_Well_Formed_Pattern
+
+ begin
+ while J <= S'Last loop
+ case S (J) is
+ when Open_Bracket =>
+ J := J + 1;
+ Raise_Exception_If_No_More_Chars;
+
+ if not Glob then
+ if S (J) = '^' then
+ J := J + 1;
+ Raise_Exception_If_No_More_Chars;
+ end if;
+ end if;
+
+ -- The first character never has a special meaning
+
+ if S (J) = ']' or else S (J) = '-' then
+ J := J + 1;
+ Raise_Exception_If_No_More_Chars;
+ end if;
+
+ -- The set of characters cannot be empty
+
+ if S (J) = ']' then
+ Raise_Exception
+ ("Set of characters cannot be empty in regular "
+ & "expression", J);
+ end if;
+
+ declare
+ Possible_Range_Start : Boolean := True;
+ -- Set to True everywhere a range character '-'
+ -- can occur.
+ begin
+ loop
+ exit when S (J) = Close_Bracket;
+
+ -- The current character should be followed by
+ -- a closing bracket.
+
+ Raise_Exception_If_No_More_Chars (1);
+
+ if S (J) = '-'
+ and then S (J + 1) /= Close_Bracket
+ then
+ if not Possible_Range_Start then
+ Raise_Exception
+ ("No mix of ranges is allowed in "
+ & "regular expression", J);
+ end if;
+
+ J := J + 1;
+ Raise_Exception_If_No_More_Chars;
+
+ -- Range cannot be followed by '-' character,
+ -- except as last character in the set.
+
+ Possible_Range_Start := False;
+ else
+ Possible_Range_Start := True;
+ end if;
+
+ if S (J) = '\' then
+ J := J + 1;
+ Raise_Exception_If_No_More_Chars;
+ end if;
+
+ J := J + 1;
+ end loop;
+ end;
+
+ -- A closing bracket can end an elmt or term
+
+ Past_Elmt := True;
+ Past_Term := True;
+
+ when Close_Bracket =>
+ -- A close bracket must follow a open_bracket,
+ -- and cannot be found alone on the line.
+
+ Raise_Exception
+ ("Incorrect character ']' in regular expression", J);
+
+ when '\' =>
+ if J < S'Last then
+ J := J + 1;
+
+ -- Any character can be an elmt or a term
+
+ Past_Elmt := True;
+ Past_Term := True;
+ else
+ -- \ not allowed at the end of the regexp
+
+ Raise_Exception
+ ("Incorrect character '\' in regular expression", J);
+ end if;
+
+ when Open_Paren =>
+ if not Glob then
+ Parenthesis_Level := Parenthesis_Level + 1;
+ Last_Open := J;
+
+ -- An open parenthesis does not end an elmt or term
+
+ Past_Elmt := False;
+ Past_Term := False;
+ end if;
+
+ when Close_Paren =>
+ if not Glob then
+ Parenthesis_Level := Parenthesis_Level - 1;
+
+ if Parenthesis_Level < 0 then
+ Raise_Exception
+ ("')' is not associated with '(' in regular "
+ & "expression", J);
+ end if;
+
+ if J = Last_Open + 1 then
+ Raise_Exception
+ ("Empty parentheses not allowed in regular "
+ & "expression", J);
+ end if;
+
+ if not Past_Term then
+ Raise_Exception
+ ("Closing parenthesis not allowed here in regular "
+ & "expression", J);
+ end if;
+
+ -- A closing parenthesis can end an elmt or term
+
+ Past_Elmt := True;
+ Past_Term := True;
+ end if;
+
+ when '{' =>
+ if Glob then
+ Curly_Level := Curly_Level + 1;
+ Last_Open := J;
+ else
+ -- Any character can be an elmt or a term
+
+ Past_Elmt := True;
+ Past_Term := True;
+ end if;
+
+ -- No need to check for ',' as the code always accepts them
+
+ when '}' =>
+ if Glob then
+ Curly_Level := Curly_Level - 1;
+
+ if Curly_Level < 0 then
+ Raise_Exception
+ ("'}' is not associated with '{' in regular "
+ & "expression", J);
+ end if;
+
+ if J = Last_Open + 1 then
+ Raise_Exception
+ ("Empty curly braces not allowed in regular "
+ & "expression", J);
+ end if;
+ else
+ -- Any character can be an elmt or a term
+
+ Past_Elmt := True;
+ Past_Term := True;
+ end if;
+
+ when '*' | '?' | '+' =>
+ if not Glob then
+ -- These operators must apply to an elmt sub-expression,
+ -- and cannot be found if one has not just been parsed.
+
+ if not Past_Elmt then
+ Raise_Exception
+ ("'*', '+' and '?' operators must be "
+ & "applied to an element in regular expression", J);
+ end if;
+
+ Past_Elmt := False;
+ Past_Term := True;
+ end if;
+
+ when '|' =>
+ if not Glob then
+ -- This operator must apply to a term sub-expression,
+ -- and cannot be found if one has not just been parsed.
+
+ if not Past_Term then
+ Raise_Exception
+ ("'|' operator must be "
+ & "applied to a term in regular expression", J);
+ end if;
+
+ Past_Elmt := False;
+ Past_Term := False;
+ end if;
+
+ when others =>
+ if not Glob then
+ -- Any character can be an elmt or a term
+
+ Past_Elmt := True;
+ Past_Term := True;
+ end if;
+ end case;
+
+ J := J + 1;
+ end loop;
+
+ -- A closing parenthesis must follow an open parenthesis
+
+ if Parenthesis_Level /= 0 then
+ Raise_Exception
+ ("'(' must always be associated with a ')'", J);
+ end if;
+
+ -- A closing curly brace must follow an open curly brace
+
+ if Curly_Level /= 0 then
+ Raise_Exception
+ ("'{' must always be associated with a '}'", J);
+ end if;
+ end Check_Well_Formed_Pattern;
+
--------------------
-- Create_Mapping --
--------------------
@@ -1224,7 +1496,7 @@ package body System.Regexp is
procedure Raise_Exception (M : String; Index : Integer) is
begin
- raise Error_In_Regexp with M & " at offset " & Index'Img;
+ raise Error_In_Regexp with M & " at offset" & Index'Img;
end Raise_Exception;
-- Start of processing for Compile
@@ -1247,12 +1519,16 @@ package body System.Regexp is
System.Case_Util.To_Lower (S);
end if;
+ -- Check the pattern is well-formed before any treatment
+
+ Check_Well_Formed_Pattern;
+
Create_Mapping;
-- Creates the primary table
declare
- Table : Regexp_Array_Access;
+ Table : Regexp_Array_Access;
Num_States : State_Index;
Start_State : State_Index;
End_State : State_Index;