diff options
author | charlet <charlet@138bc75d-0d04-0410-961f-82ee72b054a4> | 2007-08-31 10:23:37 +0000 |
---|---|---|
committer | charlet <charlet@138bc75d-0d04-0410-961f-82ee72b054a4> | 2007-08-31 10:23:37 +0000 |
commit | df20d1a8586235a388bc677a7199fe689bead575 (patch) | |
tree | 198081394ea01406fbc6cf4d9510dc38574a8add /gcc | |
parent | 78976972ae3ef0881f018102a8104db0e3e2ac67 (diff) | |
download | gcc-df20d1a8586235a388bc677a7199fe689bead575.tar.gz |
2007-08-31 Bob Duff <duff@adacore.com>
* par-ch4.adb (P_Simple_Expression): Fold long sequences of
concatenations of string literals into a single literal, in order to
avoid very deep recursion in the front end, which was causing stack
overflow.
* sem_eval.adb (Eval_Concatenation): If the left operand is the empty
string, and the right operand is a string literal (the case of "" &
"..."), optimize by avoiding copying the right operand -- just use the
value of the right operand directly.
* stringt.adb (Store_String_Chars): Optimize by growing the
String_Chars table all at once, rather than appending characters one by
one.
(Write_String_Table_Entry): If the string to be printed is very long,
just print the first few characters, followed by the length. Otherwise,
doing "pn(n)" in the debugger can take an extremely long time.
* sem_prag.adb (Process_Interface_Name): Replace loop doing
Store_String_Char with Store_String_Chars.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@127977 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ada/par-ch4.adb | 128 | ||||
-rw-r--r-- | gcc/ada/sem_eval.adb | 39 | ||||
-rw-r--r-- | gcc/ada/sem_prag.adb | 9 | ||||
-rw-r--r-- | gcc/ada/stringt.adb | 32 |
4 files changed, 174 insertions, 34 deletions
diff --git a/gcc/ada/par-ch4.adb b/gcc/ada/par-ch4.adb index 2d1adcdbb9d..8956e8654f8 100644 --- a/gcc/ada/par-ch4.adb +++ b/gcc/ada/par-ch4.adb @@ -28,6 +28,8 @@ pragma Style_Checks (All_Checks); -- Turn off subprogram body ordering check. Subprograms are in order -- by RM section rather than alphabetical +with Stringt; use Stringt; + separate (Par) package body Ch4 is @@ -1870,18 +1872,122 @@ package body Ch4 is Node1 := P_Term; end if; - -- Scan out sequence of terms separated by binary adding operators + -- In the following, we special-case a sequence of concatentations of + -- string literals, such as "aaa" & "bbb" & ... & "ccc", with nothing + -- else mixed in. For such a sequence, we return a tree representing + -- "" & "aaabbb...ccc" (a single concatenation). This is done only if + -- the number of concatenations is large. If semantic analysis + -- resolves the "&" to a predefined one, then this folding gives the + -- right answer. Otherwise, semantic analysis will complain about a + -- capacity-exceeded error. The purpose of this trick is to avoid + -- creating a deeply nested tree, which would cause deep recursion + -- during semantics, causing stack overflow. This way, we can handle + -- enormous concatenations in the normal case of predefined "&". We + -- first build up the normal tree, and then rewrite it if + -- appropriate. - loop - exit when Token not in Token_Class_Binary_Addop; - Tokptr := Token_Ptr; - Node2 := New_Node (P_Binary_Adding_Operator, Tokptr); - Scan; -- past operator - Set_Left_Opnd (Node2, Node1); - Set_Right_Opnd (Node2, P_Term); - Set_Op_Name (Node2); - Node1 := Node2; - end loop; + declare + Num_Concats_Threshold : constant Positive := 1000; + -- Arbitrary threshold value to enable optimization + + First_Node : constant Node_Id := Node1; + Is_Strlit_Concat : Boolean; + -- True iff we've parsed a sequence of concatenations of string + -- literals, with nothing else mixed in. + + Num_Concats : Natural; + -- Number of "&" operators if Is_Strlit_Concat is True + + begin + Is_Strlit_Concat := + Nkind (Node1) = N_String_Literal + and then Token = Tok_Ampersand; + Num_Concats := 0; + + -- Scan out sequence of terms separated by binary adding operators + + loop + exit when Token not in Token_Class_Binary_Addop; + Tokptr := Token_Ptr; + Node2 := New_Node (P_Binary_Adding_Operator, Tokptr); + Scan; -- past operator + Set_Left_Opnd (Node2, Node1); + Node1 := P_Term; + Set_Right_Opnd (Node2, Node1); + Set_Op_Name (Node2); + + -- Check if we're still concatenating string literals + + Is_Strlit_Concat := + Is_Strlit_Concat + and then Nkind (Node2) = N_Op_Concat + and then Nkind (Node1) = N_String_Literal; + + if Is_Strlit_Concat then + Num_Concats := Num_Concats + 1; + end if; + + Node1 := Node2; + end loop; + + -- If we have an enormous series of concatenations of string + -- literals, rewrite as explained above. The Is_Folded_In_Parser + -- flag tells semantic analysis that if the "&" is not predefined, + -- the folded value is wrong. + + if Is_Strlit_Concat + and then Num_Concats >= Num_Concats_Threshold + then + declare + Empty_String_Val : String_Id; + -- String_Id for "" + + Strlit_Concat_Val : String_Id; + -- Contains the folded value (which will be correct if the + -- "&" operators are the predefined ones). + + Cur_Node : Node_Id; + -- For walking up the tree + + New_Node : Node_Id; + -- Folded node to replace Node1 + + Loc : constant Source_Ptr := Sloc (First_Node); + + begin + -- Walk up the tree starting at the leftmost string literal + -- (First_Node), building up the Strlit_Concat_Val as we + -- go. Note that we do not use recursion here -- the whole + -- point is to avoid recursively walking that enormous tree. + + Start_String; + Store_String_Chars (Strval (First_Node)); + + Cur_Node := Parent (First_Node); + while Present (Cur_Node) loop + pragma Assert (Nkind (Cur_Node) = N_Op_Concat and then + Nkind (Right_Opnd (Cur_Node)) = N_String_Literal); + + Store_String_Chars (Strval (Right_Opnd (Cur_Node))); + Cur_Node := Parent (Cur_Node); + end loop; + + Strlit_Concat_Val := End_String; + + -- Create new folded node, and rewrite result with a concat- + -- enation of an empty string literal and the folded node. + + Start_String; + Empty_String_Val := End_String; + New_Node := + Make_Op_Concat (Loc, + Make_String_Literal (Loc, Empty_String_Val), + Make_String_Literal (Loc, Strlit_Concat_Val, + Is_Folded_In_Parser => True)); + Rewrite (Node1, New_Node); + end; + end if; + end; -- All done, we clearly do not have name or numeric literal so this -- is a case of a simple expression which is some other possibility. diff --git a/gcc/ada/sem_eval.adb b/gcc/ada/sem_eval.adb index dba6ae83946..465a86a3d58 100644 --- a/gcc/ada/sem_eval.adb +++ b/gcc/ada/sem_eval.adb @@ -1451,9 +1451,10 @@ package body Sem_Eval is -- concatenations with such aggregates. declare - Left_Str : constant Node_Id := Get_String_Val (Left); - Left_Len : Nat; - Right_Str : constant Node_Id := Get_String_Val (Right); + Left_Str : constant Node_Id := Get_String_Val (Left); + Left_Len : Nat; + Right_Str : constant Node_Id := Get_String_Val (Right); + Folded_Val : String_Id; begin -- Establish new string literal, and store left operand. We make @@ -1465,26 +1466,36 @@ package body Sem_Eval is if Nkind (Left_Str) = N_String_Literal then Left_Len := String_Length (Strval (Left_Str)); - Start_String (Strval (Left_Str)); + + -- If the left operand is the empty string, and the right operand + -- is a string literal (the case of "" & "..."), the result is the + -- value of the right operand. This optimization is important when + -- Is_Folded_In_Parser, to avoid copying an enormous right + -- operand. + + if Left_Len = 0 and then Nkind (Right_Str) = N_String_Literal then + Folded_Val := Strval (Right_Str); + else + Start_String (Strval (Left_Str)); + end if; + else Start_String; Store_String_Char (UI_To_CC (Char_Literal_Value (Left_Str))); Left_Len := 1; end if; - -- Now append the characters of the right operand + -- Now append the characters of the right operand, unless we + -- optimized the "" & "..." case above. if Nkind (Right_Str) = N_String_Literal then - declare - S : constant String_Id := Strval (Right_Str); - - begin - for J in 1 .. String_Length (S) loop - Store_String_Char (Get_String_Char (S, J)); - end loop; - end; + if Left_Len /= 0 then + Store_String_Chars (Strval (Right_Str)); + Folded_Val := End_String; + end if; else Store_String_Char (UI_To_CC (Char_Literal_Value (Right_Str))); + Folded_Val := End_String; end if; Set_Is_Static_Expression (N, Stat); @@ -1501,7 +1512,7 @@ package body Sem_Eval is Set_Etype (N, Etype (Right)); end if; - Fold_Str (N, End_String, Static => True); + Fold_Str (N, Folded_Val, Static => True); end if; end; end Eval_Concatenation; diff --git a/gcc/ada/sem_prag.adb b/gcc/ada/sem_prag.adb index e58cfc34808..1e54ac629f7 100644 --- a/gcc/ada/sem_prag.adb +++ b/gcc/ada/sem_prag.adb @@ -3736,13 +3736,10 @@ package body Sem_Prag is end if; String_Val := Strval (Expr_Value_S (Link_Nam)); - - for J in 1 .. String_Length (String_Val) loop - Store_String_Char (Get_String_Char (String_Val, J)); - end loop; - + Store_String_Chars (String_Val); Link_Nam := - Make_String_Literal (Sloc (Link_Nam), End_String); + Make_String_Literal (Sloc (Link_Nam), + Strval => End_String); end if; Set_Encoded_Interface_Name diff --git a/gcc/ada/stringt.adb b/gcc/ada/stringt.adb index e2720090264..88b72e056dd 100644 --- a/gcc/ada/stringt.adb +++ b/gcc/ada/stringt.adb @@ -202,10 +202,27 @@ package body Stringt is end Store_String_Chars; procedure Store_String_Chars (S : String_Id) is + + -- We are essentially doing this: + + -- for J in 1 .. String_Length (S) loop + -- Store_String_Char (Get_String_Char (S, J)); + -- end loop; + + -- but when the string is long it's more efficient to grow the + -- String_Chars table all at once. + + S_First : constant Int := Strings.Table (S).String_Index; + S_Len : constant Int := String_Length (S); + Old_Last : constant Int := String_Chars.Last; + New_Last : constant Int := Old_Last + S_Len; + begin - for J in 1 .. String_Length (S) loop - Store_String_Char (Get_String_Char (S, J)); - end loop; + String_Chars.Set_Last (New_Last); + String_Chars.Table (Old_Last + 1 .. New_Last) := + String_Chars.Table (S_First .. S_First + S_Len - 1); + Strings.Table (Strings.Last).Length := + Strings.Table (Strings.Last).Length + S_Len; end Store_String_Chars; ---------------------- @@ -417,6 +434,15 @@ package body Stringt is else Write_Char_Code (C); end if; + + -- If string is very long, quit + + if J >= 1000 then -- arbitrary limit + Write_Str ("""...etc (length = "); + Write_Int (String_Length (Id)); + Write_Str (")"); + return; + end if; end loop; Write_Char ('"'); |