summaryrefslogtreecommitdiff
path: root/gcc/ada/scn-slit.adb
blob: e1ded8758939ff2cd9ffd4af31015a96fad19148 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
------------------------------------------------------------------------------
--                                                                          --
--                         GNAT COMPILER COMPONENTS                         --
--                                                                          --
--                             S C N . S L I T                              --
--                                                                          --
--                                 B o d y                                  --
--                                                                          --
--          Copyright (C) 1992-2001 Free Software Foundation, Inc.          --
--                                                                          --
-- GNAT is free software;  you can  redistribute it  and/or modify it under --
-- terms of the  GNU General Public License as published  by the Free Soft- --
-- ware  Foundation;  either version 2,  or (at your option) any later ver- --
-- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
-- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
-- or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License --
-- for  more details.  You should have  received  a copy of the GNU General --
-- Public License  distributed with GNAT;  see file COPYING.  If not, write --
-- to  the Free Software Foundation,  59 Temple Place - Suite 330,  Boston, --
-- MA 02111-1307, USA.                                                      --
--                                                                          --
-- GNAT was originally developed  by the GNAT team at  New York University. --
-- Extensive contributions were provided by Ada Core Technologies Inc.      --
--                                                                          --
------------------------------------------------------------------------------

with Stringt; use Stringt;

separate (Scn)
procedure Slit is

   --  Slit scans a single string literal. On entry Scan_Ptr points to the
   --  opening delimiter. The characters of the literal are accumulated using
   --  Start_String, Store_String_Char and End_String, and Set_String is then
   --  called to set Token, Token_Node and (for operator symbols) Token_Name.

   Delimiter : Character;
   --  Delimiter (first character of string). The scan stops at the next
   --  occurrence of this character that is not immediately doubled.

   C : Character;
   --  Current source program character

   Code : Char_Code;
   --  Current character code value, i.e. the value that is passed to
   --  Store_String_Char for the character just scanned.

   Err : Boolean;
   --  Error flag for Scan_Wide call

   String_Literal_Id : String_Id;
   --  Id for currently scanned string value (the result of End_String)

   Wide_Character_Found : Boolean := False;
   --  Set True if wide character found, that is if some stored character
   --  code does not satisfy In_Character_Range.

   procedure Error_Bad_String_Char;
   --  Signal bad character in string/character literal. On entry Scan_Ptr
   --  points to the improper character encountered during the scan. Scan_Ptr
   --  is not modified, so it still points to the bad character on return.

   procedure Error_Unterminated_String;
   --  Procedure called if a line terminator character is encountered during
   --  scanning a string, meaning that the string is not properly terminated.
   --  May move Scan_Ptr backwards and remove characters already stored.

   procedure Set_String;
   --  Procedure used to distinguish between string and operator symbol.
   --  On entry the string has been scanned out, and its characters start
   --  at Token_Ptr and end one character before Scan_Ptr. On exit Token
   --  is set to Tok_String_Literal or Tok_Operator_Symbol as appropriate,
   --  and Token_Node is appropriately initialized. In addition, in the
   --  operator symbol case, Token_Name is appropriately set.

   ---------------------------
   -- Error_Bad_String_Char --
   ---------------------------

   procedure Error_Bad_String_Char is
      Bad_Char : constant Character := Source (Scan_Ptr);
      --  The offending character. Scan_Ptr itself is never changed here, so
      --  it still designates this character when the procedure returns.

   begin
      --  Exactly one message is posted: the one belonging to the first of
      --  the tests below that the offending character satisfies.

      if Bad_Char = HT then
         Error_Msg_S ("horizontal tab not allowed in string");
         return;
      end if;

      if Bad_Char = VT or else Bad_Char = FF then
         Error_Msg_S ("format effector not allowed in string");
         return;
      end if;

      if Bad_Char in Upper_Half_Character then
         Error_Msg_S ("(Ada 83) upper half character not allowed");
         return;
      end if;

      --  Anything else that reaches this procedure is reported as a
      --  control character.

      Error_Msg_S ("control character not allowed in string");
   end Error_Bad_String_Char;

   -------------------------------
   -- Error_Unterminated_String --
   -------------------------------

   procedure Error_Unterminated_String is

      procedure Back_Up;
      --  Retreat by one character: move Scan_Ptr back one position and
      --  remove the most recently stored string character.

      -------------
      -- Back_Up --
      -------------

      procedure Back_Up is
      begin
         Scan_Ptr := Scan_Ptr - 1;
         Unstore_String_Char;
      end Back_Up;

   begin
      --  The end of the line was reached without a closing delimiter. Rather
      --  than flagging the end of the line, we try to guess where the quote
      --  was meant to go. Typical cases are:

      --     A := "this is an unterminated string;
      --     A := "this is an unterminated string &
      --     P(A, "this is a parameter that didn't get terminated);

      --  and, with the wrong closing character (which gets its own message):

      --     A := "this string uses the wrong terminator'
      --     A := "this string uses the wrong terminator' &

      --  The scan pointer is actually moved back to the place where the flag
      --  is posted, since that seems the best guess at the intended end of
      --  the string, and the characters backed over are removed from the
      --  stored string value.

      --  First skip back over trailing blanks and ampersands. Only blanks
      --  need testing for white space, since a tab inside a string has
      --  already caused an error message.

      loop
         exit when Source (Scan_Ptr - 1) /= ' '
           and then Source (Scan_Ptr - 1) /= '&';
         Back_Up;
      end loop;

      if Delimiter /= ''' and then Source (Scan_Ptr - 1) = ''' then

         --  String closed with a single quote instead of its own delimiter.
         --  This is not reported when the string was also opened with a
         --  single quote, since a message was already given for that. The
         --  quote is dropped from the stored value and flagged in place;
         --  Scan_Ptr is left just past it.

         Unstore_String_Char;
         Error_Msg ("incorrect string terminator character", Scan_Ptr - 1);

      else
         --  Back up over a trailing semicolon, and over a right paren
         --  directly in front of that semicolon, before posting the flag.

         if Source (Scan_Ptr - 1) = ';' then
            Back_Up;

            if Source (Scan_Ptr - 1) = ')' then
               Back_Up;
            end if;
         end if;

         Error_Msg_S ("missing string quote");
      end if;
   end Error_Unterminated_String;

   ----------------
   -- Set_String --
   ----------------

   procedure Set_String is
      Body_Len : constant Int := Int (Scan_Ptr - Token_Ptr - 2);
      --  Count of source characters lying strictly between Token_Ptr and
      --  Scan_Ptr - 1, i.e. between the two delimiters when both are present

      function Either (Ch, Upper, Lower : Character) return Boolean;
      --  True if Ch is equal to Upper or to Lower. Used to test for a given
      --  letter without regard to case.

      ------------
      -- Either --
      ------------

      function Either (Ch, Upper, Lower : Character) return Boolean is
      begin
         return Ch = Upper or else Ch = Lower;
      end Either;

   begin
      --  Token_Name is currently set to Error_Name. If the text between the
      --  delimiters spells an operator, Token_Name is changed below to the
      --  matching Name_Op_xx value. Otherwise it keeps the value Error_Name.
      --  Only texts of one, two or three characters can be operators.

      case Body_Len is

         --  Single character operators

         when 1 =>
            declare
               Ch : constant Character := Source (Token_Ptr + 1);

            begin
               if Ch = '=' then
                  Token_Name := Name_Op_Eq;

               elsif Ch = '>' then
                  Token_Name := Name_Op_Gt;

               elsif Ch = '<' then
                  Token_Name := Name_Op_Lt;

               elsif Ch = '+' then
                  Token_Name := Name_Op_Add;

               elsif Ch = '-' then
                  Token_Name := Name_Op_Subtract;

               elsif Ch = '&' then
                  Token_Name := Name_Op_Concat;

               elsif Ch = '*' then
                  Token_Name := Name_Op_Multiply;

               elsif Ch = '/' then
                  Token_Name := Name_Op_Divide;
               end if;
            end;

         --  Two character operators: ** /= <= >= OR

         when 2 =>
            declare
               Ch1 : constant Character := Source (Token_Ptr + 1);
               Ch2 : constant Character := Source (Token_Ptr + 2);

            begin
               if Ch1 = '*' and then Ch2 = '*' then
                  Token_Name := Name_Op_Expon;

               elsif Ch1 = '/' and then Ch2 = '=' then
                  Token_Name := Name_Op_Ne;

               elsif Ch1 = '<' and then Ch2 = '=' then
                  Token_Name := Name_Op_Le;

               elsif Ch1 = '>' and then Ch2 = '=' then
                  Token_Name := Name_Op_Ge;

               elsif Either (Ch1, 'O', 'o')
                 and then Either (Ch2, 'R', 'r')
               then
                  Token_Name := Name_Op_Or;
               end if;
            end;

         --  Three character operators: AND ABS MOD NOT REM XOR

         when 3 =>
            declare
               Ch1 : constant Character := Source (Token_Ptr + 1);
               Ch2 : constant Character := Source (Token_Ptr + 2);
               Ch3 : constant Character := Source (Token_Ptr + 3);

            begin
               if Either (Ch1, 'A', 'a')
                 and then Either (Ch2, 'N', 'n')
                 and then Either (Ch3, 'D', 'd')
               then
                  Token_Name := Name_Op_And;

               elsif Either (Ch1, 'A', 'a')
                 and then Either (Ch2, 'B', 'b')
                 and then Either (Ch3, 'S', 's')
               then
                  Token_Name := Name_Op_Abs;

               elsif Either (Ch1, 'M', 'm')
                 and then Either (Ch2, 'O', 'o')
                 and then Either (Ch3, 'D', 'd')
               then
                  Token_Name := Name_Op_Mod;

               elsif Either (Ch1, 'N', 'n')
                 and then Either (Ch2, 'O', 'o')
                 and then Either (Ch3, 'T', 't')
               then
                  Token_Name := Name_Op_Not;

               elsif Either (Ch1, 'R', 'r')
                 and then Either (Ch2, 'E', 'e')
                 and then Either (Ch3, 'M', 'm')
               then
                  Token_Name := Name_Op_Rem;

               elsif Either (Ch1, 'X', 'x')
                 and then Either (Ch2, 'O', 'o')
                 and then Either (Ch3, 'R', 'r')
               then
                  Token_Name := Name_Op_Xor;
               end if;
            end;

         --  Any other length cannot be an operator symbol

         when others =>
            null;
      end case;

      --  Build the token. A Token_Name other than Error_Name means that an
      --  operator was recognized above; otherwise this is a plain string.

      if Token_Name /= Error_Name then
         Token := Tok_Operator_Symbol;
         Token_Node := New_Node (N_Operator_Symbol, Token_Ptr);
         Set_Chars (Token_Node, Token_Name);

      else
         Token := Tok_String_Literal;
         Token_Node := New_Node (N_String_Literal, Token_Ptr);
         Set_Has_Wide_Character (Token_Node, Wide_Character_Found);
      end if;

      --  Both kinds of node carry the scanned string value

      Set_Strval (Token_Node, String_Literal_Id);

   end Set_String;

----------
-- Slit --
----------

begin
   --  On entry, Scan_Ptr points to the opening character of the string which
   --  is either a percent, double quote, or apostrophe (single quote). The
   --  latter case is an error detected by the character literal circuit.

   --  Record the delimiter, include it in the checksum, open a new string
   --  value and step past the delimiter.

   Delimiter := Source (Scan_Ptr);
   Accumulate_Checksum (Delimiter);
   Start_String;
   Scan_Ptr := Scan_Ptr + 1;

   --  Loop to scan out characters of string literal. Each iteration either
   --  exits the loop, or leaves in Code the character code that is stored
   --  by the Store_String_Char call at the bottom of the loop.

   loop
      C := Source (Scan_Ptr);

      --  Delimiter character: a single occurrence ends the string, a doubled
      --  occurrence represents one delimiter character within the string.

      if C = Delimiter then
         Accumulate_Checksum (C);
         Scan_Ptr := Scan_Ptr + 1;
         exit when Source (Scan_Ptr) /= Delimiter;

         --  Doubled delimiter. Store a single copy and step over the second
         --  occurrence (both occurrences go into the checksum).

         Code := Get_Char_Code (C);
         Accumulate_Checksum (C);
         Scan_Ptr := Scan_Ptr + 1;

      else
         --  A double quote inside a percent delimited string is flagged, but
         --  it is still stored so that the scan can continue. Note that it
         --  is not added to the checksum.

         if C = '"' and then Delimiter = '%' then
            Error_Msg_S ("quote not allowed in percent delimited string");
            Code := Get_Char_Code (C);
            Scan_Ptr := Scan_Ptr + 1;

         --  Possible wide character sequence. This is the case for an ESC
         --  when the encoding method is one of the ESC methods, for an upper
         --  half character when upper half encoding is in use, and for a
         --  left bracket followed by a double quote and then a character
         --  accepted by Identifier_Char. Scan_Wide returns the character
         --  code in Code. This branch does not itself advance Scan_Ptr, so
         --  presumably Scan_Wide moves it past the sequence (to be confirmed
         --  against the declaration of Scan_Wide).

         elsif (C = ESC
                 and then
                Wide_Character_Encoding_Method in WC_ESC_Encoding_Method)
           or else
               (C in Upper_Half_Character
                 and then
                Upper_Half_Encoding)
           or else
               (C = '['
                 and then
                Source (Scan_Ptr + 1) = '"'
                 and then
                Identifier_Char (Source (Scan_Ptr + 2)))
         then
            Scan_Wide (Source, Scan_Ptr, Code, Err);
            Accumulate_Checksum (Code);

            --  If the sequence was bad, post an error and store a blank in
            --  its place.

            if Err then
               Error_Illegal_Wide_Character;
               Code := Get_Char_Code (' ');
            end if;

         --  Otherwise we have a single ordinary source character

         else
            Accumulate_Checksum (C);

            if C not in Graphic_Character then

               --  A line terminator means the string was never closed. The
               --  exit skips the Store_String_Char call below, so the line
               --  terminator itself is not stored.

               if C in Line_Terminator then
                  Error_Unterminated_String;
                  exit;

               --  An upper half character is rejected only if Ada_83 is set

               elsif C in Upper_Half_Character then
                  if Ada_83 then
                     Error_Bad_String_Char;
                  end if;

               --  Any other non-graphic character is always rejected

               else
                  Error_Bad_String_Char;
               end if;
            end if;

            --  The character is stored and skipped even if an error was
            --  posted for it above.

            Code := Get_Char_Code (C);
            Scan_Ptr := Scan_Ptr + 1;
         end if;
      end if;

      Store_String_Char (Code);

      --  Remember whether any stored code lies outside the range accepted
      --  by In_Character_Range. Set_String records this on the string
      --  literal node.

      if not In_Character_Range (Code) then
         Wide_Character_Found := True;
      end if;
   end loop;

   --  Close off the accumulated string value, then classify the token as a
   --  string literal or operator symbol and build its node.

   String_Literal_Id := End_String;
   Set_String;
   return;

end Slit;