summaryrefslogtreecommitdiff
path: root/m4/syntax.c
blob: 4bde123484090f3a5fd395bfb60c0f0141c2113d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
/* GNU m4 -- A simple macro processor
   Copyright (C) 1989-1994, 2002, 2004, 2006-2010, 2013-2014, 2017 Free
   Software Foundation, Inc.

   This file is part of GNU M4.

   GNU M4 is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   GNU M4 is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#include <config.h>

#include "m4private.h"

/* Define this to see runtime debug info.  Implied by DEBUG.  */
/*#define DEBUG_SYNTAX */

/* THE SYNTAX TABLE

   The input is read character by character and grouped together
   according to a syntax table.  The character groups are (definitions
   are all in m4module.h, those marked with a * are not yet in use):

   Basic (all characters fall in one of these mutually exclusive bins)
   M4_SYNTAX_IGNORE     *Character to be deleted from input as if not present
   M4_SYNTAX_OTHER      Any character with no special meaning to m4
   M4_SYNTAX_SPACE      Whitespace (ignored when leading macro arguments)
   M4_SYNTAX_OPEN       Open list of macro arguments
   M4_SYNTAX_CLOSE      Close list of macro arguments
   M4_SYNTAX_COMMA      Separates macro arguments
   M4_SYNTAX_ACTIVE     This character is a macro name by itself
   M4_SYNTAX_ESCAPE     Use this character to prefix all macro names

   M4_SYNTAX_ALPHA      Alphabetic characters (can start macro names)
   M4_SYNTAX_NUM        Numeric characters (can form macro names)

   M4_SYNTAX_LQUOTE     A single character left quote
   M4_SYNTAX_BCOMM      A single character begin comment delimiter

   Attribute (these are context sensitive, and exist in addition to basic)
   M4_SYNTAX_RQUOTE     A single character right quote
   M4_SYNTAX_ECOMM      A single character end comment delimiter
   M4_SYNTAX_DOLLAR     Indicates macro argument in user macros
   M4_SYNTAX_LBRACE     *Indicates start of extended macro argument
   M4_SYNTAX_RBRACE     *Indicates end of extended macro argument

   Besides adding new facilities, the use of a syntax table will reduce
   the number of calls to next_token ().  Now groups of OTHER, NUM and
   SPACE characters can be returned as a single token, since next_token
   () knows they have no special syntactical meaning to m4.  This is,
   however, only possible if only single character quotes and
   comments are used, because otherwise the quote and comment characters
   will not show up in the syntax-table.

   Having a syntax table allows new facilities.  The new builtin
   "changesyntax" allows the user to change the category of any
   character.

   By default, '\n' is both ECOMM and SPACE, depending on the context.
   Hence we have basic categories (mutually exclusive, can introduce a
   context, and can be empty sets), and attribute categories
   (additive, only recognized in context, and will never be empty).

   The precedence as implemented by next_token () is:

   M4_SYNTAX_IGNORE     *Filtered out below next_token ()
   M4_SYNTAX_ESCAPE     Reads macro name iff set, else next character
   M4_SYNTAX_ALPHA      Reads M4_SYNTAX_ALPHA and M4_SYNTAX_NUM as macro name
   M4_SYNTAX_LQUOTE     Reads all until balanced M4_SYNTAX_RQUOTE
   M4_SYNTAX_BCOMM      Reads all until M4_SYNTAX_ECOMM

   M4_SYNTAX_OTHER  }   Reads all M4_SYNTAX_OTHER, M4_SYNTAX_NUM
   M4_SYNTAX_NUM    }

   M4_SYNTAX_SPACE      Reads all M4_SYNTAX_SPACE, depending on buffering
   M4_SYNTAX_ACTIVE     Returns a single char as a macro name

   M4_SYNTAX_OPEN   }   Returned as a single char
   M4_SYNTAX_CLOSE  }
   M4_SYNTAX_COMMA  }

   M4_SYNTAX_RQUOTE and M4_SYNTAX_ECOMM are context-sensitive, and
   close out M4_SYNTAX_LQUOTE and M4_SYNTAX_BCOMM, respectively.
   Also, M4_SYNTAX_DOLLAR, M4_SYNTAX_LBRACE, and M4_SYNTAX_RBRACE are
   context-sensitive, only mattering when expanding macro definitions.

   There are several optimizations that can be performed depending on
   known states of the syntax table.  For example, when searching for
   quotes, if there is only a single start quote and end quote
   delimiter, we can use memchr2 and search a word at a time, instead
   of performing a table lookup a byte at a time.  The is_single_*
   flags track whether quotes and comments have a single delimiter
   (always the case if changequote/changecom were used, and
   potentially the case after changesyntax).  Since we frequently need
   to access quotes, we store the oldest valid quote outside the
   lookup table; the suspect flag tracks whether a cleanup pass is
   needed to restore our invariants.  On the other hand, coalescing
   multiple M4_SYNTAX_OTHER bytes could form a delimiter, so many
   optimizations must be disabled if a multi-byte delimiter exists;
   this is handled by m4__safe_quotes.  Meanwhile, quotes and comments
   can be disabled if the leading delimiter is length 0.  */

static int add_syntax_attribute         (m4_syntax_table *, char, int);
static int remove_syntax_attribute      (m4_syntax_table *, char, int);
static void set_quote_age               (m4_syntax_table *, bool, bool);

/* Allocate a zero-filled syntax table, populate the default category
   of every byte value, and bring the live table in line with those
   defaults.  The result is released with m4_syntax_delete.  */
m4_syntax_table *
m4_syntax_create (void)
{
  m4_syntax_table *syntax = (m4_syntax_table *) xzalloc (sizeof *syntax);
  int byte;

  /* Build the default table.  It is never modified afterwards and
     carries only basic categories, no context attributes.  */
  for (byte = UCHAR_MAX; byte >= 0; byte--)
    {
      int category;

      /* Punctuation with a fixed meaning is checked first; everything
         else is classified through <ctype.h>.  */
      if (byte == '(')
        category = M4_SYNTAX_OPEN;
      else if (byte == ')')
        category = M4_SYNTAX_CLOSE;
      else if (byte == ',')
        category = M4_SYNTAX_COMMA;
      else if (byte == '`')
        category = M4_SYNTAX_LQUOTE;
      else if (byte == '#')
        category = M4_SYNTAX_BCOMM;
      else if (isspace (byte))
        category = M4_SYNTAX_SPACE;
      else if (isalpha (byte) || byte == '_')
        category = M4_SYNTAX_ALPHA;
      else if (isdigit (byte))
        category = M4_SYNTAX_NUM;
      else
        category = M4_SYNTAX_OTHER;

      syntax->orig[byte] = category;
    }

  /* Copy the defaults into the live table and install the default
     delimiters.  */
  m4_reset_syntax (syntax);

  /* The "simple" cached pair always describes one-byte delimiters
     stored in the table's own two small buffers.  */
  syntax->cached_simple.len1 = 1;
  syntax->cached_simple.len2 = 1;
  syntax->cached_simple.str1 = syntax->cached_lquote;
  syntax->cached_simple.str2 = syntax->cached_rquote;
  return syntax;
}

/* Release SYNTAX together with the quote and comment delimiter
   strings it holds.  SYNTAX must not be NULL.  */
void
m4_syntax_delete (m4_syntax_table *syntax)
{
  assert (syntax);

  /* Delimiter strings first, then the table itself.  */
  free (syntax->comm.str2);
  free (syntax->comm.str1);
  free (syntax->quote.str2);
  free (syntax->quote.str1);
  free (syntax);
}

/* Translate the category key character CH into the matching
   M4_SYNTAX_* value.  Letter keys are accepted in either case.
   Return -1 when CH is not a recognized key.  */
int
m4_syntax_code (char ch)
{
  switch (ch)
    {
      /* Listed in the order of M4_SYNTAX_* in m4module.h.  */
      /* FIXME - revisit the ignore syntax attribute.  */
    case 'I':
    case 'i':
      return M4_SYNTAX_IGNORE;

      /* Basic categories.  */
    case '@':
      return M4_SYNTAX_ESCAPE;
    case 'W':
    case 'w':
      return M4_SYNTAX_ALPHA;
    case 'L':
    case 'l':
      return M4_SYNTAX_LQUOTE;
    case 'B':
    case 'b':
      return M4_SYNTAX_BCOMM;
    case 'A':
    case 'a':
      return M4_SYNTAX_ACTIVE;
    case 'D':
    case 'd':
      return M4_SYNTAX_NUM;
    case 'S':
    case 's':
      return M4_SYNTAX_SPACE;
    case '(':
      return M4_SYNTAX_OPEN;
    case ')':
      return M4_SYNTAX_CLOSE;
    case ',':
      return M4_SYNTAX_COMMA;
    case 'O':
    case 'o':
      return M4_SYNTAX_OTHER;

      /* Context categories.  */
    case '$':
      return M4_SYNTAX_DOLLAR;
    case '{':
      return M4_SYNTAX_LBRACE;
    case '}':
      return M4_SYNTAX_RBRACE;
    case 'R':
    case 'r':
      return M4_SYNTAX_RQUOTE;
    case 'E':
    case 'e':
      return M4_SYNTAX_ECOMM;

    default:
      return -1;
    }
}



/* Functions to manipulate the syntax table.  */
/* Give byte CH the syntax CODE in the live table of SYNTAX.  If CODE
   is a context attribute (any bit of M4_SYNTAX_MASKS), it is or'ed
   into the existing entry; otherwise CODE replaces the basic category
   while the attribute bits already present are kept.  The suspect
   flag is raised whenever an attribute is added, or when either the
   new or the previous value involves M4_SYNTAX_SUSPECT.  Return the
   updated table entry.  */
static int
add_syntax_attribute (m4_syntax_table *syntax, char ch, int code)
{
  const int idx = to_uchar (ch);

  if ((code & M4_SYNTAX_MASKS) == 0)
    {
      /* Basic category: sample the old entry before overwriting it.  */
      const bool needs_cleanup
        = ((code & (M4_SYNTAX_SUSPECT)) != 0
           || m4_has_syntax (syntax, idx, M4_SYNTAX_SUSPECT));

      syntax->table[idx] = ((syntax->table[idx] & M4_SYNTAX_MASKS) | code);
      if (needs_cleanup)
        syntax->suspect = true;
    }
  else
    {
      /* Context attribute: additive on top of the basic category.  */
      syntax->suspect = true;
      syntax->table[idx] |= code;
    }

#ifdef DEBUG_SYNTAX
  xfprintf(stderr, "Set syntax %o %c = %04X\n", idx, isprint(idx) ? idx : '-',
           syntax->table[idx]);
#endif

  return syntax->table[idx];
}

/* Clear the context attribute bits CODE from byte CH in the live
   table of SYNTAX and raise the suspect flag.  CODE must contain at
   least one bit of M4_SYNTAX_MASKS.  Return the updated table
   entry.  */
static int
remove_syntax_attribute (m4_syntax_table *syntax, char ch, int code)
{
  const int idx = to_uchar (ch);

  assert (code & M4_SYNTAX_MASKS);
  syntax->suspect = true;
  syntax->table[idx] &= ~code;

#ifdef DEBUG_SYNTAX
  xfprintf(stderr, "Unset syntax %o %c = %04X\n", idx, isprint(idx) ? idx : '-',
           syntax->table[idx]);
#endif

  return syntax->table[idx];
}

/* Put each of the LEN bytes in CHARS into syntax category CODE.  A
   basic category replaces whatever basic category the byte had; a
   context attribute is added on top of it.  */
static void
add_syntax_set (m4_syntax_table *syntax, const char *chars, size_t len,
                int code)
{
  size_t i;

  for (i = 0; i < len; i++)
    add_syntax_attribute (syntax, chars[i], code);
}

/* Take each of the LEN bytes in CHARS out of syntax category CODE.
   For a context attribute the bit is simply cleared; for a basic
   category, bytes currently in that category fall back to
   M4_SYNTAX_OTHER and all other bytes are left alone.  */
static void
subtract_syntax_set (m4_syntax_table *syntax, const char *chars, size_t len,
                     int code)
{
  const bool is_attribute = (code & M4_SYNTAX_MASKS) != 0;
  size_t i;

  for (i = 0; i < len; i++)
    {
      char ch = chars[i];

      if (is_attribute)
        remove_syntax_attribute (syntax, ch, code);
      else if (m4_has_syntax (syntax, ch, code))
        add_syntax_attribute (syntax, ch, M4_SYNTAX_OTHER);
    }
}

/* Make syntax category CODE consist of exactly the LEN bytes in
   CHARS.  Every byte is first evicted from CODE (attributes are
   cleared, basic-category members become M4_SYNTAX_OTHER), then the
   bytes in CHARS are installed, which also removes them from any
   other basic category.  */
static void
set_syntax_set (m4_syntax_table *syntax, const char *chars, size_t len,
                int code)
{
  const bool is_attribute = (code & M4_SYNTAX_MASKS) != 0;
  size_t i;
  int byte;

  /* Pass 1: empty the category across the whole byte range.  */
  for (byte = UCHAR_MAX; byte >= 0; byte--)
    {
      if (is_attribute)
        remove_syntax_attribute (syntax, byte, code);
      else if (m4_has_syntax (syntax, byte, code))
        add_syntax_attribute (syntax, byte, M4_SYNTAX_OTHER);
    }

  /* Pass 2: install the explicit members.  */
  for (i = 0; i < len; i++)
    add_syntax_attribute (syntax, chars[i], code);
}

/* Restore syntax category CODE to its default membership.  For the
   context attributes RQUOTE, ECOMM, DOLLAR, LBRACE and RBRACE the
   attribute ends up on exactly one default byte and is cleared from
   every other byte.  For any other CODE, each byte that belongs to
   CODE by default, or that currently has it, is returned to its
   default basic category.  */
static void
reset_syntax_set (m4_syntax_table *syntax, int code)
{
  /* Default carrier of a context attribute, or -1 when CODE is not
     one of the attributes listed above.  */
  int home = -1;
  int byte;

  if (code == M4_SYNTAX_RQUOTE)
    home = '\'';
  else if (code == M4_SYNTAX_ECOMM)
    home = '\n';
  else if (code == M4_SYNTAX_DOLLAR)
    home = '$';
  else if (code == M4_SYNTAX_LBRACE)
    home = '{';
  else if (code == M4_SYNTAX_RBRACE)
    home = '}';

  for (byte = UCHAR_MAX; byte >= 0; byte--)
    {
      if (home >= 0)
        {
          /* Attribute: set on the default byte, clear elsewhere.  */
          if (byte == home)
            add_syntax_attribute (syntax, byte, code);
          else
            remove_syntax_attribute (syntax, byte, code);
        }
      else if (syntax->orig[byte] == code
               || m4_has_syntax (syntax, byte, code))
        {
          /* Basic category: revert anything that is, or should be,
             a member back to its default.  */
          add_syntax_attribute (syntax, byte, syntax->orig[byte]);
        }
    }
}

/* Put SYNTAX back into its default state: the live table becomes a
   copy of the default table, the quote and comment delimiters become
   the one-byte defaults, the default context attributes are
   installed, and the quote age is recomputed as for a reset.  */
void
m4_reset_syntax (m4_syntax_table *syntax)
{
  /* Start from the default categories, which have known quote and
     comment properties.  */
  memcpy (syntax->table, syntax->orig, sizeof syntax->orig);

  /* Swap in fresh copies of the default delimiters.  The use of
     xmemdup0 is exploited by input.c.  */
  free (syntax->quote.str1);
  syntax->quote.str1 = xmemdup0 (DEF_LQUOTE, 1);
  free (syntax->quote.str2);
  syntax->quote.str2 = xmemdup0 (DEF_RQUOTE, 1);
  free (syntax->comm.str1);
  syntax->comm.str1 = xmemdup0 (DEF_BCOMM, 1);
  free (syntax->comm.str2);
  syntax->comm.str2 = xmemdup0 (DEF_ECOMM, 1);

  syntax->quote.len1 = 1;
  syntax->quote.len2 = 1;
  syntax->comm.len1 = 1;
  syntax->comm.len2 = 1;
  syntax->dollar = '$';

  /* The default table holds no context attributes, so add them.  */
  add_syntax_attribute (syntax, syntax->quote.str2[0], M4_SYNTAX_RQUOTE);
  add_syntax_attribute (syntax, syntax->comm.str2[0], M4_SYNTAX_ECOMM);
  add_syntax_attribute (syntax, '$', M4_SYNTAX_DOLLAR);
  add_syntax_attribute (syntax, '{', M4_SYNTAX_LBRACE);
  add_syntax_attribute (syntax, '}', M4_SYNTAX_RBRACE);

  syntax->is_macro_escaped = false;
  syntax->is_single_dollar = true;
  syntax->is_single_comments = true;
  syntax->is_single_quotes = true;
  set_quote_age (syntax, true, false);
}

/* Alter the syntax for category KEY, according to ACTION: '+' to add,
   '-' to subtract, '=' to set, or '\0' to reset.  The array CHARS of
   length LEN describes the characters to modify; it is ignored if
   ACTION is '\0' (but must still be non-NULL, and LEN must then be
   0).  Return -1 if KEY is invalid, otherwise return the syntax
   category matching KEY.

   After the table has been edited, a cleanup pass runs if any helper
   raised the suspect flag.  That pass recomputes the stored
   single-byte quote and comment delimiters, the dollar byte, and the
   is_single_* / is_macro_escaped flags from the table contents.
   Finally the quote age is updated as for an arbitrary syntax
   change.  */
int
m4_set_syntax (m4_syntax_table *syntax, char key, char action,
               const char *chars, size_t len)
{
  int code;

  assert (syntax && chars);
  code = m4_syntax_code (key);
  if (code < 0)
    {
      return -1;
    }
  /* Clear the flag so that it reflects only the edits made by this
     call; add_syntax_attribute and remove_syntax_attribute raise it
     again when needed.  */
  syntax->suspect = false;
  switch (action)
    {
    case '+':
      add_syntax_set (syntax, chars, len, code);
      break;
    case '-':
      subtract_syntax_set (syntax, chars, len, code);
      break;
    case '=':
      set_syntax_set (syntax, chars, len, code);
      break;
    case '\0':
      assert (!len);
      reset_syntax_set (syntax, code);
      break;
    default:
      assert (false);
    }

  /* Check for any cleanup needed.  */
  if (syntax->suspect)
    {
      int ch;
      /* Candidate delimiter bytes; -1 means none chosen yet.  */
      int lquote = -1;
      int rquote = -1;
      int bcomm = -1;
      int ecomm = -1;
      bool single_quote_possible = true;
      bool single_comm_possible = true;
      int dollar = -1;
      /* Prefer the delimiter bytes that are already stored, provided
         the table still gives them the matching category.  */
      if (m4_has_syntax (syntax, syntax->quote.str1[0], M4_SYNTAX_LQUOTE))
        {
          assert (syntax->quote.len1 == 1);
          lquote = to_uchar (syntax->quote.str1[0]);
        }
      if (m4_has_syntax (syntax, syntax->quote.str2[0], M4_SYNTAX_RQUOTE))
        {
          assert (syntax->quote.len2 == 1);
          rquote = to_uchar (syntax->quote.str2[0]);
        }
      if (m4_has_syntax (syntax, syntax->comm.str1[0], M4_SYNTAX_BCOMM))
        {
          assert (syntax->comm.len1 == 1);
          bcomm = to_uchar (syntax->comm.str1[0]);
        }
      if (m4_has_syntax (syntax, syntax->comm.str2[0], M4_SYNTAX_ECOMM))
        {
          assert (syntax->comm.len2 == 1);
          ecomm = to_uchar (syntax->comm.str2[0]);
        }
      /* Both flags are recomputed from scratch by the scan below.  */
      syntax->is_single_dollar = false;
      syntax->is_macro_escaped = false;
      /* Find candidates for each category.  */
      for (ch = UCHAR_MAX + 1; --ch >= 0; )
        {
          /* For each delimiter category: adopt the first byte seen if
             there is no candidate yet; a second, different byte means
             the category cannot be described by a single byte.  */
          if (m4_has_syntax (syntax, ch, M4_SYNTAX_LQUOTE))
            {
              if (lquote == -1)
                lquote = ch;
              else if (lquote != ch)
                single_quote_possible = false;
            }
          if (m4_has_syntax (syntax, ch, M4_SYNTAX_RQUOTE))
            {
              if (rquote == -1)
                rquote = ch;
              else if (rquote != ch)
                single_quote_possible = false;
            }
          if (m4_has_syntax (syntax, ch, M4_SYNTAX_BCOMM))
            {
              if (bcomm == -1)
                bcomm = ch;
              else if (bcomm != ch)
                single_comm_possible = false;
            }
          if (m4_has_syntax (syntax, ch, M4_SYNTAX_ECOMM))
            {
              if (ecomm == -1)
                ecomm = ch;
              else if (ecomm != ch)
                single_comm_possible = false;
            }
          /* The first DOLLAR byte met (scanning from UCHAR_MAX down)
             is recorded; any further one clears is_single_dollar.  */
          if (m4_has_syntax (syntax, ch, M4_SYNTAX_DOLLAR))
            {
              if (dollar == -1)
                {
                  syntax->dollar = dollar = ch;
                  syntax->is_single_dollar = true;
                }
              else
                syntax->is_single_dollar = false;
            }
          if (m4_has_syntax (syntax, ch, M4_SYNTAX_ESCAPE))
            syntax->is_macro_escaped = true;
        }
      /* Disable multi-character delimiters if we discovered
         delimiters.  */
      if (!single_quote_possible)
        syntax->is_single_quotes = false;
      if (!single_comm_possible)
        syntax->is_single_comments = false;
      /* A stored multi-byte quote string is cut down to one byte when
         its first byte is the chosen candidate, otherwise to the
         empty string.  */
      if ((1 < syntax->quote.len1 || 1 < syntax->quote.len2)
          && (!syntax->is_single_quotes || lquote != -1 || rquote != -1))
        {
          if (syntax->quote.len1)
            {
              syntax->quote.len1 = lquote == to_uchar (syntax->quote.str1[0]);
              syntax->quote.str1[syntax->quote.len1] = '\0';
            }
          if (syntax->quote.len2)
            {
              syntax->quote.len2 = rquote == to_uchar (syntax->quote.str2[0]);
              syntax->quote.str2[syntax->quote.len2] = '\0';
            }
        }
      /* Same truncation for the comment delimiters.  */
      if ((1 < syntax->comm.len1 || 1 < syntax->comm.len2)
          && (!syntax->is_single_comments || bcomm != -1 || ecomm != -1))
        {
          if (syntax->comm.len1)
            {
              syntax->comm.len1 = bcomm == to_uchar (syntax->comm.str1[0]);
              syntax->comm.str1[syntax->comm.len1] = '\0';
            }
          if (syntax->comm.len2)
            {
              syntax->comm.len2 = ecomm == to_uchar (syntax->comm.str2[0]);
              syntax->comm.str2[syntax->comm.len2] = '\0';
            }
        }
      /* Update the strings.  */
      if (lquote != -1)
        {
          if (single_quote_possible)
            syntax->is_single_quotes = true;
          /* An empty string is replaced by a fresh two-byte buffer
             (one delimiter byte plus the terminating NUL).  */
          if (syntax->quote.len1)
            assert (syntax->quote.len1 == 1);
          else
            {
              free (syntax->quote.str1);
              syntax->quote.str1 = xcharalloc (2);
              syntax->quote.str1[1] = '\0';
              syntax->quote.len1 = 1;
            }
          syntax->quote.str1[0] = lquote;
          /* A left quote with no right quote in the table gets the
             apostrophe as its right quote.  */
          if (rquote == -1)
            {
              rquote = '\'';
              add_syntax_attribute (syntax, rquote, M4_SYNTAX_RQUOTE);
            }
          if (!syntax->quote.len2)
            {
              free (syntax->quote.str2);
              syntax->quote.str2 = xcharalloc (2);
            }
          syntax->quote.str2[0] = rquote;
          syntax->quote.str2[1] = '\0';
          syntax->quote.len2 = 1;
        }
      if (bcomm != -1)
        {
          if (single_comm_possible)
            syntax->is_single_comments = true;
          if (syntax->comm.len1)
            assert (syntax->comm.len1 == 1);
          else
            {
              free (syntax->comm.str1);
              syntax->comm.str1 = xcharalloc (2);
              syntax->comm.str1[1] = '\0';
              syntax->comm.len1 = 1;
            }
          syntax->comm.str1[0] = bcomm;
          /* A begin-comment with no end-comment in the table gets
             newline as its end-comment.  */
          if (ecomm == -1)
            {
              ecomm = '\n';
              add_syntax_attribute (syntax, ecomm, M4_SYNTAX_ECOMM);
            }
          if (!syntax->comm.len2)
            {
              free (syntax->comm.str2);
              syntax->comm.str2 = xcharalloc (2);
            }
          syntax->comm.str2[0] = ecomm;
          syntax->comm.str2[1] = '\0';
          syntax->comm.len2 = 1;
        }
    }
  /* CHANGE is true: this counts as an arbitrary syntax change.  */
  set_quote_age (syntax, false, true);
  /* NOTE(review): m4__quote_uncache is defined outside this file;
     presumably it drops the pair cached by m4__quote_cache --
     confirm.  */
  m4__quote_uncache (syntax);
  return code;
}


/* Functions for setting quotes and comment delimiters.  Used by
   m4_changecom () and m4_changequote ().  Both functions override the
   syntax table to maintain compatibility.  */

/* Install LQ (length LQ_LEN) and RQ (length RQ_LEN) as the quote
   delimiters of SYNTAX.  A NULL pointer means the argument was
   absent, which is distinct from an explicit empty string.  Nothing
   happens when the requested delimiters equal the current ones.  */
void
m4_set_quotes (m4_syntax_table *syntax, const char *lq, size_t lq_len,
               const char *rq, size_t rq_len)
{
  bool unchanged;
  int byte;

  assert (syntax);

  /* POSIX states that with 0 arguments, the default quotes are used.
     POSIX XCU ERN 112 states that behavior is implementation-defined
     if there was only one argument, or if there is an empty string in
     either position when there are two arguments.  We allow an empty
     left quote to disable quoting, but a non-empty left quote will
     always create a non-empty right quote.  See the texinfo for what
     some other implementations do.  */
  if (lq == NULL)
    {
      lq = DEF_LQUOTE;
      lq_len = 1;
      /* Force the default right quote as well.  */
      rq = NULL;
    }
  if (rq == NULL || (lq_len != 0 && rq_len == 0))
    {
      rq = DEF_RQUOTE;
      rq_len = 1;
    }

  unchanged = (syntax->quote.len1 == lq_len
               && syntax->quote.len2 == rq_len
               && memcmp (syntax->quote.str1, lq, lq_len) == 0
               && memcmp (syntax->quote.str2, rq, rq_len) == 0);
  if (unchanged)
    return;

  /* Store private copies.  The use of xmemdup0 is exploited by
     input.c.  */
  free (syntax->quote.str1);
  free (syntax->quote.str2);
  syntax->quote.str1 = xmemdup0 (lq, lq_len);
  syntax->quote.str2 = xmemdup0 (rq, rq_len);
  syntax->quote.len1 = lq_len;
  syntax->quote.len2 = rq_len;

  /* changequote overrides syntax_table: strip the quote categories
     from every byte.  A byte whose default category is LQUOTE becomes
     OTHER; any other former left quote returns to its default.  */
  syntax->is_single_quotes = true;
  for (byte = UCHAR_MAX; byte >= 0; byte--)
    {
      if (m4_has_syntax (syntax, byte, M4_SYNTAX_LQUOTE))
        {
          int fallback = syntax->orig[byte];

          if (fallback == M4_SYNTAX_LQUOTE)
            fallback = M4_SYNTAX_OTHER;
          add_syntax_attribute (syntax, byte, fallback);
        }
      if (m4_has_syntax (syntax, byte, M4_SYNTAX_RQUOTE))
        remove_syntax_attribute (syntax, byte, M4_SYNTAX_RQUOTE);
    }

  /* Be careful when the start-quote sequence is effectively disabled
     because its first byte has a category that takes precedence; in
     that case the table is left without quote entries.  Only
     single-byte delimiters are recorded in the table.  */
  if (!m4_has_syntax (syntax, *syntax->quote.str1,
                      (M4_SYNTAX_IGNORE | M4_SYNTAX_ESCAPE | M4_SYNTAX_ALPHA
                       | M4_SYNTAX_NUM)))
    {
      if (syntax->quote.len1 == 1)
        add_syntax_attribute (syntax, syntax->quote.str1[0], M4_SYNTAX_LQUOTE);
      if (syntax->quote.len2 == 1)
        add_syntax_attribute (syntax, syntax->quote.str2[0], M4_SYNTAX_RQUOTE);
    }

  set_quote_age (syntax, false, false);
}

/* Install BC (length BC_LEN) and EC (length EC_LEN) as the comment
   delimiters of SYNTAX.  A NULL pointer means the argument was
   absent, which is distinct from an explicit empty string.  Nothing
   happens when the requested delimiters equal the current ones.  */
void
m4_set_comment (m4_syntax_table *syntax, const char *bc, size_t bc_len,
                const char *ec, size_t ec_len)
{
  bool unchanged;
  int byte;

  assert (syntax);

  /* POSIX requires no arguments to disable comments, and that one
     argument use newline as the close-comment.  POSIX XCU ERN 131
     states that empty arguments invoke implementation-defined
     behavior.  We allow an empty begin comment to disable comments,
     and a non-empty begin comment will always create a non-empty end
     comment.  See the texinfo for what some other implementations
     do.  */
  if (bc == NULL)
    {
      bc = "";
      ec = "";
      bc_len = 0;
      ec_len = 0;
    }
  else if (ec == NULL || (bc_len != 0 && ec_len == 0))
    {
      ec = DEF_ECOMM;
      ec_len = 1;
    }

  unchanged = (syntax->comm.len1 == bc_len
               && syntax->comm.len2 == ec_len
               && memcmp (syntax->comm.str1, bc, bc_len) == 0
               && memcmp (syntax->comm.str2, ec, ec_len) == 0);
  if (unchanged)
    return;

  /* Store private copies.  The use of xmemdup0 is exploited by
     input.c.  */
  free (syntax->comm.str1);
  free (syntax->comm.str2);
  syntax->comm.str1 = xmemdup0 (bc, bc_len);
  syntax->comm.str2 = xmemdup0 (ec, ec_len);
  syntax->comm.len1 = bc_len;
  syntax->comm.len2 = ec_len;

  /* changecom overrides syntax_table: strip the comment categories
     from every byte.  A byte whose default category is BCOMM becomes
     OTHER; any other former begin-comment returns to its default.  */
  syntax->is_single_comments = true;
  for (byte = UCHAR_MAX; byte >= 0; byte--)
    {
      if (m4_has_syntax (syntax, byte, M4_SYNTAX_BCOMM))
        {
          int fallback = syntax->orig[byte];

          if (fallback == M4_SYNTAX_BCOMM)
            fallback = M4_SYNTAX_OTHER;
          add_syntax_attribute (syntax, byte, fallback);
        }
      if (m4_has_syntax (syntax, byte, M4_SYNTAX_ECOMM))
        remove_syntax_attribute (syntax, byte, M4_SYNTAX_ECOMM);
    }

  /* Be careful when the start-comment sequence is effectively
     disabled because its first byte has a category that takes
     precedence; in that case the table is left without comment
     entries.  Only single-byte delimiters are recorded in the
     table.  */
  if (!m4_has_syntax (syntax, *syntax->comm.str1,
                      (M4_SYNTAX_IGNORE | M4_SYNTAX_ESCAPE | M4_SYNTAX_ALPHA
                       | M4_SYNTAX_NUM | M4_SYNTAX_LQUOTE)))
    {
      if (syntax->comm.len1 == 1)
        add_syntax_attribute (syntax, syntax->comm.str1[0], M4_SYNTAX_BCOMM);
      if (syntax->comm.len2 == 1)
        add_syntax_attribute (syntax, syntax->comm.str2[0], M4_SYNTAX_ECOMM);
    }

  set_quote_age (syntax, false, false);
}

/* Call this when changing anything that might impact the quote age,
   so that m4__quote_age and m4__safe_quotes will reflect the change.
   If RESET, changesyntax was reset to its default state; if CHANGE,
   arbitrary syntax has changed; otherwise, just quotes or comment
   delimiters have changed.

   On return, SYNTAX->quote_age is 0 when the current settings are
   considered unsafe for quote-age optimizations, and otherwise holds
   the syntax age in bits 16 and up, the start-quote byte in bits
   8-15, and the end-quote byte in bits 0-7.  */
static void
set_quote_age (m4_syntax_table *syntax, bool reset, bool change)
{
  /* Multi-character quotes are inherently unsafe, since concatenation
     of individual characters can result in a quote delimiter,
     consider:

     define(echo,``$1'')define(a,A)changequote(<[,]>)echo(<[]]><[>a]>)
     => A]> (not ]>a)

   Also, unquoted close delimiters are unsafe, consider:

     define(echo,``$1'')define(a,A)echo(`a''`a')
     => aA' (not a'a)

   Duplicated start and end quote delimiters, as well as comment
   delimiters that overlap with quote delimiters or active characters,
   also present a problem, consider:

     define(echo,$*)echo(a,a,a`'define(a,A)changecom(`,',`,'))
     => A,a,A (not A,A,A)

   The impact of arbitrary changesyntax is difficult to characterize.
   So if things are in their default state, we use 0 for the upper 16
   bits of quote_age; otherwise we increment syntax_age for each
   changesyntax, but saturate it at 0xffff rather than wrapping
   around.  Perhaps a cache of other frequently used states is
   warranted, if changesyntax becomes more popular.

   Perhaps someday we will fix $@ expansion to use the current
   settings of the comma category, or even allow multi-character
   argument separators via changesyntax.  Until then, we use a literal
   `,' in $@ expansion, therefore we must insist that `,' be an
   argument separator for quote_age to be non-zero.

   Rather than check every token for an unquoted delimiter, we merely
   encode current_quote_age to 0 when things are unsafe, and non-zero
   when safe (namely, the syntax_age in the upper 16 bits, coupled
   with the 16-bit value composed of the single-character start and
   end quote delimiters).  There may be other situations which are
   safe even when this algorithm sets the quote_age to zero, but at
   least a quote_age of zero always produces correct results (although
   it may take more time in doing so).  */

  unsigned short local_syntax_age;
  if (reset)
    local_syntax_age = 0;
  else if (change && syntax->syntax_age < 0xffff)
    local_syntax_age = ++syntax->syntax_age;
  else
    local_syntax_age = syntax->syntax_age;
  if (local_syntax_age < 0xffff && syntax->is_single_quotes
      && syntax->quote.len1 == 1 && syntax->quote.len2 == 1
      && !m4_has_syntax (syntax, *syntax->quote.str1,
                         (M4_SYNTAX_ALPHA | M4_SYNTAX_NUM | M4_SYNTAX_OPEN
                          | M4_SYNTAX_COMMA | M4_SYNTAX_CLOSE
                          | M4_SYNTAX_SPACE))
      && !m4_has_syntax (syntax, *syntax->quote.str2,
                         (M4_SYNTAX_ALPHA | M4_SYNTAX_NUM | M4_SYNTAX_OPEN
                          | M4_SYNTAX_COMMA | M4_SYNTAX_CLOSE
                          | M4_SYNTAX_SPACE))
      && *syntax->quote.str1 != *syntax->quote.str2
      && (!syntax->comm.len1
          || (*syntax->comm.str1 != *syntax->quote.str2
              && !m4_has_syntax (syntax, *syntax->comm.str1,
                                 (M4_SYNTAX_OPEN | M4_SYNTAX_COMMA
                                  | M4_SYNTAX_CLOSE))))
      && m4_has_syntax (syntax, ',', M4_SYNTAX_COMMA))
    {
      /* Shift in unsigned arithmetic: an unsigned short promotes to
         (signed) int, and shifting an age of 0x8000 or more left by
         16 would overflow a 32-bit int, which is undefined
         behavior.  */
      syntax->quote_age = (((unsigned int) local_syntax_age << 16)
                           | ((*syntax->quote.str1 & 0xff) << 8)
                           | (*syntax->quote.str2 & 0xff));
    }
  else
    syntax->quote_age = 0;
}

/* Interface for caching frequently used quote pairs, independently of
   the current quote delimiters (for example, consider a text macro
   expansion that includes several copies of $@), and using AGE for
   optimization.

   If QUOTES is NULL, no quoting is wanted and NULL is returned.  If
   OBS is non-NULL, AGE should be the current quote age and QUOTES
   should be m4_get_syntax_quotes; the result is a cached quote pair
   whose pointer is valid at least as long as OBS is not reset, but
   whose contents are only guaranteed until the next changequote or
   quote_cache.  If OBS is NULL, AGE should be the same as before and
   QUOTES should be a previously returned cache value; the call then
   refreshes the contents of the result.  */
const m4_string_pair *
m4__quote_cache (m4_syntax_table *syntax, m4_obstack *obs, unsigned int age,
                 const m4_string_pair *quotes)
{
  if (quotes == NULL)
    return NULL;

  /* A non-zero AGE encodes both single-byte delimiters (see
     set_quote_age), so the pair is rebuilt on the fly in storage that
     belongs to SYNTAX; its contents must be used immediately.  */
  if (age != 0)
    {
      syntax->cached_lquote[0] = (age >> 8) & 0xff;
      syntax->cached_rquote[0] = age & 0xff;
      return &syntax->cached_simple;
    }

  /* AGE is zero and there is nowhere to copy to: hand QUOTES back.  */
  if (obs == NULL)
    return quotes;

  /* AGE is zero: QUOTES has to be copied onto OBS, and that copy is
     remembered so that it is made only once.  */
  assert (quotes == &syntax->quote);
  if (syntax->cached_quote == NULL)
    {
      m4_string_pair *copy;

      assert (obstack_object_size (obs) == 0);
      copy = (m4_string_pair *) obstack_copy (obs, quotes, sizeof *quotes);
      copy->str1 = (char *) obstack_copy0 (obs, quotes->str1, quotes->len1);
      copy->str2 = (char *) obstack_copy0 (obs, quotes->str2, quotes->len2);
      syntax->cached_quote = copy;
    }
  return syntax->cached_quote;
}


/* Define these functions at the end, so that calls in the file use the
   faster macro version from m4module.h.  */
#undef m4_get_syntax_lquote
/* Function form of the m4_get_syntax_lquote macro: return the first
   string (the left quote) of SYNTAX's current quote pair.  SYNTAX
   must not be NULL.  */
const char *
m4_get_syntax_lquote (m4_syntax_table *syntax)
{
  const m4_string_pair *pair;

  assert (syntax);
  pair = &syntax->quote;
  return pair->str1;
}

#undef m4_get_syntax_rquote
/* Function form of the m4_get_syntax_rquote macro: return the second
   string (the right quote) of SYNTAX's current quote pair.  SYNTAX
   must not be NULL.  */
const char *
m4_get_syntax_rquote (m4_syntax_table *syntax)
{
  const m4_string_pair *pair;

  assert (syntax);
  pair = &syntax->quote;
  return pair->str2;
}

#undef m4_get_syntax_quotes
/* Function form of the m4_get_syntax_quotes macro: return a pointer
   to the quote pair stored inside SYNTAX (not a copy).  SYNTAX must
   not be NULL.  */
const m4_string_pair *
m4_get_syntax_quotes (m4_syntax_table *syntax)
{
  const m4_string_pair *pair;

  assert (syntax);
  pair = &syntax->quote;
  return pair;
}

#undef m4_is_syntax_single_quotes
/* Function form of the m4_is_syntax_single_quotes macro: report the
   is_single_quotes flag recorded in SYNTAX.  SYNTAX must not be
   NULL.  */
bool
m4_is_syntax_single_quotes (m4_syntax_table *syntax)
{
  bool flag;

  assert (syntax);
  flag = syntax->is_single_quotes;
  return flag;
}

#undef m4_get_syntax_bcomm
/* Function form of the m4_get_syntax_bcomm macro: return the first
   string (the begin-comment delimiter) of SYNTAX's current comment
   pair.  SYNTAX must not be NULL.  */
const char *
m4_get_syntax_bcomm (m4_syntax_table *syntax)
{
  const m4_string_pair *pair;

  assert (syntax);
  pair = &syntax->comm;
  return pair->str1;
}

#undef m4_get_syntax_ecomm
/* Function form of the m4_get_syntax_ecomm macro: return the second
   string (the end-comment delimiter) of SYNTAX's current comment
   pair.  SYNTAX must not be NULL.  */
const char *
m4_get_syntax_ecomm (m4_syntax_table *syntax)
{
  const m4_string_pair *pair;

  assert (syntax);
  pair = &syntax->comm;
  return pair->str2;
}

#undef m4_get_syntax_comments
/* Function form of the m4_get_syntax_comments macro: return a pointer
   to the comment pair stored inside SYNTAX (not a copy).  SYNTAX must
   not be NULL.  */
const m4_string_pair *
m4_get_syntax_comments (m4_syntax_table *syntax)
{
  const m4_string_pair *pair;

  assert (syntax);
  pair = &syntax->comm;
  return pair;
}

#undef m4_is_syntax_single_comments
/* Function form of the m4_is_syntax_single_comments macro: report the
   is_single_comments flag recorded in SYNTAX.  SYNTAX must not be
   NULL.  */
bool
m4_is_syntax_single_comments (m4_syntax_table *syntax)
{
  bool flag;

  assert (syntax);
  flag = syntax->is_single_comments;
  return flag;
}

#undef m4_is_syntax_single_dollar
/* Function form of the m4_is_syntax_single_dollar macro: report the
   is_single_dollar flag recorded in SYNTAX.  SYNTAX must not be
   NULL.  */
bool
m4_is_syntax_single_dollar (m4_syntax_table *syntax)
{
  bool flag;

  assert (syntax);
  flag = syntax->is_single_dollar;
  return flag;
}

#undef m4_is_syntax_macro_escaped
/* Function form of the m4_is_syntax_macro_escaped macro: report the
   is_macro_escaped flag recorded in SYNTAX.  SYNTAX must not be
   NULL.  */
bool
m4_is_syntax_macro_escaped (m4_syntax_table *syntax)
{
  bool flag;

  assert (syntax);
  flag = syntax->is_macro_escaped;
  return flag;
}