summaryrefslogtreecommitdiff
path: root/bcc/scan.c
diff options
context:
space:
mode:
Diffstat (limited to 'bcc/scan.c')
-rw-r--r--bcc/scan.c806
1 files changed, 806 insertions, 0 deletions
diff --git a/bcc/scan.c b/bcc/scan.c
new file mode 100644
index 0000000..0baecf8
--- /dev/null
+++ b/bcc/scan.c
@@ -0,0 +1,806 @@
+/* scan.c - lexical analyser for bcc */
+
+/* Copyright (C) 1992 Bruce Evans */
+
+#define GCH1() do { if (SYMOFCHAR(ch = *++lineptr) == SPECIALCHAR) specialchar(); } while (0)
+
+#include "const.h"
+#include "types.h"
+#include "input.h"
+#include "os.h"
+#include "output.h"
+#include "sc.h"
+#include "sizes.h"
+#include "table.h"
+#include "type.h"
+
+#undef EXTERN
+#define EXTERN
+#include "scan.h"
+
+PUBLIC sym_t symofchar[256] =
+{
+ BADCHAR, BADCHAR, BADCHAR, BADCHAR,
+ BADCHAR, BADCHAR, BADCHAR, BADCHAR,
+ BADCHAR, WHITESPACE, SPECIALCHAR, BADCHAR,
+ WHITESPACE, SPECIALCHAR, BADCHAR, BADCHAR,
+ BADCHAR, BADCHAR, BADCHAR, BADCHAR,
+ BADCHAR, BADCHAR, BADCHAR, BADCHAR,
+ BADCHAR, BADCHAR, BADCHAR, BADCHAR,
+ BADCHAR, BADCHAR, BADCHAR, BADCHAR,
+
+ WHITESPACE, LOGNOTOP, STRINGCONST, CONTROL, /* !"# */
+ BADCHAR, MODOP, AMPERSAND, CHARCONST, /* $%&' */
+ LPAREN, RPAREN, STAR, ADDOP, /* ()*+ */
+ COMMA, HYPHEN, FLOATCONST, SLASH, /* ,-./ */
+
+ INTCONST, INTCONST, INTCONST, INTCONST, /* 0123 */
+ INTCONST, INTCONST, INTCONST, INTCONST, /* 4567 */
+ INTCONST, INTCONST, COLON, SEMICOLON, /* 89:; */
+ LTOP, ASSIGNOP, GTOP, CONDOP, /* <=>? */
+
+ BADCHAR, IDENT, IDENT, IDENT, /* @ABC */
+ IDENT, IDENT, IDENT, IDENT, /* DEFG */
+ IDENT, IDENT, IDENT, IDENT, /* HIJK */
+ IDENT, IDENT, IDENT, IDENT, /* LMNO */
+ IDENT, IDENT, IDENT, IDENT, /* PQRS */
+ IDENT, IDENT, IDENT, IDENT, /* TUVW */
+ IDENT, IDENT, IDENT, LBRACKET, /* XYZ[ */
+ SPECIALCHAR, RBRACKET, EOROP, IDENT, /* \]^_ */
+
+ BADCHAR, IDENT, IDENT, IDENT, /* `abc */
+ IDENT, IDENT, IDENT, IDENT, /* defg */
+ IDENT, IDENT, IDENT, IDENT, /* hijk */
+ IDENT, IDENT, IDENT, IDENT, /* lmno */
+ IDENT, IDENT, IDENT, IDENT, /* pqrs */
+ IDENT, IDENT, IDENT, IDENT, /* tuvw */
+ IDENT, IDENT, IDENT, LBRACE, /* xyz{ */
+ OROP, RBRACE, NOTOP, BADCHAR, /* |}~ */
+
+ BADCHAR, BADCHAR, BADCHAR, BADCHAR,
+ BADCHAR, BADCHAR, BADCHAR, BADCHAR,
+ BADCHAR, BADCHAR, BADCHAR, BADCHAR,
+ BADCHAR, BADCHAR, BADCHAR, BADCHAR,
+ BADCHAR, BADCHAR, BADCHAR, BADCHAR,
+ BADCHAR, BADCHAR, BADCHAR, BADCHAR,
+ BADCHAR, BADCHAR, BADCHAR, BADCHAR,
+ BADCHAR, BADCHAR, BADCHAR, BADCHAR,
+
+ BADCHAR, BADCHAR, BADCHAR, BADCHAR,
+ BADCHAR, BADCHAR, BADCHAR, BADCHAR,
+ BADCHAR, BADCHAR, BADCHAR, BADCHAR,
+ BADCHAR, BADCHAR, BADCHAR, BADCHAR,
+ BADCHAR, BADCHAR, BADCHAR, BADCHAR,
+ BADCHAR, BADCHAR, BADCHAR, BADCHAR,
+ BADCHAR, BADCHAR, BADCHAR, BADCHAR,
+ BADCHAR, BADCHAR, BADCHAR, BADCHAR,
+
+ BADCHAR, BADCHAR, BADCHAR, BADCHAR,
+ BADCHAR, BADCHAR, BADCHAR, BADCHAR,
+ BADCHAR, BADCHAR, BADCHAR, BADCHAR,
+ BADCHAR, BADCHAR, BADCHAR, BADCHAR,
+ BADCHAR, BADCHAR, BADCHAR, BADCHAR,
+ BADCHAR, BADCHAR, BADCHAR, BADCHAR,
+ BADCHAR, BADCHAR, BADCHAR, BADCHAR,
+ BADCHAR, BADCHAR, BADCHAR, BADCHAR,
+
+ BADCHAR, BADCHAR, BADCHAR, BADCHAR,
+ BADCHAR, BADCHAR, BADCHAR, BADCHAR,
+ BADCHAR, BADCHAR, BADCHAR, BADCHAR,
+ BADCHAR, BADCHAR, BADCHAR, BADCHAR,
+ BADCHAR, BADCHAR, BADCHAR, BADCHAR,
+ BADCHAR, BADCHAR, BADCHAR, BADCHAR,
+ BADCHAR, BADCHAR, BADCHAR, BADCHAR,
+ BADCHAR, BADCHAR, BADCHAR, BADCHAR
+};
+
+FORWARD void getident P((void));
+FORWARD void intconst P((void));
+FORWARD void ppnumber P((void));
+PRIVATE void newstringorcharconst P((void));
+
+/* skip white space up to end of line (skip EOL's if in comments) */
+
+PUBLIC void blanks()
+{
+ register char *reglineptr;
+
+ while (TRUE)
+ {
+ reglineptr = lineptr;
+ while (SYMOFCHAR(*reglineptr) == WHITESPACE)
+ ++reglineptr;
+ lineptr = reglineptr;
+ if (SYMOFCHAR(ch = *reglineptr) == SPECIALCHAR)
+ {
+ specialchar();
+ if (ch == EOL)
+ return;
+ if (SYMOFCHAR(ch) == WHITESPACE)
+ continue;
+ reglineptr = lineptr;
+ }
+ if (*reglineptr != '/')
+ return;
+ if (SYMOFCHAR(*(reglineptr + 1)) == SPECIALCHAR)
+ {
+ ch = *(lineptr = reglineptr + 1);
+ specialchar();
+ ch = *(reglineptr = --lineptr) = '/'; /* pushback */
+ }
+ if (*(reglineptr + 1) != '*')
+ return;
+ gch1();
+ skipcomment();
+ }
+}
+
+PUBLIC void cppscan()
+{
+ while (TRUE)
+ {
+ switch (SYMOFCHAR(ch))
+ {
+ case CHARCONST:
+ case STRINGCONST:
+ stringorcharconst();
+ *charptr = 0; /* room reserved */
+ outstr(constant.value.s); /* XXX - embedded null would kill it */
+ charptr = constant.value.s;
+ break;
+ case CONTROL:
+ gch1();
+ if (maclevel != 0)
+ {
+ error("# in a macro: not preprocessed"); /* XXX? */
+ break;
+ }
+ else
+ {
+ cppmode = FALSE;
+ docontrol();
+ cppmode = TRUE;
+ break;
+ }
+ case SLASH:
+ gch1();
+ if (ch == '*')
+ {
+ skipcomment();
+ break;
+ }
+ outbyte('/');
+ break;
+ case FLOATCONST:
+ gch1();
+ if (SYMOFCHAR(ch) == INTCONST)
+ {
+ --lineptr;
+ ch = '.'; /* pushback */
+ ppnumber();
+ outstr(constant.value.s);
+ charptr = constant.value.s;
+ }
+ else
+ outbyte('.');
+ break;
+ case IDENT:
+ getident();
+ if ((gsymptr = findlorg(gsname)) != NULL)
+ {
+ if (gsymptr->flags == DEFINITION)
+ {
+ entermac();
+ break;
+ }
+ }
+ outstr(gsname);
+ break;
+ case INTCONST:
+ ppnumber();
+ outstr(constant.value.s);
+ charptr = constant.value.s;
+ break;
+ case SPECIALCHAR:
+ specialchar(); /* XXX - this is necessary because entermac
+ * and skipeol can leave lineptr at a special
+ * char although GCH1 cannot. entermac would
+ * be easy to fix, but too many places already
+ * have to handle the extra work after skipeol
+ * for other reasons.
+ */
+ if (ch == EOL)
+ {
+ skipeol();
+ if (eof)
+ return;
+ break;
+ }
+#ifdef COEOL
+ if (ch == COEOL)
+ {
+ gch1();
+ break;
+ }
+#endif
+ if (SYMOFCHAR(ch) != SPECIALCHAR)
+ break; /* specialchar advanced the input */
+ /* must be '\\' */
+ default:
+ OUTBYTE(ch);
+ GCH1();
+ break;
+ }
+ }
+}
+
+PUBLIC void eofin(message)
+char *message;
+{
+ error2error("end of file in ", message);
+}
+
+/* get identifier in gsname == gs2name + 2 */
+
+PRIVATE void getident()
+{
+ register int c;
+ register char *idptr;
+ register int length;
+ register char *reglineptr;
+
+ idptr = gsname;
+ length = NAMESIZE;
+ reglineptr = lineptr;
+ c = ch;
+more:
+ do
+ {
+ *idptr++ = c;
+ c = (unsigned char) *++reglineptr;
+ }
+ while (symofchar[c] <= MAXIDSYM && --length != 0);
+ ch = c;
+ *idptr = 0;
+ lineptr = reglineptr;
+ if (symofchar[c] == SPECIALCHAR)
+ {
+ specialchar();
+ if (SYMOFCHAR(ch) <= MAXIDSYM && length != 0)
+ {
+ reglineptr = lineptr;
+ c = ch;
+ goto more;
+ }
+ }
+}
+
+/* return nonzero if at an identifier */
+
+PUBLIC bool_pt isident()
+{
+ if (SYMOFCHAR(ch) != IDENT)
+ return FALSE;
+ getident();
+ return TRUE;
+}
+
+PRIVATE void intconst()
+{
+ fastin_t base;
+ fastin_t digit;
+ register char *digptr;
+ fastin_t lcount;
+ fastin_t ucount;
+ bool_t dflag;
+
+ ppnumber();
+ digptr = charptr = constant.value.s;
+ if (*digptr == '0')
+ {
+ if (digptr[1] != 'x' && digptr[1] != 'X')
+ base = 8;
+ else
+ {
+ base = 16;
+ digptr += 2;
+ }
+ }
+ else
+ base = 10;
+ dflag = FALSE;
+ if (*digptr == 0)
+ error("constant has no digits");
+ if (base != 16)
+ {
+ while (SYMOFCHAR(*digptr) == INTCONST)
+ ++digptr;
+ if (*digptr == '.')
+ {
+ dflag = TRUE;
+ ++digptr;
+ while (SYMOFCHAR(*digptr) == INTCONST)
+ ++digptr;
+ }
+ if (*digptr == 'E' || *digptr == 'e')
+ {
+ dflag = TRUE;
+ ++digptr;
+ if (*digptr == '+' || *digptr == '-')
+ ++digptr;
+ if (SYMOFCHAR(*digptr) != INTCONST)
+ error("exponent has no digits");
+ while (SYMOFCHAR(*digptr) == INTCONST)
+ ++digptr;
+ }
+ }
+ if (dflag)
+ {
+ constant.type = dtype;
+ digit = *digptr;
+ *digptr = 0;
+ constant.value.d = atof(constant.value.s);
+ *digptr = digit;
+ if (digit == 'F' || digit == 'f')
+ {
+ ++digptr;
+ constant.type = fltype;
+ constant.value.d = (float) constant.value.d;
+ }
+ sym = FLOATCONST;
+ if (*digptr != 0)
+ error("junk at end of floating point constant");
+ return;
+ }
+ if (base != 16)
+ digptr = constant.value.s;
+ constant.value.v = 0;
+ while (*digptr >= '0')
+ {
+ digit = *digptr;
+ if (digit > '9')
+ {
+ if (base != 16)
+ break;
+ if (digit >= 'a' && digit <= 'f')
+ digit = digit + (-'a' + '0' + 10);
+ else if (digit >= 'A' && digit <= 'F')
+ digit = digit + (-'A' + '0' + 10);
+ else
+ break;
+ }
+ constant.value.v = base * constant.value.v + (digit - '0');
+ ++digptr;
+ }
+ ucount = lcount = 0;
+ while (TRUE)
+ {
+ if (*digptr == 'L' || *digptr == 'l')
+ ++lcount;
+ else if (*digptr == 'U' || *digptr == 'u')
+ ++ucount;
+ else
+ break;
+ ++digptr;
+ }
+ if (lcount > 1)
+ error("more than one 'L' in integer constant");
+ if (ucount > 1)
+ error("more than one 'U' in integer constant");
+ if (constant.value.v <= maxintto && lcount == 0 && ucount == 0)
+ constant.type = itype;
+ else if (constant.value.v <= maxuintto && lcount == 0
+ && (base != 10 || ucount != 0))
+ constant.type = uitype;
+ else if (constant.value.v <= maxlongto && ucount == 0)
+ constant.type = ltype;
+ else
+ constant.type = ultype;
+ if (*digptr != 0)
+ error("junk at end of integer constant");
+}
+
+PUBLIC void nextsym()
+{
+ static char lastch;
+ register char *reglineptr;
+
+ while (TRUE) /* exit with short, fast returns */
+ {
+ reglineptr = lineptr;
+ while ((sym = SYMOFCHAR(*reglineptr)) == WHITESPACE)
+ ++reglineptr;
+ lastch = *reglineptr;
+ if (SYMOFCHAR(ch = *(lineptr = reglineptr + 1)) == SPECIALCHAR
+ && sym != SPECIALCHAR)
+ specialchar();
+ switch (sym)
+ {
+ case CHARCONST:
+ --lineptr;
+ ch = lastch; /* pushback */
+ stringorcharconst();
+ newstringorcharconst();
+ if (charptr == constant.value.s)
+ {
+ charptr = constant.value.s;
+ error("empty character constant");
+ constant.value.v = 0;
+ }
+ else
+ {
+ if (charptr != constant.value.s + 1)
+ error("character constant too long");
+ charptr = constant.value.s;
+ constant.value.v = *constant.value.s & CHMASKTO;
+ }
+ constant.type = itype;
+ return;
+ case CONTROL:
+ if (maclevel != 0)
+ {
+ error("# in a macro: not preprocessed"); /* XXX? */
+ return;
+ }
+ else
+ {
+ docontrol();
+ break;
+ }
+ case FLOATCONST:
+ if (SYMOFCHAR(ch) == INTCONST)
+ {
+ --lineptr;
+ ch = lastch;
+ intconst();
+ }
+ else
+ sym = STRUCELTOP;
+ return;
+ case IDENT:
+ --lineptr;
+ ch = lastch;
+ getident();
+ if ((gsymptr = findlorg(gsname)) != NULL)
+ {
+ if (gsymptr->flags == DEFINITION)
+ {
+ entermac();
+ break;
+ }
+ if (gsymptr->flags == KEYWORD)
+ sym = gsymptr->offset.offsym;
+ }
+ return;
+ case INTCONST:
+ --lineptr;
+ ch = lastch;
+ intconst();
+ return;
+ case SPECIALCHAR:
+ ch = *(lineptr = reglineptr);
+ specialchar();
+ if (ch == EOL)
+ {
+ if (incppexpr)
+ {
+ sym = SEMICOLON;
+ return;
+ }
+ skipeol();
+ if (eof)
+ {
+ sym = EOFSYM;
+ return;
+ }
+ break;
+ }
+ if (SYMOFCHAR(ch) != SPECIALCHAR)
+ break;
+ gch1();
+ /* fall through to BADCHAR case */
+ case BADCHAR:
+ error("illegal character");
+ break;
+ case STRINGCONST:
+ --lineptr;
+ ch = lastch; /* pushback */
+ stringorcharconst();
+ newstringorcharconst();
+#ifdef TS
+ ts_n_string += charptr - constant.value.s;
+#endif
+ return;
+ case COLON: /* could be COLONOP or in label */
+ case COMMA: /* could be COMMAOP or in list */
+ case CONDOP:
+ case LBRACE:
+ case LBRACKET:
+ case LPAREN:
+ case NOTOP:
+ case RBRACE:
+ case RBRACKET:
+ case RPAREN:
+ case SEMICOLON:
+ return;
+ case ASSIGNOP:
+ if (ch == '=')
+ {
+ sym = EQOP;
+ gch1();
+ }
+ return;
+ case ADDOP:
+ if (ch == '+')
+ {
+ sym = INCSYM; /* PREINCOP or POSINCOP */
+ gch1();
+ }
+ else if (ch == '=')
+ {
+ sym = ADDABOP;
+ gch1();
+ }
+ return;
+ case HYPHEN: /* NEGOP or SUBOP or decoded below */
+ if (ch == '-')
+ {
+ sym = DECSYM; /* PREDECOP or POSTDECOP */
+ gch1();
+ }
+ else if (ch == '=')
+ {
+ sym = SUBABOP;
+ gch1();
+ }
+ else if (ch == '>')
+ {
+ sym = STRUCPTROP;
+ gch1();
+ }
+ return;
+ case STAR: /* INDIRECTOP or MULOP or as below */
+ if (ch == '=')
+ {
+ sym = MULABOP;
+ gch1();
+ }
+ return;
+ case SLASH:
+ if (ch == '*')
+ {
+ skipcomment();
+ break;
+ }
+ if (ch == '=')
+ {
+ sym = DIVABOP;
+ gch1();
+ }
+ else
+ sym = DIVOP;
+ return;
+ case MODOP:
+ if (ch == '=')
+ {
+ sym = MODABOP;
+ gch1();
+ }
+ return;
+ case LTOP:
+ if (ch == '=')
+ {
+ sym = LEOP;
+ gch1();
+ }
+ else if (ch == '<')
+ {
+ gch1();
+ if (ch == '=')
+ {
+ sym = SLABOP;
+ gch1();
+ }
+ else
+ {
+ sym = SLOP;
+ }
+ }
+ return;
+ case GTOP:
+ if (ch == '=')
+ {
+ sym = GEOP;
+ gch1();
+ }
+ else if (ch == '>')
+ {
+ gch1();
+ if (ch == '=')
+ {
+ sym = SRABOP;
+ gch1();
+ }
+ else
+ {
+ sym = SROP;
+ }
+ }
+ return;
+ case AMPERSAND: /* ADDRESSOP or ANDOP or as below */
+ if (ch == '&')
+ {
+ sym = LOGANDOP;
+ gch1();
+ }
+ else if (ch == '=')
+ {
+ sym = ANDABOP;
+ gch1();
+ }
+ return;
+ case EOROP:
+ if (ch == '=')
+ {
+ sym = EORABOP;
+ gch1();
+ }
+ return;
+ case OROP:
+ if (ch == '|')
+ {
+ sym = LOGOROP;
+ gch1();
+ }
+ else if (ch == '=')
+ {
+ sym = ORABOP;
+ gch1();
+ }
+ return;
+ case LOGNOTOP:
+ if (ch == '=')
+ {
+ sym = NEOP;
+ gch1();
+ }
+ return;
+ }
+ }
+}
+
+PRIVATE void ppnumber()
+{
+ constant.value.s = charptr;
+ while (TRUE)
+ {
+ if (charptr >= char1top)
+ constant.value.s = growobject(constant.value.s, 2);
+ *charptr++ = ch;
+ if (ch == 'E' || ch == 'e')
+ {
+ gch1();
+ if (ch == '+' || ch == '-')
+ continue;
+ }
+ else
+ GCH1();
+ if (SYMOFCHAR(ch) > MAXPPNUMSYM)
+ break;
+ }
+ *charptr = 0;
+}
+
+PUBLIC void stringorcharconst()
+{
+ char terminator;
+
+ constant.value.s = charptr;
+ terminator = ch;
+ while (TRUE)
+ {
+ if (charptr >= char3top)
+ constant.value.s = growobject(constant.value.s, 4);
+ *charptr++ = ch;
+ if (ch == '\\')
+ {
+ gch1();
+ if (ch == EOL)
+ {
+ if (!orig_cppmode)
+ eofin("escape sequence");
+ break;
+ }
+ *charptr++ = ch;
+ }
+ GCH1();
+ if (ch == EOL)
+ {
+ if (!orig_cppmode)
+ error(terminator == '"' ? "end of line in string constant"
+ : "end of line in character constant");
+ break;
+ }
+ if (ch == terminator)
+ {
+ *charptr++ = terminator;
+ gch1();
+ break;
+ }
+ }
+}
+
+PRIVATE void newstringorcharconst()
+{
+ char *endinptr;
+ int escvalue;
+ fastin_t maxdigits;
+ register char *inptr;
+ register char *outptr;
+
+ outptr = constant.value.s;
+ inptr = charptr - 1;
+ if (inptr == outptr || *inptr != *outptr)
+ endinptr = charptr; /* not properly terminated */
+ else
+ endinptr = inptr;
+ inptr = outptr + 1;
+ while (inptr != endinptr)
+ {
+ if (*inptr == '\\')
+ {
+ if (++inptr == endinptr)
+ break; /* error reported already */
+ if (*inptr >= '0' && *inptr < '8')
+ {
+ escvalue = 0;
+ maxdigits = 3;
+ do
+ {
+ escvalue = 8 * escvalue + *inptr - '0';
+ ++inptr;
+ }
+ while (--maxdigits != 0 && *inptr >= '0' && *inptr < '8');
+ --inptr;
+ *outptr++ = escvalue;
+ }
+ else switch (*inptr)
+ {
+ case '"':
+ case '\'':
+ case '\\':
+ *outptr++ = *inptr;
+ break;
+ case 'a':
+ *outptr++ = 7;
+ break;
+ case 'b':
+ *outptr++ = 8;
+ break;
+ case 'f':
+ *outptr++ = 12;
+ break;
+ case 'n':
+ *outptr++ = EOLTO;
+ break;
+ case 'r':
+ *outptr++ = 13;
+ break;
+ case 't':
+ *outptr++ = 9;
+ break;
+ case 'v':
+ *outptr++ = 11;
+ break;
+ default:
+ error("%wnonstandard escape sequence");
+ *outptr++ = *inptr;
+ break;
+ }
+ }
+ else
+ *outptr++ = *inptr;
+ ++inptr;
+ }
+ charptr = outptr;
+}