summaryrefslogtreecommitdiff
path: root/ntpd/ntp_scanner.c
diff options
context:
space:
mode:
Diffstat (limited to 'ntpd/ntp_scanner.c')
-rw-r--r--ntpd/ntp_scanner.c755
1 files changed, 755 insertions, 0 deletions
diff --git a/ntpd/ntp_scanner.c b/ntpd/ntp_scanner.c
new file mode 100644
index 0000000..3e09856
--- /dev/null
+++ b/ntpd/ntp_scanner.c
@@ -0,0 +1,755 @@
+
+/* ntp_scanner.c
+ *
+ * The source code for a simple lexical analyzer.
+ *
+ * Written By: Sachin Kamboj
+ * University of Delaware
+ * Newark, DE 19711
+ * Copyright (c) 2006
+ */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <stdio.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+
+#include "ntpd.h"
+#include "ntp_config.h"
+#include "ntpsim.h"
+#include "ntp_scanner.h"
+#include "ntp_parser.h"
+
+/* ntp_keyword.h declares finite state machine and token text */
+#include "ntp_keyword.h"
+
+
+
+/* SCANNER GLOBAL VARIABLES
+ * ------------------------
+ */
+
+#define MAX_LEXEME (1024 + 1) /* The maximum size of a lexeme */
+char yytext[MAX_LEXEME]; /* Buffer for storing the input text/lexeme */
+u_int32 conf_file_sum; /* Simple sum of characters read */
+
+
+
+
+/* CONSTANTS
+ * ---------
+ */
+
+
+/* SCANNER GLOBAL VARIABLES
+ * ------------------------
+ */
+const char special_chars[] = "{}(),;|=";
+
+
+/* FUNCTIONS
+ * ---------
+ */
+
+static int is_keyword(char *lexeme, follby *pfollowedby);
+
+
+/*
+ * keyword() - Return the keyword associated with token T_ identifier.
+ * See also token_name() for the string-ized T_ identifier.
+ * Example: keyword(T_Server) returns "server"
+ * token_name(T_Server) returns "T_Server"
+ */
+const char *
+keyword(
+ int token
+ )
+{
+ int i;
+ const char *text;
+
+ i = token - LOWEST_KEYWORD_ID;
+
+ if (i >= 0 && i < COUNTOF(keyword_text))
+ text = keyword_text[i];
+ else
+ text = NULL;
+
+ return (text != NULL)
+ ? text
+ : "(keyword not found)";
+}
+
+
+/* FILE INTERFACE
+ * --------------
+ * We define a couple of wrapper functions around the standard C fgetc
+ * and ungetc functions in order to include positional bookkeeping
+ */
+
+struct FILE_INFO *
+F_OPEN(
+ const char *path,
+ const char *mode
+ )
+{
+ struct FILE_INFO *my_info;
+
+ my_info = emalloc(sizeof *my_info);
+
+ my_info->line_no = 1;
+ my_info->col_no = 0;
+ my_info->prev_line_col_no = 0;
+ my_info->prev_token_col_no = 0;
+ my_info->fname = path;
+
+ my_info->fd = fopen(path, mode);
+ if (NULL == my_info->fd) {
+ free(my_info);
+ return NULL;
+ }
+ return my_info;
+}
+
+int
+FGETC(
+ struct FILE_INFO *stream
+ )
+{
+ int ch;
+
+ do
+ ch = fgetc(stream->fd);
+ while (EOF != ch && (CHAR_MIN > ch || ch > CHAR_MAX));
+
+ if (EOF != ch) {
+ if (input_from_file)
+ conf_file_sum += (u_char)ch;
+ ++stream->col_no;
+ if (ch == '\n') {
+ stream->prev_line_col_no = stream->col_no;
+ ++stream->line_no;
+ stream->col_no = 1;
+ }
+ }
+
+ return ch;
+}
+
+/* BUGS: 1. Function will fail on more than one line of pushback
+ * 2. No error checking is done to see if ungetc fails
+ * SK: I don't think its worth fixing these bugs for our purposes ;-)
+ */
+int
+UNGETC(
+ int ch,
+ struct FILE_INFO *stream
+ )
+{
+ if (input_from_file)
+ conf_file_sum -= (u_char)ch;
+ if (ch == '\n') {
+ stream->col_no = stream->prev_line_col_no;
+ stream->prev_line_col_no = -1;
+ --stream->line_no;
+ }
+ --stream->col_no;
+ return ungetc(ch, stream->fd);
+}
+
+int
+FCLOSE(
+ struct FILE_INFO *stream
+ )
+{
+ int ret_val = fclose(stream->fd);
+
+ if (!ret_val)
+ free(stream);
+ return ret_val;
+}
+
+/* STREAM INTERFACE
+ * ----------------
+ * Provide a wrapper for the stream functions so that the
+ * stream can either read from a file or from a character
+ * array.
+ * NOTE: This is not very efficient for reading from character
+ * arrays, but needed to allow remote configuration where the
+ * configuration command is provided through ntpq.
+ *
+ * The behavior of there two functions is determined by the
+ * input_from_file flag.
+ */
+
+static int
+get_next_char(
+ struct FILE_INFO *ip_file
+ )
+{
+ char ch;
+
+ if (input_from_file)
+ return FGETC(ip_file);
+ else {
+ if (remote_config.buffer[remote_config.pos] == '\0')
+ return EOF;
+ else {
+ ip_file->col_no++;
+ ch = remote_config.buffer[remote_config.pos++];
+ if (ch == '\n') {
+ ip_file->prev_line_col_no = ip_file->col_no;
+ ++ip_file->line_no;
+ ip_file->col_no = 1;
+ }
+ return ch;
+ }
+ }
+}
+
+static void
+push_back_char(
+ struct FILE_INFO *ip_file,
+ int ch
+ )
+{
+ if (input_from_file)
+ UNGETC(ch, ip_file);
+ else {
+ if (ch == '\n') {
+ ip_file->col_no = ip_file->prev_line_col_no;
+ ip_file->prev_line_col_no = -1;
+ --ip_file->line_no;
+ }
+ --ip_file->col_no;
+
+ remote_config.pos--;
+ }
+}
+
+
+
+/* STATE MACHINES
+ * --------------
+ */
+
+/* Keywords */
+static int
+is_keyword(
+ char *lexeme,
+ follby *pfollowedby
+ )
+{
+ follby fb;
+ int curr_s; /* current state index */
+ int token;
+ int i;
+
+ curr_s = SCANNER_INIT_S;
+ token = 0;
+
+ for (i = 0; lexeme[i]; i++) {
+ while (curr_s && (lexeme[i] != SS_CH(sst[curr_s])))
+ curr_s = SS_OTHER_N(sst[curr_s]);
+
+ if (curr_s && (lexeme[i] == SS_CH(sst[curr_s]))) {
+ if ('\0' == lexeme[i + 1]
+ && FOLLBY_NON_ACCEPTING
+ != SS_FB(sst[curr_s])) {
+ fb = SS_FB(sst[curr_s]);
+ *pfollowedby = fb;
+ token = curr_s;
+ break;
+ }
+ curr_s = SS_MATCH_N(sst[curr_s]);
+ } else
+ break;
+ }
+
+ return token;
+}
+
+
+/* Integer */
+static int
+is_integer(
+ char *lexeme
+ )
+{
+ int i;
+ int is_neg;
+ u_int u_val;
+
+ i = 0;
+
+ /* Allow a leading minus sign */
+ if (lexeme[i] == '-') {
+ i++;
+ is_neg = TRUE;
+ } else {
+ is_neg = FALSE;
+ }
+
+ /* Check that all the remaining characters are digits */
+ for (; lexeme[i] != '\0'; i++) {
+ if (!isdigit(lexeme[i]))
+ return FALSE;
+ }
+
+ if (is_neg)
+ return TRUE;
+
+ /* Reject numbers that fit in unsigned but not in signed int */
+ if (1 == sscanf(lexeme, "%u", &u_val))
+ return (u_val <= INT_MAX);
+ else
+ return FALSE;
+}
+
+
+/* U_int -- assumes is_integer() has returned FALSE */
+static int
+is_u_int(
+ char *lexeme
+ )
+{
+ int i;
+ int is_hex;
+
+ i = 0;
+ if ('0' == lexeme[i] && 'x' == tolower(lexeme[i + 1])) {
+ i += 2;
+ is_hex = TRUE;
+ } else {
+ is_hex = FALSE;
+ }
+
+ /* Check that all the remaining characters are digits */
+ for (; lexeme[i] != '\0'; i++) {
+ if (is_hex && !isxdigit(lexeme[i]))
+ return FALSE;
+ if (!is_hex && !isdigit(lexeme[i]))
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+
+/* Double */
+static int
+is_double(
+ char *lexeme
+ )
+{
+ u_int num_digits = 0; /* Number of digits read */
+ u_int i;
+
+ i = 0;
+
+ /* Check for an optional '+' or '-' */
+ if ('+' == lexeme[i] || '-' == lexeme[i])
+ i++;
+
+ /* Read the integer part */
+ for (; lexeme[i] && isdigit(lexeme[i]); i++)
+ num_digits++;
+
+ /* Check for the optional decimal point */
+ if ('.' == lexeme[i]) {
+ i++;
+ /* Check for any digits after the decimal point */
+ for (; lexeme[i] && isdigit(lexeme[i]); i++)
+ num_digits++;
+ }
+
+ /*
+ * The number of digits in both the decimal part and the
+ * fraction part must not be zero at this point
+ */
+ if (!num_digits)
+ return 0;
+
+ /* Check if we are done */
+ if (!lexeme[i])
+ return 1;
+
+ /* There is still more input, read the exponent */
+ if ('e' == tolower(lexeme[i]))
+ i++;
+ else
+ return 0;
+
+ /* Read an optional Sign */
+ if ('+' == lexeme[i] || '-' == lexeme[i])
+ i++;
+
+ /* Now read the exponent part */
+ while (lexeme[i] && isdigit(lexeme[i]))
+ i++;
+
+ /* Check if we are done */
+ if (!lexeme[i])
+ return 1;
+ else
+ return 0;
+}
+
+
+/* is_special() - Test whether a character is a token */
+static inline int
+is_special(
+ int ch
+ )
+{
+ return strchr(special_chars, ch) != NULL;
+}
+
+
+static int
+is_EOC(
+ int ch
+ )
+{
+ if ((old_config_style && (ch == '\n')) ||
+ (!old_config_style && (ch == ';')))
+ return 1;
+ return 0;
+}
+
+
+char *
+quote_if_needed(char *str)
+{
+ char *ret;
+ size_t len;
+ size_t octets;
+
+ len = strlen(str);
+ octets = len + 2 + 1;
+ ret = emalloc(octets);
+ if ('"' != str[0]
+ && (strcspn(str, special_chars) < len
+ || strchr(str, ' ') != NULL)) {
+ snprintf(ret, octets, "\"%s\"", str);
+ } else
+ strlcpy(ret, str, octets);
+
+ return ret;
+}
+
+
+static int
+create_string_token(
+ char *lexeme
+ )
+{
+ char *pch;
+
+ /*
+ * ignore end of line whitespace
+ */
+ pch = lexeme;
+ while (*pch && isspace(*pch))
+ pch++;
+
+ if (!*pch) {
+ yylval.Integer = T_EOC;
+ return yylval.Integer;
+ }
+
+ yylval.String = estrdup(lexeme);
+ return T_String;
+}
+
+
+/*
+ * yylex() - function that does the actual scanning.
+ * Bison expects this function to be called yylex and for it to take no
+ * input and return an int.
+ * Conceptually yylex "returns" yylval as well as the actual return
+ * value representing the token or type.
+ */
+int
+yylex(
+ struct FILE_INFO *ip_file
+ )
+{
+ static follby followedby = FOLLBY_TOKEN;
+ int i;
+ int instring;
+ int yylval_was_set;
+ int converted;
+ int token; /* The return value */
+ int ch;
+
+ if (input_from_file)
+ ip_file = fp[curr_include_level];
+ instring = FALSE;
+ yylval_was_set = FALSE;
+
+ do {
+ /* Ignore whitespace at the beginning */
+ while (EOF != (ch = get_next_char(ip_file)) &&
+ isspace(ch) &&
+ !is_EOC(ch))
+ ; /* Null Statement */
+
+ if (EOF == ch) {
+
+ if (!input_from_file || !curr_include_level)
+ return 0;
+
+ FCLOSE(fp[curr_include_level]);
+ ip_file = fp[--curr_include_level];
+ token = T_EOC;
+ goto normal_return;
+
+ } else if (is_EOC(ch)) {
+
+ /* end FOLLBY_STRINGS_TO_EOC effect */
+ followedby = FOLLBY_TOKEN;
+ token = T_EOC;
+ goto normal_return;
+
+ } else if (is_special(ch) && FOLLBY_TOKEN == followedby) {
+ /* special chars are their own token values */
+ token = ch;
+ /*
+ * '=' outside simulator configuration implies
+ * a single string following as in:
+ * setvar Owner = "The Boss" default
+ */
+ if ('=' == ch && old_config_style)
+ followedby = FOLLBY_STRING;
+ yytext[0] = (char)ch;
+ yytext[1] = '\0';
+ goto normal_return;
+ } else
+ push_back_char(ip_file, ch);
+
+ /* save the position of start of the token */
+ ip_file->prev_token_line_no = ip_file->line_no;
+ ip_file->prev_token_col_no = ip_file->col_no;
+
+ /* Read in the lexeme */
+ i = 0;
+ while (EOF != (ch = get_next_char(ip_file))) {
+
+ yytext[i] = (char)ch;
+
+ /* Break on whitespace or a special character */
+ if (isspace(ch) || is_EOC(ch)
+ || '"' == ch
+ || (FOLLBY_TOKEN == followedby
+ && is_special(ch)))
+ break;
+
+ /* Read the rest of the line on reading a start
+ of comment character */
+ if ('#' == ch) {
+ while (EOF != (ch = get_next_char(ip_file))
+ && '\n' != ch)
+ ; /* Null Statement */
+ break;
+ }
+
+ i++;
+ if (i >= COUNTOF(yytext))
+ goto lex_too_long;
+ }
+ /* Pick up all of the string inside between " marks, to
+ * end of line. If we make it to EOL without a
+ * terminating " assume it for them.
+ *
+ * XXX - HMS: I'm not sure we want to assume the closing "
+ */
+ if ('"' == ch) {
+ instring = TRUE;
+ while (EOF != (ch = get_next_char(ip_file)) &&
+ ch != '"' && ch != '\n') {
+ yytext[i++] = (char)ch;
+ if (i >= COUNTOF(yytext))
+ goto lex_too_long;
+ }
+ /*
+ * yytext[i] will be pushed back as not part of
+ * this lexeme, but any closing quote should
+ * not be pushed back, so we read another char.
+ */
+ if ('"' == ch)
+ ch = get_next_char(ip_file);
+ }
+ /* Pushback the last character read that is not a part
+ * of this lexeme.
+ * If the last character read was an EOF, pushback a
+ * newline character. This is to prevent a parse error
+ * when there is no newline at the end of a file.
+ */
+ if (EOF == ch)
+ push_back_char(ip_file, '\n');
+ else
+ push_back_char(ip_file, ch);
+ yytext[i] = '\0';
+ } while (i == 0);
+
+ /* Now return the desired token */
+
+ /* First make sure that the parser is *not* expecting a string
+ * as the next token (based on the previous token that was
+ * returned) and that we haven't read a string.
+ */
+
+ if (followedby == FOLLBY_TOKEN && !instring) {
+ token = is_keyword(yytext, &followedby);
+ if (token) {
+ /*
+ * T_Server is exceptional as it forces the
+ * following token to be a string in the
+ * non-simulator parts of the configuration,
+ * but in the simulator configuration section,
+ * "server" is followed by "=" which must be
+ * recognized as a token not a string.
+ */
+ if (T_Server == token && !old_config_style)
+ followedby = FOLLBY_TOKEN;
+ goto normal_return;
+ } else if (is_integer(yytext)) {
+ yylval_was_set = TRUE;
+ errno = 0;
+ if ((yylval.Integer = strtol(yytext, NULL, 10)) == 0
+ && ((errno == EINVAL) || (errno == ERANGE))) {
+ msyslog(LOG_ERR,
+ "Integer cannot be represented: %s",
+ yytext);
+ if (input_from_file) {
+ exit(1);
+ } else {
+ /* force end of parsing */
+ yylval.Integer = 0;
+ return 0;
+ }
+ }
+ token = T_Integer;
+ goto normal_return;
+ } else if (is_u_int(yytext)) {
+ yylval_was_set = TRUE;
+ if ('0' == yytext[0] &&
+ 'x' == tolower(yytext[1]))
+ converted = sscanf(&yytext[2], "%x",
+ &yylval.U_int);
+ else
+ converted = sscanf(yytext, "%u",
+ &yylval.U_int);
+ if (1 != converted) {
+ msyslog(LOG_ERR,
+ "U_int cannot be represented: %s",
+ yytext);
+ if (input_from_file) {
+ exit(1);
+ } else {
+ /* force end of parsing */
+ yylval.Integer = 0;
+ return 0;
+ }
+ }
+ token = T_U_int;
+ goto normal_return;
+ } else if (is_double(yytext)) {
+ yylval_was_set = TRUE;
+ errno = 0;
+ if ((yylval.Double = atof(yytext)) == 0 && errno == ERANGE) {
+ msyslog(LOG_ERR,
+ "Double too large to represent: %s",
+ yytext);
+ exit(1);
+ } else {
+ token = T_Double;
+ goto normal_return;
+ }
+ } else {
+ /* Default: Everything is a string */
+ yylval_was_set = TRUE;
+ token = create_string_token(yytext);
+ goto normal_return;
+ }
+ }
+
+ /*
+ * Either followedby is not FOLLBY_TOKEN or this lexeme is part
+ * of a string. Hence, we need to return T_String.
+ *
+ * _Except_ we might have a -4 or -6 flag on a an association
+ * configuration line (server, peer, pool, etc.).
+ *
+ * This is a terrible hack, but the grammar is ambiguous so we
+ * don't have a choice. [SK]
+ *
+ * The ambiguity is in the keyword scanner, not ntp_parser.y.
+ * We do not require server addresses be quoted in ntp.conf,
+ * complicating the scanner's job. To avoid trying (and
+ * failing) to match an IP address or DNS name to a keyword,
+ * the association keywords use FOLLBY_STRING in the keyword
+ * table, which tells the scanner to force the next token to be
+ * a T_String, so it does not try to match a keyword but rather
+ * expects a string when -4/-6 modifiers to server, peer, etc.
+ * are encountered.
+ * restrict -4 and restrict -6 parsing works correctly without
+ * this hack, as restrict uses FOLLBY_TOKEN. [DH]
+ */
+ if ('-' == yytext[0]) {
+ if ('4' == yytext[1]) {
+ token = T_Ipv4_flag;
+ goto normal_return;
+ } else if ('6' == yytext[1]) {
+ token = T_Ipv6_flag;
+ goto normal_return;
+ }
+ }
+
+ instring = FALSE;
+ if (FOLLBY_STRING == followedby)
+ followedby = FOLLBY_TOKEN;
+
+ yylval_was_set = TRUE;
+ token = create_string_token(yytext);
+
+normal_return:
+ if (T_EOC == token)
+ DPRINTF(4,("\t<end of command>\n"));
+ else
+ DPRINTF(4, ("yylex: lexeme '%s' -> %s\n", yytext,
+ token_name(token)));
+
+ if (!yylval_was_set)
+ yylval.Integer = token;
+
+ return token;
+
+lex_too_long:
+ yytext[min(sizeof(yytext) - 1, 50)] = 0;
+ msyslog(LOG_ERR,
+ "configuration item on line %d longer than limit of %lu, began with '%s'",
+ ip_file->line_no, (u_long)min(sizeof(yytext) - 1, 50),
+ yytext);
+
+ /*
+ * If we hit the length limit reading the startup configuration
+ * file, abort.
+ */
+ if (input_from_file)
+ exit(sizeof(yytext) - 1);
+
+ /*
+ * If it's runtime configuration via ntpq :config treat it as
+ * if the configuration text ended before the too-long lexeme,
+ * hostname, or string.
+ */
+ yylval.Integer = 0;
+ return 0;
+}