diff options
author | Lorry Tar Creator <lorry-tar-importer@baserock.org> | 2014-12-02 09:01:21 +0000 |
---|---|---|
committer | <> | 2014-12-04 16:11:25 +0000 |
commit | bdab5265fcbf3f472545073a23f8999749a9f2b9 (patch) | |
tree | c6018dd03dea906f8f1fb5f105f05b71a7dc250a /ntpd/ntp_scanner.c | |
download | ntp-bdab5265fcbf3f472545073a23f8999749a9f2b9.tar.gz |
Imported from /home/lorry/working-area/delta_ntp/ntp-dev-4.2.7p482.tar.gz.ntp-dev-4.2.7p482
Diffstat (limited to 'ntpd/ntp_scanner.c')
-rw-r--r-- | ntpd/ntp_scanner.c | 755 |
1 files changed, 755 insertions, 0 deletions
diff --git a/ntpd/ntp_scanner.c b/ntpd/ntp_scanner.c new file mode 100644 index 0000000..3e09856 --- /dev/null +++ b/ntpd/ntp_scanner.c @@ -0,0 +1,755 @@ + +/* ntp_scanner.c + * + * The source code for a simple lexical analyzer. + * + * Written By: Sachin Kamboj + * University of Delaware + * Newark, DE 19711 + * Copyright (c) 2006 + */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdio.h> +#include <ctype.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> + +#include "ntpd.h" +#include "ntp_config.h" +#include "ntpsim.h" +#include "ntp_scanner.h" +#include "ntp_parser.h" + +/* ntp_keyword.h declares finite state machine and token text */ +#include "ntp_keyword.h" + + + +/* SCANNER GLOBAL VARIABLES + * ------------------------ + */ + +#define MAX_LEXEME (1024 + 1) /* The maximum size of a lexeme */ +char yytext[MAX_LEXEME]; /* Buffer for storing the input text/lexeme */ +u_int32 conf_file_sum; /* Simple sum of characters read */ + + + + +/* CONSTANTS + * --------- + */ + + +/* SCANNER GLOBAL VARIABLES + * ------------------------ + */ +const char special_chars[] = "{}(),;|="; + + +/* FUNCTIONS + * --------- + */ + +static int is_keyword(char *lexeme, follby *pfollowedby); + + +/* + * keyword() - Return the keyword associated with token T_ identifier. + * See also token_name() for the string-ized T_ identifier. + * Example: keyword(T_Server) returns "server" + * token_name(T_Server) returns "T_Server" + */ +const char * +keyword( + int token + ) +{ + int i; + const char *text; + + i = token - LOWEST_KEYWORD_ID; + + if (i >= 0 && i < COUNTOF(keyword_text)) + text = keyword_text[i]; + else + text = NULL; + + return (text != NULL) + ? text + : "(keyword not found)"; +} + + +/* FILE INTERFACE + * -------------- + * We define a couple of wrapper functions around the standard C fgetc + * and ungetc functions in order to include positional bookkeeping + */ + +struct FILE_INFO * +F_OPEN( + const char *path, + const char *mode + ) +{ + struct FILE_INFO *my_info; + + my_info = emalloc(sizeof *my_info); + + my_info->line_no = 1; + my_info->col_no = 0; + my_info->prev_line_col_no = 0; + my_info->prev_token_col_no = 0; + my_info->fname = path; + + my_info->fd = fopen(path, mode); + if (NULL == my_info->fd) { + free(my_info); + return NULL; + } + return my_info; +} + +int +FGETC( + struct FILE_INFO *stream + ) +{ + int ch; + + do + ch = fgetc(stream->fd); + while (EOF != ch && (CHAR_MIN > ch || ch > CHAR_MAX)); + + if (EOF != ch) { + if (input_from_file) + conf_file_sum += (u_char)ch; + ++stream->col_no; + if (ch == '\n') { + stream->prev_line_col_no = stream->col_no; + ++stream->line_no; + stream->col_no = 1; + } + } + + return ch; +} + +/* BUGS: 1. Function will fail on more than one line of pushback + * 2. No error checking is done to see if ungetc fails + * SK: I don't think its worth fixing these bugs for our purposes ;-) + */ +int +UNGETC( + int ch, + struct FILE_INFO *stream + ) +{ + if (input_from_file) + conf_file_sum -= (u_char)ch; + if (ch == '\n') { + stream->col_no = stream->prev_line_col_no; + stream->prev_line_col_no = -1; + --stream->line_no; + } + --stream->col_no; + return ungetc(ch, stream->fd); +} + +int +FCLOSE( + struct FILE_INFO *stream + ) +{ + int ret_val = fclose(stream->fd); + + if (!ret_val) + free(stream); + return ret_val; +} + +/* STREAM INTERFACE + * ---------------- + * Provide a wrapper for the stream functions so that the + * stream can either read from a file or from a character + * array. + * NOTE: This is not very efficient for reading from character + * arrays, but needed to allow remote configuration where the + * configuration command is provided through ntpq. + * + * The behavior of there two functions is determined by the + * input_from_file flag. + */ + +static int +get_next_char( + struct FILE_INFO *ip_file + ) +{ + char ch; + + if (input_from_file) + return FGETC(ip_file); + else { + if (remote_config.buffer[remote_config.pos] == '\0') + return EOF; + else { + ip_file->col_no++; + ch = remote_config.buffer[remote_config.pos++]; + if (ch == '\n') { + ip_file->prev_line_col_no = ip_file->col_no; + ++ip_file->line_no; + ip_file->col_no = 1; + } + return ch; + } + } +} + +static void +push_back_char( + struct FILE_INFO *ip_file, + int ch + ) +{ + if (input_from_file) + UNGETC(ch, ip_file); + else { + if (ch == '\n') { + ip_file->col_no = ip_file->prev_line_col_no; + ip_file->prev_line_col_no = -1; + --ip_file->line_no; + } + --ip_file->col_no; + + remote_config.pos--; + } +} + + + +/* STATE MACHINES + * -------------- + */ + +/* Keywords */ +static int +is_keyword( + char *lexeme, + follby *pfollowedby + ) +{ + follby fb; + int curr_s; /* current state index */ + int token; + int i; + + curr_s = SCANNER_INIT_S; + token = 0; + + for (i = 0; lexeme[i]; i++) { + while (curr_s && (lexeme[i] != SS_CH(sst[curr_s]))) + curr_s = SS_OTHER_N(sst[curr_s]); + + if (curr_s && (lexeme[i] == SS_CH(sst[curr_s]))) { + if ('\0' == lexeme[i + 1] + && FOLLBY_NON_ACCEPTING + != SS_FB(sst[curr_s])) { + fb = SS_FB(sst[curr_s]); + *pfollowedby = fb; + token = curr_s; + break; + } + curr_s = SS_MATCH_N(sst[curr_s]); + } else + break; + } + + return token; +} + + +/* Integer */ +static int +is_integer( + char *lexeme + ) +{ + int i; + int is_neg; + u_int u_val; + + i = 0; + + /* Allow a leading minus sign */ + if (lexeme[i] == '-') { + i++; + is_neg = TRUE; + } else { + is_neg = FALSE; + } + + /* Check that all the remaining characters are digits */ + for (; lexeme[i] != '\0'; i++) { + if (!isdigit(lexeme[i])) + return FALSE; + } + + if (is_neg) + return TRUE; + + /* Reject numbers that fit in unsigned but not in signed int */ + if (1 == sscanf(lexeme, "%u", &u_val)) + return (u_val <= INT_MAX); + else + return FALSE; +} + + +/* U_int -- assumes is_integer() has returned FALSE */ +static int +is_u_int( + char *lexeme + ) +{ + int i; + int is_hex; + + i = 0; + if ('0' == lexeme[i] && 'x' == tolower(lexeme[i + 1])) { + i += 2; + is_hex = TRUE; + } else { + is_hex = FALSE; + } + + /* Check that all the remaining characters are digits */ + for (; lexeme[i] != '\0'; i++) { + if (is_hex && !isxdigit(lexeme[i])) + return FALSE; + if (!is_hex && !isdigit(lexeme[i])) + return FALSE; + } + + return TRUE; +} + + +/* Double */ +static int +is_double( + char *lexeme + ) +{ + u_int num_digits = 0; /* Number of digits read */ + u_int i; + + i = 0; + + /* Check for an optional '+' or '-' */ + if ('+' == lexeme[i] || '-' == lexeme[i]) + i++; + + /* Read the integer part */ + for (; lexeme[i] && isdigit(lexeme[i]); i++) + num_digits++; + + /* Check for the optional decimal point */ + if ('.' == lexeme[i]) { + i++; + /* Check for any digits after the decimal point */ + for (; lexeme[i] && isdigit(lexeme[i]); i++) + num_digits++; + } + + /* + * The number of digits in both the decimal part and the + * fraction part must not be zero at this point + */ + if (!num_digits) + return 0; + + /* Check if we are done */ + if (!lexeme[i]) + return 1; + + /* There is still more input, read the exponent */ + if ('e' == tolower(lexeme[i])) + i++; + else + return 0; + + /* Read an optional Sign */ + if ('+' == lexeme[i] || '-' == lexeme[i]) + i++; + + /* Now read the exponent part */ + while (lexeme[i] && isdigit(lexeme[i])) + i++; + + /* Check if we are done */ + if (!lexeme[i]) + return 1; + else + return 0; +} + + +/* is_special() - Test whether a character is a token */ +static inline int +is_special( + int ch + ) +{ + return strchr(special_chars, ch) != NULL; +} + + +static int +is_EOC( + int ch + ) +{ + if ((old_config_style && (ch == '\n')) || + (!old_config_style && (ch == ';'))) + return 1; + return 0; +} + + +char * +quote_if_needed(char *str) +{ + char *ret; + size_t len; + size_t octets; + + len = strlen(str); + octets = len + 2 + 1; + ret = emalloc(octets); + if ('"' != str[0] + && (strcspn(str, special_chars) < len + || strchr(str, ' ') != NULL)) { + snprintf(ret, octets, "\"%s\"", str); + } else + strlcpy(ret, str, octets); + + return ret; +} + + +static int +create_string_token( + char *lexeme + ) +{ + char *pch; + + /* + * ignore end of line whitespace + */ + pch = lexeme; + while (*pch && isspace(*pch)) + pch++; + + if (!*pch) { + yylval.Integer = T_EOC; + return yylval.Integer; + } + + yylval.String = estrdup(lexeme); + return T_String; +} + + +/* + * yylex() - function that does the actual scanning. + * Bison expects this function to be called yylex and for it to take no + * input and return an int. + * Conceptually yylex "returns" yylval as well as the actual return + * value representing the token or type. + */ +int +yylex( + struct FILE_INFO *ip_file + ) +{ + static follby followedby = FOLLBY_TOKEN; + int i; + int instring; + int yylval_was_set; + int converted; + int token; /* The return value */ + int ch; + + if (input_from_file) + ip_file = fp[curr_include_level]; + instring = FALSE; + yylval_was_set = FALSE; + + do { + /* Ignore whitespace at the beginning */ + while (EOF != (ch = get_next_char(ip_file)) && + isspace(ch) && + !is_EOC(ch)) + ; /* Null Statement */ + + if (EOF == ch) { + + if (!input_from_file || !curr_include_level) + return 0; + + FCLOSE(fp[curr_include_level]); + ip_file = fp[--curr_include_level]; + token = T_EOC; + goto normal_return; + + } else if (is_EOC(ch)) { + + /* end FOLLBY_STRINGS_TO_EOC effect */ + followedby = FOLLBY_TOKEN; + token = T_EOC; + goto normal_return; + + } else if (is_special(ch) && FOLLBY_TOKEN == followedby) { + /* special chars are their own token values */ + token = ch; + /* + * '=' outside simulator configuration implies + * a single string following as in: + * setvar Owner = "The Boss" default + */ + if ('=' == ch && old_config_style) + followedby = FOLLBY_STRING; + yytext[0] = (char)ch; + yytext[1] = '\0'; + goto normal_return; + } else + push_back_char(ip_file, ch); + + /* save the position of start of the token */ + ip_file->prev_token_line_no = ip_file->line_no; + ip_file->prev_token_col_no = ip_file->col_no; + + /* Read in the lexeme */ + i = 0; + while (EOF != (ch = get_next_char(ip_file))) { + + yytext[i] = (char)ch; + + /* Break on whitespace or a special character */ + if (isspace(ch) || is_EOC(ch) + || '"' == ch + || (FOLLBY_TOKEN == followedby + && is_special(ch))) + break; + + /* Read the rest of the line on reading a start + of comment character */ + if ('#' == ch) { + while (EOF != (ch = get_next_char(ip_file)) + && '\n' != ch) + ; /* Null Statement */ + break; + } + + i++; + if (i >= COUNTOF(yytext)) + goto lex_too_long; + } + /* Pick up all of the string inside between " marks, to + * end of line. If we make it to EOL without a + * terminating " assume it for them. + * + * XXX - HMS: I'm not sure we want to assume the closing " + */ + if ('"' == ch) { + instring = TRUE; + while (EOF != (ch = get_next_char(ip_file)) && + ch != '"' && ch != '\n') { + yytext[i++] = (char)ch; + if (i >= COUNTOF(yytext)) + goto lex_too_long; + } + /* + * yytext[i] will be pushed back as not part of + * this lexeme, but any closing quote should + * not be pushed back, so we read another char. + */ + if ('"' == ch) + ch = get_next_char(ip_file); + } + /* Pushback the last character read that is not a part + * of this lexeme. + * If the last character read was an EOF, pushback a + * newline character. This is to prevent a parse error + * when there is no newline at the end of a file. + */ + if (EOF == ch) + push_back_char(ip_file, '\n'); + else + push_back_char(ip_file, ch); + yytext[i] = '\0'; + } while (i == 0); + + /* Now return the desired token */ + + /* First make sure that the parser is *not* expecting a string + * as the next token (based on the previous token that was + * returned) and that we haven't read a string. + */ + + if (followedby == FOLLBY_TOKEN && !instring) { + token = is_keyword(yytext, &followedby); + if (token) { + /* + * T_Server is exceptional as it forces the + * following token to be a string in the + * non-simulator parts of the configuration, + * but in the simulator configuration section, + * "server" is followed by "=" which must be + * recognized as a token not a string. + */ + if (T_Server == token && !old_config_style) + followedby = FOLLBY_TOKEN; + goto normal_return; + } else if (is_integer(yytext)) { + yylval_was_set = TRUE; + errno = 0; + if ((yylval.Integer = strtol(yytext, NULL, 10)) == 0 + && ((errno == EINVAL) || (errno == ERANGE))) { + msyslog(LOG_ERR, + "Integer cannot be represented: %s", + yytext); + if (input_from_file) { + exit(1); + } else { + /* force end of parsing */ + yylval.Integer = 0; + return 0; + } + } + token = T_Integer; + goto normal_return; + } else if (is_u_int(yytext)) { + yylval_was_set = TRUE; + if ('0' == yytext[0] && + 'x' == tolower(yytext[1])) + converted = sscanf(&yytext[2], "%x", + &yylval.U_int); + else + converted = sscanf(yytext, "%u", + &yylval.U_int); + if (1 != converted) { + msyslog(LOG_ERR, + "U_int cannot be represented: %s", + yytext); + if (input_from_file) { + exit(1); + } else { + /* force end of parsing */ + yylval.Integer = 0; + return 0; + } + } + token = T_U_int; + goto normal_return; + } else if (is_double(yytext)) { + yylval_was_set = TRUE; + errno = 0; + if ((yylval.Double = atof(yytext)) == 0 && errno == ERANGE) { + msyslog(LOG_ERR, + "Double too large to represent: %s", + yytext); + exit(1); + } else { + token = T_Double; + goto normal_return; + } + } else { + /* Default: Everything is a string */ + yylval_was_set = TRUE; + token = create_string_token(yytext); + goto normal_return; + } + } + + /* + * Either followedby is not FOLLBY_TOKEN or this lexeme is part + * of a string. Hence, we need to return T_String. + * + * _Except_ we might have a -4 or -6 flag on a an association + * configuration line (server, peer, pool, etc.). + * + * This is a terrible hack, but the grammar is ambiguous so we + * don't have a choice. [SK] + * + * The ambiguity is in the keyword scanner, not ntp_parser.y. + * We do not require server addresses be quoted in ntp.conf, + * complicating the scanner's job. To avoid trying (and + * failing) to match an IP address or DNS name to a keyword, + * the association keywords use FOLLBY_STRING in the keyword + * table, which tells the scanner to force the next token to be + * a T_String, so it does not try to match a keyword but rather + * expects a string when -4/-6 modifiers to server, peer, etc. + * are encountered. + * restrict -4 and restrict -6 parsing works correctly without + * this hack, as restrict uses FOLLBY_TOKEN. [DH] + */ + if ('-' == yytext[0]) { + if ('4' == yytext[1]) { + token = T_Ipv4_flag; + goto normal_return; + } else if ('6' == yytext[1]) { + token = T_Ipv6_flag; + goto normal_return; + } + } + + instring = FALSE; + if (FOLLBY_STRING == followedby) + followedby = FOLLBY_TOKEN; + + yylval_was_set = TRUE; + token = create_string_token(yytext); + +normal_return: + if (T_EOC == token) + DPRINTF(4,("\t<end of command>\n")); + else + DPRINTF(4, ("yylex: lexeme '%s' -> %s\n", yytext, + token_name(token))); + + if (!yylval_was_set) + yylval.Integer = token; + + return token; + +lex_too_long: + yytext[min(sizeof(yytext) - 1, 50)] = 0; + msyslog(LOG_ERR, + "configuration item on line %d longer than limit of %lu, began with '%s'", + ip_file->line_no, (u_long)min(sizeof(yytext) - 1, 50), + yytext); + + /* + * If we hit the length limit reading the startup configuration + * file, abort. + */ + if (input_from_file) + exit(sizeof(yytext) - 1); + + /* + * If it's runtime configuration via ntpq :config treat it as + * if the configuration text ended before the too-long lexeme, + * hostname, or string. + */ + yylval.Integer = 0; + return 0; +} |