diff options
author | Armin Rigo <arigo@tunes.org> | 2015-05-12 11:07:25 +0200 |
---|---|---|
committer | Armin Rigo <arigo@tunes.org> | 2015-05-12 11:07:25 +0200 |
commit | fe4bb73d2191ea7b2ee5586848e1c5bbbcbaa72b (patch) | |
tree | 5c80d5c2ee50bc8e99039c7483522e2febc08cb7 /c/parse_c_type.c | |
parent | 34dbd9932de50a5de29f0fdaab9e9a06526d93a7 (diff) | |
download | cffi-fe4bb73d2191ea7b2ee5586848e1c5bbbcbaa72b.tar.gz |
the big Moving Files Around step
Diffstat (limited to 'c/parse_c_type.c')
-rw-r--r-- | c/parse_c_type.c | 760 |
1 files changed, 760 insertions, 0 deletions
diff --git a/c/parse_c_type.c b/c/parse_c_type.c new file mode 100644 index 0000000..6fcd73c --- /dev/null +++ b/c/parse_c_type.c @@ -0,0 +1,760 @@ +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <errno.h> + +#define _CFFI_INTERNAL +#include "../cffi/parse_c_type.h" + + +enum token_e { + TOK_STAR='*', + TOK_OPEN_PAREN='(', + TOK_CLOSE_PAREN=')', + TOK_OPEN_BRACKET='[', + TOK_CLOSE_BRACKET=']', + TOK_COMMA=',', + + TOK_START=256, + TOK_END, + TOK_ERROR, + TOK_IDENTIFIER, + TOK_INTEGER, + TOK_DOTDOTDOT, + + /* keywords */ + TOK__BOOL, + TOK_CHAR, + //TOK__COMPLEX, + TOK_CONST, + TOK_DOUBLE, + TOK_ENUM, + TOK_FLOAT, + //TOK__IMAGINARY, + TOK_INT, + TOK_LONG, + TOK_SHORT, + TOK_SIGNED, + TOK_STRUCT, + TOK_UNION, + TOK_UNSIGNED, + TOK_VOID, + TOK_VOLATILE, +}; + +typedef struct { + struct _cffi_parse_info_s *info; + const char *input, *p; + size_t size; // the next token is at 'p' and of length 'size' + enum token_e kind; + _cffi_opcode_t *output; + size_t output_index; +} token_t; + +static int is_space(char x) +{ + return (x == ' ' || x == '\f' || x == '\n' || x == '\r' || + x == '\t' || x == '\v'); +} + +static int is_ident_first(char x) +{ + return (('A' <= x && x <= 'Z') || ('a' <= x && x <= 'z') || x == '_' || + x == '$'); /* '$' in names is supported here, for the struct + names invented by cparser */ +} + +static int is_digit(char x) +{ + return ('0' <= x && x <= '9'); +} + +static int is_hex_digit(char x) +{ + return (('0' <= x && x <= '9') || + ('A' <= x && x <= 'F') || + ('a' <= x && x <= 'f')); +} + +static int is_ident_next(char x) +{ + return (is_ident_first(x) || is_digit(x)); +} + +static char get_following_char(token_t *tok) +{ + const char *p = tok->p + tok->size; + if (tok->kind == TOK_ERROR) + return 0; + while (is_space(*p)) + p++; + return *p; +} + +static int number_of_commas(token_t *tok) +{ + const char *p = tok->p; + int result = 0; + int nesting = 0; + while (1) { + switch (*p++) { + case ',': result += !nesting; break; + case '(': nesting++; break; + case ')': if ((--nesting) < 0) return result; break; + case 0: return result; + default: break; + } + } +} + +static void next_token(token_t *tok) +{ + const char *p = tok->p + tok->size; + if (tok->kind == TOK_ERROR) + return; + while (!is_ident_first(*p)) { + if (is_space(*p)) { + p++; + } + else if (is_digit(*p)) { + tok->kind = TOK_INTEGER; + tok->p = p; + tok->size = 1; + if (p[1] == 'x' || p[1] == 'X') + tok->size = 2; + while (is_hex_digit(p[tok->size])) + tok->size++; + return; + } + else if (p[0] == '.' && p[1] == '.' && p[2] == '.') { + tok->kind = TOK_DOTDOTDOT; + tok->p = p; + tok->size = 3; + return; + } + else if (*p) { + tok->kind = *p; + tok->p = p; + tok->size = 1; + return; + } + else { + tok->kind = TOK_END; + tok->p = p; + tok->size = 0; + return; + } + } + tok->kind = TOK_IDENTIFIER; + tok->p = p; + tok->size = 1; + while (is_ident_next(p[tok->size])) + tok->size++; + + switch (*p) { + case '_': + if (tok->size == 5 && !memcmp(p, "_Bool", 5)) tok->kind = TOK__BOOL; + break; + case 'c': + if (tok->size == 4 && !memcmp(p, "char", 4)) tok->kind = TOK_CHAR; + if (tok->size == 5 && !memcmp(p, "const", 5)) tok->kind = TOK_CONST; + break; + case 'd': + if (tok->size == 6 && !memcmp(p, "double", 6)) tok->kind = TOK_DOUBLE; + break; + case 'e': + if (tok->size == 4 && !memcmp(p, "enum", 4)) tok->kind = TOK_ENUM; + break; + case 'f': + if (tok->size == 5 && !memcmp(p, "float", 5)) tok->kind = TOK_FLOAT; + break; + case 'i': + if (tok->size == 3 && !memcmp(p, "int", 3)) tok->kind = TOK_INT; + break; + case 'l': + if (tok->size == 4 && !memcmp(p, "long", 4)) tok->kind = TOK_LONG; + break; + case 's': + if (tok->size == 5 && !memcmp(p, "short", 5)) tok->kind = TOK_SHORT; + if (tok->size == 6 && !memcmp(p, "signed", 6)) tok->kind = TOK_SIGNED; + if (tok->size == 6 && !memcmp(p, "struct", 6)) tok->kind = TOK_STRUCT; + break; + case 'u': + if (tok->size == 5 && !memcmp(p, "union", 5)) tok->kind = TOK_UNION; + if (tok->size == 8 && !memcmp(p,"unsigned",8)) tok->kind = TOK_UNSIGNED; + break; + case 'v': + if (tok->size == 4 && !memcmp(p, "void", 4)) tok->kind = TOK_VOID; + if (tok->size == 8 && !memcmp(p,"volatile",8)) tok->kind = TOK_VOLATILE; + break; + } +} + +static int parse_error(token_t *tok, const char *msg) +{ + if (tok->kind != TOK_ERROR) { + tok->kind = TOK_ERROR; + tok->info->error_location = tok->p - tok->input; + tok->info->error_message = msg; + } + return -1; +} + +static int write_ds(token_t *tok, _cffi_opcode_t ds) +{ + size_t index = tok->output_index; + if (index >= tok->info->output_size) { + parse_error(tok, "internal type complexity limit reached"); + return -1; + } + tok->output[index] = ds; + tok->output_index = index + 1; + return index; +} + +#define MAX_SSIZE_T (((size_t)-1) >> 1) + +static int parse_complete(token_t *tok); + +static int parse_sequel(token_t *tok, int outer) +{ + /* Emit opcodes for the "sequel", which is the optional part of a + type declaration that follows the type name, i.e. everything + with '*', '[ ]', '( )'. Returns the entry point index pointing + the innermost opcode (the one that corresponds to the complete + type). The 'outer' argument is the index of the opcode outside + this "sequel". + */ + int check_for_grouping; + _cffi_opcode_t result, *p_current; + + header: + switch (tok->kind) { + case TOK_STAR: + outer = write_ds(tok, _CFFI_OP(_CFFI_OP_POINTER, outer)); + next_token(tok); + goto header; + case TOK_CONST: + /* ignored for now */ + next_token(tok); + goto header; + case TOK_VOLATILE: + /* ignored for now */ + next_token(tok); + goto header; + default: + break; + } + + check_for_grouping = 1; + if (tok->kind == TOK_IDENTIFIER) { + next_token(tok); /* skip a potential variable name */ + check_for_grouping = 0; + } + + result = 0; + p_current = &result; + + while (tok->kind == TOK_OPEN_PAREN) { + next_token(tok); + + if ((check_for_grouping--) == 1 && (tok->kind == TOK_STAR || + tok->kind == TOK_CONST || + tok->kind == TOK_VOLATILE || + tok->kind == TOK_OPEN_BRACKET)) { + /* just parentheses for grouping. Use a OP_NOOP to simplify */ + int x; + assert(p_current == &result); + x = tok->output_index; + p_current = tok->output + x; + + write_ds(tok, _CFFI_OP(_CFFI_OP_NOOP, 0)); + + x = parse_sequel(tok, x); + result = _CFFI_OP(_CFFI_GETOP(0), x); + } + else { + /* function type */ + int arg_total, base_index, arg_next, has_ellipsis=0; + + if (tok->kind == TOK_VOID && get_following_char(tok) == ')') { + next_token(tok); + } + + /* (over-)estimate 'arg_total'. May return 1 when it is really 0 */ + arg_total = number_of_commas(tok) + 1; + + *p_current = _CFFI_OP(_CFFI_GETOP(*p_current), tok->output_index); + p_current = tok->output + tok->output_index; + + base_index = write_ds(tok, _CFFI_OP(_CFFI_OP_FUNCTION, 0)); + if (base_index < 0) + return -1; + /* reserve (arg_total + 1) slots for the arguments and the + final FUNCTION_END */ + for (arg_next = 0; arg_next <= arg_total; arg_next++) + if (write_ds(tok, _CFFI_OP(0, 0)) < 0) + return -1; + + arg_next = base_index + 1; + + if (tok->kind != TOK_CLOSE_PAREN) { + while (1) { + int arg; + _cffi_opcode_t oarg; + + if (tok->kind == TOK_DOTDOTDOT) { + has_ellipsis = 1; + next_token(tok); + break; + } + arg = parse_complete(tok); + switch (_CFFI_GETOP(tok->output[arg])) { + case _CFFI_OP_ARRAY: + case _CFFI_OP_OPEN_ARRAY: + arg = _CFFI_GETARG(tok->output[arg]); + /* fall-through */ + case _CFFI_OP_FUNCTION: + oarg = _CFFI_OP(_CFFI_OP_POINTER, arg); + break; + default: + oarg = _CFFI_OP(_CFFI_OP_NOOP, arg); + break; + } + assert(arg_next - base_index <= arg_total); + tok->output[arg_next++] = oarg; + if (tok->kind != TOK_COMMA) + break; + next_token(tok); + } + } + tok->output[arg_next] = _CFFI_OP(_CFFI_OP_FUNCTION_END, + has_ellipsis); + } + + if (tok->kind != TOK_CLOSE_PAREN) + return parse_error(tok, "expected ')'"); + next_token(tok); + } + + while (tok->kind == TOK_OPEN_BRACKET) { + *p_current = _CFFI_OP(_CFFI_GETOP(*p_current), tok->output_index); + p_current = tok->output + tok->output_index; + + next_token(tok); + if (tok->kind != TOK_CLOSE_BRACKET) { + size_t length; + int gindex; + char *endptr; + + switch (tok->kind) { + + case TOK_INTEGER: + errno = 0; +#ifndef MS_WIN32 + if (sizeof(length) > sizeof(unsigned long)) + length = strtoull(tok->p, &endptr, 0); + else +#endif + length = strtoul(tok->p, &endptr, 0); + if (endptr != tok->p + tok->size) + return parse_error(tok, "invalid number"); + if (errno == ERANGE || length > MAX_SSIZE_T) + return parse_error(tok, "number too large"); + break; + + case TOK_IDENTIFIER: + gindex = search_in_globals(tok->info->ctx, tok->p, tok->size); + if (gindex >= 0) { + const struct _cffi_global_s *g; + g = &tok->info->ctx->globals[gindex]; + if (_CFFI_GETOP(g->type_op) == _CFFI_OP_CONSTANT_INT || + _CFFI_GETOP(g->type_op) == _CFFI_OP_ENUM) { + int neg; + struct _cffi_getconst_s gc; + gc.ctx = tok->info->ctx; + gc.gindex = gindex; + neg = ((int(*)(struct _cffi_getconst_s*))g->address) + (&gc); + if (neg == 0 && gc.value > MAX_SSIZE_T) + return parse_error(tok, + "integer constant too large"); + if (neg == 0 || gc.value == 0) { + length = (size_t)gc.value; + break; + } + if (neg != 1) + return parse_error(tok, "disagreement about" + " this constant's value"); + } + } + /* fall-through to the default case */ + default: + return parse_error(tok, "expected a positive integer constant"); + } + + next_token(tok); + + write_ds(tok, _CFFI_OP(_CFFI_OP_ARRAY, 0)); + write_ds(tok, (_cffi_opcode_t)length); + } + else + write_ds(tok, _CFFI_OP(_CFFI_OP_OPEN_ARRAY, 0)); + + if (tok->kind != TOK_CLOSE_BRACKET) + return parse_error(tok, "expected ']'"); + next_token(tok); + } + + *p_current = _CFFI_OP(_CFFI_GETOP(*p_current), outer); + return _CFFI_GETARG(result); +} + + +#define MAKE_SEARCH_FUNC(FIELD) \ + static \ + int search_in_##FIELD(const struct _cffi_type_context_s *ctx, \ + const char *search, size_t search_len) \ + { \ + int left = 0, right = ctx->num_##FIELD; \ + \ + while (left < right) { \ + int middle = (left + right) / 2; \ + const char *src = ctx->FIELD[middle].name; \ + int diff = strncmp(src, search, search_len); \ + if (diff == 0 && src[search_len] == '\0') \ + return middle; \ + else if (diff >= 0) \ + right = middle; \ + else \ + left = middle + 1; \ + } \ + return -1; \ + } + +MAKE_SEARCH_FUNC(globals) +MAKE_SEARCH_FUNC(struct_unions) +MAKE_SEARCH_FUNC(typenames) +MAKE_SEARCH_FUNC(enums) + +#undef MAKE_SEARCH_FUNC + + +static +int search_standard_typename(const char *p, size_t size) +{ + if (size < 6 || p[size-2] != '_' || p[size-1] != 't') + return -1; + + switch (p[4]) { + + case '1': + if (size == 8 && !memcmp(p, "uint16", 6)) return _CFFI_PRIM_UINT16; + break; + + case '2': + if (size == 7 && !memcmp(p, "int32", 5)) return _CFFI_PRIM_INT32; + break; + + case '3': + if (size == 8 && !memcmp(p, "uint32", 6)) return _CFFI_PRIM_UINT32; + break; + + case '4': + if (size == 7 && !memcmp(p, "int64", 5)) return _CFFI_PRIM_INT64; + break; + + case '6': + if (size == 8 && !memcmp(p, "uint64", 6)) return _CFFI_PRIM_UINT64; + if (size == 7 && !memcmp(p, "int16", 5)) return _CFFI_PRIM_INT16; + break; + + case '8': + if (size == 7 && !memcmp(p, "uint8", 5)) return _CFFI_PRIM_UINT8; + break; + + case 'a': + if (size == 8 && !memcmp(p, "intmax", 6)) return _CFFI_PRIM_INTMAX; + break; + + case 'e': + if (size == 7 && !memcmp(p, "ssize", 5)) return _CFFI_PRIM_SSIZE; + break; + + case 'f': + if (size == 11 && !memcmp(p, "int_fast8", 9)) return _CFFI_PRIM_INT_FAST8; + if (size == 12 && !memcmp(p, "int_fast16", 10)) return _CFFI_PRIM_INT_FAST16; + if (size == 12 && !memcmp(p, "int_fast32", 10)) return _CFFI_PRIM_INT_FAST32; + if (size == 12 && !memcmp(p, "int_fast64", 10)) return _CFFI_PRIM_INT_FAST64; + break; + + case 'i': + if (size == 9 && !memcmp(p, "ptrdiff", 7)) return _CFFI_PRIM_PTRDIFF; + break; + + case 'l': + if (size == 12 && !memcmp(p, "int_least8", 10)) return _CFFI_PRIM_INT_LEAST8; + if (size == 13 && !memcmp(p, "int_least16", 11)) return _CFFI_PRIM_INT_LEAST16; + if (size == 13 && !memcmp(p, "int_least32", 11)) return _CFFI_PRIM_INT_LEAST32; + if (size == 13 && !memcmp(p, "int_least64", 11)) return _CFFI_PRIM_INT_LEAST64; + break; + + case 'm': + if (size == 9 && !memcmp(p, "uintmax", 7)) return _CFFI_PRIM_UINTMAX; + break; + + case 'p': + if (size == 9 && !memcmp(p, "uintptr", 7)) return _CFFI_PRIM_UINTPTR; + break; + + case 'r': + if (size == 7 && !memcmp(p, "wchar", 5)) return _CFFI_PRIM_WCHAR; + break; + + case 't': + if (size == 8 && !memcmp(p, "intptr", 6)) return _CFFI_PRIM_INTPTR; + break; + + case '_': + if (size == 6 && !memcmp(p, "size", 4)) return _CFFI_PRIM_SIZE; + if (size == 6 && !memcmp(p, "int8", 4)) return _CFFI_PRIM_INT8; + if (size >= 12) { + switch (p[10]) { + case '1': + if (size == 14 && !memcmp(p, "uint_least16", 12)) return _CFFI_PRIM_UINT_LEAST16; + break; + case '2': + if (size == 13 && !memcmp(p, "uint_fast32", 11)) return _CFFI_PRIM_UINT_FAST32; + break; + case '3': + if (size == 14 && !memcmp(p, "uint_least32", 12)) return _CFFI_PRIM_UINT_LEAST32; + break; + case '4': + if (size == 13 && !memcmp(p, "uint_fast64", 11)) return _CFFI_PRIM_UINT_FAST64; + break; + case '6': + if (size == 14 && !memcmp(p, "uint_least64", 12)) return _CFFI_PRIM_UINT_LEAST64; + if (size == 13 && !memcmp(p, "uint_fast16", 11)) return _CFFI_PRIM_UINT_FAST16; + break; + case '8': + if (size == 13 && !memcmp(p, "uint_least8", 11)) return _CFFI_PRIM_UINT_LEAST8; + break; + case '_': + if (size == 12 && !memcmp(p, "uint_fast8", 10)) return _CFFI_PRIM_UINT_FAST8; + break; + default: + break; + } + } + break; + + default: + break; + } + return -1; +} + + +static int parse_complete(token_t *tok) +{ + unsigned int t0; + _cffi_opcode_t t1; + int modifiers_length, modifiers_sign; + + qualifiers: + switch (tok->kind) { + case TOK_CONST: + /* ignored for now */ + next_token(tok); + goto qualifiers; + case TOK_VOLATILE: + /* ignored for now */ + next_token(tok); + goto qualifiers; + default: + ; + } + + modifiers_length = 0; + modifiers_sign = 0; + modifiers: + switch (tok->kind) { + + case TOK_SHORT: + if (modifiers_length != 0) + return parse_error(tok, "'short' after another 'short' or 'long'"); + modifiers_length--; + next_token(tok); + goto modifiers; + + case TOK_LONG: + if (modifiers_length < 0) + return parse_error(tok, "'long' after 'short'"); + if (modifiers_length >= 2) + return parse_error(tok, "'long long long' is too long"); + modifiers_length++; + next_token(tok); + goto modifiers; + + case TOK_SIGNED: + if (modifiers_sign) + return parse_error(tok, "multiple 'signed' or 'unsigned'"); + modifiers_sign++; + next_token(tok); + goto modifiers; + + case TOK_UNSIGNED: + if (modifiers_sign) + return parse_error(tok, "multiple 'signed' or 'unsigned'"); + modifiers_sign--; + next_token(tok); + goto modifiers; + + default: + break; + } + + if (modifiers_length || modifiers_sign) { + + switch (tok->kind) { + + case TOK_VOID: + case TOK__BOOL: + case TOK_FLOAT: + case TOK_STRUCT: + case TOK_UNION: + case TOK_ENUM: + return parse_error(tok, "invalid combination of types"); + + case TOK_DOUBLE: + if (modifiers_sign != 0 || modifiers_length != 1) + return parse_error(tok, "invalid combination of types"); + next_token(tok); + t0 = _CFFI_PRIM_LONGDOUBLE; + break; + + case TOK_CHAR: + if (modifiers_length != 0) + return parse_error(tok, "invalid combination of types"); + modifiers_length = -2; + /* fall-through */ + case TOK_INT: + next_token(tok); + /* fall-through */ + default: + if (modifiers_sign >= 0) + switch (modifiers_length) { + case -2: t0 = _CFFI_PRIM_SCHAR; break; + case -1: t0 = _CFFI_PRIM_SHORT; break; + case 1: t0 = _CFFI_PRIM_LONG; break; + case 2: t0 = _CFFI_PRIM_LONGLONG; break; + default: t0 = _CFFI_PRIM_INT; break; + } + else + switch (modifiers_length) { + case -2: t0 = _CFFI_PRIM_UCHAR; break; + case -1: t0 = _CFFI_PRIM_USHORT; break; + case 1: t0 = _CFFI_PRIM_ULONG; break; + case 2: t0 = _CFFI_PRIM_ULONGLONG; break; + default: t0 = _CFFI_PRIM_UINT; break; + } + } + t1 = _CFFI_OP(_CFFI_OP_PRIMITIVE, t0); + } + else { + switch (tok->kind) { + case TOK_INT: + t1 = _CFFI_OP(_CFFI_OP_PRIMITIVE, _CFFI_PRIM_INT); + break; + case TOK_CHAR: + t1 = _CFFI_OP(_CFFI_OP_PRIMITIVE, _CFFI_PRIM_CHAR); + break; + case TOK_VOID: + t1 = _CFFI_OP(_CFFI_OP_PRIMITIVE, _CFFI_PRIM_VOID); + break; + case TOK__BOOL: + t1 = _CFFI_OP(_CFFI_OP_PRIMITIVE, _CFFI_PRIM_BOOL); + break; + case TOK_FLOAT: + t1 = _CFFI_OP(_CFFI_OP_PRIMITIVE, _CFFI_PRIM_FLOAT); + break; + case TOK_DOUBLE: + t1 = _CFFI_OP(_CFFI_OP_PRIMITIVE, _CFFI_PRIM_DOUBLE); + break; + case TOK_IDENTIFIER: + { + int n = search_in_typenames(tok->info->ctx, tok->p, tok->size); + if (n >= 0) { + t1 = _CFFI_OP(_CFFI_OP_TYPENAME, n); + break; + } + n = search_standard_typename(tok->p, tok->size); + if (n >= 0) { + t1 = _CFFI_OP(_CFFI_OP_PRIMITIVE, n); + break; + } + return parse_error(tok, "undefined type name"); + } + case TOK_STRUCT: + case TOK_UNION: + { + int n, kind = tok->kind; + next_token(tok); + if (tok->kind != TOK_IDENTIFIER) + return parse_error(tok, "struct or union name expected"); + + n = search_in_struct_unions(tok->info->ctx, tok->p, tok->size); + if (n < 0) + return parse_error(tok, "undefined struct/union name"); + if (((tok->info->ctx->struct_unions[n].flags & _CFFI_F_UNION) != 0) + ^ (kind == TOK_UNION)) + return parse_error(tok, "wrong kind of tag: struct vs union"); + + t1 = _CFFI_OP(_CFFI_OP_STRUCT_UNION, n); + break; + } + case TOK_ENUM: + { + int n; + next_token(tok); + if (tok->kind != TOK_IDENTIFIER) + return parse_error(tok, "enum name expected"); + + n = search_in_enums(tok->info->ctx, tok->p, tok->size); + if (n < 0) + return parse_error(tok, "undefined enum name"); + + t1 = _CFFI_OP(_CFFI_OP_ENUM, n); + break; + } + default: + return parse_error(tok, "identifier expected"); + } + next_token(tok); + } + + return parse_sequel(tok, write_ds(tok, t1)); +} + + +static +int parse_c_type(struct _cffi_parse_info_s *info, const char *input) +{ + int result; + token_t token; + + token.info = info; + token.kind = TOK_START; + token.input = input; + token.p = input; + token.size = 0; + token.output = info->output; + token.output_index = 0; + + next_token(&token); + result = parse_complete(&token); + + if (token.kind != TOK_END) + return parse_error(&token, "unexpected symbol"); + return result; +} |