From 393d055d3723ba9c343115e08f1d31c66994e75e Mon Sep 17 00:00:00 2001 From: Dave Beckett Date: Mon, 4 Apr 2011 10:17:53 -0700 Subject: Initial bison pull parser --- src/turtle_common.h | 3 ++ src/turtle_lexer.l | 6 +-- src/turtle_parser.y | 134 ++++++++++++++++++++++++++++++++++++++++------------ 3 files changed, 109 insertions(+), 34 deletions(-) diff --git a/src/turtle_common.h b/src/turtle_common.h index 5dcbd26f..6d198289 100644 --- a/src/turtle_common.h +++ b/src/turtle_common.h @@ -34,6 +34,9 @@ extern "C" { int turtle_syntax_error(raptor_parser *rdf_parser, const char *message, ...) RAPTOR_PRINTF_FORMAT(2, 3); raptor_uri* turtle_qname_to_uri(raptor_parser *rdf_parser, unsigned char *name, size_t name_len); +/* turtle_lexer.l */ +extern void turtle_token_free(raptor_world* world, int token, YYSTYPE *lval); + /* * Turtle parser object diff --git a/src/turtle_lexer.l b/src/turtle_lexer.l index a26201f2..270ba9f7 100644 --- a/src/turtle_lexer.l +++ b/src/turtle_lexer.l @@ -809,9 +809,7 @@ turtle_token_print(raptor_world* world, int token, YYSTYPE *lval) -#ifdef STANDALONE - -static void +void turtle_token_free(raptor_world* world, int token, YYSTYPE *lval) { if(!token) @@ -836,6 +834,8 @@ turtle_token_free(raptor_world* world, int token, YYSTYPE *lval) } +#ifdef STANDALONE + int main(int argc, char *argv[]) { diff --git a/src/turtle_parser.y b/src/turtle_parser.y index 451b8f9d..3b58c56f 100644 --- a/src/turtle_parser.y +++ b/src/turtle_parser.y @@ -65,6 +65,12 @@ #define YYERROR_VERBOSE 1 #endif +#ifdef RAPTOR_DEBUG +const char * turtle_token_print(raptor_world* world, int token, YYSTYPE *lval); +#endif + + + /* Slow down the grammar operation and watch it work */ #if RAPTOR_DEBUG > 2 #define YYDEBUG 1 @@ -73,6 +79,8 @@ /* the lexer does not seem to track this */ #undef RAPTOR_TURTLE_USE_ERROR_COLUMNS +#define TURTLE_PUSH_PARSE 1 + /* Prototypes */ int turtle_parser_error(void* rdf_parser, const char *msg); @@ -84,19 +92,6 @@ int 
turtle_lexer_get_column(yyscan_t yyscanner); /* What the lexer wants */ extern int turtle_lexer_lex (YYSTYPE *turtle_parser_lval, yyscan_t scanner); -#define YYLEX_PARAM ((raptor_turtle_parser*)(((raptor_parser*)rdf_parser)->context))->scanner - -/* Pure parser argument (a void*) */ -#define YYPARSE_PARAM rdf_parser - -/* Make the yyerror below use the rdf_parser */ -#undef yyerror -#define yyerror(message) turtle_parser_error(rdf_parser, message) - -/* Make lex/yacc interface as small as possible */ -#undef yylex -#define yylex turtle_lexer_lex - /* Prototypes for local functions */ static void raptor_turtle_generate_statement(raptor_parser *parser, raptor_statement *triple); @@ -107,9 +102,17 @@ static void raptor_turtle_generate_statement(raptor_parser *parser, raptor_state /* directives */ +/* Pure parser - want a reentrant parser */ +%define api.pure -%pure-parser +/* Push or pull parser? */ +%define api.push_pull "push" + /* Extra lexer parameter to turtle_lexer_lex */ +%lex-param { ((raptor_turtle_parser*)(((raptor_parser*)rdf_parser)->context))->scanner } + +/* Pure parser argument */ +%parse-param { raptor_parser* rdf_parser } /* Interface between lexer and parser */ %union { @@ -140,14 +143,14 @@ static void raptor_turtle_generate_statement(raptor_parser *parser, raptor_state %token COLONMINUS ":-" %token TRUE_TOKEN "true" %token FALSE_TOKEN "false" +%token PREFIX "@prefix" +%token BASE "@base" /* literals */ %token STRING_LITERAL "string literal" %token URI_LITERAL "URI literal" %token BLANK_LITERAL "blank node" %token QNAME_LITERAL "QName" -%token PREFIX "@prefix" -%token BASE "@base" %token IDENTIFIER "identifier" %token INTEGER_LITERAL "integer literal" %token FLOATING_LITERAL "floating point literal" @@ -633,14 +636,10 @@ directive : prefix | base prefix: PREFIX IDENTIFIER URI_LITERAL DOT { - unsigned char *prefix=$2; + unsigned char *prefix = $2; raptor_turtle_parser* turtle_parser = (raptor_turtle_parser*)(((raptor_parser*)rdf_parser)->context); 
raptor_namespace *ns; -#if 0 - Get around bison complaining about not using $1 -#endif - #if RAPTOR_DEBUG > 1 printf("directive @prefix %s %s\n",($2 ? (char*)$2 : "(default)"), raptor_uri_as_string($3)); #endif @@ -1211,8 +1210,10 @@ turtle_qname_to_uri(raptor_parser *rdf_parser, unsigned char *name, size_t name_ +#ifndef TURTLE_PUSH_PARSE static int -turtle_parse(raptor_parser *rdf_parser, const char *string, size_t length) { +turtle_parse(raptor_parser *rdf_parser, const char *string, size_t length) +{ raptor_turtle_parser* turtle_parser = (raptor_turtle_parser*)rdf_parser->context; void *buffer; @@ -1233,6 +1234,77 @@ turtle_parse(raptor_parser *rdf_parser, const char *string, size_t length) { return 0; } +#endif + + +#ifdef TURTLE_PUSH_PARSE +/* + * Lex @string (@length bytes) with a newly initialised scanner and push each + * token into a new bison push parser until end of input, an ERROR_TOKEN, or + * the parser no longer asks for more tokens. + * + * Returns 0 if @string is NULL/empty or once the token loop has finished + * (the parser status is not propagated); returns 1 if the scanner or the + * parser state could not be created. + */ +static int +turtle_push_parse(raptor_parser *rdf_parser, + const char *string, size_t length) +{ + raptor_world* world = rdf_parser->world; + raptor_turtle_parser* turtle_parser; + void *buffer; + int status; + yypstate *ps; + + turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + + if(!string || !*string) + return 0; + + if(turtle_lexer_lex_init(&turtle_parser->scanner)) + return 1; + turtle_parser->scanner_set = 1; + + turtle_lexer_set_extra(rdf_parser, turtle_parser->scanner); + buffer = turtle_lexer__scan_bytes(string, length, turtle_parser->scanner); + + /* returns a parser instance or 0 on out of memory */ + ps = yypstate_new(); + if(!ps) { + /* do not leave an initialised scanner behind on the error path */ + turtle_lexer_lex_destroy(turtle_parser->scanner); + turtle_parser->scanner_set = 0; + return 1; + } + + do { + YYSTYPE lval; + int token; + + memset(&lval, 0, sizeof(YYSTYPE)); + + token = turtle_lexer_lex(&lval, turtle_parser->scanner); + +#ifdef RAPTOR_DEBUG + printf("token %s\n", turtle_token_print(world, token, &lval)); +#endif + + status = yypush_parse(ps, token, &lval, rdf_parser); + + /* turtle_token_free(world, token, &lval); */ + + if(!token || token == EOF || token == ERROR_TOKEN) + break; + } while (status == YYPUSH_MORE); + yypstate_delete(ps); + + turtle_lexer_lex_destroy(turtle_parser->scanner); + turtle_parser->scanner_set = 0; + + return 0; +} 
+#endif /** @@ -1404,9 +1463,14 @@ raptor_turtle_parse_chunk(raptor_parser* rdf_parser, /* Nothing to do */ if(!turtle_parser->buffer_length) return 0; - + +#ifdef TURTLE_PUSH_PARSE + turtle_push_parse(rdf_parser, + turtle_parser->buffer, turtle_parser->buffer_length); +#else turtle_parse(rdf_parser, turtle_parser->buffer, turtle_parser->buffer_length); - +#endif + if(rdf_parser->emitted_default_graph) { /* for non-TRIG - end default graph after last triple */ raptor_parser_end_graph(rdf_parser, NULL, 0); @@ -1660,9 +1724,9 @@ raptor_init_parser_trig(raptor_world* world) #define TURTLE_FILE_BUF_SIZE 2048 -static -void turtle_parser_print_statement(void *user, - const raptor_statement *statement) +static void +turtle_parser_print_statement(void *user, + raptor_statement *statement) { FILE* stream = (FILE*)user; raptor_statement_print(statement, stream); @@ -1721,21 +1785,29 @@ main(int argc, char *argv[]) turtle_parser.lineno= 1; - rdf_parser.world = raptor_world_instance(); - rdf_parser.context=&turtle_parser; - rdf_parser.base_uri = raptor_new_uri((const unsigned char*)"http://example.org/fake-base-uri/"); + rdf_parser.world = raptor_new_world(); + rdf_parser.context = &turtle_parser; + rdf_parser.base_uri = raptor_new_uri(rdf_parser.world, + (const unsigned char*)"http://example.org/fake-base-uri/"); - raptor_parser_set_statement_handler(&rdf_parser, stdout, turtle_parser_print_statement); + raptor_parser_set_statement_handler(&rdf_parser, stdout, + turtle_parser_print_statement); raptor_turtle_parse_init(&rdf_parser, "turtle"); turtle_parser.error_count = 0; +#ifdef TURTLE_PUSH_PARSE + turtle_push_parse(&rdf_parser, string, strlen(string)); +#else turtle_parse(&rdf_parser, string, strlen(string)); +#endif raptor_turtle_parse_terminate(&rdf_parser); raptor_free_uri(rdf_parser.base_uri); + raptor_free_world(rdf_parser.world); + return (0); } #endif -- cgit v1.2.1