summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDave Beckett <dave@dajobe.org>2011-04-04 10:17:53 -0700
committerDave Beckett <dave@dajobe.org>2011-04-04 10:17:53 -0700
commit393d055d3723ba9c343115e08f1d31c66994e75e (patch)
tree0efcdf58bdd2320e44f66122cd6ee60360c3848e
parent39877963ad189f6aeda867bdd1a6c7e6784b9596 (diff)
downloadraptor-393d055d3723ba9c343115e08f1d31c66994e75e.tar.gz
Initial bison pull parser
-rw-r--r--src/turtle_common.h3
-rw-r--r--src/turtle_lexer.l6
-rw-r--r--src/turtle_parser.y134
3 files changed, 109 insertions, 34 deletions
diff --git a/src/turtle_common.h b/src/turtle_common.h
index 5dcbd26f..6d198289 100644
--- a/src/turtle_common.h
+++ b/src/turtle_common.h
@@ -34,6 +34,9 @@ extern "C" {
int turtle_syntax_error(raptor_parser *rdf_parser, const char *message, ...) RAPTOR_PRINTF_FORMAT(2, 3);
raptor_uri* turtle_qname_to_uri(raptor_parser *rdf_parser, unsigned char *name, size_t name_len);
+/* turtle_lexer.l */
+extern void turtle_token_free(raptor_world* world, int token, YYSTYPE *lval);
+
/*
* Turtle parser object
diff --git a/src/turtle_lexer.l b/src/turtle_lexer.l
index a26201f2..270ba9f7 100644
--- a/src/turtle_lexer.l
+++ b/src/turtle_lexer.l
@@ -809,9 +809,7 @@ turtle_token_print(raptor_world* world, int token, YYSTYPE *lval)
-#ifdef STANDALONE
-
-static void
+void
turtle_token_free(raptor_world* world, int token, YYSTYPE *lval)
{
if(!token)
@@ -836,6 +834,8 @@ turtle_token_free(raptor_world* world, int token, YYSTYPE *lval)
}
+#ifdef STANDALONE
+
int
main(int argc, char *argv[])
{
diff --git a/src/turtle_parser.y b/src/turtle_parser.y
index 451b8f9d..3b58c56f 100644
--- a/src/turtle_parser.y
+++ b/src/turtle_parser.y
@@ -65,6 +65,12 @@
#define YYERROR_VERBOSE 1
#endif
+#ifdef RAPTOR_DEBUG
+const char * turtle_token_print(raptor_world* world, int token, YYSTYPE *lval);
+#endif
+
+
+
/* Slow down the grammar operation and watch it work */
#if RAPTOR_DEBUG > 2
#define YYDEBUG 1
@@ -73,6 +79,8 @@
/* the lexer does not seem to track this */
#undef RAPTOR_TURTLE_USE_ERROR_COLUMNS
+#define TURTLE_PUSH_PARSE 1
+
/* Prototypes */
int turtle_parser_error(void* rdf_parser, const char *msg);
@@ -84,19 +92,6 @@ int turtle_lexer_get_column(yyscan_t yyscanner);
/* What the lexer wants */
extern int turtle_lexer_lex (YYSTYPE *turtle_parser_lval, yyscan_t scanner);
-#define YYLEX_PARAM ((raptor_turtle_parser*)(((raptor_parser*)rdf_parser)->context))->scanner
-
-/* Pure parser argument (a void*) */
-#define YYPARSE_PARAM rdf_parser
-
-/* Make the yyerror below use the rdf_parser */
-#undef yyerror
-#define yyerror(message) turtle_parser_error(rdf_parser, message)
-
-/* Make lex/yacc interface as small as possible */
-#undef yylex
-#define yylex turtle_lexer_lex
-
/* Prototypes for local functions */
static void raptor_turtle_generate_statement(raptor_parser *parser, raptor_statement *triple);
@@ -107,9 +102,17 @@ static void raptor_turtle_generate_statement(raptor_parser *parser, raptor_state
/* directives */
+/* Pure parser - want a reentrant parser */
+%define api.pure
-%pure-parser
+/* Push or pull parser? */
+%define api.push_pull "push"
+ /* Extra lexer parameter to turtle_lexer_lex */
+%lex-param { ((raptor_turtle_parser*)(((raptor_parser*)rdf_parser)->contxt))->scanner }
+
+/* Pure parser argument */
+%parse-param { raptor_parser* rdf_parser }
/* Interface between lexer and parser */
%union {
@@ -140,14 +143,14 @@ static void raptor_turtle_generate_statement(raptor_parser *parser, raptor_state
%token COLONMINUS ":-"
%token TRUE_TOKEN "true"
%token FALSE_TOKEN "false"
+%token PREFIX "@prefix"
+%token BASE "@base"
/* literals */
%token <string> STRING_LITERAL "string literal"
%token <uri> URI_LITERAL "URI literal"
%token <string> BLANK_LITERAL "blank node"
%token <uri> QNAME_LITERAL "QName"
-%token <string> PREFIX "@prefix"
-%token <string> BASE "@base"
%token <string> IDENTIFIER "identifier"
%token <string> INTEGER_LITERAL "integer literal"
%token <string> FLOATING_LITERAL "floating point literal"
@@ -633,14 +636,10 @@ directive : prefix | base
prefix: PREFIX IDENTIFIER URI_LITERAL DOT
{
- unsigned char *prefix=$2;
+ unsigned char *prefix = $2;
raptor_turtle_parser* turtle_parser = (raptor_turtle_parser*)(((raptor_parser*)rdf_parser)->context);
raptor_namespace *ns;
-#if 0
- Get around bison complaining about not using $1
-#endif
-
#if RAPTOR_DEBUG > 1
printf("directive @prefix %s %s\n",($2 ? (char*)$2 : "(default)"), raptor_uri_as_string($3));
#endif
@@ -1211,8 +1210,10 @@ turtle_qname_to_uri(raptor_parser *rdf_parser, unsigned char *name, size_t name_
+#ifndef TURTLE_PUSH_PARSE
static int
-turtle_parse(raptor_parser *rdf_parser, const char *string, size_t length) {
+turtle_parse(raptor_parser *rdf_parser, const char *string, size_t length)
+{
raptor_turtle_parser* turtle_parser = (raptor_turtle_parser*)rdf_parser->context;
void *buffer;
@@ -1233,6 +1234,64 @@ turtle_parse(raptor_parser *rdf_parser, const char *string, size_t length) {
return 0;
}
+#endif
+
+
+#ifdef TURTLE_PUSH_PARSE
+static int
+turtle_push_parse(raptor_parser *rdf_parser,
+ const char *string, size_t length)
+{
+ raptor_world* world = rdf_parser->world;
+ raptor_turtle_parser* turtle_parser;
+ void *buffer;
+ int status;
+ yypstate *ps;
+
+ turtle_parser = (raptor_turtle_parser*)rdf_parser->context;
+
+ if(!string || !*string)
+ return 0;
+
+ if(turtle_lexer_lex_init(&turtle_parser->scanner))
+ return 1;
+ turtle_parser->scanner_set = 1;
+
+ turtle_lexer_set_extra(rdf_parser, turtle_parser->scanner);
+ buffer = turtle_lexer__scan_bytes(string, length, turtle_parser->scanner);
+
+ /* returns a parser instance or 0 on out of memory */
+ ps = yypstate_new();
+ if(!ps)
+ return 1;
+
+ do {
+ YYSTYPE lval;
+ int token;
+
+ memset(&lval, 0, sizeof(YYSTYPE));
+
+ token = turtle_lexer_lex(&lval, turtle_parser->scanner);
+
+#ifdef RAPTOR_DEBUG
+ printf("token %s\n", turtle_token_print(world, token, &lval));
+#endif
+
+ status = yypush_parse(ps, token, &lval, rdf_parser);
+
+ /* turtle_token_free(world, token, &lval); */
+
+ if(!token || token == EOF || token == ERROR_TOKEN)
+ break;
+ } while (status == YYPUSH_MORE);
+ yypstate_delete(ps);
+
+ turtle_lexer_lex_destroy(turtle_parser->scanner);
+ turtle_parser->scanner_set = 0;
+
+ return 0;
+}
+#endif
/**
@@ -1404,9 +1463,14 @@ raptor_turtle_parse_chunk(raptor_parser* rdf_parser,
/* Nothing to do */
if(!turtle_parser->buffer_length)
return 0;
-
+
+#ifdef TURTLE_PUSH_PARSE
+ turtle_push_parse(rdf_parser,
+ turtle_parser->buffer, turtle_parser->buffer_length);
+#else
turtle_parse(rdf_parser, turtle_parser->buffer, turtle_parser->buffer_length);
-
+#endif
+
if(rdf_parser->emitted_default_graph) {
/* for non-TRIG - end default graph after last triple */
raptor_parser_end_graph(rdf_parser, NULL, 0);
@@ -1660,9 +1724,9 @@ raptor_init_parser_trig(raptor_world* world)
#define TURTLE_FILE_BUF_SIZE 2048
-static
-void turtle_parser_print_statement(void *user,
- const raptor_statement *statement)
+static void
+turtle_parser_print_statement(void *user,
+ raptor_statement *statement)
{
FILE* stream = (FILE*)user;
raptor_statement_print(statement, stream);
@@ -1721,21 +1785,29 @@ main(int argc, char *argv[])
turtle_parser.lineno= 1;
- rdf_parser.world = raptor_world_instance();
- rdf_parser.context=&turtle_parser;
- rdf_parser.base_uri = raptor_new_uri((const unsigned char*)"http://example.org/fake-base-uri/");
+ rdf_parser.world = raptor_new_world();
+ rdf_parser.context = &turtle_parser;
+ rdf_parser.base_uri = raptor_new_uri(rdf_parser.world,
+ (const unsigned char*)"http://example.org/fake-base-uri/");
- raptor_parser_set_statement_handler(&rdf_parser, stdout, turtle_parser_print_statement);
+ raptor_parser_set_statement_handler(&rdf_parser, stdout,
+ turtle_parser_print_statement);
raptor_turtle_parse_init(&rdf_parser, "turtle");
turtle_parser.error_count = 0;
+#ifdef TURTLE_PUSH_PARSE
+ turtle_push_parse(&rdf_parser, string, strlen(string));
+#else
turtle_parse(&rdf_parser, string, strlen(string));
+#endif
raptor_turtle_parse_terminate(&rdf_parser);
raptor_free_uri(rdf_parser.base_uri);
+ raptor_free_world(rdf_parser.world);
+
return (0);
}
#endif