diff options
author | Dave Beckett <dave@dajobe.org> | 2013-12-07 17:06:57 -0800 |
---|---|---|
committer | Dave Beckett <dave@dajobe.org> | 2013-12-07 17:06:57 -0800 |
commit | ec31356a59c105fdda4e848aa1bf2d485615670e (patch) | |
tree | 7fec4d792735fd5bd8afaf7e2202f980f339ae49 /src/ntriples_parse.c | |
parent | f296cf9eb241108cf43d2c60b41bc6f1b69d6a3f (diff) | |
download | raptor-ec31356a59c105fdda4e848aa1bf2d485615670e.tar.gz |
Make N-Triples / N-Quads parser more carefully handle NUL
(raptor_ntriples_parse_chunk): Looks for end_ptr rather than NUL when
searching for \n or \r. It also checks for \\ and " 's
Diffstat (limited to 'src/ntriples_parse.c')
-rw-r--r-- | src/ntriples_parse.c | 116 |
1 files changed, 77 insertions, 39 deletions
diff --git a/src/ntriples_parse.c b/src/ntriples_parse.c index 6be864ca..47524bad 100644 --- a/src/ntriples_parse.c +++ b/src/ntriples_parse.c @@ -379,48 +379,53 @@ raptor_ntriples_parse_chunk(raptor_parser* rdf_parser, unsigned char *start; raptor_ntriples_parser_context *ntriples_parser = (raptor_ntriples_parser_context*)rdf_parser->context; int max_terms = ntriples_parser->is_nquads ? 4 : 3; - + unsigned char* end_ptr; + #if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 RAPTOR_DEBUG2("adding %d bytes to buffer\n", (unsigned int)len); #endif - /* No data? It's the end */ - if(!len) - return 0; + if(len) { + buffer = RAPTOR_MALLOC(unsigned char*, ntriples_parser->line_length + len + 1); + if(!buffer) { + raptor_parser_fatal_error(rdf_parser, "Out of memory"); + return 1; + } - buffer = RAPTOR_MALLOC(unsigned char*, ntriples_parser->line_length + len + 1); - if(!buffer) { - raptor_parser_fatal_error(rdf_parser, "Out of memory"); - return 1; - } + if(ntriples_parser->line_length) { + memcpy(buffer, ntriples_parser->line, ntriples_parser->line_length); + RAPTOR_FREE(char*, ntriples_parser->line); + } - if(ntriples_parser->line_length) { - memcpy(buffer, ntriples_parser->line, ntriples_parser->line_length); - RAPTOR_FREE(char*, ntriples_parser->line); - } + ntriples_parser->line = buffer; - ntriples_parser->line = buffer; + /* move pointer to end of cdata buffer */ + ptr = buffer + ntriples_parser->line_length; - /* move pointer to end of cdata buffer */ - ptr = buffer+ntriples_parser->line_length; + /* adjust stored length */ + ntriples_parser->line_length += len; - /* adjust stored length */ - ntriples_parser->line_length += len; + /* now write new stuff at end of cdata buffer */ + memcpy(ptr, s, len); + ptr += len; + *ptr = '\0'; + } else + buffer = ntriples_parser->line; - /* now write new stuff at end of cdata buffer */ - memcpy(ptr, s, len); - ptr += len; - *ptr = '\0'; #if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 RAPTOR_DEBUG2("buffer now %ld bytes\n", ntriples_parser->line_length); #endif - ptr = buffer+ntriples_parser->offset; - while(*(start = ptr)) { + if(!ntriples_parser->line_length) + return 0; + + ptr = buffer + ntriples_parser->offset; + end_ptr = buffer + ntriples_parser->line_length; + while((start = ptr) < end_ptr) { unsigned char *line_start = ptr; -#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 3 +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 RAPTOR_DEBUG3("line buffer now '%s' (offset %ld)\n", ptr, ptr-(buffer+ntriples_parser->offset)); #endif @@ -435,21 +440,52 @@ raptor_ntriples_parse_chunk(raptor_parser* rdf_parser, start = line_start = ptr; } - while(*ptr && *ptr != '\n' && *ptr != '\r') - ptr++; - - if(!*ptr) - break; + if(1) { + int quote = '\0'; + int bq = 0; + while(ptr < end_ptr) { + if(!bq) { + if(*ptr == '\\') { + bq = 1; + ptr++; + continue; + } + + if(!quote) { + if(*ptr == '\'' || *ptr == '"') + quote = *ptr; + if(*ptr == '\n' || *ptr == '\r') + break; + } else { + if(*ptr == quote) + quote = 0; + } + } + ptr++; + bq = 0; + } + } + if(ptr == end_ptr) { + if(!is_end) + /* middle of line */ + break; + } else { #if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 - RAPTOR_DEBUG3("found newline \\x%02x at offset %d\n", *ptr, - ptr-line_start); + RAPTOR_DEBUG3("found newline \\x%02x at offset %ld\n", *ptr, + ptr-line_start); #endif - ntriples_parser->last_char = *ptr; - - len = ptr-line_start; + ntriples_parser->last_char = *ptr; + } + + len = ptr - line_start; rdf_parser->locator.column = 0; +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_DEBUG2("line (%ld) : >>>", len); + fwrite(line_start, sizeof(char), len, stderr); + fputs("<<<\n", stderr); +#endif *ptr = '\0'; if(raptor_ntriples_parse_line(rdf_parser, line_start, len, max_terms)) return 1; @@ -457,12 +493,14 @@ raptor_ntriples_parse_chunk(raptor_parser* rdf_parser, rdf_parser->locator.line++; /* go past newline */ - ptr++; - rdf_parser->locator.byte++; + if(ptr < end_ptr) { + ptr++; + rdf_parser->locator.byte++; + } #if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 /* Do not peek if too far */ - if(ptr-buffer < ntriples_parser->line_length) + if(RAPTOR_BAD_CAST(size_t, ptr - buffer) < ntriples_parser->line_length) RAPTOR_DEBUG2("next char is \\x%02x\n", *ptr); else RAPTOR_DEBUG1("next char unknown - end of buffer\n"); @@ -473,7 +511,7 @@ raptor_ntriples_parse_chunk(raptor_parser* rdf_parser, len = ntriples_parser->line_length - ntriples_parser->offset; - if(len) { + if(len && ntriples_parser->line_length != len) { /* collapse buffer */ #if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 |