summaryrefslogtreecommitdiff
path: root/src/ntriples_parse.c
diff options
context:
space:
mode:
author Dave Beckett <dave@dajobe.org> 2013-11-28 22:09:29 -0800
committer Dave Beckett <dave@dajobe.org> 2013-11-28 22:09:29 -0800
commit 220cb83fc2913f89c6243b31cd0c67e55066929d (patch)
tree 84e598e9ae484c9cdd5e46fbc2ce6a6ab7f8f7c0 /src/ntriples_parse.c
parent e10a4cd71f4631169d9808107afda0e2ec8590d2 (diff)
download raptor-220cb83fc2913f89c6243b31cd0c67e55066929d.tar.gz
(raptor_ntriples_parse_term): pull out of raptor_ntriples_parse_line()
Diffstat (limited to 'src/ntriples_parse.c')
-rw-r--r--src/ntriples_parse.c459
1 files changed, 244 insertions, 215 deletions
diff --git a/src/ntriples_parse.c b/src/ntriples_parse.c
index 288d35c6..bd331f7b 100644
--- a/src/ntriples_parse.c
+++ b/src/ntriples_parse.c
@@ -449,6 +449,244 @@ raptor_ntriples_term(raptor_parser* rdf_parser,
}
+/*
+ * raptor_ntriples_parse_term:
+ * @rdf_parser: parser
+ * @string: string input (in); the buffer is rewritten in place while decoding
+ * @len_p: pointer to length of @string (in/out); reduced as input is consumed
+ * @term_p: pointer to store term (out); left untouched on failure
+ *
+ * INTERNAL - Parse a string into a #raptor_term
+ *
+ * Dispatches on the first byte of @string: '<' starts a URI, '"' starts a
+ * literal (optionally followed by @language and/or ^^<datatype URI>) and
+ * '_' starts a blank node ID.  The parser locator column/byte counters are
+ * advanced for every delimiter byte skipped here.
+ *
+ * NOTE(review): a NULL result from the raptor_new_term_from_*() constructors
+ * is not treated as a failure here; callers should still check *@term_p.
+ *
+ * Return value: number of bytes processed or 0 on failure
+ */
+static int
+raptor_ntriples_parse_term(raptor_parser* rdf_parser,
+                           unsigned char *string, size_t *len_p,
+                           raptor_term** term_p)
+{
+  unsigned char *p = string;
+  unsigned char *dest;
+  size_t term_length = 0;
+
+  switch(*p) {
+    case '<':
+      /* Decoded URI text is written back starting where '<' was */
+      dest = p;
+
+      p++;
+      (*len_p)--;
+      rdf_parser->locator.column++;
+      rdf_parser->locator.byte++;
+
+      if(raptor_ntriples_term(rdf_parser,
+                              (const unsigned char**)&p,
+                              dest, len_p, &term_length,
+                              '>', RAPTOR_TERM_CLASS_URI)) {
+        goto fail;
+      }
+
+      if(!raptor_turtle_check_uri_string(dest)) {
+        raptor_parser_error(rdf_parser, "URI '%s' contains bad character(s)",
+                            dest);
+        goto fail;
+      }
+
+      {
+        raptor_uri *uri;
+
+        /* Check for bad ordinal predicate; this is reported but does not
+         * abort parsing of the term
+         */
+        if(!strncmp((const char*)dest,
+                    "http://www.w3.org/1999/02/22-rdf-syntax-ns#_", 44)) {
+          int ordinal = raptor_check_ordinal(dest + 44);
+          if(ordinal <= 0)
+            raptor_parser_error(rdf_parser, "Illegal ordinal value %d in property '%s'.", ordinal, dest);
+        }
+        if(raptor_uri_uri_string_is_absolute(dest) <= 0) {
+          raptor_parser_error(rdf_parser, "URI '%s' is not absolute.", dest);
+          goto fail;
+        }
+
+        uri = raptor_new_uri(rdf_parser->world, dest);
+        if(!uri) {
+          raptor_parser_error(rdf_parser, "Could not create URI for '%s'", (const char *)dest);
+          goto fail;
+        }
+
+        *term_p = raptor_new_term_from_uri(rdf_parser->world, uri);
+        raptor_free_uri(uri);
+      }
+      break;
+
+    case '"':
+      /* Decoded literal text is written back starting where '"' was */
+      dest = p;
+
+      p++;
+      (*len_p)--;
+      rdf_parser->locator.column++;
+      rdf_parser->locator.byte++;
+
+      if(raptor_ntriples_term(rdf_parser,
+                              (const unsigned char**)&p,
+                              dest, len_p, &term_length,
+                              '"', RAPTOR_TERM_CLASS_STRING)) {
+        goto fail;
+      }
+
+      {
+        unsigned char *object_literal_language = NULL;
+        unsigned char *object_literal_datatype = NULL;
+        raptor_uri* datatype_uri = NULL;
+
+        if(*len_p && *p == '@') {
+          unsigned char *q;
+          size_t lang_len = 0;
+
+          object_literal_language = p;
+
+          /* Skip @ */
+          p++;
+          (*len_p)--;
+          rdf_parser->locator.column++;
+          rdf_parser->locator.byte++;
+
+          if(!*len_p) {
+            raptor_parser_error(rdf_parser, "Missing language after \"string\"-");
+            goto fail;
+          }
+
+          if(raptor_ntriples_term(rdf_parser,
+                                  (const unsigned char**)&p,
+                                  object_literal_language, len_p, &lang_len,
+                                  '\0', RAPTOR_TERM_CLASS_LANGUAGE)) {
+            goto fail;
+          }
+
+          if(!lang_len) {
+            raptor_parser_error(rdf_parser, "Invalid language tag at @%s", p);
+            goto fail;
+          }
+
+          /* Normalize language to lowercase
+           * http://www.w3.org/TR/rdf-concepts/#dfn-language-identifier
+           */
+          for(q = object_literal_language; *q; q++) {
+            if(IS_ASCII_UPPER(*q))
+              *q = TO_ASCII_LOWER(*q);
+          }
+
+        }
+
+        if(*len_p > 1 && *p == '^' && p[1] == '^') {
+
+          /* Decoded datatype URI is written back starting where ^^ was */
+          object_literal_datatype = p;
+
+          /* Skip ^^ */
+          p += 2;
+          *len_p -= 2;
+          rdf_parser->locator.column += 2;
+          rdf_parser->locator.byte += 2;
+
+          if(!*len_p || *p != '<') {
+            raptor_parser_error(rdf_parser, "Missing datatype URI-ref in\"string\"^^<URI-ref> after ^^");
+            goto fail;
+          }
+
+          /* Skip < */
+          p++;
+          (*len_p)--;
+          rdf_parser->locator.column++;
+          rdf_parser->locator.byte++;
+
+          if(raptor_ntriples_term(rdf_parser,
+                                  (const unsigned char**)&p,
+                                  object_literal_datatype, len_p, NULL,
+                                  '>', RAPTOR_TERM_CLASS_URI)) {
+            goto fail;
+          }
+
+          if(raptor_uri_uri_string_is_absolute(object_literal_datatype) <= 0) {
+            raptor_parser_error(rdf_parser, "Datatype URI '%s' is not absolute.", object_literal_datatype);
+            goto fail;
+          }
+
+        }
+
+        /* A datatype wins over a language: after this point at most one of
+         * the two is non-NULL
+         */
+        if(object_literal_datatype && object_literal_language) {
+          raptor_parser_warning(rdf_parser,
+                                "Typed literal used with a language - ignoring the language");
+          object_literal_language = NULL;
+        }
+
+        if(object_literal_datatype) {
+          datatype_uri = raptor_new_uri(rdf_parser->world,
+                                        object_literal_datatype);
+          if(!datatype_uri) {
+            raptor_parser_error(rdf_parser,
+                                "Could not create literal datatype uri '%s'",
+                                object_literal_datatype);
+            goto fail;
+          }
+        }
+
+        *term_p = raptor_new_term_from_literal(rdf_parser->world,
+                                               dest,
+                                               datatype_uri,
+                                               object_literal_language);
+
+        /* The term constructor takes its own copy of the datatype URI, so
+         * release the temporary one here, as the URI branch does for its
+         * own temporary URI
+         */
+        if(datatype_uri)
+          raptor_free_uri(datatype_uri);
+      }
+
+      break;
+
+
+    case '_':
+      /* store where _ was */
+      dest = p;
+
+      p++;
+      (*len_p)--;
+      rdf_parser->locator.column++;
+      rdf_parser->locator.byte++;
+
+      if(!*len_p || *p != ':') {
+        raptor_parser_error(rdf_parser,
+                            "Illegal bNodeID - _ not followed by :");
+        goto fail;
+      }
+
+      /* Found ':' - move on */
+
+      p++;
+      (*len_p)--;
+      rdf_parser->locator.column++;
+      rdf_parser->locator.byte++;
+
+      if(raptor_ntriples_term(rdf_parser,
+                              (const unsigned char**)&p,
+                              dest, len_p, &term_length,
+                              '\0', RAPTOR_TERM_CLASS_BNODEID)) {
+        goto fail;
+      }
+
+      if(!term_length) {
+        raptor_parser_error(rdf_parser, "Bad or missing bNodeID after _:");
+        goto fail;
+      }
+
+      *term_p = raptor_new_term_from_blank(rdf_parser->world, dest);
+
+      break;
+
+    default:
+      raptor_parser_fatal_error(rdf_parser, "Unknown term type");
+      goto fail;
+  }
+
+  /* Success: report how many bytes of @string were consumed */
+  return (int)(p - string);
+
+  fail:
+  /* Failure must be 0 (not the partial offset already consumed) so that
+   * the caller's "no bytes processed" check detects it
+   */
+  return 0;
+}
+
+
+
#define MAX_NTRIPLES_TERMS 4
static int
@@ -459,9 +697,7 @@ raptor_ntriples_parse_line(raptor_parser* rdf_parser,
raptor_ntriples_parser_context *ntriples_parser = (raptor_ntriples_parser_context*)rdf_parser->context;
int i;
unsigned char *p;
- unsigned char *dest;
raptor_term* real_terms[MAX_NTRIPLES_TERMS] = {NULL, NULL, NULL, NULL};
- size_t term_length = 0;
int rc = 0;
/* ASSERTION:
@@ -535,221 +771,14 @@ raptor_ntriples_parse_line(raptor_parser* rdf_parser,
}
- switch(*p) {
- case '<':
- dest = p;
-
- p++;
- len--;
- rdf_parser->locator.column++;
- rdf_parser->locator.byte++;
-
- if(raptor_ntriples_term(rdf_parser,
- (const unsigned char**)&p,
- dest, &len, &term_length,
- '>', RAPTOR_TERM_CLASS_URI)) {
- rc = 1;
- goto cleanup;
- }
-
- if(!raptor_turtle_check_uri_string(dest)) {
- raptor_parser_error(rdf_parser, "URI '%s' contains bad character(s)", dest);
- rc = 1;
- goto cleanup;
- }
-
- if(1) {
- raptor_uri *uri;
-
- /* Check for bad ordinal predicate */
- if(!strncmp((const char*)dest,
- "http://www.w3.org/1999/02/22-rdf-syntax-ns#_", 44)) {
- int ordinal = raptor_check_ordinal(dest + 44);
- if(ordinal <= 0)
- raptor_parser_error(rdf_parser, "Illegal ordinal value %d in property '%s'.", ordinal, dest);
- }
- if(raptor_uri_uri_string_is_absolute(dest) <= 0) {
- raptor_parser_error(rdf_parser, "URI '%s' is not absolute.", dest);
- goto cleanup;
- }
-
- uri = raptor_new_uri(rdf_parser->world, dest);
- if(!uri) {
- raptor_parser_error(rdf_parser, "Could not create URI for '%s'", (const char *)dest);
- goto cleanup;
- }
- real_terms[i] = raptor_new_term_from_uri(rdf_parser->world, uri);
- raptor_free_uri(uri);
- }
- break;
-
- case '"':
- dest = p;
-
- p++;
- len--;
- rdf_parser->locator.column++;
- rdf_parser->locator.byte++;
-
- if(raptor_ntriples_term(rdf_parser,
- (const unsigned char**)&p,
- dest, &len, &term_length,
- '"', RAPTOR_TERM_CLASS_STRING)) {
- rc = 1;
- goto cleanup;
- }
-
- if(1) {
- unsigned char *object_literal_language = NULL;
- unsigned char *object_literal_datatype = NULL;
- raptor_uri* datatype_uri = NULL;
+ rc = raptor_ntriples_parse_term(rdf_parser, p, &len, &real_terms[i]);
- if(len && *p == '@') {
- unsigned char *q;
- size_t lang_len;
-
- object_literal_language = p;
-
- /* Skip - */
- p++;
- len--;
- rdf_parser->locator.column++;
- rdf_parser->locator.byte++;
-
- if(!len) {
- raptor_parser_error(rdf_parser, "Missing language after \"string\"-");
- goto cleanup;
- }
-
-
- if(raptor_ntriples_term(rdf_parser,
- (const unsigned char**)&p,
- object_literal_language, &len, &lang_len,
- '\0', RAPTOR_TERM_CLASS_LANGUAGE)) {
- rc = 1;
- goto cleanup;
- }
-
- if(!lang_len) {
- raptor_parser_error(rdf_parser, "Invalid language tag at @%s", p);
- rc = 1;
- goto cleanup;
- }
-
- /* Normalize language to lowercase
- * http://www.w3.org/TR/rdf-concepts/#dfn-language-identifier
- */
- for(q = object_literal_language; *q; q++) {
- if(IS_ASCII_UPPER(*q))
- *q = TO_ASCII_LOWER(*q);
- }
-
- }
-
- if(len >1 && *p == '^' && p[1] == '^') {
-
- object_literal_datatype = p;
-
- /* Skip ^^ */
- p += 2;
- len -= 2;
- rdf_parser->locator.column += 2;
- rdf_parser->locator.byte += 2;
-
- if(!len || (len && *p != '<')) {
- raptor_parser_error(rdf_parser, "Missing datatype URI-ref in\"string\"^^<URI-ref> after ^^");
- goto cleanup;
- }
-
- p++;
- len--;
- rdf_parser->locator.column++;
- rdf_parser->locator.byte++;
-
- if(raptor_ntriples_term(rdf_parser,
- (const unsigned char**)&p,
- object_literal_datatype, &len, NULL,
- '>', RAPTOR_TERM_CLASS_URI)) {
- rc = 1;
- goto cleanup;
- }
-
- if(raptor_uri_uri_string_is_absolute(object_literal_datatype) <= 0) {
- raptor_parser_error(rdf_parser, "Datatype URI '%s' is not absolute.", object_literal_datatype);
- rc = 1;
- goto cleanup;
- }
-
- }
-
- if(object_literal_datatype && object_literal_language) {
- raptor_parser_warning(rdf_parser, "Typed literal used with a language - ignoring the language");
- object_literal_language = NULL;
- }
-
- if(object_literal_datatype) {
- datatype_uri = raptor_new_uri(rdf_parser->world,
- object_literal_datatype);
- if(!datatype_uri) {
- raptor_parser_error(rdf_parser, "Could not create literal datatype uri '%s'", object_literal_datatype);
- goto cleanup;
- }
- object_literal_language = NULL;
- }
-
- real_terms[i] = raptor_new_term_from_literal(rdf_parser->world,
- dest,
- datatype_uri,
- object_literal_language);
- }
-
- break;
-
-
- case '_':
- /* store where _ was */
- dest = p;
-
- p++;
- len--;
- rdf_parser->locator.column++;
- rdf_parser->locator.byte++;
-
- if(!len || (len > 0 && *p != ':')) {
- raptor_parser_error(rdf_parser, "Illegal bNodeID - _ not followed by :");
- goto cleanup;
- }
-
- /* Found ':' - move on */
-
- p++;
- len--;
- rdf_parser->locator.column++;
- rdf_parser->locator.byte++;
-
- if(raptor_ntriples_term(rdf_parser,
- (const unsigned char**)&p,
- dest, &len, &term_length,
- '\0', RAPTOR_TERM_CLASS_BNODEID)) {
- rc = 1;
- goto cleanup;
- }
-
- if(!term_length) {
- raptor_parser_error(rdf_parser, "Bad or missing bNodeID after _:");
- goto cleanup;
- }
-
- real_terms[i] = raptor_new_term_from_blank(rdf_parser->world, dest);
-
- break;
-
- default:
- raptor_parser_fatal_error(rdf_parser, "Unknown term type");
- rc = 1;
- goto cleanup;
+ if(!rc) {
+ rc = 1;
+ goto cleanup;
}
-
+ p += rc;
+ rc = 0;
/* Skip whitespace after terms */
while(len > 0 && isspace((int)*p)) {