/* -*- Mode: c; c-basic-offset: 2 -*- * * raptor_rdfxml.c - Raptor RDF/XML Parser * * Copyright (C) 2000-2008, David Beckett http://www.dajobe.org/ * Copyright (C) 2000-2005, University of Bristol, UK http://www.bristol.ac.uk/ * * This package is Free Software and part of Redland http://librdf.org/ * * It is licensed under the following three licenses as alternatives: * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version * 2. GNU General Public License (GPL) V2 or any newer version * 3. Apache License, V2.0 or any newer version * * You may not use this file except in compliance with at least one of * the above three licenses. * * See LICENSE.html or LICENSE.txt at the top of this package for the * complete terms and further detail along with the license texts for * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. * * */ #ifdef HAVE_CONFIG_H #include #endif #include #include #ifdef HAVE_STRINGS_H #include #endif #include #include #ifdef HAVE_ERRNO_H #include #endif #ifdef HAVE_STDLIB_H #include #endif /* Raptor includes */ #include "raptor2.h" #include "raptor_internal.h" /* Define these for far too much output */ #undef RAPTOR_DEBUG_VERBOSE #undef RAPTOR_DEBUG_CDATA /* Raptor structures */ typedef enum { /* Catch uninitialised state */ RAPTOR_STATE_INVALID = 0, /* Skipping current tree of elements - used to recover finding * illegal content, when parsling permissively. */ RAPTOR_STATE_SKIPPING, /* Not in RDF grammar yet - searching for a start element. 
* * This can be (goto NODE_ELEMENT_LIST) but since it is optional, * the start element can also be one of * http://www.w3.org/TR/rdf-syntax-grammar/#nodeElementURIs * * If RDF content is assumed, go straight to OBJ */ RAPTOR_STATE_UNKNOWN, /* A list of node elements * http://www.w3.org/TR/rdf-syntax-grammar/#nodeElementList */ RAPTOR_STATE_NODE_ELEMENT_LIST, /* Found an */ RAPTOR_STATE_DESCRIPTION, /* Found a property element * http://www.w3.org/TR/rdf-syntax-grammar/#propertyElt */ RAPTOR_STATE_PROPERTYELT, /* A property element that is an ordinal - rdf:li, rdf:_n */ RAPTOR_STATE_MEMBER_PROPERTYELT, /* Found a node element * http://www.w3.org/TR/rdf-syntax-grammar/#nodeElement */ RAPTOR_STATE_NODE_ELEMENT, /* A property element with rdf:parseType="Literal" * http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeLiteralPropertyElt */ RAPTOR_STATE_PARSETYPE_LITERAL, /* A property element with rdf:parseType="Resource" * http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeResourcePropertyElt */ RAPTOR_STATE_PARSETYPE_RESOURCE, /* A property element with rdf:parseType="Collection" * http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeCollectionPropertyElt * * (This also handles daml:Collection) */ RAPTOR_STATE_PARSETYPE_COLLECTION, /* A property element with a rdf:parseType attribute and a value * not "Literal" or "Resource" * http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeOtherPropertyElt */ RAPTOR_STATE_PARSETYPE_OTHER, RAPTOR_STATE_PARSETYPE_LAST = RAPTOR_STATE_PARSETYPE_OTHER } raptor_state; static const char* const raptor_state_names[RAPTOR_STATE_PARSETYPE_LAST+2] = { "INVALID", "SKIPPING", "UNKNOWN", "nodeElementList", "propertyElt", "Description", "propertyElt", "memberPropertyElt", "nodeElement", "parseTypeLiteral", "parseTypeResource", "parseTypeCollection", "parseTypeOther" }; static const char * raptor_rdfxml_state_as_string(raptor_state state) { if(state < 1 || state > RAPTOR_STATE_PARSETYPE_LAST) state = (raptor_state)0; return raptor_state_names[(int)state]; } 
/* * raptor_rdfxml_check_propertyElement_name: * @name: rdf namespace term * * Check if an rdf namespace name is allowed to be used as a Node Element. * * Return value: < 0 if unknown rdf namespace term, 0 if known and not allowed, > 0 if known and allowed */ static int raptor_rdfxml_check_nodeElement_name(const char *name) { int i; if(*name == '_') return 1; for(i = 0; raptor_rdf_ns_terms_info[i].name; i++) if(!strcmp(raptor_rdf_ns_terms_info[i].name, name)) return raptor_rdf_ns_terms_info[i].allowed_as_nodeElement; return -1; } /* * raptor_rdfxml_check_propertyElement_name: * @name: rdf namespace term * * Check if an rdf namespace name is allowed to be used as a Property Element. * * Return value: < 0 if unknown rdf namespace term, 0 if known and not allowed, > 0 if known and allowed */ static int raptor_rdfxml_check_propertyElement_name(const char *name) { int i; if(*name == '_') return 1; for(i = 0; raptor_rdf_ns_terms_info[i].name; i++) if(!strcmp(raptor_rdf_ns_terms_info[i].name, (const char*)name)) return raptor_rdf_ns_terms_info[i].allowed_as_propertyElement; return -1; } static int raptor_rdfxml_check_propertyAttribute_name(const char *name) { int i; if(*name == '_') return 1; for(i = 0; raptor_rdf_ns_terms_info[i].name; i++) if(!strcmp(raptor_rdf_ns_terms_info[i].name, (const char*)name)) return raptor_rdf_ns_terms_info[i].allowed_as_propertyAttribute; return -1; } typedef enum { /* undetermined yet - whitespace is stored */ RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_UNKNOWN, /* literal content - no elements, cdata allowed, whitespace significant * blah */ RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL, /* parseType literal content (WF XML) - all content preserved * blah */ RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL, /* top-level nodes - 0+ elements expected, no cdata, whitespace ignored, * any non-whitespace cdata is error * only used for or implict */ RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_NODES, /* properties - 0+ elements expected, no cdata, whitespace ignored, * 
any non-whitespace cdata is error * blah blah */ RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES, /* property content - all content preserved * any content type changes when first non-whitespace found * ... */ RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT, /* resource URI given - no element, no cdata, whitespace ignored, * any non-whitespace cdata is error * * */ RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE, /* skipping content - all content is preserved * Used when skipping content for unknown parseType-s, * error recovery, some other reason */ RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED, /* parseType Collection - all content preserved * Parsing of this determined by RDF/XML (Revised) closed collection rules * ... */ RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION, /* Like above but handles "daml:collection" */ RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION, /* dummy for use in strings below */ RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LAST } raptor_rdfxml_element_content_type; static const struct { const char * name; int whitespace_significant; /* non-blank cdata */ int cdata_allowed; /* XML element content */ int element_allowed; /* Do RDF-specific processing? (property attributes, rdf: attributes, ...) 
*/ int rdf_processing; } rdf_content_type_info[RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LAST]={ {"Unknown", 1, 1, 1, 0 }, {"Literal", 1, 1, 0, 0 }, {"XML Literal", 1, 1, 1, 0 }, {"Nodes", 0, 0, 1, 1 }, {"Properties", 0, 1, 1, 1 }, {"Property Content",1, 1, 1, 1 }, {"Resource", 0, 0, 0, 0 }, {"Preserved", 1, 1, 1, 0 }, {"Collection", 1, 1, 1, 1 }, {"DAML Collection", 1, 1, 1, 1 }, }; static const char * raptor_rdfxml_element_content_type_as_string(raptor_rdfxml_element_content_type type) { if(type >= RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LAST) return "INVALID"; return rdf_content_type_info[type].name; } /* * Raptor Element/attributes on stack */ struct raptor_rdfxml_element_s { raptor_world* world; raptor_xml_element *xml_element; /* NULL at bottom of stack */ struct raptor_rdfxml_element_s *parent; /* attributes declared in M&S */ const unsigned char * rdf_attr[RDF_NS_LAST + 1]; /* how many of above seen */ int rdf_attr_count; /* state that this production matches */ raptor_state state; /* how to handle the content inside this XML element */ raptor_rdfxml_element_content_type content_type; /* starting state for children of this element */ raptor_state child_state; /* starting content type for children of this element */ raptor_rdfxml_element_content_type child_content_type; /* Reified statement identifier */ raptor_term* reified; unsigned const char* reified_id; /* Bag identifier */ raptor_term* bag; int last_bag_ordinal; /* starts at 0, so first predicate is rdf:_1 */ /* Subject identifier (URI/anon ID), type, source * * When the XML element represents a node, this is the identifier */ raptor_term* subject; /* Predicate URI * * When the XML element represents a node or predicate, * this is the identifier of the predicate */ raptor_term* predicate; /* Object identifier (URI/anon ID), type, source * * When this XML element generates a statement that needs an object, * possibly from a child element, this is the identifier of the object */ raptor_term* object; /* URI of 
datatype of literal */ raptor_uri *object_literal_datatype; /* last ordinal used, so initialising to 0 works, emitting rdf:_1 first */ int last_ordinal; /* If this element's parseType is a Collection * this identifies the anon node of current tail of the collection(list). */ const unsigned char *tail_id; /* RDF/XML specific checks */ /* all cdata so far is whitespace */ unsigned int content_cdata_all_whitespace; }; typedef struct raptor_rdfxml_element_s raptor_rdfxml_element; #define RAPTOR_RDFXML_N_CONCEPTS 5 /* * Raptor parser object */ struct raptor_rdfxml_parser_s { raptor_sax2 *sax2; /* stack of elements - elements add after current_element */ raptor_rdfxml_element *root_element; raptor_rdfxml_element *current_element; raptor_uri* concepts[RAPTOR_RDFXML_N_CONCEPTS]; /* set of seen rdf:ID / rdf:bagID values (with in-scope base URI) */ raptor_id_set* id_set; void *xml_content; size_t xml_content_length; raptor_iostream* iostream; /* writer for building parseType="Literal" content */ raptor_xml_writer* xml_writer; }; /* static variables */ #define RAPTOR_DAML_NS_URI(rdf_xml_parser) rdf_xml_parser->concepts[0] #define RAPTOR_DAML_List_URI(rdf_xml_parser) rdf_xml_parser->concepts[1] #define RAPTOR_DAML_first_URI(rdf_xml_parser) rdf_xml_parser->concepts[2] #define RAPTOR_DAML_rest_URI(rdf_xml_parser) rdf_xml_parser->concepts[3] #define RAPTOR_DAML_nil_URI(rdf_xml_parser) rdf_xml_parser->concepts[4] /* RAPTOR_RDFXML_N_CONCEPTS defines size of array */ /* prototypes for element functions */ static raptor_rdfxml_element* raptor_rdfxml_element_pop(raptor_rdfxml_parser *rdf_parser); static void raptor_rdfxml_element_push(raptor_rdfxml_parser *rdf_parser, raptor_rdfxml_element* element); static int raptor_rdfxml_record_ID(raptor_parser *rdf_parser, raptor_rdfxml_element *element, const unsigned char *id); /* prototypes for grammar functions */ static void raptor_rdfxml_start_element_grammar(raptor_parser *parser, raptor_rdfxml_element *element); static void 
raptor_rdfxml_end_element_grammar(raptor_parser *parser, raptor_rdfxml_element *element); static void raptor_rdfxml_cdata_grammar(raptor_parser *parser, const unsigned char *s, int len, int is_cdata); /* prototype for statement related functions */ static void raptor_rdfxml_generate_statement(raptor_parser *rdf_parser, raptor_term *subject, raptor_uri *predicate_uri, raptor_term *object, raptor_term *reified, raptor_rdfxml_element *bag_element); /* Prototypes for parsing data functions */ static int raptor_rdfxml_parse_init(raptor_parser* rdf_parser, const char *name); static void raptor_rdfxml_parse_terminate(raptor_parser *rdf_parser); static int raptor_rdfxml_parse_start(raptor_parser* rdf_parser); static int raptor_rdfxml_parse_chunk(raptor_parser* rdf_parser, const unsigned char *buffer, size_t len, int is_end); static void raptor_rdfxml_update_document_locator(raptor_parser *rdf_parser); static raptor_uri* raptor_rdfxml_inscope_base_uri(raptor_parser *rdf_parser); static raptor_rdfxml_element* raptor_rdfxml_element_pop(raptor_rdfxml_parser *rdf_xml_parser) { raptor_rdfxml_element *element = rdf_xml_parser->current_element; if(!element) return NULL; rdf_xml_parser->current_element = element->parent; if(rdf_xml_parser->root_element == element) /* just deleted root */ rdf_xml_parser->root_element = NULL; return element; } static void raptor_rdfxml_element_push(raptor_rdfxml_parser *rdf_xml_parser, raptor_rdfxml_element* element) { element->parent = rdf_xml_parser->current_element; rdf_xml_parser->current_element = element; if(!rdf_xml_parser->root_element) rdf_xml_parser->root_element = element; } static void raptor_free_rdfxml_element(raptor_rdfxml_element *element) { int i; /* Free special RDF M&S attributes */ for(i = 0; i <= RDF_NS_LAST; i++) if(element->rdf_attr[i]) RAPTOR_FREE(char*, element->rdf_attr[i]); if(element->subject) raptor_free_term(element->subject); if(element->predicate) raptor_free_term(element->predicate); if(element->object) 
raptor_free_term(element->object); if(element->bag) raptor_free_term(element->bag); if(element->reified) raptor_free_term(element->reified); if(element->tail_id) RAPTOR_FREE(char*, (char*)element->tail_id); if(element->object_literal_datatype) raptor_free_uri(element->object_literal_datatype); if(element->reified_id) RAPTOR_FREE(char*, (char*)element->reified_id); RAPTOR_FREE(raptor_rdfxml_element, element); } static void raptor_rdfxml_sax2_new_namespace_handler(void *user_data, raptor_namespace* nspace) { raptor_parser* rdf_parser; const unsigned char* namespace_name; size_t namespace_name_len; raptor_uri* uri = raptor_namespace_get_uri(nspace); rdf_parser = (raptor_parser*)user_data; raptor_parser_start_namespace(rdf_parser, nspace); if(!uri) return; namespace_name = raptor_uri_as_counted_string(uri, &namespace_name_len); if(namespace_name_len == raptor_rdf_namespace_uri_len-1 && !strncmp((const char*)namespace_name, (const char*)raptor_rdf_namespace_uri, namespace_name_len)) { const unsigned char *prefix = raptor_namespace_get_prefix(nspace); raptor_parser_warning(rdf_parser, "Declaring a namespace with prefix %s to URI %s - one letter short of the RDF namespace URI and probably a mistake.", prefix, namespace_name); } if(namespace_name_len > raptor_rdf_namespace_uri_len && !strncmp((const char*)namespace_name, (const char*)raptor_rdf_namespace_uri, raptor_rdf_namespace_uri_len)) { raptor_parser_error(rdf_parser, "Declaring a namespace URI %s to which the RDF namespace URI is a prefix is forbidden.", namespace_name); } } static void raptor_rdfxml_start_element_handler(void *user_data, raptor_xml_element* xml_element) { raptor_parser* rdf_parser; raptor_rdfxml_parser* rdf_xml_parser; raptor_rdfxml_element* element; int ns_attributes_count = 0; raptor_qname** named_attrs = NULL; int i; int count_bumped = 0; rdf_parser = (raptor_parser*)user_data; rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context; if(rdf_parser->failed) return; 
raptor_rdfxml_update_document_locator(rdf_parser); /* Create new element structure */ element = RAPTOR_CALLOC(raptor_rdfxml_element*, 1, sizeof(*element)); if(!element) { raptor_parser_fatal_error(rdf_parser, "Out of memory"); rdf_parser->failed = 1; return; } element->world = rdf_parser->world; element->xml_element = xml_element; raptor_rdfxml_element_push(rdf_xml_parser, element); named_attrs = raptor_xml_element_get_attributes(xml_element); ns_attributes_count = raptor_xml_element_get_attributes_count(xml_element); /* RDF-specific processing of attributes */ if(ns_attributes_count) { raptor_qname** new_named_attrs; int offset = 0; raptor_rdfxml_element* parent_element; parent_element = element->parent; /* Allocate new array to move namespaced-attributes to if * rdf processing is performed */ new_named_attrs = RAPTOR_CALLOC(raptor_qname**, ns_attributes_count, sizeof(raptor_qname*)); if(!new_named_attrs) { raptor_parser_fatal_error(rdf_parser, "Out of memory"); rdf_parser->failed = 1; return; } for(i = 0; i < ns_attributes_count; i++) { raptor_qname* attr = named_attrs[i]; /* If: * 1 We are handling RDF content and RDF processing is allowed on * this element * OR * 2 We are not handling RDF content and * this element is at the top level (top level Desc. / typedNode) * i.e. 
we have no parent * then handle the RDF attributes */ if((parent_element && rdf_content_type_info[parent_element->child_content_type].rdf_processing) || !parent_element) { /* Save pointers to some RDF M&S attributes */ /* If RDF namespace-prefixed attributes */ if(attr->nspace && attr->nspace->is_rdf_ms) { const unsigned char *attr_name = attr->local_name; int j; for(j = 0; j <= RDF_NS_LAST; j++) if(!strcmp((const char*)attr_name, raptor_rdf_ns_terms_info[j].name)) { element->rdf_attr[j] = attr->value; element->rdf_attr_count++; /* Delete it if it was stored elsewhere */ #ifdef RAPTOR_DEBUG_VERBOSE RAPTOR_DEBUG3("Found RDF namespace attribute '%s' URI %s\n", (char*)attr_name, attr->value); #endif /* make sure value isn't deleted from qname structure */ attr->value = NULL; raptor_free_qname(attr); attr = NULL; break; } } /* end if RDF namespaced-prefixed attributes */ if(!attr) continue; /* If non namespace-prefixed RDF attributes found on an element */ if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_ALLOW_NON_NS_ATTRIBUTES) && !attr->nspace) { const unsigned char *attr_name = attr->local_name; int j; for(j = 0; j <= RDF_NS_LAST; j++) if(!strcmp((const char*)attr_name, raptor_rdf_ns_terms_info[j].name)) { element->rdf_attr[j] = attr->value; element->rdf_attr_count++; if(!raptor_rdf_ns_terms_info[j].allowed_unprefixed_on_attribute) raptor_parser_warning(rdf_parser, "Using rdf attribute '%s' without the RDF namespace has been deprecated.", attr_name); /* Delete it if it was stored elsewhere */ /* make sure value isn't deleted from qname structure */ attr->value = NULL; raptor_free_qname(attr); attr = NULL; break; } } /* end if non-namespace prefixed RDF attributes */ if(!attr) continue; } /* end if leave literal XML alone */ if(attr) new_named_attrs[offset++] = attr; } /* new attribute count is set from attributes that haven't been skipped */ ns_attributes_count = offset; if(!ns_attributes_count) { /* all attributes were deleted so delete the new array */ 
RAPTOR_FREE(raptor_qname_array, new_named_attrs); new_named_attrs = NULL; } RAPTOR_FREE(raptor_qname_array, named_attrs); named_attrs = new_named_attrs; raptor_xml_element_set_attributes(xml_element, named_attrs, ns_attributes_count); } /* end if ns_attributes_count */ /* start from unknown; if we have a parent, it may set this */ element->state = RAPTOR_STATE_UNKNOWN; element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_UNKNOWN; if(element->parent && element->parent->child_content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_UNKNOWN) { element->content_type = element->parent->child_content_type; if(element->parent->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE && element->content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION && element->content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) { raptor_qname* parent_el_name; parent_el_name = raptor_xml_element_get_name(element->parent->xml_element); /* If parent has an rdf:resource, this element should not be here */ raptor_parser_error(rdf_parser, "property element '%s' has multiple object node elements, skipping.", parent_el_name->local_name); element->state = RAPTOR_STATE_SKIPPING; element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED; } else { if(!element->parent->child_state) { raptor_parser_fatal_error(rdf_parser, "%s: Internal error: no parent element child_state set", __FUNCTION__); return; } element->state = element->parent->child_state; element->parent->xml_element->content_element_seen++; count_bumped++; /* leave literal XML alone */ if(!rdf_content_type_info[element->content_type].cdata_allowed) { if(element->parent->xml_element->content_element_seen && element->parent->xml_element->content_cdata_seen) { raptor_qname* parent_el_name; parent_el_name = raptor_xml_element_get_name(element->parent->xml_element); /* Uh oh - mixed content, the parent element has cdata too */ raptor_parser_warning(rdf_parser, "element '%s' has mixed content.", 
parent_el_name->local_name); } /* If there is some existing all-whitespace content cdata * before this node element, delete it */ if(element->parent->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES && element->parent->xml_element->content_element_seen && element->parent->content_cdata_all_whitespace && element->parent->xml_element->content_cdata_length) { element->parent->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE; raptor_free_stringbuffer(element->parent->xml_element->content_cdata_sb); element->parent->xml_element->content_cdata_sb = NULL; element->parent->xml_element->content_cdata_length = 0; } } /* end if leave literal XML alone */ } /* end if parent has no rdf:resource */ } /* end if element->parent */ #ifdef RAPTOR_DEBUG_VERBOSE RAPTOR_DEBUG2("Using content type %s\n", rdf_content_type_info[element->content_type].name); fprintf(stderr, "raptor_rdfxml_start_element_handler: Start ns-element: "); raptor_print_xml_element(xml_element, stderr); #endif /* Check for non namespaced stuff when not in a parseType literal, other */ if(rdf_content_type_info[element->content_type].rdf_processing) { const raptor_namespace* ns; ns = raptor_xml_element_get_name(xml_element)->nspace; /* The element */ /* If has no namespace or the namespace has no name (xmlns="") */ if((!ns || (ns && !raptor_namespace_get_uri(ns))) && element->parent) { raptor_qname* parent_el_name; parent_el_name = raptor_xml_element_get_name(element->parent->xml_element); raptor_parser_error(rdf_parser, "Using an element '%s' without a namespace is forbidden.", parent_el_name->local_name); element->state = RAPTOR_STATE_SKIPPING; /* Remove count above so that parent thinks this is empty */ if(count_bumped) element->parent->xml_element->content_element_seen--; element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED; } /* Check for any remaining non-namespaced attributes */ if(named_attrs) { for(i = 0; i < ns_attributes_count; i++) { raptor_qname *attr = 
named_attrs[i]; /* Check if any attributes are non-namespaced */ if(!attr->nspace || (attr->nspace && !raptor_namespace_get_uri(attr->nspace))) { raptor_parser_error(rdf_parser, "Using an attribute '%s' without a namespace is forbidden.", attr->local_name); raptor_free_qname(attr); named_attrs[i] = NULL; } } } } if(element->rdf_attr[RDF_NS_aboutEach] || element->rdf_attr[RDF_NS_aboutEachPrefix]) { raptor_parser_warning(rdf_parser, "element '%s' has aboutEach / aboutEachPrefix, skipping.", raptor_xml_element_get_name(xml_element)->local_name); element->state = RAPTOR_STATE_SKIPPING; /* Remove count above so that parent thinks this is empty */ if(count_bumped) element->parent->xml_element->content_element_seen--; element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED; } /* Right, now ready to enter the grammar */ raptor_rdfxml_start_element_grammar(rdf_parser, element); return; } static void raptor_rdfxml_end_element_handler(void *user_data, raptor_xml_element* xml_element) { raptor_parser* rdf_parser; raptor_rdfxml_parser* rdf_xml_parser; raptor_rdfxml_element* element; rdf_parser = (raptor_parser*)user_data; rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context; if(!rdf_parser->failed) { raptor_rdfxml_update_document_locator(rdf_parser); raptor_rdfxml_end_element_grammar(rdf_parser, rdf_xml_parser->current_element); } element = raptor_rdfxml_element_pop(rdf_xml_parser); if(element) { if(element->parent) { /* Do not change this; PROPERTYELT will turn into MEMBER if necessary * See the switch case for MEMBER / PROPERTYELT where the test is done. * * PARSETYPE_RESOURCE should never be propogated up since it * will turn the next child (node) element into a property */ if(element->state != RAPTOR_STATE_MEMBER_PROPERTYELT && element->state != RAPTOR_STATE_PARSETYPE_RESOURCE) element->parent->child_state = element->state; } raptor_free_rdfxml_element(element); } } /* cdata (and ignorable whitespace for libxml). 
* s 0 terminated is for libxml */
/* SAX2 characters callback: forwards the text to the cdata grammar with
 * is_cdata = 0.  @xml_element is not used here.
 */
static void
raptor_rdfxml_characters_handler(void *user_data,
                                 raptor_xml_element* xml_element,
                                 const unsigned char *s, int len)
{
  raptor_parser* rdf_parser = (raptor_parser*)user_data;

  raptor_rdfxml_cdata_grammar(rdf_parser, s, len, 0);
}


/* cdata (and ignorable whitespace for libxml).
 * s is 0 terminated for libxml2
 *
 * SAX2 CDATA-section callback: same as the characters handler but calls
 * the cdata grammar with is_cdata = 1.
 */
static void
raptor_rdfxml_cdata_handler(void *user_data, raptor_xml_element* xml_element,
                            const unsigned char *s, int len)
{
  raptor_parser* rdf_parser = (raptor_parser*)user_data;

  raptor_rdfxml_cdata_grammar(rdf_parser, s, len, 1);
}


/* comment handler
 * s is 0 terminated
 *
 * The comment is only kept when the current element's children are
 * being collected as an XML literal; then it is written to the parser's
 * xml_writer.  Nothing is done if the parser has failed or there is no
 * XML element.
 */
static void
raptor_rdfxml_comment_handler(void *user_data, raptor_xml_element* xml_element,
                              const unsigned char *s)
{
  raptor_parser* rdf_parser = (raptor_parser*)user_data;
  raptor_rdfxml_parser* rdf_xml_parser;
  raptor_rdfxml_element* element;

  if(rdf_parser->failed || !xml_element)
    return;

  rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context;
  element = rdf_xml_parser->current_element;

  if(element) {
    if(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL)
      raptor_xml_writer_comment(rdf_xml_parser->xml_writer, s);
  }


#ifdef RAPTOR_DEBUG_VERBOSE
  RAPTOR_DEBUG2("XML Comment '%s'\n", s);
#endif
}


/* DAML+OIL namespace URI and its length in bytes (without the
 * terminating NUL); used to build the DAML concept URIs in
 * raptor_rdfxml_parse_init()
 */
static const unsigned char* const daml_namespace_uri_string = (const unsigned char*)"http://www.daml.org/2001/03/daml+oil#";
static const int daml_namespace_uri_string_len = 37;


/*
 * raptor_rdfxml_parse_init:
 * @rdf_parser: parser object
 * @name: parser name (not used in this function)
 *
 * Initialise the RDF/XML parser context: create the SAX2 object, register
 * the element / characters / cdata / comment / namespace handlers and
 * allocate the DAML concept URIs.
 *
 * Return value: 0 on success, 1 if the SAX2 object or any of the URIs
 * could not be allocated
 */
static int
raptor_rdfxml_parse_init(raptor_parser* rdf_parser, const char *name)
{
  raptor_rdfxml_parser* rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context;
  raptor_sax2* sax2;
  raptor_world* world = rdf_parser->world;

  /* Allocate sax2 object */
  sax2 = raptor_new_sax2(rdf_parser->world, &rdf_parser->locator, rdf_parser);
  rdf_xml_parser->sax2 = sax2;
  if(!sax2)
    return 1;

  /* Initialize sax2 element handlers */
  raptor_sax2_set_start_element_handler(sax2, raptor_rdfxml_start_element_handler);
  raptor_sax2_set_end_element_handler(sax2, raptor_rdfxml_end_element_handler);
  raptor_sax2_set_characters_handler(sax2, raptor_rdfxml_characters_handler);
  raptor_sax2_set_cdata_handler(sax2, raptor_rdfxml_cdata_handler);
  raptor_sax2_set_comment_handler(sax2, raptor_rdfxml_comment_handler);
  raptor_sax2_set_namespace_handler(sax2, raptor_rdfxml_sax2_new_namespace_handler);

  /* Allocate uris */
  RAPTOR_DAML_NS_URI(rdf_xml_parser) = raptor_new_uri_from_counted_string(world, daml_namespace_uri_string, daml_namespace_uri_string_len);

  RAPTOR_DAML_List_URI(rdf_xml_parser) = raptor_new_uri_from_uri_local_name(world, RAPTOR_DAML_NS_URI(rdf_xml_parser), (const unsigned char *)"List");
  RAPTOR_DAML_first_URI(rdf_xml_parser) = raptor_new_uri_from_uri_local_name(world, RAPTOR_DAML_NS_URI(rdf_xml_parser) ,(const unsigned char *)"first");
  RAPTOR_DAML_rest_URI(rdf_xml_parser) = raptor_new_uri_from_uri_local_name(world, RAPTOR_DAML_NS_URI(rdf_xml_parser), (const unsigned char *)"rest");
  RAPTOR_DAML_nil_URI(rdf_xml_parser) = raptor_new_uri_from_uri_local_name(world, RAPTOR_DAML_NS_URI(rdf_xml_parser), (const unsigned char *)"nil");

  /* Check for uri allocation failures */
  /* (nothing is freed here on failure; the concept URIs are released in
   * raptor_rdfxml_parse_terminate())
   */
  if(!RAPTOR_DAML_NS_URI(rdf_xml_parser) ||
     !RAPTOR_DAML_List_URI(rdf_xml_parser) ||
     !RAPTOR_DAML_first_URI(rdf_xml_parser) ||
     !RAPTOR_DAML_rest_URI(rdf_xml_parser) ||
     !RAPTOR_DAML_nil_URI(rdf_xml_parser))
    return 1;

  /* Everything succeeded */
  return 0;
}


/*
 * raptor_rdfxml_parse_start:
 * @rdf_parser: parser object
 *
 * Prepare for parsing a new document: copy the relevant parser options
 * and the URI filter to the SAX2 layer, start the SAX2 parse with the
 * base URI and reset the set of seen IDs.
 *
 * Return value: 0 on success, 1 if there is no base URI or the ID set
 * could not be allocated
 */
static int
raptor_rdfxml_parse_start(raptor_parser* rdf_parser)
{
  raptor_uri *uri = rdf_parser->base_uri;
  raptor_rdfxml_parser* rdf_xml_parser;

  rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context;

  /* base URI required for RDF/XML */
  if(!uri)
    return 1;

  /* Optionally normalize language to lowercase
   * http://www.w3.org/TR/rdf-concepts/#dfn-language-identifier
   */
  raptor_sax2_set_option(rdf_xml_parser->sax2,
                         RAPTOR_OPTION_NORMALIZE_LANGUAGE, NULL,
                         RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_NORMALIZE_LANGUAGE));

  /* Optionally forbid internal network and file requests in the XML parser */
  raptor_sax2_set_option(rdf_xml_parser->sax2,
                         RAPTOR_OPTION_NO_NET, NULL,
                         RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_NO_NET));
  raptor_sax2_set_option(rdf_xml_parser->sax2,
                         RAPTOR_OPTION_NO_FILE, NULL,
                         RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_NO_FILE));
  raptor_sax2_set_option(rdf_xml_parser->sax2,
                         RAPTOR_OPTION_LOAD_EXTERNAL_ENTITIES, NULL,
                         RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_LOAD_EXTERNAL_ENTITIES));
  if(rdf_parser->uri_filter)
    raptor_sax2_set_uri_filter(rdf_xml_parser->sax2, rdf_parser->uri_filter,
                               rdf_parser->uri_filter_user_data);

  raptor_sax2_parse_start(rdf_xml_parser->sax2, uri);

  /* Delete any existing id_set */
  if(rdf_xml_parser->id_set) {
    raptor_free_id_set(rdf_xml_parser->id_set);
    rdf_xml_parser->id_set = NULL;
  }

  /* Create a new id_set if needed */
  if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_CHECK_RDF_ID)) {
    rdf_xml_parser->id_set = raptor_new_id_set(rdf_parser->world);
    if(!rdf_xml_parser->id_set)
      return 1;
  }

  return 0;
}


/*
 * raptor_rdfxml_parse_terminate:
 * @rdf_parser: parser object
 *
 * Release everything held by the RDF/XML parser context: the SAX2
 * object, any elements left on the element stack, the concept URIs, the
 * ID set, the XML writer, the iostream and the XML content buffer.
 * Each pointer is reset to NULL after it has been freed.
 */
static void
raptor_rdfxml_parse_terminate(raptor_parser *rdf_parser)
{
  raptor_rdfxml_parser* rdf_xml_parser;
  raptor_rdfxml_element* element;
  int i;

  rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context;

  if(rdf_xml_parser->sax2) {
    raptor_free_sax2(rdf_xml_parser->sax2);
    rdf_xml_parser->sax2 = NULL;
  }

  /* drain the element stack; pop returns NULL once it is empty */
  while( (element = raptor_rdfxml_element_pop(rdf_xml_parser)) )
    raptor_free_rdfxml_element(element);


  for(i = 0; i < RAPTOR_RDFXML_N_CONCEPTS; i++) {
    raptor_uri* concept_uri = rdf_xml_parser->concepts[i];
    if(concept_uri) {
      raptor_free_uri(concept_uri);
      rdf_xml_parser->concepts[i] = NULL;
    }
  }

  if(rdf_xml_parser->id_set) {
    raptor_free_id_set(rdf_xml_parser->id_set);
    rdf_xml_parser->id_set = NULL;
  }

  if (rdf_xml_parser->xml_writer) {
    raptor_free_xml_writer(rdf_xml_parser->xml_writer);
    rdf_xml_parser->xml_writer = NULL;
  }

  if (rdf_xml_parser->iostream) {
    raptor_free_iostream(rdf_xml_parser->iostream);
    rdf_xml_parser->iostream = NULL;
  }

  if (rdf_xml_parser->xml_content) {
    RAPTOR_FREE(char*, rdf_xml_parser->xml_content);
    rdf_xml_parser->xml_content = NULL;
    rdf_xml_parser->xml_content_length = 0;
  }
}


static int
raptor_rdfxml_parse_recognise_syntax(raptor_parser_factory* factory,
                                     const unsigned char *buffer, size_t len,
                                     const unsigned char *identifier,
                                     const unsigned char *suffix,
                                     const char *mime_type)
{
  int score = 0;

  if(suffix) {
    if(!strcmp((const char*)suffix, "rdf") ||
       !strcmp((const char*)suffix, "rdfs") ||
       !strcmp((const char*)suffix, "foaf") ||
       !strcmp((const char*)suffix, "doap") ||
       !strcmp((const char*)suffix, "owl") ||
       !strcmp((const char*)suffix, "daml"))
      score = 9;
    if(!strcmp((const char*)suffix, "rss"))
      score = 3;
  }

  if(identifier) {
    if(strstr((const char*)identifier, "rss1"))
      score += 5;
    else if(!suffix && strstr((const char*)identifier, "rss"))
      score += 3;
    else if(!suffix && strstr((const char*)identifier, "rdf"))
      score += 2;
    else if(!suffix && strstr((const char*)identifier, "RDF"))
      score += 2;
  }

  if(mime_type) {
    if(strstr((const char*)mime_type, "html"))
      score -= 4;
    else if(!strcmp((const char*)mime_type, "text/rdf"))
      score += 7;
    else if(!strcmp((const char*)mime_type, "application/xml"))
      score += 5;
  }

  if(buffer && len) {
    /* Check it's an XML namespace declared and not N3 or Turtle which
     * mention the namespace URI but not in this form.
*/ #define HAS_RDF_XMLNS1 (raptor_memstr((const char*)buffer, len, "xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#") != NULL) #define HAS_RDF_XMLNS2 (raptor_memstr((const char*)buffer, len, "xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#") != NULL) #define HAS_RDF_XMLNS3 (raptor_memstr((const char*)buffer, len, "xmlns=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#") != NULL) #define HAS_RDF_XMLNS4 (raptor_memstr((const char*)buffer, len, "xmlns='http://www.w3.org/1999/02/22-rdf-syntax-ns#") != NULL) #define HAS_RDF_ENTITY1 (raptor_memstr((const char*)buffer, len, "!ENTITY rdf 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'") != NULL) #define HAS_RDF_ENTITY2 (raptor_memstr((const char*)buffer, len, "!ENTITY rdf \"http://www.w3.org/1999/02/22-rdf-syntax-ns#\"") != NULL) #define HAS_RDF_ENTITY3 (raptor_memstr((const char*)buffer, len, "xmlns:rdf=\"&rdf;\"") != NULL) #define HAS_RDF_ENTITY4 (raptor_memstr((const char*)buffer, len, "xmlns:rdf='&rdf;'") != NULL) #define HAS_HTML_NS (raptor_memstr((const char*)buffer, len, "http://www.w3.org/1999/xhtml") != NULL) #define HAS_HTML_ROOT (raptor_memstr((const char*)buffer, len, "context; if(rdf_parser->failed) return 1; rc = raptor_sax2_parse_chunk(rdf_xml_parser->sax2, buffer, len, is_end); if(is_end) { if(rdf_parser->emitted_default_graph) { raptor_parser_end_graph(rdf_parser, NULL, 0); rdf_parser->emitted_default_graph--; } } return rc; }


/**
 * raptor_rdfxml_generate_statement:
 * @rdf_parser: Raptor parser object
 * @subject_term: subject of the statement (borrowed, not freed here)
 * @predicate_uri: predicate URI; a temporary term is built from it
 * @object_term: object of the statement (borrowed, not freed here)
 * @reified_term: term naming the reified statement, or NULL for none
 * @bag_element: element whose bag should list the statement, or NULL
 *
 * Emit one statement through the parser's statement handler, plus the
 * statements that follow from it:
 *  - when the RAPTOR_OPTION_ALLOW_BAGID option is set and @bag_element
 *    has a bag, a "bag rdf:_n reified" membership statement, where n is
 *    the bag element's incremented last_bag_ordinal;
 *  - when a reified term with a non-NULL value.blank.string is
 *    available, the four reification statements (rdf:type
 *    rdf:Statement, rdf:subject, rdf:predicate, rdf:object).
 *
 * The default graph is started on the first statement if it has not
 * been started yet.  Nothing is emitted if the parser has already
 * failed or the predicate term cannot be created.  Every term created
 * inside this function is released before it returns.
 **/
static void
raptor_rdfxml_generate_statement(raptor_parser *rdf_parser,
                                 raptor_term *subject_term,
                                 raptor_uri *predicate_uri,
                                 raptor_term *object_term,
                                 raptor_term *reified_term,
                                 raptor_rdfxml_element* bag_element)
{
  raptor_statement *statement = &rdf_parser->statement;
  raptor_term* predicate_term = NULL;
  /* Declared at function scope so that every exit through
   * generate_tidy releases it (it used to leak on the error paths
   * inside the bagID block).
   */
  raptor_term* bag_predicate_term = NULL;
  int free_reified_term = 0;

  if(rdf_parser->failed)
    return;

#ifdef RAPTOR_DEBUG_VERBOSE
  if(!subject_term)
    RAPTOR_FATAL1("Statement has no subject\n");
  if(!predicate_uri)
    RAPTOR_FATAL1("Statement has no predicate\n");
  if(!object_term)
    RAPTOR_FATAL1("Statement has no object\n");
#endif

  predicate_term = raptor_new_term_from_uri(rdf_parser->world, predicate_uri);
  if(!predicate_term)
    return;

  statement->subject = subject_term;
  statement->predicate = predicate_term;
  statement->object = object_term;

#ifdef RAPTOR_DEBUG_VERBOSE
  fprintf(stderr, "raptor_rdfxml_generate_statement: Generating statement: ");
  raptor_statement_print(statement, stderr);
  fputc('\n', stderr);
#endif

  if(!rdf_parser->emitted_default_graph) {
    raptor_parser_start_graph(rdf_parser, NULL, 0);
    rdf_parser->emitted_default_graph++;
  }

  if(!rdf_parser->statement_handler)
    goto generate_tidy;

  /* Generate the statement; or is it a fact? */
  (*rdf_parser->statement_handler)(rdf_parser->user_data, statement);

  /* the bagID mess */
  if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_ALLOW_BAGID) &&
     bag_element && bag_element->bag) {
    raptor_term* bag = bag_element->bag;
    raptor_uri* bag_predicate_uri = NULL;

    statement->subject = bag;

    bag_element->last_bag_ordinal++;

    /* new URI object */
    bag_predicate_uri = raptor_new_uri_from_rdf_ordinal(rdf_parser->world, bag_element->last_bag_ordinal);
    if(!bag_predicate_uri)
      goto generate_tidy;

    bag_predicate_term = raptor_new_term_from_uri(rdf_parser->world, bag_predicate_uri);
    raptor_free_uri(bag_predicate_uri);

    if(!bag_predicate_term)
      goto generate_tidy;

    statement->predicate = bag_predicate_term;

    if(!reified_term || !reified_term->value.blank.string) {
      unsigned char *reified_id = NULL;

      /* reified_term is NULL so generate a bag ID */
      reified_id = raptor_world_generate_bnodeid(rdf_parser->world);
      if(!reified_id)
        goto generate_tidy;

      reified_term = raptor_new_term_from_blank(rdf_parser->world, reified_id);
      RAPTOR_FREE(char*, reified_id);

      if(!reified_term)
        goto generate_tidy;
      /* this term is ours, so it must be released below */
      free_reified_term = 1;
    }

    statement->object = reified_term;
    (*rdf_parser->statement_handler)(rdf_parser->user_data, statement);
  }

  /* stop here if there is no (valid) reified ID */
  if(!reified_term || !reified_term->value.blank.string)
    goto generate_tidy;

  /* otherwise generate reified statements */
  statement->subject = reified_term;
  statement->predicate = RAPTOR_RDF_type_term(rdf_parser->world);
  statement->object = RAPTOR_RDF_Statement_term(rdf_parser->world);
  (*rdf_parser->statement_handler)(rdf_parser->user_data, statement);

  /* statement->subject = reified_term; */
  statement->predicate = RAPTOR_RDF_subject_term(rdf_parser->world);
  statement->object = subject_term;
  (*rdf_parser->statement_handler)(rdf_parser->user_data, statement);

  /* statement->subject = reified_term; */
  statement->predicate = RAPTOR_RDF_predicate_term(rdf_parser->world);
  statement->object = predicate_term;
  (*rdf_parser->statement_handler)(rdf_parser->user_data, statement);

  /* statement->subject = reified_term; */
  statement->predicate = RAPTOR_RDF_object_term(rdf_parser->world);
  statement->object = object_term;
  (*rdf_parser->statement_handler)(rdf_parser->user_data, statement);

 generate_tidy:
  /* Tidy up things allocated here */
  if(bag_predicate_term)
    raptor_free_term(bag_predicate_term);
  if(predicate_term)
    raptor_free_term(predicate_term);
  if(free_reified_term && reified_term)
    raptor_free_term(reified_term);
}


/**
 * raptor_rdfxml_element_has_property_attributes:
 * @element: element with the property attributes
 *
 * Return true if the element has at least one property attribute.
 *
 * An element qualifies when its XML element still carries any
 * attribute, or when one of its stored rdf: attribute values belongs
 * to an RDF term whose type in raptor_rdf_ns_terms_info is not
 * RAPTOR_TERM_TYPE_UNKNOWN.
 *
 * Return value: 1 if a property attribute is present, 0 otherwise
 **/
static int
raptor_rdfxml_element_has_property_attributes(raptor_rdfxml_element *element)
{
  int i;

  if(element->xml_element->attribute_count > 0)
    return 1;

  /* look for rdf: properties */
  for(i = 0; i <= RDF_NS_LAST; i++) {
    if(element->rdf_attr[i] &&
       raptor_rdf_ns_terms_info[i].type != RAPTOR_TERM_TYPE_UNKNOWN)
      return 1;
  }
  return 0;
}


/**
 * raptor_rdfxml_process_property_attributes:
 * @rdf_parser: Raptor parser object
 * @attributes_element: element with the property attributes
 * @resource_element: element that defines the resource URI
 *                    subject->value etc.
 * @property_node_identifier: Use this identifier for the resource URI
 *   and count any ordinals for it locally
 *
 * Process the property attributes for an element for a given resource.
 *
 * Two passes are made.  The first walks the remaining XML attributes
 * of @attributes_element and generates one statement with a plain
 * literal object per attribute.  The second walks the stored rdf:
 * attribute values and generates a statement with either a literal or
 * a URI object, as selected by the term's type in
 * raptor_rdf_ns_terms_info.  The statement subject is
 * @property_node_identifier when given, otherwise the subject of
 * @resource_element.
 *
 * Attributes without a namespace, or whose value is not in Unicode
 * Normal Form C, are reported and skipped.
 *
 * Return value: 0 on success, non-0 if a URI or term needed for a
 * statement could not be created
 **/
static int
raptor_rdfxml_process_property_attributes(raptor_parser *rdf_parser,
                                          raptor_rdfxml_element *attributes_element,
                                          raptor_rdfxml_element *resource_element,
                                          raptor_term *property_node_identifier)
{
  unsigned int i;
  raptor_term *resource_identifier;

  resource_identifier = property_node_identifier ? property_node_identifier : resource_element->subject;

  /* Process attributes as propAttr* = * (propName="string")* */
  for(i = 0; i < attributes_element->xml_element->attribute_count; i++) {
    raptor_qname* attr = attributes_element->xml_element->attributes[i];
    const unsigned char *name;
    const unsigned char *value;
    raptor_term* object_term;

    if(!attr)
      continue;

    name = attr->local_name;
    value = attr->value;

    if(!attr->nspace) {
      raptor_rdfxml_update_document_locator(rdf_parser);
      raptor_parser_error(rdf_parser, "Using property attribute '%s' without a namespace is forbidden.", name);
      continue;
    }

    if(!raptor_unicode_check_utf8_nfc_string(value, strlen((const char*)value), NULL)) {
      raptor_log_level l;

      raptor_rdfxml_update_document_locator(rdf_parser);
      l = (RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_NON_NFC_FATAL)) ? RAPTOR_LOG_LEVEL_ERROR : RAPTOR_LOG_LEVEL_WARN;
      raptor_parser_log_error(rdf_parser, l, "Property attribute '%s' has a string not in Unicode Normal Form C: %s", name, value);
      continue;
    }

    /* Generate the property statement using one of these properties:
     * 1) rdf:_n
     * 2) the URI from the rdf:* attribute where allowed
     * 3) otherwise forbidden (including rdf:li)
     *
     * The checks below only report problems; the statement is still
     * generated afterwards in every case.
     */
    if(attr->nspace->is_rdf_ms) {
      /* is rdf: namespace */

      if(*name == '_') {
        int ordinal;

        /* recognise rdf:_ */
        name++;
        ordinal = raptor_check_ordinal(name);
        if(ordinal < 1) {
          raptor_rdfxml_update_document_locator(rdf_parser);
          /* name now points past the '_', so report the name of the
           * element carrying the attribute as the containing element
           */
          raptor_parser_error(rdf_parser, "Illegal ordinal value %d in property attribute '%s' seen on containing element '%s'.", ordinal, attr->local_name, raptor_xml_element_get_name(attributes_element->xml_element)->local_name);
        }
      } else {
        int rc;

        raptor_rdfxml_update_document_locator(rdf_parser);
        rc = raptor_rdfxml_check_propertyAttribute_name((const char*)name);
        if(!rc)
          raptor_parser_error(rdf_parser, "RDF term %s is forbidden as a property attribute.", name);
        else if(rc < 0)
          raptor_parser_warning(rdf_parser, "Unknown RDF namespace property attribute '%s'.", name);
      }

    } /* end is RDF namespace property */

    /* not rdf: namespace or unknown in rdf: namespace so
     * generate a statement with a literal object
     */
    object_term = raptor_new_term_from_literal(rdf_parser->world, (unsigned char*)value, NULL, NULL);
    if(!object_term)
      return 1;

    raptor_rdfxml_generate_statement(rdf_parser, resource_identifier, attr->uri, object_term, NULL, /* Property attributes are never reified*/ resource_element);

    raptor_free_term(object_term);

  } /* end for ... attributes */


  /* Handle rdf property attributes
   * (only rdf:type and rdf:value at present)
   */
  for(i = 0; i <= RDF_NS_LAST; i++) {
    const unsigned char *value = attributes_element->rdf_attr[i];
    size_t value_len;
    int object_is_literal;
    raptor_uri *property_uri;
    raptor_term* object_term;

    if(!value)
      continue;

    value_len = strlen((const char*)value);

    object_is_literal = (raptor_rdf_ns_terms_info[i].type == RAPTOR_TERM_TYPE_LITERAL);

    if(raptor_rdf_ns_terms_info[i].type == RAPTOR_TERM_TYPE_UNKNOWN) {
      const char *name = raptor_rdf_ns_terms_info[i].name;
      int rc = raptor_rdfxml_check_propertyAttribute_name(name);
      if(!rc) {
        raptor_rdfxml_update_document_locator(rdf_parser);
        raptor_parser_error(rdf_parser, "RDF term %s is forbidden as a property attribute.", name);
        continue;
      } else if(rc < 0)
        raptor_parser_warning(rdf_parser, "Unknown RDF namespace property attribute '%s'.", name);
    }

    if(object_is_literal &&
       !raptor_unicode_check_utf8_nfc_string(value, value_len, NULL)) {
      raptor_log_level l;

      raptor_rdfxml_update_document_locator(rdf_parser);
      l = (RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_NON_NFC_FATAL)) ? RAPTOR_LOG_LEVEL_ERROR : RAPTOR_LOG_LEVEL_WARN;
      raptor_parser_log_error(rdf_parser, l, "Property attribute '%s' has a string not in Unicode Normal Form C: %s", raptor_rdf_ns_terms_info[i].name, value);
      continue;
    }

    property_uri = raptor_new_uri_for_rdf_concept(rdf_parser->world, (const unsigned char*)raptor_rdf_ns_terms_info[i].name);
    if(!property_uri)
      return 1;

    if(object_is_literal) {
      object_term = raptor_new_term_from_literal(rdf_parser->world, (unsigned char*)value, NULL, NULL);
    } else {
      raptor_uri *base_uri;
      raptor_uri *object_uri;

      base_uri = raptor_rdfxml_inscope_base_uri(rdf_parser);
      object_uri = raptor_new_uri_relative_to_base(rdf_parser->world, base_uri, value);
      object_term = NULL;
      if(object_uri) {
        object_term = raptor_new_term_from_uri(rdf_parser->world, object_uri);
        raptor_free_uri(object_uri);
      }
    }

    if(!object_term) {
      /* never hand a statement without an object to the handler */
      raptor_free_uri(property_uri);
      return 1;
    }

    raptor_rdfxml_generate_statement(rdf_parser, resource_identifier, property_uri, object_term, NULL, /* Property attributes are never reified*/ resource_element);

    raptor_free_term(object_term);
    raptor_free_uri(property_uri);

  } /* end for rdf:property values */

  return 0;
}


/*
 * raptor_rdfxml_start_element_grammar:
 * Run the RDF/XML grammar state machine for an element that has just
 * been opened, starting from element->state and looping until one of
 * the states marks the work as finished.
 */
static void
raptor_rdfxml_start_element_grammar(raptor_parser *rdf_parser,
                                    raptor_rdfxml_element *element)
{
  raptor_rdfxml_parser *rdf_xml_parser;
  int finished;
  raptor_state state;
  raptor_xml_element* xml_element;
  raptor_qname* el_qname;
  const unsigned char *el_name;
  int element_in_rdf_ns;
  int rc = 0;
  raptor_uri* base_uri;
  raptor_uri* element_name_uri;

  rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context;

  xml_element = element->xml_element;
  el_qname = raptor_xml_element_get_name(xml_element);
  el_name = el_qname->local_name;
  element_in_rdf_ns = (el_qname->nspace && el_qname->nspace->is_rdf_ms);
  base_uri = raptor_rdfxml_inscope_base_uri(rdf_parser);
  element_name_uri = el_qname->uri;

  state = element->state;
#ifdef RAPTOR_DEBUG_VERBOSE
  RAPTOR_DEBUG2("Starting in state %s\n", raptor_rdfxml_state_as_string(state));
#endif

  finished = 0;
  while(!finished) {

    switch(state) {
      case RAPTOR_STATE_SKIPPING:
        element->child_state = state;
        element->child_content_type =
RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED; finished = 1; break; case RAPTOR_STATE_UNKNOWN: /* found ? */ if(element_in_rdf_ns) { if(raptor_uri_equals(element_name_uri, RAPTOR_RDF_RDF_URI(rdf_parser->world))) { element->child_state = RAPTOR_STATE_NODE_ELEMENT_LIST; element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_NODES; /* Yes - need more content before can continue, * so wait for another element */ finished = 1; break; } if(raptor_uri_equals(element_name_uri, RAPTOR_RDF_Description_URI(rdf_parser->world))) { state = RAPTOR_STATE_DESCRIPTION; element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES; /* Yes - found something so move immediately to description */ break; } if(element_in_rdf_ns) { rc = raptor_rdfxml_check_nodeElement_name((const char*)el_name); if(!rc) { raptor_parser_error(rdf_parser, "rdf:%s is forbidden as a node element.", el_name); state = RAPTOR_STATE_SKIPPING; element->child_state = RAPTOR_STATE_SKIPPING; finished = 1; break; } else if(rc < 0) { raptor_parser_warning(rdf_parser, "rdf:%s is an unknown RDF namespaced element.", el_name); } } } /* If scanning for element, can continue */ if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_SCANNING)) { finished = 1; break; } /* Otherwise the choice of the next state can be made * from the current element by the OBJ state */ state = RAPTOR_STATE_NODE_ELEMENT_LIST; element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_NODES; break; case RAPTOR_STATE_NODE_ELEMENT_LIST: /* Handling * http://www.w3.org/TR/rdf-syntax-grammar/#nodeElementList * * Everything goes to nodeElement */ state = RAPTOR_STATE_NODE_ELEMENT; element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES; break; case RAPTOR_STATE_DESCRIPTION: case RAPTOR_STATE_NODE_ELEMENT: case RAPTOR_STATE_PARSETYPE_RESOURCE: case RAPTOR_STATE_PARSETYPE_COLLECTION: /* Handling or other node element * http://www.w3.org/TR/rdf-syntax-grammar/#nodeElement * * or a property element acting as a node element 
for * rdf:parseType="Resource" * http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeResourcePropertyElt * or rdf:parseType="Collection" (and daml:Collection) * http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeCollectionPropertyElt * * Only create a bag if bagID given */ if(!element_name_uri) { /* We cannot handle this */ raptor_parser_warning(rdf_parser, "Using node element '%s' without a namespace is forbidden.", el_qname->local_name); raptor_rdfxml_update_document_locator(rdf_parser); element->state = RAPTOR_STATE_SKIPPING; element->child_state = RAPTOR_STATE_SKIPPING; finished = 1; break; } if(element_in_rdf_ns) { rc = raptor_rdfxml_check_nodeElement_name((const char*)el_name); if(!rc) { raptor_parser_error(rdf_parser, "rdf:%s is forbidden as a node element.", el_name); state = RAPTOR_STATE_SKIPPING; element->state = RAPTOR_STATE_SKIPPING; element->child_state = RAPTOR_STATE_SKIPPING; finished = 1; break; } else if(rc < 0) { raptor_parser_warning(rdf_parser, "rdf:%s is an unknown RDF namespaced element.", el_name); } } if(element->content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION && element->content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION && element->parent && (element->parent->state == RAPTOR_STATE_PROPERTYELT || element->parent->state == RAPTOR_STATE_MEMBER_PROPERTYELT) && element->parent->xml_element->content_element_seen > 1) { raptor_rdfxml_update_document_locator(rdf_parser); raptor_parser_error(rdf_parser, "The enclosing property already has an object"); state = RAPTOR_STATE_SKIPPING; element->child_state = RAPTOR_STATE_SKIPPING; finished = 1; break; } if(state == RAPTOR_STATE_NODE_ELEMENT || state == RAPTOR_STATE_DESCRIPTION || state == RAPTOR_STATE_PARSETYPE_COLLECTION) { if(element_in_rdf_ns && raptor_uri_equals(element_name_uri, RAPTOR_RDF_Description_URI(rdf_parser->world))) state = RAPTOR_STATE_DESCRIPTION; else state = RAPTOR_STATE_NODE_ELEMENT; } if((element->rdf_attr[RDF_NS_ID]!=NULL) + 
(element->rdf_attr[RDF_NS_about]!=NULL) + (element->rdf_attr[RDF_NS_nodeID]!=NULL) > 1) { raptor_rdfxml_update_document_locator(rdf_parser); raptor_parser_error(rdf_parser, "Multiple attributes of rdf:ID, rdf:about and rdf:nodeID on element '%s' - only one allowed.", el_name); } if(element->rdf_attr[RDF_NS_ID]) { unsigned char* subject_id; raptor_uri* subject_uri; subject_id = (unsigned char*)element->rdf_attr[RDF_NS_ID]; if(!raptor_valid_xml_ID(rdf_parser, subject_id)) { raptor_parser_error(rdf_parser, "Illegal rdf:ID value '%s'", subject_id); state = RAPTOR_STATE_SKIPPING; element->child_state = RAPTOR_STATE_SKIPPING; finished = 1; break; } if(raptor_rdfxml_record_ID(rdf_parser, element, subject_id)) { raptor_parser_error(rdf_parser, "Duplicated rdf:ID value '%s'", subject_id); state = RAPTOR_STATE_SKIPPING; element->child_state = RAPTOR_STATE_SKIPPING; finished = 1; break; } /* after this, subject_id is the owner of the ID string */ element->rdf_attr[RDF_NS_ID] = NULL; subject_uri = raptor_new_uri_from_id(rdf_parser->world, base_uri, subject_id); RAPTOR_FREE(char*, subject_id); if(!subject_uri) goto oom; element->subject = raptor_new_term_from_uri(rdf_parser->world, subject_uri); raptor_free_uri(subject_uri); if(!element->subject) goto oom; } else if(element->rdf_attr[RDF_NS_about]) { raptor_uri* subject_uri; subject_uri = raptor_new_uri_relative_to_base(rdf_parser->world, base_uri, (const unsigned char*)element->rdf_attr[RDF_NS_about]); if(!subject_uri) goto oom; element->subject = raptor_new_term_from_uri(rdf_parser->world, subject_uri); raptor_free_uri(subject_uri); RAPTOR_FREE(char*, element->rdf_attr[RDF_NS_about]); element->rdf_attr[RDF_NS_about] = NULL; if(!element->subject) goto oom; } else if(element->rdf_attr[RDF_NS_nodeID]) { unsigned char* subject_id; subject_id = raptor_world_internal_generate_id(rdf_parser->world, (unsigned char*)element->rdf_attr[RDF_NS_nodeID]); if(!subject_id) goto oom; element->subject = 
raptor_new_term_from_blank(rdf_parser->world, subject_id); RAPTOR_FREE(char*, subject_id); element->rdf_attr[RDF_NS_nodeID] = NULL; if(!element->subject) goto oom; if(!raptor_valid_xml_ID(rdf_parser, element->subject->value.blank.string)) { raptor_parser_error(rdf_parser, "Illegal rdf:nodeID value '%s'", (const char*)element->subject->value.blank.string); state = RAPTOR_STATE_SKIPPING; element->child_state = RAPTOR_STATE_SKIPPING; finished = 1; break; } } else if(element->parent && element->parent->child_content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION && element->parent->child_content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION && element->parent->object) { /* copy from parent (property element), it has a URI for us */ element->subject = raptor_term_copy(element->parent->object); } else { unsigned char* subject_id; subject_id = raptor_world_generate_bnodeid(rdf_parser->world); if(!subject_id) goto oom; element->subject = raptor_new_term_from_blank(rdf_parser->world, subject_id); RAPTOR_FREE(char*, subject_id); if(!element->subject) goto oom; } if(element->rdf_attr[RDF_NS_bagID]) { if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_ALLOW_BAGID)) { unsigned char* bag_id; raptor_uri* bag_uri = NULL; bag_id = (unsigned char*)element->rdf_attr[RDF_NS_bagID]; element->rdf_attr[RDF_NS_bagID] = NULL; bag_uri = raptor_new_uri_from_id(rdf_parser->world, base_uri, bag_id); if(!bag_uri) { RAPTOR_FREE(char*, bag_id); goto oom; } element->bag = raptor_new_term_from_uri(rdf_parser->world, bag_uri); raptor_free_uri(bag_uri); if(!raptor_valid_xml_ID(rdf_parser, bag_id)) { raptor_parser_error(rdf_parser, "Illegal rdf:bagID value '%s'", bag_id); state = RAPTOR_STATE_SKIPPING; element->child_state = RAPTOR_STATE_SKIPPING; finished = 1; RAPTOR_FREE(char*, bag_id); break; } if(raptor_rdfxml_record_ID(rdf_parser, element, bag_id)) { raptor_parser_error(rdf_parser, "Duplicated rdf:bagID value '%s'", bag_id); state = RAPTOR_STATE_SKIPPING; 
element->child_state = RAPTOR_STATE_SKIPPING; finished = 1; RAPTOR_FREE(char*, bag_id); break; } RAPTOR_FREE(char*, bag_id); raptor_parser_warning(rdf_parser, "rdf:bagID is deprecated."); raptor_rdfxml_generate_statement(rdf_parser, element->bag, RAPTOR_RDF_type_URI(rdf_parser->world), RAPTOR_RDF_Bag_term(rdf_parser->world), NULL, NULL); } else { /* bagID forbidden */ raptor_parser_error(rdf_parser, "rdf:bagID is forbidden."); state = RAPTOR_STATE_SKIPPING; element->child_state = RAPTOR_STATE_SKIPPING; finished = 1; break; } } if(element->parent) { /* In a rdf:parseType="Collection" the resources are appended * to the list at the genid element->parent->tail_id */ if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION || element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) { /* rdf:type rdf:List */ const unsigned char * idList; raptor_uri *predicate_uri; raptor_term* idList_term; raptor_term* object_term; idList = raptor_world_generate_bnodeid(rdf_parser->world); if(!idList) goto oom; /* idList string is saved below in element->parent->tail_id */ idList_term = raptor_new_term_from_blank(rdf_parser->world, idList); if(!idList_term) { RAPTOR_FREE(char*, idList); goto oom; } if((element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) || RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_ALLOW_RDF_TYPE_RDF_LIST)) { raptor_uri* class_uri = NULL; if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) { class_uri = RAPTOR_DAML_List_URI(rdf_xml_parser); object_term = raptor_new_term_from_uri(rdf_parser->world, class_uri); } else object_term = raptor_term_copy(RAPTOR_RDF_List_term(rdf_parser->world)); raptor_rdfxml_generate_statement(rdf_parser, idList_term, RAPTOR_RDF_type_URI(rdf_parser->world), object_term, NULL, element); raptor_free_term(object_term); } predicate_uri = (element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) ? 
RAPTOR_DAML_first_URI(rdf_xml_parser) : RAPTOR_RDF_first_URI(rdf_parser->world); /* rdf:first uri> */ raptor_rdfxml_generate_statement(rdf_parser, idList_term, predicate_uri, element->subject, NULL, NULL); /* If there is no rdf:parseType="Collection" */ if(!element->parent->tail_id) { /* Free any existing object still around. * I suspect this can never happen. */ if(element->parent->object) raptor_free_term(element->parent->object); element->parent->object = raptor_new_term_from_blank(rdf_parser->world, idList); } else { raptor_term* tail_id_term; tail_id_term = raptor_new_term_from_blank(rdf_parser->world, element->parent->tail_id); predicate_uri = (element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) ? RAPTOR_DAML_rest_URI(rdf_xml_parser) : RAPTOR_RDF_rest_URI(rdf_parser->world); /* _:tail_id rdf:rest _:listRest */ raptor_rdfxml_generate_statement(rdf_parser, tail_id_term, predicate_uri, idList_term, NULL, NULL); raptor_free_term(tail_id_term); } /* update new tail */ if(element->parent->tail_id) RAPTOR_FREE(char*, (char*)element->parent->tail_id); element->parent->tail_id = idList; raptor_free_term(idList_term); } else if(element->parent->state != RAPTOR_STATE_UNKNOWN && element->state != RAPTOR_STATE_PARSETYPE_RESOURCE) { /* If there is a parent element (property) containing this * element (node) and it has no object, set it from this subject */ if(element->parent->object) { raptor_rdfxml_update_document_locator(rdf_parser); raptor_parser_error(rdf_parser, "Tried to set multiple objects of a statement"); } else { /* Store URI of this node in our parent as the property object */ element->parent->object = raptor_term_copy(element->subject); element->parent->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE; } } } /* If this is a node element, generate the rdf:type statement * from this node */ if(state == RAPTOR_STATE_NODE_ELEMENT) { raptor_term* el_name_term; el_name_term = raptor_new_term_from_uri(rdf_parser->world, 
element_name_uri); raptor_rdfxml_generate_statement(rdf_parser, element->subject, RAPTOR_RDF_type_URI(rdf_parser->world), el_name_term, element->reified, element); raptor_free_term(el_name_term); } if(raptor_rdfxml_process_property_attributes(rdf_parser, element, element, NULL)) goto oom; /* for both productions now need some more content or * property elements before can do any more work. */ element->child_state = RAPTOR_STATE_PROPERTYELT; element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES; finished = 1; break; case RAPTOR_STATE_PARSETYPE_OTHER: /* FALLTHROUGH */ case RAPTOR_STATE_PARSETYPE_LITERAL: raptor_xml_writer_start_element(rdf_xml_parser->xml_writer, xml_element); element->child_state = RAPTOR_STATE_PARSETYPE_LITERAL; element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL; finished = 1; break; /* Handle all the detail of the various options of property element * http://www.w3.org/TR/rdf-syntax-grammar/#propertyElt * * All the attributes must be scanned here to see what additional * property element work is needed. No triples are generated * until the end of this element, until it is clear if the * element was empty. 
*/ case RAPTOR_STATE_MEMBER_PROPERTYELT: case RAPTOR_STATE_PROPERTYELT: if(!element_name_uri) { raptor_parser_error(rdf_parser, "Using property element '%s' without a namespace is forbidden.", raptor_xml_element_get_name(element->parent->xml_element)->local_name); raptor_rdfxml_update_document_locator(rdf_parser); element->state = RAPTOR_STATE_SKIPPING; element->child_state = RAPTOR_STATE_SKIPPING; finished = 1; break; } /* Handling rdf:li as a property, noting special processing */ if(element_in_rdf_ns && raptor_uri_equals(element_name_uri, RAPTOR_RDF_li_URI(rdf_parser->world))) { state = RAPTOR_STATE_MEMBER_PROPERTYELT; } if(element_in_rdf_ns) { rc = raptor_rdfxml_check_propertyElement_name((const char*)el_name); if(!rc) { raptor_parser_error(rdf_parser, "rdf:%s is forbidden as a property element.", el_name); state = RAPTOR_STATE_SKIPPING; element->child_state = RAPTOR_STATE_SKIPPING; finished = 1; break; } else if(rc < 0) { raptor_parser_warning(rdf_parser, "rdf:%s is an unknown RDF namespaced element.", el_name); } } /* rdf:ID on a property element - reify a statement. 
* Allowed on all property element forms */ if(element->rdf_attr[RDF_NS_ID]) { raptor_uri *reified_uri; element->reified_id = element->rdf_attr[RDF_NS_ID]; element->rdf_attr[RDF_NS_ID] = NULL; reified_uri = raptor_new_uri_from_id(rdf_parser->world, base_uri, element->reified_id); if(!reified_uri) goto oom; element->reified = raptor_new_term_from_uri(rdf_parser->world, reified_uri); raptor_free_uri(reified_uri); if(!element->reified) goto oom; if(!raptor_valid_xml_ID(rdf_parser, element->reified_id)) { raptor_parser_error(rdf_parser, "Illegal rdf:ID value '%s'", element->reified_id); state = RAPTOR_STATE_SKIPPING; element->child_state = RAPTOR_STATE_SKIPPING; finished = 1; break; } if(raptor_rdfxml_record_ID(rdf_parser, element, element->reified_id)) { raptor_parser_error(rdf_parser, "Duplicated rdf:ID value '%s'", element->reified_id); state = RAPTOR_STATE_SKIPPING; element->child_state = RAPTOR_STATE_SKIPPING; finished = 1; break; } } /* rdf:datatype on a property element. * Only allowed for * http://www.w3.org/TR/rdf-syntax-grammar/#literalPropertyElt */ if(element->rdf_attr[RDF_NS_datatype]) { raptor_uri *datatype_uri; datatype_uri = raptor_new_uri_relative_to_base(rdf_parser->world, base_uri, (const unsigned char*)element->rdf_attr[RDF_NS_datatype]); element->object_literal_datatype = datatype_uri; RAPTOR_FREE(char*, element->rdf_attr[RDF_NS_datatype]); element->rdf_attr[RDF_NS_datatype] = NULL; if(!element->object_literal_datatype) goto oom; } if(element->rdf_attr[RDF_NS_bagID]) { if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_ALLOW_BAGID)) { if(element->rdf_attr[RDF_NS_resource] || element->rdf_attr[RDF_NS_parseType]) { raptor_parser_error(rdf_parser, "rdf:bagID is forbidden on property element '%s' with an rdf:resource or rdf:parseType attribute.", el_name); /* prevent this being used later either */ RAPTOR_FREE(char*, element->rdf_attr[RDF_NS_bagID]); element->rdf_attr[RDF_NS_bagID] = NULL; } else { unsigned char* bag_id; raptor_uri* bag_uri; bag_id 
= (unsigned char*)element->rdf_attr[RDF_NS_bagID]; element->rdf_attr[RDF_NS_bagID] = NULL; bag_uri = raptor_new_uri_from_id(rdf_parser->world, base_uri, bag_id); if(!bag_uri) { RAPTOR_FREE(char*, bag_id); goto oom; } element->bag = raptor_new_term_from_uri(rdf_parser->world, bag_uri); raptor_free_uri(bag_uri); if(!element->bag) { RAPTOR_FREE(char*, bag_id); goto oom; } if(!raptor_valid_xml_ID(rdf_parser, bag_id)) { raptor_parser_error(rdf_parser, "Illegal rdf:bagID value '%s'", bag_id); state = RAPTOR_STATE_SKIPPING; element->child_state = RAPTOR_STATE_SKIPPING; finished = 1; RAPTOR_FREE(char*, bag_id); break; } if(raptor_rdfxml_record_ID(rdf_parser, element, bag_id)) { raptor_parser_error(rdf_parser, "Duplicated rdf:bagID value '%s'", bag_id); state = RAPTOR_STATE_SKIPPING; element->child_state = RAPTOR_STATE_SKIPPING; RAPTOR_FREE(char*, bag_id); finished = 1; break; } RAPTOR_FREE(char*, bag_id); raptor_parser_warning(rdf_parser, "rdf:bagID is deprecated."); } } else { /* bagID forbidden */ raptor_parser_error(rdf_parser, "rdf:bagID is forbidden."); state = RAPTOR_STATE_SKIPPING; element->child_state = RAPTOR_STATE_SKIPPING; finished = 1; break; } } /* if rdf:bagID on property element */ element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT; if(element->rdf_attr[RDF_NS_parseType]) { const unsigned char *parse_type; int i; int is_parseType_Literal = 0; parse_type = element->rdf_attr[RDF_NS_parseType]; if(raptor_rdfxml_element_has_property_attributes(element)) { raptor_parser_error(rdf_parser, "Property attributes cannot be used with rdf:parseType='%s'", parse_type); state = RAPTOR_STATE_SKIPPING; element->child_state = RAPTOR_STATE_SKIPPING; finished = 1; break; } /* Check for bad combinations of things with parseType */ for(i = 0; i <= RDF_NS_LAST; i++) if(element->rdf_attr[i] && i != RDF_NS_parseType) { raptor_parser_error(rdf_parser, "Attribute '%s' cannot be used with rdf:parseType='%s'", raptor_rdf_ns_terms_info[i].name, 
parse_type); state = RAPTOR_STATE_SKIPPING; element->child_state = RAPTOR_STATE_SKIPPING; break; } if(!strcmp((char*)parse_type, "Literal")) is_parseType_Literal = 1; else if(!strcmp((char*)parse_type, "Resource")) { unsigned char* subject_id; state = RAPTOR_STATE_PARSETYPE_RESOURCE; element->child_state = RAPTOR_STATE_PROPERTYELT; element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES; /* create a node for the subject of the contained properties */ subject_id = raptor_world_generate_bnodeid(rdf_parser->world); if(!subject_id) goto oom; element->subject = raptor_new_term_from_blank(rdf_parser->world, subject_id); RAPTOR_FREE(char*, subject_id); if(!element->subject) goto oom; } else if(!strcmp((char*)parse_type, "Collection")) { /* An rdf:parseType="Collection" appears as a single node */ element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE; element->child_state = RAPTOR_STATE_PARSETYPE_COLLECTION; element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION; } else { if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_ALLOW_OTHER_PARSETYPES) && !raptor_strcasecmp((char*)parse_type, "daml:collection")) { /* A DAML collection appears as a single node */ element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE; element->child_state = RAPTOR_STATE_PARSETYPE_COLLECTION; element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION; } else { if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_WARN_OTHER_PARSETYPES)) { raptor_parser_warning(rdf_parser, "Unknown rdf:parseType value '%s' taken as 'Literal'", parse_type); } is_parseType_Literal = 1; } } if(is_parseType_Literal) { raptor_xml_writer* xml_writer; /* rdf:parseType="Literal" - explicitly or default * if the parseType value is not recognised */ rdf_xml_parser->xml_content = NULL; rdf_xml_parser->xml_content_length = 0; rdf_xml_parser->iostream = raptor_new_iostream_to_string(rdf_parser->world, &rdf_xml_parser->xml_content, 
&rdf_xml_parser->xml_content_length, raptor_alloc_memory); if(!rdf_xml_parser->iostream) goto oom; xml_writer = raptor_new_xml_writer(rdf_parser->world, NULL, rdf_xml_parser->iostream); rdf_xml_parser->xml_writer = xml_writer; if(!rdf_xml_parser->xml_writer) goto oom; raptor_xml_writer_set_option(rdf_xml_parser->xml_writer, RAPTOR_OPTION_WRITER_XML_DECLARATION, NULL, 0); element->child_state = RAPTOR_STATE_PARSETYPE_LITERAL; element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL; element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL; } } else { /* Can only be the empty property element case * http://www.w3.org/TR/rdf-syntax-grammar/#emptyPropertyElt */ /* The presence of the rdf:resource or rdf:nodeID * attributes is checked at element close time */ /* * Assign reified URI here so we don't reify property attributes * using this id */ if(element->reified_id && !element->reified) { raptor_uri* reified_uri; reified_uri = raptor_new_uri_from_id(rdf_parser->world, base_uri, element->reified_id); if(!reified_uri) goto oom; element->reified = raptor_new_term_from_uri(rdf_parser->world, reified_uri); raptor_free_uri(reified_uri); if(!element->reified) goto oom; } if(element->rdf_attr[RDF_NS_resource] || element->rdf_attr[RDF_NS_nodeID]) { /* Done - wait for end of this element to end in order to * check the element was empty as expected */ element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE; } else { /* Otherwise process content in obj (value) state */ element->child_state = RAPTOR_STATE_NODE_ELEMENT_LIST; element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT; } } finished = 1; break; case RAPTOR_STATE_INVALID: default: raptor_parser_fatal_error(rdf_parser, "%s Internal error - unexpected parser state %u - %s", __FUNCTION__, state, raptor_rdfxml_state_as_string(state)); finished = 1; } /* end switch */ if(state != element->state) { element->state = state; #ifdef RAPTOR_DEBUG_VERBOSE 
RAPTOR_DEBUG3("Moved to state %d - %s\n", state, raptor_rdfxml_state_as_string(state)); #endif } } /* end while */ #ifdef RAPTOR_DEBUG_VERBOSE RAPTOR_DEBUG2("Ending in state %s\n", raptor_rdfxml_state_as_string(state)); #endif return; oom: raptor_parser_fatal_error(rdf_parser, "Out of memory, skipping"); element->state = RAPTOR_STATE_SKIPPING; } static void raptor_rdfxml_end_element_grammar(raptor_parser *rdf_parser, raptor_rdfxml_element *element) { raptor_rdfxml_parser *rdf_xml_parser; raptor_state state; int finished; raptor_xml_element* xml_element = element->xml_element; raptor_qname* el_qname; const unsigned char *el_name; int element_in_rdf_ns; raptor_uri* element_name_uri; rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context; el_qname = raptor_xml_element_get_name(xml_element); el_name = el_qname->local_name; element_in_rdf_ns= (el_qname->nspace && el_qname->nspace->is_rdf_ms); element_name_uri = el_qname->uri; state = element->state; #ifdef RAPTOR_DEBUG_VERBOSE RAPTOR_DEBUG2("Starting in state %s\n", raptor_rdfxml_state_as_string(state)); #endif finished= 0; while(!finished) { switch(state) { case RAPTOR_STATE_SKIPPING: finished = 1; break; case RAPTOR_STATE_UNKNOWN: finished = 1; break; case RAPTOR_STATE_NODE_ELEMENT_LIST: if(element_in_rdf_ns && raptor_uri_equals(element_name_uri, RAPTOR_RDF_RDF_URI(rdf_parser->world))) { /* end of RDF - boo hoo */ state = RAPTOR_STATE_UNKNOWN; finished = 1; break; } /* When scanning, another element ending is outside the RDF * world so this can happen without further work */ if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_SCANNING)) { state = RAPTOR_STATE_UNKNOWN; finished = 1; break; } /* otherwise found some junk after RDF content in an RDF-only * document (probably never get here since this would be * a mismatched XML tag and cause an error earlier) */ raptor_rdfxml_update_document_locator(rdf_parser); raptor_parser_warning(rdf_parser, "Element '%s' ended, expected end of RDF element", el_name); state 
= RAPTOR_STATE_UNKNOWN; finished = 1; break; case RAPTOR_STATE_DESCRIPTION: case RAPTOR_STATE_NODE_ELEMENT: case RAPTOR_STATE_PARSETYPE_RESOURCE: /* If there is a parent element containing this element and * the parent isn't a description, has an identifier, * create the statement between this node using parent property * (Need to check for identifier so that top-level typed nodes * don't get connect to parent element) */ if(state == RAPTOR_STATE_NODE_ELEMENT && element->parent && element->parent->subject) { raptor_rdfxml_generate_statement(rdf_parser, element->parent->subject, element_name_uri, element->subject, NULL, element); } else if(state == RAPTOR_STATE_PARSETYPE_RESOURCE && element->parent && element->parent->subject) { /* Handle rdf:li as the rdf:parseType="resource" property */ if(element_in_rdf_ns && raptor_uri_equals(element_name_uri, RAPTOR_RDF_li_URI(rdf_parser->world))) { raptor_uri* ordinal_predicate_uri; element->parent->last_ordinal++; ordinal_predicate_uri = raptor_new_uri_from_rdf_ordinal(rdf_parser->world, element->parent->last_ordinal); raptor_rdfxml_generate_statement(rdf_parser, element->parent->subject, ordinal_predicate_uri, element->subject, element->reified, element->parent); raptor_free_uri(ordinal_predicate_uri); } else { raptor_rdfxml_generate_statement(rdf_parser, element->parent->subject, element_name_uri, element->subject, element->reified, element->parent); } } finished = 1; break; case RAPTOR_STATE_PARSETYPE_COLLECTION: finished = 1; break; case RAPTOR_STATE_PARSETYPE_OTHER: /* FALLTHROUGH */ case RAPTOR_STATE_PARSETYPE_LITERAL: element->parent->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL; raptor_xml_writer_end_element(rdf_xml_parser->xml_writer, xml_element); finished = 1; break; case RAPTOR_STATE_PROPERTYELT: case RAPTOR_STATE_MEMBER_PROPERTYELT: /* A property element * http://www.w3.org/TR/rdf-syntax-grammar/#propertyElt * * Literal content part is handled here. 
* The element content is handled in the internal states * Empty content is checked here. */ if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT) { if(xml_element->content_cdata_seen) element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL; else if(xml_element->content_element_seen) element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES; else { /* Empty Literal */ element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL; } } /* Handle terminating a rdf:parseType="Collection" list */ if(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION || element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) { raptor_term* nil_term; if(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) { raptor_uri* nil_uri = RAPTOR_DAML_nil_URI(rdf_xml_parser); nil_term = raptor_new_term_from_uri(rdf_parser->world, nil_uri); } else { nil_term = raptor_term_copy(RAPTOR_RDF_nil_term(rdf_parser->world)); } if(!element->tail_id) { /* If No List: set object of statement to rdf:nil */ element->object = raptor_term_copy(nil_term); } else { raptor_uri* rest_uri = NULL; raptor_term* tail_id_term; if(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) rest_uri = RAPTOR_DAML_rest_URI(rdf_xml_parser); else rest_uri = RAPTOR_RDF_rest_URI(rdf_parser->world); tail_id_term = raptor_new_term_from_blank(rdf_parser->world, element->tail_id); /* terminate the list */ raptor_rdfxml_generate_statement(rdf_parser, tail_id_term, rest_uri, nil_term, NULL, NULL); raptor_free_term(tail_id_term); } raptor_free_term(nil_term); } /* end rdf:parseType="Collection" termination */ #ifdef RAPTOR_DEBUG_VERBOSE RAPTOR_DEBUG3("Content type %s (%d)\n", raptor_rdfxml_element_content_type_as_string(element->content_type), element->content_type); #endif switch(element->content_type) { case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE: 
if(raptor_rdfxml_element_has_property_attributes(element) && element->child_state == RAPTOR_STATE_DESCRIPTION) { raptor_parser_error(rdf_parser, "Property element '%s' has both property attributes and a node element content", el_name); state = RAPTOR_STATE_SKIPPING; element->child_state = RAPTOR_STATE_SKIPPING; break; } if(!element->object) { if(element->rdf_attr[RDF_NS_resource]) { raptor_uri* resource_uri; resource_uri = raptor_new_uri_relative_to_base(rdf_parser->world, raptor_rdfxml_inscope_base_uri(rdf_parser), (const unsigned char*)element->rdf_attr[RDF_NS_resource]); if(!resource_uri) goto oom; element->object = raptor_new_term_from_uri(rdf_parser->world, resource_uri); raptor_free_uri(resource_uri); RAPTOR_FREE(char*, element->rdf_attr[RDF_NS_resource]); element->rdf_attr[RDF_NS_resource] = NULL; if(!element->object) goto oom; element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE; } else if(element->rdf_attr[RDF_NS_nodeID]) { unsigned char* resource_id; resource_id = raptor_world_internal_generate_id(rdf_parser->world, (unsigned char*)element->rdf_attr[RDF_NS_nodeID]); if(!resource_id) goto oom; element->object = raptor_new_term_from_blank(rdf_parser->world, resource_id); RAPTOR_FREE(char*, resource_id); element->rdf_attr[RDF_NS_nodeID] = NULL; if(!element->object) goto oom; element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE; if(!raptor_valid_xml_ID(rdf_parser, element->object->value.blank.string)) { raptor_parser_error(rdf_parser, "Illegal rdf:nodeID value '%s'", (const char*)element->object->value.blank.string); state = RAPTOR_STATE_SKIPPING; element->child_state = RAPTOR_STATE_SKIPPING; break; } } else { unsigned char* resource_id; resource_id = raptor_world_generate_bnodeid(rdf_parser->world); if(!resource_id) goto oom; element->object = raptor_new_term_from_blank(rdf_parser->world, resource_id); RAPTOR_FREE(char*, resource_id); if(!element->object) goto oom; element->content_type = 
RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE; } if(raptor_rdfxml_process_property_attributes(rdf_parser, element, element->parent, element->object)) goto oom; } /* We know object is a resource, so delete any unsignficant * whitespace so that FALLTHROUGH code below finds the object. */ if(xml_element->content_cdata_length) { raptor_free_stringbuffer(xml_element->content_cdata_sb); xml_element->content_cdata_sb = NULL; xml_element->content_cdata_length = 0; } /* FALLTHROUGH */ case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL: if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL) { if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_ALLOW_BAGID)) { /* Only an empty literal can have a rdf:bagID */ if(element->bag) { if(xml_element->content_cdata_length > 0) { raptor_parser_error(rdf_parser, "rdf:bagID is forbidden on a literal property element '%s'.", el_name); /* prevent this being used later either */ element->rdf_attr[RDF_NS_bagID] = NULL; } else { raptor_rdfxml_generate_statement(rdf_parser, element->bag, RAPTOR_RDF_type_URI(rdf_parser->world), RAPTOR_RDF_Bag_term(rdf_parser->world), NULL, NULL); } } } /* if rdf:bagID */ /* If there is empty literal content with properties * generate a node to hang properties off */ if(raptor_rdfxml_element_has_property_attributes(element) && xml_element->content_cdata_length > 0) { raptor_parser_error(rdf_parser, "Literal property element '%s' has property attributes", el_name); state = RAPTOR_STATE_SKIPPING; element->child_state = RAPTOR_STATE_SKIPPING; break; } if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL && raptor_rdfxml_element_has_property_attributes(element) && !element->object) { unsigned char* object_id; object_id = raptor_world_generate_bnodeid(rdf_parser->world); if(!object_id) goto oom; element->object = raptor_new_term_from_blank(rdf_parser->world, object_id); RAPTOR_FREE(char*, object_id); if(!element->object) goto oom; element->content_type = 
RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE; } if(raptor_rdfxml_process_property_attributes(rdf_parser, element, element, element->object)) goto oom; } /* just be friendly to older compilers and don't declare * variables in the middle of a block */ if(1) { raptor_uri *predicate_uri = NULL; int predicate_ordinal = -1; raptor_term* object_term = NULL; if(state == RAPTOR_STATE_MEMBER_PROPERTYELT) { predicate_ordinal = ++element->parent->last_ordinal; predicate_uri = raptor_new_uri_from_rdf_ordinal(rdf_parser->world, predicate_ordinal); } else { predicate_uri = element_name_uri; } if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL) { unsigned char* literal = NULL; raptor_uri* literal_datatype; unsigned char* literal_language = NULL; /* an empty stringbuffer - empty CDATA - is OK */ if(raptor_stringbuffer_length(xml_element->content_cdata_sb)) { literal = raptor_stringbuffer_as_string(xml_element->content_cdata_sb); if(!literal) goto oom; } literal_datatype = element->object_literal_datatype; if(!literal_datatype) literal_language = (unsigned char*)raptor_sax2_inscope_xml_language(rdf_xml_parser->sax2); if(!literal_datatype && literal && !raptor_unicode_check_utf8_nfc_string(literal, xml_element->content_cdata_length, NULL)) { raptor_log_level l; raptor_rdfxml_update_document_locator(rdf_parser); l = (RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_NON_NFC_FATAL)) ? 
RAPTOR_LOG_LEVEL_ERROR : RAPTOR_LOG_LEVEL_WARN; raptor_parser_log_error(rdf_parser, l, "Property element '%s' has a string not in Unicode Normal Form C: %s", el_name, literal); } object_term = raptor_new_term_from_literal(rdf_parser->world, literal, literal_datatype, literal_language); } else { object_term = raptor_term_copy(element->object); } raptor_rdfxml_generate_statement(rdf_parser, element->parent->subject, predicate_uri, object_term, element->reified, element->parent); if(predicate_ordinal >= 0) raptor_free_uri(predicate_uri); raptor_free_term(object_term); } break; case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED: case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL: { unsigned char *buffer; size_t length; raptor_term* xmlliteral_term = NULL; if(rdf_xml_parser->xml_writer) { raptor_xml_writer_flush(rdf_xml_parser->xml_writer); raptor_free_iostream(rdf_xml_parser->iostream); rdf_xml_parser->iostream = NULL; buffer = (unsigned char*)rdf_xml_parser->xml_content; length = rdf_xml_parser->xml_content_length; } else { buffer = raptor_stringbuffer_as_string(xml_element->content_cdata_sb); length = xml_element->content_cdata_length; } if(!raptor_unicode_check_utf8_nfc_string(buffer, length, NULL)) { raptor_log_level l; raptor_rdfxml_update_document_locator(rdf_parser); l = (RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_NON_NFC_FATAL)) ? 
RAPTOR_LOG_LEVEL_ERROR : RAPTOR_LOG_LEVEL_WARN; raptor_parser_log_error(rdf_parser, l, "Property element '%s' has XML literal content not in Unicode Normal Form C: %s", el_name, buffer); } xmlliteral_term = raptor_new_term_from_literal(rdf_parser->world, buffer, RAPTOR_RDF_XMLLiteral_URI(rdf_parser->world), NULL); if(state == RAPTOR_STATE_MEMBER_PROPERTYELT) { raptor_uri* predicate_uri; element->parent->last_ordinal++; predicate_uri = raptor_new_uri_from_rdf_ordinal(rdf_parser->world, element->parent->last_ordinal); raptor_rdfxml_generate_statement(rdf_parser, element->parent->subject, predicate_uri, xmlliteral_term, element->reified, element->parent); raptor_free_uri(predicate_uri); } else { raptor_rdfxml_generate_statement(rdf_parser, element->parent->subject, element_name_uri, xmlliteral_term, element->reified, element->parent); } raptor_free_term(xmlliteral_term); /* Finish the xml writer iostream for parseType="Literal" */ if(rdf_xml_parser->xml_writer) { raptor_free_xml_writer(rdf_xml_parser->xml_writer); rdf_xml_parser->xml_writer = NULL; RAPTOR_FREE(char*, rdf_xml_parser->xml_content); rdf_xml_parser->xml_content = NULL; rdf_xml_parser->xml_content_length = 0; } } break; case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION: case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION: case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_NODES: case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES: case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT: case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_UNKNOWN: case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LAST: default: raptor_parser_fatal_error(rdf_parser, "%s: Internal error in state RAPTOR_STATE_PROPERTYELT - got unexpected content type %s (%u)", __FUNCTION__, raptor_rdfxml_element_content_type_as_string(element->content_type), element->content_type); } /* end switch */ finished = 1; break; case RAPTOR_STATE_INVALID: default: raptor_parser_fatal_error(rdf_parser, "%s: Internal error - unexpected parser state %u - %s", __FUNCTION__, state, 
raptor_rdfxml_state_as_string(state)); finished = 1; } /* end switch */ if(state != element->state) { element->state = state; #ifdef RAPTOR_DEBUG_VERBOSE RAPTOR_DEBUG3("Moved to state %d - %s\n", state, raptor_rdfxml_state_as_string(state)); #endif } } /* end while */ #ifdef RAPTOR_DEBUG_VERBOSE RAPTOR_DEBUG2("Ending in state %s\n", raptor_rdfxml_state_as_string(state)); #endif return; oom: raptor_parser_fatal_error(rdf_parser, "Out of memory, skipping"); element->state = RAPTOR_STATE_SKIPPING; } static void raptor_rdfxml_cdata_grammar(raptor_parser *rdf_parser, const unsigned char *s, int len, int is_cdata) { raptor_rdfxml_parser* rdf_xml_parser; raptor_rdfxml_element* element; raptor_xml_element* xml_element; raptor_state state; int all_whitespace = 1; int i; rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context; if(rdf_parser->failed) return; #ifdef RAPTOR_DEBUG_CDATA RAPTOR_DEBUG2("Adding characters (is_cdata=%d): '", is_cdata); (void)fwrite(s, 1, len, stderr); fprintf(stderr, "' (%d bytes)\n", len); #endif for(i = 0; i < len; i++) if(!isspace(s[i])) { all_whitespace = 0; break; } element = rdf_xml_parser->current_element; /* this file is very broke - probably not XML, whatever */ if(!element) return; xml_element = element->xml_element; raptor_rdfxml_update_document_locator(rdf_parser); /* cdata never changes the parser state * and the containing element state always determines what to do. 
* Use the child_state first if there is one, since that applies */ state = element->child_state; #ifdef RAPTOR_DEBUG_VERBOSE RAPTOR_DEBUG2("Working in state %s\n", raptor_rdfxml_state_as_string(state)); #endif #ifdef RAPTOR_DEBUG_VERBOSE RAPTOR_DEBUG3("Content type %s (%d)\n", raptor_rdfxml_element_content_type_as_string(element->content_type), element->content_type); #endif if(state == RAPTOR_STATE_SKIPPING) return; if(state == RAPTOR_STATE_UNKNOWN) { /* Ignore all cdata if still looking for RDF */ if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_SCANNING)) return; /* Ignore all whitespace cdata before first element */ if(all_whitespace) return; /* This probably will never happen since that would make the * XML not be well-formed */ raptor_parser_warning(rdf_parser, "Character data before RDF element."); } if(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES) { /* If found non-whitespace content, move to literal content */ if(!all_whitespace) element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL; } if(!rdf_content_type_info[element->child_content_type].whitespace_significant) { /* Whitespace is ignored except for literal or preserved content types */ if(all_whitespace) { #ifdef RAPTOR_DEBUG_CDATA RAPTOR_DEBUG2("Ignoring whitespace cdata inside element '%s'\n", raptor_xml_element_get_name(element->parent->xml_element)->local_name); #endif return; } if(xml_element->content_cdata_seen && xml_element->content_element_seen) { raptor_qname* parent_el_name; parent_el_name = raptor_xml_element_get_name(element->parent->xml_element); /* Uh oh - mixed content, this element has elements too */ raptor_parser_warning(rdf_parser, "element '%s' has mixed content.", parent_el_name->local_name); } } if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT) { element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL; #ifdef RAPTOR_DEBUG_VERBOSE RAPTOR_DEBUG3("Content type changed to %s (%d)\n", 
raptor_rdfxml_element_content_type_as_string(element->content_type), element->content_type); #endif } if(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL) raptor_xml_writer_cdata_counted(rdf_xml_parser->xml_writer, s, len); else { raptor_stringbuffer_append_counted_string(xml_element->content_cdata_sb, s, len, 1); element->content_cdata_all_whitespace &= all_whitespace; /* adjust stored length */ xml_element->content_cdata_length += len; } #ifdef RAPTOR_DEBUG_CDATA RAPTOR_DEBUG3("Content cdata now: %d bytes\n", xml_element->content_cdata_length); #endif #ifdef RAPTOR_DEBUG_VERBOSE RAPTOR_DEBUG2("Ending in state %s\n", raptor_rdfxml_state_as_string(state)); #endif } /** * raptor_rdfxml_inscope_base_uri: * @rdf_parser: Raptor parser object * * Return the in-scope base URI. * * Looks for the innermost xml:base on an element or document URI * * Return value: The URI string value or NULL on failure. **/ static raptor_uri* raptor_rdfxml_inscope_base_uri(raptor_parser *rdf_parser) { raptor_rdfxml_parser* rdf_xml_parser; raptor_uri* base_uri; rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context; base_uri = raptor_sax2_inscope_base_uri(rdf_xml_parser->sax2); if(!base_uri) base_uri = rdf_parser->base_uri; return base_uri; } /** * raptor_rdfxml_record_ID: * @rdf_parser: Raptor parser object * @element: Current element * @id: ID string * * Record an rdf:ID / rdf:bagID value (with xml base) and check it hasn't been seen already. * * Record and check the ID values, if they have been seen already. * per in-scope-base URI. 
* * Return value: non-zero if already seen, or failure **/ static int raptor_rdfxml_record_ID(raptor_parser *rdf_parser, raptor_rdfxml_element *element, const unsigned char *id) { raptor_rdfxml_parser *rdf_xml_parser; raptor_uri* base_uri; size_t id_len; int rc; rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context; if(!RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_CHECK_RDF_ID)) return 0; base_uri = raptor_rdfxml_inscope_base_uri(rdf_parser); id_len = strlen((const char*)id); rc = raptor_id_set_add(rdf_xml_parser->id_set, base_uri, id, id_len); return (rc != 0); } static void raptor_rdfxml_update_document_locator(raptor_parser *rdf_parser) { raptor_rdfxml_parser *rdf_xml_parser; rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context; raptor_sax2_update_document_locator(rdf_xml_parser->sax2, &rdf_parser->locator); } static void raptor_rdfxml_parse_finish_factory(raptor_parser_factory* factory) { } static const char* const rdfxml_names[3] = { "rdfxml", "raptor", NULL}; static const char* const rdfxml_uri_strings[3] = { "http://www.w3.org/ns/formats/RDF_XML", "http://www.w3.org/TR/rdf-syntax-grammar", NULL }; #define RDFXML_TYPES_COUNT 2 static const raptor_type_q rdfxml_types[RDFXML_TYPES_COUNT + 1] = { { "application/rdf+xml", 19, 10}, { "text/rdf", 8, 6}, { NULL, 0, 0} }; static int raptor_rdfxml_parser_register_factory(raptor_parser_factory *factory) { int rc = 0; factory->desc.names = rdfxml_names; factory->desc.mime_types = rdfxml_types; factory->desc.label = "RDF/XML"; factory->desc.uri_strings = rdfxml_uri_strings; factory->desc.flags = RAPTOR_SYNTAX_NEED_BASE_URI; factory->context_length = sizeof(raptor_rdfxml_parser); factory->init = raptor_rdfxml_parse_init; factory->terminate = raptor_rdfxml_parse_terminate; factory->start = raptor_rdfxml_parse_start; factory->chunk = raptor_rdfxml_parse_chunk; factory->finish_factory = raptor_rdfxml_parse_finish_factory; factory->recognise_syntax = raptor_rdfxml_parse_recognise_syntax; return rc; } int 
raptor_init_parser_rdfxml(raptor_world* world) { return !raptor_world_register_parser_factory(world, &raptor_rdfxml_parser_register_factory); } #if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 void raptor_rdfxml_parser_stats_print(raptor_rdfxml_parser* rdf_xml_parser, FILE *stream) { fputs("rdf:ID set ", stream); raptor_id_set_stats_print(rdf_xml_parser->id_set, stream); } #endif