diff options
-rw-r--r-- | librdfa/curie.c | 39 | ||||
-rw-r--r-- | librdfa/iri.c | 4 | ||||
-rw-r--r-- | librdfa/rdfa.c | 121 | ||||
-rw-r--r-- | librdfa/rdfa.h | 34 | ||||
-rw-r--r-- | librdfa/triple.c | 4 |
5 files changed, 114 insertions, 88 deletions
diff --git a/librdfa/curie.c b/librdfa/curie.c index 145b49b2..0288a001 100644 --- a/librdfa/curie.c +++ b/librdfa/curie.c @@ -30,13 +30,13 @@ // These are all of the @rel/@rev reserved words in XHTML 1.1 that // should generate triples. -#define XHTML_RELREV_RESERVED_WORDS_SIZE 23 +#define XHTML_RELREV_RESERVED_WORDS_SIZE 24 static const char* const g_relrev_reserved_words[XHTML_RELREV_RESERVED_WORDS_SIZE] = { "alternate", "appendix", "bookmark", "chapter", "cite", "contents", - "copyright", "glossary", "help", "icon", "index", "meta", "next", "p3pv1", - "prev", "role", "section", "stylesheet", "subsection", "start", - "license", "up", "last" + "copyright", "first", "glossary", "help", "icon", "index", + "meta", "next", "p3pv1", "prev", "role", "section", "stylesheet", + "subsection", "start", "license", "up", "last" }; // The base XHTML vocab URL is used to resolve URIs that are reserved @@ -131,25 +131,31 @@ char* rdfa_resolve_uri(rdfacontext* context, const char* uri) // end of the host part. if(end_index != NULL) { + char* rval_copy; + *end_index = '\0'; // if the '/' character after the host part was found, copy the host // part and append the given URI to the URI. - rval = rdfa_replace_string(rval, tmp); - rval = rdfa_join_string(rval, uri); + rval_copy = rdfa_replace_string(rval, tmp); + rval = rdfa_join_string(rval_copy, uri); + free(rval_copy); } else { // append the host part and the URI part as-is, ensuring that a // '/' exists at the end of the host part. unsigned int tlen = strlen(tmp) - 1; - rval = rdfa_replace_string(rval, tmp); + char* rval_copy; + + rval_copy = rdfa_replace_string(rval, tmp); if(rval[tlen] == '/') { rval[tlen] = '\0'; } - rval = rdfa_join_string(rval, uri); + rval = rdfa_join_string(rval_copy, uri); + free(rval_copy); } free(tmp); @@ -165,7 +171,7 @@ char* rdfa_resolve_uri(rdfacontext* context, const char* uri) { // if we have a relative URI, chop off the name of the file // and replace it with the relative pathname - char* end_index = rindex(context->base, '/'); + char* end_index = strrchr(context->base, '/'); if(end_index != NULL) { @@ -173,7 +179,7 @@ char* rdfa_resolve_uri(rdfacontext* context, const char* uri) char* end_index2; tmpstr = rdfa_replace_string(tmpstr, context->base); - end_index2= rindex(tmpstr, '/'); + end_index2= strrchr(tmpstr, '/'); end_index2++; *end_index2 = '\0'; @@ -366,15 +372,15 @@ char* rdfa_resolve_relrev_curie(rdfacontext* context, const char* uri) resource++; } - // search all of the XHTML @rel/@rev reserved words for a match - // against the given URI + // search all of the XHTML @rel/@rev reserved words for a + // case-insensitive match against the given URI for(i = 0; i < XHTML_RELREV_RESERVED_WORDS_SIZE; i++) { - if(strcmp(g_relrev_reserved_words[i], resource) == 0) + if(strcasecmp(g_relrev_reserved_words[i], resource) == 0) { // since the URI is a reserved word for @rel/@rev, generate // the full IRI and stop the loop. - rval = rdfa_join_string(XHTML_VOCAB_URI, resource); + rval = rdfa_join_string(XHTML_VOCAB_URI, g_relrev_reserved_words[i]); i = XHTML_RELREV_RESERVED_WORDS_SIZE; } } @@ -399,7 +405,8 @@ rdfalist* rdfa_resolve_curie_list( working_uris = rdfa_replace_string(working_uris, uris); // go through each item in the list of CURIEs and resolve each - ctoken = strtok_r(working_uris, " ", &uptr); + ctoken = strtok_r(working_uris, RDFA_WHITESPACE, &uptr); + while(ctoken != NULL) { char* resolved_curie = NULL; @@ -424,7 +431,7 @@ rdfalist* rdfa_resolve_curie_list( free(resolved_curie); } - ctoken = strtok_r(NULL, " ", &uptr); + ctoken = strtok_r(NULL, RDFA_WHITESPACE, &uptr); } free(working_uris); diff --git a/librdfa/iri.c b/librdfa/iri.c index 50c53032..2bb67bb6 100644 --- a/librdfa/iri.c +++ b/librdfa/iri.c @@ -39,12 +39,12 @@ char* rdfa_iri_get_base(const char* iri) char* eindex = 0; // search to see if there is iquery separator - eindex = index(iri, '?'); + eindex = strchr(iri, '?'); if(eindex == NULL) { // if there is no iquery separator, check to see if there is an // ifragment separator - eindex = index(iri, '#'); + eindex = strchr(iri, '#'); } // check to see if the output string needs to be different from the diff --git a/librdfa/rdfa.c b/librdfa/rdfa.c index 11dd4174..777d824a 100644 --- a/librdfa/rdfa.c +++ b/librdfa/rdfa.c @@ -101,7 +101,8 @@ void rdfa_init_context(rdfacontext* context) // and valgrind happy - they are not a part of the RDFa spec. context->bnode_count = 0; context->underscore_colon_bnode_name = NULL; - context->xml_literal_namespaces_inserted = 0; + context->xml_literal_namespaces_defined = 0; + context->xml_literal_xml_lang_defined = 0; context->content = NULL; context->datatype = NULL; context->property = NULL; @@ -129,20 +130,28 @@ static size_t rdfa_init_base( rdfacontext* context, char** working_buffer, size_t* working_buffer_size, char* temp_buffer, size_t bytes_read) { - size_t temp_buffer_size = sizeof(char) * READ_BUFFER_SIZE; char* head_end = NULL; size_t offset = context->wb_offset; + int needed_size = (offset + bytes_read) - *working_buffer_size; // search for the end of <head>, stop if <head> was found + // extend the working buffer size - if((offset + bytes_read) > *working_buffer_size) + if(needed_size > 0) { + size_t temp_buffer_size = sizeof(char) * READ_BUFFER_SIZE; + if((size_t)needed_size > temp_buffer_size) + temp_buffer_size += needed_size; + *working_buffer_size += temp_buffer_size; - *working_buffer = (char*)realloc(working_buffer, *working_buffer_size); + // +1 for NUL at end, to allow strstr() etc. to work + *working_buffer = (char*)realloc(*working_buffer, *working_buffer_size + 1); } // append to the working buffer memmove(*working_buffer + offset, temp_buffer, bytes_read); + // ensure the buffer is a NUL-terminated string + *(*working_buffer + offset + bytes_read) = '\0'; // search for the end of </head> in head_end = strstr(*working_buffer, "</head>"); @@ -165,7 +174,7 @@ static size_t rdfa_init_base( { char* href_start = strstr(base_start, "href="); char* uri_start = href_start + 6; - char* uri_end = index(uri_start, '"'); + char* uri_end = strchr(uri_start, '"'); if((uri_start != NULL) && (uri_end != NULL)) { @@ -248,8 +257,10 @@ static rdfacontext* rdfa_create_new_element_context(rdfalist* context_stack) rval->recurse = parent_context->recurse; rval->skip_element = 0; rval->callback_data = parent_context->callback_data; - rval->xml_literal_namespaces_inserted = - parent_context->xml_literal_namespaces_inserted; + rval->xml_literal_namespaces_defined = + parent_context->xml_literal_namespaces_defined; + rval->xml_literal_xml_lang_defined = + parent_context->xml_literal_xml_lang_defined; // inherit the parent context's new_subject // TODO: This is not anywhere in the syntax processing document @@ -390,7 +401,6 @@ static void XMLCALL const char* content = NULL; const char* datatype_curie = NULL; char* datatype = NULL; - unsigned char insert_xml_lang_in_xml_literal = 0; rdfa_push_item(context_stack, context, RDFALIST_FLAG_CONTEXT); @@ -413,8 +423,8 @@ static void XMLCALL context->xml_literal = rdfa_n_append_string( context->xml_literal, &context->xml_literal_size, name, strlen(name)); - - if(!context->xml_literal_namespaces_inserted) + + if(!context->xml_literal_namespaces_defined) { // append namespaces to XML Literal #ifdef LIBRDFA_IN_RAPTOR @@ -428,8 +438,9 @@ static void XMLCALL char* umap_key = NULL; char* umap_value = NULL; - insert_xml_lang_in_xml_literal = 1; - + // if the namespaces are not defined, then neither is the xml:lang + context->xml_literal_xml_lang_defined = 0; + #ifdef LIBRDFA_IN_RAPTOR ns_size = 0; ns_list = raptor_namespace_stack_to_array(nstack, &ns_size); @@ -441,9 +452,9 @@ static void XMLCALL while(*umap != NULL) #endif { - unsigned char namespace_already_defined = 0; - const char* predefined_namespace = NULL; - const char* predefined_namespace_value = NULL; + unsigned char insert_xmlns_definition = 1; + const char* attr = NULL; + const char* value = NULL; // get the next mapping to process #ifdef LIBRDFA_IN_RAPTOR @@ -463,22 +474,24 @@ static void XMLCALL if(attributes != NULL) { const char** attrs = attributes; - while((*attrs != NULL) && !namespace_already_defined) + while((*attrs != NULL) && insert_xmlns_definition) { - predefined_namespace = *attrs++; - predefined_namespace_value = *attrs++; - - if((strcmp(predefined_namespace, umap_key) == 0) || + attr = *attrs++; + value = *attrs++; + + // if the attribute is a umap_key, skip the definition + // of the attribute. + if((strcmp(attr, umap_key) == 0) || (strcmp(umap_key, XMLNS_DEFAULT_MAPPING) == 0)) { - namespace_already_defined = 1; + insert_xmlns_definition = 0; } } } // if the namespace isn't already defined on the element, // copy it to the XML Literal string. - if(!namespace_already_defined) + if(insert_xmlns_definition) { // append the namespace attribute to the XML Literal context->xml_literal = rdfa_n_append_string( @@ -505,25 +518,10 @@ static void XMLCALL context->xml_literal = rdfa_n_append_string( context->xml_literal, &context->xml_literal_size, "\"", 1); } - else - { - // append the namespace value - context->xml_literal = rdfa_n_append_string( - context->xml_literal, &context->xml_literal_size, " ", 1); - context->xml_literal = rdfa_n_append_string( - context->xml_literal, &context->xml_literal_size, - predefined_namespace, strlen(predefined_namespace)); - context->xml_literal = rdfa_n_append_string( - context->xml_literal, &context->xml_literal_size, "=\"", 2); - context->xml_literal = rdfa_n_append_string( - context->xml_literal, &context->xml_literal_size, - predefined_namespace_value, strlen(predefined_namespace_value)); - context->xml_literal = rdfa_n_append_string( - context->xml_literal, &context->xml_literal_size, "\"", 1); - } - namespace_already_defined = 0; + + insert_xmlns_definition = 1; } /* end while umap not NULL */ - context->xml_literal_namespaces_inserted = 1; + context->xml_literal_namespaces_defined = 1; #ifdef LIBRDFA_IN_RAPTOR if(ns_list) @@ -548,14 +546,18 @@ static void XMLCALL // append the attribute-value pair to the XML literal literal_text = (char*)malloc(strlen(attr) + strlen(value) + 5); sprintf(literal_text, " %s=\"%s\"", attr, value); - if(strstr("xmlns", attr) == NULL) - { - context->xml_literal = rdfa_n_append_string( - context->xml_literal, &context->xml_literal_size, - literal_text, strlen(literal_text)); - } + context->xml_literal = rdfa_n_append_string( + context->xml_literal, &context->xml_literal_size, + literal_text, strlen(literal_text)); free(literal_text); + // if xml:lang is defined, ensure that it is not overwritten + if(strcmp(attr, "xml:lang") == 0) + { + context->xml_literal_xml_lang_defined = 1; + } + + // process all of the RDFa attributes if(strcmp(attr, "about") == 0) { about_curie = value; @@ -649,9 +651,10 @@ static void XMLCALL } #endif // check to see if we should append an xml:lang to the XML Literal - // if one is defined in the context and does not exist on the element. + // if one is defined in the context and does not exist on the + // element. if((xml_lang == NULL) && (context->language != NULL) && - insert_xml_lang_in_xml_literal) + !context->xml_literal_xml_lang_defined) { context->xml_literal = rdfa_n_append_string( context->xml_literal, &context->xml_literal_size, @@ -661,6 +664,9 @@ static void XMLCALL context->language, strlen(context->language)); context->xml_literal = rdfa_n_append_string( context->xml_literal, &context->xml_literal_size, "\"", 1); + + // ensure that the lang isn't set in a subtree (unless it's overwritten) + context->xml_literal_xml_lang_defined = 1; } // close the XML Literal value @@ -790,7 +796,7 @@ static void XMLCALL // point on... if(property != NULL) { - context->xml_literal_namespaces_inserted = 0; + context->xml_literal_namespaces_defined = 0; } // save these for processing steps #9 and #10 @@ -901,8 +907,8 @@ static void XMLCALL if(context->xml_literal != NULL) { // get the data between the first tag and the last tag - content_start = index(context->xml_literal, '>'); - content_end = rindex(context->xml_literal, '<'); + content_start = strchr(context->xml_literal, '>'); + content_end = strrchr(context->xml_literal, '<'); if((content_start != NULL) && (content_end != NULL)) { @@ -1219,7 +1225,10 @@ int rdfa_parse_start(rdfacontext* context) int rval = RDFA_PARSE_SUCCESS; context->wb_allocated = sizeof(char) * READ_BUFFER_SIZE; - context->working_buffer = (char*)calloc(context->wb_allocated, sizeof(char)); + // +1 for NUL at end, to allow strstr() etc. to work + // malloc - only the first char needs to be NUL + context->working_buffer = (char*)malloc(context->wb_allocated + 1); + *context->working_buffer = '\0'; #ifndef LIBRDFA_IN_RAPTOR context->parser = XML_ParserCreate(NULL); @@ -1300,8 +1309,8 @@ int rdfa_parse_chunk(rdfacontext* context, char* data, size_t wblen, int done) #endif "%s at line %d, column %d\n", XML_ErrorString(XML_GetErrorCode(context->parser)), - XML_GetCurrentLineNumber(context->parser), - XML_GetCurrentColumnNumber(context->parser)); + (int)XML_GetCurrentLineNumber(context->parser), + (int)XML_GetCurrentColumnNumber(context->parser)); return RDFA_PARSE_FAILED; } #endif @@ -1327,8 +1336,8 @@ int rdfa_parse_chunk(rdfacontext* context, char* data, size_t wblen, int done) #endif "%s at line %d, column %d.\n", XML_ErrorString(XML_GetErrorCode(context->parser)), - XML_GetCurrentLineNumber(context->parser), - XML_GetCurrentColumnNumber(context->parser)); + (int)XML_GetCurrentLineNumber(context->parser), + (int)XML_GetCurrentColumnNumber(context->parser)); return RDFA_PARSE_FAILED; } #endif diff --git a/librdfa/rdfa.h b/librdfa/rdfa.h index 10e4da07..45bca40c 100644 --- a/librdfa/rdfa.h +++ b/librdfa/rdfa.h @@ -36,6 +36,13 @@ #define _LIBRDFA_RDFA_H_ #include <stdlib.h> +// Activate the stupid Windows DLL exporting mechanism if we're building for Windows +#ifdef WIN32 +#define DLLEXPORT __declspec(dllexport) +#else +#define DLLEXPORT +#endif + #ifdef LIBRDFA_IN_RAPTOR #ifdef HAVE_CONFIG_H #include <raptor_config.h> @@ -51,7 +58,7 @@ #endif #ifdef __cplusplus -extern "C" +extern "C" { #endif @@ -67,6 +74,8 @@ extern "C" #define XMLNS_DEFAULT_MAPPING "XMLNS_DEFAULT" +#define RDFA_WHITESPACE " \t\n\v\f\r" + /** * An RDF resource type is used to denote the content of a triple's * object value. @@ -170,7 +179,8 @@ typedef struct rdfacontext /* parse state */ size_t bnode_count; char* underscore_colon_bnode_name; - unsigned char xml_literal_namespaces_inserted; + unsigned char xml_literal_namespaces_defined; + unsigned char xml_literal_xml_lang_defined; size_t wb_allocated; char* working_buffer; size_t wb_offset; @@ -200,7 +210,7 @@ typedef struct rdfacontext * @return a pointer to the base RDFa context, or NULL if memory * allocation failed. */ -rdfacontext* rdfa_create_context(const char* base); +DLLEXPORT rdfacontext* rdfa_create_context(const char* base); /** * Sets the triple handler for the application. @@ -208,7 +218,7 @@ rdfacontext* rdfa_create_context(const char* base); * @param context the base rdfa context for the application. * @param th the triple handler function. */ -void rdfa_set_triple_handler(rdfacontext* context, triple_handler_fp th); +DLLEXPORT void rdfa_set_triple_handler(rdfacontext* context, triple_handler_fp th); /** * Sets the buffer filler for the application. @@ -216,7 +226,7 @@ void rdfa_set_triple_handler(rdfacontext* context, triple_handler_fp th); * @param context the base rdfa context for the application. * @param bf the buffer filler function. */ -void rdfa_set_buffer_filler(rdfacontext* context, buffer_filler_fp bf); +DLLEXPORT void rdfa_set_buffer_filler(rdfacontext* context, buffer_filler_fp bf); /** * Starts processing given the base rdfa context. @@ -227,17 +237,17 @@ void rdfa_set_buffer_filler(rdfacontext* context, buffer_filler_fp bf); * if there was a fatal error and RDFA_PARSE_WARNING if there * was a non-fatal error. */ -int rdfa_parse(rdfacontext* context); +DLLEXPORT int rdfa_parse(rdfacontext* context); -int rdfa_parse_start(rdfacontext* context); +DLLEXPORT int rdfa_parse_start(rdfacontext* context); -int rdfa_parse_chunk(rdfacontext* context, char* data, size_t wblen, int done); +DLLEXPORT int rdfa_parse_chunk(rdfacontext* context, char* data, size_t wblen, int done); -void rdfa_parse_end(rdfacontext* context); +DLLEXPORT void rdfa_parse_end(rdfacontext* context); -void rdfa_init_context(rdfacontext* context); +DLLEXPORT void rdfa_init_context(rdfacontext* context); -char* rdfa_iri_get_base(const char* iri); +DLLEXPORT char* rdfa_iri_get_base(const char* iri); /** * Destroys the given rdfa context by freeing all memory associated @@ -245,7 +255,7 @@ char* rdfa_iri_get_base(const char* iri); * * @param context the rdfa context. */ -void rdfa_free_context(rdfacontext* context); +DLLEXPORT void rdfa_free_context(rdfacontext* context); #ifdef __cplusplus } diff --git a/librdfa/triple.c b/librdfa/triple.c index 3196005a..37440e5c 100644 --- a/librdfa/triple.c +++ b/librdfa/triple.c @@ -437,7 +437,7 @@ void rdfa_complete_object_literal_triples(rdfacontext* context) current_object_literal = context->content; type = RDF_TYPE_PLAIN_LITERAL; } - else if(index(context->xml_literal, '<') == NULL) + else if(strchr(context->xml_literal, '<') == NULL) { current_object_literal = context->plain_literal; type = RDF_TYPE_PLAIN_LITERAL; @@ -467,7 +467,7 @@ void rdfa_complete_object_literal_triples(rdfacontext* context) // [current element], i.e., not including the element itself, and // giving it a datatype of rdf:XMLLiteral. if((current_object_literal == NULL) && - (index(context->xml_literal, '<') != NULL) && + (strchr(context->xml_literal, '<') != NULL) && ((context->datatype == NULL) || (strcmp(context->datatype, "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral") == 0))) |