/* -*- Mode: c; c-basic-offset: 2 -*- * * raptor_xml.c - Raptor XML routines * * Copyright (C) 2003-2008, David Beckett http://www.dajobe.org/ * Copyright (C) 2003-2004, University of Bristol, UK http://www.bristol.ac.uk/ * * This package is Free Software and part of Redland http://librdf.org/ * * It is licensed under the following three licenses as alternatives: * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version * 2. GNU General Public License (GPL) V2 or any newer version * 3. Apache License, V2.0 or any newer version * * You may not use this file except in compliance with at least one of * the above three licenses. * * See LICENSE.html or LICENSE.txt at the top of this package for the * complete terms and further detail along with the license texts for * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. * * */ #ifdef HAVE_CONFIG_H #include #endif #include #include #include #include #ifdef HAVE_ERRNO_H #include #endif #ifdef HAVE_STDLIB_H #include #endif /* Raptor includes */ #include "raptor2.h" #include "raptor_internal.h" #ifndef STANDALONE /** * raptor_new_xml_element: * @name: The XML element name * @xml_language: the in-scope XML language (or NULL) * @xml_base: the in-scope XML base URI (or NULL) * * Constructor - create a new XML element from a QName * * The @xml_language and @xml_base become owned by the new object. * * Return value: a new #raptor_xml_element or NULL on failure **/ raptor_xml_element* raptor_new_xml_element(raptor_qname *name, const unsigned char *xml_language, raptor_uri *xml_base) { raptor_xml_element* xml_element; xml_element = RAPTOR_CALLOC(raptor_xml_element*, 1, sizeof(*xml_element)); if(!xml_element) return NULL; /* Element name */ xml_element->name = name; xml_element->xml_language = xml_language; xml_element->base_uri = xml_base; xml_element->declared_nspaces = NULL; xml_element->content_cdata_sb = raptor_new_stringbuffer(); if(!xml_element->content_cdata_sb) { RAPTOR_FREE(raptor_xml_element, xml_element); xml_element = NULL; } return xml_element; } /** * raptor_new_xml_element_from_namespace_local_name: * @ns: namespace * @name: the XML element local name * @xml_language: the in-scope XML language (or NULL) * @xml_base: base uri (or NULL) * * Constructor - create a new XML element from an XML namespace and a local name * * Added in 1.4.16. * * Return value: a new #raptor_xml_element or NULL on failure */ raptor_xml_element* raptor_new_xml_element_from_namespace_local_name(raptor_namespace *ns, const unsigned char *name, const unsigned char *xml_language, raptor_uri *xml_base) { raptor_uri *base_uri_copy; raptor_qname *qname; raptor_xml_element *element = NULL; qname = raptor_new_qname_from_namespace_local_name(ns->nstack->world, ns, name, NULL); if(qname) { base_uri_copy = xml_base ? raptor_uri_copy(xml_base) : NULL; element = raptor_new_xml_element(qname, xml_language, base_uri_copy); if(!element) { raptor_free_qname(qname); if(base_uri_copy) raptor_free_uri(base_uri_copy); } } return element; } /** * raptor_free_xml_element: * @element: XML Element * * Destructor - destroy a raptor_xml_element object. **/ void raptor_free_xml_element(raptor_xml_element *element) { unsigned int i; if(!element) return; for(i = 0; i < element->attribute_count; i++) if(element->attributes[i]) raptor_free_qname(element->attributes[i]); if(element->attributes) RAPTOR_FREE(raptor_qname_array, element->attributes); if(element->content_cdata_sb) raptor_free_stringbuffer(element->content_cdata_sb); if(element->base_uri) raptor_free_uri(element->base_uri); if(element->xml_language) RAPTOR_FREE(char*, element->xml_language); raptor_free_qname(element->name); if(element->declared_nspaces) raptor_free_sequence(element->declared_nspaces); RAPTOR_FREE(raptor_element, element); } /** * raptor_xml_element_get_name: * @xml_element: XML Element * * Get the XML Name of an XML element * * Return value: The Name. **/ raptor_qname* raptor_xml_element_get_name(raptor_xml_element *xml_element) { return xml_element->name; } /** * raptor_xml_element_set_attributes: * @xml_element: XML Element * @attributes: Array of XML Qname attributes with values * @count: Length of array * * Set the attributes on an XML element. * * The @attributes array becomes owned by the element after this function. **/ void raptor_xml_element_set_attributes(raptor_xml_element* xml_element, raptor_qname **attributes, int count) { xml_element->attributes = attributes; xml_element->attribute_count = count; } /** * raptor_xml_element_get_attributes: * @xml_element: XML Element * * Get the array of attributes on the XML element. * * Use raptor_xml_element_get_attributes_count() to get the count * of the array size. * * Return value: the array of qnames or NULL if none are present. **/ raptor_qname** raptor_xml_element_get_attributes(raptor_xml_element* xml_element) { return xml_element->attributes; } /** * raptor_xml_element_get_attributes_count: * @xml_element: XML Element * * Get the number of attributes on the XML element. * * Return value: Integer number of attributes - 0 or more. **/ int raptor_xml_element_get_attributes_count(raptor_xml_element* xml_element) { return xml_element->attribute_count; } /** * raptor_xml_element_declare_namespace: * @xml_element: XML Element * @nspace: raptor_namespace to declare * * Declare a namespace on the XML Element. * * Return value: non-0 if namespace cannot be declared **/ int raptor_xml_element_declare_namespace(raptor_xml_element* xml_element, raptor_namespace *nspace) { int i; const raptor_namespace *ns; if(!xml_element->declared_nspaces) xml_element->declared_nspaces = raptor_new_sequence(NULL, NULL); if((ns = xml_element->name->nspace)) { /* Cannot have same namespace already seen */ if(ns == nspace || /* ... or two default nspaces */ (!ns->prefix && !nspace->prefix) || /* ... or two same prefixes */ (ns->prefix && nspace->prefix && !strcmp((const char*)ns->prefix, (const char*)nspace->prefix)) ) return 1; } for(i = 0; (ns = (const raptor_namespace*)raptor_sequence_get_at(xml_element->declared_nspaces, i)); i++) { /* Cannot have same namespace already seen */ if(ns == nspace || /* ... or two default nspaces */ (!ns->prefix && !nspace->prefix) || /* ... or two same prefixes */ (ns->prefix && nspace->prefix && !strcmp((const char*)ns->prefix, (const char*)nspace->prefix)) ) return 1; } raptor_sequence_push(xml_element->declared_nspaces, nspace); return 0; } #ifdef RAPTOR_DEBUG void raptor_print_xml_element(raptor_xml_element *element, FILE* stream) { raptor_qname_print(stream, element->name); fputc('\n', stream); if(element->attribute_count) { unsigned int i; int printed = 0; fputs(" attributes: ", stream); for(i = 0; i < element->attribute_count; i++) { if(element->attributes[i]) { if(printed) fputc(' ', stream); raptor_qname_print(stream, element->attributes[i]); fprintf(stream, "='%s'", element->attributes[i]->value); printed = 1; } } fputc('\n', stream); } } #endif struct nsd { const raptor_namespace *nspace; unsigned char *declaration; size_t length; }; static int raptor_nsd_compare(const void *a, const void *b) { struct nsd* nsd_a = (struct nsd*)a; struct nsd* nsd_b = (struct nsd*)b; /* Sort NULLs earlier */ if(!nsd_a->declaration) return -1; else if(!nsd_b->declaration) return 1; return strcmp((const char*)nsd_a->declaration, (const char*)nsd_b->declaration); } /** * raptor_xml_element_write: * @element: XML element to format * @nstack: Namespace stack context to use in formatting * @is_empty: non-0 if element is empty * @is_end: non-0 if this is an end element (else is a start element) * @depth: XML element depth * @iostr: iostream object * * Write a formatted XML element to a #raptor_iostream * * Return value: non-0 on failure */ int raptor_xml_element_write(raptor_xml_element *element, raptor_namespace_stack *nstack, int is_empty, int is_end, int depth, raptor_iostream* iostr) { struct nsd *nspace_declarations = NULL; size_t nspace_declarations_count = 0; unsigned int i; /* max is 1 per element and 1 for each attribute + size of declared */ if(nstack) { int nspace_max_count = element->attribute_count+1; if(element->declared_nspaces) nspace_max_count += raptor_sequence_size(element->declared_nspaces); nspace_declarations = RAPTOR_CALLOC(struct nsd*, nspace_max_count, sizeof(struct nsd)); } if(element->name->nspace) { if(!is_end && nstack && !raptor_namespaces_namespace_in_scope(nstack, element->name->nspace)) { nspace_declarations[0].declaration= raptor_namespace_format_as_xml(element->name->nspace, &nspace_declarations[0].length); nspace_declarations[0].nspace = element->name->nspace; nspace_declarations_count++; } } if(!is_end && element->attributes) { for(i = 0; i < element->attribute_count; i++) { /* qname */ if(element->attributes[i]->nspace) { if(nstack && !raptor_namespaces_namespace_in_scope(nstack, element->attributes[i]->nspace) && element->attributes[i]->nspace != element->name->nspace) { /* not in scope and not same as element (so already going to be declared)*/ unsigned int j; int declare_me = 1; /* check it wasn't an earlier declaration too */ for(j = 0; j < nspace_declarations_count; j++) if(nspace_declarations[j].nspace == element->attributes[j]->nspace) { declare_me = 0; break; } if(declare_me) { nspace_declarations[nspace_declarations_count].declaration= raptor_namespace_format_as_xml(element->attributes[i]->nspace, &nspace_declarations[nspace_declarations_count].length); nspace_declarations[nspace_declarations_count].nspace = element->attributes[i]->nspace; nspace_declarations_count++; } } } } } if(!is_end && nstack && element->declared_nspaces && raptor_sequence_size(element->declared_nspaces) > 0) { for(i = 0; i< (unsigned int)raptor_sequence_size(element->declared_nspaces); i++) { raptor_namespace* nspace = (raptor_namespace*)raptor_sequence_get_at(element->declared_nspaces, i); unsigned int j; int declare_me = 1; /* check it wasn't an earlier declaration too */ for(j = 0; j < nspace_declarations_count; j++) if(nspace_declarations[j].nspace == nspace) { declare_me = 0; break; } if(declare_me) { nspace_declarations[nspace_declarations_count].declaration= raptor_namespace_format_as_xml(nspace, &nspace_declarations[nspace_declarations_count].length); nspace_declarations[nspace_declarations_count].nspace = nspace; nspace_declarations_count++; } } } raptor_iostream_write_byte('<', iostr); if(is_end) raptor_iostream_write_byte('/', iostr); if(element->name->nspace && element->name->nspace->prefix_length > 0) { raptor_iostream_counted_string_write((const char*)element->name->nspace->prefix, element->name->nspace->prefix_length, iostr); raptor_iostream_write_byte(':', iostr); } raptor_iostream_counted_string_write((const char*)element->name->local_name, element->name->local_name_length, iostr); /* declare namespaces */ if(nspace_declarations_count) { /* sort them into the canonical order */ qsort((void*)nspace_declarations, nspace_declarations_count, sizeof(struct nsd), raptor_nsd_compare); /* add them */ for(i = 0; i < nspace_declarations_count; i++) { raptor_iostream_write_byte(' ', iostr); raptor_iostream_counted_string_write((const char*)nspace_declarations[i].declaration, nspace_declarations[i].length, iostr); RAPTOR_FREE(char*, nspace_declarations[i].declaration); nspace_declarations[i].declaration = NULL; raptor_namespace_stack_start_namespace(nstack, (raptor_namespace*)nspace_declarations[i].nspace, depth); } } if(!is_end && element->attributes) { for(i = 0; i < element->attribute_count; i++) { raptor_iostream_write_byte(' ', iostr); if(element->attributes[i]->nspace && element->attributes[i]->nspace->prefix_length > 0) { raptor_iostream_counted_string_write((char*)element->attributes[i]->nspace->prefix, element->attributes[i]->nspace->prefix_length, iostr); raptor_iostream_write_byte(':', iostr); } raptor_iostream_counted_string_write((const char*)element->attributes[i]->local_name, element->attributes[i]->local_name_length, iostr); raptor_iostream_counted_string_write("=\"", 2, iostr); raptor_xml_escape_string_write(element->attributes[i]->value, element->attributes[i]->value_length, '"', iostr); raptor_iostream_write_byte('"', iostr); } } if(is_empty) raptor_iostream_write_byte('/', iostr); raptor_iostream_write_byte('>', iostr); if(nstack) RAPTOR_FREE(stringarray, nspace_declarations); return 0; } /** * raptor_xml_element_get_language: * @xml_element: XML Element * * Get the XML language of the element. * * Return value: XML language or NULL if none in scope **/ const unsigned char* raptor_xml_element_get_language(raptor_xml_element* xml_element) { return xml_element->xml_language; } /** * raptor_valid_xml_ID: * @rdf_parser: RDF parser * @string: The string to check. * * Check the string matches the xml:ID value constraints. * * This checks the syntax part of the xml:ID validity constraint, * that it matches [ VC: Name Token ] as amended by XML Namespaces: * * See Namespaces in XML NCName * * Return value: non-zero if the ID string is valid **/ int raptor_valid_xml_ID(raptor_parser *rdf_parser, const unsigned char *string) { size_t len = strlen((const char*)string); #ifdef RAPTOR_XML_1_1 #define XML_ID_XML_VERSION 11 #else #define XML_ID_XML_VERSION 10 #endif return raptor_xml_name_check(string, len, XML_ID_XML_VERSION); } /** * raptor_xml_escape_string_any: * @world: raptor world * @string: string to XML escape (UTF-8) * @len: length of string * @buffer: the buffer to use for new string (UTF-8) or NULL to just calculate expected length * @length: buffer size * @quote: optional quote character to escape for attribute content, or 0 * @xml_version: XML 1.0 (10) or XML 1.1 (11) * * Return an XML-escaped version a string. * * Follows * Canonical XML rules on Text Nodes and Attribute Nodes * * Both: * Replaces & and < * with &amp; and &lt; * respectively, preserving other characters. * * Text Nodes: * > is turned into &gt; * ##xD is turned into &##xD; * * Attribute Nodes: * > is generated not &gt. * ##x9, ##xA and ##xD are turned into * &##x9;, * &##xA; and * &##xD; * entities. * * If @quote is given it can be either of '\'' or '\"' * which will be turned into &apos; or * &quot; respectively. * ASCII NUL ('\0') or any other character will not be escaped. * * If @buffer is NULL, no work is done but the size of buffer * required is returned. The output in buffer remains in UTF-8. * * If the input @string is empty, a single NUL will be written to the * buffer. * * Return value: the number of bytes required / used or <0 on failure. **/ int raptor_xml_escape_string_any(raptor_world *world, const unsigned char *string, size_t len, unsigned char *buffer, size_t length, char quote, int xml_version) { size_t l; size_t new_len = 0; const unsigned char *p; unsigned char *q; int unichar_len; raptor_unichar unichar; if(!string) return -1; RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(world, raptor_world, -1); raptor_world_open(world); if(quote != '\"' && quote != '\'') quote='\0'; for(l = len, p = string; l; p++, l--) { if(*p > 0x7f) { unichar_len = raptor_unicode_utf8_string_get_char(p, l, &unichar); if(unichar_len < 0 || RAPTOR_GOOD_CAST(size_t, unichar_len) > l) { raptor_log_error(world, RAPTOR_LOG_LEVEL_ERROR, NULL, "Bad UTF-8 encoding."); return -1; } } else { unichar=*p; unichar_len = 1; } if(unichar == '&') /* & */ new_len+= 5; else if(unichar == '<' || (!quote && unichar == '>')) /* < or > */ new_len+= 4; else if(quote && unichar == (unsigned long)quote) /* ' or " */ new_len+= 6; else if(unichar == 0x0d || (quote && (unichar == 0x09 || unichar == 0x0a))) /* or or &xA; */ new_len+= 5; else if(unichar == 0x7f || (unichar < 0x20 && unichar != 0x09 && unichar != 0x0a)) { if(!unichar || xml_version < 11) { raptor_log_error_formatted(world, RAPTOR_LOG_LEVEL_ERROR, NULL, "Cannot write illegal XML 1.0 character U+%6lX.", unichar); } else { /* &#xX; */ new_len+= 5; if(unichar > 0x0f) new_len++; } } else new_len+= unichar_len; unichar_len--; /* since loop does len-- */ p += unichar_len; l -= unichar_len; } if(length && new_len > length) return 0; if(!buffer) return RAPTOR_BAD_CAST(int, new_len); for(l = len, p = string, q = buffer; l; p++, l--) { if(*p > 0x7f) { unichar_len = raptor_unicode_utf8_string_get_char(p, l, &unichar); /* if the UTF-8 encoding is bad, we already did return -1 above */ } else { unichar=*p; unichar_len = 1; } if(unichar == '&') { memcpy(q, "&", 5); q+= 5; } else if(unichar == '<') { memcpy(q, "<", 4); q+= 4; } else if(!quote && unichar == '>') { memcpy(q, ">", 4); q+= 4; } else if(quote && unichar == RAPTOR_GOOD_CAST(unsigned long, quote)) { if(quote == '\'') memcpy(q, "'", 6); else memcpy(q, """, 6); q+= 6; } else if(unichar == 0x0d || (quote && (unichar == 0x09 || unichar == 0x0a))) { /* &#xX; */ *q++='&'; *q++='#'; *q++='x'; if(unichar == 0x09) *q++ = '9'; else *q++ = RAPTOR_GOOD_CAST(unsigned char, 'A' + (RAPTOR_GOOD_CAST(char, unichar) - 0x0a)); *q++= ';'; } else if(unichar == 0x7f || (unichar < 0x20 && unichar != 0x09 && unichar != 0x0a)) { if(!unichar || xml_version < 11) { raptor_log_error_formatted(world, RAPTOR_LOG_LEVEL_ERROR, NULL, "Cannot write illegal XML 1.0 character U+%6lX.", unichar); } else { /* &#xX; */ *q++ = '&'; *q++ = '#'; *q++ = 'x'; q += raptor_format_integer((char*)q, 3, RAPTOR_GOOD_CAST(unsigned int, unichar), /* base */ 16, -1, '\0'); *q++ = ';'; } } else { /* coverity[negative_returns] * negative unichar_len values are checked and cause return -1 above */ memcpy(q, p, unichar_len); q+= unichar_len; } unichar_len--; /* since loop does len-- */ p += unichar_len; l -= unichar_len; } /* Terminate new string */ *q = '\0'; return RAPTOR_BAD_CAST(int, new_len); } /** * raptor_xml_escape_string: * @world: raptor world * @string: string to XML 1.0 escape (UTF-8) * @len: length of string * @buffer: the buffer to use for new string (UTF-8) or NULL to just calculate expected length. * @length: buffer size * @quote: optional quote character to escape for attribute content, or 0 * * Return an XML 1.0-escaped version a string. * * See raptor_xml_escape_string_any() for the conditions on parameters. * * Return value: the number of bytes required / used or <0 on failure. **/ int raptor_xml_escape_string(raptor_world *world, const unsigned char *string, size_t len, unsigned char *buffer, size_t length, char quote) { if(!string) return -1; RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(world, raptor_world, -1); raptor_world_open(world); return raptor_xml_escape_string_any(world, string, len, buffer, length, quote, 10); } /** * raptor_xml_escape_string_any_write: * @string: string to XML escape (UTF-8) * @len: length of string * @quote: optional quote character to escape for attribute content, or 0 * @xml_version: XML version - 10 (XML 1.0) or 11 (XML 1.1) * @iostr: the #raptor_iostream to write to * * Write an XML-escaped version of a string to an iostream. * * See raptor_xml_escape_string() for the escapes performed and * the conditions on @quote and @string. XML 1.1 allows additional * characters in XML such as U+0001 to U+001F inclusive. * * Return value: non 0 on failure **/ int raptor_xml_escape_string_any_write(const unsigned char *string, size_t len, char quote, int xml_version, raptor_iostream* iostr) { size_t l; const unsigned char *p; if(xml_version != 10) xml_version = 11; if(quote != '\"' && quote != '\'') quote='\0'; for(l = len, p = string; l; p++, l--) { int unichar_len = 1; raptor_unichar unichar=*p; if(*p > 0x7f) { unichar_len = raptor_unicode_utf8_string_get_char(p, l, &unichar); if(unichar_len < 0 || RAPTOR_GOOD_CAST(size_t, unichar_len) > l) { raptor_log_error(raptor_iostream_get_world(iostr), RAPTOR_LOG_LEVEL_ERROR, NULL, "Bad UTF-8 encoding."); return 1; } } if(unichar == '&') raptor_iostream_counted_string_write("&", 5, iostr); else if(unichar == '<') raptor_iostream_counted_string_write("<", 4, iostr); else if(!quote && unichar == '>') raptor_iostream_counted_string_write(">", 4, iostr); else if(quote && unichar == (unsigned long)quote) { if(quote == '\'') raptor_iostream_counted_string_write("'", 6, iostr); else raptor_iostream_counted_string_write(""", 6, iostr); } else if(unichar == 0x0d || (quote && (unichar == 0x09 || unichar == 0x0a))) { /* &#xX; */ raptor_iostream_counted_string_write("&#x", 3, iostr); if(unichar == 0x09) raptor_iostream_write_byte('9', iostr); else raptor_iostream_write_byte('A'+ ((char)unichar-0x0a), iostr); raptor_iostream_write_byte(';', iostr); } else if(unichar == 0x7f || (unichar < 0x20 && unichar != 0x09 && unichar != 0x0a)) { if(!unichar || xml_version < 11) { raptor_log_error_formatted(raptor_iostream_get_world(iostr), RAPTOR_LOG_LEVEL_ERROR, NULL, "Cannot write illegal XML 1.0 character U+%6lX.", unichar); } else { int width = (unichar < 0x10) ? 1 : 2; /* &#xX; */ raptor_iostream_counted_string_write("&#x", 3, iostr); raptor_iostream_hexadecimal_write(RAPTOR_GOOD_CAST(unsigned int, unichar), width, iostr); raptor_iostream_write_byte(';', iostr); } } else raptor_iostream_counted_string_write((const char*)p, unichar_len, iostr); unichar_len--; /* since loop does len-- */ p += unichar_len; l -= unichar_len; } return 0; } /** * raptor_xml_escape_string_write: * @string: string to XML 1.0 escape (UTF-8) * @len: length of string * @quote: optional quote character to escape for attribute content, or 0 * @iostr: the #raptor_iostream to write to * * Write an XML 1.0-escaped version of a string to an iostream. * * See raptor_xml_escape_string_any_write() for the escapes * performed and the conditions on @quote and @string. * * Return value: non 0 on failure **/ int raptor_xml_escape_string_write(const unsigned char *string, size_t len, char quote, raptor_iostream* iostr) { return raptor_xml_escape_string_any_write(string, len, quote, 10, iostr); } /** * raptor_xml_name_check: * @string: UTF-8 name string * @length: length of string * @xml_version: XML version * * Check a string is a legal XML name (and legal UTF8). * * xml_version is either 10 (for XML 1.0) or 11 for (XML 1.1). Any * other version fails. * * Return value: Non 0 if the string is a legal XML name **/ int raptor_xml_name_check(const unsigned char *string, size_t length, int xml_version) { int pos; if(xml_version != 10 && xml_version != 11) return 0; for(pos = 0; length > 0; pos++) { raptor_unichar unichar = 0; int unichar_len; unichar_len = raptor_unicode_utf8_string_get_char(string, length, &unichar); if(unichar_len < 0 || RAPTOR_GOOD_CAST(size_t, unichar_len) > length) return 0; if(unichar > raptor_unicode_max_codepoint) return 0; if(!pos) { /* start of name */ if(xml_version == 10) { if(!raptor_unicode_is_xml10_namestartchar(unichar)) return 0; } else { if(!raptor_unicode_is_xml11_namestartchar(unichar)) return 0; } } else { /* rest of name */ if(xml_version == 10) { if(!raptor_unicode_is_xml10_namechar(unichar)) return 0; } else { if(!raptor_unicode_is_xml11_namechar(unichar)) return 0; } } string += unichar_len; length -= unichar_len; } return 1; } #endif #ifdef STANDALONE /* static prototypes */ void raptor_bad_string_print(const unsigned char *input, FILE *stream); int main(int argc, char *argv[]); void raptor_bad_string_print(const unsigned char *input, FILE *stream) { while(*input) { char c=(char)*input; if(isprint(c)) fputc(c, stream); else fprintf(stream, "\\x%02X", (c & 0xff)); input++; } } int main(int argc, char *argv[]) { raptor_world *world; const char *program = raptor_basename(argv[0]); struct tv { const char *string; const char quote; const char *result; }; struct tv *t; struct tv test_values[]={ {"", 0, ""}, {"&", 0, "&"}, {"<", 0, "<"}, {">", 0, ">"}, {"\x09", 0, "\x09"}, {"\x0a", 0, "\x0a"}, {"\x0d", 0, " "}, {"'&'", '\'', "'&'"}, {"'<'", '\'', "'<'"}, {"'>'", '\'', "'>'"}, {"\x09", '\'', " "}, {"\x0a", '\'', " "}, {"\x0d", '\'', " "}, {"\"&\"", '\"', ""&""}, {"\"<\"", '\"', ""<""}, {"\">\"", '\"', "">""}, {"\x09", '\"', " "}, {"\x0a", '\"', " "}, {"\x0d", '\"', " "}, {"&", 0, "&amp;"}, {"", 0, "<foo>"}, #if 0 {"\x1f", 0, ""}, {"\xc2\x80", 0, "€"}, {"\xe0\xa0\x80", 0, "ࠀ"}, {"\xf0\x90\x80\x80", 0, "𐀀"}, {"\x7f", 0, ""}, {"\xdf\xbf", 0, "߿"}, {"\xef\xbf\xbd", 0, "�"}, {"\xf4\x8f\xbf\xbf", 0, "􏿿"}, {"\xc3\xbf", 0, "ÿ"}, {"\xf0\x8f\xbf\xbf", 0, "￿"}, #endif {NULL, 0, 0} }; int i; int failures = 0; world = raptor_new_world(); if(!world || raptor_world_open(world)) exit(1); for(i = 0; (t=&test_values[i]) && t->string; i++) { const unsigned char *utf8_string = (const unsigned char*)t->string; char quote = t->quote; size_t utf8_string_len = strlen((const char*)utf8_string); unsigned char *xml_string; int xml_string_len = 0; xml_string_len = raptor_xml_escape_string(world, utf8_string, utf8_string_len, NULL, 0, quote); if(xml_string_len < 0) { fprintf(stderr, "%s: raptor_xml_escape_string FAILED to escape string '", program); raptor_bad_string_print(utf8_string, stderr); fputs("'\n", stderr); failures++; continue; } xml_string = RAPTOR_MALLOC(unsigned char*, xml_string_len + 1); xml_string_len = raptor_xml_escape_string(world, utf8_string, utf8_string_len, xml_string, xml_string_len, quote); if(xml_string_len < 0) { fprintf(stderr, "%s: raptor_xml_escape_string FAILED to escape string '", program); raptor_bad_string_print(utf8_string, stderr); fputs("'\n", stderr); failures++; continue; } if(strcmp((const char*)xml_string, t->result)) { fprintf(stderr, "%s: raptor_xml_escape_string FAILED to escape string '", program); raptor_bad_string_print(utf8_string, stderr); fprintf(stderr, "', expected '%s', result was '%s'\n", t->result, xml_string); failures++; continue; } #if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 fprintf(stderr, "%s: raptor_xml_escape_string escaped string to '%s' ok\n", program, xml_string); #endif RAPTOR_FREE(char*, xml_string); } #if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 if(!failures) fprintf(stderr, "%s: raptor_xml_escape_string all tests OK\n", program); #endif raptor_free_world(world); return failures; } #endif