diff options
author | Thies C. Arntzen <thies@php.net> | 2001-08-12 18:44:42 +0000 |
---|---|---|
committer | Thies C. Arntzen <thies@php.net> | 2001-08-12 18:44:42 +0000 |
commit | d5b1a7f440755f2ca94764ef0c7f1122c7d97e44 (patch) | |
tree | 349b40f4ea3f00fa0f2865ac0ceab84104d86296 /ext/xml | |
parent | f5c59863441ac2bb775d0dadfca03524327ea16e (diff) | |
download | php-git-d5b1a7f440755f2ca94764ef0c7f1122c7d97e44.tar.gz |
tags need to be UTF8-decoded as well.
guys, case_folding can only work when the parser target encoding equal (or
compatible) with the locale setting of the system as we use toupper to "do it"
Diffstat (limited to 'ext/xml')
-rw-r--r-- | ext/xml/tests/007.phpt | 42 | ||||
-rw-r--r-- | ext/xml/xml.c | 106 |
2 files changed, 94 insertions, 54 deletions
diff --git a/ext/xml/tests/007.phpt b/ext/xml/tests/007.phpt new file mode 100644 index 0000000000..1bff1dee0e --- /dev/null +++ b/ext/xml/tests/007.phpt @@ -0,0 +1,42 @@ +--TEST-- +xml_parse_into_struct/umlauts in tags +--SKIPIF-- +<?php include("skipif.inc"); ?> +--FILE-- +<?php +function startHandler($parser,$tag,$attr) { var_dump($tag,$attr); } +function endHandler($parser,$tag) { var_dump($tag); } + +$xmldata = '<?xml version="1.0" encoding="ISO-8859-1"?><äöü üäß="Üäß">ÄÖÜ</äöü>'; +$parser = xml_parser_create('ISO-8859-1'); +xml_set_element_handler($parser, "startHandler", "endHandler"); +xml_parse_into_struct($parser, $xmldata, $struct, $index); +var_dump($struct); +?> +--GET-- +--POST-- +--EXPECT-- +string(3) "ÄÖÜ" +array(1) { + ["ÜÄß"]=> + string(3) "Üäß" +} +string(3) "ÄÖÜ" +array(1) { + [0]=> + array(5) { + ["tag"]=> + string(3) "ÄÖÜ" + ["type"]=> + string(8) "complete" + ["level"]=> + int(1) + ["attributes"]=> + array(1) { + ["ÜÄß"]=> + string(3) "Üäß" + } + ["value"]=> + string(3) "ÄÖÜ" + } +} diff --git a/ext/xml/xml.c b/ext/xml/xml.c index ebb14f6a93..b11ca92b19 100644 --- a/ext/xml/xml.c +++ b/ext/xml/xml.c @@ -85,7 +85,7 @@ static zval *xml_call_handler(xml_parser *, zval *, int, zval **); static zval *_xml_xmlchar_zval(const XML_Char *, int, const XML_Char *); static int _xml_xmlcharlen(const XML_Char *); static void _xml_add_to_info(xml_parser *parser,char *name); - +inline static char *_xml_decode_tag(xml_parser *parser, const char *tag); void _xml_startElementHandler(void *, const char *, const char **); void _xml_endElementHandler(void *, const char *); @@ -621,47 +621,55 @@ static void _xml_add_to_info(xml_parser *parser,char *name) parser->curtag++; } +/* }}} */ +/* {{{ _xml_decode_tag() */ + +static char *_xml_decode_tag(xml_parser *parser, const char *tag) +{ + char *newstr; + int out_len; + + newstr = xml_utf8_decode(tag, strlen(tag), &out_len, parser->target_encoding); + + if (parser->case_folding) { + php_strtoupper(newstr, out_len); + } + + return newstr; +} /* }}} */ - /* {{{ _xml_startElementHandler() */ +/* {{{ _xml_startElementHandler() */ -void _xml_startElementHandler(void *userData, const char *name, - const char **attributes) +void _xml_startElementHandler(void *userData, const char *name, const char **attributes) { xml_parser *parser = (xml_parser *)userData; const char **attrs = attributes; + char *tag_name; + char *att, *val; + int att_len, val_len; + zval *retval, *args[3]; if (parser) { - zval *retval, *args[3]; - parser->level++; - if (parser->case_folding) { - name = php_strtoupper(estrdup(name), strlen(name)); - } + tag_name = _xml_decode_tag(parser, name); if (parser->startElementHandler) { args[0] = _xml_resource_zval(parser->index); - args[1] = _xml_string_zval(name); + args[1] = _xml_string_zval(tag_name); MAKE_STD_ZVAL(args[2]); array_init(args[2]); + while (attributes && *attributes) { - char *key = (char *)attributes[0]; - char *value = (char *)attributes[1]; - char *decoded_value; - int decoded_len; - if (parser->case_folding) { - key = php_strtoupper(estrdup(key), strlen(key)); - } - decoded_value = xml_utf8_decode(value, strlen(value), - &decoded_len, - parser->target_encoding); - - add_assoc_stringl(args[2], key, decoded_value, decoded_len, 0); - if (parser->case_folding) { - efree(key); - } + att = _xml_decode_tag(parser, attributes[0]); + val = xml_utf8_decode(attributes[1], strlen(attributes[1]), &val_len, parser->target_encoding); + + add_assoc_stringl(args[2], att, val, val_len, 0); + attributes += 2; + + efree(att); } if ((retval = xml_call_handler(parser, parser->startElementHandler, 3, args))) { @@ -680,34 +688,27 @@ void _xml_startElementHandler(void *userData, const char *name, array_init(tag); array_init(atr); - _xml_add_to_info(parser,((char *) name) + parser->toffset); + _xml_add_to_info(parser,((char *) tag_name) + parser->toffset); - add_assoc_string(tag,"tag",((char *) name) + parser->toffset,1); /* cast to avoid gcc-warning */ + add_assoc_string(tag,"tag",((char *) tag_name) + parser->toffset,1); /* cast to avoid gcc-warning */ add_assoc_string(tag,"type","open",1); add_assoc_long(tag,"level",parser->level); - parser->ltags[parser->level-1] = estrdup(name); + parser->ltags[parser->level-1] = estrdup(tag_name); parser->lastwasopen = 1; attributes = attrs; + while (attributes && *attributes) { - char *key = (char *)attributes[0]; - char *value = (char *)attributes[1]; - char *decoded_value; - int decoded_len; - if (parser->case_folding) { - key = php_strtoupper(estrdup(key), strlen(key)); - } - decoded_value = xml_utf8_decode(value, strlen(value), - &decoded_len, - parser->target_encoding); + att = _xml_decode_tag(parser, attributes[0]); + val = xml_utf8_decode(attributes[1], strlen(attributes[1]), &val_len, parser->target_encoding); - add_assoc_stringl(atr,key,decoded_value,decoded_len,0); + add_assoc_stringl(atr,att,val,val_len,0); + atcnt++; - if (parser->case_folding) { - efree(key); - } attributes += 2; + + efree(att); } if (atcnt) { @@ -720,9 +721,7 @@ void _xml_startElementHandler(void *userData, const char *name, zend_hash_next_index_insert(parser->data->value.ht,&tag,sizeof(zval*),(void *) &parser->ctag); } - if (parser->case_folding) { - efree((char *)name); - } + efree(tag_name); } } @@ -732,17 +731,16 @@ void _xml_startElementHandler(void *userData, const char *name, void _xml_endElementHandler(void *userData, const char *name) { xml_parser *parser = (xml_parser *)userData; + char *tag_name; if (parser) { zval *retval, *args[2]; - if (parser->case_folding) { - name = php_strtoupper(estrdup(name), strlen(name)); - } + tag_name = _xml_decode_tag(parser, name); if (parser->endElementHandler) { args[0] = _xml_resource_zval(parser->index); - args[1] = _xml_string_zval(name); + args[1] = _xml_string_zval(tag_name); if ((retval = xml_call_handler(parser, parser->endElementHandler, 2, args))) { zval_dtor(retval); @@ -760,9 +758,9 @@ void _xml_endElementHandler(void *userData, const char *name) array_init(tag); - _xml_add_to_info(parser,((char *) name) + parser->toffset); + _xml_add_to_info(parser,((char *) tag_name) + parser->toffset); - add_assoc_string(tag,"tag",((char *) name) + parser->toffset,1); /* cast to avoid gcc-warning */ + add_assoc_string(tag,"tag",((char *) tag_name) + parser->toffset,1); /* cast to avoid gcc-warning */ add_assoc_string(tag,"type","close",1); add_assoc_long(tag,"level",parser->level); @@ -772,12 +770,12 @@ void _xml_endElementHandler(void *userData, const char *name) parser->lastwasopen = 0; } - if (parser->case_folding) { - efree((char *)name); - } + efree(tag_name); + if (parser->ltags) { efree(parser->ltags[parser->level-1]); } + parser->level--; } } |