diff options
-rw-r--r-- | src/lxml/serializer.pxi | 196 |
1 files changed, 195 insertions, 1 deletions
diff --git a/src/lxml/serializer.pxi b/src/lxml/serializer.pxi index 77608a9c..c4c590b7 100644 --- a/src/lxml/serializer.pxi +++ b/src/lxml/serializer.pxi @@ -399,6 +399,199 @@ cdef void _writeNextSiblings(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node, pretty_print, encoding) c_sibling = c_sibling.next + +cdef unsigned char *xmlSerializeHexCharRef(unsigned char *out, int val): + cdef xmlChar *ptr + cdef xmlChar c + + out[0] = '&' + out += 1 + + out[0] = '#' + out += 1 + + out[0] = 'x' + out += 1 + + if (val < 0x10): + ptr = out + elif (val < 0x100): + ptr = out + 1 + elif (val < 0x1000): + ptr = out + 2 + elif (val < 0x10000): + ptr = out + 3 + elif (val < 0x100000): + ptr = out + 4 + else: + ptr = out + 5 + + out = ptr + 1 + while val > 0: + c = (val & 0xF) + + if c == 0: + ptr[0] = '0' + elif c == 1: + ptr[0] = '1' + elif c == 2: + ptr[0] = '2' + elif c == 3: + ptr[0] = '3' + elif c == 4: + ptr[0] = '4' + elif c == 5: + ptr[0] = '5' + elif c == 6: + ptr[0] = '6' + elif c == 7: + ptr[0] = '7' + elif c == 8: + ptr[0] = '8' + elif c == 9: + ptr[0] = '9' + elif c == 0xA: + ptr[0] = 'A' + elif c == 0xB: + ptr[0] = 'B' + elif c == 0xC: + ptr[0] = 'C' + elif c == 0xD: + ptr[0] = 'D' + elif c == 0xE: + ptr[0] = 'E' + elif c == 0xF: + ptr[0] = 'F' + else: + ptr[0] = '0' + + ptr -= 1 + + val >>= 4 + + out[0] = ';' + out += 1 + out[0] = 0 + + return out + + +cdef void _write_attr_string(tree.xmlOutputBuffer* buf, const xmlChar *string): + cdef xmlChar *base + cdef xmlChar *cur + + cdef xmlChar tmp[12]; + cdef int val = 0; + cdef int l = 1; + + if string == NULL: + return + + base = cur = string + while (cur[0] != 0): + if (cur[0] == '\n'): + if (base != cur): + tree.xmlOutputBufferWrite(buf, cur - base, _cstr(base)) + + tree.xmlOutputBufferWrite(buf, 5, " ") + cur += 1; + base = cur; + + elif (cur[0] == '\r'): + if (base != cur): + tree.xmlOutputBufferWrite(buf, cur - base, _cstr(base)) + + tree.xmlOutputBufferWrite(buf, 5, " "); + cur += 1; + base = cur; + + elif (cur[0] == '\t'): + if (base != cur): + tree.xmlOutputBufferWrite(buf, cur - base, _cstr(base)) + + tree.xmlOutputBufferWrite(buf, 4, "	"); + cur += 1; + base = cur; + + elif (cur[0] == '"'): + if (base != cur): + tree.xmlOutputBufferWrite(buf, cur - base, _cstr(base)) + + tree.xmlOutputBufferWrite(buf, 6, """); + cur += 1; + base = cur; + + elif (cur[0] == '<'): + if (base != cur): + tree.xmlOutputBufferWrite(buf, cur - base, _cstr(base)) + + tree.xmlOutputBufferWrite(buf, 4, "<"); + cur += 1; + base = cur; + + elif (cur[0] == '>'): + if (base != cur): + tree.xmlOutputBufferWrite(buf, cur - base, _cstr(base)) + + tree.xmlOutputBufferWrite(buf, 4, ">"); + cur += 1; + base = cur; + elif (cur[0] == '&'): + if (base != cur): + tree.xmlOutputBufferWrite(buf, cur - base, _cstr(base)) + + tree.xmlOutputBufferWrite(buf, 5, "&") + cur += 1 + base = cur + + elif (cur[0] >= 0x80) and (cur[1] != 0): + + if (base != cur): + tree.xmlOutputBufferWrite(buf, cur - base, _cstr(base)) + + if (cur[0] < 0xC0): + raise ValueError("Not utf8") + + elif (cur[0] < 0xE0): + val = (cur[0]) & 0x1F; + val <<= 6; + val |= (cur[1]) & 0x3F; + l = 2; + + elif ((cur[0] < 0xF0) and (cur[2] != 0)): + val = (cur[0]) & 0x0F; + val <<= 6; + val |= (cur[1]) & 0x3F; + val <<= 6; + val |= (cur[2]) & 0x3F; + l = 3; + + elif ((cur[0] < 0xF8) and (cur[2] != 0) and (cur[3] != 0)): + val = (cur[0]) & 0x07; + val <<= 6; + val |= (cur[1]) & 0x3F; + val <<= 6; + val |= (cur[2]) & 0x3F; + val <<= 6; + val |= (cur[3]) & 0x3F; + l = 4; + + if ((l == 1) or (not tree.xmlIsChar_ch(val))): + raise ValueError(val) + + # We could do multiple things here. Just save + # as a char ref + xmlSerializeHexCharRef(tmp, val); + tree.xmlOutputBufferWrite(buf, -1, _cstr(<xmlChar *> tmp)); + cur += l; + base = cur; + + else: + cur += 1; + + if (base != cur): + tree.xmlOutputBufferWrite(buf, cur - base, _cstr(base)) + ############################################################ # output to file-like objects @@ -883,7 +1076,8 @@ cdef class _IncrementalFileWriter: tree.xmlOutputBufferWrite(self._c_out, 1, ' ') self._write_qname(name, prefix) tree.xmlOutputBufferWrite(self._c_out, 2, '="') - tree.xmlBufAttrSerializeTxtContent(self._c_out, NULL, NULL, _xcstr(value)) + _write_attr_string(self._c_out, _xcstr(value)) + tree.xmlOutputBufferWrite(self._c_out, 1, '"') cdef _write_end_element(self, element_config): |