summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBurak Arslan <burak.arslan@arskom.com.tr>2017-01-06 19:44:21 +0300
committerBurak Arslan <burak.arslan@arskom.com.tr>2017-01-06 19:44:21 +0300
commit5933f1428d0dc243d5dc8f86b0458e2bf1cf342d (patch)
tree106670f4b7fabc40b409276c81ab570e19755e2b
parent457e3f7e00865e5c3354d68d82142e96ae4290a6 (diff)
downloadpython-lxml-5933f1428d0dc243d5dc8f86b0458e2bf1cf342d.tar.gz
port libxml2's xmlBufAttrSerializeTxtContent as _write_attr_string
-rw-r--r--src/lxml/serializer.pxi196
1 files changed, 195 insertions, 1 deletions
diff --git a/src/lxml/serializer.pxi b/src/lxml/serializer.pxi
index 77608a9c..c4c590b7 100644
--- a/src/lxml/serializer.pxi
+++ b/src/lxml/serializer.pxi
@@ -399,6 +399,199 @@ cdef void _writeNextSiblings(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node,
pretty_print, encoding)
c_sibling = c_sibling.next
+
+cdef unsigned char *xmlSerializeHexCharRef(unsigned char *out, int val):
+ cdef xmlChar *ptr
+ cdef xmlChar c
+
+ out[0] = '&'
+ out += 1
+
+ out[0] = '#'
+ out += 1
+
+ out[0] = 'x'
+ out += 1
+
+ if (val < 0x10):
+ ptr = out
+ elif (val < 0x100):
+ ptr = out + 1
+ elif (val < 0x1000):
+ ptr = out + 2
+ elif (val < 0x10000):
+ ptr = out + 3
+ elif (val < 0x100000):
+ ptr = out + 4
+ else:
+ ptr = out + 5
+
+ out = ptr + 1
+ while val > 0:
+ c = (val & 0xF)
+
+ if c == 0:
+ ptr[0] = '0'
+ elif c == 1:
+ ptr[0] = '1'
+ elif c == 2:
+ ptr[0] = '2'
+ elif c == 3:
+ ptr[0] = '3'
+ elif c == 4:
+ ptr[0] = '4'
+ elif c == 5:
+ ptr[0] = '5'
+ elif c == 6:
+ ptr[0] = '6'
+ elif c == 7:
+ ptr[0] = '7'
+ elif c == 8:
+ ptr[0] = '8'
+ elif c == 9:
+ ptr[0] = '9'
+ elif c == 0xA:
+ ptr[0] = 'A'
+ elif c == 0xB:
+ ptr[0] = 'B'
+ elif c == 0xC:
+ ptr[0] = 'C'
+ elif c == 0xD:
+ ptr[0] = 'D'
+ elif c == 0xE:
+ ptr[0] = 'E'
+ elif c == 0xF:
+ ptr[0] = 'F'
+ else:
+ ptr[0] = '0'
+
+ ptr -= 1
+
+ val >>= 4
+
+ out[0] = ';'
+ out += 1
+ out[0] = 0
+
+ return out
+
+
+cdef void _write_attr_string(tree.xmlOutputBuffer* buf, const xmlChar *string):
+ cdef xmlChar *base
+ cdef xmlChar *cur
+
+ cdef xmlChar tmp[12];
+ cdef int val = 0;
+ cdef int l = 1;
+
+ if string == NULL:
+ return
+
+ base = cur = string
+ while (cur[0] != 0):
+ if (cur[0] == '\n'):
+ if (base != cur):
+ tree.xmlOutputBufferWrite(buf, cur - base, _cstr(base))
+
+ tree.xmlOutputBufferWrite(buf, 5, "&#10;")
+ cur += 1;
+ base = cur;
+
+ elif (cur[0] == '\r'):
+ if (base != cur):
+ tree.xmlOutputBufferWrite(buf, cur - base, _cstr(base))
+
+ tree.xmlOutputBufferWrite(buf, 5, "&#13;");
+ cur += 1;
+ base = cur;
+
+ elif (cur[0] == '\t'):
+ if (base != cur):
+ tree.xmlOutputBufferWrite(buf, cur - base, _cstr(base))
+
+ tree.xmlOutputBufferWrite(buf, 4, "&#9;");
+ cur += 1;
+ base = cur;
+
+ elif (cur[0] == '"'):
+ if (base != cur):
+ tree.xmlOutputBufferWrite(buf, cur - base, _cstr(base))
+
+ tree.xmlOutputBufferWrite(buf, 6, "&quot;");
+ cur += 1;
+ base = cur;
+
+ elif (cur[0] == '<'):
+ if (base != cur):
+ tree.xmlOutputBufferWrite(buf, cur - base, _cstr(base))
+
+ tree.xmlOutputBufferWrite(buf, 4, "&lt;");
+ cur += 1;
+ base = cur;
+
+ elif (cur[0] == '>'):
+ if (base != cur):
+ tree.xmlOutputBufferWrite(buf, cur - base, _cstr(base))
+
+ tree.xmlOutputBufferWrite(buf, 4, "&gt;");
+ cur += 1;
+ base = cur;
+ elif (cur[0] == '&'):
+ if (base != cur):
+ tree.xmlOutputBufferWrite(buf, cur - base, _cstr(base))
+
+ tree.xmlOutputBufferWrite(buf, 5, "&amp;")
+ cur += 1
+ base = cur
+
+ elif (cur[0] >= 0x80) and (cur[1] != 0):
+
+ if (base != cur):
+ tree.xmlOutputBufferWrite(buf, cur - base, _cstr(base))
+
+ if (cur[0] < 0xC0):
+ raise ValueError("Not utf8")
+
+ elif (cur[0] < 0xE0):
+ val = (cur[0]) & 0x1F;
+ val <<= 6;
+ val |= (cur[1]) & 0x3F;
+ l = 2;
+
+ elif ((cur[0] < 0xF0) and (cur[2] != 0)):
+ val = (cur[0]) & 0x0F;
+ val <<= 6;
+ val |= (cur[1]) & 0x3F;
+ val <<= 6;
+ val |= (cur[2]) & 0x3F;
+ l = 3;
+
+ elif ((cur[0] < 0xF8) and (cur[2] != 0) and (cur[3] != 0)):
+ val = (cur[0]) & 0x07;
+ val <<= 6;
+ val |= (cur[1]) & 0x3F;
+ val <<= 6;
+ val |= (cur[2]) & 0x3F;
+ val <<= 6;
+ val |= (cur[3]) & 0x3F;
+ l = 4;
+
+ if ((l == 1) or (not tree.xmlIsChar_ch(val))):
+ raise ValueError(val)
+
+ # We could do multiple things here. Just save
+ # as a char ref
+ xmlSerializeHexCharRef(tmp, val);
+ tree.xmlOutputBufferWrite(buf, -1, _cstr(<xmlChar *> tmp));
+ cur += l;
+ base = cur;
+
+ else:
+ cur += 1;
+
+ if (base != cur):
+ tree.xmlOutputBufferWrite(buf, cur - base, _cstr(base))
+
############################################################
# output to file-like objects
@@ -883,7 +1076,8 @@ cdef class _IncrementalFileWriter:
tree.xmlOutputBufferWrite(self._c_out, 1, ' ')
self._write_qname(name, prefix)
tree.xmlOutputBufferWrite(self._c_out, 2, '="')
- tree.xmlBufAttrSerializeTxtContent(self._c_out, NULL, NULL, _xcstr(value))
+ _write_attr_string(self._c_out, _xcstr(value))
+
tree.xmlOutputBufferWrite(self._c_out, 1, '"')
cdef _write_end_element(self, element_config):