diff --git a/encoding.c b/encoding.c index bbd26135..cbe057cc 100644 --- a/encoding.c +++ b/encoding.c @@ -43,6 +43,7 @@ #include "private/buf.h" #include "private/enc.h" +#include "private/entities.h" #include "private/error.h" #ifdef LIBXML_ICU_ENABLED @@ -1744,8 +1745,7 @@ retry: * and continue the transcoding phase, hoping the error * did not mangle the encoder state. */ - charrefLen = snprintf((char *) &charref[0], sizeof(charref), - "&#%d;", cur); + charrefLen = xmlSerializeDecCharRef((char *) charref, cur); xmlBufGrow(out, charrefLen * 4); c_out = xmlBufAvail(out); c_in = charrefLen; @@ -1856,8 +1856,7 @@ retry: * and continue the transcoding phase, hoping the error * did not mangle the encoder state. */ - charrefLen = snprintf((char *) &charref[0], sizeof(charref), - "&#%d;", cur); + charrefLen = xmlSerializeDecCharRef((char *) charref, cur); xmlBufferShrink(in, len); xmlBufferGrow(out, charrefLen * 4); written = out->size - out->use - 1; diff --git a/entities.c b/entities.c index 6c8c2a1b..3b36a2da 100644 --- a/entities.c +++ b/entities.c @@ -512,6 +512,71 @@ xmlGetDocEntity(const xmlDoc *doc, const xmlChar *name) { return(xmlGetPredefinedEntity(name)); } +int +xmlSerializeHexCharRef(char *buf, int val) { + char *out = buf; + int shift = 0, bits; + + *out++ = '&'; + *out++ = '#'; + *out++ = 'x'; + + bits = val; + if (bits & 0xFF0000) { + shift = 16; + bits &= 0xFF0000; + } else if (bits & 0x00FF00) { + shift = 8; + bits &= 0x00FF00; + } + if (bits & 0xF0F0F0) { + shift += 4; + } + + do { + int d = (val >> shift) & 0x0F; + + if (d < 10) + *out++ = '0' + d; + else + *out++ = 'A' + (d - 10); + + shift -= 4; + } while (shift >= 0); + + *out++ = ';'; + + return(out - buf); +} + +int +xmlSerializeDecCharRef(char *buf, int val) { + char *out = buf; + int len, i; + + *out++ = '&'; + *out++ = '#'; + + if (val < 100) { + len = (val < 10) ? 1 : 2; + } else if (val < 10000) { + len = (val < 1000) ? 3 : 4; + } else if (val < 1000000) { + len = (val < 100000) ? 
5 : 6; + } else { + len = 7; + } + + for (i = len - 1; i >= 0; i--) { + out[i] = '0' + val % 10; + val /= 10; + } + + out[len] = ';'; + + return(len + 3); +} + static const char xmlEscapeSafe[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -540,7 +605,7 @@ xmlEscapeText(const xmlChar *text, int flags) { unescaped = cur; while (*cur != '\0') { - char buf[13]; + char buf[12]; const xmlChar *end; const xmlChar *repl; size_t used; @@ -618,7 +683,7 @@ xmlEscapeText(const xmlChar *text, int flags) { val = 0xFFFD; } - replSize = snprintf(buf, sizeof(buf), "&#x%X;", val); + replSize = xmlSerializeHexCharRef(buf, val); repl = BAD_CAST buf; } else if ((flags & XML_ESCAPE_ALLOW_INVALID) || (c >= 0x20) || diff --git a/include/private/entities.h b/include/private/entities.h index d09487ab..d038cf05 100644 --- a/include/private/entities.h +++ b/include/private/entities.h @@ -27,6 +27,11 @@ #define XML_ESCAPE_QUOT (1u << 3) #define XML_ESCAPE_ALLOW_INVALID (1u << 4) +XML_HIDDEN int +xmlSerializeHexCharRef(char *buf, int val); +XML_HIDDEN int +xmlSerializeDecCharRef(char *buf, int val); + XML_HIDDEN xmlChar * xmlEscapeText(const xmlChar *text, int flags); diff --git a/xmlsave.c b/xmlsave.c index ccee19a1..d3cde5b9 100644 --- a/xmlsave.c +++ b/xmlsave.c @@ -125,51 +125,10 @@ xmlSaveErr(xmlOutputBufferPtr out, int code, xmlNodePtr node, * Special escaping routines * * * ************************************************************************/ -static char * -xmlSerializeHexCharRef(char *out, int val) { - char *ptr; - - *out++ = '&'; - *out++ = '#'; - *out++ = 'x'; - if (val < 0x10) ptr = out; - else if (val < 0x100) ptr = out + 1; - else if (val < 0x1000) ptr = out + 2; - else if (val < 0x10000) ptr = out + 3; - else if (val < 0x100000) ptr = out + 4; - else ptr = out + 5; - out = ptr + 1; - while (val > 0) { - switch (val & 0xF) { - case 0: *ptr-- = '0'; break; - case 1: *ptr-- = '1'; break; - case 2: *ptr-- = '2'; break; 
- case 3: *ptr-- = '3'; break; - case 4: *ptr-- = '4'; break; - case 5: *ptr-- = '5'; break; - case 6: *ptr-- = '6'; break; - case 7: *ptr-- = '7'; break; - case 8: *ptr-- = '8'; break; - case 9: *ptr-- = '9'; break; - case 0xA: *ptr-- = 'A'; break; - case 0xB: *ptr-- = 'B'; break; - case 0xC: *ptr-- = 'C'; break; - case 0xD: *ptr-- = 'D'; break; - case 0xE: *ptr-- = 'E'; break; - case 0xF: *ptr-- = 'F'; break; - default: *ptr-- = '0'; break; - } - val >>= 4; - } - *out++ = ';'; - *out = 0; - return(out); -} static void xmlSerializeText(xmlOutputBufferPtr buf, const xmlChar *string, unsigned flags) { - char tmp[12]; const char *base, *cur; if (string == NULL) @@ -178,33 +137,12 @@ xmlSerializeText(xmlOutputBufferPtr buf, const xmlChar *string, base = cur = (const char *) string; while (*cur != 0) { + char tempBuf[12]; const char *repl = NULL; int replSize = 0; + int chunkSize = 1; int c = (unsigned char) *cur; - if ((c >= 0x80) && (flags & XML_ESCAPE_NON_ASCII)) { - int val = 0, l = 4; - - if (base != cur) - xmlOutputBufferWrite(buf, cur - base, base); - - val = xmlGetUTF8Char((const xmlChar *) cur, &l); - if (val < 0) { - val = 0xFFFD; - cur++; - } else { - if (!IS_CHAR(val)) - val = 0xFFFD; - cur += l; - } - - xmlSerializeHexCharRef(tmp, val); - xmlOutputBufferWriteString(buf, tmp); - base = cur; - - continue; - } - switch (c) { case '\t': if (flags & XML_ESCAPE_ATTR) { @@ -255,6 +193,20 @@ xmlSerializeText(xmlOutputBufferPtr buf, const xmlChar *string, if (c < 0x20) { repl = "&#xFFFD;"; replSize = 8; + } else if ((c >= 0x80) && (flags & XML_ESCAPE_NON_ASCII)) { + int val = 0, l = 4; + + val = xmlGetUTF8Char((const xmlChar *) cur, &l); + if (val < 0) { + val = 0xFFFD; + } else { + if (!IS_CHAR(val)) + val = 0xFFFD; + chunkSize = l; + } + + replSize = xmlSerializeHexCharRef(tempBuf, val); + repl = tempBuf; } break; } @@ -265,7 +217,7 @@ xmlSerializeText(xmlOutputBufferPtr buf, const xmlChar *string, if (base != cur) xmlOutputBufferWrite(buf, cur - base, base); 
xmlOutputBufferWrite(buf, replSize, repl); - cur++; + cur += chunkSize; base = cur; } }