diff options
Diffstat (limited to 'ACEXML/common/Encoding.cpp')
-rw-r--r-- | ACEXML/common/Encoding.cpp | 17 |
1 files changed, 11 insertions, 6 deletions
diff --git a/ACEXML/common/Encoding.cpp b/ACEXML/common/Encoding.cpp
index c6e53ed8c47..9ca972816b8 100644
--- a/ACEXML/common/Encoding.cpp
+++ b/ACEXML/common/Encoding.cpp
@@ -18,8 +18,8 @@ const ACEXML_UTF8 ACEXML_Encoding::byte_order_mark_[][4] = {
   { '\xFF', '\xFE', '\x00', '\x00' }, // UCS-4, little-endian (4321 order)
   { '\x00', '\x00', '\xFF', '\xFE' }, // UCS-4, unusual octet order (2143)
   { '\xFE', '\xFF', '\x00', '\x00' }, // UCS-4, unusual octet order (3412)
-  { '\xFE', '\xFF', '\xFF', '\xFF' }, // UTF-16, big-endian (3 & 4 ignored)
-  { '\xFF', '\xFE', '\xFF', '\xFF' }, // UTF-16, little-endian ( 3 & 4 ignored)
+  { '\xFE', '\xFF', '\xFF', '\xFF' }, // UTF-16, big-endian (3 & 4 not both zero)
+  { '\xFF', '\xFE', '\xFF', '\xFF' }, // UTF-16, little-endian (3 & 4 not both zero)
   { '\xEF', '\xBB', '\xBF', '\xFF' } // UTF-8
 };
 
@@ -36,11 +36,13 @@ const ACEXML_UTF8 ACEXML_Encoding::magic_values_[][4] = {
 const ACEXML_Char*
 ACEXML_Encoding::get_encoding (const char* input)
 {
-  if (ACE_OS::memcmp (&ACEXML_Encoding::byte_order_mark_[ACEXML_Encoding::UTF16BE][0], input, 2) == 0)
+  if ((ACE_OS::memcmp (&ACEXML_Encoding::byte_order_mark_[ACEXML_Encoding::UTF16BE][0], input, 2) == 0)
+      && (input[2] != 0 || input[3] != 0)) // FE FF 00 00 is the UCS-4 (3412) mark, not UTF-16
     return ACEXML_Encoding::encoding_names_[ACEXML_Encoding::UTF16BE];
-  else if (ACE_OS::memcmp (&ACEXML_Encoding::byte_order_mark_[ACEXML_Encoding::UTF16LE][0], input, 2) == 0)
+  else if ((ACE_OS::memcmp (&ACEXML_Encoding::byte_order_mark_[ACEXML_Encoding::UTF16LE][0], input, 2) == 0)
+           && (input[2] != 0 || input[3] != 0)) // FF FE 00 00 is the UCS-4 little-endian mark, not UTF-16
     return ACEXML_Encoding::encoding_names_[ACEXML_Encoding::UTF16LE];
-  else if (ACE_OS::memcmp (&ACEXML_Encoding::byte_order_mark_[ACEXML_Encoding::UTF8][0], input, 4) == 0)
+  else if (ACE_OS::memcmp (&ACEXML_Encoding::byte_order_mark_[ACEXML_Encoding::UTF8][0], input, 3) == 0)
     return ACEXML_Encoding::encoding_names_[ACEXML_Encoding::UTF8];
   else if (ACE_OS::memcmp (&ACEXML_Encoding::magic_values_[ACEXML_Encoding::UTF16BE][0], input, 4) == 0)
     return ACEXML_Encoding::encoding_names_[ACEXML_Encoding::UTF16BE];
@@ -49,5 +51,8 @@ ACEXML_Encoding::get_encoding (const char* input)
   else if (ACE_OS::memcmp (&ACEXML_Encoding::magic_values_[ACEXML_Encoding::UTF8][0], input, 4) == 0)
     return ACEXML_Encoding::encoding_names_[ACEXML_Encoding::UTF8];
   else
-    return ACEXML_Encoding::encoding_names_[ACEXML_Encoding::OTHER];
+    {
+      ACE_ERROR ((LM_ERROR, "Unknown encoding. Assuming UTF-8\n"));
+      return ACEXML_Encoding::encoding_names_[ACEXML_Encoding::UTF8];
+    }
 }