summary refs log tree commit diff
path: root/ACEXML/common/Encoding.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'ACEXML/common/Encoding.cpp')
-rw-r--r-- ACEXML/common/Encoding.cpp 17
1 files changed, 11 insertions, 6 deletions
diff --git a/ACEXML/common/Encoding.cpp b/ACEXML/common/Encoding.cpp
index c6e53ed8c47..9ca972816b8 100644
--- a/ACEXML/common/Encoding.cpp
+++ b/ACEXML/common/Encoding.cpp
@@ -18,8 +18,8 @@ const ACEXML_UTF8 ACEXML_Encoding::byte_order_mark_[][4] = {
{ '\xFF', '\xFE', '\x00', '\x00' }, // UCS-4, little-endian (4321 order)
{ '\x00', '\x00', '\xFF', '\xFE' }, // UCS-4, unusual octet order (2143)
{ '\xFE', '\xFF', '\x00', '\x00' }, // UCS-4, unusual octet order (3412)
- { '\xFE', '\xFF', '\xFF', '\xFF' }, // UTF-16, big-endian (3 & 4 ignored)
- { '\xFF', '\xFE', '\xFF', '\xFF' }, // UTF-16, little-endian ( 3 & 4 ignored)
+ { '\xFE', '\xFF', '\xFF', '\xFF' }, // UTF-16, big-endian (3 & 4 != 0)
+ { '\xFF', '\xFE', '\xFF', '\xFF' }, // UTF-16, little-endian ( 3 & 4 != 0)
{ '\xEF', '\xBB', '\xBF', '\xFF' } // UTF-8
};
@@ -36,11 +36,13 @@ const ACEXML_UTF8 ACEXML_Encoding::magic_values_[][4] = {
const ACEXML_Char*
ACEXML_Encoding::get_encoding (const char* input)
{
- if (ACE_OS::memcmp (&ACEXML_Encoding::byte_order_mark_[ACEXML_Encoding::UTF16BE][0], input, 2) == 0)
+ if ((ACE_OS::memcmp (&ACEXML_Encoding::byte_order_mark_[ACEXML_Encoding::UTF16BE][0], input, 2) == 0)
+ && (input[2] != 0 || input[3] != 0)) // 3 & 4 should not be both zero
return ACEXML_Encoding::encoding_names_[ACEXML_Encoding::UTF16BE];
- else if (ACE_OS::memcmp (&ACEXML_Encoding::byte_order_mark_[ACEXML_Encoding::UTF16LE][0], input, 2) == 0)
+ else if ((ACE_OS::memcmp (&ACEXML_Encoding::byte_order_mark_[ACEXML_Encoding::UTF16LE][0], input, 2) == 0)
+ && (input[2] != 0 || input[3] != 0)) // 3 & 4 should not be both zero (FF FE 00 00 is the UCS-4 little-endian BOM)
return ACEXML_Encoding::encoding_names_[ACEXML_Encoding::UTF16LE];
- else if (ACE_OS::memcmp (&ACEXML_Encoding::byte_order_mark_[ACEXML_Encoding::UTF8][0], input, 4) == 0)
+ else if (ACE_OS::memcmp (&ACEXML_Encoding::byte_order_mark_[ACEXML_Encoding::UTF8][0], input, 3) == 0)
return ACEXML_Encoding::encoding_names_[ACEXML_Encoding::UTF8];
else if (ACE_OS::memcmp (&ACEXML_Encoding::magic_values_[ACEXML_Encoding::UTF16BE][0], input, 4) == 0)
return ACEXML_Encoding::encoding_names_[ACEXML_Encoding::UTF16BE];
@@ -49,5 +51,8 @@ ACEXML_Encoding::get_encoding (const char* input)
else if (ACE_OS::memcmp (&ACEXML_Encoding::magic_values_[ACEXML_Encoding::UTF8][0], input, 4) == 0)
return ACEXML_Encoding::encoding_names_[ACEXML_Encoding::UTF8];
else
- return ACEXML_Encoding::encoding_names_[ACEXML_Encoding::OTHER];
+ {
+ ACE_ERROR ((LM_ERROR, "Unknown encoding. Assuming UTF-8\n"));
+ return ACEXML_Encoding::encoding_names_[ACEXML_Encoding::UTF8];
+ }
}