diff options
Diffstat (limited to 'ACEXML/common/HttpCharStream.cpp')
-rw-r--r-- | ACEXML/common/HttpCharStream.cpp | 119 |
1 files changed, 108 insertions, 11 deletions
diff --git a/ACEXML/common/HttpCharStream.cpp b/ACEXML/common/HttpCharStream.cpp
index 83426595168..2f1a9d4d754 100644
--- a/ACEXML/common/HttpCharStream.cpp
+++ b/ACEXML/common/HttpCharStream.cpp
@@ -4,6 +4,7 @@
 #include "ace/ace_wchar.h"
 #include "ace/Auto_Ptr.h"
 #include "ACEXML/common/HttpCharStream.h"
+#include "ACEXML/common/Encoding.h"
 
 ACE_RCSID (common, HttpCharStream, "$Id$")
 
@@ -72,7 +73,7 @@ ACEXML_HttpCharStream::open (const ACEXML_Char *url)
       this->close();
       ACE_ERROR_RETURN ((LM_ERROR, "Server returned status %d : %s\n",
                          result,
-                         "Refer HTTP/1.1 for details"), -1);
+                         "Refer HTTP/1.0 for details"), -1);
     }
 
   this->size_ = len;
@@ -236,6 +237,9 @@ ACEXML_HttpCharStream::get_url (size_t& len)
   if (this->stream_->seek (data_offset, SEEK_SET) == -1)
     ACE_ERROR_RETURN ((LM_ERROR, "%s: %m",
                        "Error in seeking to beginning of data"), -1);
+
+  if (this->determine_encoding() == -1)
+    return -1;
 
   return status;
 }
@@ -257,7 +261,7 @@ ACEXML_HttpCharStream::send_request (void)
   // Ensure that the <command> memory is deallocated.
   ACE_Auto_Basic_Array_Ptr<char> cmd_ptr (command);
 
-  int bytes = ACE_OS::sprintf (command, "GET %s HTTP/1.1\r\n", path);
+  int bytes = ACE_OS::sprintf (command, "GET %s HTTP/1.0\r\n", path);
   bytes += ACE_OS::sprintf (&command[bytes], "Host: %s\r\n",
                             this->url_addr_->get_host_name ());
   bytes += ACE_OS::sprintf (&command[bytes], "\r\n");
@@ -302,16 +306,53 @@ ACEXML_HttpCharStream::close (void)
 }
 
 int
-ACEXML_HttpCharStream::get (ACEXML_Char& ch)
+ACEXML_HttpCharStream::determine_encoding (void)
 {
-  ch = (ACEXML_Char) this->stream_->get_char();
-  return (ch == (ACEXML_Char)EOF ? -1 :0);
+  char input[4] = {0, 0, 0, 0};
+  int i = 0;
+  for (int c = 0; i < 4 && (c = this->stream_->peek_char(i)) != -1; ++i)
+    input[i] = (char) c;  // sentinel is tested on the value just read
+  if (i < 4)
+    return -1;
+  const ACEXML_Char* temp = ACEXML_Encoding::get_encoding (input);
+  if (!temp)
+    return -1;
+  else
+    {
+      this->encoding_ = ACE::strnew (temp);
+      ACE_DEBUG ((LM_DEBUG, "URI's encoding is %s\n", this->encoding_));
+    }
+  // Move over the byte-order-mark if present (at most 3 leading bytes).
+  for (int j = 0; j < 3; ++j)
+    {
+      if (input[j] == '\xFF' || input[j] == '\xFE' || input[j] == '\xEF' ||
+          input[j] == '\xBB' || input[j] == '\xBF')
+        {
+          this->stream_->get_char();
+          continue;
+        }
+      break;
+    }
+  return 0;
+}
+
+void
+ACEXML_HttpCharStream::rewind (void)
+{
+  this->stream_->rewind();
+}
+
+const ACEXML_Char*
+ACEXML_HttpCharStream::getEncoding (void)
+{
+  return this->encoding_;
 }
 
 int
 ACEXML_HttpCharStream::read (ACEXML_Char *str,
                              size_t len)
 {
+  len = len * sizeof (ACEXML_Char);
   char* temp = ACE_const_cast (char*, this->stream_->recv (len));
   str = ACE_TEXT_CHAR_TO_TCHAR (temp);
   if (str == 0)
@@ -319,20 +360,76 @@ ACEXML_HttpCharStream::read (ACEXML_Char *str,
   return len;
 }
 
+
+int
+ACEXML_HttpCharStream::get (ACEXML_Char& ch)
+{
+#if defined (ACE_USES_WCHAR)
+  return this->get_i (ch);
+#else
+  ch = (ACEXML_Char) this->stream_->get_char();
+  return (ch == (ACEXML_Char)EOF ? -1 :0);
+#endif /* ACE_USES_WCHAR */
+}
+
 int
 ACEXML_HttpCharStream::peek (void)
 {
+#if defined (ACE_USES_WCHAR)
+  return this->peek_i();
+#else
   return this->stream_->peek_char (0);
+#endif /* ACE_USES_WCHAR */
 }
 
-void
-ACEXML_HttpCharStream::rewind (void)
+
+#if defined (ACE_USES_WCHAR)
+int
+ACEXML_HttpCharStream::get_i (ACEXML_Char& ch)
 {
-  this->stream_->rewind();
+  if (ACE_OS::strcmp (this->encoding_, ACE_TEXT ("UTF-8")) == 0)
+    {
+      ch = (ACEXML_Char) this->stream_->get_char();
+      return (ch == (ACEXML_Char)EOF ? -1 : 0);
+    }
+  int BE = (ACE_OS::strcmp (this->encoding_,
+                            ACE_TEXT ("UTF-16BE")) == 0) ? 1 : 0;
+  ACEXML_Char input[2] = {0};
+  int i = 0;
+  for (; i < 2; ++i)  // stop at the first EOF actually read
+    if ((input[i] = this->stream_->get_char())
+        == (ACEXML_Char) EOF)
+      break;
+  if (i < 2)
+    {
+      ch = 0;
+      return -1;  // same failure code as the UTF-8 path above
+    }
+  ch = BE ? input[0] << 8 | input[1] : input[1] << 8 | input[0];
+  return 0;
 }
 
-const ACEXML_Char*
-ACEXML_HttpCharStream::getEncoding (void)
+int
+ACEXML_HttpCharStream::peek_i (void)
 {
-  return this->encoding_;
+  // If we are reading a UTF-8 encoded file, just peek at the next byte.
+  if (ACE_OS::strcmp (this->encoding_, ACE_TEXT ("UTF-8")) == 0)
+    {
+      ACEXML_Char ch = (ACEXML_Char) this->stream_->peek_char (0);
+      return ch;
+    }
+
+  int BE = (ACE_OS::strcmp (this->encoding_,
+                            ACE_TEXT ("UTF-16BE")) == 0) ? 1 : 0;
+  // Peek at the next two bytes of the stream without consuming them.
+  ACEXML_Char input[2] = {0, 0};
+  int i = 0;
+  for (; i < 2; ++i)  // stop at the first EOF actually read
+    if ((input[i] = this->stream_->peek_char (i))
+        == (ACEXML_Char) EOF)
+      break;
+  if (i < 2)
+    return -1;
+  return (BE ? input[0] << 8 | input[1] : input[1] << 8 | input[0]);
 }
+#endif /* ACE_USES_WCHAR */