summary | refs | log | tree | commit | diff
path: root/ACEXML/common/HttpCharStream.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'ACEXML/common/HttpCharStream.cpp')
-rw-r--r-- ACEXML/common/HttpCharStream.cpp 119
1 files changed, 108 insertions, 11 deletions
diff --git a/ACEXML/common/HttpCharStream.cpp b/ACEXML/common/HttpCharStream.cpp
index 83426595168..2f1a9d4d754 100644
--- a/ACEXML/common/HttpCharStream.cpp
+++ b/ACEXML/common/HttpCharStream.cpp
@@ -4,6 +4,7 @@
#include "ace/ace_wchar.h"
#include "ace/Auto_Ptr.h"
#include "ACEXML/common/HttpCharStream.h"
+#include "ACEXML/common/Encoding.h"
ACE_RCSID (common, HttpCharStream, "$Id$")
@@ -72,7 +73,7 @@ ACEXML_HttpCharStream::open (const ACEXML_Char *url)
this->close();
ACE_ERROR_RETURN ((LM_ERROR, "Server returned status %d : %s\n",
result,
- "Refer HTTP/1.1 for details"), -1);
+ "Refer HTTP/1.0 for details"), -1);
}
this->size_ = len;
@@ -236,6 +237,9 @@ ACEXML_HttpCharStream::get_url (size_t& len)
if (this->stream_->seek (data_offset, SEEK_SET) == -1)
ACE_ERROR_RETURN ((LM_ERROR, "%s: %m",
"Error in seeking to beginning of data"), -1);
+
+ if (this->determine_encoding() == -1)
+ return -1;
return status;
}
@@ -257,7 +261,7 @@ ACEXML_HttpCharStream::send_request (void)
// Ensure that the <command> memory is deallocated.
ACE_Auto_Basic_Array_Ptr<char> cmd_ptr (command);
- int bytes = ACE_OS::sprintf (command, "GET %s HTTP/1.1\r\n", path);
+ int bytes = ACE_OS::sprintf (command, "GET %s HTTP/1.0\r\n", path);
bytes += ACE_OS::sprintf (&command[bytes], "Host: %s\r\n",
this->url_addr_->get_host_name ());
bytes += ACE_OS::sprintf (&command[bytes], "\r\n");
@@ -302,16 +306,53 @@ ACEXML_HttpCharStream::close (void)
}
+// Sniff the document encoding from the first four bytes of the stream and
+// remember it in encoding_, then step over a leading byte-order-mark.
+// Returns 0 on success, -1 if fewer than four bytes are available or
+// ACEXML_Encoding::get_encoding() does not recognise the prefix.
int
-ACEXML_HttpCharStream::get (ACEXML_Char& ch)
+ACEXML_HttpCharStream::determine_encoding (void)
{
- ch = (ACEXML_Char) this->stream_->get_char();
- return (ch == (ACEXML_Char)EOF ? -1 :0);
+  char input[4] = {0, 0, 0, 0};
+  for (int i = 0; i < 4; ++i)
+    {
+      // Test the value just read, and test it before narrowing to char, so
+      // that a 0xFF data byte (e.g. a UTF-16 BOM) is not mistaken for -1.
+      // NOTE(review): assumes peek_char() returns an int that is -1 only
+      // at end of data -- confirm against the stream class.
+      int const c = this->stream_->peek_char (i);
+      if (c == -1)
+        return -1;
+      input[i] = static_cast<char> (c);
+    }
+  const ACEXML_Char* temp = ACEXML_Encoding::get_encoding (input);
+  if (!temp)
+    return -1;
+  // NOTE(review): any previous encoding_ value is overwritten here without
+  // being released -- confirm ownership where encoding_ is destroyed.
+  this->encoding_ = ACE::strnew (temp);
+  ACE_DEBUG ((LM_DEBUG, "URI's encoding is %s\n", this->encoding_));
+  // Move over the byte-order-mark if present.  At most three leading bytes
+  // are consumed; the scan stops at the first byte that is not a BOM byte.
+  for (int j = 0; j < 3; ++j)
+    {
+      if (input[j] != '\xFF' && input[j] != '\xFE' && input[j] != '\xEF' &&
+          input[j] != '\xBB' && input[j] != '\xBF')
+        break;
+      this->stream_->get_char();
+    }
+  return 0;
+}
+
+// Forward the rewind request to the underlying stream object.
+void
+ACEXML_HttpCharStream::rewind (void)
+{
+  stream_->rewind ();
+}
+
+// Accessor: hand back the encoding name currently stored in encoding_.
+const ACEXML_Char*
+ACEXML_HttpCharStream::getEncoding (void)
+{
+  return encoding_;
}
int
ACEXML_HttpCharStream::read (ACEXML_Char *str,
size_t len)
{
+ len = len * sizeof (ACEXML_Char);
char* temp = ACE_const_cast (char*, this->stream_->recv (len));
str = ACE_TEXT_CHAR_TO_TCHAR (temp);
if (str == 0)
@@ -319,20 +360,76 @@ ACEXML_HttpCharStream::read (ACEXML_Char *str,
return len;
}
+
+// Read the next character into <ch>.  Wide-character builds delegate to
+// get_i(); narrow builds take one byte from the stream.  Returns 0 on
+// success and -1 at end of stream.
+int
+ACEXML_HttpCharStream::get (ACEXML_Char& ch)
+{
+#if defined (ACE_USES_WCHAR)
+  return this->get_i (ch);
+#else
+  // Compare against EOF while the value is still an int: after narrowing
+  // to ACEXML_Char a 0xFF data byte would compare equal to (ACEXML_Char)EOF.
+  // NOTE(review): assumes get_char() returns an int distinct from every
+  // data byte at end of stream -- confirm against the stream class.
+  int const c = this->stream_->get_char();
+  ch = (ACEXML_Char) c;
+  return (c == EOF ? -1 : 0);
+#endif /* ACE_USES_WCHAR */
+}
+
+// Look at the next character without consuming it: narrow builds peek at
+// offset 0 of the underlying stream, wide-character builds use peek_i().
int
ACEXML_HttpCharStream::peek (void)
{
+#if !defined (ACE_USES_WCHAR)
return this->stream_->peek_char (0);
+#else
+  return this->peek_i();
+#endif /* !ACE_USES_WCHAR */
}
-void
-ACEXML_HttpCharStream::rewind (void)
+
+#if defined (ACE_USES_WCHAR)
+// Wide-character read.  When encoding_ is "UTF-8" one byte is taken from
+// the stream and stored in <ch>; otherwise two bytes are combined into a
+// single 16-bit unit, big-endian only when encoding_ is exactly "UTF-16BE".
+// Returns 0 on success, -1 when the stream ends before a full unit is read.
+int
+ACEXML_HttpCharStream::get_i (ACEXML_Char& ch)
{
- this->stream_->rewind();
+  if (ACE_OS::strcmp (this->encoding_, ACE_TEXT ("UTF-8")) == 0)
+    {
+      // get_char() is the accessor the other readers in this file use; test
+      // for EOF on the int, before it is converted to ACEXML_Char.
+      int const c = this->stream_->get_char();
+      ch = (ACEXML_Char) c;
+      return (c == EOF ? -1 : 0);
+    }
+  int const BE = (ACE_OS::strcmp (this->encoding_,
+                                  ACE_TEXT ("UTF-16BE")) == 0) ? 1 : 0;
+  int input[2] = {0, 0};
+  for (int i = 0; i < 2; ++i)
+    {
+      // Check the byte that was just read (not the slot about to be filled),
+      // so a truncated stream is reported instead of silently decoded.
+      int const c = this->stream_->get_char();
+      if (c == EOF)
+        {
+          ch = 0;
+          return -1;
+        }
+      // Mask to 0..255 so a sign-extended byte cannot corrupt the shift/or.
+      input[i] = c & 0xFF;
+    }
+  ch = (ACEXML_Char) (BE ? input[0] << 8 | input[1] : input[1] << 8 | input[0]);
+  return 0;
}
-const ACEXML_Char*
-ACEXML_HttpCharStream::getEncoding (void)
+// Wide-character peek: report the next character without consuming it,
+// using the same "UTF-8" / "UTF-16BE" / other split as get_i.  The UTF-8
+// branch returns the stream's peek_char(0) result unchanged; the two-byte
+// branch returns -1 when either byte is unavailable.
+int
+ACEXML_HttpCharStream::peek_i (void)
{
- return this->encoding_;
+  // UTF-8 data is examined one byte at a time.  Return the int as-is:
+  // converting it to ACEXML_Char and back could alter the EOF value.
+  if (ACE_OS::strcmp (this->encoding_, ACE_TEXT ("UTF-8")) == 0)
+    return this->stream_->peek_char (0);
+
+  int const BE = (ACE_OS::strcmp (this->encoding_,
+                                  ACE_TEXT ("UTF-16BE")) == 0) ? 1 : 0;
+  // Peek into the stream, two bytes ahead, via the underlying stream object
+  // (the same peek_char used by peek() and the UTF-8 branch above).
+  int input[2] = {0, 0};
+  for (int i = 0; i < 2; ++i)
+    {
+      // Test the value just fetched; the buffer slot itself holds no data yet.
+      int const c = this->stream_->peek_char (i);
+      if (c == EOF)
+        return -1;
+      // Mask to 0..255 so a sign-extended byte cannot corrupt the shift/or.
+      input[i] = c & 0xFF;
+    }
+  return (BE ? input[0] << 8 | input[1] : input[1] << 8 | input[0]);
}
+#endif /* ACE_USES_WCHAR */