summaryrefslogtreecommitdiff
path: root/ACEXML
diff options
context:
space:
mode:
author kitty <kitty@ae88bc3d-4319-0410-8dbf-d08b4c9d3795> 2002-10-15 22:21:36 +0000
committer kitty <kitty@ae88bc3d-4319-0410-8dbf-d08b4c9d3795> 2002-10-15 22:21:36 +0000
commit a4636be6a25c38dbd6e28519383324301e6c7175 (patch)
tree 2b6857e552e9ee9ac02b074a28baa99b0ebc3d8e /ACEXML
parent 56194729036ba549a82869904ffcdf96d1caf43d (diff)
download ATCD-a4636be6a25c38dbd6e28519383324301e6c7175.tar.gz
ChangeLogTag: Tue Oct 15 17:17:44 2002 Krishnakumar B <kitty@cse.wustl.edu>
Diffstat (limited to 'ACEXML')
-rw-r--r-- ACEXML/apps/svcconf/Makefile 6
-rw-r--r-- ACEXML/common/CharStream.h 5
-rw-r--r-- ACEXML/common/Encoding.cpp 53
-rw-r--r-- ACEXML/common/Encoding.h 61
-rw-r--r-- ACEXML/common/Exception.cpp 8
-rw-r--r-- ACEXML/common/FileCharStream.cpp 160
-rw-r--r-- ACEXML/common/FileCharStream.h 53
-rw-r--r-- ACEXML/common/HttpCharStream.cpp 19
-rw-r--r-- ACEXML/common/HttpCharStream.h 11
-rw-r--r-- ACEXML/common/InputSource.cpp 4
-rw-r--r-- ACEXML/common/Makefile 115
-rw-r--r-- ACEXML/common/Mem_Map_Stream.cpp 9
-rw-r--r-- ACEXML/common/Mem_Map_Stream.h 2
-rw-r--r-- ACEXML/common/StrCharStream.cpp 64
-rw-r--r-- ACEXML/common/StrCharStream.h 20
-rw-r--r-- ACEXML/common/XML_Common.dsp 8
-rw-r--r-- ACEXML/examples/SAXPrint/Makefile 2
-rw-r--r-- ACEXML/parser/parser/Makefile 1
-rw-r--r-- ACEXML/parser/parser/Parser.cpp 24
19 files changed, 568 insertions, 57 deletions
diff --git a/ACEXML/apps/svcconf/Makefile b/ACEXML/apps/svcconf/Makefile
index 41395961076..9adc75b4372 100644
--- a/ACEXML/apps/svcconf/Makefile
+++ b/ACEXML/apps/svcconf/Makefile
@@ -49,6 +49,7 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU
$(ACE_ROOT)/ACEXML/common/ContentHandler.h \
$(ACE_ROOT)/ACEXML/common/Env.h \
$(ACE_ROOT)/ACEXML/common/XML_Macros.h \
+ $(ACE_ROOT)/ace/Exception_Macros.h \
$(ACE_ROOT)/ACEXML/common/Exception.h \
$(ACE_ROOT)/ACEXML/common/XML_Types.h \
$(ACE_ROOT)/ace/OS.h \
@@ -157,7 +158,7 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU
$(ACE_ROOT)/ace/Svc_Conf_Tokens.h \
$(ACE_ROOT)/ace/DLL.h \
$(ACE_ROOT)/ace/Service_Object.i \
- $(ACE_ROOT)/ace/Service_Types.i \
+ $(ACE_ROOT)/ace/Service_Types.i Svcconf_Handler.i \
$(ACE_ROOT)/ace/Service_Config.h \
$(ACE_ROOT)/ace/Unbounded_Queue.h \
$(ACE_ROOT)/ace/Unbounded_Queue.inl \
@@ -240,6 +241,7 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU
$(ACE_ROOT)/ACEXML/common/ContentHandler.h \
$(ACE_ROOT)/ACEXML/common/Env.h \
$(ACE_ROOT)/ACEXML/common/XML_Macros.h \
+ $(ACE_ROOT)/ace/Exception_Macros.h \
$(ACE_ROOT)/ACEXML/common/Exception.h \
$(ACE_ROOT)/ACEXML/common/XML_Types.h \
$(ACE_ROOT)/ace/OS.h \
@@ -397,7 +399,9 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU
Svcconf_Handler.h \
$(ACE_ROOT)/ACEXML/common/DefaultHandler.h \
$(ACE_ROOT)/ACEXML/common/DefaultHandler.i \
+ Svcconf_Handler.i \
$(ACE_ROOT)/ACEXML/common/FileCharStream.h \
+ $(ACE_ROOT)/ACEXML/common/Encoding.h \
$(ACE_ROOT)/ACEXML/common/StrCharStream.h
# IF YOU PUT ANYTHING HERE IT WILL GO AWAY
diff --git a/ACEXML/common/CharStream.h b/ACEXML/common/CharStream.h
index 719e4edb3e9..1b5c199f527 100644
--- a/ACEXML/common/CharStream.h
+++ b/ACEXML/common/CharStream.h
@@ -66,6 +66,11 @@ public:
*/
virtual int peek (void) = 0;
+ /*
+ * Get the character encoding for a byte stream or URI.
+ */
+ virtual const ACEXML_Char *getEncoding (void) = 0;
+
};
#include "ace/post.h"
diff --git a/ACEXML/common/Encoding.cpp b/ACEXML/common/Encoding.cpp
new file mode 100644
index 00000000000..7fe8811cdbd
--- /dev/null
+++ b/ACEXML/common/Encoding.cpp
@@ -0,0 +1,53 @@
+// -*- C++ -*- $Id$
+
+#include "ACEXML/common/Encoding.h"
+
+const ACEXML_Char* ACEXML_Encoding::encoding_names_[8] = {
+ ACE_TEXT ("UCS-4BE"),
+ ACE_TEXT ("UCS-4LE"),
+ ACE_TEXT ("UCS-4_2143"),
+ ACE_TEXT ("UCS-4_3412"),
+ ACE_TEXT ("UTF-16BE"),
+ ACE_TEXT ("UTF-16LE"),
+ ACE_TEXT ("UTF-8"),
+ ACE_TEXT ("Unsupported Encoding")
+};
+
+const ACEXML_UTF8 ACEXML_Encoding::byte_order_mark_[][4] = {
+ { '\x00', '\x00', '\xFE', '\xFF' }, // UCS-4, big-endian (1234 order)
+ { '\xFF', '\xFE', '\x00', '\x00' }, // UCS-4, little-endian (4321 order)
+ { '\x00', '\x00', '\xFF', '\xFE' }, // UCS-4, unusual octet order (2143)
+ { '\xFE', '\xFF', '\x00', '\x00' }, // UCS-4, unusual octet order (3412)
+ { '\xFE', '\xFF', '\xFF', '\xFF' }, // UTF-16, big-endian (3 & 4 ignored)
+ { '\xFF', '\xFE', '\xFF', '\xFF' }, // UTF-16, little-endian ( 3 & 4 ignored)
+ { '\xEF', '\xBB', '\xBF', '\xFF' } // UTF-8
+};
+
+const ACEXML_UTF8 ACEXML_Encoding::magic_values_[][4] = {
+ { '\x00', '\x00', '\x00', '\x3c' }, //
+ { '\x3c', '\x00', '\x00', '\x00' }, // UCS-4 and variants
+ { '\x00', '\x00', '\x3c', '\x00' }, //
+ { '\x00', '\x3c', '\x00', '\x00' }, //
+ { '\x00', '\x3c', '\x00', '\x3f' }, // UTF-16BE
+ { '\x3c', '\x00', '\x3f', '\x00' }, // UTF-16LE
+ { '\x3c', '\x3f', '\x78', '\x6d' }, // UTF-8
+};
+
+// Detect the encoding from the first four bytes of a document: try the
+// byte-order marks first, then the byte pattern of a leading "<?xm".  Only
+// the significant BOM bytes are compared (2 for UTF-16, 3 for UTF-8); the
+// table pads the remaining slots with 0xFF, which real data need not match.
+const ACEXML_Char*
+ACEXML_Encoding::get_encoding (const char* input)
+{
+  for (int pass = 0; pass < 2; ++pass)
+    for (int e = ACEXML_Encoding::UTF16BE; e <= ACEXML_Encoding::UTF8; ++e)
+      {
+        const ACEXML_UTF8* sig = pass ? ACEXML_Encoding::magic_values_[e]
+                                      : ACEXML_Encoding::byte_order_mark_[e];
+        const int len = pass ? 4 : (e == ACEXML_Encoding::UTF8 ? 3 : 2);
+        if (ACE_OS::memcmp (sig, input, len) == 0)
+          return ACEXML_Encoding::encoding_names_[e];
+      }
+  return ACEXML_Encoding::encoding_names_[ACEXML_Encoding::OTHER];
+}
diff --git a/ACEXML/common/Encoding.h b/ACEXML/common/Encoding.h
new file mode 100644
index 00000000000..fc0917c0b91
--- /dev/null
+++ b/ACEXML/common/Encoding.h
@@ -0,0 +1,61 @@
+// -*- C++ -*-
+
+//=============================================================================
+/**
+ * @file Encoding.h
+ *
+ * This file provides utility functions to determine the encoding of a file
+ * or a byte stream automatically.
+ *
+ * $Id$
+ *
+ * @author Krishnakumar B <kitty@cs.wustl.edu>
+ */
+//=============================================================================
+
+#ifndef _ACEXML_ENCODING_H
+#define _ACEXML_ENCODING_H
+
+#include "ace/pre.h"
+#include "ACEXML/common/ACEXML_Export.h"
+
+#if !defined (ACE_LACKS_PRAGMA_ONCE)
+#pragma once
+#endif /* ACE_LACKS_PRAGMA_ONCE */
+
+#include "ACEXML/common/XML_Types.h"
+
+/**
+ * @class ACEXML_Encoding Encoding.h "ACEXML/common/Encoding.h"
+ *
+ * @brief Lookup tables and a helper for recognising a stream's encoding.
+ *
+ * Wrapper class for determining the encoding of a file or a byte stream.
+ */
+class ACEXML_Export ACEXML_Encoding
+{
+public:
+ enum {
+ UCS4BE = 0, // The enumerators double as row indices into the tables below.
+ UCS4LE = 1,
+ UCS4_2143 = 2,
+ UCS4_3412 = 3,
+ UTF16BE = 4,
+ UTF16LE = 5,
+ UTF8 = 6,
+ OTHER = 7 // Not recognised; its name entry is "Unsupported Encoding".
+ } ENCODING; // NOTE(review): declares a data member, not a type name -- confirm intent.
+ // Printable encoding names, one entry per enumerator.
+ static const ACEXML_Char* encoding_names_[8];
+ // Byte-order-mark bytes for each encoding, four slots per row.
+ static const ACEXML_UTF8 byte_order_mark_[][4];
+ // Leading bytes of a document that starts with '<' (0x3c), per encoding.
+ static const ACEXML_UTF8 magic_values_[][4];
+ // Maps the leading bytes at <input> to an entry of encoding_names_.
+ static const ACEXML_Char* get_encoding (const char* input);
+
+};
+
+#include "ace/post.h"
+
+#endif /* _ACEXML_ENCODING_H */
diff --git a/ACEXML/common/Exception.cpp b/ACEXML/common/Exception.cpp
index 9bdace863e2..3086a8a7bfb 100644
--- a/ACEXML/common/Exception.cpp
+++ b/ACEXML/common/Exception.cpp
@@ -31,14 +31,6 @@ ACEXML_Exception::~ACEXML_Exception (void)
}
-ACEXML_Exception&
-ACEXML_Exception::operator= (const ACEXML_Exception& src)
-{
- this->exception_name_ = src.exception_name_;
- ACE_ASSERT (this->exception_name_ != 0);
- return *this;
-}
-
int
ACEXML_Exception::is_a (const ACEXML_Char *name)
{
diff --git a/ACEXML/common/FileCharStream.cpp b/ACEXML/common/FileCharStream.cpp
index 943b8cddd1e..d319ba90e03 100644
--- a/ACEXML/common/FileCharStream.cpp
+++ b/ACEXML/common/FileCharStream.cpp
@@ -4,16 +4,13 @@
#include "ace/ACE.h"
ACEXML_FileCharStream::ACEXML_FileCharStream (void)
- : filename_ (0),
- infile_ (NULL)
+ : filename_ (0), encoding_ (0), size_ (0), infile_ (NULL), peek_ (0)
{
}
ACEXML_FileCharStream::~ACEXML_FileCharStream (void)
{
- if (this->infile_ != NULL)
- ACE_OS::fclose (this->infile_);
- delete this->filename_;
+ this->close();
}
int
@@ -22,6 +19,9 @@ ACEXML_FileCharStream::open (const ACEXML_Char *name)
delete[] this->filename_;
this->filename_ = 0;
+ delete[] this->encoding_;
+ this->encoding_ = 0;
+
this->infile_ = ACE_OS::fopen (name, ACE_TEXT ("r"));
if (this->infile_ == NULL)
return -1;
@@ -32,10 +32,57 @@ ACEXML_FileCharStream::open (const ACEXML_Char *name)
this->size_ = statbuf.st_size;
this->filename_ = ACE::strnew (name);
+ if (this->determine_encoding() == -1)
+ return -1;
return 0;
}
+/**
+ * Sniff the first four bytes of the open file to work out its encoding,
+ * remember the encoding name in encoding_, and leave the stream positioned
+ * on the first byte after any byte-order mark.
+ *
+ * @return 0 on success; -1 if fewer than four bytes could be read or the
+ *         leading bytes do not identify a supported encoding.
+ */
int
+ACEXML_FileCharStream::determine_encoding (void)
+{
+  char input[4];
+  int retval = 0;
+  for (int i = 0; i < 4 && retval != -1; ++i)
+    retval = this->getchar (input[i]);
+  // Testing the loop counter instead would miss a failure of the fourth
+  // read, because the counter is still incremented after it.
+  if (retval == -1)
+    return -1;
+
+  const ACEXML_Char* temp = ACEXML_Encoding::get_encoding (input);
+  if (!temp)
+    return -1;
+  if (ACE_OS::strcmp (temp,
+                      ACEXML_Encoding::encoding_names_[ACEXML_Encoding::OTHER]) == 0)
+    return -1;
+
+  this->encoding_ = ACE::strnew (temp);
+  ACE_DEBUG ((LM_DEBUG, "File's encoding is %s\n", this->encoding_));
+
+  // Rewind the stream, then step over the byte-order mark if there is one.
+  // A detected encoding whose first byte is 0xEF can only have matched the
+  // three-byte UTF-8 mark; 0xFF/0xFE can only have matched a two-byte
+  // UTF-16 mark.  Anything else was recognised without a mark.
+  this->rewind ();
+  int bom_length = 0;
+  if (input[0] == '\xEF')
+    bom_length = 3;
+  else if (input[0] == '\xFF' || input[0] == '\xFE')
+    bom_length = 2;
+  char ch;
+  for (int j = 0; j < bom_length; ++j)
+    this->getchar (ch);
+  return 0;
+}
+
+// Reposition the underlying FILE at the start of the file and drop any
+// pending look-ahead, which would otherwise be returned after the rewind.
+// Does nothing to the FILE when none is open: rewinding a null FILE* is
+// undefined.
+void
+ACEXML_FileCharStream::rewind()
+{
+  if (this->infile_ != NULL)
+    ACE_OS::rewind (this->infile_);
+  this->peek_ = 0;
+}
+
+int
ACEXML_FileCharStream::available (void)
{
long curr;
@@ -47,18 +94,20 @@ ACEXML_FileCharStream::available (void)
int
ACEXML_FileCharStream::close (void)
{
- delete this->filename_;
- this->filename_ = 0;
+ // Every released resource is reset to null so that a second close()
+ // (the destructor always calls close()) cannot release it twice.
+ if (this->infile_ != NULL)
ACE_OS::fclose (this->infile_);
this->infile_ = NULL;
+ delete[] this->filename_;
+ this->filename_ = 0;
+ delete[] this->encoding_;
+ this->encoding_ = 0;
this->size_ = 0;
+ this->peek_ = 0;
return 0;
}
+
int
-ACEXML_FileCharStream::get (ACEXML_Char& ch)
+ACEXML_FileCharStream::getchar (char& ch)
{
- ch = (ACEXML_Char) ACE_OS::fgetc (this->infile_);
+ ch = ACE_OS::fgetc (this->infile_);
return (feof(this->infile_) ? -1 : 0);
}
@@ -70,9 +119,100 @@ ACEXML_FileCharStream::read (ACEXML_Char *str,
}
int
+ACEXML_FileCharStream::get (ACEXML_Char& ch)
+{
+#if !defined (ACE_USES_WCHAR)
+  // Narrow-character build: one byte of the file is one ACEXML_Char.
+  ch = (ACEXML_Char) ACE_OS::fgetc (this->infile_);
+  if (feof (this->infile_))
+    return -1;
+  return 0;
+#else
+  // Wide-character build: decoding depends on the detected encoding.
+  return this->get_i (ch);
+#endif /* !ACE_USES_WCHAR */
+}
+
+#if defined (ACE_USES_WCHAR)
+/**
+ * Wide-character implementation of get().  For a UTF-8 file one byte is
+ * returned per call; otherwise two bytes are combined into one 16-bit
+ * code unit, honouring the byte order named by encoding_.  A value left
+ * behind by peek_i() is handed out first.
+ *
+ * @param ch receives the character; set to 0 when a UTF-16 read fails.
+ * @return 0 on success, -1 at end of file.
+ */
+int
+ACEXML_FileCharStream::get_i (ACEXML_Char& ch)
+{
+  if (ACE_OS::strcmp (this->encoding_, ACE_TEXT ("UTF-8")) == 0)
+    {
+      ch = (ACEXML_Char) ACE_OS::fgetc (this->infile_);
+      return (feof(this->infile_) ? -1 : 0);
+    }
+  // If we have a value in peek_, return it.
+  if (this->peek_ != 0)
+    {
+      ch = this->peek_;
+      this->peek_ = 0;
+      return 0;
+    }
+
+  const int BE = (ACE_OS::strcmp (this->encoding_,
+                                  ACE_TEXT ("UTF-16BE")) == 0) ? 1 : 0;
+  // Fetch the two raw bytes with fgetc(): the stream has already been read
+  // byte-wise, and wide-character input must not be mixed in on the same
+  // FILE.  End of file is checked per byte, inside the loop's own scope.
+  int input[2];
+  for (int i = 0; i < 2; ++i)
+    {
+      input[i] = ACE_OS::fgetc (this->infile_);
+      if (input[i] == EOF)
+        {
+          ch = 0;
+          return -1;
+        }
+    }
+  ch = (ACEXML_Char) (BE ? ((input[0] << 8) | input[1])
+                         : ((input[1] << 8) | input[0]));
+  return 0;
+}
+#endif /* ACE_USES_WCHAR */
+
+int
ACEXML_FileCharStream::peek (void)
{
+#if defined (ACE_USES_WCHAR)
+ return this->peek_i();
+#else
ACEXML_Char ch = ACE_OS::fgetc (this->infile_);
::ungetc (ch, this->infile_);
return ch;
+#endif /* ACE_USES_WCHAR */
+}
+
+#if defined (ACE_USES_WCHAR)
+/**
+ * Wide-character implementation of peek().  Returns the next character
+ * without consuming it, or -1 at end of file.  For UTF-16 files the
+ * character is read and parked in peek_ until get_i() collects it.
+ */
+int
+ACEXML_FileCharStream::peek_i (void)
+{
+  // If we are reading a UTF-8 encoded file, just use the plain unget.
+  if (ACE_OS::strcmp (this->encoding_, ACE_TEXT ("UTF-8")) == 0)
+    {
+      // Keep the result as an int so that EOF is neither truncated nor
+      // pushed back onto the stream.
+      const int c = ACE_OS::fgetc (this->infile_);
+      if (c == EOF)
+        return -1;
+      ::ungetc (c, this->infile_);
+      return c;
+    }
+
+  // If somebody had already called peek() and not consumed it, return the
+  // value held in this->peek_.
+  if (this->peek_ != 0)
+    return this->peek_;
+
+  // Peek into the stream. This reads two bytes off the stream and keeps
+  // the combined character in peek_.
+  const int BE = (ACE_OS::strcmp (this->encoding_,
+                                  ACE_TEXT ("UTF-16BE")) == 0) ? 1 : 0;
+  // Bytes are fetched with fgetc(): the stream has already been read
+  // byte-wise, and wide-character input must not be mixed in on the same
+  // FILE.  End of file is checked per byte, inside the loop's own scope.
+  int input[2];
+  for (int i = 0; i < 2; ++i)
+    {
+      input[i] = ACE_OS::fgetc (this->infile_);
+      if (input[i] == EOF)
+        {
+          this->peek_ = 0;
+          return -1;
+        }
+    }
+  this->peek_ = (ACEXML_Char) (BE ? ((input[0] << 8) | input[1])
+                                  : ((input[1] << 8) | input[0]));
+  return this->peek_;
+}
+#endif /* ACE_USES_WCHAR */
+
+const ACEXML_Char*
+ACEXML_FileCharStream::getEncoding (void)
+{
+ return this->encoding_;
}
diff --git a/ACEXML/common/FileCharStream.h b/ACEXML/common/FileCharStream.h
index 46004e4e715..79def212f48 100644
--- a/ACEXML/common/FileCharStream.h
+++ b/ACEXML/common/FileCharStream.h
@@ -21,6 +21,7 @@
#endif /* ACE_LACKS_PRAGMA_ONCE */
#include "ACEXML/common/CharStream.h"
+#include "ACEXML/common/Encoding.h"
#include "ace/streams.h"
/**
@@ -64,19 +65,61 @@ public:
size_t len);
/**
+ * Determine the encoding of the file.
+ */
+ virtual int determine_encoding (void);
+
+
+ /**
* Peek the next ACEXML_Char in the CharStream. Return the
- * character if succeess, -1 if EOS is reached.
+ * character if success, -1 if EOF is reached.
*/
virtual int peek (void);
+ /**
+ * Resets the file pointer to the beginning of the stream.
+ */
+ virtual void rewind (void);
-private:
- ACEXML_Char *filename_;
+ /*
+ * Get the character encoding for a byte stream or URI.
+ */
+ virtual const ACEXML_Char *getEncoding (void);
- off_t size_;
+protected:
- FILE *infile_;
+ /** Read the next character as a normal character. Return -1 if EOF is
+ * reached, else return 0.
+ */
+ virtual int getchar (char& ch);
+
+private:
+
+#if defined (ACE_USES_WCHAR)
+ /**
+ * Read the next character from the stream taking into account the
+ * encoding of the file.
+ */
+ int get_i (ACEXML_Char& ch);
+ /**
+ * Read the next character from the stream taking into account the
+ * encoding of the file. Subsequent call to get() returns this
+ * character.
+ */
+ int peek_i (void);
+
+#endif /* ACE_USES_WCHAR */
+
+ ACEXML_Char* filename_;
+ ACEXML_Char* encoding_;
+ off_t size_;
+ FILE* infile_;
+ // This is needed to ensure that we can implement a peek operation on a
+ // UTF-16 encoded file. It is a bit hackish, but there is no other way of
+ // implementing a peek() as the standard I/O FILE* guarantees only one
+ // pushback.
+ ACEXML_Char peek_;
};
diff --git a/ACEXML/common/HttpCharStream.cpp b/ACEXML/common/HttpCharStream.cpp
index a397a4fc114..29f34023229 100644
--- a/ACEXML/common/HttpCharStream.cpp
+++ b/ACEXML/common/HttpCharStream.cpp
@@ -23,7 +23,8 @@ ACEXML_HttpCharStream::ACEXML_HttpCharStream (void)
url_addr_(0),
stream_(0),
connector_(0),
- size_(0)
+ size_(0),
+ encoding_ (0)
{
}
@@ -286,6 +287,10 @@ ACEXML_HttpCharStream::close (void)
this->connector_ = 0;
this->size_ = 0;
+
+ delete[] this->encoding_;
+ this->encoding_ = 0;
+
return 0;
}
@@ -312,3 +317,15 @@ ACEXML_HttpCharStream::peek (void)
{
return this->stream_->peek_char (0);
}
+
+void
+ACEXML_HttpCharStream::rewind (void)
+{
+ this->stream_->rewind();
+}
+
+const ACEXML_Char*
+ACEXML_HttpCharStream::getEncoding (void)
+{
+ return this->encoding_;
+}
diff --git a/ACEXML/common/HttpCharStream.h b/ACEXML/common/HttpCharStream.h
index 1e8cc66057e..7bce23a224b 100644
--- a/ACEXML/common/HttpCharStream.h
+++ b/ACEXML/common/HttpCharStream.h
@@ -71,6 +71,15 @@ public:
*/
virtual int peek (void);
+ /**
+ * Resets the file pointer to the beginning of the stream.
+ */
+ virtual void rewind (void);
+
+ /**
+ * Get the encoding of the file
+ */
+ virtual const ACEXML_Char* getEncoding (void);
private:
@@ -94,6 +103,8 @@ private:
off_t size_;
+ ACEXML_Char* encoding_;
+
};
diff --git a/ACEXML/common/InputSource.cpp b/ACEXML/common/InputSource.cpp
index 1149643f44a..0dcdeb0f2ba 100644
--- a/ACEXML/common/InputSource.cpp
+++ b/ACEXML/common/InputSource.cpp
@@ -17,6 +17,7 @@ ACEXML_InputSource::ACEXML_InputSource (ACEXML_CharStream *stm)
charStream_ (stm),
encoding_ (0)
{
+ this->setEncoding (stm->getEncoding());
}
/*
@@ -77,8 +78,7 @@ ACEXML_InputSource::setCharStream (ACEXML_CharStream *stm)
/*
* Set the character stream for this input source.
- * /
- virtual void setCharacterStream (Reader *characterStream);
+ *
*/
void
diff --git a/ACEXML/common/Makefile b/ACEXML/common/Makefile
index a041da36fbd..ee20d8016f5 100644
--- a/ACEXML/common/Makefile
+++ b/ACEXML/common/Makefile
@@ -28,7 +28,8 @@ FILES = Attributes_Def_Builder \
Mem_Map_Stream \
URL_Addr \
HttpCharStream \
- StreamFactory
+ StreamFactory \
+ Encoding
DEFS = $(addsuffix .h,$(FILES))
LSRC = $(addsuffix .cpp,$(FILES))
@@ -156,6 +157,7 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU
$(ACE_ROOT)/ace/SString.i \
Env.h \
XML_Macros.h \
+ $(ACE_ROOT)/ace/Exception_Macros.h \
Exception.h \
Exception.i \
Env.i \
@@ -377,6 +379,7 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU
ContentHandler.h \
Env.h \
XML_Macros.h \
+ $(ACE_ROOT)/ace/Exception_Macros.h \
Exception.h \
XML_Types.h \
$(ACE_ROOT)/ace/OS.h \
@@ -577,6 +580,7 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU
$(ACE_ROOT)/ace/SString.i \
Env.h \
XML_Macros.h \
+ $(ACE_ROOT)/ace/Exception_Macros.h \
Exception.h \
Exception.i \
Env.i \
@@ -683,6 +687,7 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU
$(ACE_ROOT)/ace/SString.i \
Env.h \
XML_Macros.h \
+ $(ACE_ROOT)/ace/Exception_Macros.h \
Exception.h \
Exception.i \
Env.i \
@@ -696,6 +701,7 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU
$(ACE_ROOT)/ace/ace_wchar.h \
$(ACE_ROOT)/ace/ace_wchar.inl \
XML_Macros.h \
+ $(ACE_ROOT)/ace/Exception_Macros.h \
Exception.h \
XML_Types.h \
$(ACE_ROOT)/ace/OS.h \
@@ -884,6 +890,7 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU
$(ACE_ROOT)/ace/Auto_Ptr.cpp \
$(ACE_ROOT)/ace/SString.i \
XML_Macros.h \
+ $(ACE_ROOT)/ace/Exception_Macros.h \
Exception.i
.obj/FileCharStream.o .obj/FileCharStream.so .shobj/FileCharStream.o .shobj/FileCharStream.so: FileCharStream.cpp \
@@ -981,7 +988,8 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU
$(ACE_ROOT)/ace/Auto_Ptr.h \
$(ACE_ROOT)/ace/Auto_Ptr.i \
$(ACE_ROOT)/ace/Auto_Ptr.cpp \
- $(ACE_ROOT)/ace/SString.i
+ $(ACE_ROOT)/ace/SString.i \
+ Encoding.h
.obj/InputSource.o .obj/InputSource.so .shobj/InputSource.o .shobj/InputSource.so: InputSource.cpp \
InputSource.h \
@@ -1415,6 +1423,7 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU
$(ACE_ROOT)/ace/Auto_Ptr.cpp \
$(ACE_ROOT)/ace/SString.i \
XML_Macros.h \
+ $(ACE_ROOT)/ace/Exception_Macros.h \
Exception.i \
SAXExceptions.i
@@ -1513,7 +1522,8 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU
$(ACE_ROOT)/ace/Auto_Ptr.h \
$(ACE_ROOT)/ace/Auto_Ptr.i \
$(ACE_ROOT)/ace/Auto_Ptr.cpp \
- $(ACE_ROOT)/ace/SString.i
+ $(ACE_ROOT)/ace/SString.i \
+ Encoding.h
.obj/Transcode.o .obj/Transcode.so .shobj/Transcode.o .shobj/Transcode.so: Transcode.cpp \
Transcode.h \
@@ -1710,6 +1720,7 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU
$(ACE_ROOT)/ace/SString.i \
Env.h \
XML_Macros.h \
+ $(ACE_ROOT)/ace/Exception_Macros.h \
Exception.h \
Exception.i \
Env.i \
@@ -1728,6 +1739,7 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU
ContentHandler.h \
Env.h \
XML_Macros.h \
+ $(ACE_ROOT)/ace/Exception_Macros.h \
Exception.h \
XML_Types.h \
$(ACE_ROOT)/ace/OS.h \
@@ -2557,6 +2569,7 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU
$(ACE_ROOT)/ace/SString.i \
FileCharStream.h \
CharStream.h \
+ Encoding.h \
HttpCharStream.h \
URL_Addr.h \
$(ACE_ROOT)/ace/INET_Addr.h \
@@ -2681,4 +2694,100 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU
$(ACE_ROOT)/ace/WFMO_Reactor.h \
$(ACE_ROOT)/ace/Connector.cpp
+.obj/Encoding.o .obj/Encoding.so .shobj/Encoding.o .shobj/Encoding.so: Encoding.cpp \
+ Encoding.h \
+ $(ACE_ROOT)/ace/pre.h \
+ ACEXML_Export.h \
+ $(ACE_ROOT)/ace/post.h \
+ $(ACE_ROOT)/ace/ace_wchar.h \
+ $(ACE_ROOT)/ace/ace_wchar.inl \
+ XML_Types.h \
+ $(ACE_ROOT)/ace/OS.h \
+ $(ACE_ROOT)/ace/OS_Dirent.h \
+ $(ACE_ROOT)/ace/OS_Export.h \
+ $(ACE_ROOT)/ace/OS_Errno.h \
+ $(ACE_ROOT)/ace/OS_Errno.inl \
+ $(ACE_ROOT)/ace/OS_Dirent.inl \
+ $(ACE_ROOT)/ace/OS_String.h \
+ $(ACE_ROOT)/ace/Basic_Types.h \
+ $(ACE_ROOT)/ace/ACE_export.h \
+ $(ACE_ROOT)/ace/Basic_Types.i \
+ $(ACE_ROOT)/ace/OS_String.inl \
+ $(ACE_ROOT)/ace/OS_Memory.h \
+ $(ACE_ROOT)/ace/OS_Memory.inl \
+ $(ACE_ROOT)/ace/OS_TLI.h \
+ $(ACE_ROOT)/ace/OS_TLI.inl \
+ $(ACE_ROOT)/ace/Time_Value.h \
+ $(ACE_ROOT)/ace/Time_Value.inl \
+ $(ACE_ROOT)/ace/Default_Constants.h \
+ $(ACE_ROOT)/ace/Global_Macros.h \
+ $(ACE_ROOT)/ace/Min_Max.h \
+ $(ACE_ROOT)/ace/streams.h \
+ $(ACE_ROOT)/ace/Trace.h \
+ $(ACE_ROOT)/ace/OS.i \
+ $(ACE_ROOT)/ace/SString.h \
+ $(ACE_ROOT)/ace/String_Base.h \
+ $(ACE_ROOT)/ace/ACE.h \
+ $(ACE_ROOT)/ace/Flag_Manip.h \
+ $(ACE_ROOT)/ace/Flag_Manip.i \
+ $(ACE_ROOT)/ace/Handle_Ops.h \
+ $(ACE_ROOT)/ace/Handle_Ops.i \
+ $(ACE_ROOT)/ace/Lib_Find.h \
+ $(ACE_ROOT)/ace/Lib_Find.i \
+ $(ACE_ROOT)/ace/Init_ACE.h \
+ $(ACE_ROOT)/ace/Init_ACE.i \
+ $(ACE_ROOT)/ace/Sock_Connect.h \
+ $(ACE_ROOT)/ace/Sock_Connect.i \
+ $(ACE_ROOT)/ace/ACE.i \
+ $(ACE_ROOT)/ace/String_Base_Const.h \
+ $(ACE_ROOT)/ace/String_Base.i \
+ $(ACE_ROOT)/ace/Malloc_Base.h \
+ $(ACE_ROOT)/ace/String_Base.cpp \
+ $(ACE_ROOT)/ace/Malloc.h \
+ $(ACE_ROOT)/ace/Log_Msg.h \
+ $(ACE_ROOT)/ace/Log_Priority.h \
+ $(ACE_ROOT)/ace/OS_Log_Msg_Attributes.h \
+ $(ACE_ROOT)/ace/OS_Log_Msg_Attributes.inl \
+ $(ACE_ROOT)/ace/Malloc.i \
+ $(ACE_ROOT)/ace/Malloc_T.h \
+ $(ACE_ROOT)/ace/Synch.h \
+ $(ACE_ROOT)/ace/Synch.i \
+ $(ACE_ROOT)/ace/Synch_T.h \
+ $(ACE_ROOT)/ace/Synch_T.i \
+ $(ACE_ROOT)/ace/Thread.h \
+ $(ACE_ROOT)/ace/Thread_Adapter.h \
+ $(ACE_ROOT)/ace/Base_Thread_Adapter.h \
+ $(ACE_ROOT)/ace/Base_Thread_Adapter.inl \
+ $(ACE_ROOT)/ace/Thread_Adapter.inl \
+ $(ACE_ROOT)/ace/Thread.i \
+ $(ACE_ROOT)/ace/Synch_T.cpp \
+ $(ACE_ROOT)/ace/Malloc_Allocator.h \
+ $(ACE_ROOT)/ace/Malloc_Allocator.i \
+ $(ACE_ROOT)/ace/Free_List.h \
+ $(ACE_ROOT)/ace/Free_List.i \
+ $(ACE_ROOT)/ace/Free_List.cpp \
+ $(ACE_ROOT)/ace/Malloc_T.i \
+ $(ACE_ROOT)/ace/Malloc_T.cpp \
+ $(ACE_ROOT)/ace/Memory_Pool.h \
+ $(ACE_ROOT)/ace/Event_Handler.h \
+ $(ACE_ROOT)/ace/Event_Handler.i \
+ $(ACE_ROOT)/ace/Signal.h \
+ $(ACE_ROOT)/ace/Signal.i \
+ $(ACE_ROOT)/ace/Mem_Map.h \
+ $(ACE_ROOT)/ace/Mem_Map.i \
+ $(ACE_ROOT)/ace/SV_Semaphore_Complex.h \
+ $(ACE_ROOT)/ace/SV_Semaphore_Simple.h \
+ $(ACE_ROOT)/ace/SV_Semaphore_Simple.i \
+ $(ACE_ROOT)/ace/SV_Semaphore_Complex.i \
+ $(ACE_ROOT)/ace/Unbounded_Set.h \
+ $(ACE_ROOT)/ace/Node.h \
+ $(ACE_ROOT)/ace/Node.cpp \
+ $(ACE_ROOT)/ace/Unbounded_Set.inl \
+ $(ACE_ROOT)/ace/Unbounded_Set.cpp \
+ $(ACE_ROOT)/ace/Memory_Pool.i \
+ $(ACE_ROOT)/ace/Auto_Ptr.h \
+ $(ACE_ROOT)/ace/Auto_Ptr.i \
+ $(ACE_ROOT)/ace/Auto_Ptr.cpp \
+ $(ACE_ROOT)/ace/SString.i
+
# IF YOU PUT ANYTHING HERE IT WILL GO AWAY
diff --git a/ACEXML/common/Mem_Map_Stream.cpp b/ACEXML/common/Mem_Map_Stream.cpp
index 88313e76c00..cb96b21bc0c 100644
--- a/ACEXML/common/Mem_Map_Stream.cpp
+++ b/ACEXML/common/Mem_Map_Stream.cpp
@@ -39,16 +39,13 @@ ACEXML_Mem_Map_Stream::get_char (void)
return *this->get_pos_++;
}
-int
+void
ACEXML_Mem_Map_Stream::rewind (void)
{
- this->recv_pos_ =
- ACE_reinterpret_cast (char *,
+ this->recv_pos_ = ACE_reinterpret_cast (char *,
this->mem_map_.addr ());
this->get_pos_ = this->recv_pos_;
- this->end_of_mapping_plus1_ =
- this->recv_pos_ + this->mem_map_.size ();
- return 0;
+ this->end_of_mapping_plus1_ = this->recv_pos_ + this->mem_map_.size ();
}
int
diff --git a/ACEXML/common/Mem_Map_Stream.h b/ACEXML/common/Mem_Map_Stream.h
index faf35bdb7a1..6dfd468a90e 100644
--- a/ACEXML/common/Mem_Map_Stream.h
+++ b/ACEXML/common/Mem_Map_Stream.h
@@ -115,7 +115,7 @@ public:
* This works since all the data has been cached in the memory-mapped
* backing store.
*/
- virtual int rewind (void);
+ virtual void rewind (void);
/**
* Returns the nth character <offset> from the <get> position in the
diff --git a/ACEXML/common/StrCharStream.cpp b/ACEXML/common/StrCharStream.cpp
index c185df0a070..046511407c2 100644
--- a/ACEXML/common/StrCharStream.cpp
+++ b/ACEXML/common/StrCharStream.cpp
@@ -1,19 +1,16 @@
// $Id$
#include "ACEXML/common/StrCharStream.h"
+#include "ACEXML/common/Encoding.h"
#include "ace/ACE.h"
ACEXML_StrCharStream::ACEXML_StrCharStream (void)
- : start_ (0),
- ptr_ (0),
- end_ (0)
+ : start_ (0), ptr_ (0), end_ (0), encoding_ (0)
{
}
ACEXML_StrCharStream::ACEXML_StrCharStream (const ACEXML_Char *str)
- : start_ (0),
- ptr_ (0),
- end_ (0)
+ : start_ (0), ptr_ (0), end_ (0), encoding_ (0)
{
this->open (str);
}
@@ -21,19 +18,21 @@ ACEXML_StrCharStream::ACEXML_StrCharStream (const ACEXML_Char *str)
ACEXML_StrCharStream::~ACEXML_StrCharStream (void)
{
- delete this->start_;
+ this->close();
}
int
ACEXML_StrCharStream::open (const ACEXML_Char *str)
{
- delete this->start_;
+ delete[] this->start_;
+ delete[] this->encoding_;
- if (str != 0 &&
- (this->start_ = ACE::strnew (str)) != 0)
+ if (str != 0 && (this->start_ = ACE::strnew (str)) != 0)
{
this->ptr_ = this->start_;
this->end_ = this->start_ + ACE_OS_String::strlen (this->start_);
+ if (this->determine_encoding() == -1)
+ return -1;
return 0;
}
@@ -52,12 +51,46 @@ ACEXML_StrCharStream::available (void)
int
ACEXML_StrCharStream::close (void)
{
- delete this->start_;
+ // The destructor calls close() as well, so every freed pointer is reset
+ // to null to keep a second call from deleting the same buffer again.
+ delete[] this->start_;
+ delete[] this->encoding_;
+ this->encoding_ = 0;
this->start_ = this->ptr_ = this->end_ = 0;
return 0;
}
+/**
+ * Guess the encoding of the string from its first four bytes and store the
+ * encoding name in encoding_.
+ *
+ * @return 0 on success; -1 if the string holds fewer than four bytes or
+ *         those bytes do not identify a supported encoding.
+ */
int
+ACEXML_StrCharStream::determine_encoding (void)
+{
+  char input[4];
+  const char* sptr = (const char*) this->start_;
+  const char* const last = (const char*) this->end_;
+  int i = 0;
+  // The buffer is scanned byte by byte, whatever the width of ACEXML_Char.
+  for ( ; i < 4 && sptr != last; ++sptr, ++i)
+    input[i] = *sptr;
+  if (i < 4)
+    return -1;
+
+  const ACEXML_Char* temp = ACEXML_Encoding::get_encoding (input);
+  if (!temp)
+    return -1;
+  if (ACE_OS::strcmp (temp,
+                      ACEXML_Encoding::encoding_names_[ACEXML_Encoding::OTHER]) == 0)
+    return -1;
+
+  this->encoding_ = ACE::strnew (temp);
+  ACE_DEBUG ((LM_DEBUG, "String's encoding is %s\n", this->encoding_));
+  return 0;
+}
+
+void
+ACEXML_StrCharStream::rewind (void)
+{
+ this->ptr_ = this->start_;
+}
+
+int
ACEXML_StrCharStream::get (ACEXML_Char& ch)
{
if (this->start_ != 0 && this->ptr_ != this->end_)
@@ -90,8 +123,13 @@ ACEXML_StrCharStream::read (ACEXML_Char *str,
int
ACEXML_StrCharStream::peek (void)
{
- if (this->start_ !=0 &&
- this->ptr_ != this->end_)
+ if (this->start_ != 0 && this->ptr_ != this->end_)
return *this->ptr_;
return -1;
}
+
+const ACEXML_Char*
+ACEXML_StrCharStream::getEncoding (void)
+{
+ return this->encoding_;
+}
diff --git a/ACEXML/common/StrCharStream.h b/ACEXML/common/StrCharStream.h
index 461d26c2270..6321decb9a2 100644
--- a/ACEXML/common/StrCharStream.h
+++ b/ACEXML/common/StrCharStream.h
@@ -55,6 +55,11 @@ public:
virtual int close (void);
/**
+ * Determine the encoding of the string.
+ */
+ virtual int determine_encoding (void);
+
+ /**
* Read the next ACEXML_Char. Return -1 if we are not able to
* return an ACEXML_Char, 0 if EOS is reached, or 1 if succeed.
*/
@@ -72,14 +77,23 @@ public:
*/
virtual int peek (void);
+ /*
+ * Get the character encoding for a byte stream or URI.
+ */
+ virtual const ACEXML_Char *getEncoding (void);
+
+ /**
+ * Resets the pointer to the beginning of the stream.
+ */
+ virtual void rewind (void);
+
private:
ACEXML_Char *start_;
-
ACEXML_Char *ptr_;
-
ACEXML_Char *end_;
-};
+ ACEXML_Char* encoding_;
+};
#include "ace/post.h"
diff --git a/ACEXML/common/XML_Common.dsp b/ACEXML/common/XML_Common.dsp
index 25e03f23c36..decdb107711 100644
--- a/ACEXML/common/XML_Common.dsp
+++ b/ACEXML/common/XML_Common.dsp
@@ -118,6 +118,10 @@ SOURCE=.\Element_Def_Builder.cpp
# End Source File
# Begin Source File
+SOURCE=.\Encoding.cpp
+# End Source File
+# Begin Source File
+
SOURCE=.\Env.cpp
# End Source File
# Begin Source File
@@ -226,6 +230,10 @@ SOURCE=.\Element_Def_Builder.h
# End Source File
# Begin Source File
+SOURCE=.\Encoding.h
+# End Source File
+# Begin Source File
+
SOURCE=.\EntityResolver.h
# End Source File
# Begin Source File
diff --git a/ACEXML/examples/SAXPrint/Makefile b/ACEXML/examples/SAXPrint/Makefile
index e6ee6c07959..c7737ae8414 100644
--- a/ACEXML/examples/SAXPrint/Makefile
+++ b/ACEXML/examples/SAXPrint/Makefile
@@ -53,6 +53,7 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU
$(ACE_ROOT)/ACEXML/common/ContentHandler.h \
$(ACE_ROOT)/ACEXML/common/Env.h \
$(ACE_ROOT)/ACEXML/common/XML_Macros.h \
+ $(ACE_ROOT)/ace/Exception_Macros.h \
$(ACE_ROOT)/ACEXML/common/Exception.h \
$(ACE_ROOT)/ACEXML/common/XML_Types.h \
$(ACE_ROOT)/ace/OS.h \
@@ -165,6 +166,7 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU
$(ACE_ROOT)/ACEXML/common/ContentHandler.h \
$(ACE_ROOT)/ACEXML/common/Env.h \
$(ACE_ROOT)/ACEXML/common/XML_Macros.h \
+ $(ACE_ROOT)/ace/Exception_Macros.h \
$(ACE_ROOT)/ACEXML/common/Exception.h \
$(ACE_ROOT)/ACEXML/common/XML_Types.h \
$(ACE_ROOT)/ace/OS.h \
diff --git a/ACEXML/parser/parser/Makefile b/ACEXML/parser/parser/Makefile
index e9089cb5d23..91aaf5bf525 100644
--- a/ACEXML/parser/parser/Makefile
+++ b/ACEXML/parser/parser/Makefile
@@ -183,6 +183,7 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU
$(ACE_ROOT)/ACEXML/common/ContentHandler.h \
$(ACE_ROOT)/ACEXML/common/Env.h \
$(ACE_ROOT)/ACEXML/common/XML_Macros.h \
+ $(ACE_ROOT)/ace/Exception_Macros.h \
$(ACE_ROOT)/ACEXML/common/Exception.h \
$(ACE_ROOT)/ACEXML/common/XML_Types.h \
$(ACE_ROOT)/ace/OS.h \
diff --git a/ACEXML/parser/parser/Parser.cpp b/ACEXML/parser/parser/Parser.cpp
index 4b9cec81de8..4b49b5ed895 100644
--- a/ACEXML/parser/parser/Parser.cpp
+++ b/ACEXML/parser/parser/Parser.cpp
@@ -294,9 +294,22 @@ ACEXML_Parser::parse_xml_prolog (ACEXML_ENV_SINGLE_ARG_DECL)
return;
}
else
+ {
seen_encoding = 1;
- // @@ Handle encoding here. We don't handle
- // various encodings for this parser.
+ if (ACE_OS::strcmp (astring,
+ this->instream_->getEncoding()) != 0)
+ {
+ if (ACE_OS::strstr (astring,
+ this->instream_->getEncoding()) != 0)
+ {
+ ACE_ERROR ((LM_ERROR,
+ ACE_TEXT ("Detected Encoding is %s : Declared Encoding is %s"),
+ this->instream_->getEncoding(), astring));
+ this->report_fatal_error (ACE_TEXT ("Encoding declaration doesn't match detected encoding") ACEXML_ENV_ARG_PARAMETER);
+ return;
+ }
+ }
+ }
continue;
}
else
@@ -1815,8 +1828,11 @@ ACEXML_Parser::parse_token (const ACEXML_Char* keyword)
if (keyword == 0)
return -1;
const ACEXML_Char* ptr = keyword;
- for (; *ptr != 0 && this->get() == *ptr; ++ptr)
- ;
+ ACEXML_Char ch;
+ for (; *ptr != 0 && ((ch = this->get()) == *ptr); ++ptr)
+ {
+ // ACE_DEBUG ((LM_DEBUG, ACE_TEXT ("ch = %c : ptr = %c"), ch, *ptr));
+ }
if (*ptr == 0)
return 0;
else