diff options
author | kitty <kitty@ae88bc3d-4319-0410-8dbf-d08b4c9d3795> | 2002-10-15 22:21:36 +0000 |
---|---|---|
committer | kitty <kitty@ae88bc3d-4319-0410-8dbf-d08b4c9d3795> | 2002-10-15 22:21:36 +0000 |
commit | 4e650f55396a019769cfc64f5a3f0a935474c5c2 (patch) | |
tree | 2b6857e552e9ee9ac02b074a28baa99b0ebc3d8e /ACEXML | |
parent | d33acee2a1b1fbf8235e3a30680a51c69a522b4f (diff) | |
download | ATCD-4e650f55396a019769cfc64f5a3f0a935474c5c2.tar.gz |
ChangeLogTag: Tue Oct 15 17:17:44 2002 Krishnakumar B <kitty@cse.wustl.edu>
Diffstat (limited to 'ACEXML')
-rw-r--r-- | ACEXML/apps/svcconf/Makefile | 6 | ||||
-rw-r--r-- | ACEXML/common/CharStream.h | 5 | ||||
-rw-r--r-- | ACEXML/common/Encoding.cpp | 53 | ||||
-rw-r--r-- | ACEXML/common/Encoding.h | 61 | ||||
-rw-r--r-- | ACEXML/common/Exception.cpp | 8 | ||||
-rw-r--r-- | ACEXML/common/FileCharStream.cpp | 160 | ||||
-rw-r--r-- | ACEXML/common/FileCharStream.h | 53 | ||||
-rw-r--r-- | ACEXML/common/HttpCharStream.cpp | 19 | ||||
-rw-r--r-- | ACEXML/common/HttpCharStream.h | 11 | ||||
-rw-r--r-- | ACEXML/common/InputSource.cpp | 4 | ||||
-rw-r--r-- | ACEXML/common/Makefile | 115 | ||||
-rw-r--r-- | ACEXML/common/Mem_Map_Stream.cpp | 9 | ||||
-rw-r--r-- | ACEXML/common/Mem_Map_Stream.h | 2 | ||||
-rw-r--r-- | ACEXML/common/StrCharStream.cpp | 64 | ||||
-rw-r--r-- | ACEXML/common/StrCharStream.h | 20 | ||||
-rw-r--r-- | ACEXML/common/XML_Common.dsp | 8 | ||||
-rw-r--r-- | ACEXML/examples/SAXPrint/Makefile | 2 | ||||
-rw-r--r-- | ACEXML/parser/parser/Makefile | 1 | ||||
-rw-r--r-- | ACEXML/parser/parser/Parser.cpp | 24 |
19 files changed, 568 insertions, 57 deletions
diff --git a/ACEXML/apps/svcconf/Makefile b/ACEXML/apps/svcconf/Makefile index 41395961076..9adc75b4372 100644 --- a/ACEXML/apps/svcconf/Makefile +++ b/ACEXML/apps/svcconf/Makefile @@ -49,6 +49,7 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU $(ACE_ROOT)/ACEXML/common/ContentHandler.h \ $(ACE_ROOT)/ACEXML/common/Env.h \ $(ACE_ROOT)/ACEXML/common/XML_Macros.h \ + $(ACE_ROOT)/ace/Exception_Macros.h \ $(ACE_ROOT)/ACEXML/common/Exception.h \ $(ACE_ROOT)/ACEXML/common/XML_Types.h \ $(ACE_ROOT)/ace/OS.h \ @@ -157,7 +158,7 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU $(ACE_ROOT)/ace/Svc_Conf_Tokens.h \ $(ACE_ROOT)/ace/DLL.h \ $(ACE_ROOT)/ace/Service_Object.i \ - $(ACE_ROOT)/ace/Service_Types.i \ + $(ACE_ROOT)/ace/Service_Types.i Svcconf_Handler.i \ $(ACE_ROOT)/ace/Service_Config.h \ $(ACE_ROOT)/ace/Unbounded_Queue.h \ $(ACE_ROOT)/ace/Unbounded_Queue.inl \ @@ -240,6 +241,7 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU $(ACE_ROOT)/ACEXML/common/ContentHandler.h \ $(ACE_ROOT)/ACEXML/common/Env.h \ $(ACE_ROOT)/ACEXML/common/XML_Macros.h \ + $(ACE_ROOT)/ace/Exception_Macros.h \ $(ACE_ROOT)/ACEXML/common/Exception.h \ $(ACE_ROOT)/ACEXML/common/XML_Types.h \ $(ACE_ROOT)/ace/OS.h \ @@ -397,7 +399,9 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU Svcconf_Handler.h \ $(ACE_ROOT)/ACEXML/common/DefaultHandler.h \ $(ACE_ROOT)/ACEXML/common/DefaultHandler.i \ + Svcconf_Handler.i \ $(ACE_ROOT)/ACEXML/common/FileCharStream.h \ + $(ACE_ROOT)/ACEXML/common/Encoding.h \ $(ACE_ROOT)/ACEXML/common/StrCharStream.h # IF YOU PUT ANYTHING HERE IT WILL GO AWAY diff --git a/ACEXML/common/CharStream.h b/ACEXML/common/CharStream.h index 719e4edb3e9..1b5c199f527 100644 --- a/ACEXML/common/CharStream.h +++ b/ACEXML/common/CharStream.h @@ -66,6 +66,11 @@ public: */ virtual int peek (void) = 0; + /* + * Get the character encoding for a byte stream or URI. + */ + virtual const ACEXML_Char *getEncoding (void) = 0; + }; #include "ace/post.h" diff --git a/ACEXML/common/Encoding.cpp b/ACEXML/common/Encoding.cpp new file mode 100644 index 00000000000..7fe8811cdbd --- /dev/null +++ b/ACEXML/common/Encoding.cpp @@ -0,0 +1,53 @@ +// -*- C++ -*- $Id$ + +#include "ACEXML/common/Encoding.h" + +const ACEXML_Char* ACEXML_Encoding::encoding_names_[8] = { + ACE_TEXT ("UCS-4BE"), + ACE_TEXT ("UCS-4LE"), + ACE_TEXT ("UCS-4_2143"), + ACE_TEXT ("UCS-4_3412"), + ACE_TEXT ("UTF-16BE"), + ACE_TEXT ("UTF-16LE"), + ACE_TEXT ("UTF-8"), + ACE_TEXT ("Unsupported Encoding") +}; + +const ACEXML_UTF8 ACEXML_Encoding::byte_order_mark_[][4] = { + { '\x00', '\x00', '\xFE', '\xFF' }, // UCS-4, big-endian (1234 order) + { '\xFF', '\xFE', '\x00', '\x00' }, // UCS-4, little-endian (4321 order) + { '\x00', '\x00', '\xFF', '\xFE' }, // UCS-4, unusual octet order (2143) + { '\xFE', '\xFF', '\x00', '\x00' }, // UCS-4, unusual octet order (3412) + { '\xFE', '\xFF', '\xFF', '\xFF' }, // UTF-16, big-endian (3 & 4 ignored) + { '\xFF', '\xFE', '\xFF', '\xFF' }, // UTF-16, little-endian ( 3 & 4 ignored) + { '\xEF', '\xBB', '\xBF', '\xFF' } // UTF-8 +}; + +const ACEXML_UTF8 ACEXML_Encoding::magic_values_[][4] = { + { '\x00', '\x00', '\x00', '\x3c' }, // + { '\x3c', '\x00', '\x00', '\x00' }, // UCS-4 and variants + { '\x00', '\x00', '\x3c', '\x00' }, // + { '\x00', '\x3c', '\x00', '\x00' }, // + { '\x00', '\x3c', '\x00', '\x3f' }, // UTF-16BE + { '\x3c', '\x00', '\x3f', '\x00' }, // UTF-16LE + { '\x3c', '\x3f', '\x78', '\x6d' }, // UTF-8 +}; + +const ACEXML_Char* +ACEXML_Encoding::get_encoding (const char* input) +{ + if (ACE_OS::memcmp (&ACEXML_Encoding::byte_order_mark_[ACEXML_Encoding::UTF16BE][0], input, 2) == 0) + return ACEXML_Encoding::encoding_names_[ACEXML_Encoding::UTF16BE]; + else if (ACE_OS::memcmp (&ACEXML_Encoding::byte_order_mark_[ACEXML_Encoding::UTF16LE][0], input, 2) == 0) + return ACEXML_Encoding::encoding_names_[ACEXML_Encoding::UTF16LE]; + else if (ACE_OS::memcmp (&ACEXML_Encoding::byte_order_mark_[ACEXML_Encoding::UTF8][0], input, 4) == 0) + return ACEXML_Encoding::encoding_names_[ACEXML_Encoding::UTF8]; + else if (ACE_OS::memcmp (&ACEXML_Encoding::magic_values_[ACEXML_Encoding::UTF16BE][0], input, 4) == 0) + return ACEXML_Encoding::encoding_names_[ACEXML_Encoding::UTF16BE]; + else if (ACE_OS::memcmp (&ACEXML_Encoding::magic_values_[ACEXML_Encoding::UTF16LE][0], input, 4) == 0) + return ACEXML_Encoding::encoding_names_[ACEXML_Encoding::UTF16LE]; + else if (ACE_OS::memcmp (&ACEXML_Encoding::magic_values_[ACEXML_Encoding::UTF8][0], input, 4) == 0) + return ACEXML_Encoding::encoding_names_[ACEXML_Encoding::UTF8]; + else + return ACEXML_Encoding::encoding_names_[ACEXML_Encoding::OTHER]; +} diff --git a/ACEXML/common/Encoding.h b/ACEXML/common/Encoding.h new file mode 100644 index 00000000000..fc0917c0b91 --- /dev/null +++ b/ACEXML/common/Encoding.h @@ -0,0 +1,61 @@ +// -*- C++ -*- + +//============================================================================= +/** + * @file Encoding.h + * + * This file provides utility functions to determine the encoding of a file + * or a byte stream automatically. + * + * $Id$ + * + * @author Krishnakumar B <kitty@cs.wustl.edu> + */ +//============================================================================= + +#ifndef _ACEXML_ENCODING_H +#define _ACEXML_ENCODING_H + +#include "ace/pre.h" +#include "ACEXML/common/ACEXML_Export.h" + +#if !defined (ACE_LACKS_PRAGMA_ONCE) +#pragma once +#endif /* ACE_LACKS_PRAGMA_ONCE */ + +#include "ACEXML/common/XML_Types.h" + +/** + * @class ACEXML_Encoding Encoding.h "ACEXML/common/Encoding.h" + * + * @brief ACEXML_Encoding + * + * Wrapper class for determining the encoding of a file or a byte stream. + */ +class ACEXML_Export ACEXML_Encoding +{ +public: + enum { + UCS4BE = 0, + UCS4LE = 1, + UCS4_2143 = 2, + UCS4_3412 = 3, + UTF16BE = 4, + UTF16LE = 5, + UTF8 = 6, + OTHER = 7 + } ENCODING; + + static const ACEXML_Char* encoding_names_[8]; + + static const ACEXML_UTF8 byte_order_mark_[][4]; + + static const ACEXML_UTF8 magic_values_[][4]; + + static const ACEXML_Char* get_encoding (const char* input); + +}; + +#include "ace/post.h" + +#endif /* _ACEXML_ENCODING_H */ diff --git a/ACEXML/common/Exception.cpp b/ACEXML/common/Exception.cpp index 9bdace863e2..3086a8a7bfb 100644 --- a/ACEXML/common/Exception.cpp +++ b/ACEXML/common/Exception.cpp @@ -31,14 +31,6 @@ ACEXML_Exception::~ACEXML_Exception (void) } -ACEXML_Exception& -ACEXML_Exception::operator= (const ACEXML_Exception& src) -{ - this->exception_name_ = src.exception_name_; - ACE_ASSERT (this->exception_name_ != 0); - return *this; -} - int ACEXML_Exception::is_a (const ACEXML_Char *name) { diff --git a/ACEXML/common/FileCharStream.cpp b/ACEXML/common/FileCharStream.cpp index 943b8cddd1e..d319ba90e03 100644 --- a/ACEXML/common/FileCharStream.cpp +++ b/ACEXML/common/FileCharStream.cpp @@ -4,16 +4,13 @@ #include "ace/ACE.h" ACEXML_FileCharStream::ACEXML_FileCharStream (void) - : filename_ (0), - infile_ (NULL) + : filename_ (0), encoding_ (0), size_ (0), infile_ (NULL), peek_ (0) { } ACEXML_FileCharStream::~ACEXML_FileCharStream (void) { - if (this->infile_ != NULL) - ACE_OS::fclose (this->infile_); - delete this->filename_; + this->close(); } int @@ -22,6 +19,9 @@ ACEXML_FileCharStream::open (const ACEXML_Char *name) delete[] this->filename_; this->filename_ = 0; + delete[] this->encoding_; + this->encoding_ = 0; + this->infile_ = ACE_OS::fopen (name, ACE_TEXT ("r")); if (this->infile_ == NULL) return -1; @@ -32,10 +32,57 @@ ACEXML_FileCharStream::open (const ACEXML_Char *name) this->size_ = statbuf.st_size; this->filename_ = ACE::strnew (name); + if (this->determine_encoding() == -1) + return -1; return 0; } int +ACEXML_FileCharStream::determine_encoding (void) +{ + char input[4]; + int retval = 0; + int i = 0; + for (; i < 4 && retval != -1; ++i) + retval = this->getchar(input[i]); + if (i < 4) + return -1; + const ACEXML_Char* temp = ACEXML_Encoding::get_encoding (input); + if (!temp) + return -1; + if (ACE_OS::strcmp (temp, + ACEXML_Encoding::encoding_names_[ACEXML_Encoding::OTHER]) == 0) + return -1; + else + { + this->encoding_ = ACE::strnew (temp); + ACE_DEBUG ((LM_DEBUG, "File's encoding is %s\n", this->encoding_)); + } + // Rewind the stream + this->rewind(); + // Move over the byte-order-mark if present. + char ch; + for (int j = 0; j < 2; ++j) + { + this->getchar (ch); + if (ch == '\xFF' || ch == '\xFE' || ch == '\xEF') + continue; + else + { + ungetc (ch, this->infile_); + break; + } + } + return 0; +} + +void +ACEXML_FileCharStream::rewind() +{ + ACE_OS::rewind (this->infile_); +} + +int ACEXML_FileCharStream::available (void) { long curr; @@ -47,18 +94,20 @@ ACEXML_FileCharStream::available (void) int ACEXML_FileCharStream::close (void) { - delete this->filename_; - this->filename_ = 0; + if (this->infile_ != NULL) ACE_OS::fclose (this->infile_); - this->infile_ = NULL; + delete[] this->filename_; + delete[] this->encoding_; this->size_ = 0; + this->peek_ = 0; return 0; } + int -ACEXML_FileCharStream::get (ACEXML_Char& ch) +ACEXML_FileCharStream::getchar (char& ch) { - ch = (ACEXML_Char) ACE_OS::fgetc (this->infile_); + ch = ACE_OS::fgetc (this->infile_); return (feof(this->infile_) ? -1 : 0); } @@ -70,9 +119,100 @@ ACEXML_FileCharStream::read (ACEXML_Char *str, } int +ACEXML_FileCharStream::get (ACEXML_Char& ch) +{ +#if defined (ACE_USES_WCHAR) + return this->get_i (ch); +#else + ch = (ACEXML_Char) ACE_OS::fgetc (this->infile_); + return (feof(this->infile_) ? -1 : 0); +#endif /* ACE_USES_WCHAR */ +} + +#if defined (ACE_USES_WCHAR) +int +ACEXML_FileCharStream::get_i (ACEXML_Char& ch) +{ + if (ACE_OS::strcmp (this->encoding_, ACE_TEXT ("UTF-8")) == 0) + { + ch = (ACEXML_Char) ACE_OS::fgetc (this->infile_); + return (feof(this->infile_) ? -1 : 0); + } + // If we have a value in peek_, return it. + if (this->peek_ != 0) + { + ch = this->peek_; + this->peek_ = 0; + return 0; + } + + int BE = (ACE_OS::strcmp (this->encoding_, + ACE_TEXT ("UTF-16BE")) == 0) ? 1 : 0; + ACEXML_Char input[2]; + for (int i = 0; i < 2 && !feof (this->infile_); ++i) + { + input[i] = ACE_OS::fgetwc (this->infile_); + } + if (i < 2) + { + ch = 0; + return -1; + } + ch = (BE) ? (input[0] << 8) | input[1] : (input[1] << 8) | input[0]; + return 0; +} +#endif /* ACE_USES_WCHAR */ + +int ACEXML_FileCharStream::peek (void) { +#if defined (ACE_USES_WCHAR) + return this->peek_i(); +#else ACEXML_Char ch = ACE_OS::fgetc (this->infile_); ::ungetc (ch, this->infile_); return ch; +#endif /* ACE_USES_WCHAR */ +} + +#if defined (ACE_USES_WCHAR) +int +ACEXML_FileCharStream::peek_i (void) +{ + // If we are reading a UTF-8 encoded file, just use the plain unget. + if (ACE_OS::strcmp (this->encoding_, ACE_TEXT ("UTF-8")) == 0) + { + ACEXML_Char ch = (ACEXML_Char) ACE_OS::fgetc (this->infile_); + ::ungetc (ch, this->infile_); + return ch; + } + + // If somebody had already called peek() and not consumed it, return the + // value held in this->peek_. + if (this->peek_ != 0) + return this->peek_; + + // Peek into the stream. This reads two characters off the stream, keeps + // it in peek_. + int BE = (ACE_OS::strcmp (this->encoding_, + ACE_TEXT ("UTF-16BE")) == 0) ? 1 : 0; + ACEXML_Char input[2]; + for (int i = 0; i < 2 && !feof (this->infile_); ++i) + { + input[i] = ACE_OS::fgetwc (this->infile_); + } + if (i < 2) + { + this->peek_ = 0; + return -1; + } + this->peek_ = (BE) ? (input[0] << 8) | input[1] : (input[1] << 8) | input[0]; + return this->peek_; +} +#endif /* ACE_USES_WCHAR */ + +const ACEXML_Char* +ACEXML_FileCharStream::getEncoding (void) +{ + return this->encoding_; } diff --git a/ACEXML/common/FileCharStream.h b/ACEXML/common/FileCharStream.h index 46004e4e715..79def212f48 100644 --- a/ACEXML/common/FileCharStream.h +++ b/ACEXML/common/FileCharStream.h @@ -21,6 +21,7 @@ #endif /* ACE_LACKS_PRAGMA_ONCE */ #include "ACEXML/common/CharStream.h" +#include "ACEXML/common/Encoding.h" #include "ace/streams.h" /** @@ -64,19 +65,61 @@ public: size_t len); /** + * Determine the encoding of the file. + */ + virtual int determine_encoding (void); + + + /** * Peek the next ACEXML_Char in the CharStream. Return the - * character if succeess, -1 if EOS is reached. + * character if success, -1 if EOF is reached. */ virtual int peek (void); + /** + * Resets the file pointer to the beginning of the stream. + */ + virtual void rewind (void); -private: - ACEXML_Char *filename_; + /* + * Get the character encoding for a byte stream or URI. + */ + virtual const ACEXML_Char *getEncoding (void); - off_t size_; +protected: - FILE *infile_; + /** Read the next character as a normal character. Return -1 if EOF is + * reached, else return 0. + */ + virtual int getchar (char& ch); + +private: + +#if defined (ACE_USES_WCHAR) + /** + * Read the next character from the stream taking into account the + * encoding of the file. + */ + int get_i (ACEXML_Char& ch); + /** + * Read the next character from the stream taking into account the + * encoding of the file. Subsequent call to get() returns this + * character. + */ + int peek_i (void); + +#endif /* ACE_USES_WCHAR */ + + ACEXML_Char* filename_; + ACEXML_Char* encoding_; + off_t size_; + FILE* infile_; + // This is needed to ensure that we can implement a peek operation on a + // UTF-16 encoded file. It is a bit hackish, but there is no other way of + // implementing a peek() as the standard I/O FILE* guarantees only one + // pushback. + ACEXML_Char peek_; }; diff --git a/ACEXML/common/HttpCharStream.cpp b/ACEXML/common/HttpCharStream.cpp index a397a4fc114..29f34023229 100644 --- a/ACEXML/common/HttpCharStream.cpp +++ b/ACEXML/common/HttpCharStream.cpp @@ -23,7 +23,8 @@ ACEXML_HttpCharStream::ACEXML_HttpCharStream (void) url_addr_(0), stream_(0), connector_(0), - size_(0) + size_(0), + encoding_ (0) { } @@ -286,6 +287,10 @@ ACEXML_HttpCharStream::close (void) this->connector_ = 0; this->size_ = 0; + + delete[] this->encoding_; + this->encoding_ = 0; + return 0; } @@ -312,3 +317,15 @@ ACEXML_HttpCharStream::peek (void) { return this->stream_->peek_char (0); } + +void +ACEXML_HttpCharStream::rewind (void) +{ + this->stream_->rewind(); +} + +const ACEXML_Char* +ACEXML_HttpCharStream::getEncoding (void) +{ + return this->encoding_; +} diff --git a/ACEXML/common/HttpCharStream.h b/ACEXML/common/HttpCharStream.h index 1e8cc66057e..7bce23a224b 100644 --- a/ACEXML/common/HttpCharStream.h +++ b/ACEXML/common/HttpCharStream.h @@ -71,6 +71,15 @@ public: */ virtual int peek (void); + /** + * Resets the file pointer to the beginning of the stream. + */ + virtual void rewind (void); + + /** + * Get the encoding of the file + */ + virtual const ACEXML_Char* getEncoding (void); private: @@ -94,6 +103,8 @@ private: off_t size_; + ACEXML_Char* encoding_; + }; diff --git a/ACEXML/common/InputSource.cpp b/ACEXML/common/InputSource.cpp index 1149643f44a..0dcdeb0f2ba 100644 --- a/ACEXML/common/InputSource.cpp +++ b/ACEXML/common/InputSource.cpp @@ -17,6 +17,7 @@ ACEXML_InputSource::ACEXML_InputSource (ACEXML_CharStream *stm) charStream_ (stm), encoding_ (0) { + this->setEncoding (stm->getEncoding()); } /* @@ -77,8 +78,7 @@ ACEXML_InputSource::setCharStream (ACEXML_CharStream *stm) /* * Set the character stream for this input source. - * / - virtual void setCharacterStream (Reader *characterStream); + * */ void diff --git a/ACEXML/common/Makefile b/ACEXML/common/Makefile index a041da36fbd..ee20d8016f5 100644 --- a/ACEXML/common/Makefile +++ b/ACEXML/common/Makefile @@ -28,7 +28,8 @@ FILES = Attributes_Def_Builder \ Mem_Map_Stream \ URL_Addr \ HttpCharStream \ - StreamFactory + StreamFactory \ + Encoding DEFS = $(addsuffix .h,$(FILES)) LSRC = $(addsuffix .cpp,$(FILES)) @@ -156,6 +157,7 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU $(ACE_ROOT)/ace/SString.i \ Env.h \ XML_Macros.h \ + $(ACE_ROOT)/ace/Exception_Macros.h \ Exception.h \ Exception.i \ Env.i \ @@ -377,6 +379,7 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU ContentHandler.h \ Env.h \ XML_Macros.h \ + $(ACE_ROOT)/ace/Exception_Macros.h \ Exception.h \ XML_Types.h \ $(ACE_ROOT)/ace/OS.h \ @@ -577,6 +580,7 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU $(ACE_ROOT)/ace/SString.i \ Env.h \ XML_Macros.h \ + $(ACE_ROOT)/ace/Exception_Macros.h \ Exception.h \ Exception.i \ Env.i \ @@ -683,6 +687,7 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU $(ACE_ROOT)/ace/SString.i \ Env.h \ XML_Macros.h \ + $(ACE_ROOT)/ace/Exception_Macros.h \ Exception.h \ Exception.i \ Env.i \ @@ -696,6 +701,7 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU $(ACE_ROOT)/ace/ace_wchar.h \ $(ACE_ROOT)/ace/ace_wchar.inl \ XML_Macros.h \ + $(ACE_ROOT)/ace/Exception_Macros.h \ Exception.h \ XML_Types.h \ $(ACE_ROOT)/ace/OS.h \ @@ -884,6 +890,7 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU $(ACE_ROOT)/ace/Auto_Ptr.cpp \ $(ACE_ROOT)/ace/SString.i \ XML_Macros.h \ + $(ACE_ROOT)/ace/Exception_Macros.h \ Exception.i .obj/FileCharStream.o .obj/FileCharStream.so .shobj/FileCharStream.o .shobj/FileCharStream.so: FileCharStream.cpp \ @@ -981,7 +988,8 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU $(ACE_ROOT)/ace/Auto_Ptr.h \ $(ACE_ROOT)/ace/Auto_Ptr.i \ $(ACE_ROOT)/ace/Auto_Ptr.cpp \ - $(ACE_ROOT)/ace/SString.i + $(ACE_ROOT)/ace/SString.i \ + Encoding.h .obj/InputSource.o .obj/InputSource.so .shobj/InputSource.o .shobj/InputSource.so: InputSource.cpp \ InputSource.h \ @@ -1415,6 +1423,7 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU $(ACE_ROOT)/ace/Auto_Ptr.cpp \ $(ACE_ROOT)/ace/SString.i \ XML_Macros.h \ + $(ACE_ROOT)/ace/Exception_Macros.h \ Exception.i \ SAXExceptions.i @@ -1513,7 +1522,8 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU $(ACE_ROOT)/ace/Auto_Ptr.h \ $(ACE_ROOT)/ace/Auto_Ptr.i \ $(ACE_ROOT)/ace/Auto_Ptr.cpp \ - $(ACE_ROOT)/ace/SString.i + $(ACE_ROOT)/ace/SString.i \ + Encoding.h .obj/Transcode.o .obj/Transcode.so .shobj/Transcode.o .shobj/Transcode.so: Transcode.cpp \ Transcode.h \ @@ -1710,6 +1720,7 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU $(ACE_ROOT)/ace/SString.i \ Env.h \ XML_Macros.h \ + $(ACE_ROOT)/ace/Exception_Macros.h \ Exception.h \ Exception.i \ Env.i \ @@ -1728,6 +1739,7 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU ContentHandler.h \ Env.h \ XML_Macros.h \ + $(ACE_ROOT)/ace/Exception_Macros.h \ Exception.h \ XML_Types.h \ $(ACE_ROOT)/ace/OS.h \ @@ -2557,6 +2569,7 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU $(ACE_ROOT)/ace/SString.i \ FileCharStream.h \ CharStream.h \ + Encoding.h \ HttpCharStream.h \ URL_Addr.h \ $(ACE_ROOT)/ace/INET_Addr.h \ @@ -2681,4 +2694,100 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU $(ACE_ROOT)/ace/WFMO_Reactor.h \ $(ACE_ROOT)/ace/Connector.cpp +.obj/Encoding.o .obj/Encoding.so .shobj/Encoding.o .shobj/Encoding.so: Encoding.cpp \ + Encoding.h \ + $(ACE_ROOT)/ace/pre.h \ + ACEXML_Export.h \ + $(ACE_ROOT)/ace/post.h \ + $(ACE_ROOT)/ace/ace_wchar.h \ + $(ACE_ROOT)/ace/ace_wchar.inl \ + XML_Types.h \ + $(ACE_ROOT)/ace/OS.h \ + $(ACE_ROOT)/ace/OS_Dirent.h \ + $(ACE_ROOT)/ace/OS_Export.h \ + $(ACE_ROOT)/ace/OS_Errno.h \ + $(ACE_ROOT)/ace/OS_Errno.inl \ + $(ACE_ROOT)/ace/OS_Dirent.inl \ + $(ACE_ROOT)/ace/OS_String.h \ + $(ACE_ROOT)/ace/Basic_Types.h \ + $(ACE_ROOT)/ace/ACE_export.h \ + $(ACE_ROOT)/ace/Basic_Types.i \ + $(ACE_ROOT)/ace/OS_String.inl \ + $(ACE_ROOT)/ace/OS_Memory.h \ + $(ACE_ROOT)/ace/OS_Memory.inl \ + $(ACE_ROOT)/ace/OS_TLI.h \ + $(ACE_ROOT)/ace/OS_TLI.inl \ + $(ACE_ROOT)/ace/Time_Value.h \ + $(ACE_ROOT)/ace/Time_Value.inl \ + $(ACE_ROOT)/ace/Default_Constants.h \ + $(ACE_ROOT)/ace/Global_Macros.h \ + $(ACE_ROOT)/ace/Min_Max.h \ + $(ACE_ROOT)/ace/streams.h \ + $(ACE_ROOT)/ace/Trace.h \ + $(ACE_ROOT)/ace/OS.i \ + $(ACE_ROOT)/ace/SString.h \ + $(ACE_ROOT)/ace/String_Base.h \ + $(ACE_ROOT)/ace/ACE.h \ + $(ACE_ROOT)/ace/Flag_Manip.h \ + $(ACE_ROOT)/ace/Flag_Manip.i \ + $(ACE_ROOT)/ace/Handle_Ops.h \ + $(ACE_ROOT)/ace/Handle_Ops.i \ + $(ACE_ROOT)/ace/Lib_Find.h \ + $(ACE_ROOT)/ace/Lib_Find.i \ + $(ACE_ROOT)/ace/Init_ACE.h \ + $(ACE_ROOT)/ace/Init_ACE.i \ + $(ACE_ROOT)/ace/Sock_Connect.h \ + $(ACE_ROOT)/ace/Sock_Connect.i \ + $(ACE_ROOT)/ace/ACE.i \ + $(ACE_ROOT)/ace/String_Base_Const.h \ + $(ACE_ROOT)/ace/String_Base.i \ + $(ACE_ROOT)/ace/Malloc_Base.h \ + $(ACE_ROOT)/ace/String_Base.cpp \ + $(ACE_ROOT)/ace/Malloc.h \ + $(ACE_ROOT)/ace/Log_Msg.h \ + $(ACE_ROOT)/ace/Log_Priority.h \ + $(ACE_ROOT)/ace/OS_Log_Msg_Attributes.h \ + $(ACE_ROOT)/ace/OS_Log_Msg_Attributes.inl \ + $(ACE_ROOT)/ace/Malloc.i \ + $(ACE_ROOT)/ace/Malloc_T.h \ + $(ACE_ROOT)/ace/Synch.h \ + $(ACE_ROOT)/ace/Synch.i \ + $(ACE_ROOT)/ace/Synch_T.h \ + $(ACE_ROOT)/ace/Synch_T.i \ + $(ACE_ROOT)/ace/Thread.h \ + $(ACE_ROOT)/ace/Thread_Adapter.h \ + $(ACE_ROOT)/ace/Base_Thread_Adapter.h \ + $(ACE_ROOT)/ace/Base_Thread_Adapter.inl \ + $(ACE_ROOT)/ace/Thread_Adapter.inl \ + $(ACE_ROOT)/ace/Thread.i \ + $(ACE_ROOT)/ace/Synch_T.cpp \ + $(ACE_ROOT)/ace/Malloc_Allocator.h \ + $(ACE_ROOT)/ace/Malloc_Allocator.i \ + $(ACE_ROOT)/ace/Free_List.h \ + $(ACE_ROOT)/ace/Free_List.i \ + $(ACE_ROOT)/ace/Free_List.cpp \ + $(ACE_ROOT)/ace/Malloc_T.i \ + $(ACE_ROOT)/ace/Malloc_T.cpp \ + $(ACE_ROOT)/ace/Memory_Pool.h \ + $(ACE_ROOT)/ace/Event_Handler.h \ + $(ACE_ROOT)/ace/Event_Handler.i \ + $(ACE_ROOT)/ace/Signal.h \ + $(ACE_ROOT)/ace/Signal.i \ + $(ACE_ROOT)/ace/Mem_Map.h \ + $(ACE_ROOT)/ace/Mem_Map.i \ + $(ACE_ROOT)/ace/SV_Semaphore_Complex.h \ + $(ACE_ROOT)/ace/SV_Semaphore_Simple.h \ + $(ACE_ROOT)/ace/SV_Semaphore_Simple.i \ + $(ACE_ROOT)/ace/SV_Semaphore_Complex.i \ + $(ACE_ROOT)/ace/Unbounded_Set.h \ + $(ACE_ROOT)/ace/Node.h \ + $(ACE_ROOT)/ace/Node.cpp \ + $(ACE_ROOT)/ace/Unbounded_Set.inl \ + $(ACE_ROOT)/ace/Unbounded_Set.cpp \ + $(ACE_ROOT)/ace/Memory_Pool.i \ + $(ACE_ROOT)/ace/Auto_Ptr.h \ + $(ACE_ROOT)/ace/Auto_Ptr.i \ + $(ACE_ROOT)/ace/Auto_Ptr.cpp \ + $(ACE_ROOT)/ace/SString.i + # IF YOU PUT ANYTHING HERE IT WILL GO AWAY diff --git a/ACEXML/common/Mem_Map_Stream.cpp b/ACEXML/common/Mem_Map_Stream.cpp index 88313e76c00..cb96b21bc0c 100644 --- a/ACEXML/common/Mem_Map_Stream.cpp +++ b/ACEXML/common/Mem_Map_Stream.cpp @@ -39,16 +39,13 @@ ACEXML_Mem_Map_Stream::get_char (void) return *this->get_pos_++; } -int +void ACEXML_Mem_Map_Stream::rewind (void) { - this->recv_pos_ = - ACE_reinterpret_cast (char *, + this->recv_pos_ = ACE_reinterpret_cast (char *, this->mem_map_.addr ()); this->get_pos_ = this->recv_pos_; - this->end_of_mapping_plus1_ = - this->recv_pos_ + this->mem_map_.size (); - return 0; + this->end_of_mapping_plus1_ = this->recv_pos_ + this->mem_map_.size (); } int diff --git a/ACEXML/common/Mem_Map_Stream.h b/ACEXML/common/Mem_Map_Stream.h index faf35bdb7a1..6dfd468a90e 100644 --- a/ACEXML/common/Mem_Map_Stream.h +++ b/ACEXML/common/Mem_Map_Stream.h @@ -115,7 +115,7 @@ public: * This works since all the data has been cached in the memory-mapped * backing store. */ - virtual int rewind (void); + virtual void rewind (void); /** * Returns the nth character <offset> from the <get> position in the diff --git a/ACEXML/common/StrCharStream.cpp b/ACEXML/common/StrCharStream.cpp index c185df0a070..046511407c2 100644 --- a/ACEXML/common/StrCharStream.cpp +++ b/ACEXML/common/StrCharStream.cpp @@ -1,19 +1,16 @@ // $Id$ #include "ACEXML/common/StrCharStream.h" +#include "ACEXML/common/Encoding.h" #include "ace/ACE.h" ACEXML_StrCharStream::ACEXML_StrCharStream (void) - : start_ (0), - ptr_ (0), - end_ (0) + : start_ (0), ptr_ (0), end_ (0), encoding_ (0) { } ACEXML_StrCharStream::ACEXML_StrCharStream (const ACEXML_Char *str) - : start_ (0), - ptr_ (0), - end_ (0) + : start_ (0), ptr_ (0), end_ (0), encoding_ (0) { this->open (str); } @@ -21,19 +18,21 @@ ACEXML_StrCharStream::ACEXML_StrCharStream (const ACEXML_Char *str) ACEXML_StrCharStream::~ACEXML_StrCharStream (void) { - delete this->start_; + this->close(); } int ACEXML_StrCharStream::open (const ACEXML_Char *str) { - delete this->start_; + delete[] this->start_; + delete[] this->encoding_; - if (str != 0 && - (this->start_ = ACE::strnew (str)) != 0) + if (str != 0 && (this->start_ = ACE::strnew (str)) != 0) { this->ptr_ = this->start_; this->end_ = this->start_ + ACE_OS_String::strlen (this->start_); + if (this->determine_encoding() == -1) + return -1; return 0; } @@ -52,12 +51,46 @@ ACEXML_StrCharStream::available (void) int ACEXML_StrCharStream::close (void) { - delete this->start_; + delete[] this->start_; + delete[] this->encoding_; this->start_ = this->ptr_ = this->end_ = 0; return 0; } int +ACEXML_StrCharStream::determine_encoding (void) +{ + char input[4]; + int retval = 0; + char* sptr = (char*)this->start_; + int i = 0; + for ( ; i < 4 && sptr != (char*)this->end_; ++sptr, ++i) + { + retval = input[i] = *sptr; + } + if (i < 4) + return -1; + const ACEXML_Char* temp = ACEXML_Encoding::get_encoding (input); + if (!temp) + return -1; + if (ACE_OS::strcmp (temp, + ACEXML_Encoding::encoding_names_[ACEXML_Encoding::OTHER]) == 0) + return -1; + else + { + this->encoding_ = ACE::strnew (temp); + ACE_DEBUG ((LM_DEBUG, "String's encoding is %s\n", this->encoding_)); + } + return 0; +} + +void +ACEXML_StrCharStream::rewind (void) +{ + this->ptr_ = this->start_; +} + +int ACEXML_StrCharStream::get (ACEXML_Char& ch) { if (this->start_ != 0 && this->ptr_ != this->end_) @@ -90,8 +123,13 @@ ACEXML_StrCharStream::read (ACEXML_Char *str, int ACEXML_StrCharStream::peek (void) { - if (this->start_ !=0 && - this->ptr_ != this->end_) + if (this->start_ != 0 && this->ptr_ != this->end_) return *this->ptr_; return -1; } + +const ACEXML_Char* +ACEXML_StrCharStream::getEncoding (void) +{ + return this->encoding_; +} diff --git a/ACEXML/common/StrCharStream.h b/ACEXML/common/StrCharStream.h index 461d26c2270..6321decb9a2 100644 --- a/ACEXML/common/StrCharStream.h +++ b/ACEXML/common/StrCharStream.h @@ -55,6 +55,11 @@ public: virtual int close (void); /** + * Determine the encoding of the file. + */ + virtual int determine_encoding (void); + + /** * Read the next ACEXML_Char. Return -1 if we are not able to * return an ACEXML_Char, 0 if EOS is reached, or 1 if succeed. */ @@ -72,14 +77,23 @@ public: */ virtual int peek (void); + /* + * Get the character encoding for a byte stream or URI. + */ + virtual const ACEXML_Char *getEncoding (void); + + /** + * Resets the pointer to the beginning of the stream. + */ + virtual void rewind (void); + private: ACEXML_Char *start_; - ACEXML_Char *ptr_; - ACEXML_Char *end_; -}; + ACEXML_Char* encoding_; +}; #include "ace/post.h" diff --git a/ACEXML/common/XML_Common.dsp b/ACEXML/common/XML_Common.dsp index 25e03f23c36..decdb107711 100644 --- a/ACEXML/common/XML_Common.dsp +++ b/ACEXML/common/XML_Common.dsp @@ -118,6 +118,10 @@ SOURCE=.\Element_Def_Builder.cpp # End Source File
# Begin Source File
+SOURCE=.\Encoding.cpp
+# End Source File
+# Begin Source File
+
SOURCE=.\Env.cpp
# End Source File
# Begin Source File
@@ -226,6 +230,10 @@ SOURCE=.\Element_Def_Builder.h # End Source File
# Begin Source File
+SOURCE=.\Encoding.h
+# End Source File
+# Begin Source File
+
SOURCE=.\EntityResolver.h
# End Source File
# Begin Source File
diff --git a/ACEXML/examples/SAXPrint/Makefile b/ACEXML/examples/SAXPrint/Makefile index e6ee6c07959..c7737ae8414 100644 --- a/ACEXML/examples/SAXPrint/Makefile +++ b/ACEXML/examples/SAXPrint/Makefile @@ -53,6 +53,7 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU $(ACE_ROOT)/ACEXML/common/ContentHandler.h \ $(ACE_ROOT)/ACEXML/common/Env.h \ $(ACE_ROOT)/ACEXML/common/XML_Macros.h \ + $(ACE_ROOT)/ace/Exception_Macros.h \ $(ACE_ROOT)/ACEXML/common/Exception.h \ $(ACE_ROOT)/ACEXML/common/XML_Types.h \ $(ACE_ROOT)/ace/OS.h \ @@ -165,6 +166,7 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU $(ACE_ROOT)/ACEXML/common/ContentHandler.h \ $(ACE_ROOT)/ACEXML/common/Env.h \ $(ACE_ROOT)/ACEXML/common/XML_Macros.h \ + $(ACE_ROOT)/ace/Exception_Macros.h \ $(ACE_ROOT)/ACEXML/common/Exception.h \ $(ACE_ROOT)/ACEXML/common/XML_Types.h \ $(ACE_ROOT)/ace/OS.h \ diff --git a/ACEXML/parser/parser/Makefile b/ACEXML/parser/parser/Makefile index e9089cb5d23..91aaf5bf525 100644 --- a/ACEXML/parser/parser/Makefile +++ b/ACEXML/parser/parser/Makefile @@ -183,6 +183,7 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU $(ACE_ROOT)/ACEXML/common/ContentHandler.h \ $(ACE_ROOT)/ACEXML/common/Env.h \ $(ACE_ROOT)/ACEXML/common/XML_Macros.h \ + $(ACE_ROOT)/ace/Exception_Macros.h \ $(ACE_ROOT)/ACEXML/common/Exception.h \ $(ACE_ROOT)/ACEXML/common/XML_Types.h \ $(ACE_ROOT)/ace/OS.h \ diff --git a/ACEXML/parser/parser/Parser.cpp b/ACEXML/parser/parser/Parser.cpp index 4b9cec81de8..4b49b5ed895 100644 --- a/ACEXML/parser/parser/Parser.cpp +++ b/ACEXML/parser/parser/Parser.cpp @@ -294,9 +294,22 @@ ACEXML_Parser::parse_xml_prolog (ACEXML_ENV_SINGLE_ARG_DECL) return; } else + { seen_encoding = 1; - // @@ Handle encoding here. We don't handle - // various encodings for this parser. + if (ACE_OS::strcmp (astring, + this->instream_->getEncoding()) != 0) + { + if (ACE_OS::strstr (astring, + this->instream_->getEncoding()) != 0) + { + ACE_ERROR ((LM_ERROR, + ACE_TEXT ("Detected Encoding is %s : Declared Encoding is %s"), + this->instream_->getEncoding(), astring)); + this->report_fatal_error (ACE_TEXT ("Encoding declaration doesn't match detected encoding") ACEXML_ENV_ARG_PARAMETER); + return; + } + } + } continue; } else @@ -1815,8 +1828,11 @@ ACEXML_Parser::parse_token (const ACEXML_Char* keyword) if (keyword == 0) return -1; const ACEXML_Char* ptr = keyword; - for (; *ptr != 0 && this->get() == *ptr; ++ptr) - ; + ACEXML_Char ch; + for (; *ptr != 0 && ((ch = this->get()) == *ptr); ++ptr) + { + // ACE_DEBUG ((LM_DEBUG, ACE_TEXT ("ch = %c : ptr = %c"), ch, *ptr)); + } if (*ptr == 0) return 0; else |