diff options
author | kitty <kitty@ae88bc3d-4319-0410-8dbf-d08b4c9d3795> | 2002-11-15 00:50:17 +0000 |
---|---|---|
committer | kitty <kitty@ae88bc3d-4319-0410-8dbf-d08b4c9d3795> | 2002-11-15 00:50:17 +0000 |
commit | c1ae0478065bfb4136ce1c4ea861428a6b0d1138 (patch) | |
tree | 02286fba3cffd453d2eb953746d58364595350b6 | |
parent | 9341853540c4dfda33b28b1eb1af19b4ada5e97f (diff) | |
download | ATCD-c1ae0478065bfb4136ce1c4ea861428a6b0d1138.tar.gz |
ChangeLogTag: Tue Nov 12 19:48:34 2002 Krishnakumar B <kitty@cs.wustl.edu>
37 files changed, 4365 insertions, 2014 deletions
diff --git a/ACEXML/apps/svcconf/Makefile b/ACEXML/apps/svcconf/Makefile index e908e8ca2f7..8652af06061 100644 --- a/ACEXML/apps/svcconf/Makefile +++ b/ACEXML/apps/svcconf/Makefile @@ -395,7 +395,9 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU $(ACE_ROOT)/ace/Obstack_T.cpp \ $(ACE_ROOT)/ACEXML/parser/parser/Entity_Manager.h \ $(ACE_ROOT)/ACEXML/parser/parser/Entity_Manager.i \ - $(ACE_ROOT)/ACEXML/parser/parser/ParserErrors.h \ + $(ACE_ROOT)/ACEXML/parser/parser/ParserInternals.h \ + $(ACE_ROOT)/ACEXML/parser/parser/ParserContext.h \ + $(ACE_ROOT)/ACEXML/parser/parser/ParserContext.inl \ $(ACE_ROOT)/ACEXML/parser/parser/Parser.i \ Svcconf_Handler.h \ $(ACE_ROOT)/ACEXML/common/DefaultHandler.h \ diff --git a/ACEXML/common/Attributes_Def_Builder.h b/ACEXML/common/Attributes_Def_Builder.h index 26404fe6d86..ed7139bd2a3 100644 --- a/ACEXML/common/Attributes_Def_Builder.h +++ b/ACEXML/common/Attributes_Def_Builder.h @@ -39,7 +39,7 @@ public: typedef auto_ptr<ACEXML_Attribute_Def_Builder> VAR; - typedef enum { + enum ATT_TYPE { CDATA, ID, IDREF, @@ -51,14 +51,14 @@ public: NOTATION, ENUMERATION, ERROR_TYPE - } ATT_TYPE; + }; - typedef enum { + enum DEFAULT_DECL { REQUIRED, IMPLIED, FIXED, INVALID - } DEFAULT_DECL; + }; virtual ~ACEXML_Attribute_Def_Builder () = 0; diff --git a/ACEXML/common/DefaultHandler.cpp b/ACEXML/common/DefaultHandler.cpp index f96e29d3cf3..60d2d783366 100644 --- a/ACEXML/common/DefaultHandler.cpp +++ b/ACEXML/common/DefaultHandler.cpp @@ -103,7 +103,7 @@ ACEXML_DefaultHandler::startPrefixMapping (const ACEXML_Char *, // No-op. } - // *** Methods inherit from ACEXML_DTDHandler. + // *** Methods inherited from ACEXML_DTDHandler. void ACEXML_DefaultHandler::notationDecl (const ACEXML_Char *, @@ -124,7 +124,7 @@ ACEXML_DefaultHandler::unparsedEntityDecl (const ACEXML_Char *, // No-op. } - // Methods inherit from ACEXML_EnitityResolver. + // Methods inherited from ACEXML_EntityResolver. 
ACEXML_InputSource * ACEXML_DefaultHandler::resolveEntity (const ACEXML_Char *, @@ -135,7 +135,7 @@ ACEXML_DefaultHandler::resolveEntity (const ACEXML_Char *, return 0; } - // Methods inherit from ACEXML_ErrorHandler. + // Methods inherited from ACEXML_ErrorHandler. /* * Receive notification of a recoverable error. diff --git a/ACEXML/common/Encoding.cpp b/ACEXML/common/Encoding.cpp index c6e53ed8c47..9ca972816b8 100644 --- a/ACEXML/common/Encoding.cpp +++ b/ACEXML/common/Encoding.cpp @@ -18,8 +18,8 @@ const ACEXML_UTF8 ACEXML_Encoding::byte_order_mark_[][4] = { { '\xFF', '\xFE', '\x00', '\x00' }, // UCS-4, little-endian (4321 order) { '\x00', '\x00', '\xFF', '\xFE' }, // UCS-4, unusual octet order (2143) { '\xFE', '\xFF', '\x00', '\x00' }, // UCS-4, unusual octet order (3412) - { '\xFE', '\xFF', '\xFF', '\xFF' }, // UTF-16, big-endian (3 & 4 ignored) - { '\xFF', '\xFE', '\xFF', '\xFF' }, // UTF-16, little-endian ( 3 & 4 ignored) + { '\xFE', '\xFF', '\xFF', '\xFF' }, // UTF-16, big-endian (3 & 4 != 0) + { '\xFF', '\xFE', '\xFF', '\xFF' }, // UTF-16, little-endian ( 3 & 4 != 0) { '\xEF', '\xBB', '\xBF', '\xFF' } // UTF-8 }; @@ -36,11 +36,13 @@ const ACEXML_UTF8 ACEXML_Encoding::magic_values_[][4] = { const ACEXML_Char* ACEXML_Encoding::get_encoding (const char* input) { - if (ACE_OS::memcmp (&ACEXML_Encoding::byte_order_mark_[ACEXML_Encoding::UTF16BE][0], input, 2) == 0) + if ((ACE_OS::memcmp (&ACEXML_Encoding::byte_order_mark_[ACEXML_Encoding::UTF16BE][0], input, 2) == 0) + && (input[2] != 0 || input[3] != 0)) // 3 & 4 should not be both zero return ACEXML_Encoding::encoding_names_[ACEXML_Encoding::UTF16BE]; - else if (ACE_OS::memcmp (&ACEXML_Encoding::byte_order_mark_[ACEXML_Encoding::UTF16LE][0], input, 2) == 0) + else if ((ACE_OS::memcmp (&ACEXML_Encoding::byte_order_mark_[ACEXML_Encoding::UTF16LE][0], input, 2) == 0) + && (input[2] != 0 && input[3] != 0)) // 3 & 4 should not be both zero return ACEXML_Encoding::encoding_names_[ACEXML_Encoding::UTF16LE]; - else 
if (ACE_OS::memcmp (&ACEXML_Encoding::byte_order_mark_[ACEXML_Encoding::UTF8][0], input, 4) == 0) + else if (ACE_OS::memcmp (&ACEXML_Encoding::byte_order_mark_[ACEXML_Encoding::UTF8][0], input, 3) == 0) return ACEXML_Encoding::encoding_names_[ACEXML_Encoding::UTF8]; else if (ACE_OS::memcmp (&ACEXML_Encoding::magic_values_[ACEXML_Encoding::UTF16BE][0], input, 4) == 0) return ACEXML_Encoding::encoding_names_[ACEXML_Encoding::UTF16BE]; @@ -49,5 +51,8 @@ ACEXML_Encoding::get_encoding (const char* input) else if (ACE_OS::memcmp (&ACEXML_Encoding::magic_values_[ACEXML_Encoding::UTF8][0], input, 4) == 0) return ACEXML_Encoding::encoding_names_[ACEXML_Encoding::UTF8]; else - return ACEXML_Encoding::encoding_names_[ACEXML_Encoding::OTHER]; + { + ACE_ERROR ((LM_ERROR, "Unknown encoding. Assuming UTF-8\n")); + return ACEXML_Encoding::encoding_names_[ACEXML_Encoding::UTF8]; + } } diff --git a/ACEXML/common/Exception.cpp b/ACEXML/common/Exception.cpp index 3086a8a7bfb..88b2a5709a7 100644 --- a/ACEXML/common/Exception.cpp +++ b/ACEXML/common/Exception.cpp @@ -45,7 +45,7 @@ ACEXML_Exception::is_a (const ACEXML_Char *name) void ACEXML_Exception::print (void) { - ACE_DEBUG ((LM_ERROR, + ACE_ERROR ((LM_ERROR, ACE_TEXT ("ACEXML: (%P|%t) EXCEPTION : %s\n"), this->exception_name_)); } diff --git a/ACEXML/common/FileCharStream.cpp b/ACEXML/common/FileCharStream.cpp index 07aaa2ee97c..1d7811494ef 100644 --- a/ACEXML/common/FileCharStream.cpp +++ b/ACEXML/common/FileCharStream.cpp @@ -50,22 +50,22 @@ ACEXML_FileCharStream::determine_encoding (void) const ACEXML_Char* temp = ACEXML_Encoding::get_encoding (input); if (!temp) return -1; - if (ACE_OS::strcmp (temp, - ACEXML_Encoding::encoding_names_[ACEXML_Encoding::OTHER]) == 0) - return -1; else { this->encoding_ = ACE::strnew (temp); - ACE_DEBUG ((LM_DEBUG, "File's encoding is %s\n", this->encoding_)); + ACE_DEBUG ((LM_DEBUG, ACE_TEXT ("File's encoding is %s\n"), + this->encoding_)); } // Rewind the stream this->rewind(); // Move over the 
byte-order-mark if present. char ch; - for (int j = 0; j < 2; ++j) + for (int j = 0; j < 3; ++j) { - this->getchar_i (ch); - if (ch == '\xFF' || ch == '\xFE' || ch == '\xEF') + if (this->getchar_i (ch) < 0) + return -1; + if (ch == '\xFF' || ch == '\xFE' || ch == '\xEF' || ch == '\xBB' || + ch == '\xBF') continue; else { @@ -115,7 +115,7 @@ int ACEXML_FileCharStream::read (ACEXML_Char *str, size_t len) { - return ACE_OS::fread (str, len, 1, this->infile_); + return ACE_OS::fread (str, len, sizeof (ACEXML_Char), this->infile_); } int @@ -159,7 +159,7 @@ ACEXML_FileCharStream::get_i (ACEXML_Char& ch) ch = 0; return -1; } - ch = (BE) ? (input[0] << 8) | input[1] : (input[1] << 8) | input[0]; + ch = BE ? input[0] << 8 | input[1] : input[1] << 8 | input[0]; return 0; } #endif /* ACE_USES_WCHAR */ @@ -208,7 +208,7 @@ ACEXML_FileCharStream::peek_i (void) this->peek_ = 0; return -1; } - this->peek_ = (BE) ? (input[0] << 8) | input[1] : (input[1] << 8) | input[0]; + this->peek_ = BE ? input[0] << 8 | input[1] : input[1] << 8 | input[0]; return this->peek_; } #endif /* ACE_USES_WCHAR */ diff --git a/ACEXML/common/HttpCharStream.cpp b/ACEXML/common/HttpCharStream.cpp index 83426595168..2f1a9d4d754 100644 --- a/ACEXML/common/HttpCharStream.cpp +++ b/ACEXML/common/HttpCharStream.cpp @@ -4,6 +4,7 @@ #include "ace/ace_wchar.h" #include "ace/Auto_Ptr.h" #include "ACEXML/common/HttpCharStream.h" +#include "ACEXML/common/Encoding.h" ACE_RCSID (common, HttpCharStream, "$Id$") @@ -72,7 +73,7 @@ ACEXML_HttpCharStream::open (const ACEXML_Char *url) this->close(); ACE_ERROR_RETURN ((LM_ERROR, "Server returned status %d : %s\n", result, - "Refer HTTP/1.1 for details"), -1); + "Refer HTTP/1.0 for details"), -1); } this->size_ = len; @@ -236,6 +237,9 @@ ACEXML_HttpCharStream::get_url (size_t& len) if (this->stream_->seek (data_offset, SEEK_SET) == -1) ACE_ERROR_RETURN ((LM_ERROR, "%s: %m", "Error in seeking to beginning of data"), -1); + + if (this->determine_encoding() == -1) + return -1; 
return status; } @@ -257,7 +261,7 @@ ACEXML_HttpCharStream::send_request (void) // Ensure that the <command> memory is deallocated. ACE_Auto_Basic_Array_Ptr<char> cmd_ptr (command); - int bytes = ACE_OS::sprintf (command, "GET %s HTTP/1.1\r\n", path); + int bytes = ACE_OS::sprintf (command, "GET %s HTTP/1.0\r\n", path); bytes += ACE_OS::sprintf (&command[bytes], "Host: %s\r\n", this->url_addr_->get_host_name ()); bytes += ACE_OS::sprintf (&command[bytes], "\r\n"); @@ -302,16 +306,53 @@ ACEXML_HttpCharStream::close (void) } int -ACEXML_HttpCharStream::get (ACEXML_Char& ch) +ACEXML_HttpCharStream::determine_encoding (void) { - ch = (ACEXML_Char) this->stream_->get_char(); - return (ch == (ACEXML_Char)EOF ? -1 :0); + char input[4] = {0, 0, 0, 0}; + int i = 0; + for (; i < 4 && input[i] != -1; ++i) + input[i] = this->stream_->peek_char(i); + if (i < 4) + return -1; + const ACEXML_Char* temp = ACEXML_Encoding::get_encoding (input); + if (!temp) + return -1; + else + { + this->encoding_ = ACE::strnew (temp); + ACE_DEBUG ((LM_DEBUG, "URI's encoding is %s\n", this->encoding_)); + } + // Move over the byte-order-mark if present. 
+ for (int j = 0; j < 3; ++j) + { + if (input[i] == '\xFF' || input[i] == '\xFE' || input[i] == '\xEF' || + input[i] == '\xBB' || input[i] == '\xBF') + { + this->stream_->get_char(); + continue; + } + break; + } + return 0; +} + +void +ACEXML_HttpCharStream::rewind (void) +{ + this->stream_->rewind(); +} + +const ACEXML_Char* +ACEXML_HttpCharStream::getEncoding (void) +{ + return this->encoding_; } int ACEXML_HttpCharStream::read (ACEXML_Char *str, size_t len) { + len = len * sizeof (ACEXML_Char); char* temp = ACE_const_cast (char*, this->stream_->recv (len)); str = ACE_TEXT_CHAR_TO_TCHAR (temp); if (str == 0) @@ -319,20 +360,76 @@ ACEXML_HttpCharStream::read (ACEXML_Char *str, return len; } + +int +ACEXML_HttpCharStream::get (ACEXML_Char& ch) +{ +#if defined (ACE_USES_WCHAR) + return this->get_i (ch); +#else + ch = (ACEXML_Char) this->stream_->get_char(); + return (ch == (ACEXML_Char)EOF ? -1 :0); +#endif /* ACE_USES_WCHAR */ +} + int ACEXML_HttpCharStream::peek (void) { +#if defined (ACE_USES_WCHAR) + return this->peek_i(); +#else return this->stream_->peek_char (0); +#endif /* ACE_USES_WCHAR */ } -void -ACEXML_HttpCharStream::rewind (void) + +#if defined (ACE_USES_WCHAR) +int +ACEXML_HttpCharStream::get_i (ACEXML_Char& ch) { - this->stream_->rewind(); + if (ACE_OS::strcmp (this->encoding_, ACE_TEXT ("UTF-8")) == 0) + { + ch = (ACEXML_Char) this->stream_->getchar(); + return (ch == (ACEXML_Char)EOF ? -1 : 0); + } + int BE = (ACE_OS::strcmp (this->encoding_, + ACE_TEXT ("UTF-16BE")) == 0) ? 1 : 0; + ACEXML_Char input[2] = {0}; + int i = 0; + for (; i < 2 && input[i] != EOF; ++i) + { + input[i] = this->stream_->get_char(); + } + if (i < 2) + { + ch = 0; + return input[i]; + } + ch = BE ? input[0] << 8 | input[1] : input[1] << 8 | input[0]; + return 0; } -const ACEXML_Char* -ACEXML_HttpCharStream::getEncoding (void) +int +ACEXML_HttpCharStream::peek_i (void) { - return this->encoding_; + // If we are reading a UTF-8 encoded file, just use the plain unget. 
+ if (ACE_OS::strcmp (this->encoding_, ACE_TEXT ("UTF-8")) == 0) + { + ACEXML_Char ch = (ACEXML_Char) this->stream_->peek_char (0); + return ch; + } + + int BE = (ACE_OS::strcmp (this->encoding_, + ACE_TEXT ("UTF-16BE")) == 0) ? 1 : 0; + // Peek into the stream. + ACEXML_Char input[2]; + int i = 0; + for (; i < 2 && input[i] != EOF; ++i) + { + input[i] = this->peek_char (i); + } + if (i < 2) + return -1; + return (BE ? input[0] << 8 | input[1] : input[1] << 8 | input[0]); } +#endif /* ACE_USES_WCHAR */ diff --git a/ACEXML/common/HttpCharStream.h b/ACEXML/common/HttpCharStream.h index 7bce23a224b..59813c51eb7 100644 --- a/ACEXML/common/HttpCharStream.h +++ b/ACEXML/common/HttpCharStream.h @@ -77,6 +77,11 @@ public: virtual void rewind (void); /** + * Determine the encoding of the file. + */ + virtual int determine_encoding (void); + + /** * Get the encoding of the file */ virtual const ACEXML_Char* getEncoding (void); diff --git a/ACEXML/common/InputSource.cpp b/ACEXML/common/InputSource.cpp index 0dcdeb0f2ba..1292f3caa31 100644 --- a/ACEXML/common/InputSource.cpp +++ b/ACEXML/common/InputSource.cpp @@ -22,8 +22,7 @@ ACEXML_InputSource::ACEXML_InputSource (ACEXML_CharStream *stm) /* * Create a new input source with a character stream. - * / - InputSource (Reader); + * */ ACEXML_InputSource::ACEXML_InputSource (const ACEXML_Char *systemId) @@ -40,31 +39,25 @@ ACEXML_InputSource::~ACEXML_InputSource (void) } ACEXML_CharStream * -ACEXML_InputSource::getCharStream (void) +ACEXML_InputSource::getCharStream (void) const { return this->charStream_; } - /* - * Get the character stream for this input source. 
- * / - virtual Reader *getCharacterStream (void); - */ - const ACEXML_Char * -ACEXML_InputSource::getEncoding (void) +ACEXML_InputSource::getEncoding (void) const { return this->encoding_; } const ACEXML_Char * -ACEXML_InputSource::getPublicId (void) +ACEXML_InputSource::getPublicId (void) const { return this->publicId_; } const ACEXML_Char * -ACEXML_InputSource::getSystemId (void) +ACEXML_InputSource::getSystemId (void) const { return this->systemId_; } @@ -76,11 +69,6 @@ ACEXML_InputSource::setCharStream (ACEXML_CharStream *stm) this->charStream_ = stm; } - /* - * Set the character stream for this input source. - * - */ - void ACEXML_InputSource::setEncoding (const ACEXML_Char *encoding) { diff --git a/ACEXML/common/InputSource.h b/ACEXML/common/InputSource.h index 2d8c5a7beb5..aca68f1adeb 100644 --- a/ACEXML/common/InputSource.h +++ b/ACEXML/common/InputSource.h @@ -63,7 +63,7 @@ public: * Notice that ACEXML_InputSource assumes the ownership * of <stream> */ - ACEXML_InputSource (ACEXML_CharStream *stream); + ACE_EXPLICIT ACEXML_InputSource (ACEXML_CharStream *stream); /* * Create a new input source with a system identifier. @@ -78,22 +78,22 @@ public: /* * Get the ACEXML_Char stream for this input source. */ - virtual ACEXML_CharStream *getCharStream (void); + virtual ACEXML_CharStream *getCharStream (void) const; /* * Get the character encoding for a byte stream or URI. */ - virtual const ACEXML_Char *getEncoding (void); + virtual const ACEXML_Char *getEncoding (void) const; /* * Get the public identifier for this input source. */ - virtual const ACEXML_Char *getPublicId (void); + virtual const ACEXML_Char *getPublicId (void) const; /* * Get the system identifier for this input source. */ - virtual const ACEXML_Char *getSystemId (void); + virtual const ACEXML_Char *getSystemId (void) const; /* * Set the ACEXML_Char stream for this input source. 
diff --git a/ACEXML/common/LocatorImpl.cpp b/ACEXML/common/LocatorImpl.cpp index d304092e9fc..d7f0d1028bc 100644 --- a/ACEXML/common/LocatorImpl.cpp +++ b/ACEXML/common/LocatorImpl.cpp @@ -14,6 +14,16 @@ ACEXML_LocatorImpl::ACEXML_LocatorImpl (void) { } +ACEXML_LocatorImpl::ACEXML_LocatorImpl (const ACEXML_Char* systemId, + const ACEXML_Char* publicId) + : publicId_ (publicId ? ACE::strnew (publicId) : 0), + systemId_ (systemId ? ACE::strnew (systemId) : 0), + lineNumber_ (1), + columnNumber_ (0) +{ +} + + ACEXML_LocatorImpl::ACEXML_LocatorImpl (const ACEXML_Locator& locator) : publicId_ (ACE::strnew (locator.getPublicId ())), systemId_ (ACE::strnew (locator.getSystemId ())), diff --git a/ACEXML/common/LocatorImpl.h b/ACEXML/common/LocatorImpl.h index 5e0bbdace23..2bea4e80c22 100644 --- a/ACEXML/common/LocatorImpl.h +++ b/ACEXML/common/LocatorImpl.h @@ -66,7 +66,13 @@ public: */ ACEXML_LocatorImpl (void); - /* + /** + * Construct a locator with systemId and publicId + * + */ + ACEXML_LocatorImpl (const ACEXML_Char* systemId, + const ACEXML_Char* publicId); + /** * Copy constructor. Create a persistent copy of the current state * of a locator. 
When the original locator changes, this copy will * still keep the original values (and it can be used outside the diff --git a/ACEXML/common/Makefile b/ACEXML/common/Makefile index ee20d8016f5..64962069496 100644 --- a/ACEXML/common/Makefile +++ b/ACEXML/common/Makefile @@ -2470,7 +2470,8 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU $(ACE_ROOT)/ace/Service_Repository.h \ $(ACE_ROOT)/ace/Service_Repository.i \ $(ACE_ROOT)/ace/WFMO_Reactor.h \ - $(ACE_ROOT)/ace/Connector.cpp + $(ACE_ROOT)/ace/Connector.cpp \ + Encoding.h .obj/StreamFactory.o .obj/StreamFactory.so .shobj/StreamFactory.o .shobj/StreamFactory.so: StreamFactory.cpp \ StreamFactory.h \ diff --git a/ACEXML/common/NamespaceSupport.cpp b/ACEXML/common/NamespaceSupport.cpp index b28b9d07248..5f385d6a706 100644 --- a/ACEXML/common/NamespaceSupport.cpp +++ b/ACEXML/common/NamespaceSupport.cpp @@ -2,18 +2,15 @@ #include "ACEXML/common/NamespaceSupport.h" -static const ACEXML_Char ACEXML_XMLNS_PREFIX_name[] = {'x', 'm', 'l', 'n', 's', 0}; +static const ACEXML_Char ACEXML_XMLNS_PREFIX_name[] = ACE_TEXT ("xmlns"); + const ACEXML_Char *ACEXML_NamespaceSupport::XMLNS_PREFIX = ACEXML_XMLNS_PREFIX_name; static const ACEXML_Char ACEXML_DEFAULT_NS_PREFIX[] = {0}; -static const ACEXML_Char ACEXML_TABOO_NS_PREFIX[] = {'x', 'm', 'l', 0}; +static const ACEXML_Char ACEXML_TABOO_NS_PREFIX[] = ACE_TEXT ("xml"); -static const ACEXML_Char ACEXML_XMLNS_URI_name[] = { - 'h', 't', 't', 'p', ':', '/', '/', - 'w', 'w', 'w', '.', 'w', '3', '.', 'o', 'r', 'g', '/', - 'X', 'M', 'L', '/', '1', '9', '9', '8', '/', - 'n', 'a', 'm', 'e', 's', 'p', 'a', 'c', 'e', 0}; +static const ACEXML_Char ACEXML_XMLNS_URI_name[] = ACE_TEXT ("http://www.w3.org/XML/1998/namespace"); const ACEXML_Char *ACEXML_NamespaceSupport::XMLNS = ACEXML_XMLNS_URI_name; #if !defined (__ACEXML_INLINE__) @@ -250,7 +247,8 @@ ACEXML_NamespaceSupport::processName (const ACEXML_Char *qName, int ACEXML_NamespaceSupport::reset (void) { - // Not implemented. 
+ while (this->popContext() != -1) + ; return 0; } diff --git a/ACEXML/common/NamespaceSupport.h b/ACEXML/common/NamespaceSupport.h index 83d78a3c58d..c39bd6fc731 100644 --- a/ACEXML/common/NamespaceSupport.h +++ b/ACEXML/common/NamespaceSupport.h @@ -210,7 +210,7 @@ public: /** * Reset this Namespace support object for reuse. - * @todo Not implemented. + * */ int reset (void); diff --git a/ACEXML/common/SAXExceptions.cpp b/ACEXML/common/SAXExceptions.cpp index fe0963cd169..4f56ed31d99 100644 --- a/ACEXML/common/SAXExceptions.cpp +++ b/ACEXML/common/SAXExceptions.cpp @@ -105,7 +105,7 @@ ACEXML_SAXException::is_a (const ACEXML_Char *name) void ACEXML_SAXException::print (void) { - ACE_DEBUG ((LM_ERROR, + ACE_ERROR ((LM_ERROR, ACE_TEXT ("ACEXML: (%P|%t) %s: %s\n"), this->exception_name_, this->message())); } @@ -249,7 +249,7 @@ ACEXML_SAXParseException::is_a (const ACEXML_Char *name) void ACEXML_SAXParseException::print (void) { - ACE_DEBUG ((LM_ERROR, + ACE_ERROR ((LM_ERROR, ACE_TEXT ("ACEXML: (%P|%t) %s: %s\n"), this->exception_name_, this->message())); } diff --git a/ACEXML/common/StrCharStream.cpp b/ACEXML/common/StrCharStream.cpp index 046511407c2..4ea75a3743c 100644 --- a/ACEXML/common/StrCharStream.cpp +++ b/ACEXML/common/StrCharStream.cpp @@ -73,9 +73,6 @@ ACEXML_StrCharStream::determine_encoding (void) const ACEXML_Char* temp = ACEXML_Encoding::get_encoding (input); if (!temp) return -1; - if (ACE_OS::strcmp (temp, - ACEXML_Encoding::encoding_names_[ACEXML_Encoding::OTHER]) == 0) - return -1; else { this->encoding_ = ACE::strnew (temp); @@ -110,7 +107,7 @@ ACEXML_StrCharStream::read (ACEXML_Char *str, if (this->start_ != 0 && this->ptr_ != this->end_) { - if ((int) len > this->end_ - this->ptr_) + if (len * sizeof (ACEXML_Char) > (size_t) (this->end_ - this->ptr_)) len = this->end_ - this->ptr_; ACE_OS_String::strncpy (str, this->ptr_, len); diff --git a/ACEXML/common/StreamFactory.cpp b/ACEXML/common/StreamFactory.cpp index 80edc8a265f..df458e61697 100644 
--- a/ACEXML/common/StreamFactory.cpp +++ b/ACEXML/common/StreamFactory.cpp @@ -24,17 +24,16 @@ ACEXML_StreamFactory::create_stream (const ACEXML_Char* uri) ACE_NEW_RETURN (hstream, ACEXML_HttpCharStream, 0); if (hstream->open (uri) != -1) return hstream; - else - return 0; } else { + if (ACE_OS::strstr (uri, ACE_TEXT ("file://")) != 0) + uri += 7; // Skip over file:// ACE_NEW_RETURN (fstream, ACEXML_FileCharStream, 0); if (fstream->open (uri) != -1) return fstream; - else - return 0; } + return 0; } ACEXML_StreamFactory::~ACEXML_StreamFactory () diff --git a/ACEXML/common/Transcode.cpp b/ACEXML/common/Transcode.cpp index 4007d346494..d857756d7dd 100644 --- a/ACEXML/common/Transcode.cpp +++ b/ACEXML/common/Transcode.cpp @@ -7,6 +7,239 @@ #endif /* __ACEXML_INLINE__ */ int +ACEXML_Transcoder::utf162utf8 (ACEXML_UTF16 src, + ACEXML_UTF8 *dst, + size_t len) +{ + // Check for valid argument first... + + if (dst == 0) + return INVALID_ARGS; + + if (src < 0x80) + { + if (len < 1) + return DESTINATION_TOO_SHORT; + + *dst = ACE_static_cast (ACEXML_UTF8, src); + return 1; + } + else if (src < 0x800) + { + if (len < 2) + return DESTINATION_TOO_SHORT; + + *dst = 0xc0 | (src / 0x40); + *(dst+1) = 0x80 | (src % 0x40); + return 2; + } + else + { + if (len < 3) + return DESTINATION_TOO_SHORT; + + // Surrogates (0xD800 - 0xDFFF) are not valid unicode values + if (src >= 0xD800 && src < 0xE000) + return IS_SURROGATE; + + *dst = 0xe0 | (src / 0x1000); + *(dst+1) = 0x80 | ((src % 0x1000) / 0x40); + *(dst+2) = 0x80 | (src % 0x40); + return 3; + } + ACE_NOTREACHED (return NON_UNICODE;) + } + +int +ACEXML_Transcoder::ucs42utf8 (ACEXML_UCS4 src, + ACEXML_UTF8 *dst, + size_t len) +{ + if (src < 0x10000) + { + int retv = ACEXML_Transcoder::utf162utf8 + (ACE_static_cast (ACEXML_UTF16, src), + dst, len); + return (retv == IS_SURROGATE ? 
NON_UNICODE : retv); + } + else if (src >= 0x100000 && src < 0x110000) + { + if (len < 4) + return DESTINATION_TOO_SHORT; + + if (dst == 0) + return INVALID_ARGS; + + *dst = 0xf0 | (src / 0x40000); + *(dst+1) = 0x80 | ((src % 0x40000) / 0x1000); + *(dst+2) = 0x80 | ((src % 0x1000) / 0x40); + *(dst+3) = 0x80 | (src % 0x40); + return 4; + } + return NON_UNICODE; +} + + +int +ACEXML_Transcoder::ucs42utf16 (ACEXML_UCS4 src, + ACEXML_UTF16 *dst, + size_t len) +{ + if (dst == 0) + return INVALID_ARGS; + + if (src < 0x10000) + { + if (len < 1) + return DESTINATION_TOO_SHORT; + + if (src >= 0xD800 && src < 0xE000) + return NON_UNICODE; // Surrogates are not valid unicode value + + *dst = ACE_static_cast (ACEXML_UTF16, src); + return 1; + } + else if (src >= 0x100000 && src < 0x110000) + // Scalar values are encoded into surrogates + { + if (len < 2) + return DESTINATION_TOO_SHORT; + + *dst = 0xD800 | (src / 0x400); + *(dst+1) = 0xDC00 | (src % 0x400); + return 2; + } + + return NON_UNICODE; +} + +int +ACEXML_Transcoder::surrogate2utf8 (ACEXML_UTF16 high, + ACEXML_UTF16 low, + ACEXML_UTF8 *dst, + size_t len) +{ + if (len < 3) + return DESTINATION_TOO_SHORT; + + if (dst == 0 || + (high >= 0xD800 && high < 0xDC00) || + (low >= 0xDC00 && low < 0xE000)) + return INVALID_ARGS; + + ACEXML_UCS4 src = (high - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000; + *dst = 0xD800 | (src / 0x400); + *(dst+1) = 0xDC00 | (src % 0x400); + return 2; +} + +int +ACEXML_Transcoder::surrogate2ucs4 (ACEXML_UTF16 high, + ACEXML_UTF16 low, + ACEXML_UCS4 &dst) +{ + if ((high >= 0xD800 && high < 0xDC00) || + (low >= 0xDC00 && low < 0xE000)) + return INVALID_ARGS; + + dst = (high - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000; + return SUCCESS; +} + +int +ACEXML_Transcoder::utf82ucs4 (const ACEXML_UTF8 *the_src, + size_t len, + ACEXML_UCS4 &dst) +{ + if (the_src == 0) + return INVALID_ARGS; + + const unsigned char *src = ACE_reinterpret_cast (const unsigned char *, + the_src); + + size_t forward = 1; + + if 
(forward > len) + return END_OF_SOURCE; + + if (ACE_static_cast (unsigned char, *src) < 0x80) + dst = *src; + else if ((*src & 0xE0) == 0xC0) + { + dst = (*(src++) & 0x1f) * 0x40; + if (++forward > len) + return END_OF_SOURCE; + if ((*src & 0xC0) != 0x80) + return NON_UNICODE; // Error transcoding unicode scalar + dst += *src & 0x3f; + } + else if ((*src & 0xF0) == 0xE0) + { + dst = (*src++ & 0x0f) * 0x40; + if (++forward > len) + return END_OF_SOURCE; + if ((*src & 0xC0) != 0x80) + return NON_UNICODE; + dst = (dst + (*src++ & 0x3f)) * 0x40; + if (++forward > len) + return END_OF_SOURCE; + if ((*src & 0xC0) != 0x80) + return NON_UNICODE; + dst += *src & 0x3f; + } + else if ((*src & 0xF8) == 0xF0) + { + dst = (*src++ & 0x0f) * 0x40; + if (++forward > len) + return END_OF_SOURCE; + if ((*src & 0xC0) != 0x80) + return NON_UNICODE; + dst = (dst + (*src++ & 0x3f)) * 0x40; + if (++forward > len) + return END_OF_SOURCE; + if ((*src & 0xC0) != 0x80) + return NON_UNICODE; + dst = (dst + (*src++ & 0x3f)) * 0x40; + if (++forward > len) + return END_OF_SOURCE; + if ((*src & 0xC0) != 0x80) + return NON_UNICODE; + dst += *src & 0x3f; + } + else + return NON_UNICODE; + + return forward; +} + +int +ACEXML_Transcoder::utf162ucs4 (const ACEXML_UTF16 *src, + size_t len, + ACEXML_UCS4 &dst) +{ + if (src == 0) + return INVALID_ARGS; + + size_t forward = 1; + if (*src >= 0xDC00 && *src < 0xE000) + { + if (len < 2) + return END_OF_SOURCE; + return ACEXML_Transcoder::surrogate2ucs4 (*src, + *(src+1), + dst); + } + else + { + if (len < 1) + return END_OF_SOURCE; + dst = *src; + } + + return forward; +} + +int ACEXML_Transcoder::utf8s2utf16s (const ACEXML_UTF8 *src, ACEXML_UTF16 *dst, size_t len) diff --git a/ACEXML/common/Transcode.i b/ACEXML/common/Transcode.i index e9ec3936e80..77b4466a3cb 100644 --- a/ACEXML/common/Transcode.i +++ b/ACEXML/common/Transcode.i @@ -1,234 +1 @@ // -*- C++ -*- $Id$ - -ACEXML_INLINE int -ACEXML_Transcoder::utf162utf8 (ACEXML_UTF16 src, - ACEXML_UTF8 *dst, - 
size_t len) -{ - // Check for valid argument first... - - if (dst == 0) - return INVALID_ARGS; - - if (src < 0x80) - { - if (len < 1) - return DESTINATION_TOO_SHORT; - - *dst = ACE_static_cast (ACEXML_UTF8, src); - return 1; - } - else if (src < 0x800) - { - if (len < 2) - return DESTINATION_TOO_SHORT; - - *dst = 0xc0 | (src / 0x40); - *(dst+1) = 0x80 | (src % 0x40); - return 2; - } - else - { - if (len < 3) - return DESTINATION_TOO_SHORT; - - // Surrogates (0xD800 - 0xDFFF) are not valid unicode values - if (src >= 0xD800 && src < 0xE000) - return IS_SURROGATE; - - *dst = 0xe0 | (src / 0x1000); - *(dst+1) = 0x80 | ((src % 0x1000) / 0x40); - *(dst+2) = 0x80 | (src % 0x40); - return 3; - } - ACE_NOTREACHED (return NON_UNICODE;) -} - -ACEXML_INLINE int -ACEXML_Transcoder::ucs42utf8 (ACEXML_UCS4 src, - ACEXML_UTF8 *dst, - size_t len) -{ - if (src < 0x10000) - { - int retv = ACEXML_Transcoder::utf162utf8 - (ACE_static_cast (ACEXML_UTF16, src), - dst, len); - return (retv == IS_SURROGATE ? NON_UNICODE : retv); - } - else if (src >= 0x100000 && src < 0x110000) - { - if (len < 4) - return DESTINATION_TOO_SHORT; - - if (dst == 0) - return INVALID_ARGS; - - *dst = 0xf0 | (src / 0x40000); - *(dst+1) = 0x80 | ((src % 0x40000) / 0x1000); - *(dst+2) = 0x80 | ((src % 0x1000) / 0x40); - *(dst+3) = 0x80 | (src % 0x40); - return 4; - } - return NON_UNICODE; -} - - -ACEXML_INLINE int -ACEXML_Transcoder::ucs42utf16 (ACEXML_UCS4 src, - ACEXML_UTF16 *dst, - size_t len) -{ - if (dst == 0) - return INVALID_ARGS; - - if (src < 0x10000) - { - if (len < 1) - return DESTINATION_TOO_SHORT; - - if (src >= 0xD800 && src < 0xE000) - return NON_UNICODE; // Surrogates are not valid unicode value - - *dst = ACE_static_cast (ACEXML_UTF16, src); - return 1; - } - else if (src >= 0x100000 && src < 0x110000) - // Scalar values are encoded into surrogates - { - if (len < 2) - return DESTINATION_TOO_SHORT; - - *dst = 0xD800 | (src / 0x400); - *(dst+1) = 0xDC00 | (src % 0x400); - return 2; - } - - return 
NON_UNICODE; -} - -ACEXML_INLINE int -ACEXML_Transcoder::surrogate2utf8 (ACEXML_UTF16 high, - ACEXML_UTF16 low, - ACEXML_UTF8 *dst, - size_t len) -{ - if (len < 3) - return DESTINATION_TOO_SHORT; - - if (dst == 0 || - (high >= 0xD800 && high < 0xDC00) || - (low >= 0xDC00 && low < 0xE000)) - return INVALID_ARGS; - - ACEXML_UCS4 src = (high - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000; - *dst = 0xD800 | (src / 0x400); - *(dst+1) = 0xDC00 | (src % 0x400); - return 2; -} - -ACEXML_INLINE int -ACEXML_Transcoder::surrogate2ucs4 (ACEXML_UTF16 high, - ACEXML_UTF16 low, - ACEXML_UCS4 &dst) -{ - if ((high >= 0xD800 && high < 0xDC00) || - (low >= 0xDC00 && low < 0xE000)) - return INVALID_ARGS; - - dst = (high - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000; - return SUCCESS; -} - -ACEXML_INLINE int -ACEXML_Transcoder::utf82ucs4 (const ACEXML_UTF8 *the_src, - size_t len, - ACEXML_UCS4 &dst) -{ - if (the_src == 0) - return INVALID_ARGS; - - const unsigned char *src = ACE_reinterpret_cast (const unsigned char *, - the_src); - - size_t forward = 1; - - if (forward > len) - return END_OF_SOURCE; - - if (ACE_static_cast (unsigned char, *src) < 0x80) - dst = *src; - else if ((*src & 0xE0) == 0xC0) - { - dst = (*(src++) & 0x1f) * 0x40; - if (++forward > len) - return END_OF_SOURCE; - if ((*src & 0xC0) != 0x80) - return NON_UNICODE; // Error transcoding unicode scalar - dst += *src & 0x3f; - } - else if ((*src & 0xF0) == 0xE0) - { - dst = (*src++ & 0x0f) * 0x40; - if (++forward > len) - return END_OF_SOURCE; - if ((*src & 0xC0) != 0x80) - return NON_UNICODE; - dst = (dst + (*src++ & 0x3f)) * 0x40; - if (++forward > len) - return END_OF_SOURCE; - if ((*src & 0xC0) != 0x80) - return NON_UNICODE; - dst += *src & 0x3f; - } - else if ((*src & 0xF8) == 0xF0) - { - dst = (*src++ & 0x0f) * 0x40; - if (++forward > len) - return END_OF_SOURCE; - if ((*src & 0xC0) != 0x80) - return NON_UNICODE; - dst = (dst + (*src++ & 0x3f)) * 0x40; - if (++forward > len) - return END_OF_SOURCE; - if ((*src & 
0xC0) != 0x80) - return NON_UNICODE; - dst = (dst + (*src++ & 0x3f)) * 0x40; - if (++forward > len) - return END_OF_SOURCE; - if ((*src & 0xC0) != 0x80) - return NON_UNICODE; - dst += *src & 0x3f; - } - else - return NON_UNICODE; - - return forward; -} - -ACEXML_INLINE int -ACEXML_Transcoder::utf162ucs4 (const ACEXML_UTF16 *src, - size_t len, - ACEXML_UCS4 &dst) -{ - if (src == 0) - return INVALID_ARGS; - - size_t forward = 1; - if (*src >= 0xDC00 && *src < 0xE000) - { - if (len < 2) - return END_OF_SOURCE; - return ACEXML_Transcoder::surrogate2ucs4 (*src, - *(src+1), - dst); - } - else - { - if (len < 1) - return END_OF_SOURCE; - dst = *src; - } - - return forward; -} diff --git a/ACEXML/examples/SAXPrint/SAXPrint_Handler.cpp b/ACEXML/examples/SAXPrint/SAXPrint_Handler.cpp index 462c14ba7a7..921f04d881c 100644 --- a/ACEXML/examples/SAXPrint/SAXPrint_Handler.cpp +++ b/ACEXML/examples/SAXPrint/SAXPrint_Handler.cpp @@ -206,7 +206,8 @@ ACEXML_SAXPrint_Handler::error (ACEXML_SAXParseException & ex ACEXML_ENV_ARG_DECL_NOT_USED) ACE_THROW_SPEC ((ACEXML_SAXException)) { - ACE_DEBUG ((LM_DEBUG, "%s: line :%d col: %d ", this->fileName_, + ACE_DEBUG ((LM_DEBUG, "%s: line :%d col: %d ", + (this->locator_->getSystemId() == 0 ? this->fileName_ : this->locator_->getSystemId()), this->locator_->getLineNumber(), this->locator_->getColumnNumber())); ex.print(); @@ -217,7 +218,8 @@ ACEXML_SAXPrint_Handler::fatalError (ACEXML_SAXParseException & ex ACEXML_ENV_ARG_DECL_NOT_USED) ACE_THROW_SPEC ((ACEXML_SAXException)) { - ACE_DEBUG ((LM_DEBUG, "%s: line :%d col: %d ", this->fileName_, + ACE_DEBUG ((LM_DEBUG, "%s: line :%d col: %d ", + (this->locator_->getSystemId() == 0 ? 
this->fileName_ : this->locator_->getSystemId()), this->locator_->getLineNumber(), this->locator_->getColumnNumber())); ex.print(); @@ -228,7 +230,8 @@ ACEXML_SAXPrint_Handler::warning (ACEXML_SAXParseException & ex ACEXML_ENV_ARG_DECL_NOT_USED) ACE_THROW_SPEC ((ACEXML_SAXException)) { - ACE_DEBUG ((LM_DEBUG, "%s: line :%d col: %d ", this->fileName_, + ACE_DEBUG ((LM_DEBUG, "%s: line :%d col: %d ", + (this->locator_->getSystemId() == 0 ? this->fileName_ : this->locator_->getSystemId()), this->locator_->getLineNumber(), this->locator_->getColumnNumber())); ex.print(); diff --git a/ACEXML/examples/SAXPrint/main.cpp b/ACEXML/examples/SAXPrint/main.cpp index 771411867b1..6986744dd18 100644 --- a/ACEXML/examples/SAXPrint/main.cpp +++ b/ACEXML/examples/SAXPrint/main.cpp @@ -110,7 +110,8 @@ ACE_TMAIN (int argc, ACE_TCHAR *argv[]) -1); ACEXML_Parser parser; - ACEXML_InputSource input(stm); + ACEXML_InputSource* input = 0; + ACE_NEW_RETURN (input, ACEXML_InputSource (stm), -1); parser.setContentHandler (handler); parser.setDTDHandler (handler); @@ -119,7 +120,7 @@ ACE_TMAIN (int argc, ACE_TCHAR *argv[]) ACEXML_TRY_NEW_ENV { - parser.parse (&input ACEXML_ENV_ARG_PARAMETER); + parser.parse (input ACEXML_ENV_ARG_PARAMETER); ACEXML_TRY_CHECK; } ACEXML_CATCH (ACEXML_SAXException, ex) diff --git a/ACEXML/parser/parser/Entity_Manager.cpp b/ACEXML/parser/parser/Entity_Manager.cpp index d0c86862805..0b781b6f0e4 100644 --- a/ACEXML/parser/parser/Entity_Manager.cpp +++ b/ACEXML/parser/parser/Entity_Manager.cpp @@ -2,45 +2,17 @@ #include "ACEXML/parser/parser/Entity_Manager.h" +static const ACEXML_Char empty_string[] = { 0 }; + #if !defined (__ACEXML_INLINE__) # include "ACEXML/parser/parser/Entity_Manager.i" #endif /* __ACEXML_INLINE__ */ -static const ACEXML_Char amp_name[] = {'a', 'm', 'p', 0 }; -static const ACEXML_Char amp_value[] = {'&', 0}; -static const ACEXML_Char lt_name[] = {'l', 't', 0}; -static const ACEXML_Char lt_value[] = {'<', 0}; -static const ACEXML_Char gt_name[] = 
{'g', 't', 0}; -static const ACEXML_Char gt_value[] = {'>', 0}; -static const ACEXML_Char apos_name[] = {'a', 'p', 'o', 's', 0}; -static const ACEXML_Char apos_value[] = {'\'', 0}; -static const ACEXML_Char quot_name[] = {'q', 'u', 'o', 't', 0}; -static const ACEXML_Char quot_value[] = {'"', 0}; ACEXML_Entity_Manager::ACEXML_Entity_Manager (void) : entities_ () { - // @@ No way to know if these bindings succeed or not. - - ACEXML_String ampname (amp_name, 0, 0); - ACEXML_String ampvalue (amp_value, 0, 0); - this->entities_.bind (ampname, ampvalue); - - ACEXML_String ltname (lt_name, 0, 0); - ACEXML_String ltvalue (lt_value, 0, 0); - this->entities_.bind (ltname, ltvalue); - - ACEXML_String gtname (gt_name, 0, 0); - ACEXML_String gtvalue (gt_value, 0, 0); - this->entities_.bind (gtname, gtvalue); - - ACEXML_String aposname (apos_name, 0, 0); - ACEXML_String aposvalue (apos_value, 0, 0); - this->entities_.bind (aposname, aposvalue); - ACEXML_String quotname (quot_name, 0, 0); - ACEXML_String quotvalue (quot_value, 0, 0); - this->entities_.bind (quotname, quotvalue); } ACEXML_Entity_Manager::~ACEXML_Entity_Manager (void) diff --git a/ACEXML/parser/parser/Entity_Manager.h b/ACEXML/parser/parser/Entity_Manager.h index 7d2f4ef9b6e..ba16279eb05 100644 --- a/ACEXML/parser/parser/Entity_Manager.h +++ b/ACEXML/parser/parser/Entity_Manager.h @@ -44,6 +44,12 @@ typedef ACE_Hash_Map_Reverse_Iterator_Ex<ACEXML_String, ACE_Equal_To<ACEXML_String>, ACE_Null_Mutex> ACEXML_ENTITIES_MANAGER_REVERSE_ITER; +typedef ACE_Hash_Map_Bucket_Iterator<ACEXML_String, + ACEXML_String, + ACE_Hash<ACEXML_String>, + ACE_Equal_To<ACEXML_String>, + ACE_Null_Mutex> ACEXML_ENTITY_ENTRY_ITERATOR; + /** * @class ACEXML_Entity_Manager Entity_Manager.h "ACEXML/parser/parser/Entity_Manager.h" * @@ -61,11 +67,21 @@ public: ~ACEXML_Entity_Manager (void); /// Add a new entity declaration. 
- int add_entity (const ACEXML_Char *ref, - const ACEXML_Char *value); + int add_entity (const ACEXML_Char *ref, const ACEXML_Char *value); /// Resolve an entity reference. - const ACEXML_String *resolve_entity (const ACEXML_Char *ref); + const ACEXML_Char* resolve_entity (const ACEXML_Char *ref); + + /// Resolve an entity reference and return the tuple of @c systemId and + /// @c publicId + int resolve_entity (const ACEXML_Char* ref, ACEXML_Char*& systemId, + ACEXML_Char*& publicId); + + /// Number of items in the Entity Manager + const size_t size(void) const; + + /// Reset the state + int reset (void); private: ACEXML_ENTITIES_MANAGER entities_; diff --git a/ACEXML/parser/parser/Entity_Manager.i b/ACEXML/parser/parser/Entity_Manager.i index 696b82b64e4..26da9ad15d9 100644 --- a/ACEXML/parser/parser/Entity_Manager.i +++ b/ACEXML/parser/parser/Entity_Manager.i @@ -9,13 +9,46 @@ ACEXML_Entity_Manager::add_entity (const ACEXML_Char *ref, return this->entities_.bind (name, value); } -ACEXML_INLINE const ACEXML_String * +ACEXML_INLINE const ACEXML_Char* ACEXML_Entity_Manager::resolve_entity (const ACEXML_Char *ref) { ACEXML_ENTITY_ENTRY *entry; if (this->entities_.find (ACEXML_String (ref, 0, 0), entry) == 0) - return &entry->int_id_; + return entry->int_id_.c_str(); return 0; } + +ACEXML_INLINE int +ACEXML_Entity_Manager::resolve_entity (const ACEXML_Char* ref, + ACEXML_Char*& systemId, + ACEXML_Char*& publicId) +{ + publicId = systemId = 0; + ACEXML_ENTITY_ENTRY_ITERATOR iter (this->entities_, ref); + ACEXML_ENTITY_ENTRY_ITERATOR end (this->entities_, ref, 1); + + if (iter != end) + { + systemId = ACE_const_cast (ACEXML_Char*, (*iter).int_id_.c_str()); + ++iter; + if (iter != end) + publicId = ACE_const_cast (ACEXML_Char*, (*iter).int_id_.c_str()); + return 0; + } + return -1; +} + +ACEXML_INLINE int +ACEXML_Entity_Manager::reset (void) +{ + return this->entities_.close(); +} + + +ACEXML_INLINE const size_t +ACEXML_Entity_Manager::size (void) const +{ + return 
this->entities_.current_size(); +} diff --git a/ACEXML/parser/parser/Makefile b/ACEXML/parser/parser/Makefile index b16dcfec6ba..434a79dfacb 100644 --- a/ACEXML/parser/parser/Makefile +++ b/ACEXML/parser/parser/Makefile @@ -8,7 +8,9 @@ LIB = libACEXML_Parser.a SHLIB = libACEXML_Parser.$(SOEXT) FILES = Entity_Manager \ - Parser + Parser \ + ParserInternals \ + ParserContext DEFS = $(addsuffix .h,$(FILES)) LSRC = $(addsuffix .cpp,$(FILES)) @@ -171,22 +173,11 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU $(ACE_ROOT)/ace/Reactor_Impl.h \ Entity_Manager.i -.obj/Parser.o .obj/Parser.so .shobj/Parser.o .shobj/Parser.so: Parser.cpp \ - Parser.h \ - $(ACE_ROOT)/ace/pre.h \ - Parser_export.h \ +.obj/Parser.o .obj/Parser.so .shobj/Parser.o .shobj/Parser.so: Parser.cpp $(ACE_ROOT)/ace/ACE.h \ + $(ACE_ROOT)/ace/pre.h $(ACE_ROOT)/ace/OS.h \ $(ACE_ROOT)/ace/post.h \ $(ACE_ROOT)/ace/ace_wchar.h \ $(ACE_ROOT)/ace/ace_wchar.inl \ - $(ACE_ROOT)/ACEXML/common/XMLReader.h \ - $(ACE_ROOT)/ACEXML/common/ACEXML_Export.h \ - $(ACE_ROOT)/ACEXML/common/ContentHandler.h \ - $(ACE_ROOT)/ACEXML/common/Env.h \ - $(ACE_ROOT)/ACEXML/common/XML_Macros.h \ - $(ACE_ROOT)/ace/Exception_Macros.h \ - $(ACE_ROOT)/ACEXML/common/Exception.h \ - $(ACE_ROOT)/ACEXML/common/XML_Types.h \ - $(ACE_ROOT)/ace/OS.h \ $(ACE_ROOT)/ace/OS_Dirent.h \ $(ACE_ROOT)/ace/OS_Export.h \ $(ACE_ROOT)/ace/OS_Errno.h \ @@ -209,9 +200,6 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU $(ACE_ROOT)/ace/streams.h \ $(ACE_ROOT)/ace/Trace.h \ $(ACE_ROOT)/ace/OS.i \ - $(ACE_ROOT)/ace/SString.h \ - $(ACE_ROOT)/ace/String_Base.h \ - $(ACE_ROOT)/ace/ACE.h \ $(ACE_ROOT)/ace/Flag_Manip.h \ $(ACE_ROOT)/ace/Flag_Manip.i \ $(ACE_ROOT)/ace/Handle_Ops.h \ @@ -223,6 +211,11 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU $(ACE_ROOT)/ace/Sock_Connect.h \ $(ACE_ROOT)/ace/Sock_Connect.i \ $(ACE_ROOT)/ace/ACE.i \ + $(ACE_ROOT)/ACEXML/common/Transcode.h \ + $(ACE_ROOT)/ACEXML/common/ACEXML_Export.h \ + 
$(ACE_ROOT)/ACEXML/common/XML_Types.h \ + $(ACE_ROOT)/ace/SString.h \ + $(ACE_ROOT)/ace/String_Base.h \ $(ACE_ROOT)/ace/String_Base_Const.h \ $(ACE_ROOT)/ace/String_Base.i \ $(ACE_ROOT)/ace/Malloc_Base.h \ @@ -273,16 +266,40 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU $(ACE_ROOT)/ace/Auto_Ptr.i \ $(ACE_ROOT)/ace/Auto_Ptr.cpp \ $(ACE_ROOT)/ace/SString.i \ + $(ACE_ROOT)/ACEXML/common/Transcode.i \ + $(ACE_ROOT)/ACEXML/common/AttributesImpl.h \ + $(ACE_ROOT)/ACEXML/common/Attributes.h \ + $(ACE_ROOT)/ace/Containers_T.h \ + $(ACE_ROOT)/ace/Containers.h \ + $(ACE_ROOT)/ace/Containers.i \ + $(ACE_ROOT)/ace/Array_Base.h \ + $(ACE_ROOT)/ace/Array_Base.inl \ + $(ACE_ROOT)/ace/Array_Base.cpp \ + $(ACE_ROOT)/ace/Unbounded_Queue.h \ + $(ACE_ROOT)/ace/Unbounded_Queue.inl \ + $(ACE_ROOT)/ace/Unbounded_Queue.cpp \ + $(ACE_ROOT)/ace/Containers_T.i \ + $(ACE_ROOT)/ace/Containers_T.cpp \ + $(ACE_ROOT)/ACEXML/common/AttributesImpl.i \ + $(ACE_ROOT)/ACEXML/common/StrCharStream.h \ + $(ACE_ROOT)/ACEXML/common/CharStream.h \ + $(ACE_ROOT)/ACEXML/common/StreamFactory.h \ + Parser.h \ + Parser_export.h \ + $(ACE_ROOT)/ACEXML/common/XMLReader.h \ + $(ACE_ROOT)/ACEXML/common/ContentHandler.h \ + $(ACE_ROOT)/ACEXML/common/Env.h \ + $(ACE_ROOT)/ACEXML/common/XML_Macros.h \ + $(ACE_ROOT)/ace/Exception_Macros.h \ + $(ACE_ROOT)/ACEXML/common/Exception.h \ $(ACE_ROOT)/ACEXML/common/Exception.i \ $(ACE_ROOT)/ACEXML/common/Env.i \ $(ACE_ROOT)/ACEXML/common/SAXExceptions.h \ $(ACE_ROOT)/ACEXML/common/SAXExceptions.i \ $(ACE_ROOT)/ACEXML/common/Locator.h \ - $(ACE_ROOT)/ACEXML/common/Attributes.h \ $(ACE_ROOT)/ACEXML/common/DTDHandler.h \ $(ACE_ROOT)/ACEXML/common/EntityResolver.h \ $(ACE_ROOT)/ACEXML/common/InputSource.h \ - $(ACE_ROOT)/ACEXML/common/CharStream.h \ $(ACE_ROOT)/ACEXML/common/ErrorHandler.h \ $(ACE_ROOT)/ACEXML/common/LocatorImpl.h \ $(ACE_ROOT)/ACEXML/common/LocatorImpl.i \ @@ -305,9 +322,6 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU 
$(ACE_ROOT)/ace/DLL.h \ $(ACE_ROOT)/ace/Service_Object.i \ $(ACE_ROOT)/ace/Service_Types.i \ - $(ACE_ROOT)/ace/Unbounded_Queue.h \ - $(ACE_ROOT)/ace/Unbounded_Queue.inl \ - $(ACE_ROOT)/ace/Unbounded_Queue.cpp \ $(ACE_ROOT)/ace/XML_Svc_Conf.h \ $(ACE_ROOT)/ace/Service_Config.i \ $(ACE_ROOT)/ace/Reactor.h \ @@ -322,14 +336,6 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU $(ACE_ROOT)/ace/Timer_Queue_T.cpp \ $(ACE_ROOT)/ace/Reactor.i \ $(ACE_ROOT)/ace/Reactor_Impl.h \ - $(ACE_ROOT)/ace/Containers_T.h \ - $(ACE_ROOT)/ace/Containers.h \ - $(ACE_ROOT)/ace/Containers.i \ - $(ACE_ROOT)/ace/Array_Base.h \ - $(ACE_ROOT)/ace/Array_Base.inl \ - $(ACE_ROOT)/ace/Array_Base.cpp \ - $(ACE_ROOT)/ace/Containers_T.i \ - $(ACE_ROOT)/ace/Containers_T.cpp \ $(ACE_ROOT)/ACEXML/common/NamespaceSupport.i \ $(ACE_ROOT)/ace/Obstack.h \ $(ACE_ROOT)/ace/Obstack_T.h \ @@ -339,11 +345,224 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU $(ACE_ROOT)/ace/Obstack_T.cpp \ Entity_Manager.h \ Entity_Manager.i \ - ParserErrors.h \ - Parser.i \ - $(ACE_ROOT)/ACEXML/common/Transcode.h \ - $(ACE_ROOT)/ACEXML/common/Transcode.i \ - $(ACE_ROOT)/ACEXML/common/AttributesImpl.h \ - $(ACE_ROOT)/ACEXML/common/AttributesImpl.i + ParserInternals.h \ + ParserContext.h \ + ParserContext.inl \ + Parser.i + +.obj/ParserInternals.o .obj/ParserInternals.so .shobj/ParserInternals.o .shobj/ParserInternals.so: ParserInternals.cpp \ + ParserInternals.h \ + $(ACE_ROOT)/ace/pre.h \ + Parser_export.h \ + $(ACE_ROOT)/ace/post.h \ + $(ACE_ROOT)/ace/ace_wchar.h \ + $(ACE_ROOT)/ace/ace_wchar.inl \ + $(ACE_ROOT)/ACEXML/common/XML_Types.h \ + $(ACE_ROOT)/ace/OS.h \ + $(ACE_ROOT)/ace/OS_Dirent.h \ + $(ACE_ROOT)/ace/OS_Export.h \ + $(ACE_ROOT)/ace/OS_Errno.h \ + $(ACE_ROOT)/ace/OS_Errno.inl \ + $(ACE_ROOT)/ace/OS_Dirent.inl \ + $(ACE_ROOT)/ace/OS_String.h \ + $(ACE_ROOT)/ace/Basic_Types.h \ + $(ACE_ROOT)/ace/ACE_export.h \ + $(ACE_ROOT)/ace/Basic_Types.i \ + $(ACE_ROOT)/ace/OS_String.inl \ + 
$(ACE_ROOT)/ace/OS_Memory.h \ + $(ACE_ROOT)/ace/OS_Memory.inl \ + $(ACE_ROOT)/ace/OS_TLI.h \ + $(ACE_ROOT)/ace/OS_TLI.inl \ + $(ACE_ROOT)/ace/Time_Value.h \ + $(ACE_ROOT)/ace/Time_Value.inl \ + $(ACE_ROOT)/ace/Default_Constants.h \ + $(ACE_ROOT)/ace/Global_Macros.h \ + $(ACE_ROOT)/ace/Min_Max.h \ + $(ACE_ROOT)/ace/streams.h \ + $(ACE_ROOT)/ace/Trace.h \ + $(ACE_ROOT)/ace/OS.i \ + $(ACE_ROOT)/ace/SString.h \ + $(ACE_ROOT)/ace/String_Base.h \ + $(ACE_ROOT)/ace/ACE.h \ + $(ACE_ROOT)/ace/Flag_Manip.h \ + $(ACE_ROOT)/ace/Flag_Manip.i \ + $(ACE_ROOT)/ace/Handle_Ops.h \ + $(ACE_ROOT)/ace/Handle_Ops.i \ + $(ACE_ROOT)/ace/Lib_Find.h \ + $(ACE_ROOT)/ace/Lib_Find.i \ + $(ACE_ROOT)/ace/Init_ACE.h \ + $(ACE_ROOT)/ace/Init_ACE.i \ + $(ACE_ROOT)/ace/Sock_Connect.h \ + $(ACE_ROOT)/ace/Sock_Connect.i \ + $(ACE_ROOT)/ace/ACE.i \ + $(ACE_ROOT)/ace/String_Base_Const.h \ + $(ACE_ROOT)/ace/String_Base.i \ + $(ACE_ROOT)/ace/Malloc_Base.h \ + $(ACE_ROOT)/ace/String_Base.cpp \ + $(ACE_ROOT)/ace/Malloc.h \ + $(ACE_ROOT)/ace/Log_Msg.h \ + $(ACE_ROOT)/ace/Log_Priority.h \ + $(ACE_ROOT)/ace/OS_Log_Msg_Attributes.h \ + $(ACE_ROOT)/ace/OS_Log_Msg_Attributes.inl \ + $(ACE_ROOT)/ace/Malloc.i \ + $(ACE_ROOT)/ace/Malloc_T.h \ + $(ACE_ROOT)/ace/Synch.h \ + $(ACE_ROOT)/ace/Synch.i \ + $(ACE_ROOT)/ace/Synch_T.h \ + $(ACE_ROOT)/ace/Synch_T.i \ + $(ACE_ROOT)/ace/Thread.h \ + $(ACE_ROOT)/ace/Thread_Adapter.h \ + $(ACE_ROOT)/ace/Base_Thread_Adapter.h \ + $(ACE_ROOT)/ace/Base_Thread_Adapter.inl \ + $(ACE_ROOT)/ace/Thread_Adapter.inl \ + $(ACE_ROOT)/ace/Thread.i \ + $(ACE_ROOT)/ace/Synch_T.cpp \ + $(ACE_ROOT)/ace/Malloc_Allocator.h \ + $(ACE_ROOT)/ace/Malloc_Allocator.i \ + $(ACE_ROOT)/ace/Free_List.h \ + $(ACE_ROOT)/ace/Free_List.i \ + $(ACE_ROOT)/ace/Free_List.cpp \ + $(ACE_ROOT)/ace/Malloc_T.i \ + $(ACE_ROOT)/ace/Malloc_T.cpp \ + $(ACE_ROOT)/ace/Memory_Pool.h \ + $(ACE_ROOT)/ace/Event_Handler.h \ + $(ACE_ROOT)/ace/Event_Handler.i \ + $(ACE_ROOT)/ace/Signal.h \ + $(ACE_ROOT)/ace/Signal.i \ + 
$(ACE_ROOT)/ace/Mem_Map.h \ + $(ACE_ROOT)/ace/Mem_Map.i \ + $(ACE_ROOT)/ace/SV_Semaphore_Complex.h \ + $(ACE_ROOT)/ace/SV_Semaphore_Simple.h \ + $(ACE_ROOT)/ace/SV_Semaphore_Simple.i \ + $(ACE_ROOT)/ace/SV_Semaphore_Complex.i \ + $(ACE_ROOT)/ace/Unbounded_Set.h \ + $(ACE_ROOT)/ace/Node.h \ + $(ACE_ROOT)/ace/Node.cpp \ + $(ACE_ROOT)/ace/Unbounded_Set.inl \ + $(ACE_ROOT)/ace/Unbounded_Set.cpp \ + $(ACE_ROOT)/ace/Memory_Pool.i \ + $(ACE_ROOT)/ace/Auto_Ptr.h \ + $(ACE_ROOT)/ace/Auto_Ptr.i \ + $(ACE_ROOT)/ace/Auto_Ptr.cpp \ + $(ACE_ROOT)/ace/SString.i + +.obj/ParserContext.o .obj/ParserContext.so .shobj/ParserContext.o .shobj/ParserContext.so: ParserContext.cpp \ + ParserContext.h \ + $(ACE_ROOT)/ace/pre.h \ + Parser_export.h \ + $(ACE_ROOT)/ace/post.h \ + $(ACE_ROOT)/ace/ace_wchar.h \ + $(ACE_ROOT)/ace/ace_wchar.inl \ + $(ACE_ROOT)/ACEXML/common/XML_Types.h \ + $(ACE_ROOT)/ace/OS.h \ + $(ACE_ROOT)/ace/OS_Dirent.h \ + $(ACE_ROOT)/ace/OS_Export.h \ + $(ACE_ROOT)/ace/OS_Errno.h \ + $(ACE_ROOT)/ace/OS_Errno.inl \ + $(ACE_ROOT)/ace/OS_Dirent.inl \ + $(ACE_ROOT)/ace/OS_String.h \ + $(ACE_ROOT)/ace/Basic_Types.h \ + $(ACE_ROOT)/ace/ACE_export.h \ + $(ACE_ROOT)/ace/Basic_Types.i \ + $(ACE_ROOT)/ace/OS_String.inl \ + $(ACE_ROOT)/ace/OS_Memory.h \ + $(ACE_ROOT)/ace/OS_Memory.inl \ + $(ACE_ROOT)/ace/OS_TLI.h \ + $(ACE_ROOT)/ace/OS_TLI.inl \ + $(ACE_ROOT)/ace/Time_Value.h \ + $(ACE_ROOT)/ace/Time_Value.inl \ + $(ACE_ROOT)/ace/Default_Constants.h \ + $(ACE_ROOT)/ace/Global_Macros.h \ + $(ACE_ROOT)/ace/Min_Max.h \ + $(ACE_ROOT)/ace/streams.h \ + $(ACE_ROOT)/ace/Trace.h \ + $(ACE_ROOT)/ace/OS.i \ + $(ACE_ROOT)/ace/SString.h \ + $(ACE_ROOT)/ace/String_Base.h \ + $(ACE_ROOT)/ace/ACE.h \ + $(ACE_ROOT)/ace/Flag_Manip.h \ + $(ACE_ROOT)/ace/Flag_Manip.i \ + $(ACE_ROOT)/ace/Handle_Ops.h \ + $(ACE_ROOT)/ace/Handle_Ops.i \ + $(ACE_ROOT)/ace/Lib_Find.h \ + $(ACE_ROOT)/ace/Lib_Find.i \ + $(ACE_ROOT)/ace/Init_ACE.h \ + $(ACE_ROOT)/ace/Init_ACE.i \ + $(ACE_ROOT)/ace/Sock_Connect.h \ + 
$(ACE_ROOT)/ace/Sock_Connect.i \ + $(ACE_ROOT)/ace/ACE.i \ + $(ACE_ROOT)/ace/String_Base_Const.h \ + $(ACE_ROOT)/ace/String_Base.i \ + $(ACE_ROOT)/ace/Malloc_Base.h \ + $(ACE_ROOT)/ace/String_Base.cpp \ + $(ACE_ROOT)/ace/Malloc.h \ + $(ACE_ROOT)/ace/Log_Msg.h \ + $(ACE_ROOT)/ace/Log_Priority.h \ + $(ACE_ROOT)/ace/OS_Log_Msg_Attributes.h \ + $(ACE_ROOT)/ace/OS_Log_Msg_Attributes.inl \ + $(ACE_ROOT)/ace/Malloc.i \ + $(ACE_ROOT)/ace/Malloc_T.h \ + $(ACE_ROOT)/ace/Synch.h \ + $(ACE_ROOT)/ace/Synch.i \ + $(ACE_ROOT)/ace/Synch_T.h \ + $(ACE_ROOT)/ace/Synch_T.i \ + $(ACE_ROOT)/ace/Thread.h \ + $(ACE_ROOT)/ace/Thread_Adapter.h \ + $(ACE_ROOT)/ace/Base_Thread_Adapter.h \ + $(ACE_ROOT)/ace/Base_Thread_Adapter.inl \ + $(ACE_ROOT)/ace/Thread_Adapter.inl \ + $(ACE_ROOT)/ace/Thread.i \ + $(ACE_ROOT)/ace/Synch_T.cpp \ + $(ACE_ROOT)/ace/Malloc_Allocator.h \ + $(ACE_ROOT)/ace/Malloc_Allocator.i \ + $(ACE_ROOT)/ace/Free_List.h \ + $(ACE_ROOT)/ace/Free_List.i \ + $(ACE_ROOT)/ace/Free_List.cpp \ + $(ACE_ROOT)/ace/Malloc_T.i \ + $(ACE_ROOT)/ace/Malloc_T.cpp \ + $(ACE_ROOT)/ace/Memory_Pool.h \ + $(ACE_ROOT)/ace/Event_Handler.h \ + $(ACE_ROOT)/ace/Event_Handler.i \ + $(ACE_ROOT)/ace/Signal.h \ + $(ACE_ROOT)/ace/Signal.i \ + $(ACE_ROOT)/ace/Mem_Map.h \ + $(ACE_ROOT)/ace/Mem_Map.i \ + $(ACE_ROOT)/ace/SV_Semaphore_Complex.h \ + $(ACE_ROOT)/ace/SV_Semaphore_Simple.h \ + $(ACE_ROOT)/ace/SV_Semaphore_Simple.i \ + $(ACE_ROOT)/ace/SV_Semaphore_Complex.i \ + $(ACE_ROOT)/ace/Unbounded_Set.h \ + $(ACE_ROOT)/ace/Node.h \ + $(ACE_ROOT)/ace/Node.cpp \ + $(ACE_ROOT)/ace/Unbounded_Set.inl \ + $(ACE_ROOT)/ace/Unbounded_Set.cpp \ + $(ACE_ROOT)/ace/Memory_Pool.i \ + $(ACE_ROOT)/ace/Auto_Ptr.h \ + $(ACE_ROOT)/ace/Auto_Ptr.i \ + $(ACE_ROOT)/ace/Auto_Ptr.cpp \ + $(ACE_ROOT)/ace/SString.i \ + $(ACE_ROOT)/ACEXML/common/InputSource.h \ + $(ACE_ROOT)/ACEXML/common/ACEXML_Export.h \ + $(ACE_ROOT)/ACEXML/common/CharStream.h \ + $(ACE_ROOT)/ACEXML/common/Locator.h \ + $(ACE_ROOT)/ACEXML/common/LocatorImpl.h \ + 
$(ACE_ROOT)/ACEXML/common/LocatorImpl.i \ + $(ACE_ROOT)/ace/Functor.h \ + $(ACE_ROOT)/ace/Functor.i \ + $(ACE_ROOT)/ace/Functor_T.h \ + $(ACE_ROOT)/ace/Functor_T.i \ + $(ACE_ROOT)/ace/Functor_T.cpp \ + $(ACE_ROOT)/ace/Containers_T.h \ + $(ACE_ROOT)/ace/Containers.h \ + $(ACE_ROOT)/ace/Containers.i \ + $(ACE_ROOT)/ace/Array_Base.h \ + $(ACE_ROOT)/ace/Array_Base.inl \ + $(ACE_ROOT)/ace/Array_Base.cpp \ + $(ACE_ROOT)/ace/Unbounded_Queue.h \ + $(ACE_ROOT)/ace/Unbounded_Queue.inl \ + $(ACE_ROOT)/ace/Unbounded_Queue.cpp \ + $(ACE_ROOT)/ace/Containers_T.i \ + $(ACE_ROOT)/ace/Containers_T.cpp \ + ParserContext.inl # IF YOU PUT ANYTHING HERE IT WILL GO AWAY diff --git a/ACEXML/parser/parser/Parser.cpp b/ACEXML/parser/parser/Parser.cpp index 3dd7cba6216..c4da35f3260 100644 --- a/ACEXML/parser/parser/Parser.cpp +++ b/ACEXML/parser/parser/Parser.cpp @@ -1,9 +1,12 @@ // $Id$ -#include "ACEXML/parser/parser/Parser.h" +#include "ace/ACE.h" #include "ACEXML/common/Transcode.h" #include "ACEXML/common/AttributesImpl.h" -#include "ace/ACE.h" +#include "ACEXML/common/StrCharStream.h" +#include "ACEXML/common/StreamFactory.h" +#include "ACEXML/parser/parser/Parser.h" +#include "ACEXML/parser/parser/ParserInternals.h" #if !defined (__ACEXML_INLINE__) # include "ACEXML/parser/parser/Parser.i" @@ -21,88 +24,25 @@ ACEXML_Parser::namespaces_feature_[] = ACE_TEXT ("http://xml.org/sax/features/na const ACEXML_Char ACEXML_Parser::namespace_prefixes_feature_[] = ACE_TEXT ("http://xml.org/sax/features/namespace-prefixes"); - -static const ACEXML_Char* ACEXML_Parser_Msg[] = { - ACE_TEXT ("Invalid input source"), - ACE_TEXT ("Expecting '<'"), - ACE_TEXT ("Expecting '>'"), - ACE_TEXT ("Invalid comment"), - ACE_TEXT ("Duplicate DOCTYPE definition"), - ACE_TEXT ("Unexpected EOF"), - ACE_TEXT ("Invalid XMLDecl ('<?xml' ?)"), - ACE_TEXT ("Unrecognized XMLDecl ('version'?)"), - ACE_TEXT ("ACEXML only supports XML Version 1.0 documents"), - ACE_TEXT ("Encoding declaration doesn't match auto-detected 
encoding"), - ACE_TEXT ("ACEXML Parser Internal error"), - ACE_TEXT ("PITarget name cannot start with 'xml'"), - ACE_TEXT ("Expecting keyword 'DOCTYPE'"), - ACE_TEXT ("Expecting a DOCTYPE name"), - ACE_TEXT ("Root element missing"), - ACE_TEXT ("Error reading attribute"), - ACE_TEXT ("Duplicate namespace prefix"), - ACE_TEXT ("Duplicate attribute found"), - ACE_TEXT ("Cannot have both namespaces and namespace_prefixes simultaneously"), - ACE_TEXT ("Unexpected character"), - ACE_TEXT ("Mismatched End-tag encountered"), - ACE_TEXT ("Expecting '[CDATA[' section"), - ACE_TEXT ("Invalid keyword in markupdecl"), - ACE_TEXT ("Invalid character following '<!' in markupdecl"), - ACE_TEXT ("Expecting markupdecl or DeclSep"), - ACE_TEXT ("Expecting keyword `ELEMENT'"), - ACE_TEXT ("Error reading element name"), - ACE_TEXT ("Expecting keyword `EMPTY' in ELEMENT definition."), - ACE_TEXT ("Expecting keyword `ANY' in ELEMENT definition."), - ACE_TEXT ("Error reading ELEMENT definition."), - ACE_TEXT ("Expecting keyword `ENTITY'"), - ACE_TEXT ("Can't use a reference when defining entity name"), - ACE_TEXT ("Error reading ENTITY name."), - ACE_TEXT ("Error reading ENTITY value."), - ACE_TEXT ("Duplicate ENTITY definition"), - ACE_TEXT ("Invalid ExternalID definition (system ID missing)"), - ACE_TEXT ("Unexpected keyword NDATA in PEDecl"), - ACE_TEXT ("Expecting keyword NDATA"), - ACE_TEXT ("Expecting keyword `ATTLIST'"), - ACE_TEXT ("Error reading attribute name"), - ACE_TEXT ("Expecting keyword `CDATA'"), - ACE_TEXT ("Expecting keyword `ID', `IDREF', or `IDREFS'"), - ACE_TEXT ("Expecting keyword `ENTITY', or `ENTITIES'"), - ACE_TEXT ("Expecting keyword `NMTOKEN', `NMTOKENS', or `NOTATION'"), - ACE_TEXT ("Expecting keyword `NMTOKEN' or `NMTOKENS'"), - ACE_TEXT ("Expecting keyword `NOTATION'"), - ACE_TEXT ("Expecting `(' following NOTATION"), - ACE_TEXT ("Error reading NOTATION name"), - ACE_TEXT ("Error reading enumerated NMTOKEN name"), - ACE_TEXT ("Invalid Attribute Type"), - 
ACE_TEXT ("Expecting keyword `#REQUIRED'"), - ACE_TEXT ("Expecting keyword `#IMPLIED'"), - ACE_TEXT ("Expecting keyword `#FIXED'"), - ACE_TEXT ("Error parsing `#FIXED' attribute value"), - ACE_TEXT ("Invalid notation name."), - ACE_TEXT ("Expecting keyword 'SYSTEM'"), - ACE_TEXT ("Expecting keyword 'PUBLIC'"), - ACE_TEXT ("Error parsing system/public literal"), - ACE_TEXT ("Expecting either keyword `SYSTEM' or `PUBLIC'."), - ACE_TEXT ("Expecting keyword `#PCDATA'"), - ACE_TEXT ("Expecting end of Mixed section"), - ACE_TEXT ("Expecting closing `)*' or ')'"), - ACE_TEXT ("Error reading sub-element name"), - ACE_TEXT ("Expecting `,', `|', or `)' while defining an element."), - ACE_TEXT ("Invalid character reference") -}; - ACEXML_Parser::ACEXML_Parser (void) : dtd_handler_ (0), entity_resolver_ (0), content_handler_ (0), error_handler_ (0), - instream_ (0), doctype_ (0), - dtd_system_ (0), - dtd_public_ (0), - locator_(), + alt_stack_ (MAXPATHLEN), + nested_namespace_ (0), + ref_state_ (ACEXML_ParserInt::INVALID), + external_subset_ (0), + external_entity_ (0), + has_pe_refs_ (0), simple_parsing_ (0), + validate_ (1), namespaces_(1), - namespace_prefixes_ (0) + namespace_prefixes_ (0), + standalone_ (0), + external_dtd_ (0), + internal_dtd_ (0) { } @@ -111,185 +51,125 @@ ACEXML_Parser::~ACEXML_Parser (void) } int -ACEXML_Parser::getFeature (const ACEXML_Char *name ACEXML_ENV_ARG_DECL) - ACE_THROW_SPEC ((ACEXML_SAXNotRecognizedException, - ACEXML_SAXNotSupportedException)) +ACEXML_Parser::initialize(ACEXML_InputSource* input) { - if (ACE_OS::strcmp (name, ACEXML_Parser::simple_parsing_feature_) == 0) - { - return this->simple_parsing_; - } - else if (ACE_OS::strcmp (name, ACEXML_Parser::namespaces_feature_) == 0) - { - return this->namespaces_; - } - else if (ACE_OS::strcmp (name, - ACEXML_Parser::namespace_prefixes_feature_) == 0) + for (int i = 0; i < 5; ++i) { - return this->namespace_prefixes_; + if (this->predef_entities_.add_entity 
(ACEXML_ParserInt::predef_ent_[i], + ACEXML_ParserInt::predef_val_[i]) + != 0) + { + ACE_ERROR ((LM_DEBUG, ACE_TEXT ("Error adding entity %s to Manager"), + ACEXML_ParserInt::predef_ent_[i])); + return -1; + } } - - ACEXML_THROW_RETURN (ACEXML_SAXNotRecognizedException (name), -1); + return this->switch_input (input); } - - void -ACEXML_Parser::setFeature (const ACEXML_Char *name, - int boolean_value ACEXML_ENV_ARG_DECL) - ACE_THROW_SPEC ((ACEXML_SAXNotRecognizedException, - ACEXML_SAXNotSupportedException)) +ACEXML_Parser::parse (ACEXML_InputSource *input ACEXML_ENV_ARG_DECL) + ACE_THROW_SPEC ((ACEXML_SAXException)) { - if (ACE_OS::strcmp (name, ACEXML_Parser::simple_parsing_feature_) == 0) + if (input == 0) { - this->simple_parsing_ = (boolean_value == 0 ? 0 : 1); - return; - } - else if (ACE_OS::strcmp (name, ACEXML_Parser::namespaces_feature_) == 0) - { - this->namespaces_ = (boolean_value == 0 ? 0 : 1); - return; + this->fatal_error(ACE_TEXT ("Invalid input source") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK; } - else if (ACE_OS::strcmp (name, - ACEXML_Parser::namespace_prefixes_feature_) == 0) + if (this->initialize(input) == -1) { - this->namespace_prefixes_ = (boolean_value == 0 ? 0 : 1); - return; + this->fatal_error (ACE_TEXT ("Failed to initialize parser state") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK; } + // Set up Locator. 
+ if (this->content_handler_) + this->content_handler_->setDocumentLocator (this->current_.getLocator()); - ACEXML_THROW (ACEXML_SAXNotRecognizedException (name)); -} - -void * -ACEXML_Parser::getProperty (const ACEXML_Char *name ACEXML_ENV_ARG_DECL) - ACE_THROW_SPEC ((ACEXML_SAXNotRecognizedException, - ACEXML_SAXNotSupportedException)) -{ - ACEXML_THROW_RETURN (ACEXML_SAXNotSupportedException (name), 0); -} - -void -ACEXML_Parser::setProperty (const ACEXML_Char *name, - void *value ACEXML_ENV_ARG_DECL) - ACE_THROW_SPEC ((ACEXML_SAXNotRecognizedException, - ACEXML_SAXNotSupportedException)) -{ - ACE_UNUSED_ARG (value); - - ACEXML_THROW (ACEXML_SAXNotSupportedException (name)); -} - -void -ACEXML_Parser::report_error (ACEXML_Error minor_code ACEXML_ENV_ARG_DECL) -{ - ACEXML_SAXParseException* exception = 0; - ACE_NEW_NORETURN (exception, - ACEXML_SAXParseException (ACEXML_Parser_Msg[minor_code])); - if (this->error_handler_) - this->error_handler_->error (*exception ACEXML_ENV_ARG_PARAMETER); - else - ACEXML_ENV_RAISE (exception); - return; -} - -void -ACEXML_Parser::report_warning (ACEXML_Error minor_code ACEXML_ENV_ARG_DECL) -{ - ACEXML_SAXParseException* exception = 0; - ACE_NEW_NORETURN (exception, - ACEXML_SAXParseException (ACEXML_Parser_Msg[minor_code])); - if (this->error_handler_) - this->error_handler_->warning (*exception ACEXML_ENV_ARG_PARAMETER); - return; -} - -void -ACEXML_Parser::report_fatal_error (ACEXML_Error minor_code ACEXML_ENV_ARG_DECL) -{ - ACEXML_SAXParseException* exception = 0; - ACE_NEW_NORETURN (exception, - ACEXML_SAXParseException (ACEXML_Parser_Msg[minor_code])); - if (this->error_handler_) - this->error_handler_->fatalError (*exception ACEXML_ENV_ARG_PARAMETER); - ACEXML_ENV_RAISE (exception); - return; -} - -void -ACEXML_Parser::parse (ACEXML_InputSource *input ACEXML_ENV_ARG_DECL) - ACE_THROW_SPEC ((ACEXML_SAXException)) -{ - if (input == 0 || (this->instream_ = input->getCharStream ()) == 0) + int xmldecl_defined = 0; + 
ACEXML_Char fwd = this->get(); // Consume '<' + if (fwd == '<' && this->peek() == '?') { - this->report_fatal_error(ACEXML_INVIP ACEXML_ENV_ARG_PARAMETER); - return; + this->get(); // Consume '?' + fwd = this->peek(); + if (fwd == 'x' && !xmldecl_defined) + { + this->parse_xml_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK; + xmldecl_defined = 1; + } } - - // Set up Locator. At this point, the systemId and publicId are null. We - // can't do better, as we don't know anything about the InputSource - // currently, and according to the SAX spec, the parser should set up the - // locator before reporting any document events. - if (this->content_handler_) - this->content_handler_->setDocumentLocator (&this->locator_); - - if (this->simple_parsing_ == 0) + // We need a XMLDecl in a Valid XML document + if (this->validate_ && !xmldecl_defined) { - this->parse_xml_prolog (ACEXML_ENV_SINGLE_ARG_PARAMETER); + this->fatal_error (ACE_TEXT ("Expecting an XMLDecl at the beginning of" + " a valid document") + ACEXML_ENV_ARG_PARAMETER); ACEXML_CHECK; } - this->content_handler_->startDocument (ACEXML_ENV_SINGLE_ARG_PARAMETER); ACEXML_CHECK; int doctype_defined = 0; - for (int prolog_done = 0; prolog_done == 0; ) { - if (this->skip_whitespace (0) != '<') + // Expect a '<' only if we have encountered a XMLDecl, or we are + // looping through Misc blocks. + if (xmldecl_defined) { - this->report_fatal_error (ACEXML_LESS ACEXML_ENV_ARG_PARAMETER); - return; - } - ACEXML_Char fwd = this->peek (); - switch (fwd) - { - case '!': - this->get (); // consume the '!' - fwd = this->peek (); - if (fwd == 'D' && !doctype_defined) // DOCTYPE + if (this->skip_whitespace () != '<') { - // This will also take care of the trailing MISC block if any. 
- this->parse_doctypedecl (ACEXML_ENV_SINGLE_ARG_PARAMETER); + this->fatal_error (ACE_TEXT ("Expecting '<' at the beginning of " + "Misc section") + ACEXML_ENV_ARG_PARAMETER); ACEXML_CHECK; - doctype_defined = 1; - break; } - else if (fwd == '-') // COMMENT - { - if (this->grok_comment () < 0) - { - this->report_fatal_error(ACEXML_INVCO - ACEXML_ENV_ARG_PARAMETER); - return; - } - } - else - { - this->report_fatal_error (ACEXML_DUPDOC - ACEXML_ENV_ARG_PARAMETER); - return; - } - break; - case '?': - this->parse_processing_instruction (ACEXML_ENV_SINGLE_ARG_PARAMETER); - ACEXML_CHECK; - break; - case 0: - this->report_fatal_error (ACEXML_EOF ACEXML_ENV_ARG_PARAMETER); - return; - default: // Root element begins - prolog_done = 1; - break; + fwd = this->get(); + } + switch (fwd) + { + case '?': + this->parse_processing_instruction (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK; + xmldecl_defined = 1; + break; + case '!': + fwd = this->peek (); + if (fwd == 'D' && !doctype_defined) // DOCTYPE + { + // This will also take care of the trailing MISC block if any. 
+ this->parse_doctypedecl (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK; + doctype_defined = 1; + // Now that we have a DOCTYPE Decl defined, we shouldn't + // accept XML Decl any longer + xmldecl_defined = 1; + } + else if (fwd == 'D') + { + this->fatal_error (ACE_TEXT ("Duplicate DOCTYPE declaration") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK; + } + else if (fwd == '-') // COMMENT + { + if (this->parse_comment () < 0) + { + this->fatal_error(ACE_TEXT ("Invalid comment in document") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK; + } + xmldecl_defined = 1; + } + break; + case 0: + this->fatal_error (ACE_TEXT ("Unexpected end-of-file") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK; + default: // Root element begins + prolog_done = 1; + break; } } @@ -300,8 +180,8 @@ ACEXML_Parser::parse (ACEXML_InputSource *input ACEXML_ENV_ARG_DECL) this->content_handler_->endDocument (ACEXML_ENV_SINGLE_ARG_PARAMETER); ACEXML_CHECK; - // Reset the Locator held within the parser - this->locator_.reset(); + // Reset the parser state + this->reset(); } void @@ -315,640 +195,924 @@ ACEXML_Parser::parse (const ACEXML_Char *systemId ACEXML_ENV_ARG_DECL) } -void -ACEXML_Parser::parse_xml_prolog (ACEXML_ENV_SINGLE_ARG_DECL) - ACE_THROW_SPEC ((ACEXML_SAXException)) + +int +ACEXML_Parser::parse_doctypedecl (ACEXML_ENV_SINGLE_ARG_DECL) + ACE_THROW_SPEC ((ACEXML_SAXException)) { - if (this->parse_token (ACE_TEXT("<?xml")) < 0) + if (this->parse_token (ACE_TEXT ("DOCTYPE")) < 0) { - this->report_fatal_error(ACEXML_INVXMLDECL ACEXML_ENV_ARG_PARAMETER); - return; + this->fatal_error(ACE_TEXT ("Expecting keyword DOCTYPE in a doctypedecl") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); } - ACEXML_Char *astring; - if (this->skip_whitespace (0) != 'v' // Discard whitespace - || this->parse_token (ACE_TEXT("ersion")) < 0 - || this->skip_equal () != 0 - || this->get_quoted_string (astring) != 0) + ACEXML_Char nextch = 0; + if (this->skip_whitespace_count (&nextch) == 0) { - 
this->report_fatal_error (ACEXML_INVVERSION ACEXML_ENV_ARG_PARAMETER); - return; + this->fatal_error(ACE_TEXT ("Expecting a space between DOCTYPE keyword " + "and name") ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); } - if (ACE_OS::strcmp (astring, ACE_TEXT ("1.0")) != 0) + + this->doctype_ = this->parse_name (); + if (this->doctype_ == 0) { - this->report_fatal_error (ACEXML_ENOTSUP ACEXML_ENV_ARG_PARAMETER); - return; + this->fatal_error(ACE_TEXT ("Invalid DOCTYPE name") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); } + int count = this->skip_whitespace_count (&nextch); - ACEXML_Char fwd = this->skip_whitespace (0); - if (fwd != '?') + if (nextch == 'S' || nextch == 'P') // ExternalID defined + { + if (count == 0) { - if (fwd == 'e') - { - if ((this->parse_token (ACE_TEXT("ncoding")) == 0) && - this->skip_equal () == 0 && - this->get_quoted_string (astring) == 0) - { - if (ACE_OS::strcmp (astring, - this->instream_->getEncoding()) != 0) - { - ACE_ERROR ((LM_ERROR, - ACE_TEXT ("Detected Encoding is %s ") - ACE_TEXT (": Declared Encoding is %s\n"), - this->instream_->getEncoding(), astring)); - this->report_warning (ACEXML_ENCMISMATCH - ACEXML_ENV_ARG_PARAMETER); - } - fwd = this->skip_whitespace (0); - } - } - if (fwd == 's') - { - if ((this->parse_token (ACE_TEXT("tandalone")) == 0) && - this->skip_equal () == 0 && - this->get_quoted_string (astring) == 0) - { - if (ACE_OS::strcmp (astring, ACE_TEXT ("yes")) == 0) - this->standalone_ = 1; - else if (ACE_OS::strcmp (astring, ACE_TEXT ("no")) == 0) - this->standalone_ = 0; - fwd = this->skip_whitespace (0); - } - } - } - if (fwd == '?' && this->get() == '>') - return; - // All the rules fail. So return an error. 
- this->report_fatal_error (ACEXML_INVXMLDECL ACEXML_ENV_ARG_PARAMETER); - return; -} - -int -ACEXML_Parser::grok_comment (void) -{ - /// Simply filter out all the comment - int state = 0; + this->fatal_error(ACE_TEXT ("Expecting a space between DOCTYPE" + "keyword and name") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + this->external_dtd_ = 1; + this->parse_external_dtd (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } - if (this->get () != '-' || // Skip the opening "<!--" - this->get () != '-' || // completely. - this->get () == '-') // and at least something not '-'. - return -1; + nextch = this->skip_whitespace (); + switch (nextch) + { + case '[': + this->internal_dtd_ = 1; // Internal DTD definition + this->parse_internal_dtd (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + break; + case '>': // End of DTD definition + // This is an XML document without a doctypedecl. + if (this->validate_ && !this->external_dtd_) + { + this->fatal_error (ACE_TEXT ("No DTD defined") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + return 0; + case '0': + this->fatal_error (ACE_TEXT ("Unexpected end-of-file") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + default: + break; + } - while (state < 3) - // Waiting for the trailing three character '-->'. Notice that - // according to the spec, '--->' is not a valid closing comment - // sequence. But we'll let it pass anyway. + if (this->skip_whitespace() != '>') { - ACEXML_Char fwd = this->get (); - if ((fwd == '-' && state < 2) || - (fwd == '>' && state == 2)) - state += 1; - else - state = 0; // Reset parse state. + this->fatal_error(ACE_TEXT ("Expecting '>' at end of doctypedecl") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); } return 0; } int -ACEXML_Parser::parse_processing_instruction (ACEXML_ENV_SINGLE_ARG_DECL) +ACEXML_Parser::parse_external_dtd (ACEXML_ENV_SINGLE_ARG_DECL) { - if (this->get () != '?') - { // How did we get here? 
- this->report_fatal_error(ACEXML_EINT ACEXML_ENV_ARG_PARAMETER); - return -1; + this->ref_state_ = ACEXML_ParserInt::IN_EXT_DTD; + ACEXML_Char* publicId = 0; + ACEXML_Char* systemId = 0; + if (this->parse_external_id (publicId, systemId + ACEXML_ENV_ARG_PARAMETER) != 0) + { + this->fatal_error (ACE_TEXT ("Error in parsing ExternalID") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); } - const ACEXML_Char *pitarget = this->read_name (); - ACEXML_Char *instruction = 0; - - if (ACE_OS::strcasecmp (ACE_TEXT ("xml"), pitarget) != 0) + ACEXML_Char* uri = this->normalize_systemid (systemId); + ACE_Auto_Basic_Array_Ptr<ACEXML_Char> cleanup_uri (uri); + ACEXML_InputSource* ip = 0; + if (this->entity_resolver_) { - // Invalid PITarget name. - this->report_fatal_error(ACEXML_INVPI ACEXML_ENV_ARG_PARAMETER); - return -1; + ip = this->entity_resolver_->resolveEntity (publicId, + (uri ? uri : systemId) + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + if (ip) + { + if (this->switch_input (ip) != 0) + return -1; + } + else + { + ACEXML_StreamFactory factory; + ACEXML_CharStream* cstream = factory.create_stream (uri ? 
uri: systemId); + if (!cstream) { + this->fatal_error (ACE_TEXT ("Invalid input source") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + if (this->switch_input (cstream, systemId, publicId) != 0) + return -1; } + this->parse_external_subset (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + return 0; +} - int state = 0; - ACEXML_Char ch = this->skip_whitespace (0); +int +ACEXML_Parser::parse_internal_dtd (ACEXML_ENV_SINGLE_ARG_DECL) +{ + this->ref_state_ = ACEXML_ParserInt::IN_INT_DTD; + ACEXML_Char nextch = this->skip_whitespace (); + do { + switch (nextch) + { + case '<': + nextch = this->get(); + switch (nextch) + { + case '!': + this->parse_markup_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + break; + case '?': + this->parse_processing_instruction (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + break; + default: + this->fatal_error (ACE_TEXT ("Invalid internal subset") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + break; + case '%': + this->has_pe_refs_ = 1; + this->parse_PE_reference (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + case ']': // End of internal definitions. 
+ return 0; + case '&': + this->fatal_error (ACE_TEXT ("Invalid Reference in internal DTD") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + case 0: + this->fatal_error (ACE_TEXT ("Unexpected end-of-file") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + default: + this->fatal_error (ACE_TEXT ("Invalid content in internal subset") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + }; + nextch = this->skip_whitespace (); - while (state < 2) - { - switch (ch) - { - case '?': - if (state == 0) - state = 1; + } while (1); + + ACE_NOTREACHED (return -1); +} + +int +ACEXML_Parser::parse_external_subset (ACEXML_ENV_SINGLE_ARG_DECL) +{ + this->ref_state_ = ACEXML_ParserInt::IN_EXT_DTD; + this->external_subset_ = 1; + int nrelems = 0; + ACEXML_Char nextch = this->skip_whitespace(); + do { + switch (nextch) + { + case '<': + nextch = this->get(); + switch (nextch) + { + case '!': + nextch = this->peek(); + if (nextch == '[') + this->parse_conditional_section (ACEXML_ENV_SINGLE_ARG_PARAMETER); + else + this->parse_markup_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + break; + case '?': + nextch = this->peek(); + if (nextch == 'x') + this->parse_text_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER); + else + this->parse_processing_instruction (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + break; + default: + this->fatal_error (ACE_TEXT ("Invalid content in external DTD") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } break; - case '>': - if (state == 1) + case '%': + this->parse_PE_reference (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + break; + case 0: + nrelems = this->pop_context(); + if (nrelems > 1) + break; + else if (nrelems == 1) { - instruction = this->obstack_.freeze (); - this->content_handler_->processingInstruction (pitarget, - instruction - ACEXML_ENV_ARG_PARAMETER); - ACEXML_CHECK_RETURN (-1); - this->obstack_.unwind (ACE_const_cast (ACEXML_Char*, pitarget)); + 
this->external_subset_ = 0; return 0; } - break; - case 0x0D: // End-of-Line handling - ch = (this->peek () == 0x0A ? this->get () : 0x0A); - // Fall thru... - case 0x0A: - // Fall thru... + else + { + this->fatal_error(ACE_TEXT ("Internal Parser Error") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } default: - if (state == 1) - this->obstack_.grow ('?'); - this->obstack_.grow (ch); - state = 0; - } - ch = this->get (); - } - return -1; + this->fatal_error (ACE_TEXT ("Invalid content in external DTD") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + nextch = this->skip_whitespace(); + } while (1); } int -ACEXML_Parser::parse_doctypedecl (ACEXML_ENV_SINGLE_ARG_DECL) - ACE_THROW_SPEC ((ACEXML_SAXException)) +ACEXML_Parser::parse_conditional_section (ACEXML_ENV_SINGLE_ARG_DECL) { - if (this->parse_token (ACE_TEXT ("DOCTYPE")) < 0) + ACEXML_Char ch = this->get (); + if (ch != '[') { - this->report_fatal_error(ACEXML_INVDOCKEYWORD ACEXML_ENV_ARG_PARAMETER); - return -1; + this->fatal_error(ACE_TEXT ("Internal Parser Error") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); } - - ACEXML_Char nextch = this->skip_whitespace (0); - if (nextch == 0) + if (this->skip_whitespace() != 'I') { - this->report_fatal_error(ACEXML_INVDOCNAME ACEXML_ENV_ARG_PARAMETER); - return -1; + this->fatal_error (ACE_TEXT ("Invalid conditionalSect") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); } + ch = this->get(); + int include = 0; + switch (ch) + { + case 'N': + if (this->parse_token (ACE_TEXT ("CLUDE")) < 0) + { + this->fatal_error (ACE_TEXT ("Expecting keyword INCLUDE in " + "conditionalSect") ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + include = 1; + break; + case 'G': + if (this->parse_token (ACE_TEXT ("GNORE")) < 0) + { + this->fatal_error (ACE_TEXT ("Expecting keyword IGNORE in " + "conditionalSect") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + include = 0; + break; + default: + this->fatal_error (ACE_TEXT 
("Invalid conditionalSect") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + if (this->skip_whitespace() != '[') + { + this->fatal_error (ACE_TEXT ("Expecting '[' in conditionalSect") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + if (include) + this->parse_includesect (ACEXML_ENV_SINGLE_ARG_PARAMETER); + else + this->parse_ignoresect (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + return 0; +} - this->doctype_ = this->read_name (nextch); +int +ACEXML_Parser::parse_ignoresect (ACEXML_ENV_SINGLE_ARG_DECL) +{ + ACEXML_Char nextch = this->skip_whitespace(); + int count = 0; + do { + switch (nextch) + { + case '<': + if (this->peek() == '!') + { + this->get(); + if (this->peek() == '[') + { + this->get(); + count++; + } + } + break; + case ']': + if (this->peek() == ']') + { + this->get(); + if (this->peek() == '>') + { + this->get(); + --count; + } + } + break; + case 0: // [VC: Proper Conditional Section/PE Nesting] + if (count != 0) + { + this->fatal_error (ACE_TEXT ("Invalid Conditional Section/PE " + "Nesting ") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + default: + break; + } + nextch = this->get(); + } while (1); +} - this->skip_whitespace_count (&nextch); +int +ACEXML_Parser::parse_includesect (ACEXML_ENV_SINGLE_ARG_DECL) +{ + ACEXML_Char nextch = this->skip_whitespace(); + do { + switch (nextch) + { + case '<': + nextch = this->get(); + switch (nextch) + { + case '!': + nextch = this->peek(); + if (nextch == '[') + this->parse_conditional_section (ACEXML_ENV_SINGLE_ARG_PARAMETER); + else + this->parse_markup_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + break; + case '?': + nextch = this->peek(); + this->parse_processing_instruction (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + break; + default: + this->fatal_error (ACE_TEXT ("Invalid includeSect") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + break; + case '%': + 
this->parse_PE_reference (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + break; + case 0: // [VC: Proper Conditional Section/PE Nesting] + this->fatal_error (ACE_TEXT ("Invalid Conditional Section/PE " + "Nesting ") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + case ']': + if (this->peek() == ']') + { + nextch = this->get(); + if (this->peek() == '>') + { + nextch = this->get(); + return 0; + } + } + default: + this->fatal_error (ACE_TEXT ("Invalid includeSect") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + nextch = this->skip_whitespace(); + } while (1); +} - if (nextch == 'S' || nextch == 'P') // ExternalID defined +int +ACEXML_Parser::parse_markup_decl (ACEXML_ENV_SINGLE_ARG_DECL) +{ + ACEXML_Char nextch = this->peek (); + switch (nextch) { - this->parse_external_id_and_ref (this->dtd_public_, - this->dtd_system_ - ACEXML_ENV_ARG_PARAMETER); - ACEXML_CHECK_RETURN (-1); -// if (this->dtd_public_ == 0) -// ACE_DEBUG ((LM_DEBUG, -// ACE_TEXT ("ACEXML Parser got external DTD id: SYSTEM %s\n"), -// this->dtd_system_)); -// else -// ACE_DEBUG ((LM_DEBUG, -// ACE_TEXT ("ACEXML Parser got DTD external id: PUBLIC %s %s\n"), -// this->dtd_public_, this->dtd_system_)); + case 'E': // An ELEMENT or ENTITY decl + this->get (); + nextch = this->peek (); + switch (nextch) + { + case 'L': + this->parse_element_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + break; + case 'N': + this->parse_entity_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + break; + default: + this->fatal_error(ACE_TEXT ("Expecting keyword ELEMENT/ENTITY") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + break; + + case 'A': // An ATTLIST decl + this->parse_attlist_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + break; + + case 'N': // A NOTATION decl + this->parse_notation_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + break; + + case '-': // a comment. 
+ if (this->parse_comment () < 0) + { + this->fatal_error(ACE_TEXT ("Invalid comment") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + break; + case 0: // [VC: Proper Declaration/PE Nesting] + this->fatal_error (ACE_TEXT ("Unexpected end-of-file") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + case '%': + if (this->external_subset_) + { + this->parse_PE_reference (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + break; + } + // Fall through + default: + this->fatal_error (ACE_TEXT ("Invalid markupDecl") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); } + return 0; +} - nextch = this->skip_whitespace (0); +int +ACEXML_Parser::parse_external_id (ACEXML_Char *&publicId, + ACEXML_Char *&systemId + ACEXML_ENV_ARG_DECL) +{ + publicId = systemId = 0; + ACEXML_Char nextch = this->get (); + ACEXML_Char fwd = 0; switch (nextch) { - case '[': // Internal DTD definition - if (this->parse_internal_dtd (ACEXML_ENV_SINGLE_ARG_PARAMETER) < 0) - return -1; // Error in markupdecl - break; - case '>': // End of DTD definition - // this is an XML document without a dectypedecl. - return 0; - case '0': - this->report_fatal_error (ACEXML_EOF ACEXML_ENV_ARG_PARAMETER); - return -1; - default: - break; + case 'S': // External SYSTEM id. + if (this->parse_token (ACE_TEXT ("YSTEM")) < 0 || + this->skip_whitespace_count () < 1) + { + this->fatal_error(ACE_TEXT ("Expecting keyword SYSTEM") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + if (this->parse_system_literal (systemId) != 0) + { + this->fatal_error(ACE_TEXT ("Invalid systemLiteral") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + break; + case 'P': // External PUBLIC id or previously defined PUBLIC id. 
+ if (this->parse_token (ACE_TEXT ("UBLIC")) < 0 || + this->skip_whitespace_count () < 1) + { + this->fatal_error(ACE_TEXT ("Expecing keyword PUBLIC") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + if (this->parse_pubid_literal (publicId) != 0) + { + this->fatal_error(ACE_TEXT ("Invalid PubidLiteral") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + this->skip_whitespace_count(&fwd); + if (fwd == '\'' || fwd == '"') + { + if (this->parse_system_literal (systemId) != 0) + { + this->fatal_error(ACE_TEXT ("Invalid systemLiteral") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + } + else if (this->ref_state_ != ACEXML_ParserInt::IN_NOTATION) + { + this->fatal_error(ACE_TEXT ("Expecting systemLiteral after a " + "PUBLIC keyword") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + break; + default: + this->fatal_error(ACE_TEXT ("Invalid system/public Literal") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); } + return 0; +} - if (this->skip_whitespace (0) != '>') +ACEXML_Char* +ACEXML_Parser::normalize_systemid (const char* systemId) +{ + if (ACE_OS::strstr (systemId, ACE_TEXT("ftp://")) != 0 || + ACE_OS::strstr (systemId, ACE_TEXT ("http://")) != 0 || + ACE_OS::strstr (systemId, ACE_TEXT ("file://")) != 0) + return 0; + else { - this->report_fatal_error(ACEXML_EINT ACEXML_ENV_ARG_PARAMETER); - return -1; + ACEXML_Char* normalized_uri = 0; + const char* baseURI = this->current_.getLocator()->getSystemId(); + if (!baseURI) + return 0; + if (ACE_OS::strstr (baseURI, ACE_TEXT ("http://")) != 0) + { + // baseURI is a HTTP URL and systemId is relative. Note that this + // is not compliant with RFC2396. Caveat Emptor ! 
+ const ACEXML_Char* temp = ACE_OS::strrchr (baseURI, '/'); + size_t pos = temp - baseURI + 1; + size_t len = pos + ACE_OS::strlen (systemId) + 1; + ACE_NEW_RETURN (normalized_uri, ACEXML_Char[len], 0); + ACE_OS::strncpy (normalized_uri, baseURI, pos); + ACE_OS::strcpy (normalized_uri + pos + 1, systemId); + return normalized_uri; + } + else + { + const ACEXML_Char* + temp = ACE_OS::strrchr (baseURI,ACE_DIRECTORY_SEPARATOR_CHAR); + // baseURI is a local file and systemId is relative + // Unlike the HTTP one, this will work always. + if (temp) + { + size_t pos = temp - baseURI + 1; + size_t len = pos + ACE_OS::strlen (systemId) + 1; + ACE_NEW_RETURN (normalized_uri, ACEXML_Char[len], 0); + ACE_OS::strncpy (normalized_uri, baseURI, pos); + ACE_OS::strcpy (normalized_uri + pos + 1, systemId); + return normalized_uri; + } + return 0; + } } - return 0; } void ACEXML_Parser::parse_element (int is_root ACEXML_ENV_ARG_DECL) - ACE_THROW_SPEC ((ACEXML_SAXException)) + ACE_THROW_SPEC ((ACEXML_SAXException)) { // Parse STag. 
- const ACEXML_Char *startname = this->read_name (); + const ACEXML_Char *startname = this->parse_name (); if (startname == 0) { - this->report_fatal_error (ACEXML_EOF ACEXML_ENV_ARG_PARAMETER); + this->fatal_error (ACE_TEXT ("Unexpected end-of-file") + ACEXML_ENV_ARG_PARAMETER); return; } if (is_root && this->doctype_ != 0 && ACE_OS::strcmp (startname, this->doctype_) != 0) { - this->report_fatal_error (ACEXML_MISSINGROOT ACEXML_ENV_ARG_PARAMETER); - return; + this->fatal_error (ACE_TEXT ("Root element different from DOCTYPE") + ACEXML_ENV_ARG_PARAMETER); + return ; } ACEXML_AttributesImpl attributes; ACEXML_Char ch; - int new_namespace = 0; - const ACEXML_Char *endname = 0; const ACEXML_Char *ns_uri, *ns_lname; // namespace URI and localName - ACEXML_Char* prefix = 0; - ACEXML_Char* name = 0; for (int start_element_done = 0; start_element_done == 0;) { - ch = this->skip_whitespace (0); + ch = this->skip_whitespace (); switch (ch) { - case 0: - this->report_fatal_error(ACEXML_EINT ACEXML_ENV_ARG_PARAMETER); - return; - case '/': - if (this->get () != '>') - { - this->report_fatal_error(ACEXML_GREAT ACEXML_ENV_ARG_PARAMETER); - return; - } - else - { - this->xml_namespace_.processName(startname, ns_uri, ns_lname, 0); - prefix = ACE_const_cast (ACEXML_Char*, - this->xml_namespace_.getPrefix(ns_uri)); - this->report_prefix_mapping (prefix, ns_uri, ns_lname, 1 - ACEXML_ENV_ARG_PARAMETER); - ACEXML_CHECK; - this->content_handler_->startElement (ns_uri, ns_lname, - startname, &attributes + case 0: + this->fatal_error(ACE_TEXT ("Internal Parser error") + ACEXML_ENV_ARG_PARAMETER); + return; + case '/': + if (this->get () != '>') + { + this->fatal_error(ACE_TEXT ("Expecting '>' at end of element " + "definition") + ACEXML_ENV_ARG_PARAMETER); + return; + } + else + { + this->xml_namespace_.processName(startname, ns_uri, + ns_lname, 0); + this->prefix_mapping (this->xml_namespace_.getPrefix(ns_uri), + ns_uri, ns_lname, 1 + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK; + 
this->content_handler_->startElement(ns_uri, ns_lname, + startname, &attributes + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK; + this->content_handler_->endElement (ns_uri, ns_lname, startname ACEXML_ENV_ARG_PARAMETER); - ACEXML_CHECK; - this->content_handler_->endElement (ns_uri, ns_lname, startname - ACEXML_ENV_ARG_PARAMETER); - ACEXML_CHECK; - this->report_prefix_mapping (prefix, ns_uri, ns_lname, 0 - ACEXML_ENV_ARG_PARAMETER); - ACEXML_CHECK; - } - if (new_namespace != 0) - this->xml_namespace_.popContext (); - return; - - case '>': - { - this->xml_namespace_.processName (startname, ns_uri, ns_lname, 0); - prefix = ACE_const_cast (ACEXML_Char*, - this->xml_namespace_.getPrefix (ns_uri)); - this->report_prefix_mapping (prefix, ns_uri, ns_lname, 1 - ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK; + this->prefix_mapping (this->xml_namespace_.getPrefix(ns_uri), + ns_uri, ns_lname, 0 + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK; + } + if (this->nested_namespace_ != 0) + this->xml_namespace_.popContext (); + return; + + case '>': + this->xml_namespace_.processName (startname, ns_uri, + ns_lname, 0); + this->prefix_mapping (this->xml_namespace_.getPrefix(ns_uri), + ns_uri, ns_lname, 1 + ACEXML_ENV_ARG_PARAMETER); ACEXML_CHECK; - this->content_handler_->startElement (ns_uri, ns_lname, startname, - &attributes - ACEXML_ENV_ARG_PARAMETER); + this->content_handler_->startElement(ns_uri, ns_lname, startname, + &attributes + ACEXML_ENV_ARG_PARAMETER); ACEXML_CHECK; start_element_done = 1; break; - } - default: - ACEXML_Char *attvalue = 0; - ACEXML_Char *attname = this->read_name (ch); - - if (attname == 0 || - this->skip_equal () != 0 || - this->get_quoted_string (attvalue) != 0) - { - this->report_fatal_error(ACEXML_RDATTR ACEXML_ENV_ARG_PARAMETER); - return; - } - - // Handling new namespace if any. Notice that the order of - // namespace declaration does matter. 
- if (ACE_OS::strncmp (attname, ACE_TEXT("xmlns"), 5) == 0) - { - if (this->namespaces_) - { - if (new_namespace == 0) - { - this->xml_namespace_.pushContext (); - new_namespace = 1; - } - name = ACE_OS::strchr (attname, ':'); - const ACEXML_Char* ns_name = (name == 0)?empty_string:name+1; - if (this->xml_namespace_.declarePrefix (ns_name, - attvalue) == -1) - { - this->report_fatal_error(ACEXML_DUPPREFIX - ACEXML_ENV_ARG_PARAMETER); - return; - } - } - if (this->namespace_prefixes_) - { - // Namespace_prefixes_feature_ is required. So add the - // xmlns:foo to the list of attributes. - if (attributes.addAttribute (0, 0, attname, - default_attribute_type, - attvalue) == -1) - { - this->report_fatal_error(ACEXML_DUPATTR - ACEXML_ENV_ARG_PARAMETER); - return; - } - } - if (!this->namespaces_ && !this->namespace_prefixes_) - { - this->report_fatal_error(ACEXML_NSERR - ACEXML_ENV_ARG_PARAMETER); - return; - } - } - else - { - const ACEXML_Char *uri, *lName; - this->xml_namespace_.processName (attname, uri, lName, 1); - if (attributes.addAttribute (uri, lName, attname, - default_attribute_type, - attvalue) == -1) - { - this->report_fatal_error(ACEXML_DUPATTR - ACEXML_ENV_ARG_PARAMETER); - return; - } - } - break; - } - } - ACEXML_Char *cdata; - size_t cdata_length = 0; - - // Parse element contents. - while (1) - { - ACEXML_Char ch = this->get (); + default: + ACEXML_Char *attvalue = 0; + ACEXML_Char *attname = this->parse_name (ch); - switch (ch) - { - case 0: - this->report_fatal_error(ACEXML_EINT ACEXML_ENV_ARG_PARAMETER); - return; - case '<': - // Push out old 'characters' event. 
- if (cdata_length != 0) - { - cdata = this->obstack_.freeze (); - this->content_handler_->characters (cdata, 0, cdata_length - ACEXML_ENV_ARG_PARAMETER); - ACEXML_CHECK; - this->obstack_.unwind (cdata); - cdata_length = 0; - } + if (attname == 0 || + this->skip_equal () != 0 || + this->parse_attvalue (attvalue ACEXML_ENV_ARG_PARAMETER) != 0) + { + this->fatal_error(ACE_TEXT ("Error reading attribute value") + ACEXML_ENV_ARG_PARAMETER); + return; + } - switch (this->peek ()) - { - case '!': // a comment or a CDATA section. - this->get (); // consume '!' - ch = this->peek (); - if (ch == '-') // a comment - { - if (this->grok_comment () < 0) - { - this->report_fatal_error(ACEXML_INVCO - ACEXML_ENV_ARG_PARAMETER); - return; - } - } - else if (ch == '[') // a CDATA section. - { - this->parse_cdata (ACEXML_ENV_SINGLE_ARG_PARAMETER); - ACEXML_CHECK; - } - else - { - this->report_fatal_error(ACEXML_ECHAR - ACEXML_ENV_ARG_PARAMETER); - return; - } - break; - case '?': // a PI. - this->parse_processing_instruction (ACEXML_ENV_SINGLE_ARG_PARAMETER); - ACEXML_CHECK; - break; - case '/': // an ETag. + // Handling new namespace if any. Notice that the order of + // namespace declaration does matter. + if (ACE_OS::strncmp (attname, ACE_TEXT("xmlns"), 5) == 0) { - this->get (); // consume '/' - endname = this->read_name (); - if (endname == 0 || - ACE_OS::strcmp (startname, endname) != 0) + if (this->namespaces_) { - this->report_fatal_error(ACEXML_ETAG - ACEXML_ENV_ARG_PARAMETER); - return ; + if (this->nested_namespace_ == 0) + { + this->xml_namespace_.pushContext (); + this->nested_namespace_ = 1; + } + ACEXML_Char* name = ACE_OS::strchr (attname, ':'); + const ACEXML_Char* ns_name = (name == 0)? 
+ empty_string:name+1; + if (this->xml_namespace_.declarePrefix (ns_name, + attvalue) == -1) + { + this->fatal_error(ACE_TEXT ("Duplicate definition of " + "prefix") + ACEXML_ENV_ARG_PARAMETER); + return; + } } - if (this->skip_whitespace (0) != '>') + if (this->namespace_prefixes_) { - this->report_fatal_error(ACEXML_GREAT - ACEXML_ENV_ARG_PARAMETER); - return; + // Namespace_prefixes_feature_ is required. So add the + // xmlns:foo to the list of attributes. + if (attributes.addAttribute (0, 0, attname, + default_attribute_type, + attvalue) == -1) + { + this->fatal_error(ACE_TEXT ("Duplicate attribute " + "definition. Hint: Try " + "setting namespace_prefix" + "es feature to 0") + ACEXML_ENV_ARG_PARAMETER); + return; + } } - this->content_handler_->endElement (ns_uri, ns_lname, endname - ACEXML_ENV_ARG_PARAMETER); - ACEXML_CHECK; - prefix = ACE_const_cast (ACEXML_Char*, - this->xml_namespace_.getPrefix(ns_uri)); - this->report_prefix_mapping (prefix, ns_uri, ns_lname, 0 - ACEXML_ENV_ARG_PARAMETER); - ACEXML_CHECK; - if (new_namespace != 0) - this->xml_namespace_.popContext (); - return; - } - default: // a new nested element? 
- this->parse_element (0 ACEXML_ENV_ARG_PARAMETER); - ACEXML_CHECK; - break; - } - break; - case '&': - { - const ACEXML_String *replace = 0; - ACEXML_String charval; - ACEXML_Char buffer[6]; - - if (this->peek () == '#') - { - if (this->parse_char_reference (buffer, 6) != 0) + if (!this->namespaces_ && !this->namespace_prefixes_) { - this->report_fatal_error (ACEXML_INVCHAR - ACEXML_ENV_ARG_PARAMETER); - + this->fatal_error(ACE_TEXT ("One of namespaces or " + "namespace_prefixes should be" + " declared") + ACEXML_ENV_ARG_PARAMETER); return; } - charval.set (buffer, 0); - replace = &charval; } else - replace = this->parse_reference (); - - if (replace == 0) { - this->report_fatal_error(ACEXML_EINT ACEXML_ENV_ARG_PARAMETER); - return; + const ACEXML_Char *uri, *lName; + this->xml_namespace_.processName (attname, uri, lName, 1); + if (attributes.addAttribute (uri, lName, attname, + default_attribute_type, + attvalue) == -1) + { + this->fatal_error(ACE_TEXT ("Duplicate attribute " + "definition") + ACEXML_ENV_ARG_PARAMETER); + return; + } } - cdata_length = replace->length (); - for (size_t i = 0; i < replace->length (); ++i) - this->obstack_.grow ((*replace)[i]); - } - break; - case 0x0D: // End-of-Line handling - ch = (this->peek () == 0x0A ? this->get () : 0x0A); - // Fall thru... - case 0x0A: - // Fall thru... - default: - ++cdata_length; - cdata = this->obstack_.grow (ch); - if (cdata == 0) - { - cdata = this->obstack_.freeze (); - this->content_handler_->characters (cdata, 0, cdata_length - ACEXML_ENV_ARG_PARAMETER); - ACEXML_CHECK; - this->obstack_.grow (ch); - cdata_length = 1; // the missing char. 
- } + break; } } - ACE_NOTREACHED (return;) + if (this->parse_content (startname, ns_uri, ns_lname + ACEXML_ENV_ARG_PARAMETER) != 0) + return; } int -ACEXML_Parser::parse_char_reference (ACEXML_Char *buf, size_t len) +ACEXML_Parser::parse_content (const ACEXML_Char* startname, + const ACEXML_Char* ns_uri, + const ACEXML_Char* ns_lname ACEXML_ENV_ARG_DECL) { - if (this->get () != '#') // Internal error. - return -1; - - int hex = 0; - if (this->peek () == 'x') - { - hex = 1; - this->get (); - } - - int more_digit = 0; - ACEXML_UCS4 sum = 0; + ACEXML_Char *cdata; + size_t cdata_length = 0; + // Parse element contents. while (1) { ACEXML_Char ch = this->get (); + int nrelems = 0; switch (ch) { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - sum = sum * (hex ? 16 : 10) + (ch - '0'); - break; - case 'a': - case 'A': - if (!hex) - return -1; - sum = sum * 16 + 10; - break; - case 'b': - case 'B': - if (!hex) - return -1; - sum = sum * 16 + 11; - break; - case 'c': - case 'C': - if (!hex) - return -1; - sum = sum * 16 + 12; - break; - case 'd': - case 'D': - if (!hex) - return -1; - sum = sum * 16 + 13; - break; - case 'e': - case 'E': - if (!hex) - return -1; - sum = sum * 16 + 14; - break; - case 'f': - case 'F': - if (!hex) - return -1; - sum = sum * 16 + 15; - break; - case ';': - if (more_digit == 0) // no digit exist??? - return -1; - int clen; - // [WFC: Legal Character] - if (sum == 0x9 || sum == 0xA || sum == 0xD - || sum >= 0x20 && sum <= 0xD7FF - || sum >= 0xE000 && sum <= 0xFFFD - || sum >= 0x10000 && sum <= 0x10FFFF) - { + case 0: + nrelems = this->pop_context(); + if (nrelems >= 1) + break; + else if (nrelems == 0) + return 0; + else + { + this->fatal_error(ACE_TEXT ("Internal Parser Error") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + case '<': + // Push out old 'characters' event. 
+ if (cdata_length != 0) + { + cdata = this->obstack_.freeze (); + this->content_handler_->characters (cdata, 0, cdata_length + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + this->obstack_.unwind (cdata); + cdata_length = 0; + } -#if defined (ACE_USES_WCHAR) // UTF-16 - if ((clen = ACEXML_Transcoder::ucs42utf16 (sum, buf, len)) < 0) - return -1; + switch (this->peek ()) + { + case '!': // a comment or a CDATA section. + this->get (); // consume '!' + ch = this->peek (); + if (ch == '-') // a comment + { + if (this->parse_comment () < 0) + { + this->fatal_error(ACE_TEXT ("Invalid comment in " + "document") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + } + else if (ch == '[') // a CDATA section. + { + this->parse_cdata (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + else + { + this->fatal_error(ACE_TEXT ("Expecting a CDATA section " + "or a comment section") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + break; + case '?': // a PI. + this->get(); // consume the '?' + this->parse_processing_instruction + (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + break; + case '/': // an ETag. + { + this->get (); // consume '/' + ACEXML_Char* endname = this->parse_name (); + if (endname == 0 || + ACE_OS::strcmp (startname, endname) != 0) + { + this->fatal_error(ACE_TEXT ("Name in ETag doesn't " + "match name in STag") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + if (this->skip_whitespace () != '>') + { + this->fatal_error(ACE_TEXT ("Expecting '>' at end " + "of element") + ACEXML_ENV_ARG_PARAMETER); + return -1; + } + this->content_handler_->endElement (ns_uri, ns_lname, + endname + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + this->prefix_mapping (this->xml_namespace_. 
getPrefix(ns_uri), + ns_uri, ns_lname, 0 + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + if (this->nested_namespace_ != 0) + this->xml_namespace_.popContext (); + return 0; + } + default: // a new nested element? + this->parse_element (0 ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + break; + } + break; + case '&': -#else // or UTF-8 - if ((clen = ACEXML_Transcoder::ucs42utf8 (sum, buf, len)) < 0) - return -1; -#endif - buf [clen] = 0; - return 0; - } - return -1; - default: - return -1; + if (this->peek () == '#') + { + ACEXML_Char buf[7]; + if (this->parse_char_reference (buf, sizeof (buf)) != 0) + { + // [WFC: Legal Character] + ACE_ERROR ((LM_ERROR, + ACE_TEXT ("Invalid character reference\n"))); + return -1; + } + } + else + { + this->ref_state_ = ACEXML_ParserInt::IN_CONTENT; + this->parse_entity_reference(ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + break; + case '\x20': case '\x0D': case '\x0A': case '\x09': + if (this->validate_) + { + // Flush out any non-whitespace characters + if (cdata_length != 0) + { + cdata = this->obstack_.freeze (); + this->content_handler_->characters(cdata, 0, cdata_length + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + this->obstack_.unwind (cdata); + cdata_length = 0; + } + ++cdata_length; + this->obstack_.grow (ch); + while (1) + { + ch = this->peek(); + if (ch == '\x20' || ch == '\x0D' || ch == '\x0A' || + ch == '\x09') + { + ch = this->get(); + this->obstack_.grow (ch); + continue; + } + break; + } + cdata = this->obstack_.freeze (); + this->content_handler_->ignorableWhitespace (cdata, 0, + cdata_length + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + this->obstack_.unwind (cdata); + cdata_length = 0; + break; + } + // Fall thru... 
+ default: + ++cdata_length; + this->obstack_.grow (ch); } - more_digit = 1; } - ACE_NOTREACHED (return -1); + return 0; } -const ACEXML_String * -ACEXML_Parser::parse_reference (void) -{ - // @@ We'll use a temporary buffer here as the Obstack is most likely in - // use when we are here. This puts a limit on the max length of a - // reference. - ACEXML_Char ref[MAXPATHLEN]; - - size_t loc = 0; - - while (loc < MAXPATHLEN -1) - { - ACEXML_Char ch = this->get (); - if (ch == ';') - { - ref[loc] = 0; - break; - } - else - ref[loc++] = ch; - } - - return this->entities_.resolve_entity (ref); -} int ACEXML_Parser::parse_cdata (ACEXML_ENV_SINGLE_ARG_DECL) { if (this->parse_token (ACE_TEXT ("[CDATA[")) < 0) { - this->report_fatal_error(ACEXML_ECDATASEC ACEXML_ENV_ARG_PARAMETER); - return -1; + this->fatal_error(ACE_TEXT ("Expecting '[CDATA[' at beginning of CDATA " + "section") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); } ACEXML_Char ch; @@ -975,123 +1139,12 @@ ACEXML_Parser::parse_cdata (ACEXML_ENV_SINGLE_ARG_DECL) this->obstack_.grow (temp); ++datalen; } - else if (ch == 0x0D) - ch = (this->peek () == 0x0A ? this->get () : 0x0A); - this->obstack_.grow (ch); - ++datalen; + this->obstack_.grow (ch); + ++datalen; }; ACE_NOTREACHED (return -1); } -int -ACEXML_Parser::parse_internal_dtd (ACEXML_ENV_SINGLE_ARG_DECL) -{ - ACEXML_Char nextch = this->skip_whitespace (0); - - do { - switch (nextch) - { - case '<': // Start of markup Decl. - nextch = this->peek (); - switch (nextch) - { - case '!': - this->get (); // Discard '!' 
- nextch = this->peek (); - switch (nextch) - { - case 'E': // An ELEMENT or ENTITY decl - this->get (); - nextch = this->peek (); - switch (nextch) - { - case 'L': - if (this->parse_element_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER) < 0) - return -1; - break; - - case 'N': - if (this->parse_entity_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER) < 0) - return -1; - break; - - default: - this->report_fatal_error(ACEXML_MKDECLKEYWORD - ACEXML_ENV_ARG_PARAMETER); - return -1; - } - break; - - case 'A': // An ATTLIST decl - if (this->parse_attlist_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER) < 0) - return -1; - break; - - case 'N': // A NOTATION decl - if (this->parse_notation_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER) < 0) - return -1; - break; - - case '-': // a comment. - if (this->grok_comment () < 0) - { - this->report_fatal_error(ACEXML_INVCO - ACEXML_ENV_ARG_PARAMETER); - return -1; - } - break; - case 0: - this->report_fatal_error (ACEXML_EOF ACEXML_ENV_ARG_PARAMETER); - return -1; - default: - this->report_fatal_error (ACEXML_MKINVCH - ACEXML_ENV_ARG_PARAMETER); - return -1; - } - break; - - case '?': // PI - this->parse_processing_instruction (ACEXML_ENV_SINGLE_ARG_PARAMETER); - ACEXML_CHECK_RETURN (-1); - break; - - case 0: - this->report_fatal_error (ACEXML_EOF ACEXML_ENV_ARG_PARAMETER); - return -1; - default: - this->report_fatal_error (ACEXML_MKINVCH ACEXML_ENV_ARG_PARAMETER); - return -1; - } - break; - - case '%': // DeclSep. Define new PEreference... - break; - - case ']': // End of internal definitions. - return 0; // Not applicable when parsing external DTD spec. - - case 0: // This may not be an error if we decide - // to generalize this function to handle both - // internal and external DTD definitions. 
- this->report_fatal_error (ACEXML_EOF ACEXML_ENV_ARG_PARAMETER); - return -1; - - default: - this->report_fatal_error (ACEXML_MKDECL ACEXML_ENV_ARG_PARAMETER); - return -1; - }; - - // To fully conform with the spec., whitespaces are only allowed - // following a 'DeclSep' section. However, I found it - // hard/impossible to eliminate all the whitespaces between - // markupdecls. - - nextch = this->skip_whitespace (0); - - } while (1); - - ACE_NOTREACHED (return -1); -} int ACEXML_Parser::parse_element_decl (ACEXML_ENV_SINGLE_ARG_DECL) @@ -1099,48 +1152,61 @@ ACEXML_Parser::parse_element_decl (ACEXML_ENV_SINGLE_ARG_DECL) if ((this->parse_token (ACE_TEXT ("LEMENT")) < 0) || this->skip_whitespace_count () == 0) { - this->report_fatal_error (ACEXML_EELEMENT ACEXML_ENV_ARG_PARAMETER); - return -1; + this->fatal_error (ACE_TEXT ("Expecting keyword ELEMENT followed by " + "space") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); } - ACEXML_Char *element_name = this->read_name (); + ACEXML_Char *element_name = this->parse_name (); if (element_name == 0) { - this->report_fatal_error (ACEXML_ERDELENAME ACEXML_ENV_ARG_PARAMETER); - return -1; + this->fatal_error (ACE_TEXT ("Invalid element name") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); } ACEXML_Char nextch ; - this->skip_whitespace_count (&nextch); + if (this->skip_whitespace_count (&nextch) == 0) + { + this->fatal_error (ACE_TEXT ("Expecting a space between ELEMENT name " + "and definition") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } switch (nextch) { - case 'E': // EMPTY - if (this->parse_token (ACE_TEXT ("EMPTY")) < 0) - { - this->report_fatal_error (ACEXML_EEMPTY ACEXML_ENV_ARG_PARAMETER); - return -1; - } - break; - case 'A': // ANY - if (this->parse_token (ACE_TEXT ("ANY")) < 0) - { - this->report_fatal_error (ACEXML_EANY ACEXML_ENV_ARG_PARAMETER); - return -1; - } - break; - case '(': // children - this->parse_children_definition (ACEXML_ENV_SINGLE_ARG_PARAMETER); - 
ACEXML_CHECK_RETURN (-1); - break; - default: // error - this->report_fatal_error (ACEXML_ERDELE ACEXML_ENV_ARG_PARAMETER); - return -1; + case 'E': // EMPTY + if (this->parse_token (ACE_TEXT ("EMPTY")) < 0) + { + this->fatal_error (ACE_TEXT ("Expecting keyword EMPTY") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + break; + case 'A': // ANY + if (this->parse_token (ACE_TEXT ("ANY")) < 0) + { + this->fatal_error (ACE_TEXT ("Expecting keyword ANY") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + break; + case '(': // children + this->parse_children_definition (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + break; + default: // error + this->fatal_error (ACE_TEXT ("Invalid element definition") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); } - if (this->skip_whitespace (0) != '>') + if (this->skip_whitespace () != '>') { - this->report_fatal_error (ACEXML_GREAT ACEXML_ENV_ARG_PARAMETER); - return -1; + this->fatal_error (ACE_TEXT ("Expecing '>' after element defintion") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); } return 0; } @@ -1153,8 +1219,9 @@ ACEXML_Parser::parse_entity_decl (ACEXML_ENV_SINGLE_ARG_DECL) if ((this->parse_token (ACE_TEXT ("NTITY")) < 0) || this->skip_whitespace_count (&nextch) == 0) { - this->report_fatal_error (ACEXML_EENTITY ACEXML_ENV_ARG_PARAMETER); - return -1; + this->fatal_error (ACE_TEXT ("Expecting keyword ENTITY followed by a " + "space") ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); } int is_GEDecl = 1; @@ -1164,100 +1231,146 @@ ACEXML_Parser::parse_entity_decl (ACEXML_ENV_SINGLE_ARG_DECL) this->get (); // consume the '%' if (this->skip_whitespace_count (&nextch) == 0) { - this->report_fatal_error (ACEXML_INVREF ACEXML_ENV_ARG_PARAMETER); - return -1; + this->fatal_error (ACE_TEXT ("Expecting space between % and " + "entity name") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); } } - ACEXML_Char *entity_name = this->read_name (); + ACEXML_Char 
*entity_name = this->parse_name (); if (entity_name == 0) { - this->report_fatal_error (ACEXML_ENTNAME ACEXML_ENV_ARG_PARAMETER); - return -1; + this->fatal_error (ACE_TEXT ("Invalid entity name") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); } - this->skip_whitespace_count (&nextch); - + if (this->skip_whitespace_count (&nextch) == 0) + { + this->fatal_error (ACE_TEXT ("Expecting space between entity name and " + "entityDef") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + int retval = 0; if (nextch == '\'' || nextch == '"') { ACEXML_Char *entity_value = 0; - - if (this->get_quoted_string (entity_value) != 0) + if (this->parse_entity_value (entity_value + ACEXML_ENV_ARG_PARAMETER) != 0) { - this->report_fatal_error(ACEXML_ENTVALUE ACEXML_ENV_ARG_PARAMETER); - return -1; + this->fatal_error(ACE_TEXT ("Invalid EntityValue") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); } - if (is_GEDecl) + retval = this->internal_GE_.add_entity (entity_name, + entity_value); + else + retval = this->internal_PE_.add_entity (entity_name, + entity_value); + if (retval < 0) { - if (this->entities_.add_entity (entity_name, entity_value) != 0) - { - this->report_fatal_error(ACEXML_DUPENT ACEXML_ENV_ARG_PARAMETER); - return -1; - } + this->fatal_error (ACE_TEXT ("Internal Parser Error in adding" + "Entity to map") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); } - else + else if (retval == 1) { - // @@ need to implement PEdecl lookup mechanism - ACEXML_THROW_RETURN (ACEXML_SAXNotSupportedException (), -1); + this->warning (ACE_TEXT ("Duplicate entity found") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); } } else { ACEXML_Char *systemid, *publicid; - this->parse_external_id_and_ref (publicid, systemid ACEXML_ENV_ARG_PARAMETER); + this->parse_external_id (publicid, systemid + ACEXML_ENV_ARG_PARAMETER); ACEXML_CHECK_RETURN (-1); if (systemid == 0) { - this->report_fatal_error(ACEXML_INVEXTID ACEXML_ENV_ARG_PARAMETER); - return -1; + 
this->fatal_error(ACE_TEXT ("Invalid SystemLiteral") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); } this->skip_whitespace_count (&nextch); if (nextch == 'N') // NDATA section followed { if (is_GEDecl == 0) { - this->report_fatal_error(ACEXML_UNDATA ACEXML_ENV_ARG_PARAMETER); - return -1; + this->fatal_error(ACE_TEXT ("Invalid NDataDecl in PEDef") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); } if ((this->parse_token (ACE_TEXT ("NDATA")) < 0) || this->skip_whitespace_count (&nextch) == 0) { - this->report_fatal_error(ACEXML_ENDATA ACEXML_ENV_ARG_PARAMETER); - return -1; + this->fatal_error(ACE_TEXT ("Expecting keyword NDATA followed " + "by a space") ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); } - ACEXML_Char *ndata = this->read_name (); - this->dtd_handler_->unparsedEntityDecl (entity_name, publicid, - systemid, ndata - ACEXML_ENV_ARG_PARAMETER); - ACEXML_CHECK_RETURN (-1); + ACEXML_Char *ndata = this->parse_name (); + if (this->validate_) // [VC: Notation Declared] + { + if (!this->notations_.resolve_entity (ndata)) + { + this->fatal_error (ACE_TEXT ("Undeclared Notation name") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + this->dtd_handler_->unparsedEntityDecl(entity_name, publicid, + systemid, ndata + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } } else { - // @@ Need to support external CharStream sources - ACE_DEBUG ((LM_DEBUG, - ACE_TEXT ("ENTITY: (%s) "), - entity_name)); - - if (publicid == 0) - ACE_DEBUG ((LM_DEBUG, - ACE_TEXT ("SYSTEM %s\n"), - systemid)); + if (is_GEDecl) + retval = this->external_GE_.add_entity (entity_name, + systemid); else - ACE_DEBUG ((LM_DEBUG, - ACE_TEXT ("PUBLIC %s %s\n"), - publicid, systemid)); + retval = this->external_PE_.add_entity (entity_name, + systemid); + if (retval < 0) + { + this->fatal_error(ACE_TEXT ("Internal Parser Error") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + else if (retval == 1) + this->warning(ACE_TEXT ("Duplicate external 
entity") + ACEXML_ENV_ARG_PARAMETER); + if (is_GEDecl) + retval = this->external_GE_.add_entity (entity_name, + publicid); + else + retval = this->external_PE_.add_entity (entity_name, + publicid); + if (retval < 0) + { + this->fatal_error(ACE_TEXT ("Internal Parser Error") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + else if (retval == 1) + this->warning (ACE_TEXT ("Duplicate entity definition") + ACEXML_ENV_ARG_PARAMETER); } } // End of ENTITY definition - if (this->skip_whitespace (0) != '>') + if (this->skip_whitespace() != '>') { - this->report_fatal_error(ACEXML_GREAT ACEXML_ENV_ARG_PARAMETER); - return -1; + this->fatal_error(ACE_TEXT ("Expecting '>' at end of entityDef") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); } return 0; } @@ -1268,229 +1381,100 @@ ACEXML_Parser::parse_attlist_decl (ACEXML_ENV_SINGLE_ARG_DECL) if ((this->parse_token (ACE_TEXT ("ATTLIST")) < 0) || this->skip_whitespace_count () == 0) { - this->report_fatal_error(ACEXML_EATTLIST ACEXML_ENV_ARG_PARAMETER); - return -1; + this->fatal_error(ACE_TEXT ("Expecting keyword 'ATTLIST' followed by a " + "space ") ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); } - ACEXML_Char *element_name = this->read_name (); + ACEXML_Char *element_name = this->parse_name (); if (element_name == 0) { - this->report_fatal_error(ACEXML_ERDELENAME ACEXML_ENV_ARG_PARAMETER); - return -1; + this->fatal_error(ACE_TEXT ("Invalid element Name in attlistDecl") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); } - - ACEXML_Char nextch = this->skip_whitespace (0); - + ACEXML_Char fwd; + int count = this->skip_whitespace_count(&fwd); + int nrelems = 0; // Parse AttDef* - while (nextch != '>') + while (fwd != '>') { - // Parse attribute name - ACEXML_Char *att_name = this->read_name (nextch); - if (att_name == 0) + if (count == 0) { - this->report_fatal_error(ACEXML_EATTNAME ACEXML_ENV_ARG_PARAMETER); - return -1; + this->fatal_error(ACE_TEXT ("Expecting space between element name 
" + "and AttDef") ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); } - - /* - Parse AttType: - Possible keywords: - CDATA // StringType - ID // TokenizedType - IDREF - IDREFS - ENTITY - ENTITIES - NMTOKEN - NMTOKENS - NOTATION // EnumeratedType - NotationTYpe - ( // EnumeratedType - Enumeration - */ - nextch = this->skip_whitespace (0); - switch (nextch) + switch (fwd) { - case 'C': // CDATA - if ((this->parse_token (ACE_TEXT ("DATA")) < 0) || - this->skip_whitespace_count () == 0) - { - this->report_fatal_error(ACEXML_ECDATA ACEXML_ENV_ARG_PARAMETER); - return -1; - } - // Else, we have successfully identified the type of the - // attribute as CDATA - // @@ Set up validator appropriately here. - break; - case 'I': // ID, IDREF, or, IDREFS - if (this->get () == 'D') - { - if (this->skip_whitespace_count (&nextch) > 0) - { - // We have successfully identified the type of the - // attribute as ID @@ Set up validator as such. - break; - } - if (this->parse_token (ACE_TEXT ("REF")) == 0) - { - if (this->skip_whitespace_count (&nextch) > 0) - { - // We have successfully identified the type of - // the attribute as IDREF - // @@ Set up validator as such. - break; - } - else if (nextch == 'S' && - this->get () && // consume the 'S' - this->skip_whitespace_count () != 0) - { - // We have successfully identified the type of - // the attribute as IDREFS - // @@ Set up validator as such. - break; - } - } - } - // Admittedly, this error message is not precise enough - this->report_fatal_error(ACEXML_EID ACEXML_ENV_ARG_PARAMETER); - return -1; - case 'E': // ENTITY or ENTITIES - if (this->parse_token (ACE_TEXT ("NTIT")) == 0) - { - nextch = this->get (); - if (nextch == 'Y') - { - // We have successfully identified the type of - // the attribute as ENTITY - // @@ Set up validator as such. 
- } - else if (nextch == 'I'&& this->get () == 'E' && - this->get () == 'S') - { - // We have successfully identified the type of - // the attribute as ENTITIES - // @@ Set up validator as such. - } - if (this->skip_whitespace_count () > 0) - { - // success - break; - } - } - // Admittedly, this error message is not precise enough - this->report_fatal_error(ACEXML_EENTITIES ACEXML_ENV_ARG_PARAMETER); - return -1; - case 'N': // NMTOKEN, NMTOKENS, or, NOTATION - nextch = this->get (); - if (nextch != 'M' || nextch != 'O') - { - this->report_fatal_error(ACEXML_ENMTOKEN - ACEXML_ENV_ARG_PARAMETER); - return -1; - } - if (nextch == 'M') - { - if (this->parse_token (ACE_TEXT ("TOKEN")) == 0) - { - if (this->skip_whitespace_count (&nextch) > 0) - { - // We have successfully identified the type of - // the attribute as NMTOKEN - // @@ Set up validator as such. - break; - } - else if (nextch == 'S' && this->skip_whitespace_count () > 0) - { - // We have successfully identified the type of - // the attribute as NMTOKENS - // @@ Set up validator as such. - break; - } - } - this->report_fatal_error(ACEXML_ENMTOKENS - ACEXML_ENV_ARG_PARAMETER); - return -1; - } - else // NOTATION - { - if ((this->parse_token (ACE_TEXT ("TATION")) < 0) || - this->skip_whitespace_count () == 0) - { - this->report_fatal_error(ACEXML_ENOTATION - ACEXML_ENV_ARG_PARAMETER); - return -1; - } - - if (this->get () != '(') - { - this->report_fatal_error(ACEXML_LPAREN - ACEXML_ENV_ARG_PARAMETER); - return -1; - } - - this->skip_whitespace_count (); - - do { - ACEXML_Char *notation_name = this->read_name (); - if (notation_name == 0) - { - this->report_fatal_error(ACEXML_ENOTNAME - ACEXML_ENV_ARG_PARAMETER); - return -1; - } - // @@ get another notation name, set up validator as such - this->skip_whitespace_count (&nextch); - } while (nextch != ')'); - - this->get (); // consume the closing paren. 
- this->skip_whitespace_count (); - } - break; - case '(': // EnumeratedType - Enumeration - this->skip_whitespace_count (); - - do { - ACEXML_Char *token_name = this->read_name (); // @@ need a special read_nmtoken? - if (token_name == 0) + case '%': + if (this->external_subset_) { - this->report_fatal_error(ACEXML_ENMNAME - ACEXML_ENV_ARG_PARAMETER); - return -1; + this->parse_PE_reference (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); } - // @@ get another nmtoken, set up validator as such - this->skip_whitespace_count (&nextch); - } while (nextch != ')'); - - this->get (); // consume the closing paren. - this->skip_whitespace_count (); - break; - default: - { - this->report_fatal_error(ACEXML_INVATTRTYPE - ACEXML_ENV_ARG_PARAMETER); - return -1; - } - ACE_NOTREACHED (break); + break; + case 0: + nrelems = this->pop_context(); + if (nrelems >= 1) + break; + else + { + this->fatal_error(ACE_TEXT ("Internal Parser Error") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + default: + break; } - - /* - Parse DefaultDecl: - #REQUIRED - #IMPLIED - #FIXED - quoted string // #FIXED - */ - nextch = this->peek (); - switch (nextch) + // Parse attribute name + ACEXML_Char *att_name = this->parse_name (); + if (att_name == 0) { - case '#': - this->get (); // consume the '#' - switch (this->get ()) - { + this->fatal_error(ACE_TEXT ("Invalid AttName") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + count = this->skip_whitespace_count (&fwd); + if (count == 0) + { + this->fatal_error(ACE_TEXT ("Expecting space between AttName and " + "AttType") ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + this->parse_atttype (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + this->parse_defaultdecl (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + count = this->skip_whitespace_count(&fwd); + } + this->get (); // consume closing '>' + return 0; +} + +int +ACEXML_Parser::parse_defaultdecl 
(ACEXML_ENV_SINGLE_ARG_DECL) +{ + /* + Parse DefaultDecl: + #REQUIRED + #IMPLIED + #FIXED + quoted string // #FIXED + */ + ACEXML_Char nextch = this->peek (); + switch (nextch) + { + case '#': + this->get (); // consume the '#' + switch (this->get ()) + { case 'R': if (this->parse_token (ACE_TEXT ("EQUIRED")) < 0) { - this->report_fatal_error(ACEXML_EREQUIRED - ACEXML_ENV_ARG_PARAMETER); - return -1; + this->fatal_error(ACE_TEXT ("Expecting keyword REQUIRED") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); } // We now know this attribute is required // @@ Set up the validator as such. @@ -1498,9 +1482,9 @@ ACEXML_Parser::parse_attlist_decl (ACEXML_ENV_SINGLE_ARG_DECL) case 'I': if (this->parse_token (ACE_TEXT ("MPLIED")) < 0) { - this->report_fatal_error(ACEXML_EIMPLIED - ACEXML_ENV_ARG_PARAMETER); - return -1; + this->fatal_error(ACE_TEXT ("Expecting keyword IMPLIED") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); } // We now know this attribute is impleid. // @@ Set up the validator as such. @@ -1509,44 +1493,242 @@ ACEXML_Parser::parse_attlist_decl (ACEXML_ENV_SINGLE_ARG_DECL) if (this->parse_token (ACE_TEXT ("IXED")) < 0 || this->skip_whitespace_count () == 0) { - this->report_fatal_error(ACEXML_EFIXED - ACEXML_ENV_ARG_PARAMETER); - return -1; + this->fatal_error(ACE_TEXT ("Expecting keyword FIXED") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); } // We now know this attribute is fixed. 
ACEXML_Char *fixed_attr; - if (this->get_quoted_string (fixed_attr) != 0) + if (this->parse_attvalue (fixed_attr + ACEXML_ENV_ARG_PARAMETER) != 0) { - this->report_fatal_error(ACEXML_EINVFIXED - ACEXML_ENV_ARG_PARAMETER); - return -1; + this->fatal_error(ACE_TEXT ("Invalid AttValue") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); } // @@ set up validator break; default: - break; - } - break; - case '\'': - case '"': - ACEXML_Char *fixed_attr; - if (this->get_quoted_string (fixed_attr) != 0) + this->fatal_error (ACE_TEXT ("Invalid DefaultDecl") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + break; + case '\'': + case '"': + ACEXML_Char *fixed_attr; + if (this->parse_attvalue (fixed_attr ACEXML_ENV_ARG_PARAMETER) != 0) + { + this->fatal_error(ACE_TEXT ("Invalid AttValue") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + // @@ set up validator + break; + default: + this->fatal_error (ACE_TEXT ("Invalid DefaultDecl") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + break; + } + return 0; +} + + +int +ACEXML_Parser::parse_atttype (ACEXML_ENV_SINGLE_ARG_DECL) +{ + /* + Parse AttType: + Possible keywords: + CDATA // StringType + ID // TokenizedType + IDREF + IDREFS + ENTITY + ENTITIES + NMTOKEN + NMTOKENS + NOTATION // EnumeratedType - NotationTYpe + ( // EnumeratedType - Enumeration + */ + ACEXML_Char nextch = this->get(); + switch (nextch) + { + case 'C': // CDATA + if ((this->parse_token (ACE_TEXT ("DATA")) < 0) || + this->skip_whitespace_count () == 0) + { + this->fatal_error(ACE_TEXT ("Expecting keyword 'CDATA'") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + // Else, we have successfully identified the type of the + // attribute as CDATA + // @@ Set up validator appropriately here. 
+ break; + case 'I': // ID, IDREF, or, IDREFS + if (this->get () == 'D') + { + if (this->skip_whitespace_count (&nextch) > 0) + { + // We have successfully identified the type of the + // attribute as ID @@ Set up validator as such. + break; + } + if (this->parse_token (ACE_TEXT ("REF")) == 0) + { + if (this->skip_whitespace_count (&nextch) > 0) + { + // We have successfully identified the type of + // the attribute as IDREF + // @@ Set up validator as such. + break; + } + else if (nextch == 'S' && + this->get () && // consume the 'S' + this->skip_whitespace_count () != 0) + { + // We have successfully identified the type of + // the attribute as IDREFS + // @@ Set up validator as such. + break; + } + } + } + // Admittedly, this error message is not precise enough + this->fatal_error(ACE_TEXT ("Expecting keyword `ID', `IDREF', or" + "`IDREFS'") ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + case 'E': // ENTITY or ENTITIES + if (this->parse_token (ACE_TEXT ("NTIT")) == 0) + { + nextch = this->get (); + if (nextch == 'Y') + { + // We have successfully identified the type of + // the attribute as ENTITY + // @@ Set up validator as such. + } + else if (nextch == 'I'&& this->get () == 'E' && + this->get () == 'S') + { + // We have successfully identified the type of + // the attribute as ENTITIES + // @@ Set up validator as such. 
+ } + if (this->skip_whitespace_count () > 0) + { + // success + break; + } + } + // Admittedly, this error message is not precise enough + this->fatal_error(ACE_TEXT ("Expecting keyword `ENTITY', or" + "`ENTITIES'") ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + case 'N': // NMTOKEN, NMTOKENS, or, NOTATION + nextch = this->get (); + if (nextch != 'M' || nextch != 'O') + { + this->fatal_error(ACE_TEXT ("Expecting keyword `NMTOKEN'," + "`NMTOKENS', or `NOTATION'") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + if (nextch == 'M') + { + if (this->parse_token (ACE_TEXT ("TOKEN")) == 0) + { + if (this->skip_whitespace_count (&nextch) > 0) + { + // We have successfully identified the type of + // the attribute as NMTOKEN + // @@ Set up validator as such. + break; + } + else if (nextch == 'S' && this->skip_whitespace_count () > 0) + { + // We have successfully identified the type of + // the attribute as NMTOKENS + // @@ Set up validator as such. + break; + } + } + this->fatal_error(ACE_TEXT ("Expecting keyword `NMTOKEN' or `NMTO" + "KENS'") ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + else // NOTATION + { + if ((this->parse_token (ACE_TEXT ("TATION")) < 0) || + this->skip_whitespace_count () == 0) + { + this->fatal_error(ACE_TEXT ("Expecting keyword `NOTATION'") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + + if (this->get () != '(') + { + this->fatal_error(ACE_TEXT ("Expecting '(' in elementDef") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + + this->skip_whitespace_count (); + + do { + ACEXML_Char *notation_name = this->parse_name (); + if (notation_name == 0) + { + this->fatal_error(ACE_TEXT ("Invalid notation name") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + // @@ get another notation name, set up validator as such + this->skip_whitespace_count (&nextch); + if (nextch != '|') + break; + nextch = this->get(); + this->skip_whitespace_count (&nextch); + } while (nextch != 
')'); + + this->get (); // consume the closing paren. + this->skip_whitespace_count (); + } + break; + case '(': // EnumeratedType - Enumeration + this->skip_whitespace_count (); + + do { + ACEXML_Char *token_name = this->parse_nmtoken(); + if (token_name == 0) { - this->report_fatal_error(ACEXML_EINVFIXED - ACEXML_ENV_ARG_PARAMETER); - return -1; + this->fatal_error(ACE_TEXT ("Invalid NMTOKEN") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); } - // @@ set up validator - break; - default: - break; + // @@ get another nmtoken, set up validator as such + this->skip_whitespace_count (&nextch); + if (nextch != '|') + break; + nextch = this->get(); + this->skip_whitespace_count (&nextch); + } while (nextch != ')'); + + this->get (); // consume the closing paren. + this->skip_whitespace_count (); + break; + default: + { + this->fatal_error(ACE_TEXT ("Invalid AttType") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); } - this->skip_whitespace_count (&nextch); + ACE_NOTREACHED (break); } - - this->get (); // consume closing '>' - return 0; } @@ -1556,93 +1738,72 @@ ACEXML_Parser::parse_notation_decl (ACEXML_ENV_SINGLE_ARG_DECL) if (this->parse_token (ACE_TEXT ("NOTATION")) < 0 || this->skip_whitespace_count () == 0) { - this->report_fatal_error(ACEXML_ENOTATION ACEXML_ENV_ARG_PARAMETER); - return -1; + this->fatal_error(ACE_TEXT ("Expecting Keyword 'NOTATION'") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); } - ACEXML_Char *notation = this->read_name (); + ACEXML_Char *notation = this->parse_name (); if (notation == 0) { - this->report_fatal_error(ACEXML_INVNOTNAME ACEXML_ENV_ARG_PARAMETER); - return -1; + this->fatal_error(ACE_TEXT ("Invalid Notation name") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); } this->skip_whitespace_count (); ACEXML_Char *systemid, *publicid; - this->parse_external_id_and_ref (publicid, systemid ACEXML_ENV_ARG_PARAMETER); + // Gross hack but otherwise we need to go around a lot of loops to parse, + // When 
the ExternalID starts with 'PUBLIC' we cannot distinguish a + // PublicId from a ExternalID by looking using a one character read-ahead + ACEXML_ParserInt::ReferenceState temp = this->ref_state_; + this->ref_state_ = ACEXML_ParserInt::IN_NOTATION; + + this->parse_external_id (publicid, systemid + ACEXML_ENV_ARG_PARAMETER); ACEXML_CHECK_RETURN (-1); + // Restore the original value. + this->ref_state_ = temp; - if (this->get () != '>') + // [VC: Unique Notation Name] + if (systemid && this->notations_.add_entity (notation, systemid) != 0 + && this->validate_) { - this->report_fatal_error(ACEXML_GREAT ACEXML_ENV_ARG_PARAMETER); - return -1; + this->fatal_error(ACE_TEXT ("Internal Parser Error") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); } - - this->dtd_handler_->notationDecl (notation, - publicid, - systemid ACEXML_ENV_ARG_PARAMETER); - ACEXML_CHECK_RETURN (-1); - - return 0; -} - -int -ACEXML_Parser::parse_external_id_and_ref (ACEXML_Char *&publicId, - ACEXML_Char *&systemId ACEXML_ENV_ARG_DECL) -{ - publicId = systemId = 0; - ACEXML_Char nextch = this->get (); - - switch (nextch) + if (publicid) { - case 'S': // External SYSTEM id. - if (this->parse_token (ACE_TEXT ("YSTEM")) < 0 || - this->skip_whitespace_count () == 0) - { - this->report_fatal_error(ACEXML_ESYSTEM ACEXML_ENV_ARG_PARAMETER); - return -1; - } - if (this->get_quoted_string (systemId) != 0) - { - this->report_fatal_error(ACEXML_ELITERAL ACEXML_ENV_ARG_PARAMETER); - return -1; - } - this->locator_.setSystemId (systemId); - break; - case 'P': // External PUBLIC id or previously defined PUBLIC id. 
- if (this->parse_token (ACE_TEXT ("UBLIC")) < 0 || - this->skip_whitespace_count () == 0) - { - this->report_fatal_error(ACEXML_EPUBLIC ACEXML_ENV_ARG_PARAMETER); - return -1; - } - if (this->get_quoted_string (publicId) != 0) + int retval = this->notations_.add_entity (notation, publicid); + if (retval != 0 && !systemid && this->validate_) { - this->report_fatal_error(ACEXML_ELITERAL ACEXML_ENV_ARG_PARAMETER); - return -1; + this->fatal_error(ACE_TEXT ("Internal Parser Error") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); } - this->locator_.setPublicId (publicId); + } - this->skip_whitespace_count (&nextch); - if (nextch == '\'' || nextch == '"') // not end of NOTATION yet. - { - if (this->get_quoted_string (systemId) != 0) - { - this->report_fatal_error(ACEXML_ELITERAL - ACEXML_ENV_ARG_PARAMETER); - return -1; - } - this->locator_.setSystemId (systemId); - } - break; - default: - this->report_fatal_error(ACEXML_ESYSPUB ACEXML_ENV_ARG_PARAMETER); - return -1; + if (this->skip_whitespace() != '>') + { + this->fatal_error(ACE_TEXT ("Expecting '>' at end of NotationDecl") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + + if (this->validate_ && this->dtd_handler_) + { + this->dtd_handler_->notationDecl (notation, + publicid, + systemid ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); } return 0; } + + int ACEXML_Parser::parse_children_definition (ACEXML_ENV_SINGLE_ARG_DECL) { @@ -1654,45 +1815,49 @@ ACEXML_Parser::parse_children_definition (ACEXML_ENV_SINGLE_ARG_DECL) switch (nextch) { - case '#': // Mixed element, - if (this->parse_token (ACE_TEXT ("#PCDATA")) < 0) - { - this->report_fatal_error(ACEXML_EPCDATA ACEXML_ENV_ARG_PARAMETER); - return -1; - } + case '#': // Mixed element, + if (this->parse_token (ACE_TEXT ("#PCDATA")) < 0) + { + this->fatal_error(ACE_TEXT ("Expecting keyword '#PCDATA'") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } - this->skip_whitespace_count (&nextch); + this->skip_whitespace_count 
(&nextch); - while (nextch != ')') - { - if (this->get () != '|') - { - this->report_fatal_error(ACEXML_EMIXED ACEXML_ENV_ARG_PARAMETER); - return -1; - } - this->skip_whitespace_count (); + while (nextch != ')') + { + if (this->get () != '|') + { + this->fatal_error(ACE_TEXT ("Illegal character in Mixed " + "element") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + this->skip_whitespace_count (); - ACEXML_Char *name = this->read_name (); - // @@ name will be used in the Validator later. - ACE_UNUSED_ARG (name); - ++subelement_number; - // @@ Install Mixed element name into the validator. - this->skip_whitespace_count (&nextch); - } + ACEXML_Char *name = this->parse_name (); + // @@ name will be used in the Validator later. + ACE_UNUSED_ARG (name); + ++subelement_number; + // @@ Install Mixed element name into the validator. + this->skip_whitespace_count (&nextch); + } - if (this->get () != ')' || - (subelement_number && this->get () != '*')) - { - this->report_fatal_error(ACEXML_ERPAREN ACEXML_ENV_ARG_PARAMETER); + if (this->get () != ')' || + (subelement_number && this->get () != '*')) + { + this->fatal_error(ACE_TEXT ("Expecing ')' or ')*' at end of Mixed" + " element") ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + // @@ close the element definition in the validator. + break; + default: + int status = this->parse_child (1 ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + if (status != 0) return -1; - } - // @@ close the element definition in the validator. 
- break; - default: - int status = this->parse_child (1 ACEXML_ENV_ARG_PARAMETER); - ACEXML_CHECK_RETURN (-1); - if (status != 0) - return -1; } return 0; @@ -1705,8 +1870,9 @@ ACEXML_Parser::parse_child (int skip_open_paren ACEXML_ENV_ARG_DECL) if (skip_open_paren == 0 && this->get () != '(') { - this->report_fatal_error(ACEXML_LPAREN ACEXML_ENV_ARG_PARAMETER); - return -1; + this->fatal_error(ACE_TEXT ("Expecting '(' at beginning of children") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); } ACEXML_Char node_type = 0; @@ -1716,57 +1882,64 @@ ACEXML_Parser::parse_child (int skip_open_paren ACEXML_ENV_ARG_DECL) this->skip_whitespace_count (&nextch); switch (nextch) { - case '(': - this->parse_child (0 ACEXML_ENV_ARG_PARAMETER); - ACEXML_CHECK_RETURN (-1); - break; - default: - // must be an element name here. - ACEXML_Char *subelement = this->read_name (); - if (subelement == 0) - { - this->report_fatal_error(ACEXML_ESUBELE ACEXML_ENV_ARG_PARAMETER); - return -1; - } - // @@ Inform validator of the new element here. - break; + case '(': + this->parse_child (0 ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + break; + default: + // must be an element name here. + ACEXML_Char *subelement = this->parse_name (); + if (subelement == 0) + { + this->fatal_error(ACE_TEXT ("Invalid subelement name") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + // @@ Inform validator of the new element here. + break; } this->skip_whitespace_count (&nextch); switch (nextch) { - case '|': - switch (node_type) - { - case 0: - node_type = '|'; - // @@ inform validator of this new type?? - break; - case '|': - break; - default: - this->report_fatal_error(ACEXML_ECHOICE ACEXML_ENV_ARG_PARAMETER); - return -1; - } - break; - case ',': - switch (node_type) - { - case 0: - node_type = ','; - // @@ inform validator of this new type?? 
- break; - case ',': - break; - default: - this->report_fatal_error(ACEXML_ECHOICE ACEXML_ENV_ARG_PARAMETER); - return -1; - } - case ')': - break; - default: - this->report_fatal_error(ACEXML_ECHOICE ACEXML_ENV_ARG_PARAMETER); - return -1; + case '|': + switch (node_type) + { + case 0: + node_type = '|'; + // @@ inform validator of this new type?? + break; + case '|': + break; + default: + this->fatal_error (ACE_TEXT ("Expecting `,', `|', or `)' " + "while defining an element") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + break; + case ',': + switch (node_type) + { + case 0: + node_type = ','; + // @@ inform validator of this new type?? + break; + case ',': + break; + default: + this->fatal_error (ACE_TEXT ("Expecting `,', `|', or `)' " + "while defining an element") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + case ')': + break; + default: + this->fatal_error (ACE_TEXT ("Expecting `,', `|', or `)' " + "while defining an element") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); } this->get (); // consume , | or ) } while (nextch != ')'); @@ -1775,194 +1948,1139 @@ ACEXML_Parser::parse_child (int skip_open_paren ACEXML_ENV_ARG_DECL) nextch = this->peek (); switch (nextch) { - case '?': - // @@ Consume the character and inform validator as such, - this->get (); - break; - case '*': - // @@ Consume the character and inform validator as such, - this->get (); - break; - case '+': - // @@ Consume the character and inform validator as such, + case '?': + // @@ Consume the character and inform validator as such, + this->get (); + break; + case '*': + // @@ Consume the character and inform validator as such, + this->get (); + break; + case '+': + // @@ Consume the character and inform validator as such, + this->get (); + break; + default: + break; // not much to do. 
+ } + + return 0; +} + +int +ACEXML_Parser::parse_char_reference (ACEXML_Char *buf, size_t len) +{ + if (len < 7) // Max size of a CharRef plus terminating '\0' + return -1; + ACEXML_Char ch = this->get(); + if (ch != '#') // Internal error. + return -1; + int hex = 0; + ch = this->peek(); + if (ch == 'x') + { + hex = 1; this->get (); - break; - default: - break; // not much to do. } + size_t i = 0; + int more_digit = 0; + ch = this->get (); + for ( ; i < len && + (this->isNormalDigit (ch) && (hex ? this->isCharRef(ch): 1)); ++i) + { + buf[i] = ch; + ch = this->get(); + ++more_digit; + } + if (ch != ';' || !more_digit) + return -1; + buf[i] = 0; + ACEXML_UCS4 sum = (ACEXML_UCS4) ACE_OS::strtol (buf, 0, (hex ? 16 : 10)); + // [WFC: Legal Character] + if (!this->isChar (sum)) + return -1; + int clen; +#if defined (ACE_USES_WCHAR) // UTF-16 + if ((clen = ACEXML_Transcoder::ucs42utf16 (sum, buf, len)) < 0) + return -1; +#else // or UTF-8 + if ((clen = ACEXML_Transcoder::ucs42utf8 (sum, buf, len)) < 0) + return -1; +#endif + buf [clen] = 0; + for (int j = 0; j < clen; ++j) + this->obstack_.grow (buf[j]); return 0; } -ACEXML_Char -ACEXML_Parser::skip_whitespace (ACEXML_Char **whitespace) +ACEXML_Char* +ACEXML_Parser::parse_reference_name (void) { ACEXML_Char ch = this->get (); + if (!this->isLetter (ch) && ch != '_' && ch != ':') + return 0; + int numchars = 0; + while (ch) { + this->alt_stack_.grow (ch); + numchars++; + ch = this->peek (); + if (!this->isNameChar (ch)) + break; + ch = this->get (); + }; + if (ch != ';') + return 0; + ch = this->get(); + return this->alt_stack_.freeze (); +} + +int +ACEXML_Parser::parse_attvalue (ACEXML_Char *&str ACEXML_ENV_ARG_DECL) +{ + ACEXML_Char quote = this->get (); + if (quote != '\'' && quote != '"') // Not a quoted string. 
+ return -1; + ACEXML_Char ch = this->get (); + int nrelems = 0; + while (1) + { + if (ch == quote) + { + ACEXML_Char* temp = this->obstack_.freeze (); + // If the attribute type is not CDATA, then the XML processor + // must further process the normalized attribute value by + // discarding any leading and trailing space (#x20) characters, + // and by replacing sequences of space (#x20) characters by a + // single space (#x20) character. + + // if (atttype != CDATA) { + // ACEXML_Char* start = temp; + // ACEXML_Char* end = temp + ACE_OS::strlen (temp); + // while (*start == '\x20') + // start++; + // if (start == end) // String which is all spaces + // str = start; + // while (*start != 0) + // { + // this->obstack_.grow (*start); + // start++; + // while (*start == '\x20') + // start++; + // } + // str = this->obstack_.freeze(); + // } + str = temp; + return 0; + } + switch (ch) + { + case '&': + if (this->peek () == '#') + { + ACEXML_Char buf[7]; + if (this->parse_char_reference (buf, sizeof (buf)) != 0) + { + // [WFC: Legal Character] + ACE_ERROR ((LM_ERROR, + ACE_TEXT ("Invalid character reference\n"))); + return -1; + } + } + else + { + this->ref_state_ = ACEXML_ParserInt::IN_ATT_VALUE; + this->parse_entity_reference (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + break; + case '\x20': case '\x0D': case '\x0A': case '\x09': + this->obstack_.grow ('\x20'); + break; + case '<': // [WFC: No < in Attribute Values] + ACE_ERROR ((LM_ERROR, ACE_TEXT ("Illegal '<' in AttValue\n"))); + return -1; + case 0: + nrelems = this->pop_context(); + if (nrelems >= 1) + break; + else + { + this->fatal_error(ACE_TEXT ("Internal Parser Error") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + default: + this->obstack_.grow (ch); + break; + } + ch = this->get(); + } +} + + - if (this->is_whitespace (ch) == 0) +int +ACEXML_Parser::parse_entity_reference (ACEXML_ENV_SINGLE_ARG_DECL) +{ + ACEXML_Char* replace = this->parse_reference_name (); + if 
(replace == 0) { - if (whitespace != 0) - *whitespace = 0; - return ch; + this->fatal_error (ACE_TEXT ("Invalid Reference name") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); } - do + // [WFC: Parsed Entity] + if (this->unparsed_entities_.resolve_entity (replace)) { + this->fatal_error (ACE_TEXT ("EntityRef refers to unparsed entity") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + // Look in the internal general entities set first. + const ACEXML_Char* entity = this->internal_GE_.resolve_entity(replace); + + // Look in the predefined entities. + if (!entity && !this->validate_) { - if (whitespace != 0) - this->obstack_.grow (ch); - ch = this->get (); + entity = this->predef_entities_.resolve_entity (replace); + if (!entity) + { + this->fatal_error (ACE_TEXT ("Undefined Entity reference") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } } - while (this->is_whitespace (ch)); - if (whitespace != 0) - *whitespace = this->obstack_.freeze (); + if (!entity && // No match in internal + (!(this->internal_dtd_ || this->external_dtd_) || // or No DTDs + // or Only Internal DTD and no parameter entity references + (this->internal_dtd_ && !this->external_dtd_ && !this->has_pe_refs_) || + this->standalone_)) // or Standalone = 'yes' + { + // [WFC: Entity Declared] + this->fatal_error (ACE_TEXT ("Undeclared EntityRef") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + + ACEXML_Char* systemId = 0; + ACEXML_Char* publicId = 0; + if (!entity && this->validate_) + { + if (this->external_GE_.resolve_entity (replace, systemId, publicId) < 0) + { + this->fatal_error (ACE_TEXT ("Undefined Entity reference") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + if (this->ref_state_ == ACEXML_ParserInt::IN_ATT_VALUE) + { + this->fatal_error (ACE_TEXT ("External EntityRef in Attribute Value") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + this->external_entity_ = 1; + } - return ch; + // [WFC: No Recursion] + int 
present = this->GE_reference_.insert (entity); + if (present == 1 || present == -1) + { + ACEXML_String ref_name; + while (this->GE_reference_.pop(ref_name) != -1) + ; + this->fatal_error (ACE_TEXT ("Recursion in resolving entity") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + if (!this->external_entity_) + { + ACEXML_StrCharStream* str = 0; + ACE_NEW_RETURN (str, ACEXML_StrCharStream (entity), 0); + if (str) + { + if (this->switch_input (str) != 0) + { + this->fatal_error (ACE_TEXT ("Unable to create internal input " + "stream") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + return 0; + } + } + else if (this->validate_) + { + ACEXML_Char* uri = this->normalize_systemid (systemId); + ACE_Auto_Basic_Array_Ptr<ACEXML_Char> cleanup_uri (uri); + ACEXML_InputSource* ip = 0; + if (this->entity_resolver_) + { + ip = this->entity_resolver_->resolveEntity (publicId, + (uri ? uri : systemId) + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + if (ip) + { + if (this->switch_input (ip) != 0) + { + this->fatal_error (ACE_TEXT ("Internal Parser Error") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + return 0; + } + else + { + ACEXML_StreamFactory factory; + ACEXML_CharStream* cstream = factory.create_stream (uri ? uri: systemId); + if (!cstream) { + this->fatal_error (ACE_TEXT ("Invalid input source") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + if (this->switch_input (cstream, systemId, publicId) != 0) + { + this->fatal_error (ACE_TEXT ("Internal Parser Error") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + return 0; + } + } + ACE_ERROR ((LM_ERROR, ACE_TEXT ("Undefined Entity reference\n"))); + return -1; } int -ACEXML_Parser::skip_whitespace_count (ACEXML_Char *peeky) +ACEXML_Parser::parse_PE_reference (ACEXML_ENV_SINGLE_ARG_DECL) { - int wscount = 0; - ACEXML_Char dummy; - ACEXML_Char &forward = (peeky == 0 ? 
dummy : *peeky); + ACEXML_Char* replace = this->parse_reference_name (); + if (replace == 0) + { + this->fatal_error (ACE_TEXT ("Invalid PEReference name") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + + // Look in the internal general entities set first. + const ACEXML_Char* entity = this->internal_PE_.resolve_entity(replace); + + if (!entity && // No match in internal + (!this->external_dtd_ || // or No External DTDs + this->standalone_)) // or Standalone + { + // [VC: Entity Declared] + this->fatal_error (ACE_TEXT ("Undefined Internal PEReference") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + + ACEXML_Char* systemId = 0; + ACEXML_Char* publicId = 0; + if (!entity && this->validate_) + { + if (this->external_PE_.resolve_entity (replace, systemId, publicId) < 0) + { + this->fatal_error (ACE_TEXT ("Undefined PEReference") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + this->external_entity_ = 1; + } - for (;this->is_whitespace ((forward = this->peek ())); ++wscount) - this->get (); + // [WFC: No Recursion] + int present = this->PE_reference_.insert (replace); + if (present == 1 || present == -1) + { + ACEXML_String ref_name; + while (this->PE_reference_.pop(ref_name) != -1) + ; + this->fatal_error (ACE_TEXT ("Recursion in resolving entity") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } - return wscount; + if (entity && !this->external_entity_) + { + ACEXML_StrCharStream* sstream = 0; + ACEXML_String str (entity); + if (this->ref_state_ != ACEXML_ParserInt::IN_ENTITY_VALUE) + { + const ACEXML_Char ch = '\x20'; + str = ch + str + ch; + } + ACE_NEW_RETURN (sstream, ACEXML_StrCharStream (str.c_str()), 0); + if (sstream) + { + if (this->switch_input (sstream) != 0) + { + this->fatal_error (ACE_TEXT ("Error in switching InputSource") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + return 0; + } + } + else if (this->external_entity_ && this->validate_) + { + ACEXML_Char* uri = 
this->normalize_systemid (systemId); + ACE_Auto_Basic_Array_Ptr<ACEXML_Char> cleanup_uri (uri); + ACEXML_InputSource* ip = 0; + if (this->entity_resolver_) + { + ip = this->entity_resolver_->resolveEntity (publicId, + (uri ? uri : systemId) + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + if (ip) + { + if (this->switch_input (ip) != 0) + { + this->fatal_error (ACE_TEXT ("Error in switching InputSource") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + return 0; + } + else + { + ACEXML_StreamFactory factory; + ACEXML_CharStream* cstream = factory.create_stream (uri ? uri: systemId); + if (!cstream) { + this->fatal_error (ACE_TEXT ("Invalid input source") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + if (this->switch_input (cstream, systemId, publicId) != 0) + { + this->fatal_error (ACE_TEXT ("Error in switching InputSource") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + if (this->ref_state_ == ACEXML_ParserInt::IN_ENTITY_VALUE) + { + ACEXML_Char less, mark; + if (this->peek() == '<') + { + less = this->get(); + if (this->peek() == '?') + { + mark = this->get(); + if (this->peek() == 'x') + { + this->parse_text_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + else + { + this->obstack_.grow (less); + this->obstack_.grow (mark); + } + } + this->obstack_.grow (less); + } + } + return 0; + } + } + this->fatal_error (ACE_TEXT ("Undefined PEReference") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + return -1; } int -ACEXML_Parser::parse_token (const ACEXML_Char* keyword) +ACEXML_Parser::parse_entity_value (ACEXML_Char *&str + ACEXML_ENV_ARG_DECL) { - if (keyword == 0) - return -1; - const ACEXML_Char* ptr = keyword; - ACEXML_Char ch; - for (; *ptr != 0 && ((ch = this->get()) == *ptr); ++ptr) - ; - if (*ptr == 0) - return 0; - else + ACEXML_ParserInt::ReferenceState temp = this->ref_state_; + ACEXML_Char quote = this->get (); + if (quote != '\'' && quote != '"') // Not a 
quoted string. return -1; + ACEXML_Char ch = this->get (); + int nrelems = 0; + while (1) + { + if (ch == quote) + { + str = this->obstack_.freeze (); + this->ref_state_ = temp; + return 0; + } + switch (ch) + { + case '&': + if (this->peek () == '#') + { + if (!this->external_entity_) + { + ACEXML_Char buf[7]; + if (this->parse_char_reference (buf, sizeof (buf)) != 0) + { + // [WFC: Legal Character] + this->fatal_error (ACE_TEXT ("Invalid character " + "reference") + ACEXML_ENV_ARG_PARAMETER); + return -1; + } + break; + } + } + this->obstack_.grow (ch); + break; + case '%': + if (!this->external_entity_) + { + this->ref_state_ = ACEXML_ParserInt::IN_ENTITY_VALUE; + this->parse_PE_reference(ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + break; + } + this->obstack_.grow (ch); + break; + case 0: + nrelems = this->pop_context(); + if (nrelems >= 1) + { + if (this->external_entity_) + this->external_entity_ = 0; + break; + } + else + { + this->fatal_error(ACE_TEXT ("Internal Parser Error") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + default: + this->obstack_.grow (ch); + break; + } + ch = this->get(); + } } -int -ACEXML_Parser::skip_equal (void) +ACEXML_Char * +ACEXML_Parser::parse_name (ACEXML_Char ch) { - if (this->skip_whitespace (0) != '=') - return -1; + if (ch == 0) + ch = this->get (); + if (!this->isLetter (ch) && ch != '_' && ch != ':') + return 0; + while (ch) { + this->obstack_.grow (ch); + ch = this->peek (); + if (!this->isNameChar (ch)) + break; + ch = this->get (); + }; + return this->obstack_.freeze (); +} - while (this->is_whitespace (this->peek ())) - this->get (); - return 0; +ACEXML_Char* +ACEXML_Parser::parse_nmtoken (ACEXML_Char ch) +{ + if (ch == 0) + ch = this->get (); + if (!this->isNameChar (ch)) + return 0; + while (ch) { + this->obstack_.grow (ch); + ch = this->peek (); + if (!this->isNameChar (ch)) + break; + ch = this->get (); + }; + return this->obstack_.freeze (); } int 
-ACEXML_Parser::get_quoted_string (ACEXML_Char *&str) +ACEXML_Parser::parse_version_num (ACEXML_Char*& str) { ACEXML_Char quote = this->get (); if (quote != '\'' && quote != '"') // Not a quoted string. return -1; - + int numchars = 0; while (1) { ACEXML_Char ch = this->get (); + if (ch == quote && !numchars) + return -1; + else if (ch == quote) + { + str = this->obstack_.freeze (); + return 0; + } + // [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+ + if (ch == '-' || ((ch >= 'a' && ch <= 'z') || + (ch >= 'A' && ch <= 'Z') || + (ch >= '0' && ch <= '9') || + (ch == '_' || ch == '.' || ch == ':'))) + { + this->obstack_.grow (ch); + numchars++; + } + else + return -1; + } +} - // @@ Deoes not handle buffer overflow yet. +int +ACEXML_Parser::parse_system_literal (ACEXML_Char*& str) +{ + const ACEXML_Char quote = this->get(); + if (quote != '\'' && quote != '"') // Not a quoted string. + return -1; + while (1) + { + ACEXML_Char ch = this->get (); if (ch == quote) { str = this->obstack_.freeze (); return 0; } - - const ACEXML_String *replace = 0; - ACEXML_String charval; - ACEXML_Char buffer[6]; - size_t i = 0; switch (ch) { - case '&': - if (this->peek () == '#') - { - if (this->parse_char_reference (buffer, 6) != 0) - { - // [WFC: Legal Character] - ACE_ERROR ((LM_ERROR, - ACE_TEXT ("Invalid character reference\n"))); - return -1; - } - charval.set (buffer, 0); - replace = &charval; - } - else - replace = this->parse_reference (); - - if (replace == 0) - { - ACE_ERROR ((LM_ERROR, ACE_TEXT ("Undefined reference\n"))); - return -1; - } - for (i = 0; i < replace->length (); ++i) - this->obstack_.grow ((*replace)[i]); - // handle reference here. - break; - case 0x0D: // End-of-Line handling - ch = (this->peek () == 0x0A ? this->get () : 0x0A); - // Fall thru... - case 0x0A: - // Fall thru... 
- default: - this->obstack_.grow (ch); - break; + case '\x00': case '\x01': case '\x02': case '\x03': case '\x04': + case '\x05': case '\x06': case '\x07': case '\x08': case '\x09': + case '\x0A': case '\x0B': case '\x0C': case '\x0D': case '\x0E': + case '\x0F': case '\x10': case '\x11': case '\x12': case '\x13': + case '\x14': case '\x15': case '\x16': case '\x17': case '\x18': + case '\x19': case '\x1A': case '\x1B': case '\x1C': case '\x1D': + case '\x1E': case '\x1F': case '\x7F': case '\x20': case '<': + case '>': case '#': case '%': + ACE_ERROR ((LM_ERROR, + ACE_TEXT ("Invalid character in SystemLiteral\n"))); + return -1; + default: + this->obstack_.grow (ch); } } } -ACEXML_Char * -ACEXML_Parser::read_name (ACEXML_Char ch) +int +ACEXML_Parser::parse_pubid_literal (ACEXML_Char*& str) { - if (ch == 0) + const ACEXML_Char quote = this->get(); + if (quote != '\'' && quote != '"') // Not a quoted string. + return -1; + while (1) { - ch = this->get (); - - if (this->is_whitespace (ch)) - // No white space is allowed here. - return 0; + ACEXML_Char ch = this->get (); + if (ch == quote) + { + str = this->obstack_.freeze (); + return 0; + } + else if (this->isPubidChar (ch)) + this->obstack_.grow (ch); + else + return -1; } - else if (this->is_nonname (ch)) - return 0; +} +int +ACEXML_Parser::parse_encname (ACEXML_Char*& str) +{ + const ACEXML_Char quote = this->get (); + if (quote != '\'' && quote != '"') // Not a quoted string. 
+ return -1; + int numchars = 0; while (1) { - this->obstack_.grow (ch); - ch = this->peek (); - if (this->is_nonname (ch)) - break; - ch = this->get (); - }; + ACEXML_Char ch = this->get (); + if (ch == quote && !numchars) + return -1; + else if (ch == quote) + { + str = this->obstack_.freeze (); + return 0; + } + // [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* + if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) + && !numchars) + return -1; + if (ch == '-' || ((ch >= 'a' && ch <= 'z') || + (ch >= 'A' && ch <= 'Z') || + (ch >= '0' && ch <= '9') || + (ch == '_' || ch == '.'))) + { + this->obstack_.grow (ch); + numchars++; + } + else + return -1; + } +} - return this->obstack_.freeze (); +int +ACEXML_Parser::parse_sddecl (ACEXML_Char*& str) +{ + ACEXML_Char quote = this->get (); + if (quote != '\'' && quote != '"') // Not a quoted string. + return -1; + int numchars = 0; + while (1) + { + ACEXML_Char ch = this->get (); + if (ch == quote && numchars < 2) + return -1; + else if (ch == quote) + { + str = this->obstack_.freeze (); + return 0; + } + // [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") + // | ('"' ('yes' | 'no') '"')) + switch (ch) + { + case 'y': case 'e': case 's': case 'n': case 'o': + this->obstack_.grow (ch); + numchars++; + break; + default: + return -1; + } + } } void -ACEXML_Parser::report_prefix_mapping (const ACEXML_Char* prefix, - const ACEXML_Char* uri, - const ACEXML_Char* name, - int start ACEXML_ENV_ARG_DECL) +ACEXML_Parser::prefix_mapping (const ACEXML_Char* prefix, + const ACEXML_Char* uri, + const ACEXML_Char* name, + int start ACEXML_ENV_ARG_DECL) { if (this->namespaces_) { const ACEXML_Char* temp = (name == 0) ? 
empty_string : prefix; if (start) { - this->content_handler_->startPrefixMapping (temp, uri ACEXML_ENV_ARG_PARAMETER); + this->content_handler_->startPrefixMapping (temp, uri + ACEXML_ENV_ARG_PARAMETER); ACEXML_CHECK; } else { - this->content_handler_->endPrefixMapping(temp ACEXML_ENV_ARG_PARAMETER); + this->content_handler_->endPrefixMapping(temp + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK; + } + } +} + +int +ACEXML_Parser::switch_input (ACEXML_CharStream* cstream, + const ACEXML_Char* systemId, + const ACEXML_Char* publicId) +{ + ACEXML_InputSource* input = 0; + ACE_NEW_RETURN (input, ACEXML_InputSource (cstream), -1); + return this->switch_input (input, systemId, publicId); +} + +int +ACEXML_Parser::switch_input (ACEXML_InputSource* input, + const ACEXML_Char* systemId, + const ACEXML_Char* publicId) +{ + ACEXML_LocatorImpl* locator = 0; + if (!systemId && this->current_.getLocator()) + locator = ACE_const_cast (ACEXML_LocatorImpl*, + this->current_.getLocator()); + if (!locator) + ACE_NEW_RETURN (locator, ACEXML_LocatorImpl (systemId, publicId), -1); + ACEXML_Parser_Context* new_context = 0; + ACE_NEW_RETURN (new_context, ACEXML_Parser_Context(input, locator), -1); + if (this->push_context (*new_context) != 0) + { + ACE_ERROR ((LM_ERROR, "Unable to switch input streams")); + return -1; + } + this->current_.reset(); + this->current_ = *new_context; + // Set up Locator. 
+ if (this->content_handler_) + this->content_handler_->setDocumentLocator (this->current_.getLocator()); + return 0; +} + +int +ACEXML_Parser::push_context (const ACEXML_Parser_Context& context) +{ + if (this->ctx_stack_.push (context) < 0) + { + ACE_ERROR ((LM_ERROR, "Unable to push input source onto the stack")); + return -1; + } + return 0; +} + +int +ACEXML_Parser::pop_context (void) +{ + this->current_.reset(); + int retval = this->ctx_stack_.pop (this->current_); + if (retval != 0) + return -1; + this->current_.reset(); + if (this->ctx_stack_.top (this->current_) != 0) + return -1; + // Set up Locator. + if (this->content_handler_) + this->content_handler_->setDocumentLocator (this->current_.getLocator()); + return this->ctx_stack_.size(); +} + +int +ACEXML_Parser::getFeature (const ACEXML_Char *name ACEXML_ENV_ARG_DECL) + ACE_THROW_SPEC ((ACEXML_SAXNotRecognizedException, + ACEXML_SAXNotSupportedException)) +{ + if (ACE_OS::strcmp (name, ACEXML_Parser::simple_parsing_feature_) == 0) + { + return this->simple_parsing_; + } + else if (ACE_OS::strcmp (name, ACEXML_Parser::namespaces_feature_) == 0) + { + return this->namespaces_; + } + else if (ACE_OS::strcmp (name, + ACEXML_Parser::namespace_prefixes_feature_) == 0) + { + return this->namespace_prefixes_; + } + + ACEXML_THROW_RETURN (ACEXML_SAXNotRecognizedException (name), -1); +} + + + +void +ACEXML_Parser::setFeature (const ACEXML_Char *name, + int boolean_value ACEXML_ENV_ARG_DECL) + ACE_THROW_SPEC ((ACEXML_SAXNotRecognizedException, + ACEXML_SAXNotSupportedException)) +{ + if (ACE_OS::strcmp (name, ACEXML_Parser::simple_parsing_feature_) == 0) + { + this->simple_parsing_ = (boolean_value == 0 ? 0 : 1); + return; + } + else if (ACE_OS::strcmp (name, ACEXML_Parser::namespaces_feature_) == 0) + { + this->namespaces_ = (boolean_value == 0 ? 0 : 1); + return; + } + else if (ACE_OS::strcmp (name, + ACEXML_Parser::namespace_prefixes_feature_) == 0) + { + this->namespace_prefixes_ = (boolean_value == 0 ? 
0 : 1); + return; + } + + ACEXML_THROW (ACEXML_SAXNotRecognizedException (name)); +} + +void * +ACEXML_Parser::getProperty (const ACEXML_Char *name ACEXML_ENV_ARG_DECL) + ACE_THROW_SPEC ((ACEXML_SAXNotRecognizedException, + ACEXML_SAXNotSupportedException)) +{ + ACEXML_THROW_RETURN (ACEXML_SAXNotSupportedException (name), 0); +} + +void +ACEXML_Parser::setProperty (const ACEXML_Char *name, + void *value ACEXML_ENV_ARG_DECL) + ACE_THROW_SPEC ((ACEXML_SAXNotRecognizedException, + ACEXML_SAXNotSupportedException)) +{ + ACE_UNUSED_ARG (value); + + ACEXML_THROW (ACEXML_SAXNotSupportedException (name)); +} + +void +ACEXML_Parser::error (const ACEXML_Char* msg ACEXML_ENV_ARG_DECL) +{ + ACEXML_SAXParseException* exception = 0; + ACE_NEW_NORETURN (exception, ACEXML_SAXParseException (msg)); + if (this->error_handler_) + this->error_handler_->error (*exception ACEXML_ENV_ARG_PARAMETER); + else + ACEXML_ENV_RAISE (exception); + return; +} + +void +ACEXML_Parser::warning (const ACEXML_Char* msg ACEXML_ENV_ARG_DECL) +{ + ACEXML_SAXParseException* exception = 0; + ACE_NEW_NORETURN (exception, ACEXML_SAXParseException (msg)); + if (this->error_handler_) + this->error_handler_->warning (*exception ACEXML_ENV_ARG_PARAMETER); + return; +} + +void +ACEXML_Parser::fatal_error (const ACEXML_Char* msg ACEXML_ENV_ARG_DECL) +{ + ACEXML_SAXParseException* exception = 0; + ACE_NEW_NORETURN (exception, ACEXML_SAXParseException (msg)); + if (this->error_handler_) + this->error_handler_->fatalError (*exception ACEXML_ENV_ARG_PARAMETER); + ACEXML_ENV_RAISE (exception); + return; +} + +void +ACEXML_Parser::parse_version_info (ACEXML_ENV_SINGLE_ARG_DECL) + ACE_THROW_SPEC ((ACEXML_SAXException)) +{ + ACEXML_Char* astring; + if (this->parse_token (ACE_TEXT("ersion")) < 0 + || this->skip_equal () != 0 + || this->parse_version_num (astring) != 0) + { + this->fatal_error (ACE_TEXT ("Invalid VersionInfo specification") + ACEXML_ENV_ARG_PARAMETER); + return; + } + if (ACE_OS::strcmp (astring, ACE_TEXT 
("1.0")) != 0) + { + this->fatal_error (ACE_TEXT ("ACEXML Parser supports XML version 1.0 " + "documents only") ACEXML_ENV_ARG_PARAMETER); + return; + } +} + +void +ACEXML_Parser::parse_encoding_decl (ACEXML_ENV_SINGLE_ARG_DECL) + ACE_THROW_SPEC ((ACEXML_SAXException)) +{ + ACEXML_Char* astring; + if ((this->parse_token (ACE_TEXT("ncoding")) < 0) + || this->skip_equal () != 0 + || this->parse_encname (astring) != 0) + { + this->fatal_error (ACE_TEXT ("Invalid EncodingDecl specification") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK; + } + const ACEXML_Char* encoding = this->current_.getInputSource()->getEncoding(); + if (ACE_OS::strcmp (astring, encoding) != 0) + { + ACE_ERROR ((LM_ERROR, ACE_TEXT ("Detected Encoding is %s " + ": Declared Encoding is %s\n"), + encoding, astring)); + this->warning (ACE_TEXT ("Declared encoding differs from detected " + "encoding") ACEXML_ENV_ARG_PARAMETER); + } +} + +int +ACEXML_Parser::parse_text_decl (ACEXML_ENV_SINGLE_ARG_DECL) + ACE_THROW_SPEC ((ACEXML_SAXException)) +{ + // Read xml + if (this->parse_token (ACE_TEXT("xml")) < 0) + { + this->fatal_error(ACE_TEXT ("Expecting keyword 'xml' in TextDecl") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + + ACEXML_Char fwd = this->skip_whitespace(); + // Read version + if (fwd == 'v') + { + this->parse_version_info (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + fwd = this->skip_whitespace(); + } + + if (fwd == 'e') + { + this->parse_encoding_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + fwd = this->skip_whitespace(); + } + else + { + this->fatal_error (ACE_TEXT ("Missing encodingDecl in TextDecl") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + + if (fwd == '?' && this->get() == '>') + return 0; + // All the rules fail. So return an error. 
+ this->fatal_error (ACE_TEXT ("Invalid TextDecl") ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + return -1; +} + +void +ACEXML_Parser::parse_xml_decl (ACEXML_ENV_SINGLE_ARG_DECL) + ACE_THROW_SPEC ((ACEXML_SAXException)) +{ + // Read <?xml + if (this->parse_token (ACE_TEXT("xml")) < 0) + { + this->fatal_error(ACE_TEXT ("Expecting keyword xml in XMLDecl") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK; + } + + ACEXML_Char fwd = this->skip_whitespace(); + + // Read version + if (fwd != 'v') + { + this->fatal_error (ACE_TEXT ("Expecting VersionInfo declaration") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK; + } + + this->parse_version_info (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK; + + fwd = this->skip_whitespace(); + if (fwd != '?') + { + if (fwd == 'e') + { + this->parse_encoding_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER); ACEXML_CHECK; + fwd = this->skip_whitespace(); + } + if (fwd == 's') + { + ACEXML_Char* astring; + if ((this->parse_token (ACE_TEXT("tandalone")) == 0) && + this->skip_equal () == 0 && + this->parse_sddecl (astring) == 0) + { + if (ACE_OS::strcmp (astring, ACE_TEXT ("yes")) == 0) + this->standalone_ = 1; + fwd = this->skip_whitespace(); + } } } + if (fwd == '?' && this->get() == '>') + return; + // All the rules fail. So return an error. + this->fatal_error (ACE_TEXT ("Invalid XMLDecl declaration") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK; +} + +int +ACEXML_Parser::parse_comment (void) +{ + int state = 0; + + if (this->get () != '-' || // Skip the opening "<!--" + this->get () != '-' || // completely. + this->get () == '-') // and at least something not '-'. + return -1; + + while (state < 3) + // Waiting for the trailing three character '-->'. Notice that + // according to the spec, '--->' is not a valid closing comment + // sequence. But we'll let it pass anyway. + { + ACEXML_Char fwd = this->get (); + if ((fwd == '-' && state < 2) || + (fwd == '>' && state == 2)) + state += 1; + else + state = 0; // Reset parse state. 
+ } + return 0; +} + +int +ACEXML_Parser::parse_processing_instruction (ACEXML_ENV_SINGLE_ARG_DECL) + ACE_THROW_SPEC ((ACEXML_SAXException)) +{ + const ACEXML_Char *pitarget = this->parse_name (); + ACEXML_Char *instruction = 0; + + if (!ACE_OS::strcasecmp (ACE_TEXT ("xml"), pitarget)) + { + // Invalid PITarget name. + this->fatal_error(ACE_TEXT ("PI can't have 'xml' in PITarget") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + + int state = 0; + + ACEXML_Char ch = this->skip_whitespace(); + while (state < 2) + { + switch (ch) + { + case '?': + if (state == 0) + state = 1; + break; + case '>': + if (state == 1) + { + instruction = this->obstack_.freeze (); + this->content_handler_->processingInstruction (pitarget, + instruction + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + this->obstack_.unwind (ACE_const_cast (ACEXML_Char*, pitarget)); + return 0; + } + break; + case 0x0A: + // Fall thru... + default: + if (state == 1) + this->obstack_.grow ('?'); + this->obstack_.grow (ch); + state = 0; + } + ch = this->get (); + } + return -1; +} + +void +ACEXML_Parser::reset (void) +{ + this->current_.reset(); + if (this->ctx_stack_.pop (this->current_) != -1) + ACE_ERROR ((LM_ERROR, ACE_TEXT ("Mismatched push/pop of Context stack"))); + this->current_.reset(); + ACEXML_String temp; + while (this->GE_reference_.pop (temp) != -1) + ; + while (this->PE_reference_.pop (temp) != -1) + ; + this->obstack_.release(); + this->xml_namespace_.reset(); + this->nested_namespace_ = 0; + this->internal_GE_.reset(); + this->external_GE_.reset(); + this->unparsed_entities_.reset(); + this->predef_entities_.reset(); + this->internal_PE_.reset(); + this->external_PE_.reset(); + this->notations_.reset(); + this->ref_state_ = ACEXML_ParserInt::INVALID; + this->external_subset_ = 0; + this->external_entity_ = 0; + this->has_pe_refs_ = 0; + this->standalone_ = 0; + this->external_dtd_ = 0; + this->internal_dtd_ = 0; } diff --git a/ACEXML/parser/parser/Parser.dsp 
b/ACEXML/parser/parser/Parser.dsp index 76a8f3f5ad8..f70361682b0 100644 --- a/ACEXML/parser/parser/Parser.dsp +++ b/ACEXML/parser/parser/Parser.dsp @@ -100,6 +100,14 @@ SOURCE=.\Entity_Manager.cpp SOURCE=.\Parser.cpp
# End Source File
+# Begin Source File
+
+SOURCE=.\ParserContext.cpp
+# End Source File
+# Begin Source File
+
+SOURCE=.\ParserInternals.cpp
+# End Source File
# End Group
# Begin Group "Header Files"
@@ -118,7 +126,15 @@ SOURCE=.\Parser_export.h # End Source File
# Begin Source File
-SOURCE=.\ParserErrors.h
+SOURCE=.\ParserContext.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\ParserContext.inl
+# End Source File
+# Begin Source File
+
+SOURCE=.\ParserInternals.h
# End Source File
# End Group
# Begin Group "Resource Files"
diff --git a/ACEXML/parser/parser/Parser.h b/ACEXML/parser/parser/Parser.h index f84ac2986d8..28eb2359ea3 100644 --- a/ACEXML/parser/parser/Parser.h +++ b/ACEXML/parser/parser/Parser.h @@ -7,6 +7,7 @@ * $Id$ * * @author Nanbor Wang <nanbor@cs.wustl.edu> + * @author Krishnakumar B <kitty@cs.wustl.edu> */ //============================================================================= @@ -28,10 +29,12 @@ #include "ace/Functor.h" #include "ace/SString.h" #include "ace/Hash_Map_Manager.h" +#include "ace/Unbounded_Set.h" #include "ace/Containers_T.h" #include "ace/Auto_Ptr.h" #include "ACEXML/parser/parser/Entity_Manager.h" -#include "ACEXML/parser/parser/ParserErrors.h" +#include "ACEXML/parser/parser/ParserInternals.h" +#include "ACEXML/parser/parser/ParserContext.h" /** * @class ACEXML_Parser Parser.h "ACEXML/parser/parser/Parser.h" @@ -48,7 +51,14 @@ public: /// Destructor. virtual ~ACEXML_Parser (void); - /* + /** + * Initialize the parser state. + * + * @retval 0 if parser was initialized correctly else -1. + */ + int initialize (ACEXML_InputSource* input); + + /** * Return the current content handler. */ virtual ACEXML_ContentHandler *getContentHandler (void) const; @@ -133,80 +143,41 @@ public: */ virtual void setErrorHandler (ACEXML_ErrorHandler *handler); - // *** Helper functions for parsing XML - /** - * Skip any whitespaces encountered until the first non-whitespace - * character is encountered and consumed from the current input - * CharStream. - * - * @param whitespace Return a pointer to the string of skipped - * whitespace after proper conversion. Null if there's no - * whitespace found. - * - * @return The first none-white space characters (which will be - * consumed from the CharStream.) If no whitespace is found, it - * returns 0. - * - * @sa skip_whitespace_count - */ - ACEXML_Char skip_whitespace (ACEXML_Char **whitespace); +protected: /** - * Skip any whitespaces encountered until the first non-whitespace - * character. 
The first non-whitespace character is not consumed. - * This method does peek into the input CharStream and therefore - * is more expensive than @ref skip_whitespace. - * - * @param peek If non-null, @a peek points to a ACEXML_Char where - * skip_whitespace_count stores the first non-whitespace - * character it sees (character is not removed from the stream.) - * - * @return The number of whitespace characters consumed. - * - * @sa skip_whitespace + * Parse XML Prolog. */ - int skip_whitespace_count (ACEXML_Char *peek = 0); + void parse_xml_prolog (ACEXML_ENV_SINGLE_ARG_DECL) + ACE_THROW_SPEC ((ACEXML_SAXException)); /** - * Check if a character @a c is a whitespace. + * Parse VersionInfo declaration. * - * @retval 1 if @a c is a valid white space character. 0 otherwise. */ - int is_whitespace (ACEXML_Char c); + void parse_version_info (ACEXML_ENV_SINGLE_ARG_DECL) + ACE_THROW_SPEC ((ACEXML_SAXException)); /** - * Check if a character @a c is a whitespace or '='. + * Parse a EncodingDecl declaration. * - * @retval 1 if true, 0 otherwise. */ - int is_whitespace_or_equal (ACEXML_Char c); + void parse_encoding_decl (ACEXML_ENV_SINGLE_ARG_DECL) + ACE_THROW_SPEC ((ACEXML_SAXException)); /** - * Check if a character @a c is a valid character for nonterminal NAME. + * Parse a XMLDecl declaration. * - * @retval 1 if true, 0 otherwise. */ - int is_nonname (ACEXML_Char c); + void parse_xml_decl (ACEXML_ENV_SINGLE_ARG_DECL) + ACE_THROW_SPEC ((ACEXML_SAXException)); /** - * Skip an equal sign. - * - * @retval 0 when succeeds, -1 if no equal sign is found. + * Parse a TextDecl declaration. */ - int skip_equal (void); - - /** - * Get a quoted string. Quoted strings are used to specify - * attribute values and this routine will replace character and - * entity references on-the-fly. Parameter entities are not allowed - * (or replaced) in this function. (But regular entities are.) - * - * @param str returns the un-quoted string. - * - * @retval 0 on success, -1 otherwise. 
- */ - int get_quoted_string (ACEXML_Char *&str); + int parse_text_decl (ACEXML_ENV_SINGLE_ARG_DECL) + ACE_THROW_SPEC ((ACEXML_SAXException)); /** * Parse a PI statement. The first character encountered @@ -214,33 +185,15 @@ public: * * @retval 0 on success, -1 otherwise. */ - int parse_processing_instruction (ACEXML_ENV_SINGLE_ARG_DECL); - - /** - * Skip over a comment. The first character encountered - * should always be the first '-' in the comment prefix - * "@<@!--". - */ - int grok_comment (); - - /** - * Read a name from the input CharStream (until white space). - * If @a ch @!= 0, then we have already consumed the first name - * character from the input CharStream, otherwise, read_name - * will use this->get() to acquire the initial character. - * - * @return A pointer to the string in the obstack, 0 if it's not - * a valid name. - */ - ACEXML_Char *read_name (ACEXML_Char ch = 0); + int parse_processing_instruction (ACEXML_ENV_SINGLE_ARG_DECL) + ACE_THROW_SPEC ((ACEXML_SAXException)); /** * Parse the DOCTYPE declaration. The first character encountered * should always be 'D' in doctype prefix: "@<@!DOCTYPE". */ int parse_doctypedecl (ACEXML_ENV_SINGLE_ARG_DECL) - ACE_THROW_SPEC ((ACEXML_SAXException)) - ; + ACE_THROW_SPEC ((ACEXML_SAXException)); /** * Parse an XML element. The first character encountered should @@ -256,35 +209,39 @@ public: * can be used in a validator. */ void parse_element (int is_root ACEXML_ENV_ARG_DECL) - ACE_THROW_SPEC ((ACEXML_SAXException)) - ; + ACE_THROW_SPEC ((ACEXML_SAXException)); /** - * Parse XML Prolog. + * Parse a content declaration. + * */ - void parse_xml_prolog (ACEXML_ENV_SINGLE_ARG_DECL) + int parse_content (const ACEXML_Char* startname, const ACEXML_Char* ns_uri, + const ACEXML_Char* ns_lname + ACEXML_ENV_ARG_DECL) ACE_THROW_SPEC ((ACEXML_SAXException)); - /** * Parse a character reference, i.e., " " or "". The first * character encountered should be the '#' char. 
* * @param buf points to a character buffer for the result. - * @param len specifies the capacities of the buffer. + * + * @param len In/out argument which initially specifies the size of the + * buffer and is later set to the no. of characters in the reference. * * @retval 0 on success and -1 otherwise. */ int parse_char_reference (ACEXML_Char *buf, size_t len); /** - * Parse an entity reference, i.e., "&". The first character - * encountered should be the character following '&'. + * Parse a reference name, i.e., foo in "&foo;" or "%foo;". The first + * character encountered should be the character following '&' or '%'. + * Effectively the same as @sa parse_name but we don't use the parser's + * obstack. Caller is responsible for deleting the memory. * - * @return A pointer to the resolved const ACEXML_String if success - * (previously defined), 0 otherwise. + * @retval A pointer to name of reference, 0 otherwise. */ - const ACEXML_String *parse_reference (void); + ACEXML_Char* parse_reference_name (void); /** * Parse a CDATA section. The first character should always be the first @@ -293,13 +250,21 @@ public: * @retval 0 on success. * @retval -1 if fail. */ - int parse_cdata (ACEXML_ENV_SINGLE_ARG_DECL); + int parse_cdata (ACEXML_ENV_SINGLE_ARG_DECL) + ACE_THROW_SPEC ((ACEXML_SAXException)); /** * Parse a "markupdecl" section, this includes both "markupdecl" and * "DeclSep" sections in XML specification */ - int parse_internal_dtd (ACEXML_ENV_SINGLE_ARG_DECL); + int parse_internal_dtd (ACEXML_ENV_SINGLE_ARG_DECL) + ACE_THROW_SPEC ((ACEXML_SAXException)); + + /** + * Skip over a comment. The first character encountered should always be + * the first '-' in the comment prefix "@<@!--". + */ + int parse_comment (void); /** * Parse an "ELEMENT" decl. The first character this method @@ -308,7 +273,8 @@ public: * * @retval 0 on success, -1 otherwise. 
*/ - int parse_element_decl (ACEXML_ENV_SINGLE_ARG_DECL); + int parse_element_decl (ACEXML_ENV_SINGLE_ARG_DECL) + ACE_THROW_SPEC ((ACEXML_SAXException)); /** * Parse an "ENTITY" decl. The first character this method expects @@ -316,7 +282,8 @@ public: * * @retval 0 on success, -1 otherwise. */ - int parse_entity_decl (ACEXML_ENV_SINGLE_ARG_DECL); + int parse_entity_decl (ACEXML_ENV_SINGLE_ARG_DECL) + ACE_THROW_SPEC ((ACEXML_SAXException)); /** * Parse an "ATTLIST" decl. Thse first character this method @@ -325,7 +292,15 @@ public: * * @retval 0 on success, -1 otherwise. */ - int parse_attlist_decl (ACEXML_ENV_SINGLE_ARG_DECL); + int parse_attlist_decl (ACEXML_ENV_SINGLE_ARG_DECL) + ACE_THROW_SPEC ((ACEXML_SAXException)); + + /** + * Parse a AttType declaration. + * + */ + int parse_atttype (ACEXML_ENV_SINGLE_ARG_DECL) + ACE_THROW_SPEC ((ACEXML_SAXException)); /** *Parse a "NOTATION" decl. The first character this method @@ -334,7 +309,8 @@ public: * * @retval 0 on success, -1 otherwise. */ - int parse_notation_decl (ACEXML_ENV_SINGLE_ARG_DECL); + int parse_notation_decl (ACEXML_ENV_SINGLE_ARG_DECL) + ACE_THROW_SPEC ((ACEXML_SAXException)); /** * Parse an ExternalID or a reference to PUBLIC ExternalID. @@ -356,8 +332,81 @@ public: * * @retval 0 on success, -1 otherwise. */ - int parse_external_id_and_ref (ACEXML_Char *&publicId, - ACEXML_Char *&systemId ACEXML_ENV_ARG_DECL); + int parse_external_id (ACEXML_Char *&publicId, ACEXML_Char *&systemId + ACEXML_ENV_ARG_DECL) + ACE_THROW_SPEC ((ACEXML_SAXException)); + + /** + * Parse an external DTD. + * + */ + int parse_external_dtd (ACEXML_ENV_SINGLE_ARG_DECL) + ACE_THROW_SPEC ((ACEXML_SAXException)); + + /** + * Parse an external subset. This does the actual parsing of an external + * subset and is called by @sa parse_external_dtd. + * + */ + int parse_external_subset (ACEXML_ENV_SINGLE_ARG_DECL) + ACE_THROW_SPEC ((ACEXML_SAXException)); + + /** + * Parse a markupDecl section. 
+ * + */ + int parse_markup_decl (ACEXML_ENV_SINGLE_ARG_DECL) + ACE_THROW_SPEC ((ACEXML_SAXException)); + + /** + * Parse a conditionalSect declaration. + * + */ + int parse_conditional_section (ACEXML_ENV_SINGLE_ARG_DECL) + ACE_THROW_SPEC ((ACEXML_SAXException)); + + /** + * Parse a includeSect declaration. + * + */ + int parse_includesect (ACEXML_ENV_SINGLE_ARG_DECL) + ACE_THROW_SPEC ((ACEXML_SAXException)); + + /** + * + * Parse a ignoreSect declaration. + */ + int parse_ignoresect (ACEXML_ENV_SINGLE_ARG_DECL) + ACE_THROW_SPEC ((ACEXML_SAXException)); + + /** + * Parse a PEReference. + * + */ + int parse_PE_reference (ACEXML_ENV_SINGLE_ARG_DECL) + ACE_THROW_SPEC ((ACEXML_SAXException)); + + /** + * Parse a Reference. + * + */ + int parse_entity_reference (ACEXML_ENV_SINGLE_ARG_DECL) + ACE_THROW_SPEC ((ACEXML_SAXException)); + + /** + * Parse an entityValue. + * + */ + int parse_entity_value (ACEXML_Char *&str ACEXML_ENV_ARG_DECL) + ACE_THROW_SPEC ((ACEXML_SAXException)); + + /** + * Parse a DefaultDecl specification. + * + */ + int parse_defaultdecl (ACEXML_ENV_SINGLE_ARG_DECL) + ACE_THROW_SPEC ((ACEXML_SAXException)); + /** * Parse the "children" and "Mixed" non-terminals in contentspec. @@ -367,7 +416,8 @@ public: * * @retval 0 on success, -1 otherwise. */ - int parse_children_definition (ACEXML_ENV_SINGLE_ARG_DECL); + int parse_children_definition (ACEXML_ENV_SINGLE_ARG_DECL) + ACE_THROW_SPEC ((ACEXML_SAXException)); /** * Parse a @c cp non-terminal. @c cp can either be a @c seq or a @c choice. @@ -379,72 +429,241 @@ public: * * @retval 0 on success, -1 otherwise. */ - int parse_child (int skip_open_paren ACEXML_ENV_ARG_DECL); + int parse_child (int skip_open_paren ACEXML_ENV_ARG_DECL) + ACE_THROW_SPEC ((ACEXML_SAXException)); + + /** + * Parse a name from the input CharStream. 
If @a ch @!= 0, then we have + * already consumed the first name character from the input CharStream, + * otherwise, parse_name will use this->get() to acquire the initial + * character. + * + * @return A pointer to the string in the obstack, 0 if it's not a + * valid name. + */ + ACEXML_Char *parse_name (ACEXML_Char ch = 0); + + /** + * Parse a NMTOKEN from the input stream. + * + * @return A pointer to the string in the obstack, 0 if it's not a valid + * NMTOKEN. + */ + ACEXML_Char* parse_nmtoken (ACEXML_Char ch = 0); + + /** + * Parse the version string in an XML Prolog section. + * + * @param str String containing the version number if successful. + * @return 0 if the string was read successfully, 0 otherwise. + */ + int parse_version (ACEXML_Char*& str); + + /** + * Parse the version number in a VersionInfo declaration. + */ + int parse_version_num (ACEXML_Char*& str); + + /** + * Parse the encoding name in an XML Prolog section. + * + * @param str String containing the encoding name if successful. + * @return 0 if the string was read successfully, 0 otherwise. + */ + int parse_encname (ACEXML_Char*& str); + + /** + * Parse a SDDecl string. + * + * @param str String containing the encoding name if successful. + * @return 0 if the string was read successfully, 0 otherwise. + */ + int parse_sddecl (ACEXML_Char*& str); + + /** + * Parse an attribute value. + * + * @param str String containing the value of the attribute if successful. + * @return 0 if attribute value was read successfully, 0 otherwise. + */ + int parse_attvalue (ACEXML_Char*& str ACEXML_ENV_ARG_DECL) + ACE_THROW_SPEC ((ACEXML_SAXException)); + + /** + * Parse a SystemLiteral. + * + * @param str String containing the SystemLiteral if successful. + * @return 0 if the string was read successfully, 0 otherwise. + */ + int parse_system_literal (ACEXML_Char*& str); + + /** + * Parse a PubidLiteral. + * + * @param str String containing the PubidLiteral if successful. 
+ * @return 0 if the string was read successfully, 0 otherwise. + */ + int parse_pubid_literal (ACEXML_Char*& str); + + /** + * Check if a character @a c is a whitespace. + * + * @retval 1 if @a c is a valid white space character. 0 otherwise. + */ + int is_whitespace (const ACEXML_Char c) const; + + /** + * Check if a character @a c is a valid Char. + * + * @retval 1 if @a c is a valid character. 0 otherwise. + */ + int isChar (ACEXML_UCS4 c) const; + + /** + * Check if a character @a c is a valid CharRef character. + * + * @retval 1 if @a c is a valid character reference character, 0 otherwise. + */ + int isCharRef (const ACEXML_Char c) const; + + /** + * Check if a character @a c is a BaseChar. + * + * @retval 1 if @a c is a valid BaseChar character, 0 otherwise. + */ + int isBasechar (const ACEXML_Char c) const; + + /** + * Check if a character @a c is a Ideographic. + * + * @retval 1 if @a c is a valid Ideographic character, 0 otherwise. + */ + int isIdeographic (const ACEXML_Char c) const; + + /** + * Check if a character @a c is a CombiningChar. + * + * @retval 1 if @a c is a valid CombiningChar character, 0 otherwise. + */ + int isCombiningchar (const ACEXML_Char c) const; + + /** + * Check if a character @a c is a Digit. + * + * @retval 1 if @a c is a valid Digit character, 0 otherwise. + */ + int isDigit (const ACEXML_Char c) const; + + /** + * Check if a character @a c is an Extender. + * + * @retval 1 if @a c is a valid Extender character, 0 otherwise. + */ + int isExtender (const ACEXML_Char c) const; + + /** + * Check if a character @a c is a Letter. + * + * @retval 1 if @a c is a valid Letter character, 0 otherwise. + */ + int isLetter (const ACEXML_Char c) const; + + /** + * Check if a character is an acceptable NameChar. + * + * @retval 1 if @a c is a valid NameChar character, 0 otherwise. + */ + int isNameChar (const ACEXML_Char c) const; + + /** + * Check if a character is a PubidChar. 
+ * + * @retval 1 if @a c is a valid PubidChar character, 0 otherwise. + */ + int isPubidChar (const ACEXML_Char c) const; -protected: /// Get a character. - ACEXML_Char get (void); + virtual ACEXML_Char get (void); /// Peek a character. - ACEXML_Char peek (void); + virtual ACEXML_Char peek (void); - // Feature names: +private: + + // *** Helper functions for parsing XML /** - * \addtogroup acexml_parser_features - * @{ + * Skip any whitespaces encountered until the first non-whitespace + * character is encountered. + * + * @return The next non-whitespace character from the CharStream. + * + * @sa skip_whitespace_count */ + ACEXML_Char skip_whitespace (void); /** - * @var simple_parsing_feature_ + * Skip any whitespaces encountered until the first non-whitespace + * character. The first non-whitespace character is not consumed. + * This method does peek into the input CharStream and therefore + * is more expensive than @ref skip_whitespace. * - * This constant string defines the name of "simple XML parsing" - * feature. When this feature is enabled, ACEXML parser is allowed - * to parse a simple XML stream without mandated XML prolog - * and no DTD defintion. + * @param peek If non-null, @a peek points to a ACEXML_Char where + * skip_whitespace_count stores the first non-whitespace + * character it sees (character is not removed from the stream.) + * + * @return The number of whitespace characters consumed. + * + * @sa skip_whitespace */ - static const ACEXML_Char simple_parsing_feature_[]; + int skip_whitespace_count (ACEXML_Char *peek = 0); /** - * @var namespaces_feature_ + * Skip an equal sign. * - * This constant string defines the SAX XML Namespace feature. When this - * feature is enabled, ACEXML parser allows access by namespace qualified - * names. + * @retval 0 when succeeds, -1 if no equal sign is found. */ - static const ACEXML_Char namespaces_feature_[]; + int skip_equal (void); /** - * @var namespace_prefixes_feature_ + * Get a quoted string. 
Quoted strings are used to specify + * attribute values and this routine will replace character and + * entity references on-the-fly. Parameter entities are not allowed + * (or replaced) in this function. (But regular entities are.) * - * This constant string defines the SAX XML Namespace prefixes feature. - * Normally the list of attributes returned by the parser will not - * contain attributes used as namespace declarations (xmlns*). When this - * feature is enabled, the list of attributes contains the namespace - * declarations also. + * @param str returns the un-quoted string. + * + * @retval 0 on success, -1 otherwise. */ - static const ACEXML_Char namespace_prefixes_feature_[]; + int get_quoted_string (ACEXML_Char *&str); - /* @} */ + /** + * Check if a character @a c is a Digit. + * + * @retval 1 if @a c is a valid Digit character, 0 otherwise. + */ + int isNormalDigit (const ACEXML_Char c) const; -private: /** * Dispatch errors to ErrorHandler. * */ - void report_error (ACEXML_Error minor_code ACEXML_ENV_ARG_DECL); + void error (const ACEXML_Char* msg ACEXML_ENV_ARG_DECL) + ACE_THROW_SPEC ((ACEXML_SAXException)); /** * Dispatch warnings to ErrorHandler. * */ - void report_warning (ACEXML_Error minor_code ACEXML_ENV_ARG_DECL); + void warning (const ACEXML_Char* msg ACEXML_ENV_ARG_DECL) + ACE_THROW_SPEC ((ACEXML_SAXException)); /** * Dispatch fatal errors to ErrorHandler. * */ - void report_fatal_error (ACEXML_Error minor_code ACEXML_ENV_ARG_DECL); + void fatal_error (const ACEXML_Char* msg ACEXML_ENV_ARG_DECL) + ACE_THROW_SPEC ((ACEXML_SAXException)); /** * Dispatch prefix mapping calls to the ContentHandler. 
@@ -454,15 +673,93 @@ private: * @param name Local name * @param start 1 => startPrefixMapping 0 => endPrefixMapping */ - void report_prefix_mapping (const ACEXML_Char* prefix, + void prefix_mapping (const ACEXML_Char* prefix, const ACEXML_Char* uri, const ACEXML_Char* name, - int start ACEXML_ENV_ARG_DECL); + int start ACEXML_ENV_ARG_DECL) + ACE_THROW_SPEC ((ACEXML_SAXException)); /** * Parse a keyword. */ int parse_token (const ACEXML_Char* keyword); + /** + * Push the current context on to the stack. + * + */ + int push_context (const ACEXML_Parser_Context& context); + + /** + * Pop the top element in the stack and replace current context with that. + */ + int pop_context (void); + + /** + * Create a new ACEXML_CharStream from @a systemId and @a publicId and + * replace the current input stream with the newly created stream. + */ + virtual int switch_input (ACEXML_CharStream* cstream, + const ACEXML_Char* systemId = 0, + const ACEXML_Char* publicId = 0); + /** + * Create a new ACEXML_InputSource from @a systemId and @a publicId and + * replace the current input source with the newly created InputSource. + */ + virtual int switch_input (ACEXML_InputSource* input, + const ACEXML_Char* systemId = 0, + const ACEXML_Char* publicId = 0); + + /** + * Reset the parser state. + * + */ + void reset (void); + + /** + * Very trivial, non-conformant normalization of a systemid. + * + */ + ACEXML_Char* normalize_systemid (const char* systemId); + + // Feature names: + + /** + * \addtogroup acexml_parser_features + * @{ + */ + + /** + * @var simple_parsing_feature_ + * + * This constant string defines the name of "simple XML parsing" + * feature. When this feature is enabled, ACEXML parser is allowed + * to parse a simple XML stream without mandated XML prolog + * and no DTD defintion. + */ + static const ACEXML_Char simple_parsing_feature_[]; + + /** + * @var namespaces_feature_ + * + * This constant string defines the SAX XML Namespace feature. 
When this + * feature is enabled, ACEXML parser allows access by namespace qualified + * names. + */ + static const ACEXML_Char namespaces_feature_[]; + + /** + * @var namespace_prefixes_feature_ + * + * This constant string defines the SAX XML Namespace prefixes feature. + * Normally the list of attributes returned by the parser will not + * contain attributes used as namespace declarations (xmlns*). When this + * feature is enabled, the list of attributes contains the namespace + * declarations also. + */ + static const ACEXML_Char namespace_prefixes_feature_[]; + + /* @} */ + /// Keeping track of the handlers. We do not manage the memory for /// handlers. ACEXML_DTDHandler *dtd_handler_; @@ -470,36 +767,96 @@ private: ACEXML_ContentHandler *content_handler_; ACEXML_ErrorHandler *error_handler_; - /// @@ Feature and properties management structure here. - /// Current input char stream. - ACEXML_CharStream *instream_; - - /// My doctype, if any. + /// Document Type ACEXML_Char *doctype_; - /// External DTD System Literal, if any. - ACEXML_Char *dtd_system_; + /// Current parser context + ACEXML_Parser_Context current_; - /// External DTD Public Literal, if any. - ACEXML_Char *dtd_public_; + /// Stack used to hold the Parser_Context + ACE_Unbounded_Stack<ACEXML_Parser_Context> ctx_stack_; + /* + * The following two are essentially chains of references and is used by + * the parser to determine if there is any recursion. We keep two of + * these one for general entities and one for parameter entities, as they + * both fall under different namespaces. + * + */ + /// Set used to hold the general entity references that are active. + ACE_Unbounded_Stack<ACEXML_String> GE_reference_; + + /// Set used to hold the parameter entity references that are active. 
+ ACE_Unbounded_Stack<ACEXML_String> PE_reference_; + + /// Obstack used by the parser to hold all the strings parsed ACE_Obstack_T<ACEXML_Char> obstack_; + /// Alternative obstack used to hold any strings when the original is in use + ACE_Obstack_T<ACEXML_Char> alt_stack_; + + /// Namespace stack used by the parser to implement support for Namespaces ACEXML_NamespaceSupport xml_namespace_; - ACEXML_Entity_Manager entities_; + /// T => We are processing a nested namespace + int nested_namespace_; - // Locator - ACEXML_LocatorImpl locator_; + /// Set of internal parsed general entities in the document + ACEXML_Entity_Manager internal_GE_; - // Flag set if the document is a standalone XML document - int standalone_; + /// Set of external parsed general entities in the document + ACEXML_Entity_Manager external_GE_; + + /// Set of unparsed entities in the document + ACEXML_Entity_Manager unparsed_entities_; + + /// Set of predefined entities used by the parser + ACEXML_Entity_Manager predef_entities_; + + /// Set of internal parsed parameter entities in the document + ACEXML_Entity_Manager internal_PE_; + + /// Set of external parsed parameter entities in the document + ACEXML_Entity_Manager external_PE_; + + /// Set of notations declared in the document + ACEXML_Entity_Manager notations_; - // Feature flags & + /// State of the parser when it encounters a reference. + ACEXML_ParserInt::ReferenceState ref_state_; + + /// T => We are parsing an external subset + int external_subset_; + + /// T => We are parsing an external entity value + int external_entity_; + + /// T => Internal DTD has parameter entity references + int has_pe_refs_; + + /// Feature flags + /// If set, the parser should parse a document without a prolog int simple_parsing_; + + /// If set, the parser should also validate + int validate_; + + /// If set, the parser should allow access by namespace qualified names. 
int namespaces_; + + /// If set, the parser should include namespace declarations in the list + /// of attributes of an element. int namespace_prefixes_; + /// If set, the document is a standalone XML document + int standalone_; + + /// If set, the document has an external DTD subset + int external_dtd_; + + /// If set, the document has an internal DTD + int internal_dtd_; + }; #if defined (__ACEXML_INLINE__) diff --git a/ACEXML/parser/parser/Parser.i b/ACEXML/parser/parser/Parser.i index 5b6f072fba0..e03f09c2116 100644 --- a/ACEXML/parser/parser/Parser.i +++ b/ACEXML/parser/parser/Parser.i @@ -57,49 +57,113 @@ ACEXML_Parser::setErrorHandler (ACEXML_ErrorHandler *handler) } ACEXML_INLINE int -ACEXML_Parser::is_whitespace (ACEXML_Char c) +ACEXML_Parser::isChar (const ACEXML_UCS4 c) const { - switch (c) + return (c == 0x9 || c == 0xA || c == 0xD || + c >= 0x20 && c <= 0xD7FF || + c >= 0xE000 && c <= 0xFFFD || + c >= 0x10000 && c <= 0x10FFFF); +} + +ACEXML_INLINE int +ACEXML_Parser::isCharRef (const ACEXML_Char c) const { - case 0xa: - case 0x20: - case 0x9: - case 0xd: - return 1; - default: + return ((c >= 'a' && c <= 'f') || + (c >= 'A' && c <= 'F')); +} + +ACEXML_INLINE int +ACEXML_Parser::isNormalDigit (const ACEXML_Char c) const +{ + return (c >= '\x30' && c <= '\x39'); +} + +ACEXML_INLINE int +ACEXML_Parser::isBasechar (const ACEXML_Char c) const +{ +#if defined (ACE_USES_WCHAR) + return ACEXML_ParserInt::isBasechar_i (c); +#else + return ACEXML_ParserInt::base_char_table_[c]; +#endif /* ACE_USES_WCHAR */ +} + +ACEXML_INLINE int +ACEXML_Parser::isIdeographic (const ACEXML_Char c) const +{ +#if defined (ACE_USES_WCHAR) + return ACEXML_ParserInt::isIdeographic_i (c); +#else + ACE_UNUSED_ARG (c); + return 0; +#endif /* ACE_USES_WCHAR */ +} + +ACEXML_INLINE int +ACEXML_Parser::isCombiningchar (const ACEXML_Char c) const +{ +#if defined (ACE_USES_WCHAR) + return ACEXML_ParserInt::isCombiningchar_i (c); +#else + ACE_UNUSED_ARG (c); return 0; +#endif /* ACE_USES_WCHAR 
*/ } + +ACEXML_INLINE int +ACEXML_Parser::isDigit (const ACEXML_Char c) const +{ +#if defined (ACE_USES_WCHAR) + return ACEXML_ParserInt::isDigit_i (c); +#else + return (this->isNormalDigit (c)); +#endif /* ACE_USES_WCHAR */ +} + +ACEXML_INLINE int +ACEXML_Parser::isExtender (const ACEXML_Char c) const +{ +#if defined (ACE_USES_WCHAR) + return ACEXML_ParserInt::isExtender_i (c); +#else + return (c == '\xB7'); +#endif /* ACE_USES_WCHAR */ } +ACEXML_INLINE int +ACEXML_Parser::isLetter (const ACEXML_Char c) const +{ + return (this->isBasechar (c) || this->isIdeographic (c)); +} ACEXML_INLINE int -ACEXML_Parser::is_whitespace_or_equal (ACEXML_Char c) +ACEXML_Parser::isNameChar (const ACEXML_Char c) const { - return (is_whitespace (c) || c == '=') ? 1 : 0; + return (this->isLetter (c) || this->isDigit (c) || c == '.' || c == '-' || + c == '_' || c == ':' || this->isCombiningchar (c) || + this->isExtender (c)); } ACEXML_INLINE int -ACEXML_Parser::is_nonname (ACEXML_Char c) +ACEXML_Parser::isPubidChar (const ACEXML_Char c) const { - // Handle this separately as doing so avoids code duplication and enables - // setting of line and column numbers in one place. - if (is_whitespace_or_equal (c)) - return 1; + return (c == '\x20' || c == '\x0D' || c == '\x0A' || + (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || + (c >= '0' && c <= '9') || c == '-' || c == '\'' || c == '(' || + c == ')' || c == '+' || c == ',' || c == '.' || c == '/' || + c == ':' || c == '=' || c == '?' || c == ';' || c == '!' 
|| + c == '*' || c == '#' || c == '@' || c == '$' || c == '_' || + c == '%'); +} + +ACEXML_INLINE int +ACEXML_Parser::is_whitespace (const ACEXML_Char c) const +{ switch (c) { - case '/': - case '?': - case '>': - case '<': - case ')': - case '(': - case '+': - case '*': - case '\'': - case '"': - case ',': - case '|': + case '\x0A': case '\x20': + case '\x09': case '\x0D': return 1; default: return 0; @@ -107,19 +171,59 @@ ACEXML_Parser::is_nonname (ACEXML_Char c) } ACEXML_INLINE ACEXML_Char +ACEXML_Parser::skip_whitespace (void) +{ + ACEXML_Char ch = this->get(); + while (this->is_whitespace (ch)) + ch = this->get (); + return ch; +} + + +ACEXML_INLINE int +ACEXML_Parser::skip_whitespace_count (ACEXML_Char *peeky) +{ + int wscount = 0; + ACEXML_Char dummy; + ACEXML_Char &forward = (peeky == 0 ? dummy : *peeky); + + for (;this->is_whitespace ((forward = this->peek ())); ++wscount) + this->get (); + return wscount; +} + +ACEXML_INLINE int +ACEXML_Parser::skip_equal (void) +{ + if (this->skip_whitespace() != '=') + return -1; + while (this->is_whitespace (this->peek())) + this->get(); + return 0; +} + +ACEXML_INLINE ACEXML_Char ACEXML_Parser::get (void) { - // Using an extra level of indirection so we can - // manage document location in the future. + ACEXML_Char ch = 0; + const ACEXML_InputSource* ip = this->current_.getInputSource(); + ACEXML_CharStream* instream = ip->getCharStream(); - if (this->instream_ != 0) + if (instream->get (ch) != -1) { - ACEXML_Char ch; - this->instream_->get (ch); - this->locator_.incrColumnNumber(); - if (ch == 0x0A) { - this->locator_.incrLineNumber(); - this->locator_.setColumnNumber (0); + this->current_.getLocator()->incrColumnNumber(); + // Normalize white-space + if (ch == '\x0D') + { + if (instream->peek() == 0x0A) + instream->get (ch); + ch = '\x0A'; + } + if (ch == '\x0A') + { + // Reset column number and increment Line Number. 
+ this->current_.getLocator()->incrLineNumber(); + this->current_.getLocator()->setColumnNumber (0); } return ch; } @@ -131,9 +235,24 @@ ACEXML_Parser::peek (void) { // Using an extra level of indirection so we can // manage document location in the future. + ACEXML_Char ch = 0; + const ACEXML_InputSource* ip = this->current_.getInputSource(); + ACEXML_CharStream* instream = ip->getCharStream(); + ch = instream->peek (); + return (ch == -1 ? 0 : ch); +} - if (this->instream_ != 0) - return this->instream_->peek (); +ACEXML_INLINE int +ACEXML_Parser::parse_token (const ACEXML_Char* keyword) +{ + if (keyword == 0) + return -1; + const ACEXML_Char* ptr = keyword; + ACEXML_Char ch; + for (; *ptr != 0 && ((ch = this->get()) == *ptr); ++ptr) + ; + if (*ptr == 0) return 0; - + else + return -1; } diff --git a/ACEXML/parser/parser/ParserContext.cpp b/ACEXML/parser/parser/ParserContext.cpp new file mode 100644 index 00000000000..fd0792677fb --- /dev/null +++ b/ACEXML/parser/parser/ParserContext.cpp @@ -0,0 +1,15 @@ +// $Id$ + +#include "ACEXML/parser/parser/ParserContext.h" + +#if !defined (__ACEXML_INLINE__) +# include "ACEXML/parser/parser/ParserContext.inl" +#endif /* __ACEXML_INLINE__ */ + +ACEXML_Parser_Context::~ACEXML_Parser_Context() +{ + delete this->instream_; + this->instream_ = 0; + delete this->locator_; + this->locator_ = 0; +} diff --git a/ACEXML/parser/parser/ParserContext.h b/ACEXML/parser/parser/ParserContext.h new file mode 100644 index 00000000000..3e62e8f532b --- /dev/null +++ b/ACEXML/parser/parser/ParserContext.h @@ -0,0 +1,78 @@ +// -*- C++ -*- + +//============================================================================= +/** + * @file ParserContext.h + * + * $Id$ + * + * @author Krishnakumar B <kitty@cs.wustl.edu> + */ +//============================================================================= + +#ifndef ACEXML_PARSER_CONTEXT_H +#define ACEXML_PARSER_CONTEXT_H + +#include "ace/pre.h" +#include "ACEXML/parser/parser/Parser_export.h" + +#if 
!defined (ACE_LACKS_PRAGMA_ONCE) +#pragma once +#endif /* ACE_LACKS_PRAGMA_ONCE */ + +#include "ACEXML/common/XML_Types.h" +#include "ACEXML/common/InputSource.h" +#include "ACEXML/common/Locator.h" +#include "ACEXML/common/LocatorImpl.h" +#include "ace/Functor.h" +#include "ace/Containers_T.h" + +class ACEXML_PARSER_Export ACEXML_Parser_Context +{ +public: + /// Default constructor + ACEXML_Parser_Context(); + + /// Constructor which initializes the context + ACEXML_Parser_Context (ACEXML_InputSource* instream, + ACEXML_LocatorImpl* locator); + + /// Copy constructor + ACEXML_Parser_Context (const ACEXML_Parser_Context& src); + + /// Assignment operator + ACEXML_Parser_Context& operator= (const ACEXML_Parser_Context& src); + + /// Comparison operator + int operator!= (const ACEXML_Parser_Context& src); + + /// Destructor + virtual ~ACEXML_Parser_Context(); + + /// Reset the parser context. This does not free up the memory. Only sets + /// it to zero. Meant to be called after a context is pushed on to a + /// stack. + void reset (void); + + /// Get the underlying input source. + virtual ACEXML_InputSource* getInputSource(void); + + /// Get the underlying locator. + virtual ACEXML_LocatorImpl* getLocator(void); + +private: + + /// Current input char stream. + ACEXML_InputSource *instream_; + + /// Current Locator which provides line no., column no. 
systemId and publicId + ACEXML_LocatorImpl* locator_; +}; + +#if defined (__ACEXML_INLINE__) +# include "ACEXML/parser/parser/ParserContext.inl" +#endif /* __ACEXML_INLINE__ */ + +#include "ace/post.h" + +#endif /* ACEXML_PARSER_CONTEXT_H */ diff --git a/ACEXML/parser/parser/ParserContext.inl b/ACEXML/parser/parser/ParserContext.inl new file mode 100644 index 00000000000..adbfe099bc2 --- /dev/null +++ b/ACEXML/parser/parser/ParserContext.inl @@ -0,0 +1,67 @@ +// $Id$ + + +ACEXML_INLINE +ACEXML_Parser_Context::ACEXML_Parser_Context() + : instream_ (0), + locator_ (0) +{ + +} + +ACEXML_INLINE +ACEXML_Parser_Context::ACEXML_Parser_Context (ACEXML_InputSource* instream, + ACEXML_LocatorImpl* locator) + : instream_ (instream), + locator_ (locator) +{ + +} + +ACEXML_INLINE +ACEXML_Parser_Context::ACEXML_Parser_Context (const ACEXML_Parser_Context& src) + : instream_ (src.instream_), + locator_ (src.locator_) +{ + +} + +ACEXML_INLINE int +ACEXML_Parser_Context::operator!= (const ACEXML_Parser_Context& src) +{ + return (this->instream_ != src.instream_ && this->locator_ != src.locator_); +} + +ACEXML_INLINE ACEXML_Parser_Context& +ACEXML_Parser_Context::operator= (const ACEXML_Parser_Context& src) +{ + if (*this != src) + { + delete this->instream_; + delete this->locator_; + this->instream_ = src.instream_; + this->locator_ = src.locator_; + } + return *this; +} + + + +ACEXML_INLINE ACEXML_InputSource* +ACEXML_Parser_Context::getInputSource (void) +{ + return this->instream_; +}; + +ACEXML_INLINE ACEXML_LocatorImpl* +ACEXML_Parser_Context::getLocator (void) +{ + return this->locator_; +} + +ACEXML_INLINE void +ACEXML_Parser_Context::reset (void) +{ + this->instream_ = 0; + this->locator_ = 0; +} diff --git a/ChangeLog b/ChangeLog index 8e257b62e0e..285f051fb34 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,118 @@ +Tue Nov 12 19:48:34 2002 Krishnakumar B <kitty@cs.wustl.edu> + + * ACEXML/parser/parser/ParserContext.cpp: + * ACEXML/parser/parser/ParserContext.h: + * 
ACEXML/parser/parser/ParserContext.inl: + + New files which hold the ParserContext needed to handle the + switching of input streams on the fly. + + * ACEXML/parser/parser/ParserInternals.cpp: + * ACEXML/parser/parser/ParserInternals.h: + + Moved some generic code from Parser.cpp to here. + + * ACEXML/apps/svcconf/Makefile: + * ACEXML/common/Makefile: + * ACEXML/parser/parser/Makefile: + + Updated dependencies. + + * ACEXML/common/Attributes_Def_Builder.h: + + No need to typedef in C++. + + * ACEXML/common/DefaultHandler.cpp: + + Minor typos. + + * ACEXML/common/Encoding.cpp: + + If auto-detection of encoding fails, assume that it is UTF-8. + + * ACEXML/common/Exception.cpp: + + Change the error message from ACE_DEBUG to ACE_ERROR. + + * ACEXML/common/FileCharStream.cpp: Handle BOM of UTF-8 in + addition to UTF-16. Cleanup unnecessary parens. + + * ACEXML/common/HttpCharStream.cpp: + * ACEXML/common/HttpCharStream.h: + + Add support for auto-detection of encoding. + + * ACEXML/common/InputSource.cpp: + * ACEXML/common/InputSource.h: + + Fixes for use with ACEXML_Parser_Context. + + * ACEXML/common/LocatorImpl.cpp: + * ACEXML/common/LocatorImpl.h: + + Fixed bug in copy constructor which resulted in locator + information not getting set properly. + + * ACEXML/common/NamespaceSupport.cpp: + * ACEXML/common/NamespaceSupport.h: + + Implement reset() method. + + * ACEXML/common/SAXExceptions.cpp: + + Change the error message from ACE_DEBUG to ACE_ERROR. + + * ACEXML/common/StrCharStream.cpp: + + Handle copying of bytes according to sizeof (ACE_WCHAR). + + * ACEXML/common/StreamFactory.cpp: Create the appropriate stream + given an URI. We don't try to normalize the URI here. It is done + in the Parser. + + * ACEXML/common/Transcode.cpp: + * ACEXML/common/Transcode.i: + + Moved some very big functions from .i to .cpp. + + * ACEXML/examples/SAXPrint/SAXPrint_Handler.cpp: + * ACEXML/examples/SAXPrint/main.cpp: + + Updates to reflect the new calling convention in the Parser. 
+ + * ACEXML/parser/parser/Entity_Manager.cpp: + * ACEXML/parser/parser/Entity_Manager.h: + * ACEXML/parser/parser/Entity_Manager.i: + + Implemented support for resolving SYSTEM and PUBLIC ids from + the Entity_Manager. + + * ACEXML/parser/parser/Parser.cpp: + * ACEXML/parser/parser/Parser.h: + * ACEXML/parser/parser/Parser.i: + + Implemented support for external parameter and entity + references. Rewrote a lot of the basic parsing functionality to + adhere to the standard. Implement partial support for validation + of XML files. + + +Fri Oct 25 15:44:04 2002 Krishnakumar B <kitty@cs.wustl.edu> + + * ACEXML/parser/parser/Parser.i: Handle end-of-line as required by + the spec. Specifically any sequence of 0x0D or 0x0D 0x0A should + be normalized to a 0x0A before passing to the XML processor. + + * ACEXML/parser/parser/Parser.cpp: Remove checks for 0x0D as it is + handled transparently now. + +Thu Oct 24 21:06:44 2002 Krishnakumar B <kitty@cs.wustl.edu> + + * ACEXML/common/NamespaceSupport.cpp: Define strings normally and + not as an array. + + * ACEXML/common/Attributes_Def_Builder.h: No need to typedef in C++. + Thu Oct 24 01:52:46 2002 Krishnakumar B <kitty@cs.wustl.edu> * ACEXML\parser\parser\Parser.cpp: Moved out the declaration of diff --git a/ChangeLogs/ChangeLog-03a b/ChangeLogs/ChangeLog-03a index 8e257b62e0e..285f051fb34 100644 --- a/ChangeLogs/ChangeLog-03a +++ b/ChangeLogs/ChangeLog-03a @@ -1,3 +1,118 @@ +Tue Nov 12 19:48:34 2002 Krishnakumar B <kitty@cs.wustl.edu> + + * ACEXML/parser/parser/ParserContext.cpp: + * ACEXML/parser/parser/ParserContext.h: + * ACEXML/parser/parser/ParserContext.inl: + + New files which hold the ParserContext needed to handle the + switching of input streams on the fly. + + * ACEXML/parser/parser/ParserInternals.cpp: + * ACEXML/parser/parser/ParserInternals.h: + + Moved some generic code from Parser.cpp to here. 
+ + * ACEXML/apps/svcconf/Makefile: + * ACEXML/common/Makefile: + * ACEXML/parser/parser/Makefile: + + Updated dependencies. + + * ACEXML/common/Attributes_Def_Builder.h: + + No need to typedef in C++. + + * ACEXML/common/DefaultHandler.cpp: + + Minor typos. + + * ACEXML/common/Encoding.cpp: + + If auto-detection of encoding fails, assume that it is UTF-8. + + * ACEXML/common/Exception.cpp: + + Change the error message from ACE_DEBUG to ACE_ERROR. + + * ACEXML/common/FileCharStream.cpp: Handle BOM of UTF-8 in + addition to UTF-16. Cleanup unnecessary parens. + + * ACEXML/common/HttpCharStream.cpp: + * ACEXML/common/HttpCharStream.h: + + Add support for auto-detection of encoding. + + * ACEXML/common/InputSource.cpp: + * ACEXML/common/InputSource.h: + + Fixes for use with ACEXML_Parser_Context. + + * ACEXML/common/LocatorImpl.cpp: + * ACEXML/common/LocatorImpl.h: + + Fixed bug in copy constructor which resulted in locator + information not getting set properly. + + * ACEXML/common/NamespaceSupport.cpp: + * ACEXML/common/NamespaceSupport.h: + + Implement reset() method. + + * ACEXML/common/SAXExceptions.cpp: + + Change the error message from ACE_DEBUG to ACE_ERROR. + + * ACEXML/common/StrCharStream.cpp: + + Handle copying of bytes according to sizeof (ACE_WCHAR). + + * ACEXML/common/StreamFactory.cpp: Create the appropriate stream + given an URI. We don't try to normalize the URI here. It is done + in the Parser. + + * ACEXML/common/Transcode.cpp: + * ACEXML/common/Transcode.i: + + Moved some very big functions from .i to .cpp. + + * ACEXML/examples/SAXPrint/SAXPrint_Handler.cpp: + * ACEXML/examples/SAXPrint/main.cpp: + + Updates to reflect the new calling convention in the Parser. + + * ACEXML/parser/parser/Entity_Manager.cpp: + * ACEXML/parser/parser/Entity_Manager.h: + * ACEXML/parser/parser/Entity_Manager.i: + + Implemented support for resolving SYSTEM and PUBLIC ids from + the Entity_Manager. 
+ + * ACEXML/parser/parser/Parser.cpp: + * ACEXML/parser/parser/Parser.h: + * ACEXML/parser/parser/Parser.i: + + Implemented support for external parameter and entity + references. Rewrote a lot of the basic parsing functionality to + adhere to the standard. Implement partial support for validation + of XML files. + + +Fri Oct 25 15:44:04 2002 Krishnakumar B <kitty@cs.wustl.edu> + + * ACEXML/parser/parser/Parser.i: Handle end-of-line as required by + the spec. Specifically any sequence of 0x0D or 0x0D 0x0A should + be normalized to a 0x0A before passing to the XML processor. + + * ACEXML/parser/parser/Parser.cpp: Remove checks for 0x0D as it is + handled transparently now. + +Thu Oct 24 21:06:44 2002 Krishnakumar B <kitty@cs.wustl.edu> + + * ACEXML/common/NamespaceSupport.cpp: Define strings normally and + not as an array. + + * ACEXML/common/Attributes_Def_Builder.h: No need to typedef in C++. + Thu Oct 24 01:52:46 2002 Krishnakumar B <kitty@cs.wustl.edu> * ACEXML\parser\parser\Parser.cpp: Moved out the declaration of diff --git a/tests/Obstack_Test.cpp b/tests/Obstack_Test.cpp index 110fea4eba9..ad424254b98 100644 --- a/tests/Obstack_Test.cpp +++ b/tests/Obstack_Test.cpp @@ -29,8 +29,6 @@ int ACE_TMAIN (int, ACE_TCHAR *[]) ACE_START_TEST (ACE_TEXT ("Obstack_Test")); int errors = 0; - // For this test, the length of the ACE_Obstack must be larger than - // both of these strings, but less than their sum. 
const ACE_TCHAR str1[] = ACE_TEXT ("Mary had a little lamb."); const ACE_TCHAR str2[] = ACE_TEXT ("It's fleece was white as snow; but...."); ACE_Obstack_T<ACE_TCHAR> stack (sizeof (str1) + 1); diff --git a/tests/Service_Config_Test.conf.xml b/tests/Service_Config_Test.conf.xml index f3273f0cb93..767e885c467 100644 --- a/tests/Service_Config_Test.conf.xml +++ b/tests/Service_Config_Test.conf.xml @@ -1,4 +1,5 @@ <?xml version='1.0'?> +<!DOCTYPE ACE_Svc_Conf "http://www.cs.wustl.edu/~kitty/svcconf.dtd"> <!-- Converted from Service_Config_Test.conf by svcconf-convert.pl --> <ACE_Svc_Conf> <!-- Dynamically loading each of the Service Objects below causes a --> |