summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorkitty <kitty@ae88bc3d-4319-0410-8dbf-d08b4c9d3795>2002-11-15 00:50:17 +0000
committerkitty <kitty@ae88bc3d-4319-0410-8dbf-d08b4c9d3795>2002-11-15 00:50:17 +0000
commitc1ae0478065bfb4136ce1c4ea861428a6b0d1138 (patch)
tree02286fba3cffd453d2eb953746d58364595350b6
parent9341853540c4dfda33b28b1eb1af19b4ada5e97f (diff)
downloadATCD-c1ae0478065bfb4136ce1c4ea861428a6b0d1138.tar.gz
ChangeLogTag: Tue Nov 12 19:48:34 2002 Krishnakumar B <kitty@cs.wustl.edu>
-rw-r--r--ACEXML/apps/svcconf/Makefile4
-rw-r--r--ACEXML/common/Attributes_Def_Builder.h8
-rw-r--r--ACEXML/common/DefaultHandler.cpp6
-rw-r--r--ACEXML/common/Encoding.cpp17
-rw-r--r--ACEXML/common/Exception.cpp2
-rw-r--r--ACEXML/common/FileCharStream.cpp20
-rw-r--r--ACEXML/common/HttpCharStream.cpp119
-rw-r--r--ACEXML/common/HttpCharStream.h5
-rw-r--r--ACEXML/common/InputSource.cpp22
-rw-r--r--ACEXML/common/InputSource.h10
-rw-r--r--ACEXML/common/LocatorImpl.cpp10
-rw-r--r--ACEXML/common/LocatorImpl.h8
-rw-r--r--ACEXML/common/Makefile3
-rw-r--r--ACEXML/common/NamespaceSupport.cpp14
-rw-r--r--ACEXML/common/NamespaceSupport.h2
-rw-r--r--ACEXML/common/SAXExceptions.cpp4
-rw-r--r--ACEXML/common/StrCharStream.cpp5
-rw-r--r--ACEXML/common/StreamFactory.cpp7
-rw-r--r--ACEXML/common/Transcode.cpp233
-rw-r--r--ACEXML/common/Transcode.i233
-rw-r--r--ACEXML/examples/SAXPrint/SAXPrint_Handler.cpp9
-rw-r--r--ACEXML/examples/SAXPrint/main.cpp5
-rw-r--r--ACEXML/parser/parser/Entity_Manager.cpp32
-rw-r--r--ACEXML/parser/parser/Entity_Manager.h22
-rw-r--r--ACEXML/parser/parser/Entity_Manager.i37
-rw-r--r--ACEXML/parser/parser/Makefile291
-rw-r--r--ACEXML/parser/parser/Parser.cpp3996
-rw-r--r--ACEXML/parser/parser/Parser.dsp18
-rw-r--r--ACEXML/parser/parser/Parser.h647
-rw-r--r--ACEXML/parser/parser/Parser.i197
-rw-r--r--ACEXML/parser/parser/ParserContext.cpp15
-rw-r--r--ACEXML/parser/parser/ParserContext.h78
-rw-r--r--ACEXML/parser/parser/ParserContext.inl67
-rw-r--r--ChangeLog115
-rw-r--r--ChangeLogs/ChangeLog-03a115
-rw-r--r--tests/Obstack_Test.cpp2
-rw-r--r--tests/Service_Config_Test.conf.xml1
37 files changed, 4365 insertions, 2014 deletions
diff --git a/ACEXML/apps/svcconf/Makefile b/ACEXML/apps/svcconf/Makefile
index e908e8ca2f7..8652af06061 100644
--- a/ACEXML/apps/svcconf/Makefile
+++ b/ACEXML/apps/svcconf/Makefile
@@ -395,7 +395,9 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU
$(ACE_ROOT)/ace/Obstack_T.cpp \
$(ACE_ROOT)/ACEXML/parser/parser/Entity_Manager.h \
$(ACE_ROOT)/ACEXML/parser/parser/Entity_Manager.i \
- $(ACE_ROOT)/ACEXML/parser/parser/ParserErrors.h \
+ $(ACE_ROOT)/ACEXML/parser/parser/ParserInternals.h \
+ $(ACE_ROOT)/ACEXML/parser/parser/ParserContext.h \
+ $(ACE_ROOT)/ACEXML/parser/parser/ParserContext.inl \
$(ACE_ROOT)/ACEXML/parser/parser/Parser.i \
Svcconf_Handler.h \
$(ACE_ROOT)/ACEXML/common/DefaultHandler.h \
diff --git a/ACEXML/common/Attributes_Def_Builder.h b/ACEXML/common/Attributes_Def_Builder.h
index 26404fe6d86..ed7139bd2a3 100644
--- a/ACEXML/common/Attributes_Def_Builder.h
+++ b/ACEXML/common/Attributes_Def_Builder.h
@@ -39,7 +39,7 @@ public:
typedef auto_ptr<ACEXML_Attribute_Def_Builder> VAR;
- typedef enum {
+ enum ATT_TYPE {
CDATA,
ID,
IDREF,
@@ -51,14 +51,14 @@ public:
NOTATION,
ENUMERATION,
ERROR_TYPE
- } ATT_TYPE;
+ };
- typedef enum {
+ enum DEFAULT_DECL {
REQUIRED,
IMPLIED,
FIXED,
INVALID
- } DEFAULT_DECL;
+ };
virtual ~ACEXML_Attribute_Def_Builder () = 0;
diff --git a/ACEXML/common/DefaultHandler.cpp b/ACEXML/common/DefaultHandler.cpp
index f96e29d3cf3..60d2d783366 100644
--- a/ACEXML/common/DefaultHandler.cpp
+++ b/ACEXML/common/DefaultHandler.cpp
@@ -103,7 +103,7 @@ ACEXML_DefaultHandler::startPrefixMapping (const ACEXML_Char *,
// No-op.
}
- // *** Methods inherit from ACEXML_DTDHandler.
+ // *** Methods inherited from ACEXML_DTDHandler.
void
ACEXML_DefaultHandler::notationDecl (const ACEXML_Char *,
@@ -124,7 +124,7 @@ ACEXML_DefaultHandler::unparsedEntityDecl (const ACEXML_Char *,
// No-op.
}
- // Methods inherit from ACEXML_EnitityResolver.
+ // Methods inherited from ACEXML_EntityResolver.
ACEXML_InputSource *
ACEXML_DefaultHandler::resolveEntity (const ACEXML_Char *,
@@ -135,7 +135,7 @@ ACEXML_DefaultHandler::resolveEntity (const ACEXML_Char *,
return 0;
}
- // Methods inherit from ACEXML_ErrorHandler.
+ // Methods inherited from ACEXML_ErrorHandler.
/*
* Receive notification of a recoverable error.
diff --git a/ACEXML/common/Encoding.cpp b/ACEXML/common/Encoding.cpp
index c6e53ed8c47..9ca972816b8 100644
--- a/ACEXML/common/Encoding.cpp
+++ b/ACEXML/common/Encoding.cpp
@@ -18,8 +18,8 @@ const ACEXML_UTF8 ACEXML_Encoding::byte_order_mark_[][4] = {
{ '\xFF', '\xFE', '\x00', '\x00' }, // UCS-4, little-endian (4321 order)
{ '\x00', '\x00', '\xFF', '\xFE' }, // UCS-4, unusual octet order (2143)
{ '\xFE', '\xFF', '\x00', '\x00' }, // UCS-4, unusual octet order (3412)
- { '\xFE', '\xFF', '\xFF', '\xFF' }, // UTF-16, big-endian (3 & 4 ignored)
- { '\xFF', '\xFE', '\xFF', '\xFF' }, // UTF-16, little-endian ( 3 & 4 ignored)
+ { '\xFE', '\xFF', '\xFF', '\xFF' }, // UTF-16, big-endian (3 & 4 != 0)
+ { '\xFF', '\xFE', '\xFF', '\xFF' }, // UTF-16, little-endian ( 3 & 4 != 0)
{ '\xEF', '\xBB', '\xBF', '\xFF' } // UTF-8
};
@@ -36,11 +36,13 @@ const ACEXML_UTF8 ACEXML_Encoding::magic_values_[][4] = {
const ACEXML_Char*
ACEXML_Encoding::get_encoding (const char* input)
{
- if (ACE_OS::memcmp (&ACEXML_Encoding::byte_order_mark_[ACEXML_Encoding::UTF16BE][0], input, 2) == 0)
+ if ((ACE_OS::memcmp (&ACEXML_Encoding::byte_order_mark_[ACEXML_Encoding::UTF16BE][0], input, 2) == 0)
+ && (input[2] != 0 || input[3] != 0)) // 3 & 4 should not be both zero
return ACEXML_Encoding::encoding_names_[ACEXML_Encoding::UTF16BE];
- else if (ACE_OS::memcmp (&ACEXML_Encoding::byte_order_mark_[ACEXML_Encoding::UTF16LE][0], input, 2) == 0)
+ else if ((ACE_OS::memcmp (&ACEXML_Encoding::byte_order_mark_[ACEXML_Encoding::UTF16LE][0], input, 2) == 0)
+ && (input[2] != 0 && input[3] != 0)) // 3 & 4 should not be both zero
return ACEXML_Encoding::encoding_names_[ACEXML_Encoding::UTF16LE];
- else if (ACE_OS::memcmp (&ACEXML_Encoding::byte_order_mark_[ACEXML_Encoding::UTF8][0], input, 4) == 0)
+ else if (ACE_OS::memcmp (&ACEXML_Encoding::byte_order_mark_[ACEXML_Encoding::UTF8][0], input, 3) == 0)
return ACEXML_Encoding::encoding_names_[ACEXML_Encoding::UTF8];
else if (ACE_OS::memcmp (&ACEXML_Encoding::magic_values_[ACEXML_Encoding::UTF16BE][0], input, 4) == 0)
return ACEXML_Encoding::encoding_names_[ACEXML_Encoding::UTF16BE];
@@ -49,5 +51,8 @@ ACEXML_Encoding::get_encoding (const char* input)
else if (ACE_OS::memcmp (&ACEXML_Encoding::magic_values_[ACEXML_Encoding::UTF8][0], input, 4) == 0)
return ACEXML_Encoding::encoding_names_[ACEXML_Encoding::UTF8];
else
- return ACEXML_Encoding::encoding_names_[ACEXML_Encoding::OTHER];
+ {
+ ACE_ERROR ((LM_ERROR, "Unknown encoding. Assuming UTF-8\n"));
+ return ACEXML_Encoding::encoding_names_[ACEXML_Encoding::UTF8];
+ }
}
diff --git a/ACEXML/common/Exception.cpp b/ACEXML/common/Exception.cpp
index 3086a8a7bfb..88b2a5709a7 100644
--- a/ACEXML/common/Exception.cpp
+++ b/ACEXML/common/Exception.cpp
@@ -45,7 +45,7 @@ ACEXML_Exception::is_a (const ACEXML_Char *name)
void
ACEXML_Exception::print (void)
{
- ACE_DEBUG ((LM_ERROR,
+ ACE_ERROR ((LM_ERROR,
ACE_TEXT ("ACEXML: (%P|%t) EXCEPTION : %s\n"),
this->exception_name_));
}
diff --git a/ACEXML/common/FileCharStream.cpp b/ACEXML/common/FileCharStream.cpp
index 07aaa2ee97c..1d7811494ef 100644
--- a/ACEXML/common/FileCharStream.cpp
+++ b/ACEXML/common/FileCharStream.cpp
@@ -50,22 +50,22 @@ ACEXML_FileCharStream::determine_encoding (void)
const ACEXML_Char* temp = ACEXML_Encoding::get_encoding (input);
if (!temp)
return -1;
- if (ACE_OS::strcmp (temp,
- ACEXML_Encoding::encoding_names_[ACEXML_Encoding::OTHER]) == 0)
- return -1;
else
{
this->encoding_ = ACE::strnew (temp);
- ACE_DEBUG ((LM_DEBUG, "File's encoding is %s\n", this->encoding_));
+ ACE_DEBUG ((LM_DEBUG, ACE_TEXT ("File's encoding is %s\n"),
+ this->encoding_));
}
// Rewind the stream
this->rewind();
// Move over the byte-order-mark if present.
char ch;
- for (int j = 0; j < 2; ++j)
+ for (int j = 0; j < 3; ++j)
{
- this->getchar_i (ch);
- if (ch == '\xFF' || ch == '\xFE' || ch == '\xEF')
+ if (this->getchar_i (ch) < 0)
+ return -1;
+ if (ch == '\xFF' || ch == '\xFE' || ch == '\xEF' || ch == '\xBB' ||
+ ch == '\xBF')
continue;
else
{
@@ -115,7 +115,7 @@ int
ACEXML_FileCharStream::read (ACEXML_Char *str,
size_t len)
{
- return ACE_OS::fread (str, len, 1, this->infile_);
+ return ACE_OS::fread (str, len, sizeof (ACEXML_Char), this->infile_);
}
int
@@ -159,7 +159,7 @@ ACEXML_FileCharStream::get_i (ACEXML_Char& ch)
ch = 0;
return -1;
}
- ch = (BE) ? (input[0] << 8) | input[1] : (input[1] << 8) | input[0];
+ ch = BE ? input[0] << 8 | input[1] : input[1] << 8 | input[0];
return 0;
}
#endif /* ACE_USES_WCHAR */
@@ -208,7 +208,7 @@ ACEXML_FileCharStream::peek_i (void)
this->peek_ = 0;
return -1;
}
- this->peek_ = (BE) ? (input[0] << 8) | input[1] : (input[1] << 8) | input[0];
+ this->peek_ = BE ? input[0] << 8 | input[1] : input[1] << 8 | input[0];
return this->peek_;
}
#endif /* ACE_USES_WCHAR */
diff --git a/ACEXML/common/HttpCharStream.cpp b/ACEXML/common/HttpCharStream.cpp
index 83426595168..2f1a9d4d754 100644
--- a/ACEXML/common/HttpCharStream.cpp
+++ b/ACEXML/common/HttpCharStream.cpp
@@ -4,6 +4,7 @@
#include "ace/ace_wchar.h"
#include "ace/Auto_Ptr.h"
#include "ACEXML/common/HttpCharStream.h"
+#include "ACEXML/common/Encoding.h"
ACE_RCSID (common, HttpCharStream, "$Id$")
@@ -72,7 +73,7 @@ ACEXML_HttpCharStream::open (const ACEXML_Char *url)
this->close();
ACE_ERROR_RETURN ((LM_ERROR, "Server returned status %d : %s\n",
result,
- "Refer HTTP/1.1 for details"), -1);
+ "Refer HTTP/1.0 for details"), -1);
}
this->size_ = len;
@@ -236,6 +237,9 @@ ACEXML_HttpCharStream::get_url (size_t& len)
if (this->stream_->seek (data_offset, SEEK_SET) == -1)
ACE_ERROR_RETURN ((LM_ERROR, "%s: %m",
"Error in seeking to beginning of data"), -1);
+
+ if (this->determine_encoding() == -1)
+ return -1;
return status;
}
@@ -257,7 +261,7 @@ ACEXML_HttpCharStream::send_request (void)
// Ensure that the <command> memory is deallocated.
ACE_Auto_Basic_Array_Ptr<char> cmd_ptr (command);
- int bytes = ACE_OS::sprintf (command, "GET %s HTTP/1.1\r\n", path);
+ int bytes = ACE_OS::sprintf (command, "GET %s HTTP/1.0\r\n", path);
bytes += ACE_OS::sprintf (&command[bytes], "Host: %s\r\n",
this->url_addr_->get_host_name ());
bytes += ACE_OS::sprintf (&command[bytes], "\r\n");
@@ -302,16 +306,53 @@ ACEXML_HttpCharStream::close (void)
}
int
-ACEXML_HttpCharStream::get (ACEXML_Char& ch)
+ACEXML_HttpCharStream::determine_encoding (void)
{
- ch = (ACEXML_Char) this->stream_->get_char();
- return (ch == (ACEXML_Char)EOF ? -1 :0);
+ char input[4] = {0, 0, 0, 0};
+ int i = 0;
+ for (; i < 4 && input[i] != -1; ++i)
+ input[i] = this->stream_->peek_char(i);
+ if (i < 4)
+ return -1;
+ const ACEXML_Char* temp = ACEXML_Encoding::get_encoding (input);
+ if (!temp)
+ return -1;
+ else
+ {
+ this->encoding_ = ACE::strnew (temp);
+ ACE_DEBUG ((LM_DEBUG, "URI's encoding is %s\n", this->encoding_));
+ }
+ // Move over the byte-order-mark if present.
+ for (int j = 0; j < 3; ++j)
+ {
+ if (input[i] == '\xFF' || input[i] == '\xFE' || input[i] == '\xEF' ||
+ input[i] == '\xBB' || input[i] == '\xBF')
+ {
+ this->stream_->get_char();
+ continue;
+ }
+ break;
+ }
+ return 0;
+}
+
+void
+ACEXML_HttpCharStream::rewind (void)
+{
+ this->stream_->rewind();
+}
+
+const ACEXML_Char*
+ACEXML_HttpCharStream::getEncoding (void)
+{
+ return this->encoding_;
}
int
ACEXML_HttpCharStream::read (ACEXML_Char *str,
size_t len)
{
+ len = len * sizeof (ACEXML_Char);
char* temp = ACE_const_cast (char*, this->stream_->recv (len));
str = ACE_TEXT_CHAR_TO_TCHAR (temp);
if (str == 0)
@@ -319,20 +360,76 @@ ACEXML_HttpCharStream::read (ACEXML_Char *str,
return len;
}
+
+int
+ACEXML_HttpCharStream::get (ACEXML_Char& ch)
+{
+#if defined (ACE_USES_WCHAR)
+ return this->get_i (ch);
+#else
+ ch = (ACEXML_Char) this->stream_->get_char();
+ return (ch == (ACEXML_Char)EOF ? -1 :0);
+#endif /* ACE_USES_WCHAR */
+}
+
int
ACEXML_HttpCharStream::peek (void)
{
+#if defined (ACE_USES_WCHAR)
+ return this->peek_i();
+#else
return this->stream_->peek_char (0);
+#endif /* ACE_USES_WCHAR */
}
-void
-ACEXML_HttpCharStream::rewind (void)
+
+#if defined (ACE_USES_WCHAR)
+int
+ACEXML_HttpCharStream::get_i (ACEXML_Char& ch)
{
- this->stream_->rewind();
+ if (ACE_OS::strcmp (this->encoding_, ACE_TEXT ("UTF-8")) == 0)
+ {
+ ch = (ACEXML_Char) this->stream_->getchar();
+ return (ch == (ACEXML_Char)EOF ? -1 : 0);
+ }
+ int BE = (ACE_OS::strcmp (this->encoding_,
+ ACE_TEXT ("UTF-16BE")) == 0) ? 1 : 0;
+ ACEXML_Char input[2] = {0};
+ int i = 0;
+ for (; i < 2 && input[i] != EOF; ++i)
+ {
+ input[i] = this->stream_->get_char();
+ }
+ if (i < 2)
+ {
+ ch = 0;
+ return input[i];
+ }
+ ch = BE ? input[0] << 8 | input[1] : input[1] << 8 | input[0];
+ return 0;
}
-const ACEXML_Char*
-ACEXML_HttpCharStream::getEncoding (void)
+int
+ACEXML_HttpCharStream::peek_i (void)
{
- return this->encoding_;
+ // If we are reading a UTF-8 encoded file, just use the plain unget.
+ if (ACE_OS::strcmp (this->encoding_, ACE_TEXT ("UTF-8")) == 0)
+ {
+ ACEXML_Char ch = (ACEXML_Char) this->stream_->peek_char (0);
+ return ch;
+ }
+
+ int BE = (ACE_OS::strcmp (this->encoding_,
+ ACE_TEXT ("UTF-16BE")) == 0) ? 1 : 0;
+ // Peek into the stream.
+ ACEXML_Char input[2];
+ int i = 0;
+ for (; i < 2 && input[i] != EOF; ++i)
+ {
+ input[i] = this->peek_char (i);
+ }
+ if (i < 2)
+ return -1;
+ return (BE ? input[0] << 8 | input[1] : input[1] << 8 | input[0]);
}
+#endif /* ACE_USES_WCHAR */
diff --git a/ACEXML/common/HttpCharStream.h b/ACEXML/common/HttpCharStream.h
index 7bce23a224b..59813c51eb7 100644
--- a/ACEXML/common/HttpCharStream.h
+++ b/ACEXML/common/HttpCharStream.h
@@ -77,6 +77,11 @@ public:
virtual void rewind (void);
/**
+ * Determine the encoding of the file.
+ */
+ virtual int determine_encoding (void);
+
+ /**
* Get the encoding of the file
*/
virtual const ACEXML_Char* getEncoding (void);
diff --git a/ACEXML/common/InputSource.cpp b/ACEXML/common/InputSource.cpp
index 0dcdeb0f2ba..1292f3caa31 100644
--- a/ACEXML/common/InputSource.cpp
+++ b/ACEXML/common/InputSource.cpp
@@ -22,8 +22,7 @@ ACEXML_InputSource::ACEXML_InputSource (ACEXML_CharStream *stm)
/*
* Create a new input source with a character stream.
- * /
- InputSource (Reader);
+ *
*/
ACEXML_InputSource::ACEXML_InputSource (const ACEXML_Char *systemId)
@@ -40,31 +39,25 @@ ACEXML_InputSource::~ACEXML_InputSource (void)
}
ACEXML_CharStream *
-ACEXML_InputSource::getCharStream (void)
+ACEXML_InputSource::getCharStream (void) const
{
return this->charStream_;
}
- /*
- * Get the character stream for this input source.
- * /
- virtual Reader *getCharacterStream (void);
- */
-
const ACEXML_Char *
-ACEXML_InputSource::getEncoding (void)
+ACEXML_InputSource::getEncoding (void) const
{
return this->encoding_;
}
const ACEXML_Char *
-ACEXML_InputSource::getPublicId (void)
+ACEXML_InputSource::getPublicId (void) const
{
return this->publicId_;
}
const ACEXML_Char *
-ACEXML_InputSource::getSystemId (void)
+ACEXML_InputSource::getSystemId (void) const
{
return this->systemId_;
}
@@ -76,11 +69,6 @@ ACEXML_InputSource::setCharStream (ACEXML_CharStream *stm)
this->charStream_ = stm;
}
- /*
- * Set the character stream for this input source.
- *
- */
-
void
ACEXML_InputSource::setEncoding (const ACEXML_Char *encoding)
{
diff --git a/ACEXML/common/InputSource.h b/ACEXML/common/InputSource.h
index 2d8c5a7beb5..aca68f1adeb 100644
--- a/ACEXML/common/InputSource.h
+++ b/ACEXML/common/InputSource.h
@@ -63,7 +63,7 @@ public:
* Notice that ACEXML_InputSource assumes the ownership
* of <stream>
*/
- ACEXML_InputSource (ACEXML_CharStream *stream);
+ ACE_EXPLICIT ACEXML_InputSource (ACEXML_CharStream *stream);
/*
* Create a new input source with a system identifier.
@@ -78,22 +78,22 @@ public:
/*
* Get the ACEXML_Char stream for this input source.
*/
- virtual ACEXML_CharStream *getCharStream (void);
+ virtual ACEXML_CharStream *getCharStream (void) const;
/*
* Get the character encoding for a byte stream or URI.
*/
- virtual const ACEXML_Char *getEncoding (void);
+ virtual const ACEXML_Char *getEncoding (void) const;
/*
* Get the public identifier for this input source.
*/
- virtual const ACEXML_Char *getPublicId (void);
+ virtual const ACEXML_Char *getPublicId (void) const;
/*
* Get the system identifier for this input source.
*/
- virtual const ACEXML_Char *getSystemId (void);
+ virtual const ACEXML_Char *getSystemId (void) const;
/*
* Set the ACEXML_Char stream for this input source.
diff --git a/ACEXML/common/LocatorImpl.cpp b/ACEXML/common/LocatorImpl.cpp
index d304092e9fc..d7f0d1028bc 100644
--- a/ACEXML/common/LocatorImpl.cpp
+++ b/ACEXML/common/LocatorImpl.cpp
@@ -14,6 +14,16 @@ ACEXML_LocatorImpl::ACEXML_LocatorImpl (void)
{
}
+ACEXML_LocatorImpl::ACEXML_LocatorImpl (const ACEXML_Char* systemId,
+ const ACEXML_Char* publicId)
+ : publicId_ (publicId ? ACE::strnew (publicId) : 0),
+ systemId_ (systemId ? ACE::strnew (systemId) : 0),
+ lineNumber_ (1),
+ columnNumber_ (0)
+{
+}
+
+
ACEXML_LocatorImpl::ACEXML_LocatorImpl (const ACEXML_Locator& locator)
: publicId_ (ACE::strnew (locator.getPublicId ())),
systemId_ (ACE::strnew (locator.getSystemId ())),
diff --git a/ACEXML/common/LocatorImpl.h b/ACEXML/common/LocatorImpl.h
index 5e0bbdace23..2bea4e80c22 100644
--- a/ACEXML/common/LocatorImpl.h
+++ b/ACEXML/common/LocatorImpl.h
@@ -66,7 +66,13 @@ public:
*/
ACEXML_LocatorImpl (void);
- /*
+ /**
+ * Construct a locator with systemId and publicId
+ *
+ */
+ ACEXML_LocatorImpl (const ACEXML_Char* systemId,
+ const ACEXML_Char* publicId);
+ /**
* Copy constructor. Create a persistent copy of the current state
* of a locator. When the original locator changes, this copy will
* still keep the original values (and it can be used outside the
diff --git a/ACEXML/common/Makefile b/ACEXML/common/Makefile
index ee20d8016f5..64962069496 100644
--- a/ACEXML/common/Makefile
+++ b/ACEXML/common/Makefile
@@ -2470,7 +2470,8 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU
$(ACE_ROOT)/ace/Service_Repository.h \
$(ACE_ROOT)/ace/Service_Repository.i \
$(ACE_ROOT)/ace/WFMO_Reactor.h \
- $(ACE_ROOT)/ace/Connector.cpp
+ $(ACE_ROOT)/ace/Connector.cpp \
+ Encoding.h
.obj/StreamFactory.o .obj/StreamFactory.so .shobj/StreamFactory.o .shobj/StreamFactory.so: StreamFactory.cpp \
StreamFactory.h \
diff --git a/ACEXML/common/NamespaceSupport.cpp b/ACEXML/common/NamespaceSupport.cpp
index b28b9d07248..5f385d6a706 100644
--- a/ACEXML/common/NamespaceSupport.cpp
+++ b/ACEXML/common/NamespaceSupport.cpp
@@ -2,18 +2,15 @@
#include "ACEXML/common/NamespaceSupport.h"
-static const ACEXML_Char ACEXML_XMLNS_PREFIX_name[] = {'x', 'm', 'l', 'n', 's', 0};
+static const ACEXML_Char ACEXML_XMLNS_PREFIX_name[] = ACE_TEXT ("xmlns");
+
const ACEXML_Char *ACEXML_NamespaceSupport::XMLNS_PREFIX = ACEXML_XMLNS_PREFIX_name;
static const ACEXML_Char ACEXML_DEFAULT_NS_PREFIX[] = {0};
-static const ACEXML_Char ACEXML_TABOO_NS_PREFIX[] = {'x', 'm', 'l', 0};
+static const ACEXML_Char ACEXML_TABOO_NS_PREFIX[] = ACE_TEXT ("xml");
-static const ACEXML_Char ACEXML_XMLNS_URI_name[] = {
- 'h', 't', 't', 'p', ':', '/', '/',
- 'w', 'w', 'w', '.', 'w', '3', '.', 'o', 'r', 'g', '/',
- 'X', 'M', 'L', '/', '1', '9', '9', '8', '/',
- 'n', 'a', 'm', 'e', 's', 'p', 'a', 'c', 'e', 0};
+static const ACEXML_Char ACEXML_XMLNS_URI_name[] = ACE_TEXT ("http://www.w3.org/XML/1998/namespace");
const ACEXML_Char *ACEXML_NamespaceSupport::XMLNS = ACEXML_XMLNS_URI_name;
#if !defined (__ACEXML_INLINE__)
@@ -250,7 +247,8 @@ ACEXML_NamespaceSupport::processName (const ACEXML_Char *qName,
int
ACEXML_NamespaceSupport::reset (void)
{
- // Not implemented.
+ while (this->popContext() != -1)
+ ;
return 0;
}
diff --git a/ACEXML/common/NamespaceSupport.h b/ACEXML/common/NamespaceSupport.h
index 83d78a3c58d..c39bd6fc731 100644
--- a/ACEXML/common/NamespaceSupport.h
+++ b/ACEXML/common/NamespaceSupport.h
@@ -210,7 +210,7 @@ public:
/**
* Reset this Namespace support object for reuse.
- * @todo Not implemented.
+ *
*/
int reset (void);
diff --git a/ACEXML/common/SAXExceptions.cpp b/ACEXML/common/SAXExceptions.cpp
index fe0963cd169..4f56ed31d99 100644
--- a/ACEXML/common/SAXExceptions.cpp
+++ b/ACEXML/common/SAXExceptions.cpp
@@ -105,7 +105,7 @@ ACEXML_SAXException::is_a (const ACEXML_Char *name)
void
ACEXML_SAXException::print (void)
{
- ACE_DEBUG ((LM_ERROR,
+ ACE_ERROR ((LM_ERROR,
ACE_TEXT ("ACEXML: (%P|%t) %s: %s\n"),
this->exception_name_, this->message()));
}
@@ -249,7 +249,7 @@ ACEXML_SAXParseException::is_a (const ACEXML_Char *name)
void
ACEXML_SAXParseException::print (void)
{
- ACE_DEBUG ((LM_ERROR,
+ ACE_ERROR ((LM_ERROR,
ACE_TEXT ("ACEXML: (%P|%t) %s: %s\n"),
this->exception_name_, this->message()));
}
diff --git a/ACEXML/common/StrCharStream.cpp b/ACEXML/common/StrCharStream.cpp
index 046511407c2..4ea75a3743c 100644
--- a/ACEXML/common/StrCharStream.cpp
+++ b/ACEXML/common/StrCharStream.cpp
@@ -73,9 +73,6 @@ ACEXML_StrCharStream::determine_encoding (void)
const ACEXML_Char* temp = ACEXML_Encoding::get_encoding (input);
if (!temp)
return -1;
- if (ACE_OS::strcmp (temp,
- ACEXML_Encoding::encoding_names_[ACEXML_Encoding::OTHER]) == 0)
- return -1;
else
{
this->encoding_ = ACE::strnew (temp);
@@ -110,7 +107,7 @@ ACEXML_StrCharStream::read (ACEXML_Char *str,
if (this->start_ != 0 &&
this->ptr_ != this->end_)
{
- if ((int) len > this->end_ - this->ptr_)
+ if (len * sizeof (ACEXML_Char) > (size_t) (this->end_ - this->ptr_))
len = this->end_ - this->ptr_;
ACE_OS_String::strncpy (str, this->ptr_, len);
diff --git a/ACEXML/common/StreamFactory.cpp b/ACEXML/common/StreamFactory.cpp
index 80edc8a265f..df458e61697 100644
--- a/ACEXML/common/StreamFactory.cpp
+++ b/ACEXML/common/StreamFactory.cpp
@@ -24,17 +24,16 @@ ACEXML_StreamFactory::create_stream (const ACEXML_Char* uri)
ACE_NEW_RETURN (hstream, ACEXML_HttpCharStream, 0);
if (hstream->open (uri) != -1)
return hstream;
- else
- return 0;
}
else
{
+ if (ACE_OS::strstr (uri, ACE_TEXT ("file://")) != 0)
+ uri += 7; // Skip over file://
ACE_NEW_RETURN (fstream, ACEXML_FileCharStream, 0);
if (fstream->open (uri) != -1)
return fstream;
- else
- return 0;
}
+ return 0;
}
ACEXML_StreamFactory::~ACEXML_StreamFactory ()
diff --git a/ACEXML/common/Transcode.cpp b/ACEXML/common/Transcode.cpp
index 4007d346494..d857756d7dd 100644
--- a/ACEXML/common/Transcode.cpp
+++ b/ACEXML/common/Transcode.cpp
@@ -7,6 +7,239 @@
#endif /* __ACEXML_INLINE__ */
int
+ACEXML_Transcoder::utf162utf8 (ACEXML_UTF16 src,
+ ACEXML_UTF8 *dst,
+ size_t len)
+{
+ // Check for valid argument first...
+
+ if (dst == 0)
+ return INVALID_ARGS;
+
+ if (src < 0x80)
+ {
+ if (len < 1)
+ return DESTINATION_TOO_SHORT;
+
+ *dst = ACE_static_cast (ACEXML_UTF8, src);
+ return 1;
+ }
+ else if (src < 0x800)
+ {
+ if (len < 2)
+ return DESTINATION_TOO_SHORT;
+
+ *dst = 0xc0 | (src / 0x40);
+ *(dst+1) = 0x80 | (src % 0x40);
+ return 2;
+ }
+ else
+ {
+ if (len < 3)
+ return DESTINATION_TOO_SHORT;
+
+ // Surrogates (0xD800 - 0xDFFF) are not valid unicode values
+ if (src >= 0xD800 && src < 0xE000)
+ return IS_SURROGATE;
+
+ *dst = 0xe0 | (src / 0x1000);
+ *(dst+1) = 0x80 | ((src % 0x1000) / 0x40);
+ *(dst+2) = 0x80 | (src % 0x40);
+ return 3;
+ }
+ ACE_NOTREACHED (return NON_UNICODE;)
+ }
+
+int
+ACEXML_Transcoder::ucs42utf8 (ACEXML_UCS4 src,
+ ACEXML_UTF8 *dst,
+ size_t len)
+{
+ if (src < 0x10000)
+ {
+ int retv = ACEXML_Transcoder::utf162utf8
+ (ACE_static_cast (ACEXML_UTF16, src),
+ dst, len);
+ return (retv == IS_SURROGATE ? NON_UNICODE : retv);
+ }
+ else if (src >= 0x100000 && src < 0x110000)
+ {
+ if (len < 4)
+ return DESTINATION_TOO_SHORT;
+
+ if (dst == 0)
+ return INVALID_ARGS;
+
+ *dst = 0xf0 | (src / 0x40000);
+ *(dst+1) = 0x80 | ((src % 0x40000) / 0x1000);
+ *(dst+2) = 0x80 | ((src % 0x1000) / 0x40);
+ *(dst+3) = 0x80 | (src % 0x40);
+ return 4;
+ }
+ return NON_UNICODE;
+}
+
+
+int
+ACEXML_Transcoder::ucs42utf16 (ACEXML_UCS4 src,
+ ACEXML_UTF16 *dst,
+ size_t len)
+{
+ if (dst == 0)
+ return INVALID_ARGS;
+
+ if (src < 0x10000)
+ {
+ if (len < 1)
+ return DESTINATION_TOO_SHORT;
+
+ if (src >= 0xD800 && src < 0xE000)
+ return NON_UNICODE; // Surrogates are not valid unicode value
+
+ *dst = ACE_static_cast (ACEXML_UTF16, src);
+ return 1;
+ }
+ else if (src >= 0x100000 && src < 0x110000)
+ // Scalar values are encoded into surrogates
+ {
+ if (len < 2)
+ return DESTINATION_TOO_SHORT;
+
+ *dst = 0xD800 | (src / 0x400);
+ *(dst+1) = 0xDC00 | (src % 0x400);
+ return 2;
+ }
+
+ return NON_UNICODE;
+}
+
+int
+ACEXML_Transcoder::surrogate2utf8 (ACEXML_UTF16 high,
+ ACEXML_UTF16 low,
+ ACEXML_UTF8 *dst,
+ size_t len)
+{
+ if (len < 3)
+ return DESTINATION_TOO_SHORT;
+
+ if (dst == 0 ||
+ (high >= 0xD800 && high < 0xDC00) ||
+ (low >= 0xDC00 && low < 0xE000))
+ return INVALID_ARGS;
+
+ ACEXML_UCS4 src = (high - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000;
+ *dst = 0xD800 | (src / 0x400);
+ *(dst+1) = 0xDC00 | (src % 0x400);
+ return 2;
+}
+
+int
+ACEXML_Transcoder::surrogate2ucs4 (ACEXML_UTF16 high,
+ ACEXML_UTF16 low,
+ ACEXML_UCS4 &dst)
+{
+ if ((high >= 0xD800 && high < 0xDC00) ||
+ (low >= 0xDC00 && low < 0xE000))
+ return INVALID_ARGS;
+
+ dst = (high - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000;
+ return SUCCESS;
+}
+
+int
+ACEXML_Transcoder::utf82ucs4 (const ACEXML_UTF8 *the_src,
+ size_t len,
+ ACEXML_UCS4 &dst)
+{
+ if (the_src == 0)
+ return INVALID_ARGS;
+
+ const unsigned char *src = ACE_reinterpret_cast (const unsigned char *,
+ the_src);
+
+ size_t forward = 1;
+
+ if (forward > len)
+ return END_OF_SOURCE;
+
+ if (ACE_static_cast (unsigned char, *src) < 0x80)
+ dst = *src;
+ else if ((*src & 0xE0) == 0xC0)
+ {
+ dst = (*(src++) & 0x1f) * 0x40;
+ if (++forward > len)
+ return END_OF_SOURCE;
+ if ((*src & 0xC0) != 0x80)
+ return NON_UNICODE; // Error transcoding unicode scalar
+ dst += *src & 0x3f;
+ }
+ else if ((*src & 0xF0) == 0xE0)
+ {
+ dst = (*src++ & 0x0f) * 0x40;
+ if (++forward > len)
+ return END_OF_SOURCE;
+ if ((*src & 0xC0) != 0x80)
+ return NON_UNICODE;
+ dst = (dst + (*src++ & 0x3f)) * 0x40;
+ if (++forward > len)
+ return END_OF_SOURCE;
+ if ((*src & 0xC0) != 0x80)
+ return NON_UNICODE;
+ dst += *src & 0x3f;
+ }
+ else if ((*src & 0xF8) == 0xF0)
+ {
+ dst = (*src++ & 0x0f) * 0x40;
+ if (++forward > len)
+ return END_OF_SOURCE;
+ if ((*src & 0xC0) != 0x80)
+ return NON_UNICODE;
+ dst = (dst + (*src++ & 0x3f)) * 0x40;
+ if (++forward > len)
+ return END_OF_SOURCE;
+ if ((*src & 0xC0) != 0x80)
+ return NON_UNICODE;
+ dst = (dst + (*src++ & 0x3f)) * 0x40;
+ if (++forward > len)
+ return END_OF_SOURCE;
+ if ((*src & 0xC0) != 0x80)
+ return NON_UNICODE;
+ dst += *src & 0x3f;
+ }
+ else
+ return NON_UNICODE;
+
+ return forward;
+}
+
+int
+ACEXML_Transcoder::utf162ucs4 (const ACEXML_UTF16 *src,
+ size_t len,
+ ACEXML_UCS4 &dst)
+{
+ if (src == 0)
+ return INVALID_ARGS;
+
+ size_t forward = 1;
+ if (*src >= 0xDC00 && *src < 0xE000)
+ {
+ if (len < 2)
+ return END_OF_SOURCE;
+ return ACEXML_Transcoder::surrogate2ucs4 (*src,
+ *(src+1),
+ dst);
+ }
+ else
+ {
+ if (len < 1)
+ return END_OF_SOURCE;
+ dst = *src;
+ }
+
+ return forward;
+}
+
+int
ACEXML_Transcoder::utf8s2utf16s (const ACEXML_UTF8 *src,
ACEXML_UTF16 *dst,
size_t len)
diff --git a/ACEXML/common/Transcode.i b/ACEXML/common/Transcode.i
index e9ec3936e80..77b4466a3cb 100644
--- a/ACEXML/common/Transcode.i
+++ b/ACEXML/common/Transcode.i
@@ -1,234 +1 @@
// -*- C++ -*- $Id$
-
-ACEXML_INLINE int
-ACEXML_Transcoder::utf162utf8 (ACEXML_UTF16 src,
- ACEXML_UTF8 *dst,
- size_t len)
-{
- // Check for valid argument first...
-
- if (dst == 0)
- return INVALID_ARGS;
-
- if (src < 0x80)
- {
- if (len < 1)
- return DESTINATION_TOO_SHORT;
-
- *dst = ACE_static_cast (ACEXML_UTF8, src);
- return 1;
- }
- else if (src < 0x800)
- {
- if (len < 2)
- return DESTINATION_TOO_SHORT;
-
- *dst = 0xc0 | (src / 0x40);
- *(dst+1) = 0x80 | (src % 0x40);
- return 2;
- }
- else
- {
- if (len < 3)
- return DESTINATION_TOO_SHORT;
-
- // Surrogates (0xD800 - 0xDFFF) are not valid unicode values
- if (src >= 0xD800 && src < 0xE000)
- return IS_SURROGATE;
-
- *dst = 0xe0 | (src / 0x1000);
- *(dst+1) = 0x80 | ((src % 0x1000) / 0x40);
- *(dst+2) = 0x80 | (src % 0x40);
- return 3;
- }
- ACE_NOTREACHED (return NON_UNICODE;)
-}
-
-ACEXML_INLINE int
-ACEXML_Transcoder::ucs42utf8 (ACEXML_UCS4 src,
- ACEXML_UTF8 *dst,
- size_t len)
-{
- if (src < 0x10000)
- {
- int retv = ACEXML_Transcoder::utf162utf8
- (ACE_static_cast (ACEXML_UTF16, src),
- dst, len);
- return (retv == IS_SURROGATE ? NON_UNICODE : retv);
- }
- else if (src >= 0x100000 && src < 0x110000)
- {
- if (len < 4)
- return DESTINATION_TOO_SHORT;
-
- if (dst == 0)
- return INVALID_ARGS;
-
- *dst = 0xf0 | (src / 0x40000);
- *(dst+1) = 0x80 | ((src % 0x40000) / 0x1000);
- *(dst+2) = 0x80 | ((src % 0x1000) / 0x40);
- *(dst+3) = 0x80 | (src % 0x40);
- return 4;
- }
- return NON_UNICODE;
-}
-
-
-ACEXML_INLINE int
-ACEXML_Transcoder::ucs42utf16 (ACEXML_UCS4 src,
- ACEXML_UTF16 *dst,
- size_t len)
-{
- if (dst == 0)
- return INVALID_ARGS;
-
- if (src < 0x10000)
- {
- if (len < 1)
- return DESTINATION_TOO_SHORT;
-
- if (src >= 0xD800 && src < 0xE000)
- return NON_UNICODE; // Surrogates are not valid unicode value
-
- *dst = ACE_static_cast (ACEXML_UTF16, src);
- return 1;
- }
- else if (src >= 0x100000 && src < 0x110000)
- // Scalar values are encoded into surrogates
- {
- if (len < 2)
- return DESTINATION_TOO_SHORT;
-
- *dst = 0xD800 | (src / 0x400);
- *(dst+1) = 0xDC00 | (src % 0x400);
- return 2;
- }
-
- return NON_UNICODE;
-}
-
-ACEXML_INLINE int
-ACEXML_Transcoder::surrogate2utf8 (ACEXML_UTF16 high,
- ACEXML_UTF16 low,
- ACEXML_UTF8 *dst,
- size_t len)
-{
- if (len < 3)
- return DESTINATION_TOO_SHORT;
-
- if (dst == 0 ||
- (high >= 0xD800 && high < 0xDC00) ||
- (low >= 0xDC00 && low < 0xE000))
- return INVALID_ARGS;
-
- ACEXML_UCS4 src = (high - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000;
- *dst = 0xD800 | (src / 0x400);
- *(dst+1) = 0xDC00 | (src % 0x400);
- return 2;
-}
-
-ACEXML_INLINE int
-ACEXML_Transcoder::surrogate2ucs4 (ACEXML_UTF16 high,
- ACEXML_UTF16 low,
- ACEXML_UCS4 &dst)
-{
- if ((high >= 0xD800 && high < 0xDC00) ||
- (low >= 0xDC00 && low < 0xE000))
- return INVALID_ARGS;
-
- dst = (high - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000;
- return SUCCESS;
-}
-
-ACEXML_INLINE int
-ACEXML_Transcoder::utf82ucs4 (const ACEXML_UTF8 *the_src,
- size_t len,
- ACEXML_UCS4 &dst)
-{
- if (the_src == 0)
- return INVALID_ARGS;
-
- const unsigned char *src = ACE_reinterpret_cast (const unsigned char *,
- the_src);
-
- size_t forward = 1;
-
- if (forward > len)
- return END_OF_SOURCE;
-
- if (ACE_static_cast (unsigned char, *src) < 0x80)
- dst = *src;
- else if ((*src & 0xE0) == 0xC0)
- {
- dst = (*(src++) & 0x1f) * 0x40;
- if (++forward > len)
- return END_OF_SOURCE;
- if ((*src & 0xC0) != 0x80)
- return NON_UNICODE; // Error transcoding unicode scalar
- dst += *src & 0x3f;
- }
- else if ((*src & 0xF0) == 0xE0)
- {
- dst = (*src++ & 0x0f) * 0x40;
- if (++forward > len)
- return END_OF_SOURCE;
- if ((*src & 0xC0) != 0x80)
- return NON_UNICODE;
- dst = (dst + (*src++ & 0x3f)) * 0x40;
- if (++forward > len)
- return END_OF_SOURCE;
- if ((*src & 0xC0) != 0x80)
- return NON_UNICODE;
- dst += *src & 0x3f;
- }
- else if ((*src & 0xF8) == 0xF0)
- {
- dst = (*src++ & 0x0f) * 0x40;
- if (++forward > len)
- return END_OF_SOURCE;
- if ((*src & 0xC0) != 0x80)
- return NON_UNICODE;
- dst = (dst + (*src++ & 0x3f)) * 0x40;
- if (++forward > len)
- return END_OF_SOURCE;
- if ((*src & 0xC0) != 0x80)
- return NON_UNICODE;
- dst = (dst + (*src++ & 0x3f)) * 0x40;
- if (++forward > len)
- return END_OF_SOURCE;
- if ((*src & 0xC0) != 0x80)
- return NON_UNICODE;
- dst += *src & 0x3f;
- }
- else
- return NON_UNICODE;
-
- return forward;
-}
-
-ACEXML_INLINE int
-ACEXML_Transcoder::utf162ucs4 (const ACEXML_UTF16 *src,
- size_t len,
- ACEXML_UCS4 &dst)
-{
- if (src == 0)
- return INVALID_ARGS;
-
- size_t forward = 1;
- if (*src >= 0xDC00 && *src < 0xE000)
- {
- if (len < 2)
- return END_OF_SOURCE;
- return ACEXML_Transcoder::surrogate2ucs4 (*src,
- *(src+1),
- dst);
- }
- else
- {
- if (len < 1)
- return END_OF_SOURCE;
- dst = *src;
- }
-
- return forward;
-}
diff --git a/ACEXML/examples/SAXPrint/SAXPrint_Handler.cpp b/ACEXML/examples/SAXPrint/SAXPrint_Handler.cpp
index 462c14ba7a7..921f04d881c 100644
--- a/ACEXML/examples/SAXPrint/SAXPrint_Handler.cpp
+++ b/ACEXML/examples/SAXPrint/SAXPrint_Handler.cpp
@@ -206,7 +206,8 @@ ACEXML_SAXPrint_Handler::error (ACEXML_SAXParseException & ex
ACEXML_ENV_ARG_DECL_NOT_USED)
ACE_THROW_SPEC ((ACEXML_SAXException))
{
- ACE_DEBUG ((LM_DEBUG, "%s: line :%d col: %d ", this->fileName_,
+ ACE_DEBUG ((LM_DEBUG, "%s: line :%d col: %d ",
+ (this->locator_->getSystemId() == 0 ? this->fileName_ : this->locator_->getSystemId()),
this->locator_->getLineNumber(),
this->locator_->getColumnNumber()));
ex.print();
@@ -217,7 +218,8 @@ ACEXML_SAXPrint_Handler::fatalError (ACEXML_SAXParseException & ex
ACEXML_ENV_ARG_DECL_NOT_USED)
ACE_THROW_SPEC ((ACEXML_SAXException))
{
- ACE_DEBUG ((LM_DEBUG, "%s: line :%d col: %d ", this->fileName_,
+ ACE_DEBUG ((LM_DEBUG, "%s: line :%d col: %d ",
+ (this->locator_->getSystemId() == 0 ? this->fileName_ : this->locator_->getSystemId()),
this->locator_->getLineNumber(),
this->locator_->getColumnNumber()));
ex.print();
@@ -228,7 +230,8 @@ ACEXML_SAXPrint_Handler::warning (ACEXML_SAXParseException & ex
ACEXML_ENV_ARG_DECL_NOT_USED)
ACE_THROW_SPEC ((ACEXML_SAXException))
{
- ACE_DEBUG ((LM_DEBUG, "%s: line :%d col: %d ", this->fileName_,
+ ACE_DEBUG ((LM_DEBUG, "%s: line :%d col: %d ",
+ (this->locator_->getSystemId() == 0 ? this->fileName_ : this->locator_->getSystemId()),
this->locator_->getLineNumber(),
this->locator_->getColumnNumber()));
ex.print();
diff --git a/ACEXML/examples/SAXPrint/main.cpp b/ACEXML/examples/SAXPrint/main.cpp
index 771411867b1..6986744dd18 100644
--- a/ACEXML/examples/SAXPrint/main.cpp
+++ b/ACEXML/examples/SAXPrint/main.cpp
@@ -110,7 +110,8 @@ ACE_TMAIN (int argc, ACE_TCHAR *argv[])
-1);
ACEXML_Parser parser;
- ACEXML_InputSource input(stm);
+ ACEXML_InputSource* input = 0;
+ ACE_NEW_RETURN (input, ACEXML_InputSource (stm), -1);
parser.setContentHandler (handler);
parser.setDTDHandler (handler);
@@ -119,7 +120,7 @@ ACE_TMAIN (int argc, ACE_TCHAR *argv[])
ACEXML_TRY_NEW_ENV
{
- parser.parse (&input ACEXML_ENV_ARG_PARAMETER);
+ parser.parse (input ACEXML_ENV_ARG_PARAMETER);
ACEXML_TRY_CHECK;
}
ACEXML_CATCH (ACEXML_SAXException, ex)
diff --git a/ACEXML/parser/parser/Entity_Manager.cpp b/ACEXML/parser/parser/Entity_Manager.cpp
index d0c86862805..0b781b6f0e4 100644
--- a/ACEXML/parser/parser/Entity_Manager.cpp
+++ b/ACEXML/parser/parser/Entity_Manager.cpp
@@ -2,45 +2,17 @@
#include "ACEXML/parser/parser/Entity_Manager.h"
+static const ACEXML_Char empty_string[] = { 0 };
+
#if !defined (__ACEXML_INLINE__)
# include "ACEXML/parser/parser/Entity_Manager.i"
#endif /* __ACEXML_INLINE__ */
-static const ACEXML_Char amp_name[] = {'a', 'm', 'p', 0 };
-static const ACEXML_Char amp_value[] = {'&', 0};
-static const ACEXML_Char lt_name[] = {'l', 't', 0};
-static const ACEXML_Char lt_value[] = {'<', 0};
-static const ACEXML_Char gt_name[] = {'g', 't', 0};
-static const ACEXML_Char gt_value[] = {'>', 0};
-static const ACEXML_Char apos_name[] = {'a', 'p', 'o', 's', 0};
-static const ACEXML_Char apos_value[] = {'\'', 0};
-static const ACEXML_Char quot_name[] = {'q', 'u', 'o', 't', 0};
-static const ACEXML_Char quot_value[] = {'"', 0};
ACEXML_Entity_Manager::ACEXML_Entity_Manager (void)
: entities_ ()
{
- // @@ No way to know if these bindings succeed or not.
-
- ACEXML_String ampname (amp_name, 0, 0);
- ACEXML_String ampvalue (amp_value, 0, 0);
- this->entities_.bind (ampname, ampvalue);
-
- ACEXML_String ltname (lt_name, 0, 0);
- ACEXML_String ltvalue (lt_value, 0, 0);
- this->entities_.bind (ltname, ltvalue);
-
- ACEXML_String gtname (gt_name, 0, 0);
- ACEXML_String gtvalue (gt_value, 0, 0);
- this->entities_.bind (gtname, gtvalue);
-
- ACEXML_String aposname (apos_name, 0, 0);
- ACEXML_String aposvalue (apos_value, 0, 0);
- this->entities_.bind (aposname, aposvalue);
- ACEXML_String quotname (quot_name, 0, 0);
- ACEXML_String quotvalue (quot_value, 0, 0);
- this->entities_.bind (quotname, quotvalue);
}
ACEXML_Entity_Manager::~ACEXML_Entity_Manager (void)
diff --git a/ACEXML/parser/parser/Entity_Manager.h b/ACEXML/parser/parser/Entity_Manager.h
index 7d2f4ef9b6e..ba16279eb05 100644
--- a/ACEXML/parser/parser/Entity_Manager.h
+++ b/ACEXML/parser/parser/Entity_Manager.h
@@ -44,6 +44,12 @@ typedef ACE_Hash_Map_Reverse_Iterator_Ex<ACEXML_String,
ACE_Equal_To<ACEXML_String>,
ACE_Null_Mutex> ACEXML_ENTITIES_MANAGER_REVERSE_ITER;
+typedef ACE_Hash_Map_Bucket_Iterator<ACEXML_String,
+ ACEXML_String,
+ ACE_Hash<ACEXML_String>,
+ ACE_Equal_To<ACEXML_String>,
+ ACE_Null_Mutex> ACEXML_ENTITY_ENTRY_ITERATOR;
+
/**
* @class ACEXML_Entity_Manager Entity_Manager.h "ACEXML/parser/parser/Entity_Manager.h"
*
@@ -61,11 +67,21 @@ public:
~ACEXML_Entity_Manager (void);
/// Add a new entity declaration.
- int add_entity (const ACEXML_Char *ref,
- const ACEXML_Char *value);
+ int add_entity (const ACEXML_Char *ref, const ACEXML_Char *value);
/// Resolve an entity reference.
- const ACEXML_String *resolve_entity (const ACEXML_Char *ref);
+ const ACEXML_Char* resolve_entity (const ACEXML_Char *ref);
+
+ /// Resolve an entity reference and return the tuple of @c systemId and
+ /// @c publicId
+ int resolve_entity (const ACEXML_Char* ref, ACEXML_Char*& systemId,
+ ACEXML_Char*& publicId);
+
+ /// Number of items in the Entity Manager
+ const size_t size(void) const;
+
+ /// Reset the state
+ int reset (void);
private:
ACEXML_ENTITIES_MANAGER entities_;
diff --git a/ACEXML/parser/parser/Entity_Manager.i b/ACEXML/parser/parser/Entity_Manager.i
index 696b82b64e4..26da9ad15d9 100644
--- a/ACEXML/parser/parser/Entity_Manager.i
+++ b/ACEXML/parser/parser/Entity_Manager.i
@@ -9,13 +9,46 @@ ACEXML_Entity_Manager::add_entity (const ACEXML_Char *ref,
return this->entities_.bind (name, value);
}
-ACEXML_INLINE const ACEXML_String *
+ACEXML_INLINE const ACEXML_Char*
ACEXML_Entity_Manager::resolve_entity (const ACEXML_Char *ref)
{
ACEXML_ENTITY_ENTRY *entry;
if (this->entities_.find (ACEXML_String (ref, 0, 0),
entry) == 0)
- return &entry->int_id_;
+ return entry->int_id_.c_str();
return 0;
}
+
+ACEXML_INLINE int
+ACEXML_Entity_Manager::resolve_entity (const ACEXML_Char* ref,
+ ACEXML_Char*& systemId,
+ ACEXML_Char*& publicId)
+{
+ publicId = systemId = 0;
+ ACEXML_ENTITY_ENTRY_ITERATOR iter (this->entities_, ref);
+ ACEXML_ENTITY_ENTRY_ITERATOR end (this->entities_, ref, 1);
+
+ if (iter != end)
+ {
+ systemId = ACE_const_cast (ACEXML_Char*, (*iter).int_id_.c_str());
+ ++iter;
+ if (iter != end)
+ publicId = ACE_const_cast (ACEXML_Char*, (*iter).int_id_.c_str());
+ return 0;
+ }
+ return -1;
+}
+
+ACEXML_INLINE int
+ACEXML_Entity_Manager::reset (void)
+{
+ return this->entities_.close();
+}
+
+
+ACEXML_INLINE const size_t
+ACEXML_Entity_Manager::size (void) const
+{
+ return this->entities_.current_size();
+}
diff --git a/ACEXML/parser/parser/Makefile b/ACEXML/parser/parser/Makefile
index b16dcfec6ba..434a79dfacb 100644
--- a/ACEXML/parser/parser/Makefile
+++ b/ACEXML/parser/parser/Makefile
@@ -8,7 +8,9 @@ LIB = libACEXML_Parser.a
SHLIB = libACEXML_Parser.$(SOEXT)
FILES = Entity_Manager \
- Parser
+ Parser \
+ ParserInternals \
+ ParserContext
DEFS = $(addsuffix .h,$(FILES))
LSRC = $(addsuffix .cpp,$(FILES))
@@ -171,22 +173,11 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU
$(ACE_ROOT)/ace/Reactor_Impl.h \
Entity_Manager.i
-.obj/Parser.o .obj/Parser.so .shobj/Parser.o .shobj/Parser.so: Parser.cpp \
- Parser.h \
- $(ACE_ROOT)/ace/pre.h \
- Parser_export.h \
+.obj/Parser.o .obj/Parser.so .shobj/Parser.o .shobj/Parser.so: Parser.cpp $(ACE_ROOT)/ace/ACE.h \
+ $(ACE_ROOT)/ace/pre.h $(ACE_ROOT)/ace/OS.h \
$(ACE_ROOT)/ace/post.h \
$(ACE_ROOT)/ace/ace_wchar.h \
$(ACE_ROOT)/ace/ace_wchar.inl \
- $(ACE_ROOT)/ACEXML/common/XMLReader.h \
- $(ACE_ROOT)/ACEXML/common/ACEXML_Export.h \
- $(ACE_ROOT)/ACEXML/common/ContentHandler.h \
- $(ACE_ROOT)/ACEXML/common/Env.h \
- $(ACE_ROOT)/ACEXML/common/XML_Macros.h \
- $(ACE_ROOT)/ace/Exception_Macros.h \
- $(ACE_ROOT)/ACEXML/common/Exception.h \
- $(ACE_ROOT)/ACEXML/common/XML_Types.h \
- $(ACE_ROOT)/ace/OS.h \
$(ACE_ROOT)/ace/OS_Dirent.h \
$(ACE_ROOT)/ace/OS_Export.h \
$(ACE_ROOT)/ace/OS_Errno.h \
@@ -209,9 +200,6 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU
$(ACE_ROOT)/ace/streams.h \
$(ACE_ROOT)/ace/Trace.h \
$(ACE_ROOT)/ace/OS.i \
- $(ACE_ROOT)/ace/SString.h \
- $(ACE_ROOT)/ace/String_Base.h \
- $(ACE_ROOT)/ace/ACE.h \
$(ACE_ROOT)/ace/Flag_Manip.h \
$(ACE_ROOT)/ace/Flag_Manip.i \
$(ACE_ROOT)/ace/Handle_Ops.h \
@@ -223,6 +211,11 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU
$(ACE_ROOT)/ace/Sock_Connect.h \
$(ACE_ROOT)/ace/Sock_Connect.i \
$(ACE_ROOT)/ace/ACE.i \
+ $(ACE_ROOT)/ACEXML/common/Transcode.h \
+ $(ACE_ROOT)/ACEXML/common/ACEXML_Export.h \
+ $(ACE_ROOT)/ACEXML/common/XML_Types.h \
+ $(ACE_ROOT)/ace/SString.h \
+ $(ACE_ROOT)/ace/String_Base.h \
$(ACE_ROOT)/ace/String_Base_Const.h \
$(ACE_ROOT)/ace/String_Base.i \
$(ACE_ROOT)/ace/Malloc_Base.h \
@@ -273,16 +266,40 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU
$(ACE_ROOT)/ace/Auto_Ptr.i \
$(ACE_ROOT)/ace/Auto_Ptr.cpp \
$(ACE_ROOT)/ace/SString.i \
+ $(ACE_ROOT)/ACEXML/common/Transcode.i \
+ $(ACE_ROOT)/ACEXML/common/AttributesImpl.h \
+ $(ACE_ROOT)/ACEXML/common/Attributes.h \
+ $(ACE_ROOT)/ace/Containers_T.h \
+ $(ACE_ROOT)/ace/Containers.h \
+ $(ACE_ROOT)/ace/Containers.i \
+ $(ACE_ROOT)/ace/Array_Base.h \
+ $(ACE_ROOT)/ace/Array_Base.inl \
+ $(ACE_ROOT)/ace/Array_Base.cpp \
+ $(ACE_ROOT)/ace/Unbounded_Queue.h \
+ $(ACE_ROOT)/ace/Unbounded_Queue.inl \
+ $(ACE_ROOT)/ace/Unbounded_Queue.cpp \
+ $(ACE_ROOT)/ace/Containers_T.i \
+ $(ACE_ROOT)/ace/Containers_T.cpp \
+ $(ACE_ROOT)/ACEXML/common/AttributesImpl.i \
+ $(ACE_ROOT)/ACEXML/common/StrCharStream.h \
+ $(ACE_ROOT)/ACEXML/common/CharStream.h \
+ $(ACE_ROOT)/ACEXML/common/StreamFactory.h \
+ Parser.h \
+ Parser_export.h \
+ $(ACE_ROOT)/ACEXML/common/XMLReader.h \
+ $(ACE_ROOT)/ACEXML/common/ContentHandler.h \
+ $(ACE_ROOT)/ACEXML/common/Env.h \
+ $(ACE_ROOT)/ACEXML/common/XML_Macros.h \
+ $(ACE_ROOT)/ace/Exception_Macros.h \
+ $(ACE_ROOT)/ACEXML/common/Exception.h \
$(ACE_ROOT)/ACEXML/common/Exception.i \
$(ACE_ROOT)/ACEXML/common/Env.i \
$(ACE_ROOT)/ACEXML/common/SAXExceptions.h \
$(ACE_ROOT)/ACEXML/common/SAXExceptions.i \
$(ACE_ROOT)/ACEXML/common/Locator.h \
- $(ACE_ROOT)/ACEXML/common/Attributes.h \
$(ACE_ROOT)/ACEXML/common/DTDHandler.h \
$(ACE_ROOT)/ACEXML/common/EntityResolver.h \
$(ACE_ROOT)/ACEXML/common/InputSource.h \
- $(ACE_ROOT)/ACEXML/common/CharStream.h \
$(ACE_ROOT)/ACEXML/common/ErrorHandler.h \
$(ACE_ROOT)/ACEXML/common/LocatorImpl.h \
$(ACE_ROOT)/ACEXML/common/LocatorImpl.i \
@@ -305,9 +322,6 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU
$(ACE_ROOT)/ace/DLL.h \
$(ACE_ROOT)/ace/Service_Object.i \
$(ACE_ROOT)/ace/Service_Types.i \
- $(ACE_ROOT)/ace/Unbounded_Queue.h \
- $(ACE_ROOT)/ace/Unbounded_Queue.inl \
- $(ACE_ROOT)/ace/Unbounded_Queue.cpp \
$(ACE_ROOT)/ace/XML_Svc_Conf.h \
$(ACE_ROOT)/ace/Service_Config.i \
$(ACE_ROOT)/ace/Reactor.h \
@@ -322,14 +336,6 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU
$(ACE_ROOT)/ace/Timer_Queue_T.cpp \
$(ACE_ROOT)/ace/Reactor.i \
$(ACE_ROOT)/ace/Reactor_Impl.h \
- $(ACE_ROOT)/ace/Containers_T.h \
- $(ACE_ROOT)/ace/Containers.h \
- $(ACE_ROOT)/ace/Containers.i \
- $(ACE_ROOT)/ace/Array_Base.h \
- $(ACE_ROOT)/ace/Array_Base.inl \
- $(ACE_ROOT)/ace/Array_Base.cpp \
- $(ACE_ROOT)/ace/Containers_T.i \
- $(ACE_ROOT)/ace/Containers_T.cpp \
$(ACE_ROOT)/ACEXML/common/NamespaceSupport.i \
$(ACE_ROOT)/ace/Obstack.h \
$(ACE_ROOT)/ace/Obstack_T.h \
@@ -339,11 +345,224 @@ include $(ACE_ROOT)/include/makeinclude/rules.local.GNU
$(ACE_ROOT)/ace/Obstack_T.cpp \
Entity_Manager.h \
Entity_Manager.i \
- ParserErrors.h \
- Parser.i \
- $(ACE_ROOT)/ACEXML/common/Transcode.h \
- $(ACE_ROOT)/ACEXML/common/Transcode.i \
- $(ACE_ROOT)/ACEXML/common/AttributesImpl.h \
- $(ACE_ROOT)/ACEXML/common/AttributesImpl.i
+ ParserInternals.h \
+ ParserContext.h \
+ ParserContext.inl \
+ Parser.i
+
+.obj/ParserInternals.o .obj/ParserInternals.so .shobj/ParserInternals.o .shobj/ParserInternals.so: ParserInternals.cpp \
+ ParserInternals.h \
+ $(ACE_ROOT)/ace/pre.h \
+ Parser_export.h \
+ $(ACE_ROOT)/ace/post.h \
+ $(ACE_ROOT)/ace/ace_wchar.h \
+ $(ACE_ROOT)/ace/ace_wchar.inl \
+ $(ACE_ROOT)/ACEXML/common/XML_Types.h \
+ $(ACE_ROOT)/ace/OS.h \
+ $(ACE_ROOT)/ace/OS_Dirent.h \
+ $(ACE_ROOT)/ace/OS_Export.h \
+ $(ACE_ROOT)/ace/OS_Errno.h \
+ $(ACE_ROOT)/ace/OS_Errno.inl \
+ $(ACE_ROOT)/ace/OS_Dirent.inl \
+ $(ACE_ROOT)/ace/OS_String.h \
+ $(ACE_ROOT)/ace/Basic_Types.h \
+ $(ACE_ROOT)/ace/ACE_export.h \
+ $(ACE_ROOT)/ace/Basic_Types.i \
+ $(ACE_ROOT)/ace/OS_String.inl \
+ $(ACE_ROOT)/ace/OS_Memory.h \
+ $(ACE_ROOT)/ace/OS_Memory.inl \
+ $(ACE_ROOT)/ace/OS_TLI.h \
+ $(ACE_ROOT)/ace/OS_TLI.inl \
+ $(ACE_ROOT)/ace/Time_Value.h \
+ $(ACE_ROOT)/ace/Time_Value.inl \
+ $(ACE_ROOT)/ace/Default_Constants.h \
+ $(ACE_ROOT)/ace/Global_Macros.h \
+ $(ACE_ROOT)/ace/Min_Max.h \
+ $(ACE_ROOT)/ace/streams.h \
+ $(ACE_ROOT)/ace/Trace.h \
+ $(ACE_ROOT)/ace/OS.i \
+ $(ACE_ROOT)/ace/SString.h \
+ $(ACE_ROOT)/ace/String_Base.h \
+ $(ACE_ROOT)/ace/ACE.h \
+ $(ACE_ROOT)/ace/Flag_Manip.h \
+ $(ACE_ROOT)/ace/Flag_Manip.i \
+ $(ACE_ROOT)/ace/Handle_Ops.h \
+ $(ACE_ROOT)/ace/Handle_Ops.i \
+ $(ACE_ROOT)/ace/Lib_Find.h \
+ $(ACE_ROOT)/ace/Lib_Find.i \
+ $(ACE_ROOT)/ace/Init_ACE.h \
+ $(ACE_ROOT)/ace/Init_ACE.i \
+ $(ACE_ROOT)/ace/Sock_Connect.h \
+ $(ACE_ROOT)/ace/Sock_Connect.i \
+ $(ACE_ROOT)/ace/ACE.i \
+ $(ACE_ROOT)/ace/String_Base_Const.h \
+ $(ACE_ROOT)/ace/String_Base.i \
+ $(ACE_ROOT)/ace/Malloc_Base.h \
+ $(ACE_ROOT)/ace/String_Base.cpp \
+ $(ACE_ROOT)/ace/Malloc.h \
+ $(ACE_ROOT)/ace/Log_Msg.h \
+ $(ACE_ROOT)/ace/Log_Priority.h \
+ $(ACE_ROOT)/ace/OS_Log_Msg_Attributes.h \
+ $(ACE_ROOT)/ace/OS_Log_Msg_Attributes.inl \
+ $(ACE_ROOT)/ace/Malloc.i \
+ $(ACE_ROOT)/ace/Malloc_T.h \
+ $(ACE_ROOT)/ace/Synch.h \
+ $(ACE_ROOT)/ace/Synch.i \
+ $(ACE_ROOT)/ace/Synch_T.h \
+ $(ACE_ROOT)/ace/Synch_T.i \
+ $(ACE_ROOT)/ace/Thread.h \
+ $(ACE_ROOT)/ace/Thread_Adapter.h \
+ $(ACE_ROOT)/ace/Base_Thread_Adapter.h \
+ $(ACE_ROOT)/ace/Base_Thread_Adapter.inl \
+ $(ACE_ROOT)/ace/Thread_Adapter.inl \
+ $(ACE_ROOT)/ace/Thread.i \
+ $(ACE_ROOT)/ace/Synch_T.cpp \
+ $(ACE_ROOT)/ace/Malloc_Allocator.h \
+ $(ACE_ROOT)/ace/Malloc_Allocator.i \
+ $(ACE_ROOT)/ace/Free_List.h \
+ $(ACE_ROOT)/ace/Free_List.i \
+ $(ACE_ROOT)/ace/Free_List.cpp \
+ $(ACE_ROOT)/ace/Malloc_T.i \
+ $(ACE_ROOT)/ace/Malloc_T.cpp \
+ $(ACE_ROOT)/ace/Memory_Pool.h \
+ $(ACE_ROOT)/ace/Event_Handler.h \
+ $(ACE_ROOT)/ace/Event_Handler.i \
+ $(ACE_ROOT)/ace/Signal.h \
+ $(ACE_ROOT)/ace/Signal.i \
+ $(ACE_ROOT)/ace/Mem_Map.h \
+ $(ACE_ROOT)/ace/Mem_Map.i \
+ $(ACE_ROOT)/ace/SV_Semaphore_Complex.h \
+ $(ACE_ROOT)/ace/SV_Semaphore_Simple.h \
+ $(ACE_ROOT)/ace/SV_Semaphore_Simple.i \
+ $(ACE_ROOT)/ace/SV_Semaphore_Complex.i \
+ $(ACE_ROOT)/ace/Unbounded_Set.h \
+ $(ACE_ROOT)/ace/Node.h \
+ $(ACE_ROOT)/ace/Node.cpp \
+ $(ACE_ROOT)/ace/Unbounded_Set.inl \
+ $(ACE_ROOT)/ace/Unbounded_Set.cpp \
+ $(ACE_ROOT)/ace/Memory_Pool.i \
+ $(ACE_ROOT)/ace/Auto_Ptr.h \
+ $(ACE_ROOT)/ace/Auto_Ptr.i \
+ $(ACE_ROOT)/ace/Auto_Ptr.cpp \
+ $(ACE_ROOT)/ace/SString.i
+
+.obj/ParserContext.o .obj/ParserContext.so .shobj/ParserContext.o .shobj/ParserContext.so: ParserContext.cpp \
+ ParserContext.h \
+ $(ACE_ROOT)/ace/pre.h \
+ Parser_export.h \
+ $(ACE_ROOT)/ace/post.h \
+ $(ACE_ROOT)/ace/ace_wchar.h \
+ $(ACE_ROOT)/ace/ace_wchar.inl \
+ $(ACE_ROOT)/ACEXML/common/XML_Types.h \
+ $(ACE_ROOT)/ace/OS.h \
+ $(ACE_ROOT)/ace/OS_Dirent.h \
+ $(ACE_ROOT)/ace/OS_Export.h \
+ $(ACE_ROOT)/ace/OS_Errno.h \
+ $(ACE_ROOT)/ace/OS_Errno.inl \
+ $(ACE_ROOT)/ace/OS_Dirent.inl \
+ $(ACE_ROOT)/ace/OS_String.h \
+ $(ACE_ROOT)/ace/Basic_Types.h \
+ $(ACE_ROOT)/ace/ACE_export.h \
+ $(ACE_ROOT)/ace/Basic_Types.i \
+ $(ACE_ROOT)/ace/OS_String.inl \
+ $(ACE_ROOT)/ace/OS_Memory.h \
+ $(ACE_ROOT)/ace/OS_Memory.inl \
+ $(ACE_ROOT)/ace/OS_TLI.h \
+ $(ACE_ROOT)/ace/OS_TLI.inl \
+ $(ACE_ROOT)/ace/Time_Value.h \
+ $(ACE_ROOT)/ace/Time_Value.inl \
+ $(ACE_ROOT)/ace/Default_Constants.h \
+ $(ACE_ROOT)/ace/Global_Macros.h \
+ $(ACE_ROOT)/ace/Min_Max.h \
+ $(ACE_ROOT)/ace/streams.h \
+ $(ACE_ROOT)/ace/Trace.h \
+ $(ACE_ROOT)/ace/OS.i \
+ $(ACE_ROOT)/ace/SString.h \
+ $(ACE_ROOT)/ace/String_Base.h \
+ $(ACE_ROOT)/ace/ACE.h \
+ $(ACE_ROOT)/ace/Flag_Manip.h \
+ $(ACE_ROOT)/ace/Flag_Manip.i \
+ $(ACE_ROOT)/ace/Handle_Ops.h \
+ $(ACE_ROOT)/ace/Handle_Ops.i \
+ $(ACE_ROOT)/ace/Lib_Find.h \
+ $(ACE_ROOT)/ace/Lib_Find.i \
+ $(ACE_ROOT)/ace/Init_ACE.h \
+ $(ACE_ROOT)/ace/Init_ACE.i \
+ $(ACE_ROOT)/ace/Sock_Connect.h \
+ $(ACE_ROOT)/ace/Sock_Connect.i \
+ $(ACE_ROOT)/ace/ACE.i \
+ $(ACE_ROOT)/ace/String_Base_Const.h \
+ $(ACE_ROOT)/ace/String_Base.i \
+ $(ACE_ROOT)/ace/Malloc_Base.h \
+ $(ACE_ROOT)/ace/String_Base.cpp \
+ $(ACE_ROOT)/ace/Malloc.h \
+ $(ACE_ROOT)/ace/Log_Msg.h \
+ $(ACE_ROOT)/ace/Log_Priority.h \
+ $(ACE_ROOT)/ace/OS_Log_Msg_Attributes.h \
+ $(ACE_ROOT)/ace/OS_Log_Msg_Attributes.inl \
+ $(ACE_ROOT)/ace/Malloc.i \
+ $(ACE_ROOT)/ace/Malloc_T.h \
+ $(ACE_ROOT)/ace/Synch.h \
+ $(ACE_ROOT)/ace/Synch.i \
+ $(ACE_ROOT)/ace/Synch_T.h \
+ $(ACE_ROOT)/ace/Synch_T.i \
+ $(ACE_ROOT)/ace/Thread.h \
+ $(ACE_ROOT)/ace/Thread_Adapter.h \
+ $(ACE_ROOT)/ace/Base_Thread_Adapter.h \
+ $(ACE_ROOT)/ace/Base_Thread_Adapter.inl \
+ $(ACE_ROOT)/ace/Thread_Adapter.inl \
+ $(ACE_ROOT)/ace/Thread.i \
+ $(ACE_ROOT)/ace/Synch_T.cpp \
+ $(ACE_ROOT)/ace/Malloc_Allocator.h \
+ $(ACE_ROOT)/ace/Malloc_Allocator.i \
+ $(ACE_ROOT)/ace/Free_List.h \
+ $(ACE_ROOT)/ace/Free_List.i \
+ $(ACE_ROOT)/ace/Free_List.cpp \
+ $(ACE_ROOT)/ace/Malloc_T.i \
+ $(ACE_ROOT)/ace/Malloc_T.cpp \
+ $(ACE_ROOT)/ace/Memory_Pool.h \
+ $(ACE_ROOT)/ace/Event_Handler.h \
+ $(ACE_ROOT)/ace/Event_Handler.i \
+ $(ACE_ROOT)/ace/Signal.h \
+ $(ACE_ROOT)/ace/Signal.i \
+ $(ACE_ROOT)/ace/Mem_Map.h \
+ $(ACE_ROOT)/ace/Mem_Map.i \
+ $(ACE_ROOT)/ace/SV_Semaphore_Complex.h \
+ $(ACE_ROOT)/ace/SV_Semaphore_Simple.h \
+ $(ACE_ROOT)/ace/SV_Semaphore_Simple.i \
+ $(ACE_ROOT)/ace/SV_Semaphore_Complex.i \
+ $(ACE_ROOT)/ace/Unbounded_Set.h \
+ $(ACE_ROOT)/ace/Node.h \
+ $(ACE_ROOT)/ace/Node.cpp \
+ $(ACE_ROOT)/ace/Unbounded_Set.inl \
+ $(ACE_ROOT)/ace/Unbounded_Set.cpp \
+ $(ACE_ROOT)/ace/Memory_Pool.i \
+ $(ACE_ROOT)/ace/Auto_Ptr.h \
+ $(ACE_ROOT)/ace/Auto_Ptr.i \
+ $(ACE_ROOT)/ace/Auto_Ptr.cpp \
+ $(ACE_ROOT)/ace/SString.i \
+ $(ACE_ROOT)/ACEXML/common/InputSource.h \
+ $(ACE_ROOT)/ACEXML/common/ACEXML_Export.h \
+ $(ACE_ROOT)/ACEXML/common/CharStream.h \
+ $(ACE_ROOT)/ACEXML/common/Locator.h \
+ $(ACE_ROOT)/ACEXML/common/LocatorImpl.h \
+ $(ACE_ROOT)/ACEXML/common/LocatorImpl.i \
+ $(ACE_ROOT)/ace/Functor.h \
+ $(ACE_ROOT)/ace/Functor.i \
+ $(ACE_ROOT)/ace/Functor_T.h \
+ $(ACE_ROOT)/ace/Functor_T.i \
+ $(ACE_ROOT)/ace/Functor_T.cpp \
+ $(ACE_ROOT)/ace/Containers_T.h \
+ $(ACE_ROOT)/ace/Containers.h \
+ $(ACE_ROOT)/ace/Containers.i \
+ $(ACE_ROOT)/ace/Array_Base.h \
+ $(ACE_ROOT)/ace/Array_Base.inl \
+ $(ACE_ROOT)/ace/Array_Base.cpp \
+ $(ACE_ROOT)/ace/Unbounded_Queue.h \
+ $(ACE_ROOT)/ace/Unbounded_Queue.inl \
+ $(ACE_ROOT)/ace/Unbounded_Queue.cpp \
+ $(ACE_ROOT)/ace/Containers_T.i \
+ $(ACE_ROOT)/ace/Containers_T.cpp \
+ ParserContext.inl
# IF YOU PUT ANYTHING HERE IT WILL GO AWAY
diff --git a/ACEXML/parser/parser/Parser.cpp b/ACEXML/parser/parser/Parser.cpp
index 3dd7cba6216..c4da35f3260 100644
--- a/ACEXML/parser/parser/Parser.cpp
+++ b/ACEXML/parser/parser/Parser.cpp
@@ -1,9 +1,12 @@
// $Id$
-#include "ACEXML/parser/parser/Parser.h"
+#include "ace/ACE.h"
#include "ACEXML/common/Transcode.h"
#include "ACEXML/common/AttributesImpl.h"
-#include "ace/ACE.h"
+#include "ACEXML/common/StrCharStream.h"
+#include "ACEXML/common/StreamFactory.h"
+#include "ACEXML/parser/parser/Parser.h"
+#include "ACEXML/parser/parser/ParserInternals.h"
#if !defined (__ACEXML_INLINE__)
# include "ACEXML/parser/parser/Parser.i"
@@ -21,88 +24,25 @@ ACEXML_Parser::namespaces_feature_[] = ACE_TEXT ("http://xml.org/sax/features/na
const ACEXML_Char
ACEXML_Parser::namespace_prefixes_feature_[] = ACE_TEXT ("http://xml.org/sax/features/namespace-prefixes");
-
-static const ACEXML_Char* ACEXML_Parser_Msg[] = {
- ACE_TEXT ("Invalid input source"),
- ACE_TEXT ("Expecting '<'"),
- ACE_TEXT ("Expecting '>'"),
- ACE_TEXT ("Invalid comment"),
- ACE_TEXT ("Duplicate DOCTYPE definition"),
- ACE_TEXT ("Unexpected EOF"),
- ACE_TEXT ("Invalid XMLDecl ('<?xml' ?)"),
- ACE_TEXT ("Unrecognized XMLDecl ('version'?)"),
- ACE_TEXT ("ACEXML only supports XML Version 1.0 documents"),
- ACE_TEXT ("Encoding declaration doesn't match auto-detected encoding"),
- ACE_TEXT ("ACEXML Parser Internal error"),
- ACE_TEXT ("PITarget name cannot start with 'xml'"),
- ACE_TEXT ("Expecting keyword 'DOCTYPE'"),
- ACE_TEXT ("Expecting a DOCTYPE name"),
- ACE_TEXT ("Root element missing"),
- ACE_TEXT ("Error reading attribute"),
- ACE_TEXT ("Duplicate namespace prefix"),
- ACE_TEXT ("Duplicate attribute found"),
- ACE_TEXT ("Cannot have both namespaces and namespace_prefixes simultaneously"),
- ACE_TEXT ("Unexpected character"),
- ACE_TEXT ("Mismatched End-tag encountered"),
- ACE_TEXT ("Expecting '[CDATA[' section"),
- ACE_TEXT ("Invalid keyword in markupdecl"),
- ACE_TEXT ("Invalid character following '<!' in markupdecl"),
- ACE_TEXT ("Expecting markupdecl or DeclSep"),
- ACE_TEXT ("Expecting keyword `ELEMENT'"),
- ACE_TEXT ("Error reading element name"),
- ACE_TEXT ("Expecting keyword `EMPTY' in ELEMENT definition."),
- ACE_TEXT ("Expecting keyword `ANY' in ELEMENT definition."),
- ACE_TEXT ("Error reading ELEMENT definition."),
- ACE_TEXT ("Expecting keyword `ENTITY'"),
- ACE_TEXT ("Can't use a reference when defining entity name"),
- ACE_TEXT ("Error reading ENTITY name."),
- ACE_TEXT ("Error reading ENTITY value."),
- ACE_TEXT ("Duplicate ENTITY definition"),
- ACE_TEXT ("Invalid ExternalID definition (system ID missing)"),
- ACE_TEXT ("Unexpected keyword NDATA in PEDecl"),
- ACE_TEXT ("Expecting keyword NDATA"),
- ACE_TEXT ("Expecting keyword `ATTLIST'"),
- ACE_TEXT ("Error reading attribute name"),
- ACE_TEXT ("Expecting keyword `CDATA'"),
- ACE_TEXT ("Expecting keyword `ID', `IDREF', or `IDREFS'"),
- ACE_TEXT ("Expecting keyword `ENTITY', or `ENTITIES'"),
- ACE_TEXT ("Expecting keyword `NMTOKEN', `NMTOKENS', or `NOTATION'"),
- ACE_TEXT ("Expecting keyword `NMTOKEN' or `NMTOKENS'"),
- ACE_TEXT ("Expecting keyword `NOTATION'"),
- ACE_TEXT ("Expecting `(' following NOTATION"),
- ACE_TEXT ("Error reading NOTATION name"),
- ACE_TEXT ("Error reading enumerated NMTOKEN name"),
- ACE_TEXT ("Invalid Attribute Type"),
- ACE_TEXT ("Expecting keyword `#REQUIRED'"),
- ACE_TEXT ("Expecting keyword `#IMPLIED'"),
- ACE_TEXT ("Expecting keyword `#FIXED'"),
- ACE_TEXT ("Error parsing `#FIXED' attribute value"),
- ACE_TEXT ("Invalid notation name."),
- ACE_TEXT ("Expecting keyword 'SYSTEM'"),
- ACE_TEXT ("Expecting keyword 'PUBLIC'"),
- ACE_TEXT ("Error parsing system/public literal"),
- ACE_TEXT ("Expecting either keyword `SYSTEM' or `PUBLIC'."),
- ACE_TEXT ("Expecting keyword `#PCDATA'"),
- ACE_TEXT ("Expecting end of Mixed section"),
- ACE_TEXT ("Expecting closing `)*' or ')'"),
- ACE_TEXT ("Error reading sub-element name"),
- ACE_TEXT ("Expecting `,', `|', or `)' while defining an element."),
- ACE_TEXT ("Invalid character reference")
-};
-
ACEXML_Parser::ACEXML_Parser (void)
: dtd_handler_ (0),
entity_resolver_ (0),
content_handler_ (0),
error_handler_ (0),
- instream_ (0),
doctype_ (0),
- dtd_system_ (0),
- dtd_public_ (0),
- locator_(),
+ alt_stack_ (MAXPATHLEN),
+ nested_namespace_ (0),
+ ref_state_ (ACEXML_ParserInt::INVALID),
+ external_subset_ (0),
+ external_entity_ (0),
+ has_pe_refs_ (0),
simple_parsing_ (0),
+ validate_ (1),
namespaces_(1),
- namespace_prefixes_ (0)
+ namespace_prefixes_ (0),
+ standalone_ (0),
+ external_dtd_ (0),
+ internal_dtd_ (0)
{
}
@@ -111,185 +51,125 @@ ACEXML_Parser::~ACEXML_Parser (void)
}
int
-ACEXML_Parser::getFeature (const ACEXML_Char *name ACEXML_ENV_ARG_DECL)
- ACE_THROW_SPEC ((ACEXML_SAXNotRecognizedException,
- ACEXML_SAXNotSupportedException))
+ACEXML_Parser::initialize(ACEXML_InputSource* input)
{
- if (ACE_OS::strcmp (name, ACEXML_Parser::simple_parsing_feature_) == 0)
- {
- return this->simple_parsing_;
- }
- else if (ACE_OS::strcmp (name, ACEXML_Parser::namespaces_feature_) == 0)
- {
- return this->namespaces_;
- }
- else if (ACE_OS::strcmp (name,
- ACEXML_Parser::namespace_prefixes_feature_) == 0)
+ for (int i = 0; i < 5; ++i)
{
- return this->namespace_prefixes_;
+ if (this->predef_entities_.add_entity (ACEXML_ParserInt::predef_ent_[i],
+ ACEXML_ParserInt::predef_val_[i])
+ != 0)
+ {
+ ACE_ERROR ((LM_DEBUG, ACE_TEXT ("Error adding entity %s to Manager"),
+ ACEXML_ParserInt::predef_ent_[i]));
+ return -1;
+ }
}
-
- ACEXML_THROW_RETURN (ACEXML_SAXNotRecognizedException (name), -1);
+ return this->switch_input (input);
}
-
-
void
-ACEXML_Parser::setFeature (const ACEXML_Char *name,
- int boolean_value ACEXML_ENV_ARG_DECL)
- ACE_THROW_SPEC ((ACEXML_SAXNotRecognizedException,
- ACEXML_SAXNotSupportedException))
+ACEXML_Parser::parse (ACEXML_InputSource *input ACEXML_ENV_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException))
{
- if (ACE_OS::strcmp (name, ACEXML_Parser::simple_parsing_feature_) == 0)
+ if (input == 0)
{
- this->simple_parsing_ = (boolean_value == 0 ? 0 : 1);
- return;
- }
- else if (ACE_OS::strcmp (name, ACEXML_Parser::namespaces_feature_) == 0)
- {
- this->namespaces_ = (boolean_value == 0 ? 0 : 1);
- return;
+ this->fatal_error(ACE_TEXT ("Invalid input source")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK;
}
- else if (ACE_OS::strcmp (name,
- ACEXML_Parser::namespace_prefixes_feature_) == 0)
+ if (this->initialize(input) == -1)
{
- this->namespace_prefixes_ = (boolean_value == 0 ? 0 : 1);
- return;
+ this->fatal_error (ACE_TEXT ("Failed to initialize parser state")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK;
}
+ // Set up Locator.
+ if (this->content_handler_)
+ this->content_handler_->setDocumentLocator (this->current_.getLocator());
- ACEXML_THROW (ACEXML_SAXNotRecognizedException (name));
-}
-
-void *
-ACEXML_Parser::getProperty (const ACEXML_Char *name ACEXML_ENV_ARG_DECL)
- ACE_THROW_SPEC ((ACEXML_SAXNotRecognizedException,
- ACEXML_SAXNotSupportedException))
-{
- ACEXML_THROW_RETURN (ACEXML_SAXNotSupportedException (name), 0);
-}
-
-void
-ACEXML_Parser::setProperty (const ACEXML_Char *name,
- void *value ACEXML_ENV_ARG_DECL)
- ACE_THROW_SPEC ((ACEXML_SAXNotRecognizedException,
- ACEXML_SAXNotSupportedException))
-{
- ACE_UNUSED_ARG (value);
-
- ACEXML_THROW (ACEXML_SAXNotSupportedException (name));
-}
-
-void
-ACEXML_Parser::report_error (ACEXML_Error minor_code ACEXML_ENV_ARG_DECL)
-{
- ACEXML_SAXParseException* exception = 0;
- ACE_NEW_NORETURN (exception,
- ACEXML_SAXParseException (ACEXML_Parser_Msg[minor_code]));
- if (this->error_handler_)
- this->error_handler_->error (*exception ACEXML_ENV_ARG_PARAMETER);
- else
- ACEXML_ENV_RAISE (exception);
- return;
-}
-
-void
-ACEXML_Parser::report_warning (ACEXML_Error minor_code ACEXML_ENV_ARG_DECL)
-{
- ACEXML_SAXParseException* exception = 0;
- ACE_NEW_NORETURN (exception,
- ACEXML_SAXParseException (ACEXML_Parser_Msg[minor_code]));
- if (this->error_handler_)
- this->error_handler_->warning (*exception ACEXML_ENV_ARG_PARAMETER);
- return;
-}
-
-void
-ACEXML_Parser::report_fatal_error (ACEXML_Error minor_code ACEXML_ENV_ARG_DECL)
-{
- ACEXML_SAXParseException* exception = 0;
- ACE_NEW_NORETURN (exception,
- ACEXML_SAXParseException (ACEXML_Parser_Msg[minor_code]));
- if (this->error_handler_)
- this->error_handler_->fatalError (*exception ACEXML_ENV_ARG_PARAMETER);
- ACEXML_ENV_RAISE (exception);
- return;
-}
-
-void
-ACEXML_Parser::parse (ACEXML_InputSource *input ACEXML_ENV_ARG_DECL)
- ACE_THROW_SPEC ((ACEXML_SAXException))
-{
- if (input == 0 || (this->instream_ = input->getCharStream ()) == 0)
+ int xmldecl_defined = 0;
+ ACEXML_Char fwd = this->get(); // Consume '<'
+ if (fwd == '<' && this->peek() == '?')
{
- this->report_fatal_error(ACEXML_INVIP ACEXML_ENV_ARG_PARAMETER);
- return;
+ this->get(); // Consume '?'
+ fwd = this->peek();
+ if (fwd == 'x' && !xmldecl_defined)
+ {
+ this->parse_xml_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER);
+ ACEXML_CHECK;
+ xmldecl_defined = 1;
+ }
}
-
- // Set up Locator. At this point, the systemId and publicId are null. We
- // can't do better, as we don't know anything about the InputSource
- // currently, and according to the SAX spec, the parser should set up the
- // locator before reporting any document events.
- if (this->content_handler_)
- this->content_handler_->setDocumentLocator (&this->locator_);
-
- if (this->simple_parsing_ == 0)
+ // We need a XMLDecl in a Valid XML document
+ if (this->validate_ && !xmldecl_defined)
{
- this->parse_xml_prolog (ACEXML_ENV_SINGLE_ARG_PARAMETER);
+ this->fatal_error (ACE_TEXT ("Expecting an XMLDecl at the beginning of"
+ " a valid document")
+ ACEXML_ENV_ARG_PARAMETER);
ACEXML_CHECK;
}
-
this->content_handler_->startDocument (ACEXML_ENV_SINGLE_ARG_PARAMETER);
ACEXML_CHECK;
int doctype_defined = 0;
-
for (int prolog_done = 0; prolog_done == 0; )
{
- if (this->skip_whitespace (0) != '<')
+ // Expect a '<' only if we have encountered a XMLDecl, or we are
+ // looping through Misc blocks.
+ if (xmldecl_defined)
{
- this->report_fatal_error (ACEXML_LESS ACEXML_ENV_ARG_PARAMETER);
- return;
- }
- ACEXML_Char fwd = this->peek ();
- switch (fwd)
- {
- case '!':
- this->get (); // consume the '!'
- fwd = this->peek ();
- if (fwd == 'D' && !doctype_defined) // DOCTYPE
+ if (this->skip_whitespace () != '<')
{
- // This will also take care of the trailing MISC block if any.
- this->parse_doctypedecl (ACEXML_ENV_SINGLE_ARG_PARAMETER);
+ this->fatal_error (ACE_TEXT ("Expecting '<' at the beginning of "
+ "Misc section")
+ ACEXML_ENV_ARG_PARAMETER);
ACEXML_CHECK;
- doctype_defined = 1;
- break;
}
- else if (fwd == '-') // COMMENT
- {
- if (this->grok_comment () < 0)
- {
- this->report_fatal_error(ACEXML_INVCO
- ACEXML_ENV_ARG_PARAMETER);
- return;
- }
- }
- else
- {
- this->report_fatal_error (ACEXML_DUPDOC
- ACEXML_ENV_ARG_PARAMETER);
- return;
- }
- break;
- case '?':
- this->parse_processing_instruction (ACEXML_ENV_SINGLE_ARG_PARAMETER);
- ACEXML_CHECK;
- break;
- case 0:
- this->report_fatal_error (ACEXML_EOF ACEXML_ENV_ARG_PARAMETER);
- return;
- default: // Root element begins
- prolog_done = 1;
- break;
+ fwd = this->get();
+ }
+ switch (fwd)
+ {
+ case '?':
+ this->parse_processing_instruction (ACEXML_ENV_SINGLE_ARG_PARAMETER);
+ ACEXML_CHECK;
+ xmldecl_defined = 1;
+ break;
+ case '!':
+ fwd = this->peek ();
+ if (fwd == 'D' && !doctype_defined) // DOCTYPE
+ {
+ // This will also take care of the trailing MISC block if any.
+ this->parse_doctypedecl (ACEXML_ENV_SINGLE_ARG_PARAMETER);
+ ACEXML_CHECK;
+ doctype_defined = 1;
+ // Now that we have a DOCTYPE Decl defined, we shouldn't
+ // accept XML Decl any longer
+ xmldecl_defined = 1;
+ }
+ else if (fwd == 'D')
+ {
+ this->fatal_error (ACE_TEXT ("Duplicate DOCTYPE declaration")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK;
+ }
+ else if (fwd == '-') // COMMENT
+ {
+ if (this->parse_comment () < 0)
+ {
+ this->fatal_error(ACE_TEXT ("Invalid comment in document")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK;
+ }
+ xmldecl_defined = 1;
+ }
+ break;
+ case 0:
+ this->fatal_error (ACE_TEXT ("Unexpected end-of-file")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK;
+ default: // Root element begins
+ prolog_done = 1;
+ break;
}
}
@@ -300,8 +180,8 @@ ACEXML_Parser::parse (ACEXML_InputSource *input ACEXML_ENV_ARG_DECL)
this->content_handler_->endDocument (ACEXML_ENV_SINGLE_ARG_PARAMETER);
ACEXML_CHECK;
- // Reset the Locator held within the parser
- this->locator_.reset();
+ // Reset the parser state
+ this->reset();
}
void
@@ -315,640 +195,924 @@ ACEXML_Parser::parse (const ACEXML_Char *systemId ACEXML_ENV_ARG_DECL)
}
-void
-ACEXML_Parser::parse_xml_prolog (ACEXML_ENV_SINGLE_ARG_DECL)
- ACE_THROW_SPEC ((ACEXML_SAXException))
+
+// Parse a document type declaration, starting at the keyword DOCTYPE
+// (the caller has already consumed "<!").  Stores the declared root
+// element name in doctype_, then handles the optional ExternalID and the
+// optional internal subset, and finally expects the closing '>'.
+// Returns 0 on success and -1 on error; errors are also reported through
+// fatal_error().
+int
+ACEXML_Parser::parse_doctypedecl (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException))
{
- if (this->parse_token (ACE_TEXT("<?xml")) < 0)
+ if (this->parse_token (ACE_TEXT ("DOCTYPE")) < 0)
{
- this->report_fatal_error(ACEXML_INVXMLDECL ACEXML_ENV_ARG_PARAMETER);
- return;
+ this->fatal_error(ACE_TEXT ("Expecting keyword DOCTYPE in a doctypedecl")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
}
- ACEXML_Char *astring;
- if (this->skip_whitespace (0) != 'v' // Discard whitespace
- || this->parse_token (ACE_TEXT("ersion")) < 0
- || this->skip_equal () != 0
- || this->get_quoted_string (astring) != 0)
+ ACEXML_Char nextch = 0;
+ if (this->skip_whitespace_count (&nextch) == 0)
{
- this->report_fatal_error (ACEXML_INVVERSION ACEXML_ENV_ARG_PARAMETER);
- return;
+ this->fatal_error(ACE_TEXT ("Expecting a space between DOCTYPE keyword "
+ "and name") ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
}
- if (ACE_OS::strcmp (astring, ACE_TEXT ("1.0")) != 0)
+
+ this->doctype_ = this->parse_name ();
+ if (this->doctype_ == 0)
{
- this->report_fatal_error (ACEXML_ENOTSUP ACEXML_ENV_ARG_PARAMETER);
- return;
+ this->fatal_error(ACE_TEXT ("Invalid DOCTYPE name")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
}
+ int count = this->skip_whitespace_count (&nextch);
- ACEXML_Char fwd = this->skip_whitespace (0);
- if (fwd != '?')
+ if (nextch == 'S' || nextch == 'P') // ExternalID defined
+ {
+ if (count == 0)
{
- if (fwd == 'e')
- {
- if ((this->parse_token (ACE_TEXT("ncoding")) == 0) &&
- this->skip_equal () == 0 &&
- this->get_quoted_string (astring) == 0)
- {
- if (ACE_OS::strcmp (astring,
- this->instream_->getEncoding()) != 0)
- {
- ACE_ERROR ((LM_ERROR,
- ACE_TEXT ("Detected Encoding is %s ")
- ACE_TEXT (": Declared Encoding is %s\n"),
- this->instream_->getEncoding(), astring));
- this->report_warning (ACEXML_ENCMISMATCH
- ACEXML_ENV_ARG_PARAMETER);
- }
- fwd = this->skip_whitespace (0);
- }
- }
- if (fwd == 's')
- {
- if ((this->parse_token (ACE_TEXT("tandalone")) == 0) &&
- this->skip_equal () == 0 &&
- this->get_quoted_string (astring) == 0)
- {
- if (ACE_OS::strcmp (astring, ACE_TEXT ("yes")) == 0)
- this->standalone_ = 1;
- else if (ACE_OS::strcmp (astring, ACE_TEXT ("no")) == 0)
- this->standalone_ = 0;
- fwd = this->skip_whitespace (0);
- }
- }
- }
- if (fwd == '?' && this->get() == '>')
- return;
- // All the rules fail. So return an error.
- this->report_fatal_error (ACEXML_INVXMLDECL ACEXML_ENV_ARG_PARAMETER);
- return;
-}
-
-int
-ACEXML_Parser::grok_comment (void)
-{
- /// Simply filter out all the comment
- int state = 0;
+ // No whitespace was skipped between the DOCTYPE name and the
+ // SYSTEM/PUBLIC keyword.
+ this->fatal_error(ACE_TEXT ("Expecting a space between DOCTYPE "
+ "name and ExternalID")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ this->external_dtd_ = 1;
+ this->parse_external_dtd (ACEXML_ENV_SINGLE_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
- if (this->get () != '-' || // Skip the opening "<!--"
- this->get () != '-' || // completely.
- this->get () == '-') // and at least something not '-'.
- return -1;
+ nextch = this->skip_whitespace ();
+ switch (nextch)
+ {
+ case '[':
+ this->internal_dtd_ = 1; // Internal DTD definition
+ this->parse_internal_dtd (ACEXML_ENV_SINGLE_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ break;
+ case '>': // End of DTD definition
+ // The doctypedecl ends here without an internal subset.  When
+ // validating, an external DTD must then have been declared.
+ if (this->validate_ && !this->external_dtd_)
+ {
+ this->fatal_error (ACE_TEXT ("No DTD defined")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ return 0;
+ case 0: // End of input is the NUL character, not the digit '0'.
+ this->fatal_error (ACE_TEXT ("Unexpected end-of-file")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ default:
+ break;
+ }
- while (state < 3)
- // Waiting for the trailing three character '-->'. Notice that
- // according to the spec, '--->' is not a valid closing comment
- // sequence. But we'll let it pass anyway.
+ if (this->skip_whitespace() != '>')
{
- ACEXML_Char fwd = this->get ();
- if ((fwd == '-' && state < 2) ||
- (fwd == '>' && state == 2))
- state += 1;
- else
- state = 0; // Reset parse state.
+ this->fatal_error(ACE_TEXT ("Expecting '>' at end of doctypedecl")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
}
return 0;
}
+// Parse the ExternalID of a doctypedecl and then the external subset it
+// names.  Sets ref_state_ to IN_EXT_DTD, reads the public/system ids with
+// parse_external_id(), asks entity_resolver_ (when one is set) for an
+// input source and otherwise creates a stream with ACEXML_StreamFactory,
+// switches the parser input to it and calls parse_external_subset().
+// Returns 0 on success, -1 on error.
int
-ACEXML_Parser::parse_processing_instruction (ACEXML_ENV_SINGLE_ARG_DECL)
+ACEXML_Parser::parse_external_dtd (ACEXML_ENV_SINGLE_ARG_DECL)
{
- if (this->get () != '?')
- { // How did we get here?
- this->report_fatal_error(ACEXML_EINT ACEXML_ENV_ARG_PARAMETER);
- return -1;
+ this->ref_state_ = ACEXML_ParserInt::IN_EXT_DTD;
+ ACEXML_Char* publicId = 0;
+ ACEXML_Char* systemId = 0;
+ if (this->parse_external_id (publicId, systemId
+ ACEXML_ENV_ARG_PARAMETER) != 0)
+ {
+ this->fatal_error (ACE_TEXT ("Error in parsing ExternalID")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
}
- const ACEXML_Char *pitarget = this->read_name ();
- ACEXML_Char *instruction = 0;
-
- if (ACE_OS::strcasecmp (ACE_TEXT ("xml"), pitarget) != 0)
+ // normalize_systemid() returns 0 when it builds no new string; in that
+ // case systemId itself is used below.  cleanup_uri owns the returned
+ // array.
+ ACEXML_Char* uri = this->normalize_systemid (systemId);
+ ACE_Auto_Basic_Array_Ptr<ACEXML_Char> cleanup_uri (uri);
+ ACEXML_InputSource* ip = 0;
+ if (this->entity_resolver_)
{
- // Invalid PITarget name.
- this->report_fatal_error(ACEXML_INVPI ACEXML_ENV_ARG_PARAMETER);
- return -1;
+ ip = this->entity_resolver_->resolveEntity (publicId,
+ (uri ? uri : systemId)
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ if (ip)
+ {
+ if (this->switch_input (ip) != 0)
+ return -1;
+ }
+ else
+ {
+ // No resolver, or the resolver returned nothing: open the id directly.
+ ACEXML_StreamFactory factory;
+ ACEXML_CharStream* cstream = factory.create_stream (uri ? uri: systemId);
+ if (!cstream) {
+ this->fatal_error (ACE_TEXT ("Invalid input source")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ // NOTE(review): who releases cstream when switch_input() fails is
+ // not visible here -- confirm.
+ if (this->switch_input (cstream, systemId, publicId) != 0)
+ return -1;
}
+ this->parse_external_subset (ACEXML_ENV_SINGLE_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ return 0;
+}
- int state = 0;
- ACEXML_Char ch = this->skip_whitespace (0);
+// Parse the internal DTD subset, i.e. everything between '[' and ']' in
+// a doctypedecl.  Sets ref_state_ to IN_INT_DTD and loops over markup
+// declarations ("<!"), processing instructions ("<?") and parameter
+// entity references ('%') until the closing ']' is read.
+// Returns 0 once ']' is consumed, -1 on error.
+int
+ACEXML_Parser::parse_internal_dtd (ACEXML_ENV_SINGLE_ARG_DECL)
+{
+ this->ref_state_ = ACEXML_ParserInt::IN_INT_DTD;
+ ACEXML_Char nextch = this->skip_whitespace ();
+ do {
+ switch (nextch)
+ {
+ case '<':
+ nextch = this->get();
+ switch (nextch)
+ {
+ case '!':
+ this->parse_markup_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ break;
+ case '?':
+ this->parse_processing_instruction (ACEXML_ENV_SINGLE_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ break;
+ default:
+ this->fatal_error (ACE_TEXT ("Invalid internal subset")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ break;
+ case '%':
+ this->has_pe_refs_ = 1;
+ this->parse_PE_reference (ACEXML_ENV_SINGLE_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ // Keep parsing after a PE reference.  Without this break control
+ // fell into the ']' case and ended the subset prematurely.
+ break;
+ case ']': // End of internal definitions.
+ return 0;
+ case '&':
+ this->fatal_error (ACE_TEXT ("Invalid Reference in internal DTD")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ case 0:
+ this->fatal_error (ACE_TEXT ("Unexpected end-of-file")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ default:
+ this->fatal_error (ACE_TEXT ("Invalid content in internal subset")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ };
+ nextch = this->skip_whitespace ();
- while (state < 2)
- {
- switch (ch)
- {
- case '?':
- if (state == 0)
- state = 1;
+ } while (1);
+
+ ACE_NOTREACHED (return -1);
+}
+
+// Parse the external DTD subset from the current input.  Sets ref_state_
+// to IN_EXT_DTD and external_subset_ to 1, then loops over conditional
+// sections and markup declarations ("<!"), text declarations and
+// processing instructions ("<?") and parameter entity references ('%').
+// When the input returns 0 the loop consults pop_context() to decide
+// whether to continue, finish (return 0) or report an internal error.
+// Returns -1 on error.
+int
+ACEXML_Parser::parse_external_subset (ACEXML_ENV_SINGLE_ARG_DECL)
+{
+ this->ref_state_ = ACEXML_ParserInt::IN_EXT_DTD;
+ this->external_subset_ = 1;
+ int nrelems = 0;
+ ACEXML_Char nextch = this->skip_whitespace();
+ do {
+ switch (nextch)
+ {
+ case '<':
+ nextch = this->get();
+ switch (nextch)
+ {
+ case '!':
+ nextch = this->peek();
+ if (nextch == '[')
+ this->parse_conditional_section (ACEXML_ENV_SINGLE_ARG_PARAMETER);
+ else
+ this->parse_markup_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ break;
+ case '?':
+ // NOTE(review): any target starting with 'x' is routed to
+ // parse_text_decl(), not only "xml" -- confirm that is intended.
+ nextch = this->peek();
+ if (nextch == 'x')
+ this->parse_text_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER);
+ else
+ this->parse_processing_instruction (ACEXML_ENV_SINGLE_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ break;
+ default:
+ this->fatal_error (ACE_TEXT ("Invalid content in external DTD")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
break;
- case '>':
- if (state == 1)
+ case '%':
+ this->parse_PE_reference (ACEXML_ENV_SINGLE_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ break;
+ case 0:
+ // pop_context() > 1: keep parsing; == 1: the external subset is
+ // finished; anything else is reported as an internal error.
+ // NOTE(review): presumably the value is the number of input
+ // contexts still stacked -- confirm against pop_context().
+ nrelems = this->pop_context();
+ if (nrelems > 1)
+ break;
+ else if (nrelems == 1)
{
- instruction = this->obstack_.freeze ();
- this->content_handler_->processingInstruction (pitarget,
- instruction
- ACEXML_ENV_ARG_PARAMETER);
- ACEXML_CHECK_RETURN (-1);
- this->obstack_.unwind (ACE_const_cast (ACEXML_Char*, pitarget));
+ this->external_subset_ = 0;
return 0;
}
- break;
- case 0x0D: // End-of-Line handling
- ch = (this->peek () == 0x0A ? this->get () : 0x0A);
- // Fall thru...
- case 0x0A:
- // Fall thru...
+ else
+ {
+ this->fatal_error(ACE_TEXT ("Internal Parser Error")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
default:
- if (state == 1)
- this->obstack_.grow ('?');
- this->obstack_.grow (ch);
- state = 0;
- }
- ch = this->get ();
- }
- return -1;
+ this->fatal_error (ACE_TEXT ("Invalid content in external DTD")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ nextch = this->skip_whitespace();
+ } while (1);
}
+// Parse a conditional section of the external subset.  The caller has
+// consumed "<!" and peeked the '['.  Reads the keyword INCLUDE or IGNORE
+// and the following '[', then delegates to parse_includesect() or
+// parse_ignoresect().  Returns 0 on success, -1 on error.
int
-ACEXML_Parser::parse_doctypedecl (ACEXML_ENV_SINGLE_ARG_DECL)
- ACE_THROW_SPEC ((ACEXML_SAXException))
+ACEXML_Parser::parse_conditional_section (ACEXML_ENV_SINGLE_ARG_DECL)
{
- if (this->parse_token (ACE_TEXT ("DOCTYPE")) < 0)
+ ACEXML_Char ch = this->get ();
+ if (ch != '[')
{
- this->report_fatal_error(ACEXML_INVDOCKEYWORD ACEXML_ENV_ARG_PARAMETER);
- return -1;
+ this->fatal_error(ACE_TEXT ("Internal Parser Error")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
}
-
- ACEXML_Char nextch = this->skip_whitespace (0);
- if (nextch == 0)
+ // skip_whitespace() consumes the 'I' that both keywords start with.
+ if (this->skip_whitespace() != 'I')
{
- this->report_fatal_error(ACEXML_INVDOCNAME ACEXML_ENV_ARG_PARAMETER);
- return -1;
+ this->fatal_error (ACE_TEXT ("Invalid conditionalSect")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
}
+ // The second letter tells the two keywords apart and is consumed here,
+ // so only the remaining letters are matched with parse_token().
+ ch = this->get();
+ int include = 0;
+ switch (ch)
+ {
+ case 'N':
+ if (this->parse_token (ACE_TEXT ("CLUDE")) < 0)
+ {
+ this->fatal_error (ACE_TEXT ("Expecting keyword INCLUDE in "
+ "conditionalSect") ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ include = 1;
+ break;
+ case 'G':
+ // "IG" is already consumed; the rest of IGNORE is "NORE".
+ if (this->parse_token (ACE_TEXT ("NORE")) < 0)
+ {
+ this->fatal_error (ACE_TEXT ("Expecting keyword IGNORE in "
+ "conditionalSect")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ include = 0;
+ break;
+ default:
+ this->fatal_error (ACE_TEXT ("Invalid conditionalSect")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ if (this->skip_whitespace() != '[')
+ {
+ this->fatal_error (ACE_TEXT ("Expecting '[' in conditionalSect")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ if (include)
+ this->parse_includesect (ACEXML_ENV_SINGLE_ARG_PARAMETER);
+ else
+ this->parse_ignoresect (ACEXML_ENV_SINGLE_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ return 0;
+}
- this->doctype_ = this->read_name (nextch);
+// Skip the body of an IGNORE conditional section.  The caller has
+// already consumed "<![IGNORE[".  Nested "<![" ... "]]>" pairs are
+// tracked so that only the "]]>" matching this section ends it.
+// Returns 0 once that terminator has been consumed, and -1 (after
+// reporting a fatal error) if the input ends first.
+int
+ACEXML_Parser::parse_ignoresect (ACEXML_ENV_SINGLE_ARG_DECL)
+{
+ ACEXML_Char nextch = this->skip_whitespace();
+ int count = 0; // nested "<![" sections that are still open
+ do {
+ switch (nextch)
+ {
+ case '<':
+ if (this->peek() == '!')
+ {
+ this->get();
+ if (this->peek() == '[')
+ {
+ this->get();
+ ++count;
+ }
+ }
+ break;
+ case ']':
+ if (this->peek() == ']')
+ {
+ this->get();
+ // A longer run such as "]]]>" still ends in "]]>".
+ while (this->peek() == ']')
+ this->get();
+ if (this->peek() == '>')
+ {
+ this->get();
+ if (count == 0)
+ return 0; // the terminator of this section
+ --count; // closes a nested section
+ }
+ }
+ break;
+ case 0: // [VC: Proper Conditional Section/PE Nesting]
+ // The input ended before the closing "]]>" was seen.
+ this->fatal_error (ACE_TEXT ("Invalid Conditional Section/PE "
+ "Nesting ")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ return -1;
+ default:
+ break;
+ }
+ nextch = this->get();
+ } while (1);
+}
- this->skip_whitespace_count (&nextch);
+// Parse the body of an INCLUDE conditional section.  Loops over nested
+// conditional sections and markup declarations ("<!"), processing
+// instructions ("<?") and parameter entity references ('%') until the
+// closing "]]>" is consumed, at which point it returns 0.
+// Returns -1 on error.
+int
+ACEXML_Parser::parse_includesect (ACEXML_ENV_SINGLE_ARG_DECL)
+{
+ ACEXML_Char nextch = this->skip_whitespace();
+ do {
+ switch (nextch)
+ {
+ case '<':
+ nextch = this->get();
+ switch (nextch)
+ {
+ case '!':
+ nextch = this->peek();
+ if (nextch == '[')
+ this->parse_conditional_section (ACEXML_ENV_SINGLE_ARG_PARAMETER);
+ else
+ this->parse_markup_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ break;
+ case '?':
+ nextch = this->peek();
+ this->parse_processing_instruction (ACEXML_ENV_SINGLE_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ break;
+ default:
+ this->fatal_error (ACE_TEXT ("Invalid includeSect")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ break;
+ case '%':
+ this->parse_PE_reference (ACEXML_ENV_SINGLE_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ break;
+ case 0: // [VC: Proper Conditional Section/PE Nesting]
+ this->fatal_error (ACE_TEXT ("Invalid Conditional Section/PE "
+ "Nesting ")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ case ']':
+ if (this->peek() == ']')
+ {
+ nextch = this->get();
+ if (this->peek() == '>')
+ {
+ nextch = this->get();
+ return 0;
+ }
+ }
+ // A ']' that does not start "]]>" falls through to the error below.
+ default:
+ this->fatal_error (ACE_TEXT ("Invalid includeSect")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ nextch = this->skip_whitespace();
+ } while (1);
+}
- if (nextch == 'S' || nextch == 'P') // ExternalID defined
+// Dispatch one markup declaration.  Peeks at the next character and
+// hands off to the ELEMENT, ENTITY, ATTLIST or NOTATION declaration
+// parser, or to parse_comment().  A '%' is parsed as a PE reference only
+// while external_subset_ is set; otherwise it is reported as an invalid
+// markupDecl.  Returns 0 on success, -1 on error.
+int
+ACEXML_Parser::parse_markup_decl (ACEXML_ENV_SINGLE_ARG_DECL)
+{
+ ACEXML_Char nextch = this->peek ();
switch (nextch)
{
- this->parse_external_id_and_ref (this->dtd_public_,
- this->dtd_system_
- ACEXML_ENV_ARG_PARAMETER);
- ACEXML_CHECK_RETURN (-1);
-// if (this->dtd_public_ == 0)
-// ACE_DEBUG ((LM_DEBUG,
-// ACE_TEXT ("ACEXML Parser got external DTD id: SYSTEM %s\n"),
-// this->dtd_system_));
-// else
-// ACE_DEBUG ((LM_DEBUG,
-// ACE_TEXT ("ACEXML Parser got DTD external id: PUBLIC %s %s\n"),
-// this->dtd_public_, this->dtd_system_));
+ case 'E': // An ELEMENT or ENTITY decl
+ // The 'E' is consumed here; the second letter is only peeked.
+ this->get ();
+ nextch = this->peek ();
+ switch (nextch)
+ {
+ case 'L':
+ this->parse_element_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ break;
+ case 'N':
+ this->parse_entity_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ break;
+ default:
+ this->fatal_error(ACE_TEXT ("Expecting keyword ELEMENT/ENTITY")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ break;
+
+ case 'A': // An ATTLIST decl
+ this->parse_attlist_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ break;
+
+ case 'N': // A NOTATION decl
+ this->parse_notation_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ break;
+
+ case '-': // a comment.
+ if (this->parse_comment () < 0)
+ {
+ this->fatal_error(ACE_TEXT ("Invalid comment")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ break;
+ case 0: // [VC: Proper Declaration/PE Nesting]
+ this->fatal_error (ACE_TEXT ("Unexpected end-of-file")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ case '%':
+ if (this->external_subset_)
+ {
+ this->parse_PE_reference (ACEXML_ENV_SINGLE_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ break;
+ }
+ // Fall through
+ default:
+ this->fatal_error (ACE_TEXT ("Invalid markupDecl")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
}
+ return 0;
+}
- nextch = this->skip_whitespace (0);
+// Parse an ExternalID: either  SYSTEM <systemLiteral>  or
+// PUBLIC <pubidLiteral> [<systemLiteral>].
+//
+// publicId and systemId are both reset to 0 and then set to the literals
+// that were read.  After PUBLIC the system literal may be left out only
+// while ref_state_ is IN_NOTATION.
+// Returns 0 on success, -1 on error.
+int
+ACEXML_Parser::parse_external_id (ACEXML_Char *&publicId,
+ ACEXML_Char *&systemId
+ ACEXML_ENV_ARG_DECL)
+{
+ publicId = systemId = 0;
+ ACEXML_Char nextch = this->get ();
+ ACEXML_Char fwd = 0;
switch (nextch)
{
- case '[': // Internal DTD definition
- if (this->parse_internal_dtd (ACEXML_ENV_SINGLE_ARG_PARAMETER) < 0)
- return -1; // Error in markupdecl
- break;
- case '>': // End of DTD definition
- // this is an XML document without a dectypedecl.
- return 0;
- case '0':
- this->report_fatal_error (ACEXML_EOF ACEXML_ENV_ARG_PARAMETER);
- return -1;
- default:
- break;
+ case 'S': // External SYSTEM id.
+ // The keyword must be followed by at least one whitespace character.
+ if (this->parse_token (ACE_TEXT ("YSTEM")) < 0 ||
+ this->skip_whitespace_count () < 1)
+ {
+ this->fatal_error(ACE_TEXT ("Expecting keyword SYSTEM")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ if (this->parse_system_literal (systemId) != 0)
+ {
+ this->fatal_error(ACE_TEXT ("Invalid systemLiteral")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ break;
+ case 'P': // External PUBLIC id or previously defined PUBLIC id.
+ if (this->parse_token (ACE_TEXT ("UBLIC")) < 0 ||
+ this->skip_whitespace_count () < 1)
+ {
+ this->fatal_error(ACE_TEXT ("Expecting keyword PUBLIC")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ if (this->parse_pubid_literal (publicId) != 0)
+ {
+ this->fatal_error(ACE_TEXT ("Invalid PubidLiteral")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ // A quote after the public id starts the system literal.
+ this->skip_whitespace_count(&fwd);
+ if (fwd == '\'' || fwd == '"')
+ {
+ if (this->parse_system_literal (systemId) != 0)
+ {
+ this->fatal_error(ACE_TEXT ("Invalid systemLiteral")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ }
+ else if (this->ref_state_ != ACEXML_ParserInt::IN_NOTATION)
+ {
+ this->fatal_error(ACE_TEXT ("Expecting systemLiteral after a "
+ "PUBLIC keyword")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ break;
+ default:
+ this->fatal_error(ACE_TEXT ("Invalid system/public Literal")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
}
+ return 0;
+}
- if (this->skip_whitespace (0) != '>')
+// Resolve a relative system identifier against the system id of the
+// input currently being parsed.
+//
+// Returns a newly allocated string (released by the caller with
+// delete []) holding the base URI up to and including its last
+// separator, followed by systemId.  Returns 0 when nothing is built:
+// systemId is null or already contains ftp://, http:// or file://, the
+// current locator has no system id, the base has no separator, or the
+// allocation fails.
+ACEXML_Char*
+ACEXML_Parser::normalize_systemid (const char* systemId)
+{
+ if (systemId == 0)
+ return 0;
+ if (ACE_OS::strstr (systemId, ACE_TEXT("ftp://")) != 0 ||
+ ACE_OS::strstr (systemId, ACE_TEXT ("http://")) != 0 ||
+ ACE_OS::strstr (systemId, ACE_TEXT ("file://")) != 0)
+ return 0;
+ else
{
- this->report_fatal_error(ACEXML_EINT ACEXML_ENV_ARG_PARAMETER);
- return -1;
+ ACEXML_Char* normalized_uri = 0;
+ const char* baseURI = this->current_.getLocator()->getSystemId();
+ if (!baseURI)
+ return 0;
+ if (ACE_OS::strstr (baseURI, ACE_TEXT ("http://")) != 0)
+ {
+ // baseURI is a HTTP URL and systemId is relative. Note that this
+ // is not compliant with RFC2396. Caveat Emptor !
+ const ACEXML_Char* temp = ACE_OS::strrchr (baseURI, '/');
+ // pos counts the characters of baseURI up to and including the
+ // last '/'; len also leaves room for the terminating NUL.
+ size_t pos = temp - baseURI + 1;
+ size_t len = pos + ACE_OS::strlen (systemId) + 1;
+ ACE_NEW_RETURN (normalized_uri, ACEXML_Char[len], 0);
+ ACE_OS::strncpy (normalized_uri, baseURI, pos);
+ // Append right after the copied prefix.  Writing at pos + 1 left
+ // one element uninitialised and overran the buffer by one.
+ ACE_OS::strcpy (normalized_uri + pos, systemId);
+ return normalized_uri;
+ }
+ else
+ {
+ const ACEXML_Char*
+ temp = ACE_OS::strrchr (baseURI,ACE_DIRECTORY_SEPARATOR_CHAR);
+ // baseURI is a local file and systemId is relative
+ // Unlike the HTTP one, this will work always.
+ if (temp)
+ {
+ size_t pos = temp - baseURI + 1;
+ size_t len = pos + ACE_OS::strlen (systemId) + 1;
+ ACE_NEW_RETURN (normalized_uri, ACEXML_Char[len], 0);
+ ACE_OS::strncpy (normalized_uri, baseURI, pos);
+ // Same layout as the HTTP branch: prefix, then systemId.
+ ACE_OS::strcpy (normalized_uri + pos, systemId);
+ return normalized_uri;
+ }
+ return 0;
+ }
}
- return 0;
}
+// Parse one element, starting at its name (the caller has consumed '<').
+// Reads the start tag: the element name, which for the root element must
+// equal doctype_ when a DOCTYPE was declared, followed by the attributes.
+// "xmlns" attributes are handled according to namespaces_ and
+// namespace_prefixes_.  An empty-element tag ("/>") reports startElement
+// and endElement immediately; otherwise startElement is reported and the
+// element body is handed to parse_content().
void
ACEXML_Parser::parse_element (int is_root ACEXML_ENV_ARG_DECL)
- ACE_THROW_SPEC ((ACEXML_SAXException))
+ ACE_THROW_SPEC ((ACEXML_SAXException))
{
// Parse STag.
- const ACEXML_Char *startname = this->read_name ();
+ const ACEXML_Char *startname = this->parse_name ();
if (startname == 0)
{
- this->report_fatal_error (ACEXML_EOF ACEXML_ENV_ARG_PARAMETER);
+ this->fatal_error (ACE_TEXT ("Unexpected end-of-file")
+ ACEXML_ENV_ARG_PARAMETER);
return;
}
if (is_root && this->doctype_ != 0
&& ACE_OS::strcmp (startname, this->doctype_) != 0)
{
- this->report_fatal_error (ACEXML_MISSINGROOT ACEXML_ENV_ARG_PARAMETER);
- return;
+ this->fatal_error (ACE_TEXT ("Root element different from DOCTYPE")
+ ACEXML_ENV_ARG_PARAMETER);
+ return ;
}
ACEXML_AttributesImpl attributes;
ACEXML_Char ch;
- int new_namespace = 0;
- const ACEXML_Char *endname = 0;
const ACEXML_Char *ns_uri, *ns_lname; // namespace URI and localName
- ACEXML_Char* prefix = 0;
- ACEXML_Char* name = 0;
for (int start_element_done = 0; start_element_done == 0;)
{
- ch = this->skip_whitespace (0);
+ ch = this->skip_whitespace ();
switch (ch)
{
- case 0:
- this->report_fatal_error(ACEXML_EINT ACEXML_ENV_ARG_PARAMETER);
- return;
- case '/':
- if (this->get () != '>')
- {
- this->report_fatal_error(ACEXML_GREAT ACEXML_ENV_ARG_PARAMETER);
- return;
- }
- else
- {
- this->xml_namespace_.processName(startname, ns_uri, ns_lname, 0);
- prefix = ACE_const_cast (ACEXML_Char*,
- this->xml_namespace_.getPrefix(ns_uri));
- this->report_prefix_mapping (prefix, ns_uri, ns_lname, 1
- ACEXML_ENV_ARG_PARAMETER);
- ACEXML_CHECK;
- this->content_handler_->startElement (ns_uri, ns_lname,
- startname, &attributes
+ case 0:
+ this->fatal_error(ACE_TEXT ("Internal Parser error")
+ ACEXML_ENV_ARG_PARAMETER);
+ return;
+ case '/':
+ // Empty-element tag: start and end are reported back to back.
+ if (this->get () != '>')
+ {
+ this->fatal_error(ACE_TEXT ("Expecting '>' at end of element "
+ "definition")
+ ACEXML_ENV_ARG_PARAMETER);
+ return;
+ }
+ else
+ {
+ this->xml_namespace_.processName(startname, ns_uri,
+ ns_lname, 0);
+ this->prefix_mapping (this->xml_namespace_.getPrefix(ns_uri),
+ ns_uri, ns_lname, 1
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK;
+ this->content_handler_->startElement(ns_uri, ns_lname,
+ startname, &attributes
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK;
+ this->content_handler_->endElement (ns_uri, ns_lname, startname
ACEXML_ENV_ARG_PARAMETER);
- ACEXML_CHECK;
- this->content_handler_->endElement (ns_uri, ns_lname, startname
- ACEXML_ENV_ARG_PARAMETER);
- ACEXML_CHECK;
- this->report_prefix_mapping (prefix, ns_uri, ns_lname, 0
- ACEXML_ENV_ARG_PARAMETER);
- ACEXML_CHECK;
- }
- if (new_namespace != 0)
- this->xml_namespace_.popContext ();
- return;
-
- case '>':
- {
- this->xml_namespace_.processName (startname, ns_uri, ns_lname, 0);
- prefix = ACE_const_cast (ACEXML_Char*,
- this->xml_namespace_.getPrefix (ns_uri));
- this->report_prefix_mapping (prefix, ns_uri, ns_lname, 1
- ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK;
+ this->prefix_mapping (this->xml_namespace_.getPrefix(ns_uri),
+ ns_uri, ns_lname, 0
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK;
+ }
+ // NOTE(review): nested_namespace_ is a member shared by nested
+ // parse_element() calls, whereas the code it replaces used a
+ // per-call local -- confirm the pop still pairs with the push.
+ if (this->nested_namespace_ != 0)
+ this->xml_namespace_.popContext ();
+ return;
+
+ case '>':
+ this->xml_namespace_.processName (startname, ns_uri,
+ ns_lname, 0);
+ this->prefix_mapping (this->xml_namespace_.getPrefix(ns_uri),
+ ns_uri, ns_lname, 1
+ ACEXML_ENV_ARG_PARAMETER);
ACEXML_CHECK;
- this->content_handler_->startElement (ns_uri, ns_lname, startname,
- &attributes
- ACEXML_ENV_ARG_PARAMETER);
+ this->content_handler_->startElement(ns_uri, ns_lname, startname,
+ &attributes
+ ACEXML_ENV_ARG_PARAMETER);
ACEXML_CHECK;
start_element_done = 1;
break;
- }
- default:
- ACEXML_Char *attvalue = 0;
- ACEXML_Char *attname = this->read_name (ch);
-
- if (attname == 0 ||
- this->skip_equal () != 0 ||
- this->get_quoted_string (attvalue) != 0)
- {
- this->report_fatal_error(ACEXML_RDATTR ACEXML_ENV_ARG_PARAMETER);
- return;
- }
-
- // Handling new namespace if any. Notice that the order of
- // namespace declaration does matter.
- if (ACE_OS::strncmp (attname, ACE_TEXT("xmlns"), 5) == 0)
- {
- if (this->namespaces_)
- {
- if (new_namespace == 0)
- {
- this->xml_namespace_.pushContext ();
- new_namespace = 1;
- }
- name = ACE_OS::strchr (attname, ':');
- const ACEXML_Char* ns_name = (name == 0)?empty_string:name+1;
- if (this->xml_namespace_.declarePrefix (ns_name,
- attvalue) == -1)
- {
- this->report_fatal_error(ACEXML_DUPPREFIX
- ACEXML_ENV_ARG_PARAMETER);
- return;
- }
- }
- if (this->namespace_prefixes_)
- {
- // Namespace_prefixes_feature_ is required. So add the
- // xmlns:foo to the list of attributes.
- if (attributes.addAttribute (0, 0, attname,
- default_attribute_type,
- attvalue) == -1)
- {
- this->report_fatal_error(ACEXML_DUPATTR
- ACEXML_ENV_ARG_PARAMETER);
- return;
- }
- }
- if (!this->namespaces_ && !this->namespace_prefixes_)
- {
- this->report_fatal_error(ACEXML_NSERR
- ACEXML_ENV_ARG_PARAMETER);
- return;
- }
- }
- else
- {
- const ACEXML_Char *uri, *lName;
- this->xml_namespace_.processName (attname, uri, lName, 1);
- if (attributes.addAttribute (uri, lName, attname,
- default_attribute_type,
- attvalue) == -1)
- {
- this->report_fatal_error(ACEXML_DUPATTR
- ACEXML_ENV_ARG_PARAMETER);
- return;
- }
- }
- break;
- }
- }
- ACEXML_Char *cdata;
- size_t cdata_length = 0;
-
- // Parse element contents.
- while (1)
- {
- ACEXML_Char ch = this->get ();
+ default:
+ // Anything else starts an attribute; ch is its first character.
+ ACEXML_Char *attvalue = 0;
+ ACEXML_Char *attname = this->parse_name (ch);
- switch (ch)
- {
- case 0:
- this->report_fatal_error(ACEXML_EINT ACEXML_ENV_ARG_PARAMETER);
- return;
- case '<':
- // Push out old 'characters' event.
- if (cdata_length != 0)
- {
- cdata = this->obstack_.freeze ();
- this->content_handler_->characters (cdata, 0, cdata_length
- ACEXML_ENV_ARG_PARAMETER);
- ACEXML_CHECK;
- this->obstack_.unwind (cdata);
- cdata_length = 0;
- }
+ if (attname == 0 ||
+ this->skip_equal () != 0 ||
+ this->parse_attvalue (attvalue ACEXML_ENV_ARG_PARAMETER) != 0)
+ {
+ this->fatal_error(ACE_TEXT ("Error reading attribute value")
+ ACEXML_ENV_ARG_PARAMETER);
+ return;
+ }
- switch (this->peek ())
- {
- case '!': // a comment or a CDATA section.
- this->get (); // consume '!'
- ch = this->peek ();
- if (ch == '-') // a comment
- {
- if (this->grok_comment () < 0)
- {
- this->report_fatal_error(ACEXML_INVCO
- ACEXML_ENV_ARG_PARAMETER);
- return;
- }
- }
- else if (ch == '[') // a CDATA section.
- {
- this->parse_cdata (ACEXML_ENV_SINGLE_ARG_PARAMETER);
- ACEXML_CHECK;
- }
- else
- {
- this->report_fatal_error(ACEXML_ECHAR
- ACEXML_ENV_ARG_PARAMETER);
- return;
- }
- break;
- case '?': // a PI.
- this->parse_processing_instruction (ACEXML_ENV_SINGLE_ARG_PARAMETER);
- ACEXML_CHECK;
- break;
- case '/': // an ETag.
+ // Handling new namespace if any. Notice that the order of
+ // namespace declaration does matter.
+ if (ACE_OS::strncmp (attname, ACE_TEXT("xmlns"), 5) == 0)
{
- this->get (); // consume '/'
- endname = this->read_name ();
- if (endname == 0 ||
- ACE_OS::strcmp (startname, endname) != 0)
+ if (this->namespaces_)
{
- this->report_fatal_error(ACEXML_ETAG
- ACEXML_ENV_ARG_PARAMETER);
- return ;
+ if (this->nested_namespace_ == 0)
+ {
+ this->xml_namespace_.pushContext ();
+ this->nested_namespace_ = 1;
+ }
+ ACEXML_Char* name = ACE_OS::strchr (attname, ':');
+ const ACEXML_Char* ns_name = (name == 0)?
+ empty_string:name+1;
+ if (this->xml_namespace_.declarePrefix (ns_name,
+ attvalue) == -1)
+ {
+ this->fatal_error(ACE_TEXT ("Duplicate definition of "
+ "prefix")
+ ACEXML_ENV_ARG_PARAMETER);
+ return;
+ }
}
- if (this->skip_whitespace (0) != '>')
+ if (this->namespace_prefixes_)
{
- this->report_fatal_error(ACEXML_GREAT
- ACEXML_ENV_ARG_PARAMETER);
- return;
+ // Namespace_prefixes_feature_ is required. So add the
+ // xmlns:foo to the list of attributes.
+ if (attributes.addAttribute (0, 0, attname,
+ default_attribute_type,
+ attvalue) == -1)
+ {
+ this->fatal_error(ACE_TEXT ("Duplicate attribute "
+ "definition. Hint: Try "
+ "setting namespace_prefix"
+ "es feature to 0")
+ ACEXML_ENV_ARG_PARAMETER);
+ return;
+ }
}
- this->content_handler_->endElement (ns_uri, ns_lname, endname
- ACEXML_ENV_ARG_PARAMETER);
- ACEXML_CHECK;
- prefix = ACE_const_cast (ACEXML_Char*,
- this->xml_namespace_.getPrefix(ns_uri));
- this->report_prefix_mapping (prefix, ns_uri, ns_lname, 0
- ACEXML_ENV_ARG_PARAMETER);
- ACEXML_CHECK;
- if (new_namespace != 0)
- this->xml_namespace_.popContext ();
- return;
- }
- default: // a new nested element?
- this->parse_element (0 ACEXML_ENV_ARG_PARAMETER);
- ACEXML_CHECK;
- break;
- }
- break;
- case '&':
- {
- const ACEXML_String *replace = 0;
- ACEXML_String charval;
- ACEXML_Char buffer[6];
-
- if (this->peek () == '#')
- {
- if (this->parse_char_reference (buffer, 6) != 0)
+ if (!this->namespaces_ && !this->namespace_prefixes_)
{
- this->report_fatal_error (ACEXML_INVCHAR
- ACEXML_ENV_ARG_PARAMETER);
-
+ this->fatal_error(ACE_TEXT ("One of namespaces or "
+ "namespace_prefixes should be"
+ " declared")
+ ACEXML_ENV_ARG_PARAMETER);
return;
}
- charval.set (buffer, 0);
- replace = &charval;
}
else
- replace = this->parse_reference ();
-
- if (replace == 0)
{
- this->report_fatal_error(ACEXML_EINT ACEXML_ENV_ARG_PARAMETER);
- return;
+ const ACEXML_Char *uri, *lName;
+ this->xml_namespace_.processName (attname, uri, lName, 1);
+ if (attributes.addAttribute (uri, lName, attname,
+ default_attribute_type,
+ attvalue) == -1)
+ {
+ this->fatal_error(ACE_TEXT ("Duplicate attribute "
+ "definition")
+ ACEXML_ENV_ARG_PARAMETER);
+ return;
+ }
}
- cdata_length = replace->length ();
- for (size_t i = 0; i < replace->length (); ++i)
- this->obstack_.grow ((*replace)[i]);
- }
- break;
- case 0x0D: // End-of-Line handling
- ch = (this->peek () == 0x0A ? this->get () : 0x0A);
- // Fall thru...
- case 0x0A:
- // Fall thru...
- default:
- ++cdata_length;
- cdata = this->obstack_.grow (ch);
- if (cdata == 0)
- {
- cdata = this->obstack_.freeze ();
- this->content_handler_->characters (cdata, 0, cdata_length
- ACEXML_ENV_ARG_PARAMETER);
- ACEXML_CHECK;
- this->obstack_.grow (ch);
- cdata_length = 1; // the missing char.
- }
+ break;
}
}
- ACE_NOTREACHED (return;)
+ // The start tag is complete; the content and the end tag are parsed
+ // by parse_content().
+ if (this->parse_content (startname, ns_uri, ns_lname
+ ACEXML_ENV_ARG_PARAMETER) != 0)
+ return;
}
+// Parse the content of an element up to and including its end tag.
+//
+// startname is the qualified name from the start tag, which the end tag
+// must repeat; ns_uri and ns_lname are the namespace URI and local name
+// passed to endElement().  Character data is collected on obstack_ and
+// delivered through characters(); when validate_ is set, runs of
+// whitespace are delivered through ignorableWhitespace() instead.
+// Comments, CDATA sections, processing instructions, nested elements and
+// references are handed to their own parsers.
+// Returns 0 after the end tag has been processed, or when pop_context()
+// returns 0 at end of input; returns -1 on error.
int
-ACEXML_Parser::parse_char_reference (ACEXML_Char *buf, size_t len)
+ACEXML_Parser::parse_content (const ACEXML_Char* startname,
+ const ACEXML_Char* ns_uri,
+ const ACEXML_Char* ns_lname ACEXML_ENV_ARG_DECL)
{
- if (this->get () != '#') // Internal error.
- return -1;
-
- int hex = 0;
- if (this->peek () == 'x')
- {
- hex = 1;
- this->get ();
- }
-
- int more_digit = 0;
- ACEXML_UCS4 sum = 0;
+ ACEXML_Char *cdata;
+ size_t cdata_length = 0;
+ // Parse element contents.
while (1)
{
ACEXML_Char ch = this->get ();
+ int nrelems = 0;
switch (ch)
{
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- sum = sum * (hex ? 16 : 10) + (ch - '0');
- break;
- case 'a':
- case 'A':
- if (!hex)
- return -1;
- sum = sum * 16 + 10;
- break;
- case 'b':
- case 'B':
- if (!hex)
- return -1;
- sum = sum * 16 + 11;
- break;
- case 'c':
- case 'C':
- if (!hex)
- return -1;
- sum = sum * 16 + 12;
- break;
- case 'd':
- case 'D':
- if (!hex)
- return -1;
- sum = sum * 16 + 13;
- break;
- case 'e':
- case 'E':
- if (!hex)
- return -1;
- sum = sum * 16 + 14;
- break;
- case 'f':
- case 'F':
- if (!hex)
- return -1;
- sum = sum * 16 + 15;
- break;
- case ';':
- if (more_digit == 0) // no digit exist???
- return -1;
- int clen;
- // [WFC: Legal Character]
- if (sum == 0x9 || sum == 0xA || sum == 0xD
- || sum >= 0x20 && sum <= 0xD7FF
- || sum >= 0xE000 && sum <= 0xFFFD
- || sum >= 0x10000 && sum <= 0x10FFFF)
- {
+ case 0:
+ // End of the current input: pop_context() decides whether to
+ // keep reading (>= 1), stop (0) or report an internal error.
+ nrelems = this->pop_context();
+ if (nrelems >= 1)
+ break;
+ else if (nrelems == 0)
+ return 0;
+ else
+ {
+ this->fatal_error(ACE_TEXT ("Internal Parser Error")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ case '<':
+ // Push out old 'characters' event.
+ if (cdata_length != 0)
+ {
+ cdata = this->obstack_.freeze ();
+ this->content_handler_->characters (cdata, 0, cdata_length
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ this->obstack_.unwind (cdata);
+ cdata_length = 0;
+ }
-#if defined (ACE_USES_WCHAR) // UTF-16
- if ((clen = ACEXML_Transcoder::ucs42utf16 (sum, buf, len)) < 0)
- return -1;
+ switch (this->peek ())
+ {
+ case '!': // a comment or a CDATA section.
+ this->get (); // consume '!'
+ ch = this->peek ();
+ if (ch == '-') // a comment
+ {
+ if (this->parse_comment () < 0)
+ {
+ this->fatal_error(ACE_TEXT ("Invalid comment in "
+ "document")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ }
+ else if (ch == '[') // a CDATA section.
+ {
+ this->parse_cdata (ACEXML_ENV_SINGLE_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ else
+ {
+ this->fatal_error(ACE_TEXT ("Expecting a CDATA section "
+ "or a comment section")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ break;
+ case '?': // a PI.
+ this->get(); // consume the '?'
+ this->parse_processing_instruction
+ (ACEXML_ENV_SINGLE_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ break;
+ case '/': // an ETag.
+ {
+ this->get (); // consume '/'
+ ACEXML_Char* endname = this->parse_name ();
+ if (endname == 0 ||
+ ACE_OS::strcmp (startname, endname) != 0)
+ {
+ this->fatal_error(ACE_TEXT ("Name in ETag doesn't "
+ "match name in STag")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ if (this->skip_whitespace () != '>')
+ {
+ this->fatal_error(ACE_TEXT ("Expecting '>' at end "
+ "of element")
+ ACEXML_ENV_ARG_PARAMETER);
+ return -1;
+ }
+ this->content_handler_->endElement (ns_uri, ns_lname,
+ endname
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ this->prefix_mapping (this->xml_namespace_. getPrefix(ns_uri),
+ ns_uri, ns_lname, 0
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ if (this->nested_namespace_ != 0)
+ this->xml_namespace_.popContext ();
+ return 0;
+ }
+ default: // a new nested element?
+ this->parse_element (0 ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ break;
+ }
+ break;
+ case '&':
-#else // or UTF-8
- if ((clen = ACEXML_Transcoder::ucs42utf8 (sum, buf, len)) < 0)
- return -1;
-#endif
- buf [clen] = 0;
- return 0;
- }
- return -1;
- default:
- return -1;
+ if (this->peek () == '#')
+ {
+ // Zero-filled so that the decoded text is NUL-terminated
+ // however many code units parse_char_reference() writes.
+ ACEXML_Char buf[7] = { 0 };
+ // The capacity is passed in elements, not bytes.
+ const size_t buf_len = sizeof (buf) / sizeof (buf[0]);
+ if (this->parse_char_reference (buf, buf_len) != 0)
+ {
+ // [WFC: Legal Character]
+ ACE_ERROR ((LM_ERROR,
+ ACE_TEXT ("Invalid character reference\n")));
+ return -1;
+ }
+ // Append the referenced character to the pending character
+ // data; previously the decoded value was dropped.
+ for (size_t i = 0; i < buf_len && buf[i] != 0; ++i)
+ {
+ this->obstack_.grow (buf[i]);
+ ++cdata_length;
+ }
+ }
+ else
+ {
+ this->ref_state_ = ACEXML_ParserInt::IN_CONTENT;
+ this->parse_entity_reference(ACEXML_ENV_SINGLE_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ break;
+ case '\x20': case '\x0D': case '\x0A': case '\x09':
+ if (this->validate_)
+ {
+ // Flush out any non-whitespace characters
+ if (cdata_length != 0)
+ {
+ cdata = this->obstack_.freeze ();
+ this->content_handler_->characters(cdata, 0, cdata_length
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ this->obstack_.unwind (cdata);
+ cdata_length = 0;
+ }
+ ++cdata_length;
+ this->obstack_.grow (ch);
+ while (1)
+ {
+ ch = this->peek();
+ if (ch == '\x20' || ch == '\x0D' || ch == '\x0A' ||
+ ch == '\x09')
+ {
+ ch = this->get();
+ this->obstack_.grow (ch);
+ // Count every whitespace character of the run so
+ // that the reported length covers all of it.
+ ++cdata_length;
+ continue;
+ }
+ break;
+ }
+ cdata = this->obstack_.freeze ();
+ this->content_handler_->ignorableWhitespace (cdata, 0,
+ cdata_length
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ this->obstack_.unwind (cdata);
+ cdata_length = 0;
+ break;
+ }
+ // Fall thru...
+ default:
+ ++cdata_length;
+ this->obstack_.grow (ch);
}
- more_digit = 1;
}
- ACE_NOTREACHED (return -1);
+ return 0;
}
-const ACEXML_String *
-ACEXML_Parser::parse_reference (void)
-{
- // @@ We'll use a temporary buffer here as the Obstack is most likely in
- // use when we are here. This puts a limit on the max length of a
- // reference.
- ACEXML_Char ref[MAXPATHLEN];
-
- size_t loc = 0;
-
- while (loc < MAXPATHLEN -1)
- {
- ACEXML_Char ch = this->get ();
- if (ch == ';')
- {
- ref[loc] = 0;
- break;
- }
- else
- ref[loc++] = ch;
- }
-
- return this->entities_.resolve_entity (ref);
-}
int
ACEXML_Parser::parse_cdata (ACEXML_ENV_SINGLE_ARG_DECL)
{
if (this->parse_token (ACE_TEXT ("[CDATA[")) < 0)
{
- this->report_fatal_error(ACEXML_ECDATASEC ACEXML_ENV_ARG_PARAMETER);
- return -1;
+ this->fatal_error(ACE_TEXT ("Expecting '[CDATA[' at beginning of CDATA "
+ "section")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
}
ACEXML_Char ch;
@@ -975,123 +1139,12 @@ ACEXML_Parser::parse_cdata (ACEXML_ENV_SINGLE_ARG_DECL)
this->obstack_.grow (temp);
++datalen;
}
- else if (ch == 0x0D)
- ch = (this->peek () == 0x0A ? this->get () : 0x0A);
- this->obstack_.grow (ch);
- ++datalen;
+ this->obstack_.grow (ch);
+ ++datalen;
};
ACE_NOTREACHED (return -1);
}
-int
-ACEXML_Parser::parse_internal_dtd (ACEXML_ENV_SINGLE_ARG_DECL)
-{
- ACEXML_Char nextch = this->skip_whitespace (0);
-
- do {
- switch (nextch)
- {
- case '<': // Start of markup Decl.
- nextch = this->peek ();
- switch (nextch)
- {
- case '!':
- this->get (); // Discard '!'
- nextch = this->peek ();
- switch (nextch)
- {
- case 'E': // An ELEMENT or ENTITY decl
- this->get ();
- nextch = this->peek ();
- switch (nextch)
- {
- case 'L':
- if (this->parse_element_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER) < 0)
- return -1;
- break;
-
- case 'N':
- if (this->parse_entity_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER) < 0)
- return -1;
- break;
-
- default:
- this->report_fatal_error(ACEXML_MKDECLKEYWORD
- ACEXML_ENV_ARG_PARAMETER);
- return -1;
- }
- break;
-
- case 'A': // An ATTLIST decl
- if (this->parse_attlist_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER) < 0)
- return -1;
- break;
-
- case 'N': // A NOTATION decl
- if (this->parse_notation_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER) < 0)
- return -1;
- break;
-
- case '-': // a comment.
- if (this->grok_comment () < 0)
- {
- this->report_fatal_error(ACEXML_INVCO
- ACEXML_ENV_ARG_PARAMETER);
- return -1;
- }
- break;
- case 0:
- this->report_fatal_error (ACEXML_EOF ACEXML_ENV_ARG_PARAMETER);
- return -1;
- default:
- this->report_fatal_error (ACEXML_MKINVCH
- ACEXML_ENV_ARG_PARAMETER);
- return -1;
- }
- break;
-
- case '?': // PI
- this->parse_processing_instruction (ACEXML_ENV_SINGLE_ARG_PARAMETER);
- ACEXML_CHECK_RETURN (-1);
- break;
-
- case 0:
- this->report_fatal_error (ACEXML_EOF ACEXML_ENV_ARG_PARAMETER);
- return -1;
- default:
- this->report_fatal_error (ACEXML_MKINVCH ACEXML_ENV_ARG_PARAMETER);
- return -1;
- }
- break;
-
- case '%': // DeclSep. Define new PEreference...
- break;
-
- case ']': // End of internal definitions.
- return 0; // Not applicable when parsing external DTD spec.
-
- case 0: // This may not be an error if we decide
- // to generalize this function to handle both
- // internal and external DTD definitions.
- this->report_fatal_error (ACEXML_EOF ACEXML_ENV_ARG_PARAMETER);
- return -1;
-
- default:
- this->report_fatal_error (ACEXML_MKDECL ACEXML_ENV_ARG_PARAMETER);
- return -1;
- };
-
- // To fully conform with the spec., whitespaces are only allowed
- // following a 'DeclSep' section. However, I found it
- // hard/impossible to eliminate all the whitespaces between
- // markupdecls.
-
- nextch = this->skip_whitespace (0);
-
- } while (1);
-
- ACE_NOTREACHED (return -1);
-}
int
ACEXML_Parser::parse_element_decl (ACEXML_ENV_SINGLE_ARG_DECL)
@@ -1099,48 +1152,61 @@ ACEXML_Parser::parse_element_decl (ACEXML_ENV_SINGLE_ARG_DECL)
if ((this->parse_token (ACE_TEXT ("LEMENT")) < 0) ||
this->skip_whitespace_count () == 0)
{
- this->report_fatal_error (ACEXML_EELEMENT ACEXML_ENV_ARG_PARAMETER);
- return -1;
+ this->fatal_error (ACE_TEXT ("Expecting keyword ELEMENT followed by "
+ "space")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
}
- ACEXML_Char *element_name = this->read_name ();
+ ACEXML_Char *element_name = this->parse_name ();
if (element_name == 0)
{
- this->report_fatal_error (ACEXML_ERDELENAME ACEXML_ENV_ARG_PARAMETER);
- return -1;
+ this->fatal_error (ACE_TEXT ("Invalid element name")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
}
ACEXML_Char nextch ;
- this->skip_whitespace_count (&nextch);
+ if (this->skip_whitespace_count (&nextch) == 0)
+ {
+ this->fatal_error (ACE_TEXT ("Expecting a space between ELEMENT name "
+ "and definition")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
switch (nextch)
{
- case 'E': // EMPTY
- if (this->parse_token (ACE_TEXT ("EMPTY")) < 0)
- {
- this->report_fatal_error (ACEXML_EEMPTY ACEXML_ENV_ARG_PARAMETER);
- return -1;
- }
- break;
- case 'A': // ANY
- if (this->parse_token (ACE_TEXT ("ANY")) < 0)
- {
- this->report_fatal_error (ACEXML_EANY ACEXML_ENV_ARG_PARAMETER);
- return -1;
- }
- break;
- case '(': // children
- this->parse_children_definition (ACEXML_ENV_SINGLE_ARG_PARAMETER);
- ACEXML_CHECK_RETURN (-1);
- break;
- default: // error
- this->report_fatal_error (ACEXML_ERDELE ACEXML_ENV_ARG_PARAMETER);
- return -1;
+ case 'E': // EMPTY
+ if (this->parse_token (ACE_TEXT ("EMPTY")) < 0)
+ {
+ this->fatal_error (ACE_TEXT ("Expecting keyword EMPTY")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ break;
+ case 'A': // ANY
+ if (this->parse_token (ACE_TEXT ("ANY")) < 0)
+ {
+ this->fatal_error (ACE_TEXT ("Expecting keyword ANY")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ break;
+ case '(': // children
+ this->parse_children_definition (ACEXML_ENV_SINGLE_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ break;
+ default: // error
+ this->fatal_error (ACE_TEXT ("Invalid element definition")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
}
- if (this->skip_whitespace (0) != '>')
+ if (this->skip_whitespace () != '>')
{
- this->report_fatal_error (ACEXML_GREAT ACEXML_ENV_ARG_PARAMETER);
- return -1;
+ this->fatal_error (ACE_TEXT ("Expecing '>' after element defintion")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
}
return 0;
}
@@ -1153,8 +1219,9 @@ ACEXML_Parser::parse_entity_decl (ACEXML_ENV_SINGLE_ARG_DECL)
if ((this->parse_token (ACE_TEXT ("NTITY")) < 0) ||
this->skip_whitespace_count (&nextch) == 0)
{
- this->report_fatal_error (ACEXML_EENTITY ACEXML_ENV_ARG_PARAMETER);
- return -1;
+ this->fatal_error (ACE_TEXT ("Expecting keyword ENTITY followed by a "
+ "space") ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
}
int is_GEDecl = 1;
@@ -1164,100 +1231,146 @@ ACEXML_Parser::parse_entity_decl (ACEXML_ENV_SINGLE_ARG_DECL)
this->get (); // consume the '%'
if (this->skip_whitespace_count (&nextch) == 0)
{
- this->report_fatal_error (ACEXML_INVREF ACEXML_ENV_ARG_PARAMETER);
- return -1;
+ this->fatal_error (ACE_TEXT ("Expecting space between % and "
+ "entity name")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
}
}
- ACEXML_Char *entity_name = this->read_name ();
+ ACEXML_Char *entity_name = this->parse_name ();
if (entity_name == 0)
{
- this->report_fatal_error (ACEXML_ENTNAME ACEXML_ENV_ARG_PARAMETER);
- return -1;
+ this->fatal_error (ACE_TEXT ("Invalid entity name")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
}
- this->skip_whitespace_count (&nextch);
-
+ if (this->skip_whitespace_count (&nextch) == 0)
+ {
+ this->fatal_error (ACE_TEXT ("Expecting space between entity name and "
+ "entityDef")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ int retval = 0;
if (nextch == '\'' || nextch == '"')
{
ACEXML_Char *entity_value = 0;
-
- if (this->get_quoted_string (entity_value) != 0)
+ if (this->parse_entity_value (entity_value
+ ACEXML_ENV_ARG_PARAMETER) != 0)
{
- this->report_fatal_error(ACEXML_ENTVALUE ACEXML_ENV_ARG_PARAMETER);
- return -1;
+ this->fatal_error(ACE_TEXT ("Invalid EntityValue")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
}
-
if (is_GEDecl)
+ retval = this->internal_GE_.add_entity (entity_name,
+ entity_value);
+ else
+ retval = this->internal_PE_.add_entity (entity_name,
+ entity_value);
+ if (retval < 0)
{
- if (this->entities_.add_entity (entity_name, entity_value) != 0)
- {
- this->report_fatal_error(ACEXML_DUPENT ACEXML_ENV_ARG_PARAMETER);
- return -1;
- }
+ this->fatal_error (ACE_TEXT ("Internal Parser Error in adding"
+ "Entity to map")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
}
- else
+ else if (retval == 1)
{
- // @@ need to implement PEdecl lookup mechanism
- ACEXML_THROW_RETURN (ACEXML_SAXNotSupportedException (), -1);
+ this->warning (ACE_TEXT ("Duplicate entity found")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
}
}
else
{
ACEXML_Char *systemid, *publicid;
- this->parse_external_id_and_ref (publicid, systemid ACEXML_ENV_ARG_PARAMETER);
+ this->parse_external_id (publicid, systemid
+ ACEXML_ENV_ARG_PARAMETER);
ACEXML_CHECK_RETURN (-1);
if (systemid == 0)
{
- this->report_fatal_error(ACEXML_INVEXTID ACEXML_ENV_ARG_PARAMETER);
- return -1;
+ this->fatal_error(ACE_TEXT ("Invalid SystemLiteral")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
}
this->skip_whitespace_count (&nextch);
if (nextch == 'N') // NDATA section followed
{
if (is_GEDecl == 0)
{
- this->report_fatal_error(ACEXML_UNDATA ACEXML_ENV_ARG_PARAMETER);
- return -1;
+ this->fatal_error(ACE_TEXT ("Invalid NDataDecl in PEDef")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
}
if ((this->parse_token (ACE_TEXT ("NDATA")) < 0) ||
this->skip_whitespace_count (&nextch) == 0)
{
- this->report_fatal_error(ACEXML_ENDATA ACEXML_ENV_ARG_PARAMETER);
- return -1;
+ this->fatal_error(ACE_TEXT ("Expecting keyword NDATA followed "
+ "by a space") ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
}
- ACEXML_Char *ndata = this->read_name ();
- this->dtd_handler_->unparsedEntityDecl (entity_name, publicid,
- systemid, ndata
- ACEXML_ENV_ARG_PARAMETER);
- ACEXML_CHECK_RETURN (-1);
+ ACEXML_Char *ndata = this->parse_name ();
+ if (this->validate_) // [VC: Notation Declared]
+ {
+ if (!this->notations_.resolve_entity (ndata))
+ {
+ this->fatal_error (ACE_TEXT ("Undeclared Notation name")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ this->dtd_handler_->unparsedEntityDecl(entity_name, publicid,
+ systemid, ndata
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
}
else
{
- // @@ Need to support external CharStream sources
- ACE_DEBUG ((LM_DEBUG,
- ACE_TEXT ("ENTITY: (%s) "),
- entity_name));
-
- if (publicid == 0)
- ACE_DEBUG ((LM_DEBUG,
- ACE_TEXT ("SYSTEM %s\n"),
- systemid));
+ if (is_GEDecl)
+ retval = this->external_GE_.add_entity (entity_name,
+ systemid);
else
- ACE_DEBUG ((LM_DEBUG,
- ACE_TEXT ("PUBLIC %s %s\n"),
- publicid, systemid));
+ retval = this->external_PE_.add_entity (entity_name,
+ systemid);
+ if (retval < 0)
+ {
+ this->fatal_error(ACE_TEXT ("Internal Parser Error")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ else if (retval == 1)
+ this->warning(ACE_TEXT ("Duplicate external entity")
+ ACEXML_ENV_ARG_PARAMETER);
+ if (is_GEDecl)
+ retval = this->external_GE_.add_entity (entity_name,
+ publicid);
+ else
+ retval = this->external_PE_.add_entity (entity_name,
+ publicid);
+ if (retval < 0)
+ {
+ this->fatal_error(ACE_TEXT ("Internal Parser Error")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ else if (retval == 1)
+ this->warning (ACE_TEXT ("Duplicate entity definition")
+ ACEXML_ENV_ARG_PARAMETER);
}
}
// End of ENTITY definition
- if (this->skip_whitespace (0) != '>')
+ if (this->skip_whitespace() != '>')
{
- this->report_fatal_error(ACEXML_GREAT ACEXML_ENV_ARG_PARAMETER);
- return -1;
+ this->fatal_error(ACE_TEXT ("Expecting '>' at end of entityDef")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
}
return 0;
}
@@ -1268,229 +1381,100 @@ ACEXML_Parser::parse_attlist_decl (ACEXML_ENV_SINGLE_ARG_DECL)
if ((this->parse_token (ACE_TEXT ("ATTLIST")) < 0) ||
this->skip_whitespace_count () == 0)
{
- this->report_fatal_error(ACEXML_EATTLIST ACEXML_ENV_ARG_PARAMETER);
- return -1;
+ this->fatal_error(ACE_TEXT ("Expecting keyword 'ATTLIST' followed by a "
+ "space ") ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
}
- ACEXML_Char *element_name = this->read_name ();
+ ACEXML_Char *element_name = this->parse_name ();
if (element_name == 0)
{
- this->report_fatal_error(ACEXML_ERDELENAME ACEXML_ENV_ARG_PARAMETER);
- return -1;
+ this->fatal_error(ACE_TEXT ("Invalid element Name in attlistDecl")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
}
-
- ACEXML_Char nextch = this->skip_whitespace (0);
-
+ ACEXML_Char fwd;
+ int count = this->skip_whitespace_count(&fwd);
+ int nrelems = 0;
// Parse AttDef*
- while (nextch != '>')
+ while (fwd != '>')
{
- // Parse attribute name
- ACEXML_Char *att_name = this->read_name (nextch);
- if (att_name == 0)
+ if (count == 0)
{
- this->report_fatal_error(ACEXML_EATTNAME ACEXML_ENV_ARG_PARAMETER);
- return -1;
+ this->fatal_error(ACE_TEXT ("Expecting space between element name "
+ "and AttDef") ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
}
-
- /*
- Parse AttType:
- Possible keywords:
- CDATA // StringType
- ID // TokenizedType
- IDREF
- IDREFS
- ENTITY
- ENTITIES
- NMTOKEN
- NMTOKENS
- NOTATION // EnumeratedType - NotationTYpe
- ( // EnumeratedType - Enumeration
- */
- nextch = this->skip_whitespace (0);
- switch (nextch)
+ switch (fwd)
{
- case 'C': // CDATA
- if ((this->parse_token (ACE_TEXT ("DATA")) < 0) ||
- this->skip_whitespace_count () == 0)
- {
- this->report_fatal_error(ACEXML_ECDATA ACEXML_ENV_ARG_PARAMETER);
- return -1;
- }
- // Else, we have successfully identified the type of the
- // attribute as CDATA
- // @@ Set up validator appropriately here.
- break;
- case 'I': // ID, IDREF, or, IDREFS
- if (this->get () == 'D')
- {
- if (this->skip_whitespace_count (&nextch) > 0)
- {
- // We have successfully identified the type of the
- // attribute as ID @@ Set up validator as such.
- break;
- }
- if (this->parse_token (ACE_TEXT ("REF")) == 0)
- {
- if (this->skip_whitespace_count (&nextch) > 0)
- {
- // We have successfully identified the type of
- // the attribute as IDREF
- // @@ Set up validator as such.
- break;
- }
- else if (nextch == 'S' &&
- this->get () && // consume the 'S'
- this->skip_whitespace_count () != 0)
- {
- // We have successfully identified the type of
- // the attribute as IDREFS
- // @@ Set up validator as such.
- break;
- }
- }
- }
- // Admittedly, this error message is not precise enough
- this->report_fatal_error(ACEXML_EID ACEXML_ENV_ARG_PARAMETER);
- return -1;
- case 'E': // ENTITY or ENTITIES
- if (this->parse_token (ACE_TEXT ("NTIT")) == 0)
- {
- nextch = this->get ();
- if (nextch == 'Y')
- {
- // We have successfully identified the type of
- // the attribute as ENTITY
- // @@ Set up validator as such.
- }
- else if (nextch == 'I'&& this->get () == 'E' &&
- this->get () == 'S')
- {
- // We have successfully identified the type of
- // the attribute as ENTITIES
- // @@ Set up validator as such.
- }
- if (this->skip_whitespace_count () > 0)
- {
- // success
- break;
- }
- }
- // Admittedly, this error message is not precise enough
- this->report_fatal_error(ACEXML_EENTITIES ACEXML_ENV_ARG_PARAMETER);
- return -1;
- case 'N': // NMTOKEN, NMTOKENS, or, NOTATION
- nextch = this->get ();
- if (nextch != 'M' || nextch != 'O')
- {
- this->report_fatal_error(ACEXML_ENMTOKEN
- ACEXML_ENV_ARG_PARAMETER);
- return -1;
- }
- if (nextch == 'M')
- {
- if (this->parse_token (ACE_TEXT ("TOKEN")) == 0)
- {
- if (this->skip_whitespace_count (&nextch) > 0)
- {
- // We have successfully identified the type of
- // the attribute as NMTOKEN
- // @@ Set up validator as such.
- break;
- }
- else if (nextch == 'S' && this->skip_whitespace_count () > 0)
- {
- // We have successfully identified the type of
- // the attribute as NMTOKENS
- // @@ Set up validator as such.
- break;
- }
- }
- this->report_fatal_error(ACEXML_ENMTOKENS
- ACEXML_ENV_ARG_PARAMETER);
- return -1;
- }
- else // NOTATION
- {
- if ((this->parse_token (ACE_TEXT ("TATION")) < 0) ||
- this->skip_whitespace_count () == 0)
- {
- this->report_fatal_error(ACEXML_ENOTATION
- ACEXML_ENV_ARG_PARAMETER);
- return -1;
- }
-
- if (this->get () != '(')
- {
- this->report_fatal_error(ACEXML_LPAREN
- ACEXML_ENV_ARG_PARAMETER);
- return -1;
- }
-
- this->skip_whitespace_count ();
-
- do {
- ACEXML_Char *notation_name = this->read_name ();
- if (notation_name == 0)
- {
- this->report_fatal_error(ACEXML_ENOTNAME
- ACEXML_ENV_ARG_PARAMETER);
- return -1;
- }
- // @@ get another notation name, set up validator as such
- this->skip_whitespace_count (&nextch);
- } while (nextch != ')');
-
- this->get (); // consume the closing paren.
- this->skip_whitespace_count ();
- }
- break;
- case '(': // EnumeratedType - Enumeration
- this->skip_whitespace_count ();
-
- do {
- ACEXML_Char *token_name = this->read_name (); // @@ need a special read_nmtoken?
- if (token_name == 0)
+ case '%':
+ if (this->external_subset_)
{
- this->report_fatal_error(ACEXML_ENMNAME
- ACEXML_ENV_ARG_PARAMETER);
- return -1;
+ this->parse_PE_reference (ACEXML_ENV_SINGLE_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
}
- // @@ get another nmtoken, set up validator as such
- this->skip_whitespace_count (&nextch);
- } while (nextch != ')');
-
- this->get (); // consume the closing paren.
- this->skip_whitespace_count ();
- break;
- default:
- {
- this->report_fatal_error(ACEXML_INVATTRTYPE
- ACEXML_ENV_ARG_PARAMETER);
- return -1;
- }
- ACE_NOTREACHED (break);
+ break;
+ case 0:
+ nrelems = this->pop_context();
+ if (nrelems >= 1)
+ break;
+ else
+ {
+ this->fatal_error(ACE_TEXT ("Internal Parser Error")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ default:
+ break;
}
-
- /*
- Parse DefaultDecl:
- #REQUIRED
- #IMPLIED
- #FIXED
- quoted string // #FIXED
- */
- nextch = this->peek ();
- switch (nextch)
+ // Parse attribute name
+ ACEXML_Char *att_name = this->parse_name ();
+ if (att_name == 0)
{
- case '#':
- this->get (); // consume the '#'
- switch (this->get ())
- {
+ this->fatal_error(ACE_TEXT ("Invalid AttName")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ count = this->skip_whitespace_count (&fwd);
+ if (count == 0)
+ {
+ this->fatal_error(ACE_TEXT ("Expecting space between AttName and "
+ "AttType") ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ this->parse_atttype (ACEXML_ENV_SINGLE_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ this->parse_defaultdecl (ACEXML_ENV_SINGLE_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ count = this->skip_whitespace_count(&fwd);
+ }
+ this->get (); // consume closing '>'
+ return 0;
+}
+
+int
+ACEXML_Parser::parse_defaultdecl (ACEXML_ENV_SINGLE_ARG_DECL)
+{
+ /*
+ Parse DefaultDecl:
+ #REQUIRED
+ #IMPLIED
+ #FIXED
+ quoted string // #FIXED
+ */
+ ACEXML_Char nextch = this->peek ();
+ switch (nextch)
+ {
+ case '#':
+ this->get (); // consume the '#'
+ switch (this->get ())
+ {
case 'R':
if (this->parse_token (ACE_TEXT ("EQUIRED")) < 0)
{
- this->report_fatal_error(ACEXML_EREQUIRED
- ACEXML_ENV_ARG_PARAMETER);
- return -1;
+ this->fatal_error(ACE_TEXT ("Expecting keyword REQUIRED")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
}
// We now know this attribute is required
// @@ Set up the validator as such.
@@ -1498,9 +1482,9 @@ ACEXML_Parser::parse_attlist_decl (ACEXML_ENV_SINGLE_ARG_DECL)
case 'I':
if (this->parse_token (ACE_TEXT ("MPLIED")) < 0)
{
- this->report_fatal_error(ACEXML_EIMPLIED
- ACEXML_ENV_ARG_PARAMETER);
- return -1;
+ this->fatal_error(ACE_TEXT ("Expecting keyword IMPLIED")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
}
// We now know this attribute is impleid.
// @@ Set up the validator as such.
@@ -1509,44 +1493,242 @@ ACEXML_Parser::parse_attlist_decl (ACEXML_ENV_SINGLE_ARG_DECL)
if (this->parse_token (ACE_TEXT ("IXED")) < 0 ||
this->skip_whitespace_count () == 0)
{
- this->report_fatal_error(ACEXML_EFIXED
- ACEXML_ENV_ARG_PARAMETER);
- return -1;
+ this->fatal_error(ACE_TEXT ("Expecting keyword FIXED")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
}
// We now know this attribute is fixed.
ACEXML_Char *fixed_attr;
- if (this->get_quoted_string (fixed_attr) != 0)
+ if (this->parse_attvalue (fixed_attr
+ ACEXML_ENV_ARG_PARAMETER) != 0)
{
- this->report_fatal_error(ACEXML_EINVFIXED
- ACEXML_ENV_ARG_PARAMETER);
- return -1;
+ this->fatal_error(ACE_TEXT ("Invalid AttValue")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
}
// @@ set up validator
break;
default:
- break;
- }
- break;
- case '\'':
- case '"':
- ACEXML_Char *fixed_attr;
- if (this->get_quoted_string (fixed_attr) != 0)
+ this->fatal_error (ACE_TEXT ("Invalid DefaultDecl")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ break;
+ case '\'':
+ case '"':
+ ACEXML_Char *fixed_attr;
+ if (this->parse_attvalue (fixed_attr ACEXML_ENV_ARG_PARAMETER) != 0)
+ {
+ this->fatal_error(ACE_TEXT ("Invalid AttValue")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ // @@ set up validator
+ break;
+ default:
+ this->fatal_error (ACE_TEXT ("Invalid DefaultDecl")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ break;
+ }
+ return 0;
+}
+
+
+int
+ACEXML_Parser::parse_atttype (ACEXML_ENV_SINGLE_ARG_DECL)
+{
+ /*
+ Parse AttType:
+ Possible keywords:
+ CDATA // StringType
+ ID // TokenizedType
+ IDREF
+ IDREFS
+ ENTITY
+ ENTITIES
+ NMTOKEN
+ NMTOKENS
+ NOTATION // EnumeratedType - NotationTYpe
+ ( // EnumeratedType - Enumeration
+ */
+ ACEXML_Char nextch = this->get();
+ switch (nextch)
+ {
+ case 'C': // CDATA
+ if ((this->parse_token (ACE_TEXT ("DATA")) < 0) ||
+ this->skip_whitespace_count () == 0)
+ {
+ this->fatal_error(ACE_TEXT ("Expecting keyword 'CDATA'")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ // Else, we have successfully identified the type of the
+ // attribute as CDATA
+ // @@ Set up validator appropriately here.
+ break;
+ case 'I': // ID, IDREF, or, IDREFS
+ if (this->get () == 'D')
+ {
+ if (this->skip_whitespace_count (&nextch) > 0)
+ {
+ // We have successfully identified the type of the
+ // attribute as ID @@ Set up validator as such.
+ break;
+ }
+ if (this->parse_token (ACE_TEXT ("REF")) == 0)
+ {
+ if (this->skip_whitespace_count (&nextch) > 0)
+ {
+ // We have successfully identified the type of
+ // the attribute as IDREF
+ // @@ Set up validator as such.
+ break;
+ }
+ else if (nextch == 'S' &&
+ this->get () && // consume the 'S'
+ this->skip_whitespace_count () != 0)
+ {
+ // We have successfully identified the type of
+ // the attribute as IDREFS
+ // @@ Set up validator as such.
+ break;
+ }
+ }
+ }
+ // Admittedly, this error message is not precise enough
+ this->fatal_error(ACE_TEXT ("Expecting keyword `ID', `IDREF', or"
+ "`IDREFS'") ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ case 'E': // ENTITY or ENTITIES
+ if (this->parse_token (ACE_TEXT ("NTIT")) == 0)
+ {
+ nextch = this->get ();
+ if (nextch == 'Y')
+ {
+ // We have successfully identified the type of
+ // the attribute as ENTITY
+ // @@ Set up validator as such.
+ }
+ else if (nextch == 'I'&& this->get () == 'E' &&
+ this->get () == 'S')
+ {
+ // We have successfully identified the type of
+ // the attribute as ENTITIES
+ // @@ Set up validator as such.
+ }
+ if (this->skip_whitespace_count () > 0)
+ {
+ // success
+ break;
+ }
+ }
+ // Admittedly, this error message is not precise enough
+ this->fatal_error(ACE_TEXT ("Expecting keyword `ENTITY', or"
+ "`ENTITIES'") ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ case 'N': // NMTOKEN, NMTOKENS, or, NOTATION
+ nextch = this->get ();
+ if (nextch != 'M' || nextch != 'O')
+ {
+ this->fatal_error(ACE_TEXT ("Expecting keyword `NMTOKEN',"
+ "`NMTOKENS', or `NOTATION'")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ if (nextch == 'M')
+ {
+ if (this->parse_token (ACE_TEXT ("TOKEN")) == 0)
+ {
+ if (this->skip_whitespace_count (&nextch) > 0)
+ {
+ // We have successfully identified the type of
+ // the attribute as NMTOKEN
+ // @@ Set up validator as such.
+ break;
+ }
+ else if (nextch == 'S' && this->skip_whitespace_count () > 0)
+ {
+ // We have successfully identified the type of
+ // the attribute as NMTOKENS
+ // @@ Set up validator as such.
+ break;
+ }
+ }
+ this->fatal_error(ACE_TEXT ("Expecting keyword `NMTOKEN' or `NMTO"
+ "KENS'") ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ else // NOTATION
+ {
+ if ((this->parse_token (ACE_TEXT ("TATION")) < 0) ||
+ this->skip_whitespace_count () == 0)
+ {
+ this->fatal_error(ACE_TEXT ("Expecting keyword `NOTATION'")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+
+ if (this->get () != '(')
+ {
+ this->fatal_error(ACE_TEXT ("Expecting '(' in elementDef")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+
+ this->skip_whitespace_count ();
+
+ do {
+ ACEXML_Char *notation_name = this->parse_name ();
+ if (notation_name == 0)
+ {
+ this->fatal_error(ACE_TEXT ("Invalid notation name")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ // @@ get another notation name, set up validator as such
+ this->skip_whitespace_count (&nextch);
+ if (nextch != '|')
+ break;
+ nextch = this->get();
+ this->skip_whitespace_count (&nextch);
+ } while (nextch != ')');
+
+ this->get (); // consume the closing paren.
+ this->skip_whitespace_count ();
+ }
+ break;
+ case '(': // EnumeratedType - Enumeration
+ this->skip_whitespace_count ();
+
+ do {
+ ACEXML_Char *token_name = this->parse_nmtoken();
+ if (token_name == 0)
{
- this->report_fatal_error(ACEXML_EINVFIXED
- ACEXML_ENV_ARG_PARAMETER);
- return -1;
+ this->fatal_error(ACE_TEXT ("Invalid NMTOKEN")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
}
- // @@ set up validator
- break;
- default:
- break;
+ // @@ get another nmtoken, set up validator as such
+ this->skip_whitespace_count (&nextch);
+ if (nextch != '|')
+ break;
+ nextch = this->get();
+ this->skip_whitespace_count (&nextch);
+ } while (nextch != ')');
+
+ this->get (); // consume the closing paren.
+ this->skip_whitespace_count ();
+ break;
+ default:
+ {
+ this->fatal_error(ACE_TEXT ("Invalid AttType")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
}
- this->skip_whitespace_count (&nextch);
+ ACE_NOTREACHED (break);
}
-
- this->get (); // consume closing '>'
-
return 0;
}
@@ -1556,93 +1738,72 @@ ACEXML_Parser::parse_notation_decl (ACEXML_ENV_SINGLE_ARG_DECL)
if (this->parse_token (ACE_TEXT ("NOTATION")) < 0 ||
this->skip_whitespace_count () == 0)
{
- this->report_fatal_error(ACEXML_ENOTATION ACEXML_ENV_ARG_PARAMETER);
- return -1;
+ this->fatal_error(ACE_TEXT ("Expecting Keyword 'NOTATION'")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
}
- ACEXML_Char *notation = this->read_name ();
+ ACEXML_Char *notation = this->parse_name ();
if (notation == 0)
{
- this->report_fatal_error(ACEXML_INVNOTNAME ACEXML_ENV_ARG_PARAMETER);
- return -1;
+ this->fatal_error(ACE_TEXT ("Invalid Notation name")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
}
this->skip_whitespace_count ();
ACEXML_Char *systemid, *publicid;
- this->parse_external_id_and_ref (publicid, systemid ACEXML_ENV_ARG_PARAMETER);
+ // Gross hack but otherwise we need to go around a lot of loops to parse,
+ // When the ExternalID starts with 'PUBLIC' we cannot distinguish a
+ // PublicId from a ExternalID by looking using a one character read-ahead
+ ACEXML_ParserInt::ReferenceState temp = this->ref_state_;
+ this->ref_state_ = ACEXML_ParserInt::IN_NOTATION;
+
+ this->parse_external_id (publicid, systemid
+ ACEXML_ENV_ARG_PARAMETER);
ACEXML_CHECK_RETURN (-1);
+ // Restore the original value.
+ this->ref_state_ = temp;
- if (this->get () != '>')
+ // [VC: Unique Notation Name]
+ if (systemid && this->notations_.add_entity (notation, systemid) != 0
+ && this->validate_)
{
- this->report_fatal_error(ACEXML_GREAT ACEXML_ENV_ARG_PARAMETER);
- return -1;
+ this->fatal_error(ACE_TEXT ("Internal Parser Error")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
}
-
- this->dtd_handler_->notationDecl (notation,
- publicid,
- systemid ACEXML_ENV_ARG_PARAMETER);
- ACEXML_CHECK_RETURN (-1);
-
- return 0;
-}
-
-int
-ACEXML_Parser::parse_external_id_and_ref (ACEXML_Char *&publicId,
- ACEXML_Char *&systemId ACEXML_ENV_ARG_DECL)
-{
- publicId = systemId = 0;
- ACEXML_Char nextch = this->get ();
-
- switch (nextch)
+ if (publicid)
{
- case 'S': // External SYSTEM id.
- if (this->parse_token (ACE_TEXT ("YSTEM")) < 0 ||
- this->skip_whitespace_count () == 0)
- {
- this->report_fatal_error(ACEXML_ESYSTEM ACEXML_ENV_ARG_PARAMETER);
- return -1;
- }
- if (this->get_quoted_string (systemId) != 0)
- {
- this->report_fatal_error(ACEXML_ELITERAL ACEXML_ENV_ARG_PARAMETER);
- return -1;
- }
- this->locator_.setSystemId (systemId);
- break;
- case 'P': // External PUBLIC id or previously defined PUBLIC id.
- if (this->parse_token (ACE_TEXT ("UBLIC")) < 0 ||
- this->skip_whitespace_count () == 0)
- {
- this->report_fatal_error(ACEXML_EPUBLIC ACEXML_ENV_ARG_PARAMETER);
- return -1;
- }
- if (this->get_quoted_string (publicId) != 0)
+ int retval = this->notations_.add_entity (notation, publicid);
+ if (retval != 0 && !systemid && this->validate_)
{
- this->report_fatal_error(ACEXML_ELITERAL ACEXML_ENV_ARG_PARAMETER);
- return -1;
+ this->fatal_error(ACE_TEXT ("Internal Parser Error")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
}
- this->locator_.setPublicId (publicId);
+ }
- this->skip_whitespace_count (&nextch);
- if (nextch == '\'' || nextch == '"') // not end of NOTATION yet.
- {
- if (this->get_quoted_string (systemId) != 0)
- {
- this->report_fatal_error(ACEXML_ELITERAL
- ACEXML_ENV_ARG_PARAMETER);
- return -1;
- }
- this->locator_.setSystemId (systemId);
- }
- break;
- default:
- this->report_fatal_error(ACEXML_ESYSPUB ACEXML_ENV_ARG_PARAMETER);
- return -1;
+ if (this->skip_whitespace() != '>')
+ {
+ this->fatal_error(ACE_TEXT ("Expecting '>' at end of NotationDecl")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+
+ if (this->validate_ && this->dtd_handler_)
+ {
+ this->dtd_handler_->notationDecl (notation,
+ publicid,
+ systemid ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
}
return 0;
}
+
+
int
ACEXML_Parser::parse_children_definition (ACEXML_ENV_SINGLE_ARG_DECL)
{
@@ -1654,45 +1815,49 @@ ACEXML_Parser::parse_children_definition (ACEXML_ENV_SINGLE_ARG_DECL)
switch (nextch)
{
- case '#': // Mixed element,
- if (this->parse_token (ACE_TEXT ("#PCDATA")) < 0)
- {
- this->report_fatal_error(ACEXML_EPCDATA ACEXML_ENV_ARG_PARAMETER);
- return -1;
- }
+ case '#': // Mixed element,
+ if (this->parse_token (ACE_TEXT ("#PCDATA")) < 0)
+ {
+ this->fatal_error(ACE_TEXT ("Expecting keyword '#PCDATA'")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
- this->skip_whitespace_count (&nextch);
+ this->skip_whitespace_count (&nextch);
- while (nextch != ')')
- {
- if (this->get () != '|')
- {
- this->report_fatal_error(ACEXML_EMIXED ACEXML_ENV_ARG_PARAMETER);
- return -1;
- }
- this->skip_whitespace_count ();
+ while (nextch != ')')
+ {
+ if (this->get () != '|')
+ {
+ this->fatal_error(ACE_TEXT ("Illegal character in Mixed "
+ "element")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ this->skip_whitespace_count ();
- ACEXML_Char *name = this->read_name ();
- // @@ name will be used in the Validator later.
- ACE_UNUSED_ARG (name);
- ++subelement_number;
- // @@ Install Mixed element name into the validator.
- this->skip_whitespace_count (&nextch);
- }
+ ACEXML_Char *name = this->parse_name ();
+ // @@ name will be used in the Validator later.
+ ACE_UNUSED_ARG (name);
+ ++subelement_number;
+ // @@ Install Mixed element name into the validator.
+ this->skip_whitespace_count (&nextch);
+ }
- if (this->get () != ')' ||
- (subelement_number && this->get () != '*'))
- {
- this->report_fatal_error(ACEXML_ERPAREN ACEXML_ENV_ARG_PARAMETER);
+ if (this->get () != ')' ||
+ (subelement_number && this->get () != '*'))
+ {
+ this->fatal_error(ACE_TEXT ("Expecing ')' or ')*' at end of Mixed"
+ " element") ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ // @@ close the element definition in the validator.
+ break;
+ default:
+ int status = this->parse_child (1 ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ if (status != 0)
return -1;
- }
- // @@ close the element definition in the validator.
- break;
- default:
- int status = this->parse_child (1 ACEXML_ENV_ARG_PARAMETER);
- ACEXML_CHECK_RETURN (-1);
- if (status != 0)
- return -1;
}
return 0;
@@ -1705,8 +1870,9 @@ ACEXML_Parser::parse_child (int skip_open_paren ACEXML_ENV_ARG_DECL)
if (skip_open_paren == 0 &&
this->get () != '(')
{
- this->report_fatal_error(ACEXML_LPAREN ACEXML_ENV_ARG_PARAMETER);
- return -1;
+ this->fatal_error(ACE_TEXT ("Expecting '(' at beginning of children")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
}
ACEXML_Char node_type = 0;
@@ -1716,57 +1882,64 @@ ACEXML_Parser::parse_child (int skip_open_paren ACEXML_ENV_ARG_DECL)
this->skip_whitespace_count (&nextch);
switch (nextch)
{
- case '(':
- this->parse_child (0 ACEXML_ENV_ARG_PARAMETER);
- ACEXML_CHECK_RETURN (-1);
- break;
- default:
- // must be an element name here.
- ACEXML_Char *subelement = this->read_name ();
- if (subelement == 0)
- {
- this->report_fatal_error(ACEXML_ESUBELE ACEXML_ENV_ARG_PARAMETER);
- return -1;
- }
- // @@ Inform validator of the new element here.
- break;
+ case '(':
+ this->parse_child (0 ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ break;
+ default:
+ // must be an element name here.
+ ACEXML_Char *subelement = this->parse_name ();
+ if (subelement == 0)
+ {
+ this->fatal_error(ACE_TEXT ("Invalid subelement name")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ // @@ Inform validator of the new element here.
+ break;
}
this->skip_whitespace_count (&nextch);
switch (nextch)
{
- case '|':
- switch (node_type)
- {
- case 0:
- node_type = '|';
- // @@ inform validator of this new type??
- break;
- case '|':
- break;
- default:
- this->report_fatal_error(ACEXML_ECHOICE ACEXML_ENV_ARG_PARAMETER);
- return -1;
- }
- break;
- case ',':
- switch (node_type)
- {
- case 0:
- node_type = ',';
- // @@ inform validator of this new type??
- break;
- case ',':
- break;
- default:
- this->report_fatal_error(ACEXML_ECHOICE ACEXML_ENV_ARG_PARAMETER);
- return -1;
- }
- case ')':
- break;
- default:
- this->report_fatal_error(ACEXML_ECHOICE ACEXML_ENV_ARG_PARAMETER);
- return -1;
+ case '|':
+ switch (node_type)
+ {
+ case 0:
+ node_type = '|';
+ // @@ inform validator of this new type??
+ break;
+ case '|':
+ break;
+ default:
+ this->fatal_error (ACE_TEXT ("Expecting `,', `|', or `)' "
+ "while defining an element")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ break;
+ case ',':
+ switch (node_type)
+ {
+ case 0:
+ node_type = ',';
+ // @@ inform validator of this new type??
+ break;
+ case ',':
+ break;
+ default:
+ this->fatal_error (ACE_TEXT ("Expecting `,', `|', or `)' "
+ "while defining an element")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ case ')':
+ break;
+ default:
+ this->fatal_error (ACE_TEXT ("Expecting `,', `|', or `)' "
+ "while defining an element")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
}
this->get (); // consume , | or )
} while (nextch != ')');
@@ -1775,194 +1948,1139 @@ ACEXML_Parser::parse_child (int skip_open_paren ACEXML_ENV_ARG_DECL)
nextch = this->peek ();
switch (nextch)
{
- case '?':
- // @@ Consume the character and inform validator as such,
- this->get ();
- break;
- case '*':
- // @@ Consume the character and inform validator as such,
- this->get ();
- break;
- case '+':
- // @@ Consume the character and inform validator as such,
+ case '?':
+ // @@ Consume the character and inform validator as such,
+ this->get ();
+ break;
+ case '*':
+ // @@ Consume the character and inform validator as such,
+ this->get ();
+ break;
+ case '+':
+ // @@ Consume the character and inform validator as such,
+ this->get ();
+ break;
+ default:
+ break; // not much to do.
+ }
+
+ return 0;
+}
+
+int
+ACEXML_Parser::parse_char_reference (ACEXML_Char *buf, size_t len)
+{
+ if (len < 7) // Max size of a CharRef plus terminating '\0'
+ return -1;
+ ACEXML_Char ch = this->get();
+ if (ch != '#') // Internal error.
+ return -1;
+ int hex = 0;
+ ch = this->peek();
+ if (ch == 'x')
+ {
+ hex = 1;
this->get ();
- break;
- default:
- break; // not much to do.
}
+ size_t i = 0;
+ int more_digit = 0;
+ ch = this->get ();
+ for ( ; i < len &&
+ (this->isNormalDigit (ch) && (hex ? this->isCharRef(ch): 1)); ++i)
+ {
+ buf[i] = ch;
+ ch = this->get();
+ ++more_digit;
+ }
+ if (ch != ';' || !more_digit)
+ return -1;
+ buf[i] = 0;
+ ACEXML_UCS4 sum = (ACEXML_UCS4) ACE_OS::strtol (buf, 0, (hex ? 16 : 10));
+ // [WFC: Legal Character]
+ if (!this->isChar (sum))
+ return -1;
+ int clen;
+#if defined (ACE_USES_WCHAR) // UTF-16
+ if ((clen = ACEXML_Transcoder::ucs42utf16 (sum, buf, len)) < 0)
+ return -1;
+#else // or UTF-8
+ if ((clen = ACEXML_Transcoder::ucs42utf8 (sum, buf, len)) < 0)
+ return -1;
+#endif
+ buf [clen] = 0;
+ for (int j = 0; j < clen; ++j)
+ this->obstack_.grow (buf[j]);
return 0;
}
-ACEXML_Char
-ACEXML_Parser::skip_whitespace (ACEXML_Char **whitespace)
+ACEXML_Char*
+ACEXML_Parser::parse_reference_name (void)
{
ACEXML_Char ch = this->get ();
+ if (!this->isLetter (ch) && ch != '_' && ch != ':')
+ return 0;
+ int numchars = 0;
+ while (ch) {
+ this->alt_stack_.grow (ch);
+ numchars++;
+ ch = this->peek ();
+ if (!this->isNameChar (ch))
+ break;
+ ch = this->get ();
+ };
+ if (ch != ';')
+ return 0;
+ ch = this->get();
+ return this->alt_stack_.freeze ();
+}
+
+int
+ACEXML_Parser::parse_attvalue (ACEXML_Char *&str ACEXML_ENV_ARG_DECL)
+{
+ ACEXML_Char quote = this->get ();
+ if (quote != '\'' && quote != '"') // Not a quoted string.
+ return -1;
+ ACEXML_Char ch = this->get ();
+ int nrelems = 0;
+ while (1)
+ {
+ if (ch == quote)
+ {
+ ACEXML_Char* temp = this->obstack_.freeze ();
+ // If the attribute type is not CDATA, then the XML processor
+ // must further process the normalized attribute value by
+ // discarding any leading and trailing space (#x20) characters,
+ // and by replacing sequences of space (#x20) characters by a
+ // single space (#x20) character.
+
+ // if (atttype != CDATA) {
+ // ACEXML_Char* start = temp;
+ // ACEXML_Char* end = temp + ACE_OS::strlen (temp);
+ // while (*start == '\x20')
+ // start++;
+ // if (start == end) // String which is all spaces
+ // str = start;
+ // while (*start != 0)
+ // {
+ // this->obstack_.grow (*start);
+ // start++;
+ // while (*start == '\x20')
+ // start++;
+ // }
+ // str = this->obstack_.freeze();
+ // }
+ str = temp;
+ return 0;
+ }
+ switch (ch)
+ {
+ case '&':
+ if (this->peek () == '#')
+ {
+ ACEXML_Char buf[7];
+ if (this->parse_char_reference (buf, sizeof (buf)) != 0)
+ {
+ // [WFC: Legal Character]
+ ACE_ERROR ((LM_ERROR,
+ ACE_TEXT ("Invalid character reference\n")));
+ return -1;
+ }
+ }
+ else
+ {
+ this->ref_state_ = ACEXML_ParserInt::IN_ATT_VALUE;
+ this->parse_entity_reference (ACEXML_ENV_SINGLE_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ break;
+ case '\x20': case '\x0D': case '\x0A': case '\x09':
+ this->obstack_.grow ('\x20');
+ break;
+ case '<': // [WFC: No < in Attribute Values]
+ ACE_ERROR ((LM_ERROR, ACE_TEXT ("Illegal '<' in AttValue\n")));
+ return -1;
+ case 0:
+ nrelems = this->pop_context();
+ if (nrelems >= 1)
+ break;
+ else
+ {
+ this->fatal_error(ACE_TEXT ("Internal Parser Error")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ default:
+ this->obstack_.grow (ch);
+ break;
+ }
+ ch = this->get();
+ }
+}
+
+
- if (this->is_whitespace (ch) == 0)
+int
+ACEXML_Parser::parse_entity_reference (ACEXML_ENV_SINGLE_ARG_DECL)
+{
+ ACEXML_Char* replace = this->parse_reference_name ();
+ if (replace == 0)
{
- if (whitespace != 0)
- *whitespace = 0;
- return ch;
+ this->fatal_error (ACE_TEXT ("Invalid Reference name")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
}
- do
+ // [WFC: Parsed Entity]
+ if (this->unparsed_entities_.resolve_entity (replace)) {
+ this->fatal_error (ACE_TEXT ("EntityRef refers to unparsed entity")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ // Look in the internal general entities set first.
+ const ACEXML_Char* entity = this->internal_GE_.resolve_entity(replace);
+
+ // Look in the predefined entities.
+ if (!entity && !this->validate_)
{
- if (whitespace != 0)
- this->obstack_.grow (ch);
- ch = this->get ();
+ entity = this->predef_entities_.resolve_entity (replace);
+ if (!entity)
+ {
+ this->fatal_error (ACE_TEXT ("Undefined Entity reference")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
}
- while (this->is_whitespace (ch));
- if (whitespace != 0)
- *whitespace = this->obstack_.freeze ();
+ if (!entity && // No match in internal
+ (!(this->internal_dtd_ || this->external_dtd_) || // or No DTDs
+ // or Only Internal DTD and no parameter entity references
+ (this->internal_dtd_ && !this->external_dtd_ && !this->has_pe_refs_) ||
+ this->standalone_)) // or Standalone = 'yes'
+ {
+ // [WFC: Entity Declared]
+ this->fatal_error (ACE_TEXT ("Undeclared EntityRef")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+
+ ACEXML_Char* systemId = 0;
+ ACEXML_Char* publicId = 0;
+ if (!entity && this->validate_)
+ {
+ if (this->external_GE_.resolve_entity (replace, systemId, publicId) < 0)
+ {
+ this->fatal_error (ACE_TEXT ("Undefined Entity reference")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ if (this->ref_state_ == ACEXML_ParserInt::IN_ATT_VALUE)
+ {
+ this->fatal_error (ACE_TEXT ("External EntityRef in Attribute Value")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ this->external_entity_ = 1;
+ }
- return ch;
+ // [WFC: No Recursion]
+ int present = this->GE_reference_.insert (entity);
+ if (present == 1 || present == -1)
+ {
+ ACEXML_String ref_name;
+ while (this->GE_reference_.pop(ref_name) != -1)
+ ;
+ this->fatal_error (ACE_TEXT ("Recursion in resolving entity")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ if (!this->external_entity_)
+ {
+ ACEXML_StrCharStream* str = 0;
+ ACE_NEW_RETURN (str, ACEXML_StrCharStream (entity), 0);
+ if (str)
+ {
+ if (this->switch_input (str) != 0)
+ {
+ this->fatal_error (ACE_TEXT ("Unable to create internal input "
+ "stream")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ return 0;
+ }
+ }
+ else if (this->validate_)
+ {
+ ACEXML_Char* uri = this->normalize_systemid (systemId);
+ ACE_Auto_Basic_Array_Ptr<ACEXML_Char> cleanup_uri (uri);
+ ACEXML_InputSource* ip = 0;
+ if (this->entity_resolver_)
+ {
+ ip = this->entity_resolver_->resolveEntity (publicId,
+ (uri ? uri : systemId)
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ if (ip)
+ {
+ if (this->switch_input (ip) != 0)
+ {
+ this->fatal_error (ACE_TEXT ("Internal Parser Error")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ return 0;
+ }
+ else
+ {
+ ACEXML_StreamFactory factory;
+ ACEXML_CharStream* cstream = factory.create_stream (uri ? uri: systemId);
+ if (!cstream) {
+ this->fatal_error (ACE_TEXT ("Invalid input source")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ if (this->switch_input (cstream, systemId, publicId) != 0)
+ {
+ this->fatal_error (ACE_TEXT ("Internal Parser Error")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ return 0;
+ }
+ }
+ ACE_ERROR ((LM_ERROR, ACE_TEXT ("Undefined Entity reference\n")));
+ return -1;
}
int
-ACEXML_Parser::skip_whitespace_count (ACEXML_Char *peeky)
+ACEXML_Parser::parse_PE_reference (ACEXML_ENV_SINGLE_ARG_DECL)
{
- int wscount = 0;
- ACEXML_Char dummy;
- ACEXML_Char &forward = (peeky == 0 ? dummy : *peeky);
+ ACEXML_Char* replace = this->parse_reference_name ();
+ if (replace == 0)
+ {
+ this->fatal_error (ACE_TEXT ("Invalid PEReference name")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+
+ // Look in the internal general entities set first.
+ const ACEXML_Char* entity = this->internal_PE_.resolve_entity(replace);
+
+ if (!entity && // No match in internal
+ (!this->external_dtd_ || // or No External DTDs
+ this->standalone_)) // or Standalone
+ {
+ // [VC: Entity Declared]
+ this->fatal_error (ACE_TEXT ("Undefined Internal PEReference")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+
+ ACEXML_Char* systemId = 0;
+ ACEXML_Char* publicId = 0;
+ if (!entity && this->validate_)
+ {
+ if (this->external_PE_.resolve_entity (replace, systemId, publicId) < 0)
+ {
+ this->fatal_error (ACE_TEXT ("Undefined PEReference")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ this->external_entity_ = 1;
+ }
- for (;this->is_whitespace ((forward = this->peek ())); ++wscount)
- this->get ();
+ // [WFC: No Recursion]
+ int present = this->PE_reference_.insert (replace);
+ if (present == 1 || present == -1)
+ {
+ ACEXML_String ref_name;
+ while (this->PE_reference_.pop(ref_name) != -1)
+ ;
+ this->fatal_error (ACE_TEXT ("Recursion in resolving entity")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
- return wscount;
+ if (entity && !this->external_entity_)
+ {
+ ACEXML_StrCharStream* sstream = 0;
+ ACEXML_String str (entity);
+ if (this->ref_state_ != ACEXML_ParserInt::IN_ENTITY_VALUE)
+ {
+ const ACEXML_Char ch = '\x20';
+ str = ch + str + ch;
+ }
+ ACE_NEW_RETURN (sstream, ACEXML_StrCharStream (str.c_str()), 0);
+ if (sstream)
+ {
+ if (this->switch_input (sstream) != 0)
+ {
+ this->fatal_error (ACE_TEXT ("Error in switching InputSource")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ return 0;
+ }
+ }
+ else if (this->external_entity_ && this->validate_)
+ {
+ ACEXML_Char* uri = this->normalize_systemid (systemId);
+ ACE_Auto_Basic_Array_Ptr<ACEXML_Char> cleanup_uri (uri);
+ ACEXML_InputSource* ip = 0;
+ if (this->entity_resolver_)
+ {
+ ip = this->entity_resolver_->resolveEntity (publicId,
+ (uri ? uri : systemId)
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ if (ip)
+ {
+ if (this->switch_input (ip) != 0)
+ {
+ this->fatal_error (ACE_TEXT ("Error in switching InputSource")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ return 0;
+ }
+ else
+ {
+ ACEXML_StreamFactory factory;
+ ACEXML_CharStream* cstream = factory.create_stream (uri ? uri: systemId);
+ if (!cstream) {
+ this->fatal_error (ACE_TEXT ("Invalid input source")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ if (this->switch_input (cstream, systemId, publicId) != 0)
+ {
+ this->fatal_error (ACE_TEXT ("Error in switching InputSource")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ if (this->ref_state_ == ACEXML_ParserInt::IN_ENTITY_VALUE)
+ {
+ ACEXML_Char less, mark;
+ if (this->peek() == '<')
+ {
+ less = this->get();
+ if (this->peek() == '?')
+ {
+ mark = this->get();
+ if (this->peek() == 'x')
+ {
+ this->parse_text_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ else
+ {
+ this->obstack_.grow (less);
+ this->obstack_.grow (mark);
+ }
+ }
+ this->obstack_.grow (less);
+ }
+ }
+ return 0;
+ }
+ }
+ this->fatal_error (ACE_TEXT ("Undefined PEReference")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ return -1;
}
int
-ACEXML_Parser::parse_token (const ACEXML_Char* keyword)
+ACEXML_Parser::parse_entity_value (ACEXML_Char *&str
+ ACEXML_ENV_ARG_DECL)
{
- if (keyword == 0)
- return -1;
- const ACEXML_Char* ptr = keyword;
- ACEXML_Char ch;
- for (; *ptr != 0 && ((ch = this->get()) == *ptr); ++ptr)
- ;
- if (*ptr == 0)
- return 0;
- else
+ ACEXML_ParserInt::ReferenceState temp = this->ref_state_;
+ ACEXML_Char quote = this->get ();
+ if (quote != '\'' && quote != '"') // Not a quoted string.
return -1;
+ ACEXML_Char ch = this->get ();
+ int nrelems = 0;
+ while (1)
+ {
+ if (ch == quote)
+ {
+ str = this->obstack_.freeze ();
+ this->ref_state_ = temp;
+ return 0;
+ }
+ switch (ch)
+ {
+ case '&':
+ if (this->peek () == '#')
+ {
+ if (!this->external_entity_)
+ {
+ ACEXML_Char buf[7];
+ if (this->parse_char_reference (buf, sizeof (buf)) != 0)
+ {
+ // [WFC: Legal Character]
+ this->fatal_error (ACE_TEXT ("Invalid character "
+ "reference")
+ ACEXML_ENV_ARG_PARAMETER);
+ return -1;
+ }
+ break;
+ }
+ }
+ this->obstack_.grow (ch);
+ break;
+ case '%':
+ if (!this->external_entity_)
+ {
+ this->ref_state_ = ACEXML_ParserInt::IN_ENTITY_VALUE;
+ this->parse_PE_reference(ACEXML_ENV_SINGLE_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ break;
+ }
+ this->obstack_.grow (ch);
+ break;
+ case 0:
+ nrelems = this->pop_context();
+ if (nrelems >= 1)
+ {
+ if (this->external_entity_)
+ this->external_entity_ = 0;
+ break;
+ }
+ else
+ {
+ this->fatal_error(ACE_TEXT ("Internal Parser Error")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+ default:
+ this->obstack_.grow (ch);
+ break;
+ }
+ ch = this->get();
+ }
}
-int
-ACEXML_Parser::skip_equal (void)
+ACEXML_Char *
+ACEXML_Parser::parse_name (ACEXML_Char ch)
{
- if (this->skip_whitespace (0) != '=')
- return -1;
+ if (ch == 0)
+ ch = this->get ();
+ if (!this->isLetter (ch) && ch != '_' && ch != ':')
+ return 0;
+ while (ch) {
+ this->obstack_.grow (ch);
+ ch = this->peek ();
+ if (!this->isNameChar (ch))
+ break;
+ ch = this->get ();
+ };
+ return this->obstack_.freeze ();
+}
- while (this->is_whitespace (this->peek ()))
- this->get ();
- return 0;
+ACEXML_Char*
+ACEXML_Parser::parse_nmtoken (ACEXML_Char ch)
+{
+ if (ch == 0)
+ ch = this->get ();
+ if (!this->isNameChar (ch))
+ return 0;
+ while (ch) {
+ this->obstack_.grow (ch);
+ ch = this->peek ();
+ if (!this->isNameChar (ch))
+ break;
+ ch = this->get ();
+ };
+ return this->obstack_.freeze ();
}
int
-ACEXML_Parser::get_quoted_string (ACEXML_Char *&str)
+ACEXML_Parser::parse_version_num (ACEXML_Char*& str)
{
ACEXML_Char quote = this->get ();
if (quote != '\'' && quote != '"') // Not a quoted string.
return -1;
-
+ int numchars = 0;
while (1)
{
ACEXML_Char ch = this->get ();
+ if (ch == quote && !numchars)
+ return -1;
+ else if (ch == quote)
+ {
+ str = this->obstack_.freeze ();
+ return 0;
+ }
+ // [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
+ if (ch == '-' || ((ch >= 'a' && ch <= 'z') ||
+ (ch >= 'A' && ch <= 'Z') ||
+ (ch >= '0' && ch <= '9') ||
+ (ch == '_' || ch == '.' || ch == ':')))
+ {
+ this->obstack_.grow (ch);
+ numchars++;
+ }
+ else
+ return -1;
+ }
+}
- // @@ Deoes not handle buffer overflow yet.
+int
+ACEXML_Parser::parse_system_literal (ACEXML_Char*& str)
+{
+ const ACEXML_Char quote = this->get();
+ if (quote != '\'' && quote != '"') // Not a quoted string.
+ return -1;
+ while (1)
+ {
+ ACEXML_Char ch = this->get ();
if (ch == quote)
{
str = this->obstack_.freeze ();
return 0;
}
-
- const ACEXML_String *replace = 0;
- ACEXML_String charval;
- ACEXML_Char buffer[6];
- size_t i = 0;
switch (ch)
{
- case '&':
- if (this->peek () == '#')
- {
- if (this->parse_char_reference (buffer, 6) != 0)
- {
- // [WFC: Legal Character]
- ACE_ERROR ((LM_ERROR,
- ACE_TEXT ("Invalid character reference\n")));
- return -1;
- }
- charval.set (buffer, 0);
- replace = &charval;
- }
- else
- replace = this->parse_reference ();
-
- if (replace == 0)
- {
- ACE_ERROR ((LM_ERROR, ACE_TEXT ("Undefined reference\n")));
- return -1;
- }
- for (i = 0; i < replace->length (); ++i)
- this->obstack_.grow ((*replace)[i]);
- // handle reference here.
- break;
- case 0x0D: // End-of-Line handling
- ch = (this->peek () == 0x0A ? this->get () : 0x0A);
- // Fall thru...
- case 0x0A:
- // Fall thru...
- default:
- this->obstack_.grow (ch);
- break;
+ case '\x00': case '\x01': case '\x02': case '\x03': case '\x04':
+ case '\x05': case '\x06': case '\x07': case '\x08': case '\x09':
+ case '\x0A': case '\x0B': case '\x0C': case '\x0D': case '\x0E':
+ case '\x0F': case '\x10': case '\x11': case '\x12': case '\x13':
+ case '\x14': case '\x15': case '\x16': case '\x17': case '\x18':
+ case '\x19': case '\x1A': case '\x1B': case '\x1C': case '\x1D':
+ case '\x1E': case '\x1F': case '\x7F': case '\x20': case '<':
+ case '>': case '#': case '%':
+ ACE_ERROR ((LM_ERROR,
+ ACE_TEXT ("Invalid character in SystemLiteral\n")));
+ return -1;
+ default:
+ this->obstack_.grow (ch);
}
}
}
-ACEXML_Char *
-ACEXML_Parser::read_name (ACEXML_Char ch)
+int
+ACEXML_Parser::parse_pubid_literal (ACEXML_Char*& str)
{
- if (ch == 0)
+ const ACEXML_Char quote = this->get();
+ if (quote != '\'' && quote != '"') // Not a quoted string.
+ return -1;
+ while (1)
{
- ch = this->get ();
-
- if (this->is_whitespace (ch))
- // No white space is allowed here.
- return 0;
+ ACEXML_Char ch = this->get ();
+ if (ch == quote)
+ {
+ str = this->obstack_.freeze ();
+ return 0;
+ }
+ else if (this->isPubidChar (ch))
+ this->obstack_.grow (ch);
+ else
+ return -1;
}
- else if (this->is_nonname (ch))
- return 0;
+}
+int
+ACEXML_Parser::parse_encname (ACEXML_Char*& str)
+{
+ const ACEXML_Char quote = this->get ();
+ if (quote != '\'' && quote != '"') // Not a quoted string.
+ return -1;
+ int numchars = 0;
while (1)
{
- this->obstack_.grow (ch);
- ch = this->peek ();
- if (this->is_nonname (ch))
- break;
- ch = this->get ();
- };
+ ACEXML_Char ch = this->get ();
+ if (ch == quote && !numchars)
+ return -1;
+ else if (ch == quote)
+ {
+ str = this->obstack_.freeze ();
+ return 0;
+ }
+ // [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
+ if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))
+ && !numchars)
+ return -1;
+ if (ch == '-' || ((ch >= 'a' && ch <= 'z') ||
+ (ch >= 'A' && ch <= 'Z') ||
+ (ch >= '0' && ch <= '9') ||
+ (ch == '_' || ch == '.')))
+ {
+ this->obstack_.grow (ch);
+ numchars++;
+ }
+ else
+ return -1;
+ }
+}
- return this->obstack_.freeze ();
+int
+ACEXML_Parser::parse_sddecl (ACEXML_Char*& str)
+{
+ ACEXML_Char quote = this->get ();
+ if (quote != '\'' && quote != '"') // Not a quoted string.
+ return -1;
+ int numchars = 0;
+ while (1)
+ {
+ ACEXML_Char ch = this->get ();
+ if (ch == quote && numchars < 2)
+ return -1;
+ else if (ch == quote)
+ {
+ str = this->obstack_.freeze ();
+ return 0;
+ }
+ // [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
+ // | ('"' ('yes' | 'no') '"'))
+ switch (ch)
+ {
+ case 'y': case 'e': case 's': case 'n': case 'o':
+ this->obstack_.grow (ch);
+ numchars++;
+ break;
+ default:
+ return -1;
+ }
+ }
}
void
-ACEXML_Parser::report_prefix_mapping (const ACEXML_Char* prefix,
- const ACEXML_Char* uri,
- const ACEXML_Char* name,
- int start ACEXML_ENV_ARG_DECL)
+ACEXML_Parser::prefix_mapping (const ACEXML_Char* prefix,
+ const ACEXML_Char* uri,
+ const ACEXML_Char* name,
+ int start ACEXML_ENV_ARG_DECL)
{
if (this->namespaces_)
{
const ACEXML_Char* temp = (name == 0) ? empty_string : prefix;
if (start) {
- this->content_handler_->startPrefixMapping (temp, uri ACEXML_ENV_ARG_PARAMETER);
+ this->content_handler_->startPrefixMapping (temp, uri
+ ACEXML_ENV_ARG_PARAMETER);
ACEXML_CHECK;
}
else
{
- this->content_handler_->endPrefixMapping(temp ACEXML_ENV_ARG_PARAMETER);
+ this->content_handler_->endPrefixMapping(temp
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK;
+ }
+ }
+}
+
+int
+ACEXML_Parser::switch_input (ACEXML_CharStream* cstream,
+ const ACEXML_Char* systemId,
+ const ACEXML_Char* publicId)
+{
+ ACEXML_InputSource* input = 0;
+ ACE_NEW_RETURN (input, ACEXML_InputSource (cstream), -1);
+ return this->switch_input (input, systemId, publicId);
+}
+
+int
+ACEXML_Parser::switch_input (ACEXML_InputSource* input,
+ const ACEXML_Char* systemId,
+ const ACEXML_Char* publicId)
+{
+ ACEXML_LocatorImpl* locator = 0;
+ if (!systemId && this->current_.getLocator())
+ locator = ACE_const_cast (ACEXML_LocatorImpl*,
+ this->current_.getLocator());
+ if (!locator)
+ ACE_NEW_RETURN (locator, ACEXML_LocatorImpl (systemId, publicId), -1);
+ ACEXML_Parser_Context* new_context = 0;
+ ACE_NEW_RETURN (new_context, ACEXML_Parser_Context(input, locator), -1);
+ if (this->push_context (*new_context) != 0)
+ {
+ ACE_ERROR ((LM_ERROR, "Unable to switch input streams"));
+ return -1;
+ }
+ this->current_.reset();
+ this->current_ = *new_context;
+ // Set up Locator.
+ if (this->content_handler_)
+ this->content_handler_->setDocumentLocator (this->current_.getLocator());
+ return 0;
+}
+
+int
+ACEXML_Parser::push_context (const ACEXML_Parser_Context& context)
+{
+ if (this->ctx_stack_.push (context) < 0)
+ {
+ ACE_ERROR ((LM_ERROR, "Unable to push input source onto the stack"));
+ return -1;
+ }
+ return 0;
+}
+
+int
+ACEXML_Parser::pop_context (void)
+{
+ this->current_.reset();
+ int retval = this->ctx_stack_.pop (this->current_);
+ if (retval != 0)
+ return -1;
+ this->current_.reset();
+ if (this->ctx_stack_.top (this->current_) != 0)
+ return -1;
+ // Set up Locator.
+ if (this->content_handler_)
+ this->content_handler_->setDocumentLocator (this->current_.getLocator());
+ return this->ctx_stack_.size();
+}
+
+int
+ACEXML_Parser::getFeature (const ACEXML_Char *name ACEXML_ENV_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXNotRecognizedException,
+ ACEXML_SAXNotSupportedException))
+{
+ if (ACE_OS::strcmp (name, ACEXML_Parser::simple_parsing_feature_) == 0)
+ {
+ return this->simple_parsing_;
+ }
+ else if (ACE_OS::strcmp (name, ACEXML_Parser::namespaces_feature_) == 0)
+ {
+ return this->namespaces_;
+ }
+ else if (ACE_OS::strcmp (name,
+ ACEXML_Parser::namespace_prefixes_feature_) == 0)
+ {
+ return this->namespace_prefixes_;
+ }
+
+ ACEXML_THROW_RETURN (ACEXML_SAXNotRecognizedException (name), -1);
+}
+
+
+
+void
+ACEXML_Parser::setFeature (const ACEXML_Char *name,
+ int boolean_value ACEXML_ENV_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXNotRecognizedException,
+ ACEXML_SAXNotSupportedException))
+{
+ if (ACE_OS::strcmp (name, ACEXML_Parser::simple_parsing_feature_) == 0)
+ {
+ this->simple_parsing_ = (boolean_value == 0 ? 0 : 1);
+ return;
+ }
+ else if (ACE_OS::strcmp (name, ACEXML_Parser::namespaces_feature_) == 0)
+ {
+ this->namespaces_ = (boolean_value == 0 ? 0 : 1);
+ return;
+ }
+ else if (ACE_OS::strcmp (name,
+ ACEXML_Parser::namespace_prefixes_feature_) == 0)
+ {
+ this->namespace_prefixes_ = (boolean_value == 0 ? 0 : 1);
+ return;
+ }
+
+ ACEXML_THROW (ACEXML_SAXNotRecognizedException (name));
+}
+
+void *
+ACEXML_Parser::getProperty (const ACEXML_Char *name ACEXML_ENV_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXNotRecognizedException,
+ ACEXML_SAXNotSupportedException))
+{
+ ACEXML_THROW_RETURN (ACEXML_SAXNotSupportedException (name), 0);
+}
+
+void
+ACEXML_Parser::setProperty (const ACEXML_Char *name,
+ void *value ACEXML_ENV_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXNotRecognizedException,
+ ACEXML_SAXNotSupportedException))
+{
+ ACE_UNUSED_ARG (value);
+
+ ACEXML_THROW (ACEXML_SAXNotSupportedException (name));
+}
+
+void
+ACEXML_Parser::error (const ACEXML_Char* msg ACEXML_ENV_ARG_DECL)
+{
+ ACEXML_SAXParseException* exception = 0;
+ ACE_NEW_NORETURN (exception, ACEXML_SAXParseException (msg));
+ if (this->error_handler_)
+ this->error_handler_->error (*exception ACEXML_ENV_ARG_PARAMETER);
+ else
+ ACEXML_ENV_RAISE (exception);
+ return;
+}
+
+void
+ACEXML_Parser::warning (const ACEXML_Char* msg ACEXML_ENV_ARG_DECL)
+{
+ ACEXML_SAXParseException* exception = 0;
+ ACE_NEW_NORETURN (exception, ACEXML_SAXParseException (msg));
+ if (this->error_handler_)
+ this->error_handler_->warning (*exception ACEXML_ENV_ARG_PARAMETER);
+ return;
+}
+
+void
+ACEXML_Parser::fatal_error (const ACEXML_Char* msg ACEXML_ENV_ARG_DECL)
+{
+ ACEXML_SAXParseException* exception = 0;
+ ACE_NEW_NORETURN (exception, ACEXML_SAXParseException (msg));
+ if (this->error_handler_)
+ this->error_handler_->fatalError (*exception ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_ENV_RAISE (exception);
+ return;
+}
+
+void
+ACEXML_Parser::parse_version_info (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException))
+{
+ ACEXML_Char* astring;
+ if (this->parse_token (ACE_TEXT("ersion")) < 0
+ || this->skip_equal () != 0
+ || this->parse_version_num (astring) != 0)
+ {
+ this->fatal_error (ACE_TEXT ("Invalid VersionInfo specification")
+ ACEXML_ENV_ARG_PARAMETER);
+ return;
+ }
+ if (ACE_OS::strcmp (astring, ACE_TEXT ("1.0")) != 0)
+ {
+ this->fatal_error (ACE_TEXT ("ACEXML Parser supports XML version 1.0 "
+ "documents only") ACEXML_ENV_ARG_PARAMETER);
+ return;
+ }
+}
+
+void
+ACEXML_Parser::parse_encoding_decl (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException))
+{
+ ACEXML_Char* astring;
+ if ((this->parse_token (ACE_TEXT("ncoding")) < 0)
+ || this->skip_equal () != 0
+ || this->parse_encname (astring) != 0)
+ {
+ this->fatal_error (ACE_TEXT ("Invalid EncodingDecl specification")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK;
+ }
+ const ACEXML_Char* encoding = this->current_.getInputSource()->getEncoding();
+ if (ACE_OS::strcmp (astring, encoding) != 0)
+ {
+ ACE_ERROR ((LM_ERROR, ACE_TEXT ("Detected Encoding is %s "
+ ": Declared Encoding is %s\n"),
+ encoding, astring));
+ this->warning (ACE_TEXT ("Declared encoding differs from detected "
+ "encoding") ACEXML_ENV_ARG_PARAMETER);
+ }
+}
+
+int
+ACEXML_Parser::parse_text_decl (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException))
+{
+ // Read xml
+ if (this->parse_token (ACE_TEXT("xml")) < 0)
+ {
+ this->fatal_error(ACE_TEXT ("Expecting keyword 'xml' in TextDecl")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+
+ ACEXML_Char fwd = this->skip_whitespace();
+ // Read version
+ if (fwd == 'v')
+ {
+ this->parse_version_info (ACEXML_ENV_SINGLE_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ fwd = this->skip_whitespace();
+ }
+
+ if (fwd == 'e')
+ {
+ this->parse_encoding_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ fwd = this->skip_whitespace();
+ }
+ else
+ {
+ this->fatal_error (ACE_TEXT ("Missing encodingDecl in TextDecl")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+
+ if (fwd == '?' && this->get() == '>')
+ return 0;
+ // All the rules fail. So return an error.
+ this->fatal_error (ACE_TEXT ("Invalid TextDecl") ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ return -1;
+}
+
+void
+ACEXML_Parser::parse_xml_decl (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException))
+{
+ // Read <?xml
+ if (this->parse_token (ACE_TEXT("xml")) < 0)
+ {
+ this->fatal_error(ACE_TEXT ("Expecting keyword xml in XMLDecl")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK;
+ }
+
+ ACEXML_Char fwd = this->skip_whitespace();
+
+ // Read version
+ if (fwd != 'v')
+ {
+ this->fatal_error (ACE_TEXT ("Expecting VersionInfo declaration")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK;
+ }
+
+ this->parse_version_info (ACEXML_ENV_SINGLE_ARG_PARAMETER);
+ ACEXML_CHECK;
+
+ fwd = this->skip_whitespace();
+ if (fwd != '?')
+ {
+ if (fwd == 'e')
+ {
+ this->parse_encoding_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER);
ACEXML_CHECK;
+ fwd = this->skip_whitespace();
+ }
+ if (fwd == 's')
+ {
+ ACEXML_Char* astring;
+ if ((this->parse_token (ACE_TEXT("tandalone")) == 0) &&
+ this->skip_equal () == 0 &&
+ this->parse_sddecl (astring) == 0)
+ {
+ if (ACE_OS::strcmp (astring, ACE_TEXT ("yes")) == 0)
+ this->standalone_ = 1;
+ fwd = this->skip_whitespace();
+ }
}
}
+ if (fwd == '?' && this->get() == '>')
+ return;
+ // All the rules fail. So return an error.
+ this->fatal_error (ACE_TEXT ("Invalid XMLDecl declaration")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK;
+}
+
+int
+ACEXML_Parser::parse_comment (void)
+{
+ int state = 0;
+
+ if (this->get () != '-' || // Skip the opening "<!--"
+ this->get () != '-' || // completely.
+ this->get () == '-') // and at least something not '-'.
+ return -1;
+
+ while (state < 3)
+ // Waiting for the trailing three character '-->'. Notice that
+ // according to the spec, '--->' is not a valid closing comment
+ // sequence. But we'll let it pass anyway.
+ {
+ ACEXML_Char fwd = this->get ();
+ if ((fwd == '-' && state < 2) ||
+ (fwd == '>' && state == 2))
+ state += 1;
+ else
+ state = 0; // Reset parse state.
+ }
+ return 0;
+}
+
+int
+ACEXML_Parser::parse_processing_instruction (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException))
+{
+ const ACEXML_Char *pitarget = this->parse_name ();
+ ACEXML_Char *instruction = 0;
+
+ if (!ACE_OS::strcasecmp (ACE_TEXT ("xml"), pitarget))
+ {
+ // Invalid PITarget name.
+ this->fatal_error(ACE_TEXT ("PI can't have 'xml' in PITarget")
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ }
+
+ int state = 0;
+
+ ACEXML_Char ch = this->skip_whitespace();
+ while (state < 2)
+ {
+ switch (ch)
+ {
+ case '?':
+ if (state == 0)
+ state = 1;
+ break;
+ case '>':
+ if (state == 1)
+ {
+ instruction = this->obstack_.freeze ();
+ this->content_handler_->processingInstruction (pitarget,
+ instruction
+ ACEXML_ENV_ARG_PARAMETER);
+ ACEXML_CHECK_RETURN (-1);
+ this->obstack_.unwind (ACE_const_cast (ACEXML_Char*, pitarget));
+ return 0;
+ }
+ break;
+ case 0x0A:
+ // Fall thru...
+ default:
+ if (state == 1)
+ this->obstack_.grow ('?');
+ this->obstack_.grow (ch);
+ state = 0;
+ }
+ ch = this->get ();
+ }
+ return -1;
+}
+
+void
+ACEXML_Parser::reset (void)
+{
+ this->current_.reset();
+ if (this->ctx_stack_.pop (this->current_) != -1)
+ ACE_ERROR ((LM_ERROR, ACE_TEXT ("Mismatched push/pop of Context stack")));
+ this->current_.reset();
+ ACEXML_String temp;
+ while (this->GE_reference_.pop (temp) != -1)
+ ;
+ while (this->PE_reference_.pop (temp) != -1)
+ ;
+ this->obstack_.release();
+ this->xml_namespace_.reset();
+ this->nested_namespace_ = 0;
+ this->internal_GE_.reset();
+ this->external_GE_.reset();
+ this->unparsed_entities_.reset();
+ this->predef_entities_.reset();
+ this->internal_PE_.reset();
+ this->external_PE_.reset();
+ this->notations_.reset();
+ this->ref_state_ = ACEXML_ParserInt::INVALID;
+ this->external_subset_ = 0;
+ this->external_entity_ = 0;
+ this->has_pe_refs_ = 0;
+ this->standalone_ = 0;
+ this->external_dtd_ = 0;
+ this->internal_dtd_ = 0;
}
diff --git a/ACEXML/parser/parser/Parser.dsp b/ACEXML/parser/parser/Parser.dsp
index 76a8f3f5ad8..f70361682b0 100644
--- a/ACEXML/parser/parser/Parser.dsp
+++ b/ACEXML/parser/parser/Parser.dsp
@@ -100,6 +100,14 @@ SOURCE=.\Entity_Manager.cpp
SOURCE=.\Parser.cpp
# End Source File
+# Begin Source File
+
+SOURCE=.\ParserContext.cpp
+# End Source File
+# Begin Source File
+
+SOURCE=.\ParserInternals.cpp
+# End Source File
# End Group
# Begin Group "Header Files"
@@ -118,7 +126,15 @@ SOURCE=.\Parser_export.h
# End Source File
# Begin Source File
-SOURCE=.\ParserErrors.h
+SOURCE=.\ParserContext.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\ParserContext.inl
+# End Source File
+# Begin Source File
+
+SOURCE=.\ParserInternals.h
# End Source File
# End Group
# Begin Group "Resource Files"
diff --git a/ACEXML/parser/parser/Parser.h b/ACEXML/parser/parser/Parser.h
index f84ac2986d8..28eb2359ea3 100644
--- a/ACEXML/parser/parser/Parser.h
+++ b/ACEXML/parser/parser/Parser.h
@@ -7,6 +7,7 @@
* $Id$
*
* @author Nanbor Wang <nanbor@cs.wustl.edu>
+ * @author Krishnakumar B <kitty@cs.wustl.edu>
*/
//=============================================================================
@@ -28,10 +29,12 @@
#include "ace/Functor.h"
#include "ace/SString.h"
#include "ace/Hash_Map_Manager.h"
+#include "ace/Unbounded_Set.h"
#include "ace/Containers_T.h"
#include "ace/Auto_Ptr.h"
#include "ACEXML/parser/parser/Entity_Manager.h"
-#include "ACEXML/parser/parser/ParserErrors.h"
+#include "ACEXML/parser/parser/ParserInternals.h"
+#include "ACEXML/parser/parser/ParserContext.h"
/**
* @class ACEXML_Parser Parser.h "ACEXML/parser/parser/Parser.h"
@@ -48,7 +51,14 @@ public:
/// Destructor.
virtual ~ACEXML_Parser (void);
- /*
+ /**
+ * Initialize the parser state.
+ *
+ * @retval 0 if parser was initialized correctly else -1.
+ */
+ int initialize (ACEXML_InputSource* input);
+
+ /**
* Return the current content handler.
*/
virtual ACEXML_ContentHandler *getContentHandler (void) const;
@@ -133,80 +143,41 @@ public:
*/
virtual void setErrorHandler (ACEXML_ErrorHandler *handler);
- // *** Helper functions for parsing XML
- /**
- * Skip any whitespaces encountered until the first non-whitespace
- * character is encountered and consumed from the current input
- * CharStream.
- *
- * @param whitespace Return a pointer to the string of skipped
- * whitespace after proper conversion. Null if there's no
- * whitespace found.
- *
- * @return The first none-white space characters (which will be
- * consumed from the CharStream.) If no whitespace is found, it
- * returns 0.
- *
- * @sa skip_whitespace_count
- */
- ACEXML_Char skip_whitespace (ACEXML_Char **whitespace);
+protected:
/**
- * Skip any whitespaces encountered until the first non-whitespace
- * character. The first non-whitespace character is not consumed.
- * This method does peek into the input CharStream and therefore
- * is more expensive than @ref skip_whitespace.
- *
- * @param peek If non-null, @a peek points to a ACEXML_Char where
- * skip_whitespace_count stores the first non-whitespace
- * character it sees (character is not removed from the stream.)
- *
- * @return The number of whitespace characters consumed.
- *
- * @sa skip_whitespace
+ * Parse XML Prolog.
*/
- int skip_whitespace_count (ACEXML_Char *peek = 0);
+ void parse_xml_prolog (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
/**
- * Check if a character @a c is a whitespace.
+ * Parse VersionInfo declaration.
*
- * @retval 1 if @a c is a valid white space character. 0 otherwise.
*/
- int is_whitespace (ACEXML_Char c);
+ void parse_version_info (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
/**
- * Check if a character @a c is a whitespace or '='.
+ * Parse a EncodingDecl declaration.
*
- * @retval 1 if true, 0 otherwise.
*/
- int is_whitespace_or_equal (ACEXML_Char c);
+ void parse_encoding_decl (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
/**
- * Check if a character @a c is a valid character for nonterminal NAME.
+ * Parse a XMLDecl declaration.
*
- * @retval 1 if true, 0 otherwise.
*/
- int is_nonname (ACEXML_Char c);
+ void parse_xml_decl (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
/**
- * Skip an equal sign.
- *
- * @retval 0 when succeeds, -1 if no equal sign is found.
+ * Parse a TextDecl declaration.
*/
- int skip_equal (void);
-
- /**
- * Get a quoted string. Quoted strings are used to specify
- * attribute values and this routine will replace character and
- * entity references on-the-fly. Parameter entities are not allowed
- * (or replaced) in this function. (But regular entities are.)
- *
- * @param str returns the un-quoted string.
- *
- * @retval 0 on success, -1 otherwise.
- */
- int get_quoted_string (ACEXML_Char *&str);
+ int parse_text_decl (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
/**
* Parse a PI statement. The first character encountered
@@ -214,33 +185,15 @@ public:
*
* @retval 0 on success, -1 otherwise.
*/
- int parse_processing_instruction (ACEXML_ENV_SINGLE_ARG_DECL);
-
- /**
- * Skip over a comment. The first character encountered
- * should always be the first '-' in the comment prefix
- * "@<@!--".
- */
- int grok_comment ();
-
- /**
- * Read a name from the input CharStream (until white space).
- * If @a ch @!= 0, then we have already consumed the first name
- * character from the input CharStream, otherwise, read_name
- * will use this->get() to acquire the initial character.
- *
- * @return A pointer to the string in the obstack, 0 if it's not
- * a valid name.
- */
- ACEXML_Char *read_name (ACEXML_Char ch = 0);
+ int parse_processing_instruction (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
/**
* Parse the DOCTYPE declaration. The first character encountered
* should always be 'D' in doctype prefix: "@<@!DOCTYPE".
*/
int parse_doctypedecl (ACEXML_ENV_SINGLE_ARG_DECL)
- ACE_THROW_SPEC ((ACEXML_SAXException))
- ;
+ ACE_THROW_SPEC ((ACEXML_SAXException));
/**
* Parse an XML element. The first character encountered should
@@ -256,35 +209,39 @@ public:
* can be used in a validator.
*/
void parse_element (int is_root ACEXML_ENV_ARG_DECL)
- ACE_THROW_SPEC ((ACEXML_SAXException))
- ;
+ ACE_THROW_SPEC ((ACEXML_SAXException));
/**
- * Parse XML Prolog.
+ * Parse a content declaration.
+ *
*/
- void parse_xml_prolog (ACEXML_ENV_SINGLE_ARG_DECL)
+ int parse_content (const ACEXML_Char* startname, const ACEXML_Char* ns_uri,
+ const ACEXML_Char* ns_lname
+ ACEXML_ENV_ARG_DECL)
ACE_THROW_SPEC ((ACEXML_SAXException));
-
/**
* Parse a character reference, i.e., "&#x20;" or "&#30;". The first
* character encountered should be the '#' char.
*
* @param buf points to a character buffer for the result.
- * @param len specifies the capacities of the buffer.
+ *
+ * @param len In/out argument which initially specifies the size of the
+ * buffer and is later set to the no. of characters in the reference.
*
* @retval 0 on success and -1 otherwise.
*/
int parse_char_reference (ACEXML_Char *buf, size_t len);
/**
- * Parse an entity reference, i.e., "&amp;". The first character
- * encountered should be the character following '&'.
+ * Parse a reference name, i.e., foo in "&foo;" or "%foo;". The first
+ * character encountered should be the character following '&' or '%'.
+ * Effectively the same as @sa parse_name but we don't use the parser's
+ * obstack. Caller is responsible for deleting the memory.
*
- * @return A pointer to the resolved const ACEXML_String if success
- * (previously defined), 0 otherwise.
+ * @retval A pointer to name of reference, 0 otherwise.
*/
- const ACEXML_String *parse_reference (void);
+ ACEXML_Char* parse_reference_name (void);
/**
* Parse a CDATA section. The first character should always be the first
@@ -293,13 +250,21 @@ public:
* @retval 0 on success.
* @retval -1 if fail.
*/
- int parse_cdata (ACEXML_ENV_SINGLE_ARG_DECL);
+ int parse_cdata (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
/**
* Parse a "markupdecl" section, this includes both "markupdecl" and
* "DeclSep" sections in XML specification
*/
- int parse_internal_dtd (ACEXML_ENV_SINGLE_ARG_DECL);
+ int parse_internal_dtd (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
+
+ /**
+ * Skip over a comment. The first character encountered should always be
+ * the first '-' in the comment prefix "@<@!--".
+ */
+ int parse_comment (void);
/**
* Parse an "ELEMENT" decl. The first character this method
@@ -308,7 +273,8 @@ public:
*
* @retval 0 on success, -1 otherwise.
*/
- int parse_element_decl (ACEXML_ENV_SINGLE_ARG_DECL);
+ int parse_element_decl (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
/**
* Parse an "ENTITY" decl. The first character this method expects
@@ -316,7 +282,8 @@ public:
*
* @retval 0 on success, -1 otherwise.
*/
- int parse_entity_decl (ACEXML_ENV_SINGLE_ARG_DECL);
+ int parse_entity_decl (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
/**
* Parse an "ATTLIST" decl. Thse first character this method
@@ -325,7 +292,15 @@ public:
*
* @retval 0 on success, -1 otherwise.
*/
- int parse_attlist_decl (ACEXML_ENV_SINGLE_ARG_DECL);
+ int parse_attlist_decl (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
+
+ /**
+ * Parse a AttType declaration.
+ *
+ */
+ int parse_atttype (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
/**
*Parse a "NOTATION" decl. The first character this method
@@ -334,7 +309,8 @@ public:
*
* @retval 0 on success, -1 otherwise.
*/
- int parse_notation_decl (ACEXML_ENV_SINGLE_ARG_DECL);
+ int parse_notation_decl (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
/**
* Parse an ExternalID or a reference to PUBLIC ExternalID.
@@ -356,8 +332,81 @@ public:
*
* @retval 0 on success, -1 otherwise.
*/
- int parse_external_id_and_ref (ACEXML_Char *&publicId,
- ACEXML_Char *&systemId ACEXML_ENV_ARG_DECL);
+ int parse_external_id (ACEXML_Char *&publicId, ACEXML_Char *&systemId
+ ACEXML_ENV_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
+
+ /**
+ * Parse an external DTD.
+ *
+ */
+ int parse_external_dtd (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
+
+ /**
+ * Parse an external subset. This does the actual parsing of an external
+ * subset and is called by @sa parse_external_dtd.
+ *
+ */
+ int parse_external_subset (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
+
+ /**
+ * Parse a markupDecl section.
+ *
+ */
+ int parse_markup_decl (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
+
+ /**
+ * Parse a conditionalSect declaration.
+ *
+ */
+ int parse_conditional_section (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
+
+ /**
+ * Parse a includeSect declaration.
+ *
+ */
+ int parse_includesect (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
+
+ /**
+ *
+ * Parse a ignoreSect declaration.
+ */
+ int parse_ignoresect (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
+
+ /**
+ * Parse a PEReference.
+ *
+ */
+ int parse_PE_reference (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
+
+ /**
+ * Parse a Reference.
+ *
+ */
+ int parse_entity_reference (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
+
+ /**
+ * Parse an entityValue.
+ *
+ */
+ int parse_entity_value (ACEXML_Char *&str ACEXML_ENV_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
+
+ /**
+ * Parse a DefaultDecl specification.
+ *
+ */
+ int parse_defaultdecl (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
+
/**
* Parse the "children" and "Mixed" non-terminals in contentspec.
@@ -367,7 +416,8 @@ public:
*
* @retval 0 on success, -1 otherwise.
*/
- int parse_children_definition (ACEXML_ENV_SINGLE_ARG_DECL);
+ int parse_children_definition (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
/**
* Parse a @c cp non-terminal. @c cp can either be a @c seq or a @c choice.
@@ -379,72 +429,241 @@ public:
*
* @retval 0 on success, -1 otherwise.
*/
- int parse_child (int skip_open_paren ACEXML_ENV_ARG_DECL);
+ int parse_child (int skip_open_paren ACEXML_ENV_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
+
+ /**
+ * Parse a name from the input CharStream. If @a ch @!= 0, then we have
+ * already consumed the first name character from the input CharStream,
+ * otherwise, parse_name will use this->get() to acquire the initial
+ * character.
+ *
+ * @return A pointer to the string in the obstack, 0 if it's not a
+ * valid name.
+ */
+ ACEXML_Char *parse_name (ACEXML_Char ch = 0);
+
+ /**
+ * Parse a NMTOKEN from the input stream.
+ *
+ * @return A pointer to the string in the obstack, 0 if it's not a valid
+ * NMTOKEN.
+ */
+ ACEXML_Char* parse_nmtoken (ACEXML_Char ch = 0);
+
+ /**
+ * Parse the version string in an XML Prolog section.
+ *
+ * @param str String containing the version number if successful.
+ * @return 0 if the string was read successfully, -1 otherwise.
+ */
+ int parse_version (ACEXML_Char*& str);
+
+ /**
+ * Parse the version number in a VersionInfo declaration.
+ */
+ int parse_version_num (ACEXML_Char*& str);
+
+ /**
+ * Parse the encoding name in an XML Prolog section.
+ *
+ * @param str String containing the encoding name if successful.
+ * @return 0 if the string was read successfully, -1 otherwise.
+ */
+ int parse_encname (ACEXML_Char*& str);
+
+ /**
+ * Parse a SDDecl string.
+ *
+ * @param str String containing the standalone declaration value if successful.
+ * @return 0 if the string was read successfully, -1 otherwise.
+ */
+ int parse_sddecl (ACEXML_Char*& str);
+
+ /**
+ * Parse an attribute value.
+ *
+ * @param str String containing the value of the attribute if successful.
+ * @return 0 if attribute value was read successfully, -1 otherwise.
+ */
+ int parse_attvalue (ACEXML_Char*& str ACEXML_ENV_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
+
+ /**
+ * Parse a SystemLiteral.
+ *
+ * @param str String containing the SystemLiteral if successful.
+ * @return 0 if the string was read successfully, -1 otherwise.
+ */
+ int parse_system_literal (ACEXML_Char*& str);
+
+ /**
+ * Parse a PubidLiteral.
+ *
+ * @param str String containing the PubidLiteral if successful.
+ * @return 0 if the string was read successfully, -1 otherwise.
+ */
+ int parse_pubid_literal (ACEXML_Char*& str);
+
+ /**
+ * Check if a character @a c is a whitespace.
+ *
+ * @retval 1 if @a c is a valid white space character. 0 otherwise.
+ */
+ int is_whitespace (const ACEXML_Char c) const;
+
+ /**
+ * Check if a character @a c is a valid Char.
+ *
+ * @retval 1 if @a c is a valid character. 0 otherwise.
+ */
+ int isChar (ACEXML_UCS4 c) const;
+
+ /**
+ * Check if a character @a c is a valid CharRef character.
+ *
+ * @retval 1 if @a c is a valid character reference character, 0 otherwise.
+ */
+ int isCharRef (const ACEXML_Char c) const;
+
+ /**
+ * Check if a character @a c is a BaseChar.
+ *
+ * @retval 1 if @a c is a valid BaseChar character, 0 otherwise.
+ */
+ int isBasechar (const ACEXML_Char c) const;
+
+ /**
+ * Check if a character @a c is a Ideographic.
+ *
+ * @retval 1 if @a c is a valid Ideographic character, 0 otherwise.
+ */
+ int isIdeographic (const ACEXML_Char c) const;
+
+ /**
+ * Check if a character @a c is a CombiningChar.
+ *
+ * @retval 1 if @a c is a valid CombiningChar character, 0 otherwise.
+ */
+ int isCombiningchar (const ACEXML_Char c) const;
+
+ /**
+ * Check if a character @a c is a Digit.
+ *
+ * @retval 1 if @a c is a valid Digit character, 0 otherwise.
+ */
+ int isDigit (const ACEXML_Char c) const;
+
+ /**
+ * Check if a character @a c is an Extender.
+ *
+ * @retval 1 if @a c is a valid Extender character, 0 otherwise.
+ */
+ int isExtender (const ACEXML_Char c) const;
+
+ /**
+ * Check if a character @a c is a Letter.
+ *
+ * @retval 1 if @a c is a valid Letter character, 0 otherwise.
+ */
+ int isLetter (const ACEXML_Char c) const;
+
+ /**
+ * Check if a character is an acceptable NameChar.
+ *
+ * @retval 1 if @a c is a valid NameChar character, 0 otherwise.
+ */
+ int isNameChar (const ACEXML_Char c) const;
+
+ /**
+ * Check if a character is a PubidChar.
+ *
+ * @retval 1 if @a c is a valid PubidChar character, 0 otherwise.
+ */
+ int isPubidChar (const ACEXML_Char c) const;
-protected:
/// Get a character.
- ACEXML_Char get (void);
+ virtual ACEXML_Char get (void);
/// Peek a character.
- ACEXML_Char peek (void);
+ virtual ACEXML_Char peek (void);
- // Feature names:
+private:
+
+ // *** Helper functions for parsing XML
/**
- * \addtogroup acexml_parser_features
- * @{
+ * Skip any whitespaces encountered until the first non-whitespace
+ * character is encountered.
+ *
+ * @return The next non-whitespace character from the CharStream.
+ *
+ * @sa skip_whitespace_count
*/
+ ACEXML_Char skip_whitespace (void);
/**
- * @var simple_parsing_feature_
+ * Skip any whitespaces encountered until the first non-whitespace
+ * character. The first non-whitespace character is not consumed.
+ * This method does peek into the input CharStream and therefore
+ * is more expensive than @ref skip_whitespace.
*
- * This constant string defines the name of "simple XML parsing"
- * feature. When this feature is enabled, ACEXML parser is allowed
- * to parse a simple XML stream without mandated XML prolog
- * and no DTD defintion.
+ * @param peek If non-null, @a peek points to a ACEXML_Char where
+ * skip_whitespace_count stores the first non-whitespace
+ * character it sees (character is not removed from the stream.)
+ *
+ * @return The number of whitespace characters consumed.
+ *
+ * @sa skip_whitespace
*/
- static const ACEXML_Char simple_parsing_feature_[];
+ int skip_whitespace_count (ACEXML_Char *peek = 0);
/**
- * @var namespaces_feature_
+ * Skip an equal sign.
*
- * This constant string defines the SAX XML Namespace feature. When this
- * feature is enabled, ACEXML parser allows access by namespace qualified
- * names.
+ * @retval 0 when succeeds, -1 if no equal sign is found.
*/
- static const ACEXML_Char namespaces_feature_[];
+ int skip_equal (void);
/**
- * @var namespace_prefixes_feature_
+ * Get a quoted string. Quoted strings are used to specify
+ * attribute values and this routine will replace character and
+ * entity references on-the-fly. Parameter entities are not allowed
+ * (or replaced) in this function. (But regular entities are.)
*
- * This constant string defines the SAX XML Namespace prefixes feature.
- * Normally the list of attributes returned by the parser will not
- * contain attributes used as namespace declarations (xmlns*). When this
- * feature is enabled, the list of attributes contains the namespace
- * declarations also.
+ * @param str returns the un-quoted string.
+ *
+ * @retval 0 on success, -1 otherwise.
*/
- static const ACEXML_Char namespace_prefixes_feature_[];
+ int get_quoted_string (ACEXML_Char *&str);
- /* @} */
+ /**
+ * Check if a character @a c is an ASCII decimal digit ('0' - '9').
+ *
+ * @retval 1 if @a c is an ASCII decimal digit, 0 otherwise.
+ */
+ int isNormalDigit (const ACEXML_Char c) const;
-private:
/**
* Dispatch errors to ErrorHandler.
*
*/
- void report_error (ACEXML_Error minor_code ACEXML_ENV_ARG_DECL);
+ void error (const ACEXML_Char* msg ACEXML_ENV_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
/**
* Dispatch warnings to ErrorHandler.
*
*/
- void report_warning (ACEXML_Error minor_code ACEXML_ENV_ARG_DECL);
+ void warning (const ACEXML_Char* msg ACEXML_ENV_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
/**
* Dispatch fatal errors to ErrorHandler.
*
*/
- void report_fatal_error (ACEXML_Error minor_code ACEXML_ENV_ARG_DECL);
+ void fatal_error (const ACEXML_Char* msg ACEXML_ENV_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
/**
* Dispatch prefix mapping calls to the ContentHandler.
@@ -454,15 +673,93 @@ private:
* @param name Local name
* @param start 1 => startPrefixMapping 0 => endPrefixMapping
*/
- void report_prefix_mapping (const ACEXML_Char* prefix,
+ void prefix_mapping (const ACEXML_Char* prefix,
const ACEXML_Char* uri,
const ACEXML_Char* name,
- int start ACEXML_ENV_ARG_DECL);
+ int start ACEXML_ENV_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
/**
* Parse a keyword.
*/
int parse_token (const ACEXML_Char* keyword);
+ /**
+ * Push the current context on to the stack.
+ *
+ */
+ int push_context (const ACEXML_Parser_Context& context);
+
+ /**
+ * Pop the top element in the stack and replace current context with that.
+ */
+ int pop_context (void);
+
+ /**
+ * Create a new ACEXML_CharStream from @a systemId and @a publicId and
+ * replace the current input stream with the newly created stream.
+ */
+ virtual int switch_input (ACEXML_CharStream* cstream,
+ const ACEXML_Char* systemId = 0,
+ const ACEXML_Char* publicId = 0);
+ /**
+ * Create a new ACEXML_InputSource from @a systemId and @a publicId and
+ * replace the current input source with the newly created InputSource.
+ */
+ virtual int switch_input (ACEXML_InputSource* input,
+ const ACEXML_Char* systemId = 0,
+ const ACEXML_Char* publicId = 0);
+
+ /**
+ * Reset the parser state.
+ *
+ */
+ void reset (void);
+
+ /**
+ * Very trivial, non-conformant normalization of a systemid.
+ *
+ */
+ ACEXML_Char* normalize_systemid (const char* systemId);
+
+ // Feature names:
+
+ /**
+ * \addtogroup acexml_parser_features
+ * @{
+ */
+
+ /**
+ * @var simple_parsing_feature_
+ *
+ * This constant string defines the name of "simple XML parsing"
+ * feature. When this feature is enabled, ACEXML parser is allowed
+ * to parse a simple XML stream without mandated XML prolog
+ * and no DTD definition.
+ */
+ static const ACEXML_Char simple_parsing_feature_[];
+
+ /**
+ * @var namespaces_feature_
+ *
+ * This constant string defines the SAX XML Namespace feature. When this
+ * feature is enabled, ACEXML parser allows access by namespace qualified
+ * names.
+ */
+ static const ACEXML_Char namespaces_feature_[];
+
+ /**
+ * @var namespace_prefixes_feature_
+ *
+ * This constant string defines the SAX XML Namespace prefixes feature.
+ * Normally the list of attributes returned by the parser will not
+ * contain attributes used as namespace declarations (xmlns*). When this
+ * feature is enabled, the list of attributes contains the namespace
+ * declarations also.
+ */
+ static const ACEXML_Char namespace_prefixes_feature_[];
+
+ /* @} */
+
/// Keeping track of the handlers. We do not manage the memory for
/// handlers.
ACEXML_DTDHandler *dtd_handler_;
@@ -470,36 +767,96 @@ private:
ACEXML_ContentHandler *content_handler_;
ACEXML_ErrorHandler *error_handler_;
- /// @@ Feature and properties management structure here.
- /// Current input char stream.
- ACEXML_CharStream *instream_;
-
- /// My doctype, if any.
+ /// Document Type
ACEXML_Char *doctype_;
- /// External DTD System Literal, if any.
- ACEXML_Char *dtd_system_;
+ /// Current parser context
+ ACEXML_Parser_Context current_;
- /// External DTD Public Literal, if any.
- ACEXML_Char *dtd_public_;
+ /// Stack used to hold the Parser_Context
+ ACE_Unbounded_Stack<ACEXML_Parser_Context> ctx_stack_;
+ /*
+ * The following two are essentially chains of references and are used by
+ * the parser to determine if there is any recursion. We keep two of
+ * these one for general entities and one for parameter entities, as they
+ * both fall under different namespaces.
+ *
+ */
+ /// Set used to hold the general entity references that are active.
+ ACE_Unbounded_Stack<ACEXML_String> GE_reference_;
+
+ /// Set used to hold the parameter entity references that are active.
+ ACE_Unbounded_Stack<ACEXML_String> PE_reference_;
+
+ /// Obstack used by the parser to hold all the strings parsed
ACE_Obstack_T<ACEXML_Char> obstack_;
+ /// Alternative obstack used to hold any strings when the original is in use
+ ACE_Obstack_T<ACEXML_Char> alt_stack_;
+
+ /// Namespace stack used by the parser to implement support for Namespaces
ACEXML_NamespaceSupport xml_namespace_;
- ACEXML_Entity_Manager entities_;
+ /// T => We are processing a nested namespace
+ int nested_namespace_;
- // Locator
- ACEXML_LocatorImpl locator_;
+ /// Set of internal parsed general entities in the document
+ ACEXML_Entity_Manager internal_GE_;
- // Flag set if the document is a standalone XML document
- int standalone_;
+ /// Set of external parsed general entities in the document
+ ACEXML_Entity_Manager external_GE_;
+
+ /// Set of unparsed entities in the document
+ ACEXML_Entity_Manager unparsed_entities_;
+
+ /// Set of predefined entities used by the parser
+ ACEXML_Entity_Manager predef_entities_;
+
+ /// Set of internal parsed parameter entities in the document
+ ACEXML_Entity_Manager internal_PE_;
+
+ /// Set of external parsed parameter entities in the document
+ ACEXML_Entity_Manager external_PE_;
+
+ /// Set of notations declared in the document
+ ACEXML_Entity_Manager notations_;
- // Feature flags &
+ /// State of the parser when it encounters a reference.
+ ACEXML_ParserInt::ReferenceState ref_state_;
+
+ /// T => We are parsing an external subset
+ int external_subset_;
+
+ /// T => We are parsing an external entity value
+ int external_entity_;
+
+ /// T => Internal DTD has parameter entity references
+ int has_pe_refs_;
+
+ /// Feature flags
+ /// If set, the parser should parse a document without a prolog
int simple_parsing_;
+
+ /// If set, the parser should also validate
+ int validate_;
+
+ /// If set, the parser should allow access by namespace qualified names.
int namespaces_;
+
+ /// If set, the parser should include namespace declarations in the list
+ /// of attributes of an element.
int namespace_prefixes_;
+ /// If set, the document is a standalone XML document
+ int standalone_;
+
+ /// If set, the document has an external DTD subset
+ int external_dtd_;
+
+ /// If set, the document has an internal DTD
+ int internal_dtd_;
+
};
#if defined (__ACEXML_INLINE__)
diff --git a/ACEXML/parser/parser/Parser.i b/ACEXML/parser/parser/Parser.i
index 5b6f072fba0..e03f09c2116 100644
--- a/ACEXML/parser/parser/Parser.i
+++ b/ACEXML/parser/parser/Parser.i
@@ -57,49 +57,113 @@ ACEXML_Parser::setErrorHandler (ACEXML_ErrorHandler *handler)
}
ACEXML_INLINE int
-ACEXML_Parser::is_whitespace (ACEXML_Char c)
+ACEXML_Parser::isChar (const ACEXML_UCS4 c) const
{
- switch (c)
+ return (c == 0x9 || c == 0xA || c == 0xD ||
+ c >= 0x20 && c <= 0xD7FF ||
+ c >= 0xE000 && c <= 0xFFFD ||
+ c >= 0x10000 && c <= 0x10FFFF);
+}
+
+ACEXML_INLINE int
+ACEXML_Parser::isCharRef (const ACEXML_Char c) const
{
- case 0xa:
- case 0x20:
- case 0x9:
- case 0xd:
- return 1;
- default:
+ return ((c >= 'a' && c <= 'f') ||
+ (c >= 'A' && c <= 'F'));
+}
+
+ACEXML_INLINE int
+ACEXML_Parser::isNormalDigit (const ACEXML_Char c) const
+{
+ return (c >= '\x30' && c <= '\x39');
+}
+
+// Check whether @a c is an XML BaseChar.
+//
+// Wide-character builds delegate to ACEXML_ParserInt::isBasechar_i ().
+// Otherwise the answer is read from ACEXML_ParserInt::base_char_table_,
+// using the value of @a c as the subscript.
+//
+// Returns non-zero if @a c is a BaseChar, 0 otherwise.
+ACEXML_INLINE int
+ACEXML_Parser::isBasechar (const ACEXML_Char c) const
+{
+#if defined (ACE_USES_WCHAR)
+ return ACEXML_ParserInt::isBasechar_i (c);
+#else
+ // In this branch ACEXML_Char is presumably plain char, whose signedness
+ // is implementation-defined.  Convert to unsigned char first so that
+ // bytes >= 0x80 cannot produce a negative subscript.
+ // NOTE(review): assumes the table has an entry for every byte value
+ // (0 - 255) -- confirm against ParserInternals.
+ return ACEXML_ParserInt::base_char_table_[ACE_static_cast (unsigned char, c)];
+#endif /* ACE_USES_WCHAR */
+}
+
+ACEXML_INLINE int
+ACEXML_Parser::isIdeographic (const ACEXML_Char c) const
+{
+#if defined (ACE_USES_WCHAR)
+ return ACEXML_ParserInt::isIdeographic_i (c);
+#else
+ ACE_UNUSED_ARG (c);
+ return 0;
+#endif /* ACE_USES_WCHAR */
+}
+
+ACEXML_INLINE int
+ACEXML_Parser::isCombiningchar (const ACEXML_Char c) const
+{
+#if defined (ACE_USES_WCHAR)
+ return ACEXML_ParserInt::isCombiningchar_i (c);
+#else
+ ACE_UNUSED_ARG (c);
return 0;
+#endif /* ACE_USES_WCHAR */
}
+
+ACEXML_INLINE int
+ACEXML_Parser::isDigit (const ACEXML_Char c) const
+{
+#if defined (ACE_USES_WCHAR)
+ return ACEXML_ParserInt::isDigit_i (c);
+#else
+ return (this->isNormalDigit (c));
+#endif /* ACE_USES_WCHAR */
+}
+
+ACEXML_INLINE int
+ACEXML_Parser::isExtender (const ACEXML_Char c) const
+{
+#if defined (ACE_USES_WCHAR)
+ return ACEXML_ParserInt::isExtender_i (c);
+#else
+ return (c == '\xB7');
+#endif /* ACE_USES_WCHAR */
}
+ACEXML_INLINE int
+ACEXML_Parser::isLetter (const ACEXML_Char c) const
+{
+ return (this->isBasechar (c) || this->isIdeographic (c));
+}
ACEXML_INLINE int
-ACEXML_Parser::is_whitespace_or_equal (ACEXML_Char c)
+ACEXML_Parser::isNameChar (const ACEXML_Char c) const
{
- return (is_whitespace (c) || c == '=') ? 1 : 0;
+ return (this->isLetter (c) || this->isDigit (c) || c == '.' || c == '-' ||
+ c == '_' || c == ':' || this->isCombiningchar (c) ||
+ this->isExtender (c));
}
ACEXML_INLINE int
-ACEXML_Parser::is_nonname (ACEXML_Char c)
+ACEXML_Parser::isPubidChar (const ACEXML_Char c) const
{
- // Handle this separately as doing so avoids code duplication and enables
- // setting of line and column numbers in one place.
- if (is_whitespace_or_equal (c))
- return 1;
+ return (c == '\x20' || c == '\x0D' || c == '\x0A' ||
+ (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
+ (c >= '0' && c <= '9') || c == '-' || c == '\'' || c == '(' ||
+ c == ')' || c == '+' || c == ',' || c == '.' || c == '/' ||
+ c == ':' || c == '=' || c == '?' || c == ';' || c == '!' ||
+ c == '*' || c == '#' || c == '@' || c == '$' || c == '_' ||
+ c == '%');
+}
+
+ACEXML_INLINE int
+ACEXML_Parser::is_whitespace (const ACEXML_Char c) const
+{
switch (c)
{
- case '/':
- case '?':
- case '>':
- case '<':
- case ')':
- case '(':
- case '+':
- case '*':
- case '\'':
- case '"':
- case ',':
- case '|':
+ case '\x0A': case '\x20':
+ case '\x09': case '\x0D':
return 1;
default:
return 0;
@@ -107,19 +171,59 @@ ACEXML_Parser::is_nonname (ACEXML_Char c)
}
ACEXML_INLINE ACEXML_Char
+ACEXML_Parser::skip_whitespace (void)
+{
+ ACEXML_Char ch = this->get();
+ while (this->is_whitespace (ch))
+ ch = this->get ();
+ return ch;
+}
+
+
+ACEXML_INLINE int
+ACEXML_Parser::skip_whitespace_count (ACEXML_Char *peeky)
+{
+ int wscount = 0;
+ ACEXML_Char dummy;
+ ACEXML_Char &forward = (peeky == 0 ? dummy : *peeky);
+
+ for (;this->is_whitespace ((forward = this->peek ())); ++wscount)
+ this->get ();
+ return wscount;
+}
+
+ACEXML_INLINE int
+ACEXML_Parser::skip_equal (void)
+{
+ if (this->skip_whitespace() != '=')
+ return -1;
+ while (this->is_whitespace (this->peek()))
+ this->get();
+ return 0;
+}
+
+ACEXML_INLINE ACEXML_Char
ACEXML_Parser::get (void)
{
- // Using an extra level of indirection so we can
- // manage document location in the future.
+ ACEXML_Char ch = 0;
+ const ACEXML_InputSource* ip = this->current_.getInputSource();
+ ACEXML_CharStream* instream = ip->getCharStream();
- if (this->instream_ != 0)
+ if (instream->get (ch) != -1)
{
- ACEXML_Char ch;
- this->instream_->get (ch);
- this->locator_.incrColumnNumber();
- if (ch == 0x0A) {
- this->locator_.incrLineNumber();
- this->locator_.setColumnNumber (0);
+ this->current_.getLocator()->incrColumnNumber();
+ // Normalize white-space
+ if (ch == '\x0D')
+ {
+ if (instream->peek() == 0x0A)
+ instream->get (ch);
+ ch = '\x0A';
+ }
+ if (ch == '\x0A')
+ {
+ // Reset column number and increment Line Number.
+ this->current_.getLocator()->incrLineNumber();
+ this->current_.getLocator()->setColumnNumber (0);
}
return ch;
}
@@ -131,9 +235,24 @@ ACEXML_Parser::peek (void)
{
// Using an extra level of indirection so we can
// manage document location in the future.
+ ACEXML_Char ch = 0;
+ const ACEXML_InputSource* ip = this->current_.getInputSource();
+ ACEXML_CharStream* instream = ip->getCharStream();
+ ch = instream->peek ();
+ return (ch == -1 ? 0 : ch);
+}
- if (this->instream_ != 0)
- return this->instream_->peek ();
+ACEXML_INLINE int
+ACEXML_Parser::parse_token (const ACEXML_Char* keyword)
+{
+ if (keyword == 0)
+ return -1;
+ const ACEXML_Char* ptr = keyword;
+ ACEXML_Char ch;
+ for (; *ptr != 0 && ((ch = this->get()) == *ptr); ++ptr)
+ ;
+ if (*ptr == 0)
return 0;
-
+ else
+ return -1;
}
diff --git a/ACEXML/parser/parser/ParserContext.cpp b/ACEXML/parser/parser/ParserContext.cpp
new file mode 100644
index 00000000000..fd0792677fb
--- /dev/null
+++ b/ACEXML/parser/parser/ParserContext.cpp
@@ -0,0 +1,15 @@
+// $Id$
+
+#include "ACEXML/parser/parser/ParserContext.h"
+
+#if !defined (__ACEXML_INLINE__)
+# include "ACEXML/parser/parser/ParserContext.inl"
+#endif /* __ACEXML_INLINE__ */
+
+// Tear down the context: delete the input source first, then the
+// locator, zeroing each member right after it has been deleted.
+ACEXML_Parser_Context::~ACEXML_Parser_Context()
+{
+  delete instream_;
+  instream_ = 0;
+
+  delete locator_;
+  locator_ = 0;
+}
diff --git a/ACEXML/parser/parser/ParserContext.h b/ACEXML/parser/parser/ParserContext.h
new file mode 100644
index 00000000000..3e62e8f532b
--- /dev/null
+++ b/ACEXML/parser/parser/ParserContext.h
@@ -0,0 +1,78 @@
+// -*- C++ -*-
+
+//=============================================================================
+/**
+ * @file ParserContext.h
+ *
+ * $Id$
+ *
+ * @author Krishnakumar B <kitty@cs.wustl.edu>
+ */
+//=============================================================================
+
+#ifndef ACEXML_PARSER_CONTEXT_H
+#define ACEXML_PARSER_CONTEXT_H
+
+#include "ace/pre.h"
+#include "ACEXML/parser/parser/Parser_export.h"
+
+#if !defined (ACE_LACKS_PRAGMA_ONCE)
+#pragma once
+#endif /* ACE_LACKS_PRAGMA_ONCE */
+
+#include "ACEXML/common/XML_Types.h"
+#include "ACEXML/common/InputSource.h"
+#include "ACEXML/common/Locator.h"
+#include "ACEXML/common/LocatorImpl.h"
+#include "ace/Functor.h"
+#include "ace/Containers_T.h"
+
+class ACEXML_PARSER_Export ACEXML_Parser_Context
+{
+public:
+ /// Default constructor. Leaves the input source and the locator null.
+ ACEXML_Parser_Context();
+
+ /// Constructor which stores the given input source and locator pointers.
+ ACEXML_Parser_Context (ACEXML_InputSource* instream,
+ ACEXML_LocatorImpl* locator);
+
+ /// Copy constructor. Copies the two pointers only; nothing is cloned.
+ ACEXML_Parser_Context (const ACEXML_Parser_Context& src);
+
+ /// Assignment operator. Shallow: adopts src's pointers; replaced objects are deleted.
+ ACEXML_Parser_Context& operator= (const ACEXML_Parser_Context& src);
+
+ /// Inequality operator. Compares the stored pointers, not the objects they point to.
+ int operator!= (const ACEXML_Parser_Context& src);
+
+ /// Destructor. Deletes the input source and the locator.
+ virtual ~ACEXML_Parser_Context();
+
+ /// Reset the parser context. This does not free any memory; it only sets
+ /// both pointers to zero. Meant to be called after a context is pushed
+ /// on to a stack.
+ void reset (void);
+
+ /// Get the underlying input source (may be null).
+ virtual ACEXML_InputSource* getInputSource(void);
+
+ /// Get the underlying locator (may be null).
+ virtual ACEXML_LocatorImpl* getLocator(void);
+
+private:
+
+ /// Current input source; deleted by the destructor.
+ ACEXML_InputSource *instream_;
+
+ /// Current locator (line no., column no., systemId and publicId); deleted by the destructor.
+ ACEXML_LocatorImpl* locator_;
+};
+
+#if defined (__ACEXML_INLINE__)
+# include "ACEXML/parser/parser/ParserContext.inl"
+#endif /* __ACEXML_INLINE__ */
+
+#include "ace/post.h"
+
+#endif /* ACEXML_PARSER_CONTEXT_H */
diff --git a/ACEXML/parser/parser/ParserContext.inl b/ACEXML/parser/parser/ParserContext.inl
new file mode 100644
index 00000000000..adbfe099bc2
--- /dev/null
+++ b/ACEXML/parser/parser/ParserContext.inl
@@ -0,0 +1,67 @@
+// $Id$
+
+
+/// Create an empty context: no input source and no locator.
+ACEXML_INLINE
+ACEXML_Parser_Context::ACEXML_Parser_Context()
+  : instream_ (0), locator_ (0)
+{
+}
+
+/// Build a context around an existing input source and locator.  Only
+/// the two pointers are stored; nothing is copied.
+ACEXML_INLINE
+ACEXML_Parser_Context::ACEXML_Parser_Context (ACEXML_InputSource* instream,
+                                              ACEXML_LocatorImpl* locator)
+  : instream_ (instream), locator_ (locator)
+{
+}
+
+/// Copy-construct a context that refers to the same input source and
+/// locator objects as @a src (pointer copy only).
+ACEXML_INLINE
+ACEXML_Parser_Context::ACEXML_Parser_Context (const ACEXML_Parser_Context& src)
+  : instream_ (src.instream_), locator_ (src.locator_)
+{
+}
+
+/// Inequality test on the stored pointers (the pointed-to objects are not
+/// compared).  Returns non-zero when the input source or the locator of
+/// @a src differs from the one held by this context.
+ACEXML_INLINE int
+ACEXML_Parser_Context::operator!= (const ACEXML_Parser_Context& src)
+{
+  // Differing in either member is enough to make two contexts unequal.
+  return (this->instream_ != src.instream_ || this->locator_ != src.locator_);
+}
+
+/// Shallow assignment: take over the pointers held by @a src, deleting
+/// whatever objects they replace.  Nothing is cloned, so afterwards both
+/// contexts refer to the same input source and locator.
+ACEXML_INLINE ACEXML_Parser_Context&
+ACEXML_Parser_Context::operator= (const ACEXML_Parser_Context& src)
+{
+  // Treat each member on its own: a pointer already shared with src must
+  // not be deleted, and one that differs must still be replaced even
+  // when the other member happens to match.  Self-assignment falls out
+  // as a no-op because both comparisons are false.
+  if (this->instream_ != src.instream_)
+    {
+      delete this->instream_;
+      this->instream_ = src.instream_;
+    }
+  if (this->locator_ != src.locator_)
+    {
+      delete this->locator_;
+      this->locator_ = src.locator_;
+    }
+  return *this;
+}
+
+
+
+/// Return the input source held by this context.  The pointer is null
+/// after default construction or reset().
+ACEXML_INLINE ACEXML_InputSource*
+ACEXML_Parser_Context::getInputSource (void)
+{
+  return this->instream_;
+}
+
+/// Return the locator held by this context.  The pointer is null after
+/// default construction or reset().
+ACEXML_INLINE ACEXML_LocatorImpl*
+ACEXML_Parser_Context::getLocator (void)
+{
+  return locator_;
+}
+
+/// Forget both pointers without deleting the objects they refer to.
+ACEXML_INLINE void
+ACEXML_Parser_Context::reset (void)
+{
+  locator_ = 0;
+  instream_ = 0;
+}
diff --git a/ChangeLog b/ChangeLog
index 8e257b62e0e..285f051fb34 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,118 @@
+Tue Nov 12 19:48:34 2002 Krishnakumar B <kitty@cs.wustl.edu>
+
+ * ACEXML/parser/parser/ParserContext.cpp:
+ * ACEXML/parser/parser/ParserContext.h:
+ * ACEXML/parser/parser/ParserContext.inl:
+
+ New files which hold the ParserContext needed to handle the
+ switching of input streams on the fly.
+
+ * ACEXML/parser/parser/ParserInternals.cpp:
+ * ACEXML/parser/parser/ParserInternals.h:
+
+ Moved some generic code from Parser.cpp to here.
+
+ * ACEXML/apps/svcconf/Makefile:
+ * ACEXML/common/Makefile:
+ * ACEXML/parser/parser/Makefile:
+
+ Updated dependencies.
+
+ * ACEXML/common/Attributes_Def_Builder.h:
+
+ No need to typedef in C++.
+
+ * ACEXML/common/DefaultHandler.cpp:
+
+ Minor typos.
+
+ * ACEXML/common/Encoding.cpp:
+
+ If auto-detection of encoding fails, assume that it is UTF-8.
+
+ * ACEXML/common/Exception.cpp:
+
+ Change the error message from ACE_DEBUG to ACE_ERROR.
+
+ * ACEXML/common/FileCharStream.cpp: Handle BOM of UTF-8 in
+ addition to UTF-16. Cleanup unnecessary parens.
+
+ * ACEXML/common/HttpCharStream.cpp:
+ * ACEXML/common/HttpCharStream.h:
+
+ Add support for auto-detection of encoding.
+
+ * ACEXML/common/InputSource.cpp:
+ * ACEXML/common/InputSource.h:
+
+ Fixes for use with ACEXML_Parser_Context.
+
+ * ACEXML/common/LocatorImpl.cpp:
+ * ACEXML/common/LocatorImpl.h:
+
+ Fixed bug in copy constructor which resulted in locator
+ information not getting set properly.
+
+ * ACEXML/common/NamespaceSupport.cpp:
+ * ACEXML/common/NamespaceSupport.h:
+
+ Implement reset() method.
+
+ * ACEXML/common/SAXExceptions.cpp:
+
+ Change the error message from ACE_DEBUG to ACE_ERROR.
+
+ * ACEXML/common/StrCharStream.cpp:
+
+ Handle copying of bytes according to sizeof (ACE_WCHAR).
+
+ * ACEXML/common/StreamFactory.cpp: Create the appropriate stream
+ given an URI. We don't try to normalize the URI here. It is done
+ in the Parser.
+
+ * ACEXML/common/Transcode.cpp:
+ * ACEXML/common/Transcode.i:
+
+ Moved some very big functions from .i to .cpp.
+
+ * ACEXML/examples/SAXPrint/SAXPrint_Handler.cpp:
+ * ACEXML/examples/SAXPrint/main.cpp:
+
+ Updates to reflect the new calling convention in the Parser.
+
+ * ACEXML/parser/parser/Entity_Manager.cpp:
+ * ACEXML/parser/parser/Entity_Manager.h:
+ * ACEXML/parser/parser/Entity_Manager.i:
+
+ Implemented support for resolving SYSTEM and PUBLIC ids from
+ the Entity_Manager.
+
+ * ACEXML/parser/parser/Parser.cpp:
+ * ACEXML/parser/parser/Parser.h:
+ * ACEXML/parser/parser/Parser.i:
+
+ Implemented support for external parameter and entity
+ references. Rewrote a lot of the basic parsing functionality to
+ adhere to the standard. Implement partial support for validation
+ of XML files.
+
+
+Fri Oct 25 15:44:04 2002 Krishnakumar B <kitty@cs.wustl.edu>
+
+ * ACEXML/parser/parser/Parser.i: Handle end-of-line as required by
+ the spec. Specifically any sequence of 0x0D or 0x0D 0x0A should
+ be normalized to a 0x0A before passing to the XML processor.
+
+ * ACEXML/parser/parser/Parser.cpp: Remove checks for 0x0D as it is
+ handled transparently now.
+
+Thu Oct 24 21:06:44 2002 Krishnakumar B <kitty@cs.wustl.edu>
+
+ * ACEXML/common/NamespaceSupport.cpp: Define strings normally and
+ not as an array.
+
+ * ACEXML/common/Attributes_Def_Builder.h: No need to typedef in C++.
+
Thu Oct 24 01:52:46 2002 Krishnakumar B <kitty@cs.wustl.edu>
* ACEXML\parser\parser\Parser.cpp: Moved out the declaration of
diff --git a/ChangeLogs/ChangeLog-03a b/ChangeLogs/ChangeLog-03a
index 8e257b62e0e..285f051fb34 100644
--- a/ChangeLogs/ChangeLog-03a
+++ b/ChangeLogs/ChangeLog-03a
@@ -1,3 +1,118 @@
+Tue Nov 12 19:48:34 2002 Krishnakumar B <kitty@cs.wustl.edu>
+
+ * ACEXML/parser/parser/ParserContext.cpp:
+ * ACEXML/parser/parser/ParserContext.h:
+ * ACEXML/parser/parser/ParserContext.inl:
+
+ New files which hold the ParserContext needed to handle the
+ switching of input streams on the fly.
+
+ * ACEXML/parser/parser/ParserInternals.cpp:
+ * ACEXML/parser/parser/ParserInternals.h:
+
+ Moved some generic code from Parser.cpp to here.
+
+ * ACEXML/apps/svcconf/Makefile:
+ * ACEXML/common/Makefile:
+ * ACEXML/parser/parser/Makefile:
+
+ Updated dependencies.
+
+ * ACEXML/common/Attributes_Def_Builder.h:
+
+ No need to typedef in C++.
+
+ * ACEXML/common/DefaultHandler.cpp:
+
+ Minor typos.
+
+ * ACEXML/common/Encoding.cpp:
+
+ If auto-detection of encoding fails, assume that it is UTF-8.
+
+ * ACEXML/common/Exception.cpp:
+
+ Change the error message from ACE_DEBUG to ACE_ERROR.
+
+ * ACEXML/common/FileCharStream.cpp: Handle BOM of UTF-8 in
+ addition to UTF-16. Cleanup unnecessary parens.
+
+ * ACEXML/common/HttpCharStream.cpp:
+ * ACEXML/common/HttpCharStream.h:
+
+ Add support for auto-detection of encoding.
+
+ * ACEXML/common/InputSource.cpp:
+ * ACEXML/common/InputSource.h:
+
+ Fixes for use with ACEXML_Parser_Context.
+
+ * ACEXML/common/LocatorImpl.cpp:
+ * ACEXML/common/LocatorImpl.h:
+
+ Fixed bug in copy constructor which resulted in locator
+ information not getting set properly.
+
+ * ACEXML/common/NamespaceSupport.cpp:
+ * ACEXML/common/NamespaceSupport.h:
+
+ Implement reset() method.
+
+ * ACEXML/common/SAXExceptions.cpp:
+
+ Change the error message from ACE_DEBUG to ACE_ERROR.
+
+ * ACEXML/common/StrCharStream.cpp:
+
+ Handle copying of bytes according to sizeof (ACE_WCHAR).
+
+ * ACEXML/common/StreamFactory.cpp: Create the appropriate stream
+ given an URI. We don't try to normalize the URI here. It is done
+ in the Parser.
+
+ * ACEXML/common/Transcode.cpp:
+ * ACEXML/common/Transcode.i:
+
+ Moved some very big functions from .i to .cpp.
+
+ * ACEXML/examples/SAXPrint/SAXPrint_Handler.cpp:
+ * ACEXML/examples/SAXPrint/main.cpp:
+
+ Updates to reflect the new calling convention in the Parser.
+
+ * ACEXML/parser/parser/Entity_Manager.cpp:
+ * ACEXML/parser/parser/Entity_Manager.h:
+ * ACEXML/parser/parser/Entity_Manager.i:
+
+ Implemented support for resolving SYSTEM and PUBLIC ids from
+ the Entity_Manager.
+
+ * ACEXML/parser/parser/Parser.cpp:
+ * ACEXML/parser/parser/Parser.h:
+ * ACEXML/parser/parser/Parser.i:
+
+ Implemented support for external parameter and entity
+ references. Rewrote a lot of the basic parsing functionality to
+ adhere to the standard. Implement partial support for validation
+ of XML files.
+
+
+Fri Oct 25 15:44:04 2002 Krishnakumar B <kitty@cs.wustl.edu>
+
+ * ACEXML/parser/parser/Parser.i: Handle end-of-line as required by
+ the spec. Specifically any sequence of 0x0D or 0x0D 0x0A should
+ be normalized to a 0x0A before passing to the XML processor.
+
+ * ACEXML/parser/parser/Parser.cpp: Remove checks for 0x0D as it is
+ handled transparently now.
+
+Thu Oct 24 21:06:44 2002 Krishnakumar B <kitty@cs.wustl.edu>
+
+ * ACEXML/common/NamespaceSupport.cpp: Define strings normally and
+ not as an array.
+
+ * ACEXML/common/Attributes_Def_Builder.h: No need to typedef in C++.
+
Thu Oct 24 01:52:46 2002 Krishnakumar B <kitty@cs.wustl.edu>
* ACEXML\parser\parser\Parser.cpp: Moved out the declaration of
diff --git a/tests/Obstack_Test.cpp b/tests/Obstack_Test.cpp
index 110fea4eba9..ad424254b98 100644
--- a/tests/Obstack_Test.cpp
+++ b/tests/Obstack_Test.cpp
@@ -29,8 +29,6 @@ int ACE_TMAIN (int, ACE_TCHAR *[])
ACE_START_TEST (ACE_TEXT ("Obstack_Test"));
int errors = 0;
- // For this test, the length of the ACE_Obstack must be larger than
- // both of these strings, but less than their sum.
const ACE_TCHAR str1[] = ACE_TEXT ("Mary had a little lamb.");
const ACE_TCHAR str2[] = ACE_TEXT ("It's fleece was white as snow; but....");
ACE_Obstack_T<ACE_TCHAR> stack (sizeof (str1) + 1);
diff --git a/tests/Service_Config_Test.conf.xml b/tests/Service_Config_Test.conf.xml
index f3273f0cb93..767e885c467 100644
--- a/tests/Service_Config_Test.conf.xml
+++ b/tests/Service_Config_Test.conf.xml
@@ -1,4 +1,5 @@
<?xml version='1.0'?>
+<!DOCTYPE ACE_Svc_Conf "http://www.cs.wustl.edu/~kitty/svcconf.dtd">
<!-- Converted from Service_Config_Test.conf by svcconf-convert.pl -->
<ACE_Svc_Conf>
<!-- Dynamically loading each of the Service Objects below causes a -->