summaryrefslogtreecommitdiff
path: root/ACEXML/parser/parser/Parser.h
diff options
context:
space:
mode:
Diffstat (limited to 'ACEXML/parser/parser/Parser.h')
-rw-r--r--ACEXML/parser/parser/Parser.h689
1 files changed, 539 insertions, 150 deletions
diff --git a/ACEXML/parser/parser/Parser.h b/ACEXML/parser/parser/Parser.h
index e45e290ca79..c92af489869 100644
--- a/ACEXML/parser/parser/Parser.h
+++ b/ACEXML/parser/parser/Parser.h
@@ -7,6 +7,7 @@
* $Id$
*
* @author Nanbor Wang <nanbor@cs.wustl.edu>
+ * @author Krishnakumar B <kitty@cs.wustl.edu>
*/
//=============================================================================
@@ -28,9 +29,12 @@
#include "ace/Functor.h"
#include "ace/SString.h"
#include "ace/Hash_Map_Manager.h"
+#include "ace/Unbounded_Set.h"
#include "ace/Containers_T.h"
#include "ace/Auto_Ptr.h"
#include "ACEXML/parser/parser/Entity_Manager.h"
+#include "ACEXML/parser/parser/ParserInternals.h"
+#include "ACEXML/parser/parser/ParserContext.h"
/**
* @class ACEXML_Parser Parser.h "ACEXML/parser/parser/Parser.h"
@@ -47,7 +51,14 @@ public:
/// Destructor.
virtual ~ACEXML_Parser (void);
- /*
+ /**
+ * Initialize the parser state.
+ *
+ * @retval 0 if parser was initialized correctly else -1.
+ */
+ int initialize (ACEXML_InputSource* input);
+
+ /**
* Return the current content handler.
*/
virtual ACEXML_ContentHandler *getContentHandler (void) const;
@@ -132,80 +143,41 @@ public:
*/
virtual void setErrorHandler (ACEXML_ErrorHandler *handler);
- // *** Helper functions for parsing XML
- /**
- * Skip any whitespaces encountered until the first non-whitespace
- * character is encountered and consumed from the current input
- * CharStream.
- *
- * @param whitespace Return a pointer to the string of skipped
- * whitespace after proper conversion. Null if there's no
- * whitespace found.
- *
- * @return The first none-white space characters (which will be
- * consumed from the CharStream.) If no whitespace is found, it
- * returns 0.
- *
- * @sa skip_whitespace_count
- */
- ACEXML_Char skip_whitespace (ACEXML_Char **whitespace);
+protected:
/**
- * Skip any whitespaces encountered until the first non-whitespace
- * character. The first non-whitespace character is not consumed.
- * This method does peek into the input CharStream and therefore
- * is more expensive than @ref skip_whitespace.
- *
- * @param peek If non-null, @a peek points to a ACEXML_Char where
- * skip_whitespace_count stores the first non-whitespace
- * character it sees (character is not removed from the stream.)
- *
- * @return The number of whitespace characters consumed.
- *
- * @sa skip_whitespace
+ * Parse XML Prolog.
*/
- int skip_whitespace_count (ACEXML_Char *peek = 0);
+ void parse_xml_prolog (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
/**
- * Check if a character @a c is a whitespace.
+ * Parse VersionInfo declaration.
*
- * @retval 1 if @a c is a valid white space character. 0 otherwise.
*/
- int is_whitespace (ACEXML_Char c);
+ void parse_version_info (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
/**
- * Check if a character @a c is a whitespace or '='.
+ * Parse an EncodingDecl declaration.
*
- * @retval 1 if true, 0 otherwise.
*/
- int is_whitespace_or_equal (ACEXML_Char c);
+ void parse_encoding_decl (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
/**
- * Check if a character @a c is a valid character for nonterminal NAME.
+ * Parse an XMLDecl declaration.
*
- * @retval 1 if true, 0 otherwise.
*/
- int is_nonname (ACEXML_Char c);
+ void parse_xml_decl (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
/**
- * Skip an equal sign.
- *
- * @retval 0 when succeeds, -1 if no equal sign is found.
+ * Parse a TextDecl declaration.
*/
- int skip_equal (void);
-
- /**
- * Get a quoted string. Quoted strings are used to specify
- * attribute values and this routine will replace character and
- * entity references on-the-fly. Parameter entities are not allowed
- * (or replaced) in this function. (But regular entities are.)
- *
- * @param str returns the un-quoted string.
- *
- * @retval 0 on success, -1 otherwise.
- */
- int get_quoted_string (ACEXML_Char *&str);
+ int parse_text_decl (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
/**
* Parse a PI statement. The first character encountered
@@ -213,33 +185,15 @@ public:
*
* @retval 0 on success, -1 otherwise.
*/
- int parse_processing_instruction (ACEXML_ENV_SINGLE_ARG_DECL);
-
- /**
- * Skip over a comment. The first character encountered
- * should always be the first '-' in the comment prefix
- * "@<@!--".
- */
- int grok_comment ();
-
- /**
- * Read a name from the input CharStream (until white space).
- * If @a ch @!= 0, then we have already consumed the first name
- * character from the input CharStream, otherwise, read_name
- * will use this->get() to acquire the initial character.
- *
- * @return A pointer to the string in the obstack, 0 if it's not
- * a valid name.
- */
- ACEXML_Char *read_name (ACEXML_Char ch = 0);
+ int parse_processing_instruction (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
/**
* Parse the DOCTYPE declaration. The first character encountered
* should always be 'D' in doctype prefix: "@<@!DOCTYPE".
*/
int parse_doctypedecl (ACEXML_ENV_SINGLE_ARG_DECL)
- ACE_THROW_SPEC ((ACEXML_SAXException))
- ;
+ ACE_THROW_SPEC ((ACEXML_SAXException));
/**
* Parse an XML element. The first character encountered should
@@ -255,35 +209,39 @@ public:
* can be used in a validator.
*/
void parse_element (int is_root ACEXML_ENV_ARG_DECL)
- ACE_THROW_SPEC ((ACEXML_SAXException))
- ;
+ ACE_THROW_SPEC ((ACEXML_SAXException));
/**
- * Parse XML Prolog.
+ * Parse a content declaration.
+ *
*/
- void parse_xml_prolog (ACEXML_ENV_SINGLE_ARG_DECL)
+ int parse_content (const ACEXML_Char* startname, const ACEXML_Char* ns_uri,
+ const ACEXML_Char* ns_lname
+ ACEXML_ENV_ARG_DECL)
ACE_THROW_SPEC ((ACEXML_SAXException));
-
/**
* Parse a character reference, i.e., "&#x20;" or "&#30;". The first
* character encountered should be the '#' char.
*
* @param buf points to a character buffer for the result.
- * @param len specifies the capacities of the buffer.
+ *
+ * @param len In/out argument which initially specifies the size of the
+ * buffer and is later set to the no. of characters in the reference.
*
* @retval 0 on success and -1 otherwise.
*/
- int parse_char_reference (ACEXML_Char *buf, size_t len);
+ int parse_char_reference (ACEXML_Char *buf, size_t& len);
/**
- * Parse an entity reference, i.e., "&amp;". The first character
- * encountered should be the character following '&'.
+ * Parse a reference name, i.e., foo in "&foo;" or "%foo;". The first
+ * character encountered should be the character following '&' or '%'.
+ * Effectively the same as @sa parse_name but we don't use the parser's
+ * obstack. Caller is responsible for deleting the memory.
*
- * @return A pointer to the resolved const ACEXML_String if success
- * (previously defined), 0 otherwise.
+ * @retval A pointer to name of reference, 0 otherwise.
*/
- const ACEXML_String *parse_reference (void);
+ ACEXML_Char* parse_reference_name (void);
/**
* Parse a CDATA section. The first character should always be the first
@@ -292,13 +250,21 @@ public:
* @retval 0 on success.
* @retval -1 if fail.
*/
- int parse_cdata (ACEXML_ENV_SINGLE_ARG_DECL);
+ int parse_cdata (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
/**
* Parse a "markupdecl" section, this includes both "markupdecl" and
* "DeclSep" sections in XML specification
*/
- int parse_internal_dtd (ACEXML_ENV_SINGLE_ARG_DECL);
+ int parse_internal_dtd (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
+
+ /**
+ * Skip over a comment. The first character encountered should always be
+ * the first '-' in the comment prefix "@<@!--".
+ */
+ int parse_comment (void);
/**
* Parse an "ELEMENT" decl. The first character this method
@@ -307,7 +273,8 @@ public:
*
* @retval 0 on success, -1 otherwise.
*/
- int parse_element_decl (ACEXML_ENV_SINGLE_ARG_DECL);
+ int parse_element_decl (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
/**
* Parse an "ENTITY" decl. The first character this method expects
@@ -315,7 +282,8 @@ public:
*
* @retval 0 on success, -1 otherwise.
*/
- int parse_entity_decl (ACEXML_ENV_SINGLE_ARG_DECL);
+ int parse_entity_decl (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
/**
* Parse an "ATTLIST" decl. Thse first character this method
@@ -324,7 +292,15 @@ public:
*
* @retval 0 on success, -1 otherwise.
*/
- int parse_attlist_decl (ACEXML_ENV_SINGLE_ARG_DECL);
+ int parse_attlist_decl (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
+
+ /**
+ * Parse an AttType declaration.
+ *
+ */
+ int parse_atttype (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
/**
*Parse a "NOTATION" decl. The first character this method
@@ -333,7 +309,8 @@ public:
*
* @retval 0 on success, -1 otherwise.
*/
- int parse_notation_decl (ACEXML_ENV_SINGLE_ARG_DECL);
+ int parse_notation_decl (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
/**
* Parse an ExternalID or a reference to PUBLIC ExternalID.
@@ -355,8 +332,81 @@ public:
*
* @retval 0 on success, -1 otherwise.
*/
- int parse_external_id_and_ref (ACEXML_Char *&publicId,
- ACEXML_Char *&systemId ACEXML_ENV_ARG_DECL);
+ int parse_external_id (ACEXML_Char *&publicId, ACEXML_Char *&systemId
+ ACEXML_ENV_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
+
+ /**
+ * Parse an external DTD.
+ *
+ */
+ int parse_external_dtd (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
+
+ /**
+ * Parse an external subset. This does the actual parsing of an external
+ * subset and is called by @sa parse_external_dtd.
+ *
+ */
+ int parse_external_subset (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
+
+ /**
+ * Parse a markupDecl section.
+ *
+ */
+ int parse_markup_decl (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
+
+ /**
+ * Parse a conditionalSect declaration.
+ *
+ */
+ int parse_conditional_section (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
+
+ /**
+ * Parse an includeSect declaration.
+ *
+ */
+ int parse_includesect (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
+
+ /**
+ *
+ * Parse an ignoreSect declaration.
+ */
+ int parse_ignoresect (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
+
+ /**
+ * Parse a PEReference.
+ *
+ */
+ int parse_PE_reference (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
+
+ /**
+ * Parse a Reference.
+ *
+ */
+ int parse_entity_reference (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
+
+ /**
+ * Parse an entityValue.
+ *
+ */
+ int parse_entity_value (ACEXML_Char *&str ACEXML_ENV_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
+
+ /**
+ * Parse a DefaultDecl specification.
+ *
+ */
+ int parse_defaultdecl (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
+
/**
* Parse the "children" and "Mixed" non-terminals in contentspec.
@@ -366,7 +416,8 @@ public:
*
* @retval 0 on success, -1 otherwise.
*/
- int parse_children_definition (ACEXML_ENV_SINGLE_ARG_DECL);
+ int parse_children_definition (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
/**
* Parse a @c cp non-terminal. @c cp can either be a @c seq or a @c choice.
@@ -378,81 +429,259 @@ public:
*
* @retval 0 on success, -1 otherwise.
*/
- int parse_child (int skip_open_paren ACEXML_ENV_ARG_DECL);
+ int parse_child (int skip_open_paren ACEXML_ENV_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
-protected:
- /// Get a character.
- ACEXML_Char get (void);
+ /**
+ * Parse a name from the input CharStream. If @a ch @!= 0, then we have
+ * already consumed the first name character from the input CharStream,
+ * otherwise, parse_name will use this->get() to acquire the initial
+ * character.
+ *
+ * @return A pointer to the string in the obstack, 0 if it's not a
+ * valid name.
+ */
+ ACEXML_Char *parse_name (ACEXML_Char ch = 0);
- /// Peek a character.
- ACEXML_Char peek (void);
+ /**
+ * Parse a NMTOKEN from the input stream.
+ *
+ * @return A pointer to the string in the obstack, 0 if it's not a valid
+ * NMTOKEN.
+ */
+ ACEXML_Char* parse_nmtoken (ACEXML_Char ch = 0);
/**
- * Check if more data can be added to a character buffer in obstack.
- * If not, the existing data in the buffer will be cleared out by
- * freezing the segment and pass it out thru a content_handler_->characters ()
- * call. @a counter records the length of the existing data in
- * obstack.
+ * Parse the version string in an XML Prolog section.
+ *
+ * @param str String containing the version number if successful.
+ * @return 0 if the string was read successfully, -1 otherwise.
*/
- int try_grow_cdata (size_t size, size_t &len ACEXML_ENV_ARG_DECL);
+ int parse_version (ACEXML_Char*& str);
- // Feature names:
+ /**
+ * Parse the version number in a VersionInfo declaration.
+ */
+ int parse_version_num (ACEXML_Char*& str);
/**
- * \addtogroup acexml_parser_features
- * @{
+ * Parse the encoding name in an XML Prolog section.
+ *
+ * @param str String containing the encoding name if successful.
+ * @return 0 if the string was read successfully, -1 otherwise.
*/
+ int parse_encname (ACEXML_Char*& str);
/**
- * @var simple_parsing_feature_
+ * Parse an SDDecl string.
*
- * This constant string defines the name of "simple XML parsing"
- * feature. When this feature is enabled, ACEXML parser is allowed
- * to parse a simple XML stream without mandated XML prolog
- * and no DTD defintion.
+ * @param str String containing the standalone declaration value if successful.
+ * @return 0 if the string was read successfully, -1 otherwise.
*/
- static const ACEXML_Char simple_parsing_feature_[];
+ int parse_sddecl (ACEXML_Char*& str);
/**
- * @var namespaces_feature_
+ * Parse an attribute name.
*
- * This constant string defines the SAX XML Namespace feature. When this
- * feature is enabled, ACEXML parser allows access by namespace qualified
- * names.
+ * @retval str String containing the value of the attribute name
+ * if successful.
+ * @retval 0 otherwise.
*/
- static const ACEXML_Char namespaces_feature_[];
+ ACEXML_Char* parse_attname (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
/**
- * @var namespace_prefixes_feature_
+ * Parse an attribute value.
*
- * This constant string defines the SAX XML Namespace prefixes feature.
- * Normally the list of attributes returned by the parser will not
- * contain attributes used as namespace declarations (xmlns*). When this
- * feature is enabled, the list of attributes contains the namespace
- * declarations also.
+ * @param str String containing the value of the attribute if successful.
+ * @return 0 if attribute value was read successfully, -1 otherwise.
*/
- static const ACEXML_Char namespace_prefixes_feature_[];
+ int parse_attvalue (ACEXML_Char*& str ACEXML_ENV_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
- /* @} */
+ /**
+ * Parse a tokenized type attribute.
+ *
+ * @return 0 if attribute type was read successfully, -1 otherwise.
+ */
+ int parse_tokenized_type (ACEXML_ENV_SINGLE_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
+
+ /**
+ * Parse a SystemLiteral.
+ *
+ * @param str String containing the SystemLiteral if successful.
+ * @return 0 if the string was read successfully, -1 otherwise.
+ */
+ int parse_system_literal (ACEXML_Char*& str);
+
+ /**
+ * Parse a PubidLiteral.
+ *
+ * @param str String containing the PubidLiteral if successful.
+ * @return 0 if the string was read successfully, -1 otherwise.
+ */
+ int parse_pubid_literal (ACEXML_Char*& str);
+
+ /**
+ * Check if a character @a c is a whitespace.
+ *
+ * @retval 1 if @a c is a valid white space character. 0 otherwise.
+ */
+ int is_whitespace (const ACEXML_Char c) const;
+
+ /**
+ * Check if a character @a c is a valid Char.
+ *
+ * @retval 1 if @a c is a valid character. 0 otherwise.
+ */
+ int isChar (ACEXML_UCS4 c) const;
+
+ /**
+ * Check if a character @a c is a valid CharRef character.
+ *
+ * @retval 1 if @a c is a valid character reference character, 0 otherwise.
+ */
+ int isCharRef (const ACEXML_Char c) const;
+
+ /**
+ * Check if a character @a c is a BaseChar.
+ *
+ * @retval 1 if @a c is a valid BaseChar character, 0 otherwise.
+ */
+ int isBasechar (const ACEXML_Char c) const;
+
+ /**
+ * Check if a character @a c is an Ideographic.
+ *
+ * @retval 1 if @a c is a valid Ideographic character, 0 otherwise.
+ */
+ int isIdeographic (const ACEXML_Char c) const;
+
+ /**
+ * Check if a character @a c is a CombiningChar.
+ *
+ * @retval 1 if @a c is a valid CombiningChar character, 0 otherwise.
+ */
+ int isCombiningchar (const ACEXML_Char c) const;
+
+ /**
+ * Check if a character @a c is a Digit.
+ *
+ * @retval 1 if @a c is a valid Digit character, 0 otherwise.
+ */
+ int isDigit (const ACEXML_Char c) const;
+
+ /**
+ * Check if a character @a c is an Extender.
+ *
+ * @retval 1 if @a c is a valid Extender character, 0 otherwise.
+ */
+ int isExtender (const ACEXML_Char c) const;
+
+ /**
+ * Check if a character @a c is a Letter.
+ *
+ * @retval 1 if @a c is a valid Letter character, 0 otherwise.
+ */
+ int isLetter (const ACEXML_Char c) const;
+
+ /**
+ * Check if a character is an acceptable NameChar.
+ *
+ * @retval 1 if @a c is a valid NameChar character, 0 otherwise.
+ */
+ int isNameChar (const ACEXML_Char c) const;
+
+ /**
+ * Check if a character is a PubidChar.
+ *
+ * @retval 1 if @a c is a valid PubidChar character, 0 otherwise.
+ */
+ int isPubidChar (const ACEXML_Char c) const;
+
+ /// Get a character.
+ virtual ACEXML_Char get (void);
+
+ /// Peek a character.
+ virtual ACEXML_Char peek (void);
private:
+
+ // *** Helper functions for parsing XML
+
+ /**
+ * Skip any whitespaces encountered until the first non-whitespace
+ * character is encountered.
+ *
+ * @return The next non-whitespace character from the CharStream.
+ *
+ * @sa skip_whitespace_count
+ */
+ ACEXML_Char skip_whitespace (void);
+
+ /**
+ * Skip any whitespaces encountered until the first non-whitespace
+ * character. The first non-whitespace character is not consumed.
+ * This method does peek into the input CharStream and therefore
+ * is more expensive than @ref skip_whitespace.
+ *
+ * @param peek If non-null, @a peek points to a ACEXML_Char where
+ * skip_whitespace_count stores the first non-whitespace
+ * character it sees (character is not removed from the stream.)
+ *
+ * @return The number of whitespace characters consumed.
+ *
+ * @sa skip_whitespace
+ */
+ int skip_whitespace_count (ACEXML_Char *peek = 0);
+
+ /**
+ * Skip an equal sign.
+ *
+ * @retval 0 when succeeds, -1 if no equal sign is found.
+ */
+ int skip_equal (void);
+
+ /**
+ * Get a quoted string. Quoted strings are used to specify
+ * attribute values and this routine will replace character and
+ * entity references on-the-fly. Parameter entities are not allowed
+ * (or replaced) in this function. (But regular entities are.)
+ *
+ * @param str returns the un-quoted string.
+ *
+ * @retval 0 on success, -1 otherwise.
+ */
+ int get_quoted_string (ACEXML_Char *&str);
+
+ /**
+ * Check if a character @a c is a Digit.
+ *
+ * @retval 1 if @a c is a valid Digit character, 0 otherwise.
+ */
+ int isNormalDigit (const ACEXML_Char c) const;
+
/**
* Dispatch errors to ErrorHandler.
*
*/
- void report_error (const ACEXML_Char* message ACEXML_ENV_ARG_DECL);
+ void error (const ACEXML_Char* msg ACEXML_ENV_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
/**
* Dispatch warnings to ErrorHandler.
*
*/
- void report_warning (const ACEXML_Char* message ACEXML_ENV_ARG_DECL);
+ void warning (const ACEXML_Char* msg ACEXML_ENV_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
/**
* Dispatch fatal errors to ErrorHandler.
*
*/
- void report_fatal_error (const ACEXML_Char* message ACEXML_ENV_ARG_DECL);
+ void fatal_error (const ACEXML_Char* msg ACEXML_ENV_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
/**
* Dispatch prefix mapping calls to the ContentHandler.
@@ -462,15 +691,112 @@ private:
* @param name Local name
* @param start 1 => startPrefixMapping 0 => endPrefixMapping
*/
- void report_prefix_mapping (const ACEXML_Char* prefix,
+ void prefix_mapping (const ACEXML_Char* prefix,
const ACEXML_Char* uri,
const ACEXML_Char* name,
- int start ACEXML_ENV_ARG_DECL);
+ int start ACEXML_ENV_ARG_DECL)
+ ACE_THROW_SPEC ((ACEXML_SAXException));
/**
* Parse a keyword.
*/
int parse_token (const ACEXML_Char* keyword);
+ /**
+ * Push the current context on to the stack.
+ *
+ */
+ int push_context (ACEXML_Parser_Context* context);
+
+ /**
+ * Pop the top element in the stack and replace current context with that.
+ */
+ int pop_context (int GE_ref ACEXML_ENV_ARG_DECL);
+
+ /**
+ * Create a new ACEXML_CharStream from @a systemId and @a publicId and
+ * replace the current input stream with the newly created stream.
+ */
+ virtual int switch_input (ACEXML_CharStream* cstream,
+ const ACEXML_Char* systemId = 0,
+ const ACEXML_Char* publicId = 0);
+ /**
+ * Create a new ACEXML_InputSource from @a systemId and @a publicId and
+ * replace the current input source with the newly created InputSource.
+ */
+ virtual int switch_input (ACEXML_InputSource* input,
+ const ACEXML_Char* systemId = 0,
+ const ACEXML_Char* publicId = 0);
+
+ /**
+ * Check for a parameter entity reference. This is used to check for the
+ * occurrence of a PE Reference within markupDecl. Additionally this
+ * function consumes any leading or trailing whitespace around the PE
+ * Reference.
+ *
+ * @retval Number of whitespace characters skipped.
+ */
+ int check_for_PE_reference (ACEXML_ENV_SINGLE_ARG_DECL);
+
+ /**
+ * Reset the parser state.
+ *
+ */
+ void reset (void);
+
+ /**
+ * Very trivial, non-conformant normalization of a systemid.
+ *
+ */
+ ACEXML_Char* normalize_systemid (const char* systemId);
+
+ // Feature names:
+
+ /**
+ * \addtogroup acexml_parser_features
+ * @{
+ */
+
+ /**
+ * @var simple_parsing_feature_
+ *
+ * This constant string defines the name of "simple XML parsing"
+ * feature. When this feature is enabled, ACEXML parser is allowed
+ * to parse a simple XML stream without mandated XML prolog
+ * and no DTD definition.
+ */
+ static const ACEXML_Char simple_parsing_feature_[];
+
+ /**
+ * @var namespaces_feature_
+ *
+ * This constant string defines the SAX XML Namespace feature. When this
+ * feature is enabled, ACEXML parser allows access by namespace qualified
+ * names.
+ */
+ static const ACEXML_Char namespaces_feature_[];
+
+ /**
+ * @var namespace_prefixes_feature_
+ *
+ * This constant string defines the SAX XML Namespace prefixes feature.
+ * Normally the list of attributes returned by the parser will not
+ * contain attributes used as namespace declarations (xmlns*). When this
+ * feature is enabled, the list of attributes contains the namespace
+ * declarations also.
+ */
+ static const ACEXML_Char namespace_prefixes_feature_[];
+
+ /**
+ * @var validation_feature_
+ *
+ * This constant string defines the SAX XML Validation feature. When
+ * this feature is enabled, the parser validates the document in
+ * addition to checking for well-formedness.
+ */
+ static const ACEXML_Char validation_feature_[];
+
+ /* @} */
+
/// Keeping track of the handlers. We do not manage the memory for
/// handlers.
ACEXML_DTDHandler *dtd_handler_;
@@ -478,33 +804,96 @@ private:
ACEXML_ContentHandler *content_handler_;
ACEXML_ErrorHandler *error_handler_;
- /// @@ Feature and properties management structure here.
- /// Current input char stream.
- ACEXML_CharStream *instream_;
-
- /// My doctype, if any.
+ /// Document Type
ACEXML_Char *doctype_;
- /// External DTD System Literal, if any.
- ACEXML_Char *dtd_system_;
+ /// Current parser context
+ ACEXML_Parser_Context* current_;
+
+ /// Stack used to hold the Parser_Context
+ ACE_Unbounded_Stack<ACEXML_Parser_Context*> ctx_stack_;
+
+ /*
+ * The following two are essentially chains of references and are used by
+ * the parser to determine if there is any recursion. We keep two of
+ * these, one for general entities and one for parameter entities, as they
+ * fall under different namespaces.
+ *
+ */
+ /// Stack used to hold the general entity references that are active.
+ ACE_Unbounded_Stack<ACEXML_String> GE_reference_;
- /// External DTD Public Literal, if any.
- ACEXML_Char *dtd_public_;
+ /// Stack used to hold the parameter entity references that are active.
+ ACE_Unbounded_Stack<ACEXML_String> PE_reference_;
+ /// Obstack used by the parser to hold all the strings parsed
ACE_Obstack_T<ACEXML_Char> obstack_;
+ /// Alternative obstack used to hold any strings when the original is in use
+ ACE_Obstack_T<ACEXML_Char> alt_stack_;
+
+ /// Namespace stack used by the parser to implement support for Namespaces
ACEXML_NamespaceSupport xml_namespace_;
- ACEXML_Entity_Manager entities_;
+ /// T => We are processing a nested namespace
+ int nested_namespace_;
+
+ /// Set of internal parsed general entities in the document
+ ACEXML_Entity_Manager internal_GE_;
+
+ /// Set of external parsed general entities in the document
+ ACEXML_Entity_Manager external_GE_;
+
+ /// Set of unparsed entities in the document
+ ACEXML_Entity_Manager unparsed_entities_;
+
+ /// Set of predefined entities used by the parser
+ ACEXML_Entity_Manager predef_entities_;
+
+ /// Set of internal parsed parameter entities in the document
+ ACEXML_Entity_Manager internal_PE_;
- // Locator
- ACEXML_LocatorImpl locator_;
+ /// Set of external parsed parameter entities in the document
+ ACEXML_Entity_Manager external_PE_;
- // Feature flags &
+ /// Set of notations declared in the document
+ ACEXML_Entity_Manager notations_;
+
+ /// State of the parser when it encounters a reference.
+ ACEXML_ParserInt::ReferenceState ref_state_;
+
+ /// T => We are parsing an external subset
+ int external_subset_;
+
+ /// T => We are parsing an external entity value
+ int external_entity_;
+
+ /// T => Internal DTD has parameter entity references
+ int has_pe_refs_;
+
+ /// Feature flags
+ /// If set, the parser should parse a document without a prolog
int simple_parsing_;
+
+ /// If set, the parser should also validate
+ int validate_;
+
+ /// If set, the parser should allow access by namespace qualified names.
int namespaces_;
+
+ /// If set, the parser should include namespace declarations in the list
+ /// of attributes of an element.
int namespace_prefixes_;
+ /// If set, the document is a standalone XML document
+ int standalone_;
+
+ /// If set, the document has an external DTD subset
+ int external_dtd_;
+
+ /// If set, the document has an internal DTD
+ int internal_dtd_;
+
};
#if defined (__ACEXML_INLINE__)