diff options
Diffstat (limited to 'ACE/ACEXML/parser')
26 files changed, 6637 insertions, 0 deletions
diff --git a/ACE/ACEXML/parser/Makefile.am b/ACE/ACEXML/parser/Makefile.am new file mode 100644 index 00000000000..35c60abcb8d --- /dev/null +++ b/ACE/ACEXML/parser/Makefile.am @@ -0,0 +1,13 @@ +## Process this file with automake to create Makefile.in +## +## $Id$ +## +## This file was generated by MPC. Any changes made directly to +## this file will be lost the next time it is generated. +## +## MPC Command: +## ./bin/mwc.pl -type automake -noreldefs ACE.mwc + +SUBDIRS = \ + parser + diff --git a/ACE/ACEXML/parser/debug_validator/Debug_Attributes_Builder.cpp b/ACE/ACEXML/parser/debug_validator/Debug_Attributes_Builder.cpp new file mode 100644 index 00000000000..ccebd4b9098 --- /dev/null +++ b/ACE/ACEXML/parser/debug_validator/Debug_Attributes_Builder.cpp @@ -0,0 +1,211 @@ +// $Id$ + +#include "ACEXML/common/SAXExceptions.h" +#include "ACEXML/parser/debug_validator/Debug_Attributes_Builder.h" + +ACEXML_Debug_Attribute_Builder::ACEXML_Debug_Attribute_Builder () + : type_ (ERROR_TYPE), + default_decl_ (INVALID) +{ +} + +ACEXML_Debug_Attribute_Builder::ACEXML_Debug_Attribute_Builder (const ACEXML_Debug_Attribute_Builder &rhs) + : name_ (rhs.name_), + type_ (rhs.type_), + default_decl_ (rhs.default_decl_), + default_value_ (rhs.default_value_), + att_value_queue_ (rhs.att_value_queue_) +{ +} + +ACEXML_Debug_Attribute_Builder::~ACEXML_Debug_Attribute_Builder () +{ +} + +int +ACEXML_Debug_Attribute_Builder::setName (const ACEXML_Char *n) +{ + this->name_.set (n, 0); + return 0; +} + +const ACEXML_Char * +ACEXML_Debug_Attribute_Builder::getName (void) +{ + return this->name_.fast_rep (); +} + +int +ACEXML_Debug_Attribute_Builder::setAttType (const ATT_TYPE type + ACEXML_ENV_ARG_DECL) +{ + if (this->type_ == ERROR_TYPE) + { + this->type_ = type; + return 0; + } + ACEXML_THROW_RETURN (ACEXML_SAXParseException (ACE_TEXT("Attribute type redefinition in Debug Validator")), -1); + +} + +int +ACEXML_Debug_Attribute_Builder::insertList (const ACEXML_Char *n + 
ACEXML_ENV_ARG_DECL_NOT_USED) +{ + ACEXML_String str (n, 0, 0); + + this->att_value_queue_.enqueue_tail (str); + return 0; +} + +int +ACEXML_Debug_Attribute_Builder::setDefault (const DEFAULT_DECL def, + const ACEXML_Char *value + ACEXML_ENV_ARG_DECL_NOT_USED) +{ + this->default_decl_ = def; + this->default_value_.set (value, 0); + return 0; +} + +int +ACEXML_Debug_Attribute_Builder::validAttr (void) +{ + // @@ Not implemented. Always return 1 (true) for now. + return 1; +} + +void +ACEXML_Debug_Attribute_Builder::dump (void) +{ + cout << this->name_ << " "; + + switch (this->type_) + { + case CDATA: + cout << "CDATA "; + break; + case ID: + cout << "ID "; + break; + case IDREF: + cout << "IDREF "; + break; + case IDREFS: + cout << "IDREFS "; + break; + case ENTITY: + cout << "ENTITY "; + break; + case ENTITIES: + cout << "ENTITIES "; + break; + case NMTOKEN: + cout << "NMTOKEN "; + break; + case NMTOKENS: + cout << "NMTOKENS "; + break; + case NOTATION: + cout << "NOTATION "; + // Fall thru + case ENUMERATION: + { + cout << "("; + ACEXML_STRING_QUEUE_ITERATOR iter (this->att_value_queue_); + ACEXML_String *n = 0; + + while (iter.advance () != 0) + { + if (n == 0) + cout << " | "; + iter.next (n); + cout << *n; + } + cout << ") "; + } + break; + default: + cout << "*** UNKNOWN TYPE ***"; + break; + } + + switch (this->default_decl_) + { + case REQUIRED: + cout << "#REQUIRED"; + break; + case IMPLIED: + cout << "#IMPLIED"; + break; + case FIXED: + cout << "#FIXED " << this->default_value_; + break; + default: + cout << "**** UNDEFINED DEFAULT DECL ****"; + break; + } +} +// ======================================== + +ACEXML_Debug_Attributes_Builder::ACEXML_Debug_Attributes_Builder () +{ +} + +ACEXML_Debug_Attributes_Builder::~ACEXML_Debug_Attributes_Builder () +{ +} + +int +ACEXML_Debug_Attributes_Builder::setElement (const ACEXML_Char *, + const ACEXML_Char *, + const ACEXML_Char *qName ACEXML_ENV_ARG_DECL_NOT_USED) +{ + this->element_name_.set (qName, 0); + return 
0; +} + +ACEXML_Attribute_Def_Builder * +ACEXML_Debug_Attributes_Builder::getAttribute_Def_Builder () +{ + ACEXML_Attribute_Def_Builder *tmp; + + ACE_NEW_RETURN (tmp, + ACEXML_Debug_Attribute_Builder (), + 0); + return tmp; +} + +int +ACEXML_Debug_Attributes_Builder::insertAttribute (ACEXML_Attribute_Def_Builder *def ACEXML_ENV_ARG_DECL) +{ + ACEXML_Attribute_Def_Builder::VAR ptr (def); + + if (def != 0) + { + ACEXML_String attname (def->getName (), 0, 0); + ACEXML_Debug_Attribute_Builder *ptr = + dynamic_cast<ACEXML_Debug_Attribute_Builder *> (def); + this->attributes_.bind (attname, *ptr); + return 0; + } + ACEXML_THROW_RETURN (ACEXML_SAXParseException (ACE_TEXT("ACEXML_Debug_Attributes_Builder internal error")), -1); +} + +void +ACEXML_Debug_Attributes_Builder::dump (void) +{ + // @@ Print print. + cout << "<!ATTLIST " << this->element_name_ << endl; + + ACEXML_ATT_MAP_ITER iter (this->attributes_); + ACEXML_ATT_MAP_ENTRY *item; + + while (iter.advance () != 0) + { + iter.next (item); + cout << "\n\t"; + item->int_id_.dump (); + } + cout << ">" << endl; +} diff --git a/ACE/ACEXML/parser/debug_validator/Debug_Attributes_Builder.h b/ACE/ACEXML/parser/debug_validator/Debug_Attributes_Builder.h new file mode 100644 index 00000000000..e22b6ef936a --- /dev/null +++ b/ACE/ACEXML/parser/debug_validator/Debug_Attributes_Builder.h @@ -0,0 +1,166 @@ +// -*- C++ -*- + +//============================================================================= +/** + * @file Debug_Attributes_Builder.h + * + * $Id$ + * + * @author Nanbor Wang <nanbor@cs.wustl.edu> + */ +//============================================================================= +#ifndef _ACEXML_DEBUG_ATTRIBUTES_BUILDER_H_ +#define _ACEXML_DEBUG_ATTRIBUTES_BUILDER_H_ + +#include /**/ "ace/pre.h" +#include "ACEXML/parser/debug_validator/Debug_DTD_Manager_Export.h" + +#if !defined (ACE_LACKS_PRAGMA_ONCE) +#pragma once +#endif /* ACE_LACKS_PRAGMA_ONCE */ + +#include "ACEXML/common/Attributes_Def_Builder.h" +#include 
"ace/Hash_Map_Manager.h" +#include "ace/Unbounded_Queue.h" + +typedef ACE_Unbounded_Queue<ACEXML_String> ACEXML_STRING_QUEUE; +typedef ACE_Unbounded_Queue_Iterator<ACEXML_String> ACEXML_STRING_QUEUE_ITERATOR; + +/** + * @class ACEXML_Debug_Attribute_Builder Debug_Attributes_Builder.h "parser/debug_validator/Debug_Attributes_Builder.h" + * + * This class prints out the Attribute definition for debugging purpose. + */ +class ACEXML_DEBUG_DTD_MANAGER_Export ACEXML_Debug_Attribute_Builder + : public ACEXML_Attribute_Def_Builder +{ +public: + ACEXML_Debug_Attribute_Builder (); + + ACEXML_Debug_Attribute_Builder (const ACEXML_Debug_Attribute_Builder &rhs); + + virtual ~ACEXML_Debug_Attribute_Builder (); + + /** + * Specify the name of the attribute. + */ + virtual int setName (const ACEXML_Char *n); + virtual const ACEXML_Char *getName (void); + + /** + * Set the attribute type. + */ + virtual int setAttType (const ATT_TYPE type ACEXML_ENV_ARG_DECL); + + /** + * Insert an element for NOTATION or ENUMERATION type attribute. + */ + virtual int insertList (const ACEXML_Char *Name ACEXML_ENV_ARG_DECL); + + /** + * Set default attribute declaration. + */ + virtual int setDefault (const DEFAULT_DECL def, + const ACEXML_Char *value ACEXML_ENV_ARG_DECL) + ; + + /** + * Check validity of the current attribute definition being built. + * + * @retval 0 if the attribute is not a valid combo. + */ + virtual int validAttr (void); + + + /** + * Dump the content of the attribute definition. + */ + virtual void dump (void); +private: + /// Attribute name. + ACEXML_String name_; + + /// Type of attribute. + ATT_TYPE type_; + + /// Default value type. + DEFAULT_DECL default_decl_; + + /// Default attribute value. + ACEXML_String default_value_; + + /// Holds a queue of enumerated attribute values. 
+ ACEXML_STRING_QUEUE att_value_queue_; +}; + +typedef ACE_Hash_Map_Entry<ACEXML_String, + ACEXML_Debug_Attribute_Builder> ACEXML_ATT_MAP_ENTRY; + +typedef ACE_Hash_Map_Manager_Ex <ACEXML_String, + ACEXML_Debug_Attribute_Builder, + ACE_Hash<ACEXML_String>, + ACE_Equal_To<ACEXML_String>, + ACE_Null_Mutex> ACEXML_ATT_MAP; + +typedef ACE_Hash_Map_Iterator_Ex<ACEXML_String, + ACEXML_Debug_Attribute_Builder, + ACE_Hash<ACEXML_String>, + ACE_Equal_To<ACEXML_String>, + ACE_Null_Mutex> ACEXML_ATT_MAP_ITER; + +typedef ACE_Hash_Map_Reverse_Iterator_Ex<ACEXML_String, + ACEXML_Debug_Attribute_Builder, + ACE_Hash<ACEXML_String>, + ACE_Equal_To<ACEXML_String>, + ACE_Null_Mutex> ACEXML_ATT_MAP_REVERSE_ITER; + +/** + * @class ACEXML_Debug_Attributes_Builder Debug_Attributes_Builder.h "parser/debug_validator/Debug_Attributes_Builder.h" + * + * This class prints out Attribute definitions for debugging purpose. + */ +class ACEXML_DEBUG_DTD_MANAGER_Export ACEXML_Debug_Attributes_Builder + : public ACEXML_Attributes_Def_Builder +{ +public: + ACEXML_Debug_Attributes_Builder (); + + virtual ~ACEXML_Debug_Attributes_Builder (); + + /** + * Set the element name that the attribute builder applies. + * + * @retval 0 if valid, -1 otherwise. + */ + virtual int setElement (const ACEXML_Char *namespaceURI, + const ACEXML_Char *localName, + const ACEXML_Char *qName ACEXML_ENV_ARG_DECL) + ; + + /** + * Acquire an Attribute_Builder. + */ + virtual ACEXML_Attribute_Def_Builder *getAttribute_Def_Builder (); + + /** + * Add a definition for one attribute. + */ + virtual int insertAttribute (ACEXML_Attribute_Def_Builder *def ACEXML_ENV_ARG_DECL); + + + /** + * Dump the content of the attribute definition. + */ + virtual void dump (void); +protected: + /// The name of the element type these attributes applied. + ACEXML_String element_name_; + + /// Collection of attributes. 
+ ACEXML_ATT_MAP attributes_; +}; + + +#include /**/ "ace/post.h" + +#endif /* _ACEXML_DEBUG_ATTRIBUTES_BUILDER_H_ */ diff --git a/ACE/ACEXML/parser/debug_validator/Debug_DTD_Manager.cpp b/ACE/ACEXML/parser/debug_validator/Debug_DTD_Manager.cpp new file mode 100644 index 00000000000..d0745d13bfc --- /dev/null +++ b/ACE/ACEXML/parser/debug_validator/Debug_DTD_Manager.cpp @@ -0,0 +1,71 @@ +// -*- C++ -*- $Id$ + +#include "ACEXML/common/SAXExceptions.h" +#include "ACEXML/parser/debug_validator/Debug_DTD_Manager.h" +#include "ACEXML/parser/debug_validator/Debug_Element_Builder.h" +#include "ACEXML/parser/debug_validator/Debug_Attributes_Builder.h" + +ACEXML_Debug_DTD_Manager::ACEXML_Debug_DTD_Manager () +{ + +} + +ACEXML_Debug_DTD_Manager::~ACEXML_Debug_DTD_Manager () +{ + +} + +ACEXML_Element_Def_Builder * +ACEXML_Debug_DTD_Manager::getElement_Def_Builder () +{ + return new ACEXML_Debug_Element_Builder (); +} + +int +ACEXML_Debug_DTD_Manager::insertElement_Definition (ACEXML_Element_Def_Builder *def ACEXML_ENV_ARG_DECL) +{ + ACEXML_Element_Def_Builder::VAR ptr (def); + + if (def != 0) + { + ptr->dump (); + return 0; + } + + ACEXML_THROW_RETURN (ACEXML_SAXParseException (ACE_TEXT("ACEXML_Debug_Attributes_Builder internal error")), -1); +} + +ACEXML_Attributes_Def_Builder * +ACEXML_Debug_DTD_Manager::getAttribute_Def_Builder () +{ + ACEXML_Attributes_Def_Builder *tmp; + ACE_NEW_RETURN (tmp, + ACEXML_Debug_Attributes_Builder (), + 0); + return tmp; +} + +int +ACEXML_Debug_DTD_Manager::insertAttributes_Definition (ACEXML_Attributes_Def_Builder *def ACEXML_ENV_ARG_DECL) +{ + ACEXML_Attributes_Def_Builder::VAR ptr (def); + if (def != 0) + { + ptr->dump (); + return 0; + } + + ACEXML_THROW_RETURN (ACEXML_SAXParseException (ACE_TEXT("ACEXML_Debug_Attributes_Builder internal error")), -1); +} + +ACEXML_Validator * +ACEXML_Debug_DTD_Manager::getValidator (const ACEXML_Char *namespaceURI, + const ACEXML_Char *localName, + const ACEXML_Char *qName ACEXML_ENV_ARG_DECL) +{ + 
/**
 * @class ACEXML_Debug_DTD_Manager Debug_DTD_Manager.h "parser/debug_validator/Debug_DTD_Manager.h"
 *
 * A DTD manager for debugging: it hands out the debug element and
 * attributes builders and, when a finished definition is inserted,
 * dumps it instead of storing it.
 */
class ACEXML_DEBUG_DTD_MANAGER_Export ACEXML_Debug_DTD_Manager : public ACEXML_DTD_Manager
{
public:
  ACEXML_Debug_DTD_Manager ();

  virtual ~ACEXML_Debug_DTD_Manager ();

  /**
   * Acquire a pointer to an element definition builder.  The XML parser
   * uses this interface to acquire a definition builder and then uses
   * the builder to create the DTD element definition.  The resulting
   * builder is then either registered with the DTD Manager, or
   * destroyed if an error occurred while the definition was being built.
   *
   * @retval 0 if error occurs creating the builder.
   */
  virtual ACEXML_Element_Def_Builder *getElement_Def_Builder ();

  /**
   * Insert a new element definition into the DTD Manager.
   *
   * @retval 0 if success, -1 if error.
   */
  virtual int insertElement_Definition (ACEXML_Element_Def_Builder *def ACEXML_ENV_ARG_DECL);

  /**
   * Acquire a pointer to an attributes definition builder.
   *
   * @retval 0 if error occurs creating the builder.
   */
  virtual ACEXML_Attributes_Def_Builder *getAttribute_Def_Builder ();

  /**
   * Insert a new attributes definition into the DTD Manager.
   *
   * @retval 0 if success, -1 otherwise.
   */
  virtual int insertAttributes_Definition (ACEXML_Attributes_Def_Builder *def ACEXML_ENV_ARG_DECL);

  /**
   * Acquire an element validator to validate an XML element.
   *
   * The debug manager does not provide validators: the implementation
   * raises ACEXML_SAXNotSupportedException and returns 0.
   *
   * @todo I haven't figured out what memory management scheme
   *       we should use for the acquired validator.
   */
  virtual ACEXML_Validator *getValidator (const ACEXML_Char *namespaceURI,
                                          const ACEXML_Char *localName,
                                          const ACEXML_Char *qName ACEXML_ENV_ARG_DECL);
};
ACEXML_DEBUG_DTD_MANAGER_HAS_DLL */ + +#if defined (ACEXML_DEBUG_DTD_MANAGER_HAS_DLL) && (ACEXML_DEBUG_DTD_MANAGER_HAS_DLL == 1) +# if defined (ACEXML_DEBUG_DTD_MANAGER_BUILD_DLL) +# define ACEXML_DEBUG_DTD_MANAGER_Export ACE_Proper_Export_Flag +# define ACEXML_DEBUG_DTD_MANAGER_SINGLETON_DECLARATION(T) ACE_EXPORT_SINGLETON_DECLARATION (T) +# define ACEXML_DEBUG_DTD_MANAGER_SINGLETON_DECLARE(SINGLETON_TYPE, CLASS, LOCK) ACE_EXPORT_SINGLETON_DECLARE(SINGLETON_TYPE, CLASS, LOCK) +# else /* ACEXML_DEBUG_DTD_MANAGER_BUILD_DLL */ +# define ACEXML_DEBUG_DTD_MANAGER_Export ACE_Proper_Import_Flag +# define ACEXML_DEBUG_DTD_MANAGER_SINGLETON_DECLARATION(T) ACE_IMPORT_SINGLETON_DECLARATION (T) +# define ACEXML_DEBUG_DTD_MANAGER_SINGLETON_DECLARE(SINGLETON_TYPE, CLASS, LOCK) ACE_IMPORT_SINGLETON_DECLARE(SINGLETON_TYPE, CLASS, LOCK) +# endif /* ACEXML_DEBUG_DTD_MANAGER_BUILD_DLL */ +#else /* ACEXML_DEBUG_DTD_MANAGER_HAS_DLL == 1 */ +# define ACEXML_DEBUG_DTD_MANAGER_Export +# define ACEXML_DEBUG_DTD_MANAGER_SINGLETON_DECLARATION(T) +# define ACEXML_DEBUG_DTD_MANAGER_SINGLETON_DECLARE(SINGLETON_TYPE, CLASS, LOCK) +#endif /* ACEXML_DEBUG_DTD_MANAGER_HAS_DLL == 1 */ + +#endif /* ACEXML_DEBUG_DTD_MANAGER_EXPORT_H */ + +// End of auto generated file. 
diff --git a/ACE/ACEXML/parser/debug_validator/Debug_Element_Builder.cpp b/ACE/ACEXML/parser/debug_validator/Debug_Element_Builder.cpp new file mode 100644 index 00000000000..da422a3429f --- /dev/null +++ b/ACE/ACEXML/parser/debug_validator/Debug_Element_Builder.cpp @@ -0,0 +1,144 @@ +// $Id$ + +#include "ACEXML/common/SAXExceptions.h" +#include "ACEXML/parser/debug_validator/Debug_Element_Builder.h" + +ACEXML_Debug_Element_Builder::ACEXML_Debug_Element_Builder () + : type_ (UNDEFINED), + root_ (0) +{ +} + +ACEXML_Debug_Element_Builder::~ACEXML_Debug_Element_Builder () +{ + delete this->root_; +} + +int +ACEXML_Debug_Element_Builder::setElementName (const ACEXML_Char *, + const ACEXML_Char *, + const ACEXML_Char *qName ACEXML_ENV_ARG_DECL_NOT_USED) +{ + this->element_.set (qName, 0); + return 0; +} + +int +ACEXML_Debug_Element_Builder::setContentType (CONTENT_TYPE type ACEXML_ENV_ARG_DECL) +{ + if (this->type_ == UNDEFINED) + { + this->type_ = type; + return 0; + } + + ACEXML_THROW_RETURN (ACEXML_SAXParseException (ACE_TEXT("Element type redefinition in Debug_Validator.")), -1); +} + +int +ACEXML_Debug_Element_Builder::insertMixedElement (const ACEXML_Char *, + const ACEXML_Char *, + const ACEXML_Char *qName ACEXML_ENV_ARG_DECL_NOT_USED) +{ + ACEXML_Element_Tree_Name_Node *node; + + // @@ We should "throw" an exception here instead of returning -1. + ACE_NEW_RETURN (node, + ACEXML_Element_Tree_Name_Node (qName), + -1); + + if (this->root_ == 0) + // @@ Memory leak if fail? + ACE_NEW_RETURN (this->root_, + ACEXML_Element_Tree_List_Node (), + -1); + + + return this->root_->insert (node); +} + +int +ACEXML_Debug_Element_Builder::startChildGroup () +{ + ACEXML_Element_Tree_List_Node *lnode; + + ACE_NEW_RETURN (lnode, + ACEXML_Element_Tree_List_Node (), + -1); + + if (this->root_ == 0) + { + this->root_ = lnode; + } + else + { + // @@ check error? 
+ this->root_->insert (lnode); + } + + this->active_list_.push (lnode); + return 0; +} + +int +ACEXML_Debug_Element_Builder::endChildGroup (CARDINALITY ACEXML_ENV_ARG_DECL_NOT_USED) +{ + this->active_list_.pop (); + return 0; +} + +int +ACEXML_Debug_Element_Builder::setChoice () +{ + this->active_list_.top ()->set (ACEXML_Element_Tree_List_Node::CHOICE); + return 0; +} + +int +ACEXML_Debug_Element_Builder::setSequence () +{ + this->active_list_.top ()->set (ACEXML_Element_Tree_List_Node::SEQUENCE); + return 0; +} + +int +ACEXML_Debug_Element_Builder::insertElement (const ACEXML_Char *, + const ACEXML_Char *, + const ACEXML_Char *qName ACEXML_ENV_ARG_DECL_NOT_USED) +{ + ACEXML_Element_Tree_Name_Node *node; + + // @@ We should "throw" an exception here instead of returning -1. + ACE_NEW_RETURN (node, + ACEXML_Element_Tree_Name_Node (qName), + -1); + + return this->active_list_.top ()->insert (node); +} + +void +ACEXML_Debug_Element_Builder::dump () +{ + cout << "<!ELEMENT " << this->element_; + + // @@ Also dump element contentspec here. 
+ switch (this->type_) + { + case EMPTY: + cout << "EMPTY"; + break; + case ANY: + cout << "ANY"; + break; + case MIXED: + case CHILDREN: + // @@ Dump the content of this->root_ + cout << "*** not implemented ***"; + break; + default: + cout << "*** Unidentified element type ***"; + break; + } + + cout << ">" << endl; +} diff --git a/ACE/ACEXML/parser/debug_validator/Debug_Element_Builder.h b/ACE/ACEXML/parser/debug_validator/Debug_Element_Builder.h new file mode 100644 index 00000000000..dc1f7765441 --- /dev/null +++ b/ACE/ACEXML/parser/debug_validator/Debug_Element_Builder.h @@ -0,0 +1,123 @@ +// -*- C++ -*- + +//============================================================================= +/** + * @file Debug_Element_Builder.h + * + * $Id$ + * + * @author Nanbor Wang <nanbor@cs.wustl.edu> + */ +//============================================================================= +#ifndef _ACEXML_DEBUG_ELEMENT_BUILDER_H_ +#define _ACEXML_DEBUG_ELEMENT_BUILDER_H_ + +#include /**/ "ace/pre.h" +#include "ACEXML/parser/debug_validator/Debug_DTD_Manager_Export.h" + +#if !defined (ACE_LACKS_PRAGMA_ONCE) +#pragma once +#endif /* ACE_LACKS_PRAGMA_ONCE */ + +#include "ACEXML/common/Element_Def_Builder.h" +#include "ACEXML/parser/debug_validator/Element_Tree.h" + +/** + * @class ACEXML_Debug_Element_Builder Debug_Element_Builder.h "parser/debug_validator/Debug_Element_Builder.h" + * + * This class prints out the element definition for debugging purpose. + * + * @todo This class is not namespace-aware. + */ +class ACEXML_DEBUG_DTD_MANAGER_Export ACEXML_Debug_Element_Builder + : public ACEXML_Element_Def_Builder +{ +public: + ACEXML_Debug_Element_Builder (); + + virtual ~ACEXML_Debug_Element_Builder (); + + /** + * Define the name of the element. + * + * @retval 0 if valid, -1 otherwise. 
+ */ + virtual int setElementName (const ACEXML_Char *namespaceURI, + const ACEXML_Char *localName, + const ACEXML_Char *qName ACEXML_ENV_ARG_DECL) + ; + + /** + * Define the content type of the element. + * + * @retval 0 if valid, -1 otherwise. + */ + virtual int setContentType (CONTENT_TYPE type ACEXML_ENV_ARG_DECL) + ; + + /** + * Insert one more element into Mixed definition. + */ + virtual int insertMixedElement (const ACEXML_Char *namespaceURI, + const ACEXML_Char *localName, + const ACEXML_Char *qName ACEXML_ENV_ARG_DECL) + ; + + /** + * Start a new group of children. + */ + virtual int startChildGroup (); + + /** + * End a new group of children. + * + * @retval 0 on success. + */ + virtual int endChildGroup (CARDINALITY card ACEXML_ENV_ARG_DECL); + + /** + * Set the type of current child group to Choice. + * + * @retval 0 on success, -1 if the type of the child group has + * already been set and this action conflicts with the previous + * setting. + */ + virtual int setChoice (); + + /** + * Set the type of current child group to Sequence. + * + * @retval 0 on success, -1 if the type of the child group has + * already been set and this action conflicts with the previous + * setting. + */ + virtual int setSequence (); + + /** + * Insert an new element into the current child group. + * + * @retval 0 on success, -1 otherwise. + */ + virtual int insertElement (const ACEXML_Char *namespaceURI, + const ACEXML_Char *localName, + const ACEXML_Char *qName ACEXML_ENV_ARG_DECL) + ; + + /** + * Dump the content of the attribute definition. 
+ */ + virtual void dump (void); +private: + CONTENT_TYPE type_; + + ACEXML_String element_; + + ACEXML_Element_Tree_List_Node *root_; + + ACEXML_Element_Tree_List_Stack active_list_; +}; + + +#include /**/ "ace/post.h" + +#endif /* _ACEXML_DEBUG_ELEMENT_BUILDER_H_ */ diff --git a/ACE/ACEXML/parser/debug_validator/Element_Tree.cpp b/ACE/ACEXML/parser/debug_validator/Element_Tree.cpp new file mode 100644 index 00000000000..e584e22dd62 --- /dev/null +++ b/ACE/ACEXML/parser/debug_validator/Element_Tree.cpp @@ -0,0 +1,72 @@ +// $Id$ + +#include "ACEXML/parser/debug_validator/Element_Tree.h" + +#if !defined (__ACEXML_INLINE__) +# include "ACEXML/parser/debug_validator/Element_Tree.inl" +#endif /* __ACEXML_INLINE__ */ + +ACEXML_Element_Tree_Node::~ACEXML_Element_Tree_Node () +{ + delete this->next_; +} + +ACE_ALLOC_HOOK_DEFINE (ACEXML_Element_Tree_Node) + +void +ACEXML_Element_Tree_Name_Node::dump () +{ + cout << this->name_; +} + +ACE_ALLOC_HOOK_DEFINE (ACEXML_Element_Tree_Name_Node) + +ACEXML_Element_Tree_List_Node::~ACEXML_Element_Tree_List_Node (void) +{ + delete this->head_; +} + +int +ACEXML_Element_Tree_List_Node::insert (ACEXML_Element_Tree_Node *node) +{ + if (this->head_ == 0) + { + this->tail_ = this->head_ = node; + } + else + { + this->tail_->next (node); + this->tail_ = node; + } + return 0; +} + +void +ACEXML_Element_Tree_List_Node::dump (void) +{ + ACEXML_Element_Tree_Node *ptr = this->head_; + const ACEXML_Char *separator = (this->type_ == SEQUENCE) ? 
ACE_TEXT(" , ") : ACE_TEXT(" | "); + + cout << "("; + + if (ptr != 0) + { + ptr->dump (); + ptr = ptr->next (); + + while (ptr != 0) + { + cout << separator; + ptr->dump (); + ptr->next (); + } + } + + cout << ")"; +} + +ACE_ALLOC_HOOK_DEFINE (ACEXML_Element_Tree_List_Node) + + + +ACE_ALLOC_HOOK_DEFINE (ACEXML_Element_Tree_List_Stack) diff --git a/ACE/ACEXML/parser/debug_validator/Element_Tree.h b/ACE/ACEXML/parser/debug_validator/Element_Tree.h new file mode 100644 index 00000000000..4738b5de2f4 --- /dev/null +++ b/ACE/ACEXML/parser/debug_validator/Element_Tree.h @@ -0,0 +1,160 @@ +// -*- C++ -*- + +//============================================================================= +/** + * @file Element_Tree.h + * + * $Id$ + * + * @author Nanbor Wang <nanbor@cs.wustl.edu> + */ +//============================================================================= +#ifndef _ACEXML_ELEMENT_TREE_H_ +#define _ACEXML_ELEMENT_TREE_H_ + +#include /**/ "ace/pre.h" +#include "ACEXML/parser/debug_validator/Debug_DTD_Manager_Export.h" + +#if !defined (ACE_LACKS_PRAGMA_ONCE) +#pragma once +#endif /* ACE_LACKS_PRAGMA_ONCE */ + +#include "ACEXML/common/XML_Types.h" + +/** + * @class ACEXML_Element_Tree_Node Element_Tree.h "parser/debug_validator/Element_Tree.h" + * + * @brief An abstract base class for describing DTD child element definition. + * + * @sa ACEXML_Element_Tree_Name_Node, ACEXML_Element_Tree_List_Node + */ +class ACEXML_DEBUG_DTD_MANAGER_Export ACEXML_Element_Tree_Node +{ +public: + + /// Default constructor. + ACEXML_Element_Tree_Node (); + + /// Destructor + virtual ~ACEXML_Element_Tree_Node (); + + /// Accessor for next element in chain + ACEXML_Element_Tree_Node *next (); + void next (ACEXML_Element_Tree_Node *n); + + /// Displaying the content. 
/**
 * @class ACEXML_Element_Tree_Name_Node Element_Tree.h "parser/debug_validator/Element_Tree.h"
 *
 * @brief A node holding one element name in a DTD child element
 *        definition.  Unlike its base class it overrides dump (), so it
 *        is not abstract.
 *
 * @sa ACEXML_Element_Tree_Node, ACEXML_Element_Tree_List_Node
 */
class ACEXML_DEBUG_DTD_MANAGER_Export ACEXML_Element_Tree_Name_Node
  : public ACEXML_Element_Tree_Node
{
public:
  /// Constructor.  @a name and @a release are handed to the
  /// ACEXML_String member.
  /// NOTE(review): @a release presumably selects whether the string
  /// copies @a name -- confirm against ACEXML_String.
  ACEXML_Element_Tree_Name_Node (const ACEXML_Char *name,
                                 int release = 1);

  /// Change the name of this node.
  void set (const ACEXML_Char *name,
            int release = 1);

  /// Print the stored name.
  virtual void dump ();

  ACE_ALLOC_HOOK_DECLARE;
protected:
  /// The element name held by this node.
  ACEXML_String name_;
};
/**
 * @class ACEXML_Element_Tree_List_Stack Element_Tree.h "parser/debug_validator/Element_Tree.h"
 *
 * @brief A class for managing a stack of ACEXML_Element_Tree_List_Node's.
 *
 * The class declares no destructor and stores only a pointer to the
 * top node, so it does not delete the nodes pushed onto it.
 *
 * @sa ACEXML_Element_Tree_List_Node
 */
class ACEXML_DEBUG_DTD_MANAGER_Export ACEXML_Element_Tree_List_Stack
{
public:
  /// Create an empty stack.
  ACEXML_Element_Tree_List_Stack ();

  /// Make @a n the new top of the stack.
  void push (ACEXML_Element_Tree_List_Node *n);

  /// Remove and return the top node.
  ACEXML_Element_Tree_List_Node *pop (void);

  /// Return the top node without removing it.
  ACEXML_Element_Tree_List_Node *top (void);

  /// Query whether the stack holds no nodes.
  int empty (void);

  ACE_ALLOC_HOOK_DECLARE;

protected:
  /// Current top of the stack.
  ACEXML_Element_Tree_List_Node *top_;
};
+ACEXML_Element_Tree_List_Node::ACEXML_Element_Tree_List_Node (void) + : type_ (SEQUENCE), + head_ (0), + tail_ (0), + pop_next_ (0) +{ +} + +ACEXML_INLINE ACEXML_Element_Tree_List_Node::LIST_TYPE +ACEXML_Element_Tree_List_Node::get (void) +{ + return this->type_; +} + +ACEXML_INLINE int +ACEXML_Element_Tree_List_Node::set (ACEXML_Element_Tree_List_Node::LIST_TYPE type) +{ + this->type_ = type; + return 0; +} + +ACEXML_INLINE +ACEXML_Element_Tree_List_Stack::ACEXML_Element_Tree_List_Stack (void) + : top_ (0) +{ +} + +ACEXML_INLINE ACEXML_Element_Tree_List_Node * +ACEXML_Element_Tree_List_Stack::top () +{ + return this->top_; +} + +ACEXML_INLINE void +ACEXML_Element_Tree_List_Stack::push (ACEXML_Element_Tree_List_Node *n) +{ + n->pop_next_ = this->top_; + this->top_ = n; +} + +ACEXML_INLINE ACEXML_Element_Tree_List_Node * +ACEXML_Element_Tree_List_Stack::pop () +{ + if (this->top_ != 0) + { + ACEXML_Element_Tree_List_Node *ptr = this->top_; + this->top_ = this->top_->pop_next_; + return ptr; + } + return 0; +} + +ACEXML_INLINE int +ACEXML_Element_Tree_List_Stack::empty () +{ + return this->top_ == 0; +} diff --git a/ACE/ACEXML/parser/parser/ACEXML_Parser.pc.in b/ACE/ACEXML/parser/parser/ACEXML_Parser.pc.in new file mode 100644 index 00000000000..61fb0d02374 --- /dev/null +++ b/ACE/ACEXML/parser/parser/ACEXML_Parser.pc.in @@ -0,0 +1,11 @@ +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: ACEXML_Parser +Description: ACE XML Parser Library +Requires: ACEXML +Version: @VERSION@ +Libs: -L${libdir} -lACEXML_Parser +Cflags: -I${includedir} diff --git a/ACE/ACEXML/parser/parser/Entity_Manager.cpp b/ACE/ACEXML/parser/parser/Entity_Manager.cpp new file mode 100644 index 00000000000..0f27707ae72 --- /dev/null +++ b/ACE/ACEXML/parser/parser/Entity_Manager.cpp @@ -0,0 +1,20 @@ +// $Id$ + +#include "ACEXML/parser/parser/Entity_Manager.h" + +#if !defined (__ACEXML_INLINE__) +# include "ACEXML/parser/parser/Entity_Manager.inl" +#endif /* 
__ACEXML_INLINE__ */ + + +ACEXML_Entity_Manager::ACEXML_Entity_Manager (void) + : entities_ (0) +{ + +} + +ACEXML_Entity_Manager::~ACEXML_Entity_Manager (void) +{ + this->reset(); +} + diff --git a/ACE/ACEXML/parser/parser/Entity_Manager.h b/ACE/ACEXML/parser/parser/Entity_Manager.h new file mode 100644 index 00000000000..e97233a6146 --- /dev/null +++ b/ACE/ACEXML/parser/parser/Entity_Manager.h @@ -0,0 +1,100 @@ +// -*- C++ -*- + +//============================================================================= +/** + * @file Entity_Manager.h + * + * $Id$ + * + * @author Nanbor Wang <nanbor@cs.wustl.edu> + * @author Krishnakumar B <kitty@cs.wustl.edu> + */ +//============================================================================= + +#ifndef ACEXML_ENTITY_MANAGER_H +#define ACEXML_ENTITY_MANAGER_H + +#include /**/ "ace/pre.h" +#include "ACEXML/parser/parser/Parser_export.h" + +#if !defined (ACE_LACKS_PRAGMA_ONCE) +#pragma once +#endif /* ACE_LACKS_PRAGMA_ONCE */ + +#include "ACEXML/common/XML_Types.h" +#include "ace/Hash_Map_Manager.h" +#include "ace/Null_Mutex.h" + +typedef ACE_Hash_Map_Entry<ACEXML_String, + ACEXML_String> ACEXML_ENTITY_ENTRY; + +typedef ACE_Hash_Map_Manager_Ex<ACEXML_String, + ACEXML_String, + ACE_Hash<ACEXML_String>, + ACE_Equal_To<ACEXML_String>, + ACE_Null_Mutex> ACEXML_ENTITIES_MANAGER; + +typedef ACE_Hash_Map_Iterator_Ex<ACEXML_String, + ACEXML_String, + ACE_Hash<ACEXML_String>, + ACE_Equal_To<ACEXML_String>, + ACE_Null_Mutex> ACEXML_ENTITIES_MANAGER_ITER; + +typedef ACE_Hash_Map_Reverse_Iterator_Ex<ACEXML_String, + ACEXML_String, + ACE_Hash<ACEXML_String>, + ACE_Equal_To<ACEXML_String>, + ACE_Null_Mutex> ACEXML_ENTITIES_MANAGER_REVERSE_ITER; + +typedef ACE_Hash_Map_Bucket_Iterator<ACEXML_String, + ACEXML_String, + ACE_Hash<ACEXML_String>, + ACE_Equal_To<ACEXML_String>, + ACE_Null_Mutex> ACEXML_ENTITY_ENTRY_ITERATOR; + +/** + * @class ACEXML_Entity_Manager Entity_Manager.h "ACEXML/parser/parser/Entity_Manager.h" + * + * @brief Class to 
manage and resolve entity references. + * + * @todo Fill in details for this class. + */ +class ACEXML_PARSER_Export ACEXML_Entity_Manager +{ +public: + /// Default constructor. + ACEXML_Entity_Manager (void); + + /// Destructor. + ~ACEXML_Entity_Manager (void); + + /// Add a new entity declaration. + int add_entity (const ACEXML_Char *ref, const ACEXML_Char *value); + + /// Resolve an entity reference. + const ACEXML_Char* resolve_entity (const ACEXML_Char *ref); + + /// Resolve an entity reference and return the tuple of @c systemId and + /// @c publicId + int resolve_entity (const ACEXML_Char* ref, ACEXML_Char*& systemId, + ACEXML_Char*& publicId); + + /// Number of items in the Entity Manager + size_t size(void) const; + + /// Reset the state + int reset (void); + +private: + ACEXML_ENTITIES_MANAGER* entities_; + bool init_; + +}; + +#if defined (__ACEXML_INLINE__) +# include "ACEXML/parser/parser/Entity_Manager.inl" +#endif /* __ACEXML_INLINE__ */ + +#include /**/ "ace/post.h" + +#endif /* ACEXML_ENTITY_MANAGER_H */ diff --git a/ACE/ACEXML/parser/parser/Entity_Manager.inl b/ACE/ACEXML/parser/parser/Entity_Manager.inl new file mode 100644 index 00000000000..ca859a0b0d5 --- /dev/null +++ b/ACE/ACEXML/parser/parser/Entity_Manager.inl @@ -0,0 +1,66 @@ +// $Id$ + +ACEXML_INLINE int +ACEXML_Entity_Manager::add_entity (const ACEXML_Char *ref, + const ACEXML_Char *v) +{ + if (!this->entities_ ) + ACE_NEW_RETURN (this->entities_, ACEXML_ENTITIES_MANAGER, -1); + ACEXML_String name (ref, 0, false); + ACEXML_String value (v, 0, false); + return this->entities_->bind (name, value); +} + +ACEXML_INLINE const ACEXML_Char* +ACEXML_Entity_Manager::resolve_entity (const ACEXML_Char *ref) +{ + if (!this->entities_) + return 0; + + ACEXML_ENTITY_ENTRY *entry = 0; + + if (this->entities_->find (ACEXML_String (ref, 0, false), + entry) == 0) + return entry->int_id_.c_str(); + return 0; +} + +ACEXML_INLINE int +ACEXML_Entity_Manager::resolve_entity (const ACEXML_Char* ref, + 
ACEXML_Char*& systemId,
+                                       ACEXML_Char*& publicId)
+{
+  // Clear the out-parameters before anything else so the caller never
+  // sees stale or uninitialized pointers, whatever the outcome.
+  publicId = systemId = 0;
+
+  // No table has been created yet, so nothing can be resolved.  Report
+  // "not found" (-1) exactly like the lookup-miss path at the bottom,
+  // instead of claiming success with untouched outputs.
+  if (!this->entities_)
+    return -1;
+
+  // Walk the entries bound under @a ref.  The third constructor argument
+  // of @c end presumably selects the past-the-end position of the same
+  // bucket -- confirm against ACE_Hash_Map_Bucket_Iterator.
+  ACEXML_ENTITY_ENTRY_ITERATOR iter (*this->entities_, ref);
+  ACEXML_ENTITY_ENTRY_ITERATOR end (*this->entities_, ref, 1);
+
+  if (iter != end)
+    {
+      // First value stored for the name is handed back as the systemId,
+      // the second one (when present) as the publicId.  Both pointers
+      // alias strings owned by the table; they are not copies.
+      systemId = const_cast<ACEXML_Char*> ((*iter).int_id_.c_str());
+      ++iter;
+      if (iter != end)
+        publicId = const_cast<ACEXML_Char*> ((*iter).int_id_.c_str());
+      return 0;
+    }
+  return -1;
+}
+
+// Discard every stored entity by deleting the table; it is re-created on
+// the next add_entity().  Always returns 0.
+ACEXML_INLINE int
+ACEXML_Entity_Manager::reset (void)
+{
+  delete this->entities_;
+  this->entities_ = 0;
+  return 0;
+}
+
+
+// Number of entries currently stored; 0 when no table has been created.
+ACEXML_INLINE size_t
+ACEXML_Entity_Manager::size (void) const
+{
+  if (!this->entities_)
+    return 0;
+  return this->entities_->current_size();
+}
diff --git a/ACE/ACEXML/parser/parser/Makefile.am b/ACE/ACEXML/parser/parser/Makefile.am
new file mode 100644
index 00000000000..f4440e30faf
--- /dev/null
+++ b/ACE/ACEXML/parser/parser/Makefile.am
@@ -0,0 +1,73 @@
+## Process this file with automake to create Makefile.in
+##
+## $Id$
+##
+## This file was generated by MPC.  Any changes made directly to
+## this file will be lost the next time it is generated.
+## +## MPC Command: +## ./bin/mwc.pl -type automake -noreldefs ACE.mwc + +includedir = @includedir@/ACEXML/parser/parser +pkgconfigdir = @libdir@/pkgconfig + +ACE_BUILDDIR = $(top_builddir) +ACE_ROOT = $(top_srcdir) + + +## Makefile.ACEXML_Parser.am + +if !BUILD_ACE_FOR_TAO + +lib_LTLIBRARIES = libACEXML_Parser.la + +libACEXML_Parser_la_CPPFLAGS = \ + -I$(ACE_ROOT) \ + -I$(ACE_BUILDDIR) \ + -DACEXML_PARSER_BUILD_DLL + +libACEXML_Parser_la_SOURCES = \ + Entity_Manager.cpp \ + Parser.cpp \ + ParserContext.cpp \ + ParserInternals.cpp + +libACEXML_Parser_la_LDFLAGS = \ + -release @ACE_VERSION_NAME@ + +libACEXML_Parser_la_LIBADD = \ + $(ACE_BUILDDIR)/ACEXML/common/libACEXML.la \ + $(ACE_BUILDDIR)/ace/libACE.la + +nobase_include_HEADERS = \ + Entity_Manager.h \ + Entity_Manager.inl \ + Parser.h \ + Parser.inl \ + ParserContext.h \ + ParserContext.inl \ + ParserInternals.h \ + Parser_export.h + +pkgconfig_DATA = \ + ACEXML_Parser.pc + +CLEANFILES = \ + ACEXML_Parser.pc + +ACEXML_Parser.pc: ${top_builddir}/config.status ${srcdir}/ACEXML_Parser.pc.in + ${top_builddir}/config.status --file $@:${srcdir}/ACEXML_Parser.pc.in + +endif !BUILD_ACE_FOR_TAO + +EXTRA_DIST = \ + ACEXML_Parser.pc.in + + +## Clean up template repositories, etc. 
+clean-local: + -rm -f *~ *.bak *.rpo *.sym lib*.*_pure_* core core.* + -rm -f gcctemp.c gcctemp so_locations *.ics + -rm -rf cxx_repository ptrepository ti_files + -rm -rf templateregistry ir.out + -rm -rf ptrepository SunWS_cache Templates.DB diff --git a/ACE/ACEXML/parser/parser/Parser.cpp b/ACE/ACEXML/parser/parser/Parser.cpp new file mode 100644 index 00000000000..a7d4b8aab42 --- /dev/null +++ b/ACE/ACEXML/parser/parser/Parser.cpp @@ -0,0 +1,3352 @@ +// $Id$ + +#include "ACEXML/parser/parser/Parser.h" + +#if !defined (__ACEXML_INLINE__) +# include "ACEXML/parser/parser/Parser.inl" +#endif /* __ACEXML_INLINE__ */ + +#include "ace/ACE.h" +#include "ACEXML/common/Transcode.h" +#include "ACEXML/common/AttributesImpl.h" +#include "ACEXML/common/StrCharStream.h" +#include "ACEXML/common/StreamFactory.h" +#include "ACEXML/parser/parser/ParserInternals.h" +#include "ace/OS_NS_string.h" +#include "ace/OS_NS_strings.h" + +static const ACEXML_Char default_attribute_type[] = ACE_TEXT ("CDATA"); +static const ACEXML_Char empty_string[] = { 0 }; + +const ACEXML_Char +ACEXML_Parser::simple_parsing_feature_[] = ACE_TEXT ("Simple"); + +const ACEXML_Char +ACEXML_Parser::namespaces_feature_[] = ACE_TEXT ("http://xml.org/sax/features/namespaces"); + +const ACEXML_Char +ACEXML_Parser::namespace_prefixes_feature_[] = ACE_TEXT ("http://xml.org/sax/features/namespace-prefixes"); + +const ACEXML_Char +ACEXML_Parser::validation_feature_[] = ACE_TEXT ("http://xml.org/sax/features/validation"); + +ACEXML_Parser::ACEXML_Parser (void) + : dtd_handler_ (0), + entity_resolver_ (0), + content_handler_ (0), + error_handler_ (0), + doctype_ (0), + current_ (0), + alt_stack_ (MAXPATHLEN), + nested_namespace_ (0), + ref_state_ (ACEXML_ParserInt::INVALID), + external_subset_ (0), + external_entity_ (0), + has_pe_refs_ (0), + standalone_ (0), + external_dtd_ (0), + internal_dtd_ (0), + simple_parsing_ (0), + validate_ (1), + namespaces_(1), + namespace_prefixes_ (0) +{ +} + 
+ACEXML_Parser::~ACEXML_Parser (void)
+{
+
+}
+
+// Prepare the parser for a new document: initialize namespace support,
+// register the predefined entities and make @a input the current input.
+// Returns -1 on failure, otherwise the result of switch_input().
+// @a input must not be null (it is dereferenced for its system id).
+int
+ACEXML_Parser::initialize(ACEXML_InputSource* input)
+{
+  // Initialize namespace support
+  if (this->xml_namespace_.init() == -1)
+    {
+      ACE_ERROR ((LM_ERROR,
+                  ACE_TEXT ("Error initializing namespace support\n")));
+      return -1;
+    }
+  // Register the predefined entities (presumably the five defined by
+  // XML 1.0; the tables live in ACEXML_ParserInt -- confirm their size).
+  for (int i = 0; i < 5; ++i)
+    {
+      if (this->predef_entities_.add_entity (ACEXML_ParserInt::predef_ent_[i],
+                                             ACEXML_ParserInt::predef_val_[i])
+          != 0)
+        {
+          // This aborts initialization, so log it at error priority like
+          // the namespace failure above (it used to be LM_DEBUG).
+          ACE_ERROR ((LM_ERROR,
+                      ACE_TEXT ("Error adding entity %s to Manager\n"),
+                      ACEXML_ParserInt::predef_ent_[i]));
+          return -1;
+        }
+    }
+  return this->switch_input (input, input->getSystemId());
+}
+
+// Convenience overload: wrap @a systemId in a freshly allocated
+// ACEXML_InputSource and parse it.
+void
+ACEXML_Parser::parse (const ACEXML_Char *systemId ACEXML_ENV_ARG_DECL)
+{
+  ACEXML_InputSource* input = 0;
+  ACE_NEW (input, ACEXML_InputSource (systemId));
+  this->parse (input ACEXML_ENV_ARG_PARAMETER);
+}
+
+// Parse a complete document from @a input, reporting events to the
+// registered handlers.  A content handler is mandatory; a DTD handler is
+// mandatory as well when validation is enabled.
+void
+ACEXML_Parser::parse (ACEXML_InputSource *input ACEXML_ENV_ARG_DECL)
+{
+  if (input == 0)
+    {
+      this->fatal_error(ACE_TEXT ("Invalid input source")
+                        ACEXML_ENV_ARG_PARAMETER);
+      ACEXML_CHECK;
+    }
+  if (this->content_handler_ == 0)
+    {
+      this->fatal_error (ACE_TEXT ("No content handlers defined. Exiting..")
+                         ACEXML_ENV_ARG_PARAMETER);
+      ACEXML_CHECK;
+    }
+
+  if (this->validate_ && this->dtd_handler_ == 0)
+    {
+      this->fatal_error (ACE_TEXT ("No DTD handlers defined. Exiting..")
+                         ACEXML_ENV_ARG_PARAMETER);
+      ACEXML_CHECK;
+    }
+
+
+  if (this->initialize(input) == -1)
+    {
+      this->fatal_error (ACE_TEXT ("Failed to initialize parser state")
+                         ACEXML_ENV_ARG_PARAMETER);
+      ACEXML_CHECK;
+    }
+  // Set up Locator.
+  this->content_handler_->setDocumentLocator (this->current_->getLocator());
+
+  int xmldecl_defined = 0;
+  ACEXML_Char fwd = this->get(); // Consume '<'
+  if (fwd == '<' && this->peek() == '?')
+    {
+      this->get();      // Consume '?'
+ fwd = this->peek(); + if (fwd == 'x' && !xmldecl_defined) + { + this->parse_xml_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK; + xmldecl_defined = 1; + } + } + // We need a XMLDecl in a Valid XML document + if (this->validate_ && !xmldecl_defined) + { + this->fatal_error (ACE_TEXT ("Expecting an XMLDecl at the beginning of") + ACE_TEXT (" a valid document") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK; + } + this->content_handler_->startDocument (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK; + + int doctype_defined = 0; + for (int prolog_done = 0; prolog_done == 0; ) + { + // Expect a '<' only if we have encountered a XMLDecl, or we are + // looping through Misc blocks. + if (xmldecl_defined) + { + if (this->skip_whitespace () != '<') + { + this->fatal_error (ACE_TEXT ("Expecting '<' at the beginning of ") + ACE_TEXT ("Misc section") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK; + } + fwd = this->peek(); + } + switch (fwd) + { + case '?': + this->get(); + this->parse_processing_instruction (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK; + xmldecl_defined = 1; + break; + case '!': + this->get(); + fwd = this->peek (); + if (fwd == 'D' && !doctype_defined) // DOCTYPE + { + // This will also take care of the trailing MISC block if any. 
+ this->parse_doctypedecl (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK; + doctype_defined = 1; + // Now that we have a DOCTYPE Decl defined, we shouldn't + // accept XML Decl any longer + xmldecl_defined = 1; + } + else if (fwd == 'D') + { + this->fatal_error (ACE_TEXT ("Duplicate DOCTYPE declaration") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK; + } + else if (fwd == '-') // COMMENT + { + if (this->parse_comment () < 0) + { + this->fatal_error(ACE_TEXT ("Invalid comment in document") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK; + } + xmldecl_defined = 1; + } + break; + case 0: + this->fatal_error (ACE_TEXT ("Unexpected end-of-file") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK; + default: // Root element begins + prolog_done = 1; + break; + } + } + + if (this->validate_ && !doctype_defined) + { + this->warning (ACE_TEXT ("No doctypeDecl in valid document") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK; + } + + // Now parse root element. + this->parse_element (1 ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK; + + this->content_handler_->endDocument (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK; + + // Reset the parser state + this->reset(); + +} + +int +ACEXML_Parser::parse_doctypedecl (ACEXML_ENV_SINGLE_ARG_DECL) +{ + if (this->parse_token (ACE_TEXT ("DOCTYPE")) < 0) + { + this->fatal_error(ACE_TEXT ("Expecting keyword DOCTYPE in a doctypedecl") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + + ACEXML_Char nextch = 0; + if (this->skip_whitespace_count (&nextch) == 0) + { + this->fatal_error(ACE_TEXT ("Expecting a space between DOCTYPE keyword ") + ACE_TEXT ("and name") ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + + this->doctype_ = this->parse_name (); + if (this->doctype_ == 0) + { + this->fatal_error(ACE_TEXT ("Invalid DOCTYPE name") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + int count = this->skip_whitespace_count (&nextch); + + if (nextch == 'S' || nextch == 'P') // ExternalID defined + { + if (count == 0) + 
{
+          // The missing whitespace is the one between the DOCTYPE name
+          // and the ExternalID (SYSTEM/PUBLIC) that follows it.
+          this->fatal_error(ACE_TEXT ("Expecting a space between DOCTYPE ")
+                            ACE_TEXT ("name and ExternalID")
+                            ACEXML_ENV_ARG_PARAMETER);
+          ACEXML_CHECK_RETURN (-1);
+        }
+      this->external_dtd_ = 1;
+      this->parse_external_dtd (ACEXML_ENV_SINGLE_ARG_PARAMETER);
+      ACEXML_CHECK_RETURN (-1);
+    }
+
+  nextch = this->skip_whitespace ();
+  switch (nextch)
+    {
+    case '[':
+      this->internal_dtd_ = 1;  // Internal DTD definition
+      this->parse_internal_dtd (ACEXML_ENV_SINGLE_ARG_PARAMETER);
+      ACEXML_CHECK_RETURN (-1);
+      break;
+    case '>':                   // End of DTD definition
+      // The doctypedecl ends here without an internal subset, so a
+      // validating parse needs an external DTD to have been given.
+      if (this->validate_ && !this->external_dtd_)
+        {
+          this->fatal_error (ACE_TEXT ("No DTD defined")
+                             ACEXML_ENV_ARG_PARAMETER);
+          ACEXML_CHECK_RETURN (-1);
+        }
+      return 0;
+    case 0:                     // End of input (was the digit '0' by mistake)
+      this->fatal_error (ACE_TEXT ("Unexpected end-of-file")
+                         ACEXML_ENV_ARG_PARAMETER);
+      ACEXML_CHECK_RETURN (-1);
+    default:
+      break;
+    }
+
+  if (this->skip_whitespace() != '>')
+    {
+      this->fatal_error(ACE_TEXT ("Expecting '>' at end of doctypedecl")
+                        ACEXML_ENV_ARG_PARAMETER);
+      ACEXML_CHECK_RETURN (-1);
+    }
+  return 0;
+}
+
+// Parse the internal DTD subset, i.e. everything up to the closing ']':
+// markup declarations, processing instructions and PE references.
+// Returns 0 once ']' has been read; errors go through fatal_error().
+int
+ACEXML_Parser::parse_internal_dtd (ACEXML_ENV_SINGLE_ARG_DECL)
+{
+  this->ref_state_ = ACEXML_ParserInt::IN_INT_DTD;
+  ACEXML_Char nextch = this->skip_whitespace ();
+  do {
+    switch (nextch)
+      {
+      case '<':
+        nextch = this->get();
+        switch (nextch)
+          {
+          case '!':
+            this->parse_markup_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER);
+            ACEXML_CHECK_RETURN (-1);
+            break;
+          case '?':
+            this->parse_processing_instruction (ACEXML_ENV_SINGLE_ARG_PARAMETER);
+            ACEXML_CHECK_RETURN (-1);
+            break;
+          default:
+            this->fatal_error (ACE_TEXT ("Invalid internal subset")
+                               ACEXML_ENV_ARG_PARAMETER);
+            ACEXML_CHECK_RETURN (-1);
+            break;
+          }
+        break;
+      case '%':
+        this->has_pe_refs_ = 1;
+        this->parse_PE_reference (ACEXML_ENV_SINGLE_ARG_PARAMETER);
+        ACEXML_CHECK_RETURN (-1);
+        break;
+      case ']':                 // End of internal definitions.
+ return 0; + case '&': + this->fatal_error (ACE_TEXT ("Invalid Reference in internal DTD") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + break; + case 0: + this->pop_context (0 ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + break; + default: + this->fatal_error (ACE_TEXT ("Invalid content in internal subset") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + }; + nextch = this->skip_whitespace (); + } while (1); + + ACE_NOTREACHED (return -1); +} + +int +ACEXML_Parser::parse_external_dtd (ACEXML_ENV_SINGLE_ARG_DECL) +{ + this->ref_state_ = ACEXML_ParserInt::IN_EXT_DTD; + ACEXML_Char* publicId = 0; + ACEXML_Char* systemId = 0; + if (this->parse_external_id (publicId, systemId + ACEXML_ENV_ARG_PARAMETER) != 0) + { + this->fatal_error (ACE_TEXT ("Error in parsing ExternalID") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + if (this->validate_) + { + ACEXML_Char* uri = this->normalize_systemid (systemId); + ACE_Auto_Basic_Array_Ptr<ACEXML_Char> cleanup_uri (uri); + ACEXML_InputSource* ip = 0; + if (this->entity_resolver_) + { + ip = this->entity_resolver_->resolveEntity (publicId, + (uri ? uri : systemId) + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + if (ip) + { + if (this->switch_input (ip, (uri ? uri : systemId), publicId) != 0) + return -1; + } + else + { + ACEXML_StreamFactory factory; + ACEXML_CharStream* cstream = factory.create_stream (uri ? 
+ uri: systemId); + if (!cstream) { + this->fatal_error (ACE_TEXT ("Invalid input source") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + if (this->switch_input (cstream, systemId, publicId) != 0) + return -1; + } + this->parse_external_subset (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + return 0; +} + + +int +ACEXML_Parser::parse_external_subset (ACEXML_ENV_SINGLE_ARG_DECL) +{ + this->ref_state_ = ACEXML_ParserInt::IN_EXT_DTD; + this->external_subset_ = 1; + size_t nrelems = 0; + ACEXML_Char nextch = this->skip_whitespace(); + do { + switch (nextch) + { + case '<': + nextch = this->get(); + switch (nextch) + { + case '!': + nextch = this->peek(); + if (nextch == '[') + this->parse_conditional_section (ACEXML_ENV_SINGLE_ARG_PARAMETER); + else + this->parse_markup_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + break; + case '?': + nextch = this->peek(); + if (nextch == 'x') + this->parse_text_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER); + else + this->parse_processing_instruction (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + break; + default: + this->fatal_error (ACE_TEXT ("Invalid content in external DTD") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + break; + case '%': + this->parse_PE_reference (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + break; + case 0: + nrelems = this->pop_context (0 ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + if (nrelems == 1) + return 0; + break; + default: + this->fatal_error (ACE_TEXT ("Invalid content in external DTD") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + nextch = this->skip_whitespace(); + } while (1); +} + +int +ACEXML_Parser::parse_conditional_section (ACEXML_ENV_SINGLE_ARG_DECL) +{ + ACEXML_Char ch = this->get (); + int include = 0; + if (ch != '[') + { + this->fatal_error(ACE_TEXT ("Internal Parser Error") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + ch = 
this->skip_whitespace(); + if (ch == '%') + { + this->parse_PE_reference (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + ch = this->skip_whitespace(); + } + if (ch == 'I') + { + ch = this->get(); + switch (ch) + { + case 'N': + if (this->parse_token (ACE_TEXT ("CLUDE")) < 0) + { + this->fatal_error (ACE_TEXT ("Expecting keyword INCLUDE in ") + ACE_TEXT ("conditionalSect") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + include = 1; + break; + case 'G': + if (this->parse_token (ACE_TEXT ("GNORE")) < 0) + { + this->fatal_error (ACE_TEXT ("Expecting keyword IGNORE in ") + ACE_TEXT ("conditionalSect") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + include = 0; + break; + default: + this->fatal_error (ACE_TEXT ("Invalid conditionalSect") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + ACEXML_Char fwd = '\xFF'; + this->skip_whitespace_count (&fwd); + if (fwd == 0) + { + this->get(); // Consume the 0 + this->pop_context (0 ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + } + else + { + this->fatal_error (ACE_TEXT ("Invalid conditionalSect") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + if (this->skip_whitespace() != '[') + { + this->fatal_error (ACE_TEXT ("Expecting '[' in conditionalSect") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + if (include) + this->parse_includesect (ACEXML_ENV_SINGLE_ARG_PARAMETER); + else + this->parse_ignoresect (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + return 0; +} + +int +ACEXML_Parser::parse_ignoresect (ACEXML_ENV_SINGLE_ARG_DECL) +{ + ACEXML_Char nextch = this->skip_whitespace(); + int count = 0; + int done = 0; + do { + switch (nextch) + { + case '<': + if (this->peek() == '!') + { + this->get(); + if (this->peek() == '[') + { + this->get(); + count++; + } + } + break; + case ']': + if (this->peek() == ']') + { + this->get(); + if (this->peek() == '>') + { + this->get(); + if (count) + { + --count; + break; + 
} + done = 1; + } + } + break; + case 0: // [VC: Proper Conditional Section/PE Nesting] + if (count != 0) + { + this->fatal_error (ACE_TEXT ("Invalid Conditional Section/PE ") + ACE_TEXT ("Nesting ") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + default: + break; + } + if (done) + break; + nextch = this->get(); + } while (1); + + return 0; +} + +int +ACEXML_Parser::parse_includesect (ACEXML_ENV_SINGLE_ARG_DECL) +{ + ACEXML_Char nextch = this->skip_whitespace(); + do { + switch (nextch) + { + case '<': + nextch = this->get(); + switch (nextch) + { + case '!': + nextch = this->peek(); + if (nextch == '[') + this->parse_conditional_section (ACEXML_ENV_SINGLE_ARG_PARAMETER); + else + this->parse_markup_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + break; + case '?': + nextch = this->peek(); + this->parse_processing_instruction (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + break; + default: + this->fatal_error (ACE_TEXT ("Invalid includeSect") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + break; + case '%': + this->parse_PE_reference (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + break; + case 0: // [VC: Proper Conditional Section/PE Nesting] + this->fatal_error (ACE_TEXT ("Invalid Conditional Section/PE ") + ACE_TEXT ("Nesting ") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + case ']': + if (this->peek() == ']') + { + nextch = this->get(); + if (this->peek() == '>') + { + nextch = this->get(); + return 0; + } + } + default: + this->fatal_error (ACE_TEXT ("Invalid includeSect") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + nextch = this->skip_whitespace(); + } while (1); +} + +int +ACEXML_Parser::parse_markup_decl (ACEXML_ENV_SINGLE_ARG_DECL) +{ + ACEXML_Char nextch = this->peek (); + switch (nextch) + { + case 'E': // An ELEMENT or ENTITY decl + this->get (); + nextch = this->peek (); + switch (nextch) + { + case 'L': + this->parse_element_decl 
(ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + break; + case 'N': + this->parse_entity_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + break; + default: + this->fatal_error(ACE_TEXT ("Expecting keyword ELEMENT/ENTITY") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + break; + + case 'A': // An ATTLIST decl + this->parse_attlist_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + break; + + case 'N': // A NOTATION decl + this->parse_notation_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + break; + + case '-': // a comment. + if (this->parse_comment () < 0) + { + this->fatal_error(ACE_TEXT ("Invalid comment") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + break; + case 0: // [VC: Proper Declaration/PE Nesting] + this->fatal_error (ACE_TEXT ("Unexpected end-of-file") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + default: + this->fatal_error (ACE_TEXT ("Invalid markupDecl") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + return 0; +} + +int +ACEXML_Parser::parse_external_id (ACEXML_Char *&publicId, + ACEXML_Char *&systemId + ACEXML_ENV_ARG_DECL) +{ + publicId = systemId = 0; + ACEXML_Char nextch = this->get (); + ACEXML_Char fwd = 0; + switch (nextch) + { + case 'S': // External SYSTEM id. + if (this->parse_token (ACE_TEXT ("YSTEM")) < 0 || + this->skip_whitespace_count () < 1) + { + this->fatal_error(ACE_TEXT ("Expecting keyword SYSTEM") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + if (this->parse_system_literal (systemId) != 0) + { + this->fatal_error(ACE_TEXT ("Invalid systemLiteral") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + break; + case 'P': // External PUBLIC id or previously defined PUBLIC id. 
+      // The leading 'P' is already consumed; match the rest of the
+      // keyword and require at least one whitespace character after it.
+      if (this->parse_token (ACE_TEXT ("UBLIC")) < 0 ||
+          this->skip_whitespace_count () < 1)
+        {
+          this->fatal_error(ACE_TEXT ("Expecting keyword PUBLIC")
+                            ACEXML_ENV_ARG_PARAMETER);
+          ACEXML_CHECK_RETURN (-1);
+        }
+      if (this->parse_pubid_literal (publicId) != 0)
+        {
+          this->fatal_error(ACE_TEXT ("Invalid PubidLiteral")
+                            ACEXML_ENV_ARG_PARAMETER);
+          ACEXML_CHECK_RETURN (-1);
+        }
+      // A quote after the public id starts the system literal.  It may
+      // only be left out while parsing a NOTATION declaration.
+      this->skip_whitespace_count(&fwd);
+      if (fwd == '\'' || fwd == '"')
+        {
+          if (this->parse_system_literal (systemId) != 0)
+            {
+              this->fatal_error(ACE_TEXT ("Invalid systemLiteral")
+                                ACEXML_ENV_ARG_PARAMETER);
+              ACEXML_CHECK_RETURN (-1);
+            }
+        }
+      else if (this->ref_state_ != ACEXML_ParserInt::IN_NOTATION)
+        {
+          this->fatal_error(ACE_TEXT ("Expecting systemLiteral after a ")
+                            ACE_TEXT ("PUBLIC keyword")
+                            ACEXML_ENV_ARG_PARAMETER);
+          ACEXML_CHECK_RETURN (-1);
+        }
+      break;
+    default:
+      this->fatal_error(ACE_TEXT ("Invalid system/public Literal")
+                        ACEXML_ENV_ARG_PARAMETER);
+      ACEXML_CHECK_RETURN (-1);
+    }
+  return 0;
+}
+
+// Turn a relative @a systemId into a location relative to the system id
+// of the input currently being parsed.  Returns a new[]-allocated string
+// the caller must release, or 0 when @a systemId already contains an
+// ftp://, http:// or file:// scheme or when the base has no separator.
+ACEXML_Char*
+ACEXML_Parser::normalize_systemid (const ACEXML_Char* systemId)
+{
+  if (ACE_OS::strstr (systemId, ACE_TEXT("ftp://")) != 0 ||
+      ACE_OS::strstr (systemId, ACE_TEXT ("http://")) != 0 ||
+      ACE_OS::strstr (systemId, ACE_TEXT ("file://")) != 0)
+    return 0;
+  else
+    {
+      ACEXML_Char* normalized_uri = 0;
+      const ACEXML_Char* baseURI = this->current_->getLocator()->getSystemId();
+      ACE_ASSERT (baseURI);
+      const ACEXML_Char* temp = 0;
+      if (ACE_OS::strstr (baseURI, ACE_TEXT ("http://")) != 0)
+        // baseURI is a HTTP URL and systemId is relative. Note that this
+        // is not compliant with RFC2396. Caveat Emptor !
+        temp = ACE_OS::strrchr (baseURI, '/');
+      else
+        // baseURI is a local file and systemId is relative
+        // Unlike the HTTP one, this will work always.
+ temp = ACE_OS::strrchr (baseURI,ACE_DIRECTORY_SEPARATOR_CHAR); + if (temp) + { + size_t pos = temp - baseURI + 1; + size_t len = pos + ACE_OS::strlen (systemId) + 1; + ACE_NEW_RETURN (normalized_uri, ACEXML_Char[len], 0); + ACE_OS::strncpy (normalized_uri, baseURI, pos); + ACE_OS::strcpy (normalized_uri + pos, systemId); + return normalized_uri; + } + return 0; + } +} + +void +ACEXML_Parser::parse_element (int is_root ACEXML_ENV_ARG_DECL) +{ + // Parse STag. + const ACEXML_Char *startname = this->parse_name (); + if (startname == 0) + { + this->fatal_error (ACE_TEXT ("Unexpected end-of-file") + ACEXML_ENV_ARG_PARAMETER); + return; + } + if (is_root && this->doctype_ != 0 + && ACE_OS::strcmp (startname, this->doctype_) != 0) + { + this->fatal_error (ACE_TEXT ("Root element different from DOCTYPE") + ACEXML_ENV_ARG_PARAMETER); + return ; + } + ACEXML_AttributesImpl attributes; + ACEXML_Char ch; + int ns_flag = 0; // Push only one namespace context onto the stack + // if there are multiple namespaces declared. 
+ + const ACEXML_Char* ns_uri = 0; + const ACEXML_Char* ns_lname = 0; // namespace URI and localName + for (int start_element_done = 0; start_element_done == 0;) + { + ch = this->skip_whitespace (); + + switch (ch) + { + case 0: + this->fatal_error(ACE_TEXT ("Internal Parser error") + ACEXML_ENV_ARG_PARAMETER); + return; + case '/': + if (this->get () != '>') + { + this->fatal_error(ACE_TEXT ("Expecting '>' at end of element ") + ACE_TEXT ("definition") + ACEXML_ENV_ARG_PARAMETER); + return; + } + this->xml_namespace_.processName(startname, ns_uri, + ns_lname, 0); + this->prefix_mapping (this->xml_namespace_.getPrefix(ns_uri), + ns_uri, 1 + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK; + this->content_handler_->startElement(ns_uri, ns_lname, + startname, &attributes + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK; + this->content_handler_->endElement (ns_uri, ns_lname, startname + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK; + this->prefix_mapping (this->xml_namespace_.getPrefix(ns_uri), + ns_uri, 0 + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK; + if (ns_flag) + { + this->xml_namespace_.popContext (); + this->nested_namespace_--; + } + return; + case '>': + this->xml_namespace_.processName (startname, ns_uri, + ns_lname, 0); + this->prefix_mapping (this->xml_namespace_.getPrefix(ns_uri), + ns_uri, 1 + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK; + this->content_handler_->startElement(ns_uri, ns_lname, startname, + &attributes + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK; + start_element_done = 1; + break; + default: + ACEXML_Char *attvalue = 0; + ACEXML_Char *attname = this->parse_name (ch); + + if (attname == 0 || + this->skip_equal () != 0 || + this->parse_attvalue (attvalue ACEXML_ENV_ARG_PARAMETER) != 0) + { + this->fatal_error(ACE_TEXT ("Error reading attribute value") + ACEXML_ENV_ARG_PARAMETER); + return; + } + + // Handling new namespace if any. Notice that the order of + // namespace declaration does matter. 
+ if (ACE_OS::strncmp (attname, ACE_TEXT("xmlns"), 5) == 0) + { + if (this->namespaces_) + { + if (!ns_flag) + { + this->xml_namespace_.pushContext (); + this->nested_namespace_++; + ns_flag = 1; + } + + ACEXML_Char* name = ACE_OS::strchr (attname, ':'); + const ACEXML_Char* ns_name = (name == 0)? + empty_string:name+1; + if (this->xml_namespace_.declarePrefix (ns_name, + attvalue) == -1) + { + this->fatal_error(ACE_TEXT ("Duplicate definition of ") + ACE_TEXT ("prefix") + ACEXML_ENV_ARG_PARAMETER); + return; + } + } + if (this->namespace_prefixes_) + { + // Namespace_prefixes_feature_ is required. So add the + // xmlns:foo to the list of attributes. + if (attributes.addAttribute (ACE_TEXT (""), ACE_TEXT (""), + attname, + default_attribute_type, + attvalue) == -1) + { + this->fatal_error(ACE_TEXT ("Duplicate attribute ") + ACE_TEXT ("definition. Hint: Try ") + ACE_TEXT ("setting namespace_prefix") + ACE_TEXT ("es feature to 0") + ACEXML_ENV_ARG_PARAMETER); + return; + } + } + if (!this->namespaces_ && !this->namespace_prefixes_) + { + this->fatal_error(ACE_TEXT ("One of namespaces or ") + ACE_TEXT ("namespace_prefixes should be") + ACE_TEXT (" declared") + ACEXML_ENV_ARG_PARAMETER); + return; + } + } + else + { + const ACEXML_Char *uri, *lName; + this->xml_namespace_.processName (attname, uri, lName, 1); + if (attributes.addAttribute (uri, lName, attname, + default_attribute_type, + attvalue) == -1) + { + this->fatal_error(ACE_TEXT ("Duplicate attribute ") + ACE_TEXT ("definition") + ACEXML_ENV_ARG_PARAMETER); + return; + } + } + break; + } + } + if (this->parse_content (startname, ns_uri, ns_lname, ns_flag + ACEXML_ENV_ARG_PARAMETER) != 0) + return; +} + +int +ACEXML_Parser::parse_content (const ACEXML_Char* startname, + const ACEXML_Char*& ns_uri, + const ACEXML_Char*& ns_lname, int ns_flag ACEXML_ENV_ARG_DECL) +{ + ACEXML_Char *cdata; + size_t cdata_length = 0; + + // Parse element contents. 
+ while (1) + { + ACEXML_Char ch = this->get (); + switch (ch) + { + case 0: + this->pop_context (1 ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + break; + case '<': + // Push out old 'characters' event. + if (cdata_length != 0) + { + cdata = this->obstack_.freeze (); + this->content_handler_->characters (cdata, 0, cdata_length + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + this->obstack_.unwind (cdata); + cdata_length = 0; + } + ch = this->peek(); + switch (ch) + { + case '!': // a comment or a CDATA section. + this->get (); // consume '!' + ch = this->peek (); + if (ch == '-') // a comment + { + if (this->parse_comment () < 0) + { + this->fatal_error(ACE_TEXT ("Invalid comment in ") + ACE_TEXT ("document") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + } + else if (ch == '[') // a CDATA section. + { + this->parse_cdata (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + else + { + this->fatal_error(ACE_TEXT ("Expecting a CDATA section ") + ACE_TEXT ("or a comment section") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + break; + case '?': // a PI. + this->get(); // consume the '?' + this->parse_processing_instruction + (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + break; + case '/': // an ETag. 
+ { + this->get (); // consume '/' + ACEXML_Char* endname = this->parse_name (); + if (endname == 0 || + ACE_OS::strcmp (startname, endname) != 0) + { + this->fatal_error(ACE_TEXT ("Name in ETag doesn't ") + ACE_TEXT ("match name in STag") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + if (this->skip_whitespace () != '>') + { + this->fatal_error(ACE_TEXT ("Expecting '>' at end ") + ACE_TEXT ("of element") + ACEXML_ENV_ARG_PARAMETER); + return -1; + } + this->content_handler_->endElement (ns_uri, ns_lname, + endname + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + this->prefix_mapping (this->xml_namespace_.getPrefix(ns_uri), + ns_uri, 0 + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + if (this->namespaces_ && ns_flag) + { + if (this->nested_namespace_ >= 1) + { + this->xml_namespace_.popContext (); + this->nested_namespace_--; + } + } + return 0; + } + default: // a new nested element? + this->parse_element (0 ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + break; + } + break; + case '&': + if (this->peek () == '#') + { + ACEXML_Char buf[7]; + size_t len = 0; + do + { + len = sizeof (buf); + if (this->parse_char_reference (buf, len) != 0) + { + // [WFC: Legal Character] + this->fatal_error (ACE_TEXT ("Invalid CharRef") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + } while (buf[0] == '&' && this->peek() == '#'); + for (size_t j = 0; j < len; ++j) + this->obstack_.grow (buf[j]); + cdata_length += len; + } + else + { + this->ref_state_ = ACEXML_ParserInt::IN_CONTENT; + int length = this->parse_entity_reference(ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + if (length == 1) + cdata_length++; + } + break; + case '\x20': case '\x0D': case '\x0A': case '\x09': +// if (this->validate_) +// { +// // Flush out any non-whitespace characters +// if (cdata_length != 0) +// { +// cdata = this->obstack_.freeze (); +// this->content_handler_->characters(cdata, 0, cdata_length +// 
ACEXML_ENV_ARG_PARAMETER); +// ACEXML_CHECK_RETURN (-1); +// this->obstack_.unwind (cdata); +// cdata_length = 0; +// } +// ++cdata_length; +// this->obstack_.grow (ch); +// while (1) +// { +// ch = this->peek(); +// if (ch == '\x20' || ch == '\x0D' || ch == '\x0A' || +// ch == '\x09') +// { +// ch = this->get(); +// this->obstack_.grow (ch); +// continue; +// } +// break; +// } +// cdata = this->obstack_.freeze (); +// this->content_handler_->ignorableWhitespace (cdata, 0, +// cdata_length +// ACEXML_ENV_ARG_PARAMETER); +// ACEXML_CHECK_RETURN (-1); +// this->obstack_.unwind (cdata); +// cdata_length = 0; +// break; +// } + // Fall thru... + default: + ++cdata_length; + this->obstack_.grow (ch); + } + } + ACE_NOTREACHED (return 0;) +} + +
/**
 * Parse a CDATA section.
 *
 * The caller has already consumed the leading "<!".  This routine
 * consumes "[CDATA[", collects everything up to the terminating "]]>"
 * and reports the collected text through the content handler's
 * characters() callback.
 *
 * @return 0 once the section has been delivered, -1 after a fatal
 *         error (missing "[CDATA[" or input ending before "]]>").
 */
int
ACEXML_Parser::parse_cdata (ACEXML_ENV_SINGLE_ARG_DECL)
{
  if (this->parse_token (ACE_TEXT ("[CDATA[")) < 0)
    {
      this->fatal_error(ACE_TEXT ("Expecting '[CDATA[' at beginning of CDATA ")
                        ACE_TEXT ("section")
                        ACEXML_ENV_ARG_PARAMETER);
      ACEXML_CHECK_RETURN (-1);
    }

  int datalen = 0;
  // Number of consecutive ']' characters read but not yet copied to
  // the obstack.  Deferring them is what lets "]]]>" be recognised as
  // the data character ']' followed by the terminator "]]>".
  int pending_brackets = 0;
  while (1)
    {
      ACEXML_Char ch = this->get ();

      if (ch == 0)
        {
          // get() yields 0 when the current input is exhausted (the
          // content loop treats it the same way).  Without this check
          // an unterminated section would spin forever growing the
          // obstack.
          this->fatal_error(ACE_TEXT ("Unterminated CDATA section")
                            ACEXML_ENV_ARG_PARAMETER);
          ACEXML_CHECK_RETURN (-1);
          return -1;
        }

      if (ch == ']')
        {
          ++pending_brackets;
          continue;
        }

      if (ch == '>' && pending_brackets >= 2)
        {
          // The last two brackets belong to the terminator; any extra
          // ones in front of them are ordinary data.
          for (; pending_brackets > 2; --pending_brackets)
            {
              this->obstack_.grow (']');
              ++datalen;
            }
          ACEXML_Char *cdata = this->obstack_.freeze ();
          this->content_handler_->characters (cdata, 0, datalen
                                              ACEXML_ENV_ARG_PARAMETER);
          ACEXML_CHECK_RETURN (-1);
          this->obstack_.unwind(cdata);
          return 0;
        }

      // Not a terminator after all: flush the deferred brackets, then
      // the current character.
      for (; pending_brackets > 0; --pending_brackets)
        {
          this->obstack_.grow (']');
          ++datalen;
        }
      this->obstack_.grow (ch);
      ++datalen;
    }
  ACE_NOTREACHED (return -1);
}
+ + +int +ACEXML_Parser::parse_entity_decl (ACEXML_ENV_SINGLE_ARG_DECL) +{ + ACEXML_Char nextch = 0; + + if ((this->parse_token (ACE_TEXT ("NTITY")) < 0) || + this->skip_whitespace_count (&nextch) == 0) + { + this->fatal_error (ACE_TEXT ("Expecting keyword ENTITY followed by a ") + ACE_TEXT ("space") ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + + int is_GEDecl = 1; + if (nextch == '%') // This is a PEDecl. + { + is_GEDecl = 0; + this->get (); // consume the '%' + if (this->skip_whitespace_count (&nextch) == 0) + { + this->fatal_error (ACE_TEXT ("Expecting space between % and ") + ACE_TEXT ("entity name") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + } + + ACEXML_Char *entity_name = this->parse_name (); + if (entity_name == 0) + { + this->fatal_error (ACE_TEXT ("Invalid entity name") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + + if (this->skip_whitespace_count (&nextch) == 0) + { + this->fatal_error (ACE_TEXT ("Expecting space between entity name and ") + ACE_TEXT ("entityDef") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + int retval = 0; + if (nextch == '\'' || nextch == '"') + { + ACEXML_Char *entity_value = 0; + if (this->parse_entity_value (entity_value + ACEXML_ENV_ARG_PARAMETER) != 0) + { + this->fatal_error(ACE_TEXT ("Invalid EntityValue") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + if (is_GEDecl) + retval = this->internal_GE_.add_entity (entity_name, + entity_value); + else + retval = 
this->internal_PE_.add_entity (entity_name, + entity_value); + if (retval < 0) + { + this->fatal_error (ACE_TEXT ("Internal Parser Error in adding") + ACE_TEXT ("Entity to map") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + else if (retval == 1) + { + this->warning (ACE_TEXT ("Duplicate entity found") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + } + else + { + ACEXML_Char *systemid, *publicid; + + this->parse_external_id (publicid, systemid + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + if (systemid == 0) + { + this->fatal_error(ACE_TEXT ("Invalid SystemLiteral") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + this->skip_whitespace_count (&nextch); + if (nextch == 'N') // NDATA section followed + { + if (is_GEDecl == 0) + { + this->fatal_error(ACE_TEXT ("Invalid NDataDecl in PEDef") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + + if ((this->parse_token (ACE_TEXT ("NDATA")) < 0) || + this->skip_whitespace_count (&nextch) == 0) + { + this->fatal_error(ACE_TEXT ("Expecting keyword NDATA followed ") + ACE_TEXT ("by a space") ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + + ACEXML_Char *ndata = this->parse_name (); + if (this->validate_) // [VC: Notation Declared] + { + if (!this->notations_.resolve_entity (ndata)) + { + this->fatal_error (ACE_TEXT ("Undeclared Notation name") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + this->dtd_handler_->unparsedEntityDecl(entity_name, publicid, + systemid, ndata + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + } + else + { + if (is_GEDecl) + retval = this->external_GE_.add_entity (entity_name, + systemid); + else + retval = this->external_PE_.add_entity (entity_name, + systemid); + if (retval < 0) + { + this->fatal_error(ACE_TEXT ("Internal Parser Error") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + else if (retval == 1) + this->warning(ACE_TEXT ("Duplicate external entity") + 
ACEXML_ENV_ARG_PARAMETER); + if (is_GEDecl) + retval = this->external_GE_.add_entity (entity_name, + publicid); + else + retval = this->external_PE_.add_entity (entity_name, + publicid); + if (retval < 0) + { + this->fatal_error(ACE_TEXT ("Internal Parser Error") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + else if (retval == 1) + this->warning (ACE_TEXT ("Duplicate entity definition") + ACEXML_ENV_ARG_PARAMETER); + } + } + + // End of ENTITY definition + if (this->skip_whitespace() != '>') + { + this->fatal_error(ACE_TEXT ("Expecting '>' at end of entityDef") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + return 0; +} + +int +ACEXML_Parser::parse_attlist_decl (ACEXML_ENV_SINGLE_ARG_DECL) +{ + if (this->parse_token (ACE_TEXT ("ATTLIST")) < 0) + { + this->fatal_error(ACE_TEXT ("Expecting keyword 'ATTLIST'") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + int count = check_for_PE_reference (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + if (!count) + { + this->fatal_error(ACE_TEXT ("Expecting space between ATTLIST and ") + ACE_TEXT ("element name") ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + + ACEXML_Char *element_name = this->parse_name (); + if (element_name == 0) + { + this->fatal_error(ACE_TEXT ("Invalid element Name in attlistDecl") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + ACEXML_Char fwd = 0; + count = this->skip_whitespace_count (&fwd); + // Parse AttDef* + while (fwd != '>') + { + if (!this->check_for_PE_reference (ACEXML_ENV_SINGLE_ARG_PARAMETER) + && !count) + this->fatal_error(ACE_TEXT ("Expecting space between element ") + ACE_TEXT ("name and AttDef") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + this->skip_whitespace_count (&fwd); + if (fwd == '>') + break; + + count = this->check_for_PE_reference (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + + this->parse_attname (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN 
(-1); + + count = this->check_for_PE_reference (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + if (!count) + { + this->fatal_error(ACE_TEXT ("Expecting space between AttName and ") + ACE_TEXT ("AttType") ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + this->parse_atttype (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + + count = this->check_for_PE_reference (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + if (!count) + { + this->fatal_error(ACE_TEXT ("Expecting space between AttType and") + ACE_TEXT (" DefaultDecl") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + this->parse_defaultdecl (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + + count = this->check_for_PE_reference (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + this->skip_whitespace_count(&fwd); + } + this->get (); // consume closing '>' + return 0; +} + + +int +ACEXML_Parser::check_for_PE_reference (ACEXML_ENV_SINGLE_ARG_DECL) +{ + ACEXML_Char fwd = '\xFF'; + // Skip any leading whitespaces and store the number of such chars skipped + int count = this->skip_whitespace_count (&fwd); + if (fwd == 0) + { + this->get(); // Consume the 0 + this->pop_context (0 ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + fwd = this->peek(); + } + if (fwd == '%') + { + this->get(); // Consume the % + if (this->external_subset_) + { + this->parse_PE_reference (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + else + { + this->fatal_error(ACE_TEXT ("Illegal PERef within markupDecl") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + } + if (count) + { + // We have atleast one whitespace. 
So just skip any more whitespaces + // and return the count + this->skip_whitespace_count(); + return count; + } + return this->skip_whitespace_count(); +} + +ACEXML_Char* +ACEXML_Parser::parse_attname (ACEXML_ENV_SINGLE_ARG_DECL) +{ + // Parse attribute name + ACEXML_Char *att_name = this->parse_name (); + if (att_name == 0) + { + this->fatal_error(ACE_TEXT ("Invalid AttName") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (0); + } + return att_name; +} + +int +ACEXML_Parser::parse_defaultdecl (ACEXML_ENV_SINGLE_ARG_DECL) +{ + // DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) + ACEXML_Char nextch = this->peek (); + ACEXML_Char *fixed_attr = 0; + switch (nextch) + { + case '#': + this->get (); // consume the '#' + switch (this->get ()) + { + case 'R': + if (this->parse_token (ACE_TEXT ("EQUIRED")) < 0) + { + this->fatal_error(ACE_TEXT ("Expecting keyword REQUIRED") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + // We now know this attribute is required + // @@ Set up the validator as such. + break; + case 'I': + if (this->parse_token (ACE_TEXT ("MPLIED")) < 0) + { + this->fatal_error(ACE_TEXT ("Expecting keyword IMPLIED") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + // We now know this attribute is implied. + // @@ Set up the validator as such. + break; + case 'F': + if (this->parse_token (ACE_TEXT ("IXED")) < 0 || + this->skip_whitespace_count () == 0) + { + this->fatal_error(ACE_TEXT ("Expecting keyword FIXED") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + // We now know this attribute is fixed. 
+ if (this->parse_attvalue (fixed_attr + ACEXML_ENV_ARG_PARAMETER) != 0) + { + this->fatal_error(ACE_TEXT ("Invalid Default AttValue") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + // @@ set up validator + break; + default: + this->fatal_error (ACE_TEXT ("Invalid DefaultDecl") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + break; + case '\'': + case '"': + if (this->parse_attvalue (fixed_attr ACEXML_ENV_ARG_PARAMETER) != 0) + { + this->fatal_error(ACE_TEXT ("Invalid AttValue") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + // @@ set up validator + break; + default: + this->fatal_error (ACE_TEXT ("Invalid DefaultDecl") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + break; + } + return 0; +} +
/**
 * Recognise a TokenizedType keyword inside an attribute definition:
 * ID, IDREF, IDREFS, ENTITY, ENTITIES, NMTOKEN or NMTOKENS.
 *
 * The first character read here selects the keyword family: 'I' for
 * ID/IDREF/IDREFS, 'E' for ENTITY/ENTITIES and 'M' for
 * NMTOKEN/NMTOKENS (parse_atttype() consumes the leading 'N' before
 * calling).  A keyword is only accepted when the next character is
 * whitespace; that whitespace is peeked at, not consumed.
 *
 * @return 0 when a keyword was recognised, -1 once a fatal error has
 *         been reported.
 */
int
ACEXML_Parser::parse_tokenized_type (ACEXML_ENV_SINGLE_ARG_DECL)
{
  ACEXML_Char ch = this->get();
  switch (ch)
    {
    case 'I':
      if (this->get () == 'D')
        {
          if (this->peek() != 'R' && this->is_whitespace (this->peek()))
            {
              // We have successfully identified the type of the
              // attribute as ID
              // @@ Set up validator as such.
              break;
            }
          if (this->parse_token (ACE_TEXT ("REF")) == 0)
            {
              if (this->peek() != 'S' && this->is_whitespace (this->peek()))
                {
                  // We have successfully identified the type of
                  // the attribute as IDREF
                  // @@ Set up validator as such.
                  break;
                }
              else if (this->peek() == 'S'
                       && this->get() // consume the 'S'
                       && this->is_whitespace (this->peek()))
                {
                  // We have successfully identified the type of
                  // the attribute as IDREFS
                  // @@ Set up validator as such.
                  break;
                }
            }
        }
      // Admittedly, this error message is not precise enough
      this->fatal_error(ACE_TEXT ("Expecting keyword `ID', `IDREF', or")
                        ACE_TEXT ("`IDREFS'") ACEXML_ENV_ARG_PARAMETER);
      ACEXML_CHECK_RETURN (-1);
    case 'E':               // ENTITY or ENTITIES
      if (this->parse_token (ACE_TEXT ("NTIT")) == 0)
        {
          // The character after "ENTIT" decides between the two
          // keywords.  It is consumed by get(), so only "ES" is left
          // to match for ENTITIES.
          ACEXML_Char nextch = this->get ();
          int recognized = 0;
          if (nextch == 'Y')
            {
              // We have successfully identified the type of
              // the attribute as ENTITY
              // @@ Set up validator as such.
              recognized = 1;
            }
          else if (nextch == 'I'
                   && this->parse_token (ACE_TEXT ("ES")) == 0)
            {
              // We have successfully identified the type of
              // the attribute as ENTITIES
              // @@ Set up validator as such.
              recognized = 1;
            }
          // Only a fully matched keyword followed by whitespace is a
          // success; anything else falls through to the error below.
          if (recognized && this->is_whitespace (this->peek()))
            {
              // success
              break;
            }
        }
      // Admittedly, this error message is not precise enough
      this->fatal_error(ACE_TEXT ("Expecting keyword `ENTITY', or")
                        ACE_TEXT ("`ENTITIES'") ACEXML_ENV_ARG_PARAMETER);
      ACEXML_CHECK_RETURN (-1);
    case 'M':
      if (this->parse_token (ACE_TEXT ("TOKEN")) == 0)
        {
          if (this->is_whitespace (this->peek()))
            {
              // We have successfully identified the type of
              // the attribute as NMTOKEN
              // @@ Set up validator as such.
              break;
            }
          else if (this->peek() == 'S'
                   && this->get()
                   && this->is_whitespace (this->peek()))
            {
              // We have successfully identified the type of
              // the attribute as NMTOKENS
              // @@ Set up validator as such.
              break;
            }
        }
      this->fatal_error(ACE_TEXT ("Expecting keyword `NMTOKEN' or `NMTO")
                        ACE_TEXT ("KENS'") ACEXML_ENV_ARG_PARAMETER);
      ACEXML_CHECK_RETURN (-1);
      break;
    default:
      this->fatal_error (ACE_TEXT ("Internal Parser Error")
                         ACEXML_ENV_ARG_PARAMETER);
      ACEXML_CHECK_RETURN (-1);
      break;
    }
  return 0;
}
+ + +/** + * AttType ::= StringType | TokenizedType | EnumeratedType + * StringType ::= 'CDATA' + * TokenizedType ::= 'ID' [VC: ID] + * [VC: One ID per Element Type] + * [VC: ID Attribute Default] + * | 'IDREF' [VC: IDREF] + * | 'IDREFS' [VC: IDREF] + * | 'ENTITY' [VC: Entity Name] + * | 'ENTITIES' [VC: Entity Name] + * | 'NMTOKEN' [VC: Name Token] + * | 'NMTOKENS' + * + * EnumeratedType ::= NotationType | Enumeration + * NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' + * [VC: Notation Attributes] + * [VC: One Notation Per Element Type] + * [VC: No Notation on Empty Element] + * Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' + * [VC: Enumeration] + */ +int +ACEXML_Parser::parse_atttype (ACEXML_ENV_SINGLE_ARG_DECL) +{ + ACEXML_Char nextch = this->peek(); + switch (nextch) + { + case 'C': // CDATA + if (this->parse_token (ACE_TEXT ("CDATA")) < 0) + { + this->fatal_error(ACE_TEXT ("Expecting keyword 'CDATA'") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + // Else, we have successfully identified the type of the + // attribute as CDATA + // @@ Set up validator appropriately here. 
+ break; + case 'I': case 'E': // ID, IDREF, IDREFS, ENTITY or ENTITIES + this->parse_tokenized_type (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + break; + case 'N': // NMTOKEN, NMTOKENS, or NOTATION + this->get(); + nextch = this->peek(); + if (nextch != 'M' && nextch != 'O') + { + this->fatal_error (ACE_TEXT ("Expecting keyword 'NMTOKEN', ") + ACE_TEXT ("'NMTOKENS' or 'NOTATION'") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + if (nextch == 'M') + { + this->parse_tokenized_type (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + break; + } + else // NOTATION + { + if (this->parse_token (ACE_TEXT ("OTATION")) < 0) + { + this->fatal_error(ACE_TEXT ("Expecting keyword `NOTATION'") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + int count = this->check_for_PE_reference (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + if (!count) + { + this->fatal_error (ACE_TEXT ("Expecting space between keyword ") + ACE_TEXT ("NOTATION and '('") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + if (this->get () != '(') + { + this->fatal_error(ACE_TEXT ("Expecting '(' in NotationType") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + this->check_for_PE_reference (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + do { + this->skip_whitespace_count(); + ACEXML_Char *notation_name = this->parse_name (); + if (notation_name == 0) + { + this->fatal_error(ACE_TEXT ("Invalid notation name") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + // @@ get another notation name, set up validator as such + this->check_for_PE_reference (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + nextch = this->get(); + } while (nextch == '|'); + if (nextch != ')') + { + this->fatal_error (ACE_TEXT ("Expecting a ')' after a ") + ACE_TEXT ("NotationType declaration") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + } + break; + case '(': // 
EnumeratedType - Enumeration + this->get(); + this->check_for_PE_reference (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + do { + this->skip_whitespace_count(); + ACEXML_Char *token_name = this->parse_nmtoken (); + if (token_name == 0) + { + this->fatal_error(ACE_TEXT ("Invalid enumeration name") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + // @@ get another nmtoken, set up validator as such + this->check_for_PE_reference (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + nextch = this->get(); + } while (nextch == '|'); + if (nextch != ')') + { + this->fatal_error (ACE_TEXT ("Expecting a ')' after a ") + ACE_TEXT ("Enumeration declaration") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + break; + default: + { + this->fatal_error(ACE_TEXT ("Invalid AttType") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + ACE_NOTREACHED (break); + } + return 0; +} + +int +ACEXML_Parser::parse_notation_decl (ACEXML_ENV_SINGLE_ARG_DECL) +{ + if (this->parse_token (ACE_TEXT ("NOTATION")) < 0) + { + this->fatal_error(ACE_TEXT ("Expecting Keyword 'NOTATION'") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + int count = this->check_for_PE_reference (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + if (!count) + { + this->fatal_error (ACE_TEXT ("Expecting a space between keyword NOTATION") + ACE_TEXT (" and notation name") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + ACEXML_Char *notation = this->parse_name (); + if (notation == 0) + { + this->fatal_error(ACE_TEXT ("Invalid Notation name") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + count = this->check_for_PE_reference (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + if (!count) + { + this->fatal_error (ACE_TEXT ("Expecting a space between notation name ") + ACE_TEXT ("and ExternalID/PublicID") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + + ACEXML_Char *systemid, 
*publicid; + + // Gross hack but otherwise we need to go around a lot of loops to parse, + // When the ExternalID starts with 'PUBLIC' we cannot distinguish a + // PublicId from a ExternalID by looking using a one character read-ahead + ACEXML_ParserInt::ReferenceState temp = this->ref_state_; + this->ref_state_ = ACEXML_ParserInt::IN_NOTATION; + + this->parse_external_id (publicid, systemid + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + // Restore the original value. + this->ref_state_ = temp; + + // [VC: Unique Notation Name] + if (systemid && this->notations_.add_entity (notation, systemid) != 0 + && this->validate_) + { + this->fatal_error(ACE_TEXT ("Internal Parser Error") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + if (publicid) + { + int retval = this->notations_.add_entity (notation, publicid); + if (retval != 0 && !systemid && this->validate_) + { + this->fatal_error(ACE_TEXT ("Internal Parser Error") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + } + + if (this->skip_whitespace() != '>') + { + this->fatal_error(ACE_TEXT ("Expecting '>' at end of NotationDecl") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + + if (this->validate_ && this->dtd_handler_) + { + this->dtd_handler_->notationDecl (notation, + publicid, + systemid ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + return 0; +} + +int +ACEXML_Parser::parse_element_decl (ACEXML_ENV_SINGLE_ARG_DECL) +{ + if (this->parse_token (ACE_TEXT ("LEMENT")) < 0) + { + this->fatal_error (ACE_TEXT ("Expecting keyword ELEMENT") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + int count = this->check_for_PE_reference (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + if (!count) + { + this->fatal_error (ACE_TEXT ("Expecting a space between keyword ELEMENT") + ACE_TEXT (" and element name") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + ACEXML_Char *element_name = this->parse_name (); + if 
(element_name == 0) + { + this->fatal_error (ACE_TEXT ("Invalid element name") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + count = this->check_for_PE_reference (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + if (!count) + { + this->fatal_error (ACE_TEXT ("Expecting a space between element name ") + ACE_TEXT ("and element definition") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + ACEXML_Char nextch = this->peek(); + switch (nextch) + { + case 'E': // EMPTY + if (this->parse_token (ACE_TEXT ("EMPTY")) < 0) + { + this->fatal_error (ACE_TEXT ("Expecting keyword EMPTY") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + break; + case 'A': // ANY + if (this->parse_token (ACE_TEXT ("ANY")) < 0) + { + this->fatal_error (ACE_TEXT ("Expecting keyword ANY") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + break; + case '(': // children + this->parse_children_definition (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + break; + default: // error + this->fatal_error (ACE_TEXT ("Invalid element definition") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + count = this->check_for_PE_reference (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + if (this->skip_whitespace () != '>') + { + this->fatal_error (ACE_TEXT ("Expecting '>' after element defintion") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + return 0; +} + + +int +ACEXML_Parser::parse_children_definition (ACEXML_ENV_SINGLE_ARG_DECL) +{ + this->get (); // consume the '(' + this->check_for_PE_reference (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + int subelement_number = 0; + ACEXML_Char nextch = this->peek(); + switch (nextch) + { + case '#': // Mixed element, + if (this->parse_token (ACE_TEXT ("#PCDATA")) < 0) + { + this->fatal_error(ACE_TEXT ("Expecting keyword '#PCDATA'") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + this->check_for_PE_reference 
(ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + nextch = this->get(); + while (nextch == '|') + { + this->check_for_PE_reference (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + ACEXML_Char *name = this->parse_name (); + // @@ name will be used in the Validator later. + ACE_UNUSED_ARG (name); + ++subelement_number; + // @@ Install Mixed element name into the validator. + this->check_for_PE_reference (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + nextch = this->skip_whitespace(); + } + if (nextch != ')' || + (subelement_number && this->get () != '*')) + { + this->fatal_error(ACE_TEXT ("Expecing ')' or ')*' at end of Mixed") + ACE_TEXT (" element") ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + // @@ close the element definition in the validator. + break; + default: + int status = this->parse_child (1 ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + if (status != 0) + return -1; + } + + // Check for trailing '?', '*', '+' + nextch = this->peek (); + switch (nextch) + { + case '?': + // @@ Consume the character and inform validator as such, + this->get (); + break; + case '*': + // @@ Consume the character and inform validator as such, + this->get (); + break; + case '+': + // @@ Consume the character and inform validator as such, + this->get (); + break; + default: + break; // not much to do. + } + + return 0; +} + +int +ACEXML_Parser::parse_child (int skip_open_paren ACEXML_ENV_ARG_DECL) +{ + // Conditionally consume the open paren. 
+ if (skip_open_paren == 0 && this->get () != '(') + { + this->fatal_error(ACE_TEXT ("Expecting '(' at beginning of children") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + + ACEXML_Char node_type = 0; + ACEXML_Char nextch; + + do { + this->check_for_PE_reference (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + this->skip_whitespace_count (&nextch); + switch (nextch) + { + case '(': + this->check_for_PE_reference (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + this->parse_child (0 ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + break; + default: + this->check_for_PE_reference (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + // must be an element name here. + ACEXML_Char *subelement = this->parse_name (); + if (subelement == 0) + { + this->fatal_error(ACE_TEXT ("Invalid subelement name") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + // Check for trailing '?', '*', '+' + nextch = this->peek (); + switch (nextch) + { + case '?': + // @@ Consume the character and inform validator as such, + this->get (); + break; + case '*': + // @@ Consume the character and inform validator as such, + this->get (); + break; + case '+': + // @@ Consume the character and inform validator as such, + this->get (); + break; + default: + break; // not much to do. + } + + // @@ Inform validator of the new element here. + break; + } + this->check_for_PE_reference (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + this->skip_whitespace_count (&nextch); + switch (nextch) + { + case '|': + switch (node_type) + { + case 0: + node_type = '|'; + // @@ inform validator of this new type?? 
+ break; + case '|': + break; + default: + this->fatal_error (ACE_TEXT ("Expecting `,', `|', or `)' ") + ACE_TEXT ("while defining an element") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + break; + case ',': + switch (node_type) + { + case 0: + node_type = ','; + // @@ inform validator of this new type?? + break; + case ',': + break; + default: + this->fatal_error (ACE_TEXT ("Expecting `,', `|', or `)' ") + ACE_TEXT ("while defining an element") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + break; + case ')': + break; + default: + this->fatal_error (ACE_TEXT ("Expecting `,', `|', or `)' ") + ACE_TEXT ("while defining an element") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + nextch = this->get(); // Consume the `,' or `|' or `)' + if (nextch == ')') + break; + this->check_for_PE_reference (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + this->skip_whitespace_count (&nextch); + } while (nextch != ')'); + + // Check for trailing '?', '*', '+' + nextch = this->peek (); + switch (nextch) + { + case '?': + // @@ Consume the character and inform validator as such, + this->get (); + break; + case '*': + // @@ Consume the character and inform validator as such, + this->get (); + break; + case '+': + // @@ Consume the character and inform validator as such, + this->get (); + break; + default: + break; // not much to do. + } + + + return 0; +} +
/**
 * Parse a character reference ("&#NNN;" or "&#xHHH;") and store the
 * referenced character, encoded for ACEXML_Char, in @a buf.
 *
 * The caller has already consumed the '&'; this routine consumes the
 * '#', the optional 'x', the digits and the closing ';'.
 *
 * @param buf  Receives the NUL-terminated encoded character.
 * @param len  In: capacity of @a buf (at least 7).  Out: number of
 *             ACEXML_Char units stored, not counting the NUL.
 *
 * @return 0 on success, -1 if the reference is malformed, too long,
 *         or does not name a legal XML character.
 */
int
ACEXML_Parser::parse_char_reference (ACEXML_Char *buf, size_t& len)
{
  if (len < 7)   // Max size of a CharRef plus terminating '\0'
    return -1;
  ACEXML_Char ch = this->get();
  if (ch != '#')      // Internal error.
    return -1;
  int hex = 0;
  ch = this->peek();
  if (ch == 'x')
    {
      hex = 1;
      this->get ();
    }

  // Collect the digits in a private scratch buffer.  Writing them (and
  // their terminating NUL) into the caller's buffer could run one
  // element past its end when exactly `len' digits precede the ';'.
  // Eight digits are enough for any legal code point and keep the
  // parsed value within 32 bits.
  ACEXML_Char digits[9];
  const size_t max_digits = sizeof (digits) / sizeof (digits[0]) - 1;
  size_t i = 0;
  ch = this->get ();
  while (i < max_digits
         && (this->isNormalDigit (ch) || (hex ? this->isCharRef(ch): 0)))
    {
      digits[i++] = ch;
      ch = this->get();
    }
  // Reject an empty digit string, a missing ';' and over-long input.
  if (ch != ';' || i == 0)
    return -1;
  digits[i] = 0;
  ACEXML_UCS4 sum = (ACEXML_UCS4) ACE_OS::strtol (digits, 0, (hex ? 16 : 10));
  // [WFC: Legal Character]
  if (!this->isChar (sum))
    return -1;
  int clen;
#if defined (ACE_USES_WCHAR)
#  if (ACE_SIZEOF_WCHAR == 2)    // UTF-16
  if ((clen = ACEXML_Transcoder::ucs42utf16 (sum, buf, len)) < 0)
    return -1;
#  elif (ACE_SIZEOF_WCHAR == 4)  // UCS 4
  // A single unit holds the whole code point.  Reporting 2 here would
  // make callers append the NUL below as if it were character data.
  buf [0] = sum;
  clen = 1;
#  endif /* ACE_SIZEOF_WCHAR */

#else                          // or UTF-8
  if ((clen = ACEXML_Transcoder::ucs42utf8 (sum, buf, len)) < 0)
    return -1;
#endif
  buf [clen] = 0;
  len = clen;
  return 0;
}

/**
 * Parse the Name part of an entity or parameter-entity reference, up
 * to and including the terminating ';'.
 *
 * The name is accumulated on alt_stack_.  It must begin with a
 * letter, '_' or ':' and continue with name characters.
 *
 * @return The frozen name, or 0 if the first character cannot start a
 *         name or the name is not followed by ';'.
 */
ACEXML_Char*
ACEXML_Parser::parse_reference_name (void)
{
  ACEXML_Char ch = this->get ();
  // Name ::= (Letter | '_' | ':') (NameChar)*
  // All three alternatives must fail before the name is rejected.
  if (!this->isLetter (ch) && ch != '_' && ch != ':')
    return 0;
  while (ch) {
    this->alt_stack_.grow (ch);
    ch = this->peek ();
    if (!this->isNameChar (ch))
      break;
    ch = this->get ();
  };
  if (ch != ';')
    return 0;
  ch = this->get();   // consume the ';'
  return this->alt_stack_.freeze ();
}
+ +int +ACEXML_Parser::parse_attvalue (ACEXML_Char *&str ACEXML_ENV_ARG_DECL) +{ + ACEXML_Char quote = this->get (); + if (quote != '\'' && quote != '"') // Not a quoted string. + return -1; + ACEXML_Char ch = this->get (); + while (1) + { + if (ch == quote) + { + ACEXML_Char* temp = this->obstack_.freeze (); + // If the attribute type is not CDATA, then the XML processor + // must further process the normalized attribute value by + // discarding any leading and trailing space (#x20) characters, + // and by replacing sequences of space (#x20) characters by a + // single space (#x20) character. 
+ + // if (atttype != CDATA) { + // ACEXML_Char* start = temp; + // ACEXML_Char* end = temp + ACE_OS::strlen (temp); + // while (*start == '\x20') + // start++; + // if (start == end) // String which is all spaces + // str = start; + // while (*start != 0) + // { + // this->obstack_.grow (*start); + // start++; + // while (*start == '\x20') + // start++; + // } + // str = this->obstack_.freeze(); + // } + str = temp; + return 0; + } + switch (ch) + { + case '&': + if (this->peek () == '#') + { + ACEXML_Char buf[7]; + size_t len = sizeof (buf); + if (this->parse_char_reference (buf, len) != 0) + { + // [WFC: Legal Character] + this->fatal_error (ACE_TEXT ("Invalid CharacterRef") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + for (size_t j = 0; j < len; ++j) + this->obstack_.grow (buf[j]); + } + else + { + this->ref_state_ = ACEXML_ParserInt::IN_ATT_VALUE; + this->parse_entity_reference (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + break; + case '\x20': case '\x0D': case '\x0A': case '\x09': + this->obstack_.grow ('\x20'); + break; + case '<': // [WFC: No < in Attribute Values] + this->fatal_error (ACE_TEXT ("Illegal '<' in AttValue") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + break; + case 0: + this->pop_context (1 ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + break; + default: + this->obstack_.grow (ch); + break; + } + ch = this->get(); + } +} + + + +int +ACEXML_Parser::parse_entity_reference (ACEXML_ENV_SINGLE_ARG_DECL) +{ + ACEXML_Char* replace = this->parse_reference_name (); + if (replace == 0) + { + this->fatal_error (ACE_TEXT ("Invalid Reference name") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + + // [WFC: Parsed Entity] + if (this->unparsed_entities_.resolve_entity (replace)) { + this->fatal_error (ACE_TEXT ("EntityRef refers to unparsed entity") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + // Look in the internal general entities set first. 
+ const ACEXML_Char* entity = this->internal_GE_.resolve_entity(replace); + + // Look in the predefined entities. + if (!entity) + { + entity = this->predef_entities_.resolve_entity (replace); + if (entity) + { + // Special case to return the length in case of predefined entities + this->obstack_.grow (*entity); + return 1; + } + } + + if (!this->validate_) + { + if (this->standalone_) + { + // [WFC: Entity Declared] + this->fatal_error (ACE_TEXT ("Undeclared Entity reference") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + else + { + this->content_handler_->skippedEntity (replace + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + return 0; + } + } + + // No match in internal subset + if (!entity + // or No DTDs + && (!(this->internal_dtd_ || this->external_dtd_) + // or Only Internal DTD and no parameter entity references + || (this->internal_dtd_ && !this->external_dtd_ + && !this->has_pe_refs_) + // or Standalone = 'yes' + || this->standalone_)) + { + // [WFC: Entity Declared] + this->fatal_error (ACE_TEXT ("Undeclared Entity reference") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + + ACEXML_Char* systemId = 0; + ACEXML_Char* publicId = 0; + if (!entity) + { + if (this->external_GE_.resolve_entity (replace, systemId, publicId) < 0) + { + this->fatal_error (ACE_TEXT ("Undeclared Entity reference") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + if (this->ref_state_ == ACEXML_ParserInt::IN_ATT_VALUE) + { + this->fatal_error (ACE_TEXT ("External EntityRef in Attribute Value") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + this->external_entity_++; + } + + + // [WFC: No Recursion] + ACEXML_Char* ref_name = replace; + int present = this->GE_reference_.insert (ref_name); + if (present == 1 || present == -1) + { + while (this->GE_reference_.pop(ref_name) != -1) + ; + this->fatal_error (ACE_TEXT ("Recursion in resolving entity") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + + if 
(!this->external_entity_) + { + ACEXML_StrCharStream* str = 0; + ACE_NEW_RETURN (str, ACEXML_StrCharStream, -1); + if (str->open (entity, replace) < 0 + || this->switch_input (str, replace) != 0) + { + this->fatal_error (ACE_TEXT ("Unable to create internal input ") + ACE_TEXT ("stream") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + return 0; + } + else + { + ACEXML_Char* uri = this->normalize_systemid (systemId); + ACE_Auto_Basic_Array_Ptr<ACEXML_Char> cleanup_uri (uri); + ACEXML_InputSource* ip = 0; + if (this->entity_resolver_) + { + ip = this->entity_resolver_->resolveEntity (publicId, + (uri ? uri : systemId) + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + if (ip) + { + if (this->switch_input (ip, (uri ? uri : systemId), + publicId) != 0) + { + this->fatal_error (ACE_TEXT ("Internal Parser Error") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + return 0; + } + } + ACEXML_StreamFactory factory; + ACEXML_CharStream* cstream = factory.create_stream (uri ? uri: systemId); + if (!cstream) { + this->fatal_error (ACE_TEXT ("Invalid input source") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + if (this->switch_input (cstream, systemId, publicId) != 0) + { + this->fatal_error (ACE_TEXT ("Internal Parser Error") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + } + return 0; +} + +int +ACEXML_Parser::parse_PE_reference (ACEXML_ENV_SINGLE_ARG_DECL) +{ + ACEXML_Char* replace = this->parse_reference_name (); + if (replace == 0) + { + this->fatal_error (ACE_TEXT ("Invalid PEReference name") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + + // Look in the internal general entities set first. 
+ const ACEXML_Char* entity = this->internal_PE_.resolve_entity(replace); + + if (!entity && // No match in internal + (!this->external_dtd_ || // or No External DTDs + this->standalone_)) // or Standalone + { + // [VC: Entity Declared] + this->fatal_error (ACE_TEXT ("Undefined Internal PEReference") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + + ACEXML_Char* systemId = 0; + ACEXML_Char* publicId = 0; + if (!entity && this->validate_) + { + if (this->external_PE_.resolve_entity (replace, systemId, publicId) < 0) + { + this->fatal_error (ACE_TEXT ("Undefined PEReference") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + this->external_entity_++; + } + + // [WFC: No Recursion] + ACEXML_Char* ref_name = replace; + int present = this->PE_reference_.insert (ref_name); + if (present == 1 || present == -1) + { + while (this->PE_reference_.pop(ref_name) != -1) + ; + this->fatal_error (ACE_TEXT ("Recursion in resolving entity") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + + if (entity && !this->external_entity_) + { + ACEXML_StrCharStream* sstream = 0; + ACEXML_String str (entity); + if (this->ref_state_ != ACEXML_ParserInt::IN_ENTITY_VALUE) + { + const ACEXML_Char* ch = ACE_TEXT (" "); + str = ch + str + ch; + } + // ACE_DEBUG ((LM_DEBUG, + // ACE_TEXT ("Entity is %s\n Replacement Text is : %s\n"), + // replace, str.c_str())); + ACE_NEW_RETURN (sstream, ACEXML_StrCharStream, -1); + if (sstream->open (str.c_str(), replace) < 0 + || this->switch_input (sstream, replace) != 0) + { + this->fatal_error (ACE_TEXT ("Error in switching InputSource") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + return 0; + } + else if (this->external_entity_ && this->validate_) + { + ACEXML_Char* uri = this->normalize_systemid (systemId); + ACE_Auto_Basic_Array_Ptr<ACEXML_Char> cleanup_uri (uri); + ACEXML_InputSource* ip = 0; + if (this->entity_resolver_) + { + ip = this->entity_resolver_->resolveEntity (publicId, + (uri ? 
uri : systemId) + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + if (ip) + { + if (this->switch_input (ip, (uri ? uri : systemId), publicId) != 0) + { + this->fatal_error (ACE_TEXT ("Error in switching InputSource") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + return 0; + } + else + { + ACEXML_StreamFactory factory; + ACEXML_CharStream* cstream = factory.create_stream (uri ? uri: systemId); + if (!cstream) { + this->fatal_error (ACE_TEXT ("Invalid input source") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + if (this->switch_input (cstream, systemId, publicId) != 0) + { + this->fatal_error (ACE_TEXT ("Error in switching InputSource") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + if (this->ref_state_ == ACEXML_ParserInt::IN_ENTITY_VALUE) + { + ACEXML_Char less, mark; + if (this->peek() == '<') + { + less = this->get(); + if (this->peek() == '?') + { + mark = this->get(); + if (this->peek() == 'x') + { + this->parse_text_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + else + { + this->obstack_.grow (less); + this->obstack_.grow (mark); + } + } + this->obstack_.grow (less); + } + } + return 0; + } + } + this->fatal_error (ACE_TEXT ("Undefined PEReference") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + return -1; +} + +int +ACEXML_Parser::parse_entity_value (ACEXML_Char *&str + ACEXML_ENV_ARG_DECL) +{ + ACEXML_ParserInt::ReferenceState temp = this->ref_state_; + ACEXML_Char quote = this->get (); + if (quote != '\'' && quote != '"') // Not a quoted string. 
+ return -1; + ACEXML_Char ch = this->get (); + while (1) + { + if (ch == quote) + { + str = this->obstack_.freeze (); + this->ref_state_ = temp; + return 0; + } + switch (ch) + { + case '&': + if (this->peek () == '#') + { + if (!this->external_entity_) + { + ACEXML_Char buf[7]; + size_t len = sizeof (buf); + if (this->parse_char_reference (buf, len) != 0) + { + // [WFC: Legal Character] + this->fatal_error (ACE_TEXT ("Invalid character ") + ACE_TEXT ("reference") + ACEXML_ENV_ARG_PARAMETER); + return -1; + } + for (size_t j = 0; j < len; ++j) + this->obstack_.grow (buf[j]); + break; + } + } + this->obstack_.grow (ch); + break; + case '%': + if (!this->external_entity_) + { + this->ref_state_ = ACEXML_ParserInt::IN_ENTITY_VALUE; + this->parse_PE_reference(ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + break; + } + this->obstack_.grow (ch); + break; + case 0: + this->pop_context (0 ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + break; + default: + this->obstack_.grow (ch); + break; + } + ch = this->get(); + } +} + +ACEXML_Char * +ACEXML_Parser::parse_name (ACEXML_Char ch) +{ + if (ch == 0) + ch = this->get (); + if (!this->isLetter (ch) && ch != '_' && ch != ':') + return 0; + while (ch) { + this->obstack_.grow (ch); + ch = this->peek (); + if (!this->isNameChar (ch)) + break; + ch = this->get (); + }; + return this->obstack_.freeze (); +} + +ACEXML_Char* +ACEXML_Parser::parse_nmtoken (ACEXML_Char ch) +{ + if (ch == 0) + ch = this->get (); + if (!this->isNameChar (ch)) + return 0; + while (ch) { + this->obstack_.grow (ch); + ch = this->peek (); + if (!this->isNameChar (ch)) + break; + ch = this->get (); + }; + return this->obstack_.freeze (); +} + +int +ACEXML_Parser::parse_version_num (ACEXML_Char*& str) +{ + ACEXML_Char quote = this->get (); + if (quote != '\'' && quote != '"') // Not a quoted string. 
+ return -1; + int numchars = 0; + while (1) + { + ACEXML_Char ch = this->get (); + if (ch == quote && !numchars) + return -1; + else if (ch == quote) + { + str = this->obstack_.freeze (); + return 0; + } + // [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+ + if (ch == '-' || ((ch >= 'a' && ch <= 'z') || + (ch >= 'A' && ch <= 'Z') || + (ch >= '0' && ch <= '9') || + (ch == '_' || ch == '.' || ch == ':'))) + { + this->obstack_.grow (ch); + numchars++; + } + else + return -1; + } +} + +int +ACEXML_Parser::parse_system_literal (ACEXML_Char*& str) +{ + const ACEXML_Char quote = this->get(); + if (quote != '\'' && quote != '"') // Not a quoted string. + return -1; + while (1) + { + ACEXML_Char ch = this->get (); + if (ch == quote) + { + str = this->obstack_.freeze (); + return 0; + } + switch (ch) + { + case '\x00': case '\x01': case '\x02': case '\x03': case '\x04': + case '\x05': case '\x06': case '\x07': case '\x08': case '\x09': + case '\x0A': case '\x0B': case '\x0C': case '\x0D': case '\x0E': + case '\x0F': case '\x10': case '\x11': case '\x12': case '\x13': + case '\x14': case '\x15': case '\x16': case '\x17': case '\x18': + case '\x19': case '\x1A': case '\x1B': case '\x1C': case '\x1D': + case '\x1E': case '\x1F': case '\x7F': case '\x20': case '<': + case '>': case '#': case '%': + ACE_ERROR ((LM_ERROR, + ACE_TEXT ("Invalid char %c in SystemLiteral\n"), ch)); + return -1; + default: + this->obstack_.grow (ch); + } + } +} + +int +ACEXML_Parser::parse_pubid_literal (ACEXML_Char*& str) +{ + const ACEXML_Char quote = this->get(); + if (quote != '\'' && quote != '"') // Not a quoted string. 
+ return -1; + while (1) + { + ACEXML_Char ch = this->get (); + if (ch == quote) + { + str = this->obstack_.freeze (); + return 0; + } + else if (this->isPubidChar (ch)) + this->obstack_.grow (ch); + else + return -1; + } +} + +int +ACEXML_Parser::parse_encname (ACEXML_Char*& str) +{ + const ACEXML_Char quote = this->get (); + if (quote != '\'' && quote != '"') // Not a quoted string. + return -1; + int numchars = 0; + while (1) + { + ACEXML_Char ch = this->get (); + if (ch == quote && !numchars) + return -1; + else if (ch == quote) + { + str = this->obstack_.freeze (); + return 0; + } + // [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* + if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) + && !numchars) + return -1; + if (ch == '-' || ((ch >= 'a' && ch <= 'z') || + (ch >= 'A' && ch <= 'Z') || + (ch >= '0' && ch <= '9') || + (ch == '_' || ch == '.'))) + { + this->obstack_.grow (ch); + numchars++; + } + else + return -1; + } +} + +int +ACEXML_Parser::parse_sddecl (ACEXML_Char*& str) +{ + ACEXML_Char quote = this->get (); + if (quote != '\'' && quote != '"') // Not a quoted string. + return -1; + int numchars = 0; + while (1) + { + ACEXML_Char ch = this->get (); + if (ch == quote && numchars < 2) + return -1; + else if (ch == quote) + { + str = this->obstack_.freeze (); + return 0; + } + // [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") + // | ('"' ('yes' | 'no') '"')) + switch (ch) + { + case 'y': case 'e': case 's': case 'n': case 'o': + this->obstack_.grow (ch); + numchars++; + break; + default: + return -1; + } + } +} + +void +ACEXML_Parser::prefix_mapping (const ACEXML_Char* prefix, + const ACEXML_Char* uri, + int start ACEXML_ENV_ARG_DECL) +{ + if (this->namespaces_) + { + const ACEXML_Char* temp = (prefix == 0) ? 
empty_string : prefix; + if (start) { + this->content_handler_->startPrefixMapping (temp, uri + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK; + } + else + { + this->content_handler_->endPrefixMapping(temp + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK; + } + } +} + +int +ACEXML_Parser::switch_input (ACEXML_CharStream* cstream, + const ACEXML_Char* systemId, + const ACEXML_Char* publicId) +{ + ACEXML_InputSource* input = 0; + ACE_NEW_RETURN (input, ACEXML_InputSource (cstream), -1); + return this->switch_input (input, systemId, publicId); +} + +int +ACEXML_Parser::switch_input (ACEXML_InputSource* input, + const ACEXML_Char* systemId, + const ACEXML_Char* publicId) +{ + ACEXML_LocatorImpl* locator = 0; + if (!systemId) + systemId = input->getSystemId(); + ACE_NEW_RETURN (locator, ACEXML_LocatorImpl (systemId, publicId), -1); + ACEXML_Parser_Context* new_context = 0; + ACE_NEW_RETURN (new_context, ACEXML_Parser_Context(input, locator), -1); + if (this->push_context (new_context) != 0) + { + ACE_ERROR ((LM_ERROR, "Unable to switch input streams")); + delete new_context; + return -1; + } + this->current_ = new_context; + this->content_handler_->setDocumentLocator (this->current_->getLocator()); + return 0; +} + +int +ACEXML_Parser::push_context (ACEXML_Parser_Context* context) +{ + if (this->ctx_stack_.push (context) < 0) + { + ACE_ERROR ((LM_ERROR, "Unable to push input source onto the stack")); + return -1; + } + return 0; +} + +size_t +ACEXML_Parser::pop_context (int GE_ref ACEXML_ENV_ARG_DECL) +{ + size_t nrelems = this->ctx_stack_.size(); + if (nrelems <= 1) + { + this->fatal_error(ACE_TEXT ("Unexpected end-of-file") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + + ACEXML_Parser_Context* temp = 0; + int retval = this->ctx_stack_.pop (temp); + if (retval != 0) + { + this->fatal_error (ACE_TEXT ("Unable to pop element of the input stack") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + delete temp; + if (this->ctx_stack_.top (this->current_) 
!= 0) + { + this->fatal_error (ACE_TEXT ("Unable to read top element of input stack") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + ACEXML_Char* reference = 0; + if (GE_ref == 1 && this->GE_reference_.size() > 0) + { + if (this->GE_reference_.pop (reference) < 0) + { + this->fatal_error (ACE_TEXT ("Internal Parser Error") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + } + else if (GE_ref == 0 && this->PE_reference_.size() > 0) + { + if (this->PE_reference_.pop (reference) < 0) + { + this->fatal_error (ACE_TEXT ("Internal Parser Error") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + } + nrelems = this->ctx_stack_.size(); + + if (this->external_entity_ && (GE_ref == 0 || GE_ref == 1)) + this->external_entity_--; + + this->content_handler_->setDocumentLocator (this->current_->getLocator()); + + return nrelems; +} + +int +ACEXML_Parser::getFeature (const ACEXML_Char *name ACEXML_ENV_ARG_DECL) +{ + if (ACE_OS::strcmp (name, ACEXML_Parser::simple_parsing_feature_) == 0) + { + return this->simple_parsing_; + } + else if (ACE_OS::strcmp (name, ACEXML_Parser::namespaces_feature_) == 0) + { + return this->namespaces_; + } + else if (ACE_OS::strcmp (name, + ACEXML_Parser::namespace_prefixes_feature_) == 0) + { + return this->namespace_prefixes_; + } + else if (ACE_OS::strcmp (name, ACEXML_Parser::validation_feature_) == 0) + { + return this->validate_; + } + ACEXML_THROW_RETURN (ACEXML_SAXNotRecognizedException (name), -1); +} + + + +void +ACEXML_Parser::setFeature (const ACEXML_Char *name, + int boolean_value ACEXML_ENV_ARG_DECL) +{ + if (ACE_OS::strcmp (name, ACEXML_Parser::simple_parsing_feature_) == 0) + { + this->simple_parsing_ = (boolean_value == 0 ? 0 : 1); + return; + } + else if (ACE_OS::strcmp (name, ACEXML_Parser::namespaces_feature_) == 0) + { + this->namespaces_ = (boolean_value == 0 ? 
0 : 1); + return; + } + else if (ACE_OS::strcmp (name, + ACEXML_Parser::namespace_prefixes_feature_) == 0) + { + this->namespace_prefixes_ = (boolean_value == 0 ? 0 : 1); + return; + } + else if (ACE_OS::strcmp (name, ACEXML_Parser::validation_feature_) == 0) + { + this->validate_ = (boolean_value == 0 ? 0 : 1); + return; + } + + ACEXML_THROW (ACEXML_SAXNotRecognizedException (name)); +} + +void * +ACEXML_Parser::getProperty (const ACEXML_Char *name ACEXML_ENV_ARG_DECL) +{ + ACEXML_THROW_RETURN (ACEXML_SAXNotSupportedException (name), 0); +} + +void +ACEXML_Parser::setProperty (const ACEXML_Char *name, + void *value ACEXML_ENV_ARG_DECL) +{ + ACE_UNUSED_ARG (value); + + ACEXML_THROW (ACEXML_SAXNotSupportedException (name)); +} + +void +ACEXML_Parser::error (const ACEXML_Char* msg ACEXML_ENV_ARG_DECL) +{ + ACEXML_SAXParseException* exception = 0; + ACE_NEW_NORETURN (exception, ACEXML_SAXParseException (msg)); + if (this->error_handler_) + this->error_handler_->error (*exception ACEXML_ENV_ARG_PARAMETER); + else + ACEXML_ENV_RAISE (exception); + return; +} + +void +ACEXML_Parser::warning (const ACEXML_Char* msg ACEXML_ENV_ARG_DECL) +{ + ACEXML_SAXParseException* exception = 0; + ACE_NEW_NORETURN (exception, ACEXML_SAXParseException (msg)); + if (this->error_handler_) + this->error_handler_->warning (*exception ACEXML_ENV_ARG_PARAMETER); + delete exception; + return; +} + +void +ACEXML_Parser::fatal_error (const ACEXML_Char* msg ACEXML_ENV_ARG_DECL) +{ + ACEXML_SAXParseException* exception = 0; + ACE_NEW_NORETURN (exception, ACEXML_SAXParseException (msg)); + if (this->error_handler_) + this->error_handler_->fatalError (*exception ACEXML_ENV_ARG_PARAMETER); + this->reset(); + ACEXML_ENV_RAISE (exception); + return; +} + +void +ACEXML_Parser::parse_version_info (ACEXML_ENV_SINGLE_ARG_DECL) +{ + ACEXML_Char* astring; + if (this->parse_token (ACE_TEXT("ersion")) < 0 + || this->skip_equal () != 0 + || this->parse_version_num (astring) != 0) + { + this->fatal_error 
(ACE_TEXT ("Invalid VersionInfo specification") + ACEXML_ENV_ARG_PARAMETER); + return; + } + if (ACE_OS::strcmp (astring, ACE_TEXT ("1.0")) != 0) + { + this->fatal_error (ACE_TEXT ("ACEXML Parser supports XML version 1.0 ") + ACE_TEXT ("documents only") ACEXML_ENV_ARG_PARAMETER); + return; + } +} + +void +ACEXML_Parser::parse_encoding_decl (ACEXML_ENV_SINGLE_ARG_DECL) +{ + ACEXML_Char* astring = 0; + if ((this->parse_token (ACE_TEXT("ncoding")) < 0) + || this->skip_equal () != 0 + || this->parse_encname (astring) != 0) + { + this->fatal_error (ACE_TEXT ("Invalid EncodingDecl specification") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK; + } + const ACEXML_Char* encoding = this->current_->getInputSource()->getEncoding(); + if (encoding != 0 && ACE_OS::strcmp (astring, encoding) != 0) + { + ACE_ERROR ((LM_ERROR, ACE_TEXT ("Detected Encoding is %s ") + ACE_TEXT (": Declared Encoding is %s\n"), + encoding, astring)); + this->warning (ACE_TEXT ("Declared encoding differs from detected ") + ACE_TEXT ("encoding") ACEXML_ENV_ARG_PARAMETER); + } +} + +int +ACEXML_Parser::parse_text_decl (ACEXML_ENV_SINGLE_ARG_DECL) +{ + // Read xml + if (this->parse_token (ACE_TEXT("xml")) < 0) + { + this->fatal_error(ACE_TEXT ("Expecting keyword 'xml' in TextDecl") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + + ACEXML_Char fwd = this->skip_whitespace(); + // Read version + if (fwd == 'v') + { + this->parse_version_info (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + fwd = this->skip_whitespace(); + } + + if (fwd == 'e') + { + this->parse_encoding_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + fwd = this->skip_whitespace(); + } + else + { + this->fatal_error (ACE_TEXT ("Missing encodingDecl in TextDecl") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + + if (fwd == '?' && this->get() == '>') + return 0; + // All the rules fail. So return an error. 
+ this->fatal_error (ACE_TEXT ("Invalid TextDecl") ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + return -1; +} + +void +ACEXML_Parser::parse_xml_decl (ACEXML_ENV_SINGLE_ARG_DECL) +{ + // Read <?xml + if (this->parse_token (ACE_TEXT("xml")) < 0) + { + this->fatal_error(ACE_TEXT ("Expecting keyword xml in XMLDecl") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK; + } + + ACEXML_Char fwd = this->skip_whitespace(); + + // Read version + if (fwd != 'v') + { + this->fatal_error (ACE_TEXT ("Expecting VersionInfo declaration") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK; + } + + this->parse_version_info (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK; + + fwd = this->skip_whitespace(); + if (fwd != '?') + { + if (fwd == 'e') + { + this->parse_encoding_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER); + ACEXML_CHECK; + fwd = this->skip_whitespace(); + } + if (fwd == 's') + { + ACEXML_Char* astring; + if ((this->parse_token (ACE_TEXT("tandalone")) == 0) && + this->skip_equal () == 0 && + this->parse_sddecl (astring) == 0) + { + if (ACE_OS::strcmp (astring, ACE_TEXT ("yes")) == 0) + this->standalone_ = 1; + fwd = this->skip_whitespace(); + } + } + } + if (fwd == '?' && this->get() == '>') + return; + // All the rules fail. So return an error. + this->fatal_error (ACE_TEXT ("Invalid XMLDecl declaration") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK; +} + +int +ACEXML_Parser::parse_comment (void) +{ + int state = 0; + + if (this->get () != '-' || // Skip the opening "<!--" + this->get () != '-' || // completely. + this->get () == '-') // and at least something not '-'. + return -1; + + while (state < 3) + // Waiting for the trailing three character '-->'. Notice that + // according to the spec, '--->' is not a valid closing comment + // sequence. But we'll let it pass anyway. + { + ACEXML_Char fwd = this->get (); + if ((fwd == '-' && state < 2) || + (fwd == '>' && state == 2)) + state += 1; + else + state = 0; // Reset parse state. 
+ } + return 0; +} + +int +ACEXML_Parser::parse_processing_instruction (ACEXML_ENV_SINGLE_ARG_DECL) +{ + const ACEXML_Char *pitarget = this->parse_name (); + ACEXML_Char *instruction = 0; + + if (!ACE_OS::strcasecmp (ACE_TEXT ("xml"), pitarget)) + { + // Invalid PITarget name. + this->fatal_error(ACE_TEXT ("PI can't have 'xml' in PITarget") + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + } + + int state = 0; + + ACEXML_Char ch = this->skip_whitespace(); + while (state < 2) + { + switch (ch) + { + case '?': + if (state == 0) + state = 1; + break; + case '>': + if (state == 1) + { + instruction = this->obstack_.freeze (); + this->content_handler_->processingInstruction (pitarget, + instruction + ACEXML_ENV_ARG_PARAMETER); + ACEXML_CHECK_RETURN (-1); + this->obstack_.unwind (const_cast<ACEXML_Char*> (pitarget)); + return 0; + } + break; + case 0x0A: + // Fall thru... + default: + if (state == 1) + this->obstack_.grow ('?'); + this->obstack_.grow (ch); + state = 0; + } + ch = this->get (); + } + return -1; +} + +void +ACEXML_Parser::reset (void) +{ + this->doctype_ = 0; + if (this->ctx_stack_.pop (this->current_) == -1) + ACE_ERROR ((LM_ERROR, + ACE_TEXT ("Mismatched push/pop of Context stack"))); + if (this->current_) + { + this->current_->getInputSource()->getCharStream()->rewind(); + + this->current_->setInputSource (0); + delete this->current_; + this->current_ = 0; + } + + ACEXML_Char* temp = 0; + while (this->GE_reference_.pop (temp) != -1) + ; + while (this->PE_reference_.pop (temp) != -1) + ; + this->obstack_.release(); + this->alt_stack_.release(); + this->xml_namespace_.reset(); + this->nested_namespace_ = 0; + this->internal_GE_.reset(); + this->external_GE_.reset(); + this->unparsed_entities_.reset(); + this->predef_entities_.reset(); + this->internal_PE_.reset(); + this->external_PE_.reset(); + this->notations_.reset(); + this->ref_state_ = ACEXML_ParserInt::INVALID; + this->external_subset_ = 0; + this->external_entity_ = 0; + 
this->has_pe_refs_ = 0; + this->standalone_ = 0; + this->external_dtd_ = 0; + this->internal_dtd_ = 0; +} + diff --git a/ACE/ACEXML/parser/parser/Parser.h b/ACE/ACEXML/parser/parser/Parser.h new file mode 100644 index 00000000000..3b0725ec214 --- /dev/null +++ b/ACE/ACEXML/parser/parser/Parser.h @@ -0,0 +1,858 @@ +// -*- C++ -*- + +//============================================================================= +/** + * @file Parser.h + * + * $Id$ + * + * @author Nanbor Wang <nanbor@cs.wustl.edu> + * @author Krishnakumar B <kitty@cs.wustl.edu> + */ +//============================================================================= + +#ifndef _ACEXML_BASIC_PARSER_H_ +#define _ACEXML_BASIC_PARSER_H_ + +#include /**/ "ace/pre.h" +#include "ACEXML/parser/parser/Parser_export.h" + +#if !defined (ACE_LACKS_PRAGMA_ONCE) +#pragma once +#endif /* ACE_LACKS_PRAGMA_ONCE */ + +#include "ACEXML/common/XMLReader.h" +#include "ACEXML/common/LocatorImpl.h" +#include "ACEXML/common/NamespaceSupport.h" +#include "ACEXML/common/CharStream.h" +#include "ace/Obstack.h" +#include "ace/Functor.h" +#include "ace/SString.h" +#include "ace/Hash_Map_Manager.h" +#include "ace/Unbounded_Set.h" +#include "ace/Containers_T.h" +#include "ace/Auto_Ptr.h" +#include "ACEXML/parser/parser/Entity_Manager.h" +#include "ACEXML/parser/parser/ParserInternals.h" +#include "ACEXML/parser/parser/ParserContext.h" + +/** + * @class ACEXML_Parser Parser.h "ACEXML/parser/parser/Parser.h" + * + * @brief A SAX based parser. + * + */ +class ACEXML_PARSER_Export ACEXML_Parser : public ACEXML_XMLReader +{ +public: + /// Default constructor. + ACEXML_Parser (void); + + /// Destructor. + virtual ~ACEXML_Parser (void); + + /** + * Initialize the parser state. + * + * @retval 0 if parser was initialized correctly else -1. + */ + int initialize (ACEXML_InputSource* input); + + /** + * Return the current content handler. 
+ */ + virtual ACEXML_ContentHandler *getContentHandler (void) const; + + /* + * Return the current DTD handler. + */ + virtual ACEXML_DTDHandler *getDTDHandler (void) const; + + /* + * Return the current entity resolver. + */ + virtual ACEXML_EntityResolver *getEntityResolver (void) const; + + /* + * Return the current error handler. + */ + virtual ACEXML_ErrorHandler *getErrorHandler (void) const; + + /** + * Look up the value of a feature. This method allows + * programmers to check whether a specific feature has been + * activated in the parser. + */ + virtual int getFeature (const ACEXML_Char *name ACEXML_ENV_ARG_DECL); + + /** + * Activating or deactivating a feature. + */ + virtual void setFeature (const ACEXML_Char *name, + int boolean_value ACEXML_ENV_ARG_DECL); + + /* + * Look up the value of a property. + */ + virtual void * getProperty (const ACEXML_Char *name ACEXML_ENV_ARG_DECL); + + /* + * Set the value of a property. + */ + virtual void setProperty (const ACEXML_Char *name, + void *value ACEXML_ENV_ARG_DECL); + + /* + * Parse an XML document. + */ + virtual void parse (ACEXML_InputSource *input ACEXML_ENV_ARG_DECL); + + /* + * Parse an XML document from a system identifier (URI). + */ + virtual void parse (const ACEXML_Char *systemId ACEXML_ENV_ARG_DECL) + ; + + /* + * Allow an application to register a content event handler. + */ + virtual void setContentHandler (ACEXML_ContentHandler *handler); + + /* + * Allow an application to register a DTD event handler. + */ + virtual void setDTDHandler (ACEXML_DTDHandler *handler); + + /* + * Allow an application to register an entity resolver. + */ + virtual void setEntityResolver (ACEXML_EntityResolver *resolver); + + /* + * Allow an application to register an error event handler. + */ + virtual void setErrorHandler (ACEXML_ErrorHandler *handler); + + + +protected: + /** + * Parse XML Prolog. + */ + void parse_xml_prolog (ACEXML_ENV_SINGLE_ARG_DECL); + + /** + * Parse VersionInfo declaration. 
+ * + */ + void parse_version_info (ACEXML_ENV_SINGLE_ARG_DECL); + + /** + * Parse a EncodingDecl declaration. + * + */ + void parse_encoding_decl (ACEXML_ENV_SINGLE_ARG_DECL); + + /** + * Parse a XMLDecl declaration. + * + */ + void parse_xml_decl (ACEXML_ENV_SINGLE_ARG_DECL); + + /** + * Parse a TextDecl declaration. + */ + int parse_text_decl (ACEXML_ENV_SINGLE_ARG_DECL); + + /** + * Parse a PI statement. The first character encountered + * should always be '?' in the PI prefix "@<?". + * + * @retval 0 on success, -1 otherwise. + */ + int parse_processing_instruction (ACEXML_ENV_SINGLE_ARG_DECL); + + /** + * Parse the DOCTYPE declaration. The first character encountered + * should always be 'D' in doctype prefix: "@<@!DOCTYPE". + */ + int parse_doctypedecl (ACEXML_ENV_SINGLE_ARG_DECL); + + /** + * Parse an XML element. The first character encountered should + * be the first character of the element "Name". + * + * @param is_root If not 0, then we are expecting to see the "root" + * element now, and the next element's name need to match the name + * defined in DOCTYPE definition, i.e., @a this->doctype_. + * + * @todo Instead of simply checking for the root element based on the + * argument @a is_root, we should instead either pass in some sort + * of validator or allow the function to return the element name so it + * can be used in a validator. + */ + void parse_element (int is_root ACEXML_ENV_ARG_DECL); + + /** + * Parse a content declaration. + * + */ + int parse_content (const ACEXML_Char* startname, const ACEXML_Char*& ns_uri, + const ACEXML_Char*& ns_lname, int ns_flag + ACEXML_ENV_ARG_DECL); + + /** + * Parse a character reference, i.e., " " or "". The first + * character encountered should be the '#' char. + * + * @param buf points to a character buffer for the result. + * + * @param len In/out argument which initially specifies the size of the + * buffer and is later set to the no. of characters in the reference. 
+ * + * @retval 0 on success and -1 otherwise. + */ + int parse_char_reference (ACEXML_Char *buf, size_t& len); + + /** + * Parse a reference name, i.e., foo in "&foo;" or "%foo;". The first + * character encountered should be the character following '&' or '%'. + * Effectively the same as @sa parse_name but we don't use the parser's + * obstack. Caller is responsible for deleting the memory. + * + * @retval A pointer to name of reference, 0 otherwise. + */ + ACEXML_Char* parse_reference_name (void); + + /** + * Parse a CDATA section. The first character should always be the first + * '[' in CDATA definition. + * + * @retval 0 on success. + * @retval -1 if fail. + */ + int parse_cdata (ACEXML_ENV_SINGLE_ARG_DECL); + + /** + * Parse a "markupdecl" section, this includes both "markupdecl" and + * "DeclSep" sections in XML specification + */ + int parse_internal_dtd (ACEXML_ENV_SINGLE_ARG_DECL); + + /** + * Skip over a comment. The first character encountered should always be + * the first '-' in the comment prefix "@<@!--". + */ + int parse_comment (void); + + /** + * Parse an "ELEMENT" decl. The first character this method + * expects is always the 'L' (the second char) in the word + * "ELEMENT". + * + * @retval 0 on success, -1 otherwise. + */ + int parse_element_decl (ACEXML_ENV_SINGLE_ARG_DECL); + + /** + * Parse an "ENTITY" decl. The first character this method expects + * is always the 'N' (the second char) in the word "ENTITY". + * + * @retval 0 on success, -1 otherwise. + */ + int parse_entity_decl (ACEXML_ENV_SINGLE_ARG_DECL); + + /** + * Parse an "ATTLIST" decl. Thse first character this method + * expects is always the 'A' (the first char) in the word + * "ATTLIST". + * + * @retval 0 on success, -1 otherwise. + */ + int parse_attlist_decl (ACEXML_ENV_SINGLE_ARG_DECL); + + /** + * Parse a AttType declaration. + * + */ + int parse_atttype (ACEXML_ENV_SINGLE_ARG_DECL); + + /** + *Parse a "NOTATION" decl. 
The first character this method + * expects is always the 'N' (the first char) in the word + * "NOTATION". + * + * @retval 0 on success, -1 otherwise. + */ + int parse_notation_decl (ACEXML_ENV_SINGLE_ARG_DECL); + + /** + * Parse an ExternalID or a reference to PUBLIC ExternalID. + * Possible cases are in the forms of: <code> + * + * SYSTEM 'quoted string representing system resource' + * PUBLIC 'quoted name of public ID' 'quoted resource' + * PUBLIC 'quoted name we are referring to' + * </code> + * + * The first character this function sees must be either 'S' or 'P'. + * When the function finishes parsing, the input stream points + * at the first non-whitespace character. + * + * @param publicId returns the unquoted publicId read. If none + * is available, it will be reset to 0. + * @param systemId returns the unquoted systemId read. If none + * is available, it will be reset to 0. + * + * @retval 0 on success, -1 otherwise. + */ + int parse_external_id (ACEXML_Char *&publicId, ACEXML_Char *&systemId + ACEXML_ENV_ARG_DECL); + + /** + * Parse an external DTD. + * + */ + int parse_external_dtd (ACEXML_ENV_SINGLE_ARG_DECL); + + /** + * Parse an external subset. This does the actual parsing of an external + * subset and is called by @sa parse_external_dtd. + * + */ + int parse_external_subset (ACEXML_ENV_SINGLE_ARG_DECL); + + /** + * Parse a markupDecl section. + * + */ + int parse_markup_decl (ACEXML_ENV_SINGLE_ARG_DECL); + + /** + * Parse a conditionalSect declaration. + * + */ + int parse_conditional_section (ACEXML_ENV_SINGLE_ARG_DECL); + + /** + * Parse a includeSect declaration. + * + */ + int parse_includesect (ACEXML_ENV_SINGLE_ARG_DECL); + + /** + * + * Parse a ignoreSect declaration. + */ + int parse_ignoresect (ACEXML_ENV_SINGLE_ARG_DECL); + + /** + * Parse a PEReference. + * + */ + int parse_PE_reference (ACEXML_ENV_SINGLE_ARG_DECL); + + /** + * Parse a Reference. 
+ * + */ + int parse_entity_reference (ACEXML_ENV_SINGLE_ARG_DECL); + + /** + * Parse an entityValue. + * + */ + int parse_entity_value (ACEXML_Char *&str ACEXML_ENV_ARG_DECL); + + /** + * Parse a DefaultDecl specification. + * + */ + int parse_defaultdecl (ACEXML_ENV_SINGLE_ARG_DECL); + + + /** + * Parse the "children" and "Mixed" non-terminals in contentspec. + * + * The first character this function sees must be the first + * open paren '(' in children. + * + * @retval 0 on success, -1 otherwise. + */ + int parse_children_definition (ACEXML_ENV_SINGLE_ARG_DECL); + + /** + * Parse a @c cp non-terminal. @c cp can either be a @c seq or a @c choice. + * This function calls itself recursively. + * + * @param skip_open_paren when non-zero, it indicates that the open paren of + * the @c seq or @c choice has already been removed from the input + * stream. + * + * @retval 0 on success, -1 otherwise. + */ + int parse_child (int skip_open_paren ACEXML_ENV_ARG_DECL); + + /** + * Parse a name from the input CharStream. If @a ch @!= 0, then we have + * already consumed the first name character from the input CharStream, + * otherwise, parse_name will use this->get() to acquire the initial + * character. + * + * @return A pointer to the string in the obstack, 0 if it's not a + * valid name. + */ + ACEXML_Char *parse_name (ACEXML_Char ch = 0); + + /** + * Parse a NMTOKEN from the input stream. + * + * @return A pointer to the string in the obstack, 0 if it's not a valid + * NMTOKEN. + */ + ACEXML_Char* parse_nmtoken (ACEXML_Char ch = 0); + + /** + * Parse the version string in an XML Prolog section. + * + * @param str String containing the version number if successful. + * @return 0 if the string was read successfully, 0 otherwise. + */ + int parse_version (ACEXML_Char*& str); + + /** + * Parse the version number in a VersionInfo declaration. + */ + int parse_version_num (ACEXML_Char*& str); + + /** + * Parse the encoding name in an XML Prolog section. 
+ * + * @param str String containing the encoding name if successful. + * @return 0 if the string was read successfully; presumably -1 otherwise + * (the original comment said "0 otherwise" -- TODO confirm the + * failure value against Parser.cpp). + */ + int parse_encname (ACEXML_Char*& str); + + /** + * Parse a SDDecl string. + * + * @param str String containing the SDDecl value if successful. + * @return 0 if the string was read successfully, -1 otherwise. + */ + int parse_sddecl (ACEXML_Char*& str); + + /** + * Parse an attribute name. + * + * @retval str String containing the value of the attribute name + * if successful. + * @retval 0 otherwise. + */ + ACEXML_Char* parse_attname (ACEXML_ENV_SINGLE_ARG_DECL); + + /** + * Parse an attribute value. + * + * @param str String containing the value of the attribute if successful. + * @return 0 if attribute value was read successfully, -1 otherwise. + */ + int parse_attvalue (ACEXML_Char*& str ACEXML_ENV_ARG_DECL); + + /** + * Parse a tokenized type attribute. + * + * @return 0 if attribute type was read successfully, -1 otherwise. + */ + int parse_tokenized_type (ACEXML_ENV_SINGLE_ARG_DECL); + + /** + * Parse a SystemLiteral. + * + * @param str String containing the SystemLiteral if successful. + * @return 0 if the string was read successfully; presumably -1 otherwise + * (the original comment said "0 otherwise" -- TODO confirm the + * failure value against Parser.cpp). + */ + int parse_system_literal (ACEXML_Char*& str); + + /** + * Parse a PubidLiteral. + * + * @param str String containing the PubidLiteral if successful. + * @return 0 if the string was read successfully; presumably -1 otherwise + * (the original comment said "0 otherwise" -- TODO confirm the + * failure value against Parser.cpp). + */ + int parse_pubid_literal (ACEXML_Char*& str); + + /** + * Check if a character @a c is a whitespace. + * + * @retval 1 if @a c is a valid white space character. 0 otherwise. + */ + int is_whitespace (const ACEXML_Char c) const; + + /** + * Check if a character @a c is a valid Char. + * + * @retval 1 if @a c is a valid character. 0 otherwise. + */ + int isChar (ACEXML_UCS4 c) const; + + /** + * Check if a character @a c is a valid CharRef character. + * + * NOTE: the inline implementation accepts only the hexadecimal letters + * 'a'-'f' and 'A'-'F'; decimal digits are not matched by this check. + * + * @retval 1 if @a c is a valid character reference character, 0 otherwise. 
+ */ + int isCharRef (const ACEXML_Char c) const; + + /** + * Check if a character @a c is a BaseChar. + * + * @retval 1 if @a c is a valid BaseChar character, 0 otherwise. + */ + int isBasechar (const ACEXML_Char c) const; + + /** + * Check if a character @a c is a Ideographic. + * + * @retval 1 if @a c is a valid Ideographic character, 0 otherwise. + */ + int isIdeographic (const ACEXML_Char c) const; + + /** + * Check if a character @a c is a CombiningChar. + * + * @retval 1 if @a c is a valid CombiningChar character, 0 otherwise. + */ + int isCombiningchar (const ACEXML_Char c) const; + + /** + * Check if a character @a c is a Digit. + * + * @retval 1 if @a c is a valid Digit character, 0 otherwise. + */ + int isDigit (const ACEXML_Char c) const; + + /** + * Check if a character @a c is an Extender. + * + * @retval 1 if @a c is a valid Extender character, 0 otherwise. + */ + int isExtender (const ACEXML_Char c) const; + + /** + * Check if a character @a c is a Letter. + * + * @retval 1 if @a c is a valid Letter character, 0 otherwise. + */ + int isLetter (const ACEXML_Char c) const; + + /** + * Check if a character is an acceptable NameChar. + * + * @retval 1 if @a c is a valid NameChar character, 0 otherwise. + */ + int isNameChar (const ACEXML_Char c) const; + + /** + * Check if a character is a PubidChar. + * + * @retval 1 if @a c is a valid PubidChar character, 0 otherwise. + */ + int isPubidChar (const ACEXML_Char c) const; + + /// Get a character. + virtual ACEXML_Char get (void); + + /// Peek a character. + virtual ACEXML_Char peek (void); + +private: + + // *** Helper functions for parsing XML + + /** + * Skip any whitespaces encountered until the first non-whitespace + * character is encountered. + * + * @return The next non-whitespace character from the CharStream. + * + * @sa skip_whitespace_count + */ + ACEXML_Char skip_whitespace (void); + + /** + * Skip any whitespaces encountered until the first non-whitespace + * character. 
The first non-whitespace character is not consumed. + * This method does peek into the input CharStream and therefore + * is more expensive than @ref skip_whitespace. + * + * @param peek If non-null, @a peek points to an ACEXML_Char where + * skip_whitespace_count stores the first non-whitespace + * character it sees (character is not removed from the stream.) + * + * @return The number of whitespace characters consumed. + * + * @sa skip_whitespace + */ + int skip_whitespace_count (ACEXML_Char *peek = 0); + + /** + * Skip an equal sign, together with any whitespace before and after it. + * + * @retval 0 on success, -1 if no equal sign is found. + */ + int skip_equal (void); + + /** + * Get a quoted string. Quoted strings are used to specify + * attribute values and this routine will replace character and + * entity references on-the-fly. Parameter entities are not allowed + * (or replaced) in this function. (But regular entities are.) + * + * @param str returns the un-quoted string. + * + * @retval 0 on success, -1 otherwise. + */ + int get_quoted_string (ACEXML_Char *&str); + + /** + * Check if a character @a c is an ASCII decimal digit ('0' through '9'). + * + * @retval 1 if @a c is an ASCII decimal digit, 0 otherwise. + */ + int isNormalDigit (const ACEXML_Char c) const; + + /** + * Dispatch errors to ErrorHandler. + * + */ + void error (const ACEXML_Char* msg ACEXML_ENV_ARG_DECL); + + /** + * Dispatch warnings to ErrorHandler. + * + */ + void warning (const ACEXML_Char* msg ACEXML_ENV_ARG_DECL); + + /** + * Dispatch fatal errors to ErrorHandler. + * + */ + void fatal_error (const ACEXML_Char* msg ACEXML_ENV_ARG_DECL); + + /** + * Dispatch prefix mapping calls to the ContentHandler. + * + * @param prefix Namespace prefix + * @param uri Namespace URI + * @param start 1 => startPrefixMapping, 0 => endPrefixMapping + */ + void prefix_mapping (const ACEXML_Char* prefix, + const ACEXML_Char* uri, + int start ACEXML_ENV_ARG_DECL); + /** + * Parse a keyword. 
+ */ + int parse_token (const ACEXML_Char* keyword); + + /** + * Push the current context on to the stack. + * + */ + int push_context (ACEXML_Parser_Context* context); + + /** + * Pop the top element in the stack and replace current context with that. + */ + size_t pop_context (int GE_ref ACEXML_ENV_ARG_DECL); + + /** + * Create a new ACEXML_CharStream from @a systemId and @a publicId and + * replace the current input stream with the newly created stream. + */ + virtual int switch_input (ACEXML_CharStream* cstream, + const ACEXML_Char* systemId, + const ACEXML_Char* publicId = 0); + /** + * Create a new ACEXML_InputSource from @a systemId and @a publicId and + * replace the current input source with the newly created InputSource. + */ + virtual int switch_input (ACEXML_InputSource* input, + const ACEXML_Char* systemId, + const ACEXML_Char* publicId = 0); + + /** + * Check for a parameter entity reference. This is used to check for the + * occurrence of a PE Reference within markupDecl. Additionally this + * function consumes any leading or trailing whitespace around the PE + * Reference. + * + * @return Number of whitespace characters skipped. + */ + int check_for_PE_reference (ACEXML_ENV_SINGLE_ARG_DECL); + + /** + * Reset the parser state. + * + */ + void reset (void); + + /** + * Very trivial, non-conformant normalization of a systemid. + * + */ + ACEXML_Char* normalize_systemid (const ACEXML_Char* systemId); + + // Feature names: + + /** + * \addtogroup acexml_parser_features + * @{ + */ + + /** + * @var simple_parsing_feature_ + * + * This constant string defines the name of "simple XML parsing" + * feature. When this feature is enabled, ACEXML parser is allowed + * to parse a simple XML stream without mandated XML prolog + * and no DTD definition. + */ + static const ACEXML_Char simple_parsing_feature_[]; + + /** + * @var namespaces_feature_ + * + * This constant string defines the SAX XML Namespace feature. 
When this + * feature is enabled, ACEXML parser allows access by namespace qualified + * names. + */ + static const ACEXML_Char namespaces_feature_[]; + + /** + * @var namespace_prefixes_feature_ + * + * This constant string defines the SAX XML Namespace prefixes feature. + * Normally the list of attributes returned by the parser will not + * contain attributes used as namespace declarations (xmlns*). When this + * feature is enabled, the list of attributes contains the namespace + * declarations also. + */ + static const ACEXML_Char namespace_prefixes_feature_[]; + + /** + * @var validation_feature_ + * + * This constant string defines the SAX XML Validation feature. When + * this feature is enabled, the parser validates the document in + * addition to checking for well-formedness. + */ + static const ACEXML_Char validation_feature_[]; + + /* @} */ + + /// Keeping track of the handlers. We do not manage the memory for + /// handlers. + ACEXML_DTDHandler *dtd_handler_; + ACEXML_EntityResolver *entity_resolver_; + ACEXML_ContentHandler *content_handler_; + ACEXML_ErrorHandler *error_handler_; + + /// Document Type + ACEXML_Char *doctype_; + + /// Current parser context + ACEXML_Parser_Context* current_; + + /// Stack used to hold the Parser_Context + ACE_Unbounded_Stack<ACEXML_Parser_Context*> ctx_stack_; + + /* + * The following two are essentially chains of references and is used by + * the parser to determine if there is any recursion. We keep two of + * these one for general entities and one for parameter entities, as they + * both fall under different namespaces. + * + */ + /// Set used to hold the general entity references that are active. + ACE_Unbounded_Stack<ACEXML_Char*> GE_reference_; + + /// Set used to hold the parameter entity references that are active. 
+ ACE_Unbounded_Stack<ACEXML_Char*> PE_reference_; + + /// Obstack used by the parser to hold all the strings parsed + ACE_Obstack_T<ACEXML_Char> obstack_; + + /// Alternative obstack used to hold any strings when the original is in use + ACE_Obstack_T<ACEXML_Char> alt_stack_; + + /// Namespace stack used by the parser to implement support for Namespaces + ACEXML_NamespaceSupport xml_namespace_; + + /// T => We are processing a nested namespace + int nested_namespace_; + + /// Set of internal parsed general entities in the document + ACEXML_Entity_Manager internal_GE_; + + /// Set of external parsed general entities in the document + ACEXML_Entity_Manager external_GE_; + + /// Set of unparsed entities in the document + ACEXML_Entity_Manager unparsed_entities_; + + /// Set of predefined entities used by the parser + ACEXML_Entity_Manager predef_entities_; + + /// Set of internal parsed parameter entities in the document + ACEXML_Entity_Manager internal_PE_; + + /// Set of external parsed parameter entities in the document + ACEXML_Entity_Manager external_PE_; + + /// Set of notations declared in the document + ACEXML_Entity_Manager notations_; + + /// State of the parser when it encounters a reference. + ACEXML_ParserInt::ReferenceState ref_state_; + + /// T => We are parsing an external subset + int external_subset_; + + /// T => We are parsing an external entity value + int external_entity_; + + /// T => Internal DTD has parameter entity references + int has_pe_refs_; + + /// If set, the document is a standalone XML document + int standalone_; + + /// If set, the document has an external DTD subset + int external_dtd_; + + /// If set, the document has an internal DTD + int internal_dtd_; + + /// Feature flags + /// If set, the parser should parse a document without a prolog + int simple_parsing_; + + /// If set, the parser should also validate + int validate_; + + /// If set, the parser should allow access by namespace qualified names. 
+ int namespaces_; + + /// If set, the parser should include namespace declarations in the list + /// of attributes of an element. + int namespace_prefixes_; + +}; + +#if defined (__ACEXML_INLINE__) +# include "ACEXML/parser/parser/Parser.inl" +#endif /* __ACEXML_INLINE__ */ + +#include /**/ "ace/post.h" + +#endif /* _ACEXML_BASIC_PARSER_H_ */ diff --git a/ACE/ACEXML/parser/parser/Parser.inl b/ACE/ACEXML/parser/parser/Parser.inl new file mode 100644 index 00000000000..1706d0a88e1 --- /dev/null +++ b/ACE/ACEXML/parser/parser/Parser.inl @@ -0,0 +1,257 @@ +//============================================================================= +/** + * @file Parser.inl + * + * $Id$ + * + * @author Nanbor Wang <nanbor@cs.wustl.edu> + */ +//============================================================================= + +ACEXML_INLINE ACEXML_ContentHandler * +ACEXML_Parser::getContentHandler (void) const +{ + return this->content_handler_; +} + +ACEXML_INLINE ACEXML_DTDHandler * +ACEXML_Parser::getDTDHandler (void) const +{ + return this->dtd_handler_; +} + +ACEXML_INLINE ACEXML_EntityResolver * +ACEXML_Parser::getEntityResolver (void) const +{ + return this->entity_resolver_; +} + +ACEXML_INLINE ACEXML_ErrorHandler * +ACEXML_Parser::getErrorHandler (void) const +{ + return this->error_handler_; +} + +ACEXML_INLINE void +ACEXML_Parser::setContentHandler (ACEXML_ContentHandler *handler) +{ + this->content_handler_ = handler; +} + +ACEXML_INLINE void +ACEXML_Parser::setDTDHandler (ACEXML_DTDHandler *handler) +{ + this->dtd_handler_ = handler; +} + +ACEXML_INLINE void +ACEXML_Parser::setEntityResolver (ACEXML_EntityResolver *resolver) +{ + this->entity_resolver_ = resolver; +} + +ACEXML_INLINE void +ACEXML_Parser::setErrorHandler (ACEXML_ErrorHandler *handler) +{ + this->error_handler_ = handler; +} + +ACEXML_INLINE int +ACEXML_Parser::isChar (ACEXML_UCS4 c) const +{ + return (c == 0x9 || c == 0xA || c == 0xD || + c >= 0x20 && c <= 0xD7FF || + c >= 0xE000 && c <= 0xFFFD || + c >= 
0x10000 && c <= 0x10FFFF); +} + +ACEXML_INLINE int +ACEXML_Parser::isCharRef (const ACEXML_Char c) const + { + return ((c >= 'a' && c <= 'f') || + (c >= 'A' && c <= 'F')); +} + +ACEXML_INLINE int +ACEXML_Parser::isNormalDigit (const ACEXML_Char c) const +{ + return (c >= '\x30' && c <= '\x39'); +} + +ACEXML_INLINE int +ACEXML_Parser::isBasechar (const ACEXML_Char c) const +{ +#if defined (ACE_USES_WCHAR) + return ACEXML_ParserInt::isBasechar_i (c); +#else + return ACEXML_ParserInt::base_char_table_[(int) c]; +#endif /* ACE_USES_WCHAR */ +} + +ACEXML_INLINE int +ACEXML_Parser::isIdeographic (const ACEXML_Char c) const +{ +#if defined (ACE_USES_WCHAR) + return ACEXML_ParserInt::isIdeographic_i (c); +#else + ACE_UNUSED_ARG (c); + return 0; +#endif /* ACE_USES_WCHAR */ +} + +ACEXML_INLINE int +ACEXML_Parser::isCombiningchar (const ACEXML_Char c) const +{ +#if defined (ACE_USES_WCHAR) + return ACEXML_ParserInt::isCombiningchar_i (c); +#else + ACE_UNUSED_ARG (c); + return 0; +#endif /* ACE_USES_WCHAR */ + } + +ACEXML_INLINE int +ACEXML_Parser::isDigit (const ACEXML_Char c) const +{ +#if defined (ACE_USES_WCHAR) + return ACEXML_ParserInt::isDigit_i (c); +#else + return (this->isNormalDigit (c)); +#endif /* ACE_USES_WCHAR */ +} + +ACEXML_INLINE int +ACEXML_Parser::isExtender (const ACEXML_Char c) const +{ +#if defined (ACE_USES_WCHAR) + return ACEXML_ParserInt::isExtender_i (c); +#else + return (c == '\xB7'); +#endif /* ACE_USES_WCHAR */ +} + +ACEXML_INLINE int +ACEXML_Parser::isLetter (const ACEXML_Char c) const +{ + return (this->isBasechar (c) || this->isIdeographic (c)); +} + +ACEXML_INLINE int +ACEXML_Parser::isNameChar (const ACEXML_Char c) const +{ + return (this->isLetter (c) || this->isDigit (c) || c == '.' 
|| c == '-' ||
+          c == '_' || c == ':' || this->isCombiningchar (c) ||
+          this->isExtender (c));
+}
+
+/// Return 1 when @a c may appear in a public identifier literal
+/// (space, CR, LF, ASCII letters and digits, and the punctuation listed
+/// below), 0 otherwise.
+ACEXML_INLINE int
+ACEXML_Parser::isPubidChar (const ACEXML_Char c) const
+{
+  // White space permitted inside the literal.
+  if (c == '\x20' || c == '\x0D' || c == '\x0A')
+    return 1;
+
+  // ASCII letters and digits.
+  if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
+      (c >= '0' && c <= '9'))
+    return 1;
+
+  // Remaining punctuation.
+  switch (c)
+    {
+    case '-': case '\'': case '(': case ')': case '+': case ',':
+    case '.': case '/': case ':': case '=': case '?': case ';':
+    case '!': case '*': case '#': case '@': case '$': case '_':
+    case '%':
+      return 1;
+    default:
+      return 0;
+    }
+}
+
+
+/// Return 1 when @a c is a space, tab, carriage return or line feed,
+/// 0 otherwise.
+ACEXML_INLINE int
+ACEXML_Parser::is_whitespace (const ACEXML_Char c) const
+{
+  if (c == '\x09' || c == '\x0A' || c == '\x0D' || c == '\x20')
+    return 1;
+  return 0;
+}
+
+/// Consume characters up to and including the first one that is not
+/// whitespace, and return that character.
+ACEXML_INLINE ACEXML_Char
+ACEXML_Parser::skip_whitespace (void)
+{
+  ACEXML_Char next;
+  do
+    {
+      next = this->get ();
+    }
+  while (this->is_whitespace (next));
+  return next;
+}
+
+
+/// Consume leading whitespace without consuming the character after it.
+/// The last character peeked is stored through @a peeky when it is
+/// non-null.  Returns the number of whitespace characters consumed.
+ACEXML_INLINE int
+ACEXML_Parser::skip_whitespace_count (ACEXML_Char *peeky)
+{
+  ACEXML_Char scratch;
+  // Write into the caller's slot when one was supplied, else into a local.
+  ACEXML_Char *lookahead = (peeky != 0 ? peeky : &scratch);
+  int skipped = 0;
+
+  while (this->is_whitespace ((*lookahead = this->peek ())))
+    {
+      this->get ();
+      ++skipped;
+    }
+  return skipped;
+}
+
+/// Skip optional whitespace, an '=' sign, and any whitespace after it.
+/// Returns 0 on success, -1 when the first non-whitespace character is
+/// not '='.
+ACEXML_INLINE int
+ACEXML_Parser::skip_equal (void)
+{
+  if (this->skip_whitespace () == '=')
+    {
+      while (this->is_whitespace (this->peek ()))
+        this->get ();
+      return 0;
+    }
+  return -1;
+}
+
+ACEXML_INLINE ACEXML_Char
+ACEXML_Parser::get (void)
+{
+  ACEXML_Char ch = 0;
+  const ACEXML_InputSource* ip = this->current_->getInputSource();
+  ACEXML_CharStream* instream = ip->getCharStream();
+
+  if (instream->get (ch) != -1)
+    {
+      this->current_->getLocator()->incrColumnNumber();
+      // Normalize white-space
+      if (ch == '\x0D')
+        {
+          if (instream->peek() == 0x0A)
+            instream->get (ch);
+          ch = '\x0A';
+        }
+      if (ch == '\x0A')
+        {
+          // Reset column number and increment Line Number.
+ this->current_->getLocator()->incrLineNumber(); + this->current_->getLocator()->setColumnNumber (0); + } + return ch; + } + return 0; +} + +ACEXML_INLINE ACEXML_Char +ACEXML_Parser::peek (void) +{ + // Using an extra level of indirection so we can + // manage document location in the future. + ACEXML_Char ch = 0; + const ACEXML_InputSource* ip = this->current_->getInputSource(); + ACEXML_CharStream* instream = ip->getCharStream(); + ch = static_cast<ACEXML_Char> (instream->peek ()); + return (ch > 0 ? ch : 0); +} + +ACEXML_INLINE int +ACEXML_Parser::parse_token (const ACEXML_Char* keyword) +{ + if (keyword == 0) + return -1; + const ACEXML_Char* ptr = keyword; + for (; *ptr != 0 && (this->get() == *ptr); ++ptr) + ; + if (*ptr == 0) + return 0; + else + return -1; +} diff --git a/ACE/ACEXML/parser/parser/ParserContext.cpp b/ACE/ACEXML/parser/parser/ParserContext.cpp new file mode 100644 index 00000000000..663ae1ef1ac --- /dev/null +++ b/ACE/ACEXML/parser/parser/ParserContext.cpp @@ -0,0 +1,16 @@ +// $Id$ + +#include "ACEXML/parser/parser/ParserContext.h" + +#if !defined (__ACEXML_INLINE__) +# include "ACEXML/parser/parser/ParserContext.inl" +#endif /* __ACEXML_INLINE__ */ + +ACEXML_Parser_Context::~ACEXML_Parser_Context() +{ + delete this->instream_; + this->instream_ = 0; + delete this->locator_; + this->locator_ = 0; +} + diff --git a/ACE/ACEXML/parser/parser/ParserContext.h b/ACE/ACEXML/parser/parser/ParserContext.h new file mode 100644 index 00000000000..34b1edbba78 --- /dev/null +++ b/ACE/ACEXML/parser/parser/ParserContext.h @@ -0,0 +1,84 @@ +// -*- C++ -*- + +//============================================================================= +/** + * @file ParserContext.h + * + * $Id$ + * + * @author Krishnakumar B <kitty@cs.wustl.edu> + */ +//============================================================================= + +#ifndef ACEXML_PARSER_CONTEXT_H +#define ACEXML_PARSER_CONTEXT_H + +#include /**/ "ace/pre.h" +#include 
"ACEXML/parser/parser/Parser_export.h" + +#if !defined (ACE_LACKS_PRAGMA_ONCE) +#pragma once +#endif /* ACE_LACKS_PRAGMA_ONCE */ + +#include "ACEXML/common/XML_Types.h" +#include "ACEXML/common/InputSource.h" +#include "ACEXML/common/Locator.h" +#include "ACEXML/common/LocatorImpl.h" +#include "ace/Functor.h" +#include "ace/Containers_T.h" + +class ACEXML_PARSER_Export ACEXML_Parser_Context +{ +public: + /// Default constructor + ACEXML_Parser_Context(); + + /// Constructor which initializes the context + ACEXML_Parser_Context (ACEXML_InputSource* instream, + ACEXML_LocatorImpl* locator); + + /// Comparison operator + bool operator!= (const ACEXML_Parser_Context& src); + + /// Destructor + virtual ~ACEXML_Parser_Context(); + + /// Reset the parser context. This does not free up the memory. Only sets + /// it to zero. Meant to be called after a context is pushed on to a + /// stack. + void reset (void); + + /// Get the underlying input source. + virtual ACEXML_InputSource* getInputSource(void); + + /// Get the underlying locator. + virtual ACEXML_LocatorImpl* getLocator(void); + + /// Set the underlying input source. + virtual void setInputSource(ACEXML_InputSource* ip); + + /// Set the underlying locator. + virtual void setLocator(ACEXML_LocatorImpl* locator); + +private: + + /// Copy constructor + ACEXML_Parser_Context (const ACEXML_Parser_Context& src); + + /// Assignment operator + ACEXML_Parser_Context& operator= (const ACEXML_Parser_Context& src); + + /// Current input char stream. + ACEXML_InputSource *instream_; + + /// Current Locator which provides line no., column no. 
systemId and publicId + ACEXML_LocatorImpl* locator_; +}; + +#if defined (__ACEXML_INLINE__) +# include "ACEXML/parser/parser/ParserContext.inl" +#endif /* __ACEXML_INLINE__ */ + +#include /**/ "ace/post.h" + +#endif /* ACEXML_PARSER_CONTEXT_H */ diff --git a/ACE/ACEXML/parser/parser/ParserContext.inl b/ACE/ACEXML/parser/parser/ParserContext.inl new file mode 100644 index 00000000000..b8f30ea248d --- /dev/null +++ b/ACE/ACEXML/parser/parser/ParserContext.inl @@ -0,0 +1,77 @@ +// -*- C++ -*- +// +// $Id$ + +#include <algorithm> + +ACEXML_INLINE +ACEXML_Parser_Context::ACEXML_Parser_Context() + : instream_ (0), + locator_ (0) +{ + +} + +ACEXML_INLINE +ACEXML_Parser_Context::ACEXML_Parser_Context (ACEXML_InputSource* instream, + ACEXML_LocatorImpl* locator) + : instream_ (instream), + locator_ (locator) +{ + +} + +ACEXML_INLINE +ACEXML_Parser_Context::ACEXML_Parser_Context (const ACEXML_Parser_Context& src) + : instream_ (src.instream_), + locator_ (src.locator_) +{ + +} + +ACEXML_INLINE bool +ACEXML_Parser_Context::operator!= (const ACEXML_Parser_Context& src) +{ + return (this->instream_ != src.instream_ && this->locator_ != src.locator_); +} + +ACEXML_INLINE ACEXML_Parser_Context& +ACEXML_Parser_Context::operator= (const ACEXML_Parser_Context& src) +{ + ACEXML_Parser_Context tmp (src); + std::swap (this->instream_, tmp.instream_); + std::swap (this->locator_, tmp.locator_); + return *this; +} + + +ACEXML_INLINE ACEXML_InputSource* +ACEXML_Parser_Context::getInputSource (void) +{ + return this->instream_; +} + +ACEXML_INLINE ACEXML_LocatorImpl* +ACEXML_Parser_Context::getLocator (void) +{ + return this->locator_; +} + +ACEXML_INLINE void +ACEXML_Parser_Context::setInputSource (ACEXML_InputSource* ip) +{ + this->instream_ = ip; +} + +ACEXML_INLINE void +ACEXML_Parser_Context::setLocator (ACEXML_LocatorImpl* locator) +{ + this->locator_ = locator; +} + +ACEXML_INLINE void +ACEXML_Parser_Context::reset (void) +{ + this->instream_ = 0; + this->locator_ = 0; +} diff 
--git a/ACE/ACEXML/parser/parser/ParserInternals.cpp b/ACE/ACEXML/parser/parser/ParserInternals.cpp new file mode 100644 index 00000000000..38db8291fa5 --- /dev/null +++ b/ACE/ACEXML/parser/parser/ParserInternals.cpp @@ -0,0 +1,394 @@ +// $Id$ + +#include "ACEXML/parser/parser/ParserInternals.h" + + +const ACEXML_Char* ACEXML_ParserInt::predef_ent_[] = { + ACE_TEXT ("amp"), + ACE_TEXT ("lt"), + ACE_TEXT ("gt"), + ACE_TEXT ("apos"), + ACE_TEXT ("quot") +}; + +const ACEXML_Char* ACEXML_ParserInt::predef_val_[] = { + ACE_TEXT ("&"), + ACE_TEXT ("<"), + ACE_TEXT (">"), + ACE_TEXT ("'"), + ACE_TEXT ("\"") +}; + +// Optimize away the most common cases. Any compiler worth it's salt should +// give generate a single memory access. + +const ACEXML_Char ACEXML_ParserInt::base_char_table_[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */ + 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */ + 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */ +}; + +#if defined (ACE_USES_WCHAR) + +int 
+ACEXML_ParserInt::isBasechar_i (const ACEXML_Char c) +{ + if (c < 0x0100) + return ACEXML_ParserInt::base_char_table_[c]; + else if (c < 0x0905) + return ((c >= 0x0100 && c <= 0x0131) || + (c >= 0x0134 && c <= 0x013E) || + (c >= 0x0141 && c <= 0x0148) || + (c >= 0x014A && c <= 0x017E) || + (c >= 0x0180 && c <= 0x01C3) || + (c >= 0x01CD && c <= 0x01F0) || + (c >= 0x01F4 && c <= 0x01F5) || + (c >= 0x01FA && c <= 0x0217) || + (c >= 0x0250 && c <= 0x02A8) || + (c >= 0x02BB && c <= 0x02C1) || + (c == 0x0386) || + (c >= 0x0388 && c <= 0x038A) || + (c == 0x038C) || + (c >= 0x038E && c <= 0x03A1) || + (c >= 0x03A3 && c <= 0x03CE) || + (c >= 0x03D0 && c <= 0x03D6) || + (c == 0x03DA) || + (c == 0x03DC) || + (c == 0x03DE) || + (c == 0x03E0) || + (c >= 0x03E2 && c <= 0x03F3) || + (c >= 0x0401 && c <= 0x040C) || + (c >= 0x040E && c <= 0x044F) || + (c >= 0x0451 && c <= 0x045C) || + (c >= 0x045E && c <= 0x0481) || + (c >= 0x0490 && c <= 0x04C4) || + (c >= 0x04C7 && c <= 0x04C8) || + (c >= 0x04CB && c <= 0x04CC) || + (c >= 0x04D0 && c <= 0x04EB) || + (c >= 0x04EE && c <= 0x04F5) || + (c >= 0x04F8 && c <= 0x04F9) || + (c >= 0x0531 && c <= 0x0556) || + (c == 0x0559) || + (c >= 0x0561 && c <= 0x0586) || + (c >= 0x05D0 && c <= 0x05EA) || + (c >= 0x05F0 && c <= 0x05F2) || + (c >= 0x0621 && c <= 0x063A) || + (c >= 0x0641 && c <= 0x064A) || + (c >= 0x0671 && c <= 0x06B7) || + (c >= 0x06BA && c <= 0x06BE) || + (c >= 0x06C0 && c <= 0x06CE) || + (c >= 0x06D0 && c <= 0x06D3) || + (c == 0x06D5) || + (c >= 0x06E5 && c <= 0x06E6)); + else if (c < 0x10A0) + return ((c >= 0x0905 && c <= 0x0939) || + (c == 0x093D) || + (c >= 0x0958 && c <= 0x0961) || + (c >= 0x0985 && c <= 0x098C) || + (c >= 0x098F && c <= 0x0990) || + (c >= 0x0993 && c <= 0x09A8) || + (c >= 0x09AA && c <= 0x09B0) || + (c == 0x09B2) || + (c >= 0x09B6 && c <= 0x09B9) || + (c >= 0x09DC && c <= 0x09DD) || + (c >= 0x09DF && c <= 0x09E1) || + (c >= 0x09F0 && c <= 0x09F1) || + (c >= 0x0A05 && c <= 0x0A0A) || + (c >= 0x0A0F && c <= 
0x0A10) || + (c >= 0x0A13 && c <= 0x0A28) || + (c >= 0x0A2A && c <= 0x0A30) || + (c >= 0x0A32 && c <= 0x0A33) || + (c >= 0x0A35 && c <= 0x0A36) || + (c >= 0x0A38 && c <= 0x0A39) || + (c >= 0x0A59 && c <= 0x0A5C) || + (c == 0x0A5E) || + (c >= 0x0A72 && c <= 0x0A74) || + (c >= 0x0A85 && c <= 0x0A8B) || + (c == 0x0A8D) || + (c >= 0x0A8F && c <= 0x0A91) || + (c >= 0x0A93 && c <= 0x0AA8) || + (c >= 0x0AAA && c <= 0x0AB0) || + (c >= 0x0AB2 && c <= 0x0AB3) || + (c >= 0x0AB5 && c <= 0x0AB9) || + (c == 0x0ABD || c == 0x0AE0) || + (c >= 0x0B05 && c <= 0x0B0C) || + (c >= 0x0B0F && c <= 0x0B10) || + (c >= 0x0B13 && c <= 0x0B28) || + (c >= 0x0B2A && c <= 0x0B30) || + (c >= 0x0B32 && c <= 0x0B33) || + (c >= 0x0B36 && c <= 0x0B39) || + (c == 0x0B3D) || + (c >= 0x0B5C && c <= 0x0B5D) || + (c >= 0x0B5F && c <= 0x0B61) || + (c >= 0x0B85 && c <= 0x0B8A) || + (c >= 0x0B8E && c <= 0x0B90) || + (c >= 0x0B92 && c <= 0x0B95) || + (c >= 0x0B99 && c <= 0x0B9A) || + (c == 0x0B9C) || + (c >= 0x0B9E && c <= 0x0B9F) || + (c >= 0x0BA3 && c <= 0x0BA4) || + (c >= 0x0BA8 && c <= 0x0BAA) || + (c >= 0x0BAE && c <= 0x0BB5) || + (c >= 0x0BB7 && c <= 0x0BB9) || + (c >= 0x0C05 && c <= 0x0C0C) || + (c >= 0x0C0E && c <= 0x0C10) || + (c >= 0x0C12 && c <= 0x0C28) || + (c >= 0x0C2A && c <= 0x0C33) || + (c >= 0x0C35 && c <= 0x0C39) || + (c >= 0x0C60 && c <= 0x0C61) || + (c >= 0x0C85 && c <= 0x0C8C) || + (c >= 0x0C8E && c <= 0x0C90) || + (c >= 0x0C92 && c <= 0x0CA8) || + (c >= 0x0CAA && c <= 0x0CB3) || + (c >= 0x0CB5 && c <= 0x0CB9) || + (c == 0x0CDE) || + (c >= 0x0CE0 && c <= 0x0CE1) || + (c >= 0x0D05 && c <= 0x0D0C) || + (c >= 0x0D0E && c <= 0x0D10) || + (c >= 0x0D12 && c <= 0x0D28) || + (c >= 0x0D2A && c <= 0x0D39) || + (c >= 0x0D60 && c <= 0x0D61) || + (c >= 0x0E01 && c <= 0x0E2E) || + (c == 0x0E30) || + (c >= 0x0E32 && c <= 0x0E33) || + (c >= 0x0E40 && c <= 0x0E45) || + (c >= 0x0E81 && c <= 0x0E82) || + (c == 0x0E84) || + (c >= 0x0E87 && c <= 0x0E88) || + (c == 0x0E8A || c == 0x0E8D) || + (c >= 0x0E94 && c 
<= 0x0E97) || + (c >= 0x0E99 && c <= 0x0E9F) || + (c >= 0x0EA1 && c <= 0x0EA3) || + (c == 0x0EA5 || c == 0x0EA7) || + (c >= 0x0EAA && c <= 0x0EAB) || + (c >= 0x0EAD && c <= 0x0EAE) || + (c == 0x0EB0) || + (c >= 0x0EB2 && c <= 0x0EB3) || + (c == 0x0EBD) || + (c >= 0x0EC0 && c <= 0x0EC4) || + (c >= 0x0F40 && c <= 0x0F47) || + (c >= 0x0F49 && c <= 0x0F69)); + else + return ((c >= 0x10A0 && c <= 0x10C5) || + (c >= 0x10D0 && c <= 0x10F6) || + (c == 0x1100) || + (c >= 0x1102 && c <= 0x1103) || + (c >= 0x1105 && c <= 0x1107) || + (c == 0x1109) || + (c >= 0x110B && c <= 0x110C) || + (c >= 0x110E && c <= 0x1112) || + (c == 0x113C || c == 0x113E || c == 0x1140) || + (c == 0x114C || c == 0x114E || c == 0x1150) || + (c >= 0x1154 && c <= 0x1155) || + (c == 0x1159) || + (c >= 0x115F && c <= 0x1161) || + (c == 0x1163) || + (c == 0x1165) || + (c == 0x1167) || + (c == 0x1169) || + (c >= 0x116D && c <= 0x116E) || + (c >= 0x1172 && c <= 0x1173) || + (c == 0x1175) || + (c == 0x119E) || + (c == 0x11A8) || + (c == 0x11AB) || + (c >= 0x11AE && c <= 0x11AF) || + (c >= 0x11B7 && c <= 0x11B8) || + (c == 0x11BA) || + (c >= 0x11BC && c <= 0x11C2) || + (c == 0x11EB) || + (c == 0x11F0) || + (c == 0x11F9) || + (c >= 0x1E00 && c <= 0x1E9B) || + (c >= 0x1EA0 && c <= 0x1EF9) || + (c >= 0x1F00 && c <= 0x1F15) || + (c >= 0x1F18 && c <= 0x1F1D) || + (c >= 0x1F20 && c <= 0x1F45) || + (c >= 0x1F48 && c <= 0x1F4D) || + (c >= 0x1F50 && c <= 0x1F57) || + (c == 0x1F59) || + (c == 0x1F5B) || + (c == 0x1F5D) || + (c >= 0x1F5F && c <= 0x1F7D) || + (c >= 0x1F80 && c <= 0x1FB4) || + (c >= 0x1FB6 && c <= 0x1FBC) || + (c == 0x1FBE) || + (c >= 0x1FC2 && c <= 0x1FC4) || + (c >= 0x1FC6 && c <= 0x1FCC) || + (c >= 0x1FD0 && c <= 0x1FD3) || + (c >= 0x1FD6 && c <= 0x1FDB) || + (c >= 0x1FE0 && c <= 0x1FEC) || + (c >= 0x1FF2 && c <= 0x1FF4) || + (c >= 0x1FF6 && c <= 0x1FFC) || + (c == 0x2126) || + (c >= 0x212A && c <= 0x212B) || + (c == 0x212E) || + (c >= 0x2180 && c <= 0x2182) || + (c >= 0x3041 && c <= 0x3094) || + (c >= 
0x30A1 && c <= 0x30FA) ||
+            (c >= 0x3105 && c <= 0x312C) ||
+            (c >= 0xAC00 && c <= 0xD7A3));
+}
+
+/// Return non-zero when @a c is an XML 1.0 Ideographic character:
+/// [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029].
+int
+ACEXML_ParserInt::isIdeographic_i (const ACEXML_Char c)
+{
+  // The single code point is U+3007; the previous decimal literal 3007
+  // (0x0BBF) tested the wrong character.
+  return ((c >= 0x4E00 && c <= 0x9FA5) ||
+          (c == 0x3007) ||
+          (c >= 0x3021 && c <= 0x3029));
+}
+
+int
+ACEXML_ParserInt::isCombiningchar_i (const ACEXML_Char c)
+{
+  if (c < 0x0901)
+    return ((c >= 0x0300 && c <= 0x0345) ||
+            (c >= 0x0360 && c <= 0x0361) ||
+            (c >= 0x0483 && c <= 0x0486) ||
+            (c >= 0x0591 && c <= 0x05A1) ||
+            (c >= 0x05A3 && c <= 0x05B9) ||
+            (c >= 0x05BB && c <= 0x05BD) ||
+            (c == 0x05BF) ||
+            (c >= 0x05C1 && c <= 0x05C2) ||
+            (c == 0x05C4) ||
+            (c >= 0x064B && c <= 0x0652) ||
+            (c == 0x0670) ||
+            (c >= 0x06D6 && c <= 0x06DC) ||
+            (c >= 0x06DD && c <= 0x06DF) ||
+            (c >= 0x06E0 && c <= 0x06E4) ||
+            (c >= 0x06E7 && c <= 0x06E8) ||
+            (c >= 0x06EA && c <= 0x06ED));
+  else
+    return ((c >= 0x0901 && c <= 0x0903) ||
+            (c == 0x093C) ||
+            (c >= 0x093E && c <= 0x094C) ||
+            (c == 0x094D) ||
+            (c >= 0x0951 && c <= 0x0954) ||
+            (c >= 0x0962 && c <= 0x0963) ||
+            (c >= 0x0981 && c <= 0x0983) ||
+            (c == 0x09BC) ||
+            (c == 0x09BE) ||
+            (c == 0x09BF) ||
+            (c >= 0x09C0 && c <= 0x09C4) ||
+            (c >= 0x09C7 && c <= 0x09C8) ||
+            (c >= 0x09CB && c <= 0x09CD) ||
+            (c == 0x09D7) ||
+            (c >= 0x09E2 && c <= 0x09E3) ||
+            (c == 0x0A02) ||
+            (c == 0x0A3C) ||
+            (c == 0x0A3E) ||
+            (c == 0x0A3F) ||
+            (c >= 0x0A40 && c <= 0x0A42) ||
+            (c >= 0x0A47 && c <= 0x0A48) ||
+            (c >= 0x0A4B && c <= 0x0A4D) ||
+            (c >= 0x0A70 && c <= 0x0A71) ||
+            (c >= 0x0A81 && c <= 0x0A83) ||
+            (c == 0x0ABC) ||
+            (c >= 0x0ABE && c <= 0x0AC5) ||
+            (c >= 0x0AC7 && c <= 0x0AC9) ||
+            (c >= 0x0ACB && c <= 0x0ACD) ||
+            (c >= 0x0B01 && c <= 0x0B03) ||
+            (c == 0x0B3C) ||
+            (c >= 0x0B3E && c <= 0x0B43) ||
+            (c >= 0x0B47 && c <= 0x0B48) ||
+            (c >= 0x0B4B && c <= 0x0B4D) ||
+            (c >= 0x0B56 && c <= 0x0B57) ||
+            (c >= 0x0B82 && c <= 0x0B83) ||
+            (c >= 0x0BBE && c <= 0x0BC2) ||
+            (c >= 0x0BC6 && c <= 0x0BC8) ||
+            (c >= 0x0BCA && c <= 0x0BCD) ||
+            (c == 0x0BD7) ||
+
(c >= 0x0C01 && c <= 0x0C03) || + (c >= 0x0C3E && c <= 0x0C44) || + (c >= 0x0C46 && c <= 0x0C48) || + (c >= 0x0C4A && c <= 0x0C4D) || + (c >= 0x0C55 && c <= 0x0C56) || + (c >= 0x0C82 && c <= 0x0C83) || + (c >= 0x0CBE && c <= 0x0CC4) || + (c >= 0x0CC6 && c <= 0x0CC8) || + (c >= 0x0CCA && c <= 0x0CCD) || + (c >= 0x0CD5 && c <= 0x0CD6) || + (c >= 0x0D02 && c <= 0x0D03) || + (c >= 0x0D3E && c <= 0x0D43) || + (c >= 0x0D46 && c <= 0x0D48) || + (c >= 0x0D4A && c <= 0x0D4D) || + (c == 0x0D57) || + (c == 0x0E31) || + (c >= 0x0E34 && c <= 0x0E3A) || + (c >= 0x0E47 && c <= 0x0E4E) || + (c == 0x0EB1) || + (c >= 0x0EB4 && c <= 0x0EB9) || + (c >= 0x0EBB && c <= 0x0EBC) || + (c >= 0x0EC8 && c <= 0x0ECD) || + (c >= 0x0F18 && c <= 0x0F19) || + (c == 0x0F35) || + (c == 0x0F37) || + (c == 0x0F39) || + (c == 0x0F3E) || + (c == 0x0F3F) || + (c >= 0x0F71 && c <= 0x0F84) || + (c >= 0x0F86 && c <= 0x0F8B) || + (c >= 0x0F90 && c <= 0x0F95) || + (c == 0x0F97) || + (c >= 0x0F99 && c <= 0x0FAD) || + (c >= 0x0FB1 && c <= 0x0FB7) || + (c == 0x0FB9) || + (c >= 0x20D0 && c <= 0x20DC) || + (c == 0x20E1) || + (c >= 0x302A && c <= 0x302F) || + (c == 0x3099) || + (c == 0x309A)); +} + +int +ACEXML_ParserInt::isDigit_i (const ACEXML_Char c) +{ /* Code points below 0x0040 are checked only against 0x0030-0x0039; all others are checked against the remaining ranges. */ + if (c < 0x0040) + return (c >= 0x0030 && c <= 0x0039); + else + return ((c >= 0x0660 && c <= 0x0669) || + (c >= 0x06F0 && c <= 0x06F9) || + (c >= 0x0966 && c <= 0x096F) || + (c >= 0x09E6 && c <= 0x09EF) || + (c >= 0x0A66 && c <= 0x0A6F) || + (c >= 0x0AE6 && c <= 0x0AEF) || + (c >= 0x0B66 && c <= 0x0B6F) || + (c >= 0x0BE7 && c <= 0x0BEF) || + (c >= 0x0C66 && c <= 0x0C6F) || + (c >= 0x0CE6 && c <= 0x0CEF) || + (c >= 0x0D66 && c <= 0x0D6F) || + (c >= 0x0E50 && c <= 0x0E59) || + (c >= 0x0ED0 && c <= 0x0ED9) || + (c >= 0x0F20 && c <= 0x0F29)); +} + +int +ACEXML_ParserInt::isExtender_i (const ACEXML_Char c) +{ + // The compiler should generate a jump table and index into it directly.
+ switch (c) + { + case 0x00B7: case 0x02D0: case 0x02D1: case 0x0387: case 0x0640: + case 0x0E46: case 0x0EC6: case 0x3005: case 0x3031: case 0x3032: + case 0x3033: case 0x3034: case 0x3035: case 0x309D: case 0x309E: + case 0x30FC: case 0x30FD: case 0x30FE: + return 1; + default: + return 0; + } +} + +#endif /* ACE_USES_WCHAR */ diff --git a/ACE/ACEXML/parser/parser/ParserInternals.h b/ACE/ACEXML/parser/parser/ParserInternals.h new file mode 100644 index 00000000000..f1c89d8d25e --- /dev/null +++ b/ACE/ACEXML/parser/parser/ParserInternals.h @@ -0,0 +1,102 @@ +// -*- C++ -*- + +//============================================================================= +/** + * @file ParserInternals.h + * + * $Id$ + * + * @author Krishnakumar B <kitty@cs.wustl.edu> + */ +//============================================================================= + +#ifndef _ACEXML_PARSER_INTERNALS_H_ +#define _ACEXML_PARSER_INTERNALS_H_ + +#include /**/ "ace/pre.h" +#include "ACEXML/parser/parser/Parser_export.h" + +#if !defined (ACE_LACKS_PRAGMA_ONCE) +#pragma once +#endif /* ACE_LACKS_PRAGMA_ONCE */ + +#include "ACEXML/common/XML_Types.h" + +/** + * @class ACEXML_ParserInt ParserInternals.h "ACEXML/parser/parser/ParserInternals.h" + * + * @brief A class to hide some of the internal implementation details of + * the parser. + * + */ +class ACEXML_PARSER_Export ACEXML_ParserInt +{ + +public: +#if defined (ACE_USES_WCHAR) + + /** + * Check if a character @a c is a BaseChar. This method checks for the + * complete set of characters allowed when WCHAR support is enabled. + * + * @retval 1 if @a c is a BaseChar, 0 otherwise. + */ + static int isBasechar_i (const ACEXML_Char c); + + /** + * Check if a character @a c is an Ideographic. This method checks for the + * complete set of characters allowed when WCHAR support is enabled. + * + * @retval 1 if @a c is an Ideographic character, 0 otherwise.
+ */ + static int isIdeographic_i (const ACEXML_Char c); + + /** + * Check if a character @a c is a CombiningChar. This method checks for the + * complete set of characters allowed when WCHAR support is enabled. + * + * @retval 1 if @a c is a CombiningChar, 0 otherwise. + */ + static int isCombiningchar_i (const ACEXML_Char c); + + /** + * Check if a character @a c is a Digit. This method checks for the + * complete set of characters allowed when WCHAR support is enabled. + * + * @retval 1 if @a c is a Digit, 0 otherwise. + */ + static int isDigit_i (const ACEXML_Char c); + + /** + * Check if a character @a c is an Extender. This method checks for the + * complete set of characters allowed when WCHAR support is enabled. + * + * @retval 1 if @a c is an Extender, 0 otherwise. + */ + static int isExtender_i (const ACEXML_Char c); + +#endif /* ACE_USES_WCHAR */ + + static const ACEXML_Char base_char_table_[256]; + + static const ACEXML_Char* predef_ent_[]; + + static const ACEXML_Char* predef_val_[]; + + // Enum describing the position in a document when a reference occurs. + enum ReferenceState { + IN_CONTENT, + IN_ATT_VALUE, + AS_ATT_VALUE, + IN_ENTITY_VALUE, + IN_INT_DTD, + IN_EXT_DTD, + IN_NOTATION, + INVALID = -1 + }; + +}; + +#include /**/ "ace/post.h" + +#endif /* _ACEXML_PARSER_INTERNALS_H_ */ diff --git a/ACE/ACEXML/parser/parser/Parser_export.h b/ACE/ACEXML/parser/parser/Parser_export.h new file mode 100644 index 00000000000..71630ccd962 --- /dev/null +++ b/ACE/ACEXML/parser/parser/Parser_export.h @@ -0,0 +1,47 @@ +// -*- C++ -*- + +//============================================================================= +/** + * @file Parser_export.h + * + * $Id$ + * + * @author Nanbor Wang <nanbor@cs.wustl.edu> + */ +//============================================================================= + +// Definition for Win32 Export directives.
+// This file is generated automatically by generate_export_file.pl +// ------------------------------ +#ifndef ACEXML_PARSER_EXPORT_H +#define ACEXML_PARSER_EXPORT_H + +#include "ace/config-all.h" + +#if defined (ACE_AS_STATIC_LIBS) && !defined (ACEXML_PARSER_HAS_DLL) +# define ACEXML_PARSER_HAS_DLL 0 +#endif /* ACE_AS_STATIC_LIBS && !ACEXML_PARSER_HAS_DLL */ + +#if !defined (ACEXML_PARSER_HAS_DLL) +# define ACEXML_PARSER_HAS_DLL 1 +#endif /* ! ACEXML_PARSER_HAS_DLL */ + +#if defined (ACEXML_PARSER_HAS_DLL) && (ACEXML_PARSER_HAS_DLL == 1) +# if defined (ACEXML_PARSER_BUILD_DLL) +# define ACEXML_PARSER_Export ACE_Proper_Export_Flag +# define ACEXML_PARSER_SINGLETON_DECLARATION(T) ACE_EXPORT_SINGLETON_DECLARATION (T) +# define ACEXML_PARSER_SINGLETON_DECLARE(SINGLETON_TYPE, CLASS, LOCK) ACE_EXPORT_SINGLETON_DECLARE(SINGLETON_TYPE, CLASS, LOCK) +# else /* ACEXML_PARSER_BUILD_DLL */ +# define ACEXML_PARSER_Export ACE_Proper_Import_Flag +# define ACEXML_PARSER_SINGLETON_DECLARATION(T) ACE_IMPORT_SINGLETON_DECLARATION (T) +# define ACEXML_PARSER_SINGLETON_DECLARE(SINGLETON_TYPE, CLASS, LOCK) ACE_IMPORT_SINGLETON_DECLARE(SINGLETON_TYPE, CLASS, LOCK) +# endif /* ACEXML_PARSER_BUILD_DLL */ +#else /* ACEXML_PARSER_HAS_DLL == 1 */ +# define ACEXML_PARSER_Export +# define ACEXML_PARSER_SINGLETON_DECLARATION(T) +# define ACEXML_PARSER_SINGLETON_DECLARE(SINGLETON_TYPE, CLASS, LOCK) +#endif /* ACEXML_PARSER_HAS_DLL == 1 */ + +#endif /* ACEXML_PARSER_EXPORT_H */ + +// End of auto generated file. diff --git a/ACE/ACEXML/parser/parser/parser.mpc b/ACE/ACEXML/parser/parser/parser.mpc new file mode 100644 index 00000000000..580b85495a4 --- /dev/null +++ b/ACE/ACEXML/parser/parser/parser.mpc @@ -0,0 +1,14 @@ +// -*- MPC -*- +// $Id$ + +project(ACEXML_Parser): acelib, ace_output, install { + avoids += ace_for_tao + sharedname = ACEXML_Parser + after += ACEXML + libs += ACEXML + dynamicflags += ACEXML_PARSER_BUILD_DLL + + Pkgconfig_Files { + ACEXML_Parser.pc.in + } +} |