/* * Copyright 1999-2009, Gisle Aas * Copyright 1999-2000, Michael A. Chase * * This library is free software; you can redistribute it and/or * modify it under the same terms as Perl itself. */ /* * Declare various structures and constants. The main thing * is 'struct p_state' that contains various fields to represent * the state of the parser. */ #ifdef MARKED_SECTION enum marked_section_t { MS_NONE = 0, MS_INCLUDE, MS_RCDATA, MS_CDATA, MS_IGNORE }; #endif /* MARKED_SECTION */ #define P_SIGNATURE 0x16091964 /* tag struct p_state for safer cast */ enum event_id { E_DECLARATION = 0, E_COMMENT, E_START, E_END, E_TEXT, E_PROCESS, E_START_DOCUMENT, E_END_DOCUMENT, E_DEFAULT, /**/ EVENT_COUNT, E_NONE /* used for reporting skipped text (non-events) */ }; typedef enum event_id event_id_t; /* must match event_id_t above */ static char* event_id_str[] = { "declaration", "comment", "start", "end", "text", "process", "start_document", "end_document", "default", }; struct p_handler { SV* cb; SV* argspec; }; struct p_state { U32 signature; /* state */ SV* buf; STRLEN offset; STRLEN line; STRLEN column; bool start_document; bool parsing; bool eof; /* special parsing modes */ char* literal_mode; bool is_cdata; bool no_dash_dash_comment_end; char *pending_end_tag; /* unbroken_text option needs a buffer of pending text */ SV* pend_text; bool pend_text_is_cdata; STRLEN pend_text_offset; STRLEN pend_text_line; STRLEN pend_text_column; /* skipped text is accumulated here */ SV* skipped_text; #ifdef MARKED_SECTION /* marked section support */ enum marked_section_t ms; AV* ms_stack; bool marked_sections; #endif /* various boolean configuration attributes */ bool strict_comment; bool strict_names; bool strict_end; bool xml_mode; bool unbroken_text; bool attr_encoded; bool case_sensitive; bool closing_plaintext; bool utf8_mode; bool empty_element_tags; bool xml_pic; bool backquote; /* other configuration stuff */ SV* bool_attr_val; struct p_handler handlers[EVENT_COUNT]; int argspec_entity_decode; /* filters */ HV* report_tags; HV* ignore_tags; HV* ignore_elements; /* these are set when we are currently inside an element we want to ignore */ SV* ignoring_element; int ignore_depth; /* cache */ HV* entity2char; /* %HTML::Entities::entity2char */ SV* tmp; }; typedef struct p_state PSTATE;