summaryrefslogtreecommitdiff
path: root/hparser.h
diff options
context:
space:
mode:
authorLorry Tar Creator <lorry-tar-importer@lorry>2013-05-08 22:21:52 +0000
committerLorry Tar Creator <lorry-tar-importer@lorry>2013-05-08 22:21:52 +0000
commit2f253cfc85ffd55a8acb988e91f0bc5ab348124c (patch)
tree4734ccd522c71dd455879162006742002f8c1565 /hparser.h
downloadHTML-Parser-tarball-2f253cfc85ffd55a8acb988e91f0bc5ab348124c.tar.gz
Diffstat (limited to 'hparser.h')
-rw-r--r--hparser.h132
1 files changed, 132 insertions, 0 deletions
diff --git a/hparser.h b/hparser.h
new file mode 100644
index 0000000..986e6c5
--- /dev/null
+++ b/hparser.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright 1999-2009, Gisle Aas
+ * Copyright 1999-2000, Michael A. Chase
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the same terms as Perl itself.
+ */
+
+/*
+ * Declare various structures and constants. The main thing
+ * is 'struct p_state' that contains various fields to represent
+ * the state of the parser.
+ */
+
+#ifdef MARKED_SECTION
+
+enum marked_section_t { /* type of the 'ms' field in struct p_state; only declared when MARKED_SECTION is defined */
+ MS_NONE = 0, /* explicit zero; the constants below count up from it (1..4) */
+ MS_INCLUDE, /* NOTE(review): names presumably mirror the SGML marked-section keywords -- confirm */
+ MS_RCDATA,
+ MS_CDATA,
+ MS_IGNORE
+};
+
+#endif /* MARKED_SECTION */
+
+
+#define P_SIGNATURE 0x16091964 /* tag struct p_state for safer cast; presumably the value kept in its 'signature' field -- confirm where the struct is initialised */
+
+enum event_id { /* parser event kinds; event_id_str[] below lists one name per real event */
+ E_DECLARATION = 0, /* explicit zero; the events below count up from it */
+ E_COMMENT,
+ E_START,
+ E_END,
+ E_TEXT,
+ E_PROCESS,
+ E_START_DOCUMENT,
+ E_END_DOCUMENT,
+ E_DEFAULT,
+ /* end of the real events; the two values below have no entry in event_id_str[] */
+ EVENT_COUNT, /* number of events above (9); used as the size of p_state.handlers[] */
+ E_NONE /* used for reporting skipped text (non-events) */
+};
+typedef enum event_id event_id_t;
+
+/* Event names in enum order, one string per event from E_DECLARATION to E_DEFAULT (EVENT_COUNT entries); must match event_id_t above */
+static char* event_id_str[] = { /* NOTE(review): 'static' data in a header gives every including .c file its own copy */
+ "declaration",
+ "comment",
+ "start",
+ "end",
+ "text",
+ "process",
+ "start_document",
+ "end_document",
+ "default",
+};
+
+struct p_handler { /* element type of p_state.handlers[], which holds EVENT_COUNT of these */
+ SV* cb; /* presumably the callback registered for the event -- confirm against the code that fills handlers[] */
+ SV* argspec; /* presumably describes the arguments passed to cb -- confirm */
+};
+
+struct p_state { /* the state of the parser; typedef'd as PSTATE below */
+ U32 signature; /* presumably holds P_SIGNATURE so a cast to this struct can be checked -- confirm */
+
+ /* state */
+ SV* buf;
+ STRLEN offset;
+ STRLEN line;
+ STRLEN column;
+ bool start_document;
+ bool parsing;
+ bool eof;
+
+ /* special parsing modes */
+ char* literal_mode;
+ bool is_cdata;
+ bool no_dash_dash_comment_end;
+ char *pending_end_tag;
+
+ /* unbroken_text option needs a buffer of pending text */
+ SV* pend_text;
+ bool pend_text_is_cdata;
+ STRLEN pend_text_offset; /* presumably the offset/line/column values that belong to pend_text -- confirm */
+ STRLEN pend_text_line;
+ STRLEN pend_text_column;
+
+ /* skipped text is accumulated here */
+ SV* skipped_text;
+
+#ifdef MARKED_SECTION
+ /* marked section support */
+ enum marked_section_t ms; /* one of the MS_* constants */
+ AV* ms_stack;
+ bool marked_sections;
+#endif /* MARKED_SECTION */
+
+ /* various boolean configuration attributes */
+ bool strict_comment;
+ bool strict_names;
+ bool strict_end;
+ bool xml_mode;
+ bool unbroken_text; /* the option that the pend_text* fields above exist for */
+ bool attr_encoded;
+ bool case_sensitive;
+ bool closing_plaintext;
+ bool utf8_mode;
+ bool empty_element_tags;
+ bool xml_pic;
+ bool backquote;
+
+ /* other configuration stuff */
+ SV* bool_attr_val;
+ struct p_handler handlers[EVENT_COUNT]; /* EVENT_COUNT slots, i.e. one per event listed before EVENT_COUNT in enum event_id */
+ int argspec_entity_decode;
+
+ /* filters */
+ HV* report_tags;
+ HV* ignore_tags;
+ HV* ignore_elements;
+
+ /* these are set when we are currently inside an element we want to ignore */
+ SV* ignoring_element;
+ int ignore_depth;
+
+ /* cache */
+ HV* entity2char; /* %HTML::Entities::entity2char */
+ SV* tmp;
+};
+typedef struct p_state PSTATE; /* short alias for struct p_state */
+