summary refs log tree commit diff
path: root/hparser.h
blob: 986e6c5f67397b3f4e731589ad7b1668d12d4214 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
/* 
 * Copyright 1999-2009, Gisle Aas
 * Copyright 1999-2000, Michael A. Chase
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the same terms as Perl itself.
 */

/*
 * Declare various structures and constants.  The main thing
 * is 'struct p_state' that contains various fields to represent
 * the state of the parser.
 */

#ifdef MARKED_SECTION

/*
 * Kind of marked section; this is the type of the `ms` field of
 * struct p_state.  MS_NONE is zero and the other kinds follow it
 * consecutively.
 */
enum marked_section_t {
    MS_NONE    = 0,
    MS_INCLUDE = 1,
    MS_RCDATA  = 2,
    MS_CDATA   = 3,
    MS_IGNORE  = 4
};

#endif /* MARKED_SECTION */


/*
 * Magic number used to tag struct p_state (see its `signature` field,
 * presumably set to this value -- confirm at the allocation site) so
 * that a pointer can be sanity-checked before it is treated as a PSTATE.
 */
#define P_SIGNATURE 0x16091964  /* tag struct p_state for safer cast */

/*
 * Identifiers for the events the parser can report.
 *
 * The values 0 .. EVENT_COUNT-1 are the real events; each one has a
 * name at the same index in event_id_str[] below.  EVENT_COUNT also
 * sizes the handlers[] array in struct p_state.
 */
enum event_id {
    E_DECLARATION = 0,
    E_COMMENT,
    E_START,
    E_END,
    E_TEXT,
    E_PROCESS,
    E_START_DOCUMENT,
    E_END_DOCUMENT,
    E_DEFAULT,
    /* Not real events: these two must stay after the last real event. */
    EVENT_COUNT,   /* number of real events listed above */
    E_NONE   /* used for reporting skipped text (non-events) */
};
typedef enum event_id event_id_t;

/*
 * Event names, indexed by event_id_t.  There must be exactly one entry,
 * in enum order, for every value below EVENT_COUNT.
 */
static char* event_id_str[] = {
    "declaration",
    "comment",
    "start",
    "end",
    "text",
    "process",
    "start_document",
    "end_document",
    "default",
};

/*
 * Compile-time check that event_id_str[] has exactly EVENT_COUNT entries.
 * If the enum and the table get out of step the array size below becomes
 * negative and compilation fails, instead of code that walks
 * 0 .. EVENT_COUNT-1 reading past the end of the table at run time.
 * (Written as a typedef so it also works with pre-C11 compilers.)
 */
typedef char event_id_str_must_match_event_id_t[
    ((int)(sizeof(event_id_str) / sizeof(event_id_str[0])) == EVENT_COUNT)
        ? 1 : -1];

/*
 * One handler slot.  struct p_state keeps EVENT_COUNT of these in its
 * handlers[] array.
 */
struct p_handler {
    SV *cb;        /* presumably the callback for the event -- confirm in the parser code */
    SV *argspec;   /* presumably describes the arguments passed to cb -- confirm */
};

/*
 * The parser state.  Fields are grouped below as: signature tag, current
 * buffer/position state, special parsing modes, pending text, skipped
 * text, optional marked-section support, boolean options, handlers,
 * tag filters, ignore bookkeeping and caches.
 *
 * NOTE: field order and the MARKED_SECTION conditional determine the
 * struct layout; every file using PSTATE must be compiled with the same
 * MARKED_SECTION setting.
 */
struct p_state {
    /* presumably holds P_SIGNATURE so casts to PSTATE* can be checked -- confirm */
    U32 signature;

    /* state */
    SV* buf;
    STRLEN offset;
    STRLEN line;
    STRLEN column;
    bool start_document;
    bool parsing;
    bool eof;

    /* special parsing modes */
    char* literal_mode;
    bool  is_cdata;
    bool  no_dash_dash_comment_end;
    char *pending_end_tag;

    /* unbroken_text option needs a buffer of pending text */
    SV*    pend_text;
    bool   pend_text_is_cdata;
    /* presumably the offset/line/column where the pending text began -- confirm */
    STRLEN pend_text_offset;
    STRLEN pend_text_line;
    STRLEN pend_text_column;

    /* skipped text is accumulated here */
    SV* skipped_text;

#ifdef MARKED_SECTION
    /* marked section support (fields exist only when MARKED_SECTION is defined) */
    enum marked_section_t ms;
    AV* ms_stack;
    bool marked_sections;
#endif

    /* various boolean configuration attributes */
    bool strict_comment;
    bool strict_names;
    bool strict_end;
    bool xml_mode;
    bool unbroken_text;
    bool attr_encoded;
    bool case_sensitive;
    bool closing_plaintext;
    bool utf8_mode;
    bool empty_element_tags;
    bool xml_pic;
    bool backquote;

    /* other configuration stuff */
    SV* bool_attr_val;
    /* one handler slot for each event id below EVENT_COUNT */
    struct p_handler handlers[EVENT_COUNT];
    int argspec_entity_decode;

    /* filters */
    HV* report_tags;
    HV* ignore_tags;
    HV* ignore_elements;

    /* these are set when we are currently inside an element we want to ignore */
    SV* ignoring_element;
    int ignore_depth;

    /* cache */
    HV* entity2char;            /* %HTML::Entities::entity2char */
    SV* tmp;
};
/* Short name for the parser state struct. */
typedef struct p_state PSTATE;