From 4de031adb16019295d67fe02e515f9982b32a74b Mon Sep 17 00:00:00 2001 From: Michael Drake Date: Sat, 15 May 2021 19:41:55 +0100 Subject: Treebuilder: Massively optimise element type from name with gperf. Loading the html5 single page spec: * We were spending 10.81% of total runtime in element_type_from_name. Now it takes 0.66% of total runtime. * Total instruction fetch cost is reduced from 5,660,475,511 to 4,523,112,517. --- src/treebuilder/Makefile | 8 ++- src/treebuilder/element-type.gperf | 131 ++++++++++++++++++++++++++++++++++ src/treebuilder/element-type.h | 49 +++++++++++++ src/treebuilder/internal.h | 27 +------ src/treebuilder/treebuilder.c | 140 ++----------------------------------- 5 files changed, 194 insertions(+), 161 deletions(-) create mode 100644 src/treebuilder/element-type.gperf create mode 100644 src/treebuilder/element-type.h diff --git a/src/treebuilder/Makefile b/src/treebuilder/Makefile index 31feae1..ce00a4c 100644 --- a/src/treebuilder/Makefile +++ b/src/treebuilder/Makefile @@ -6,6 +6,12 @@ DIR_SOURCES := treebuilder.c \ in_cell.c in_select.c in_select_in_table.c \ in_foreign_content.c after_body.c in_frameset.c \ after_frameset.c after_after_body.c after_after_frameset.c \ - generic_rcdata.c + generic_rcdata.c element-type.c + +$(DIR)element-type.c: $(DIR)element-type.gperf + $(VQ)$(ECHO) " GPERF: $<" + $(Q)gperf --output-file=$@ $< + +CLEAN_ITEMS := $(DIR)element-type.c include $(NSBUILD)/Makefile.subdir diff --git a/src/treebuilder/element-type.gperf b/src/treebuilder/element-type.gperf new file mode 100644 index 0000000..d4f2aa2 --- /dev/null +++ b/src/treebuilder/element-type.gperf @@ -0,0 +1,131 @@ +/* + * This file is part of Hubbub. 
+ * Licensed under the MIT License, + * http://www.opensource.org/licenses/mit-license.php + * Copyright 2021 Michael Drake + */ + +%language=ANSI-C +%compare-strncmp +%readonly-tables +%ignore-case +%struct-type +%switch=1 +%define hash-function-name hubbub_element_type_hash +%define lookup-function-name hubbub_element_type_lookup + +%{ +#include <string.h> + +#include "treebuilder/element-type.h" + +%} + +struct element_type_map; +%% +a, A +address, ADDRESS +annotation-xml, ANNOTATION_XML +applet, APPLET +area, AREA +article, ARTICLE +aside, ASIDE +b, B +base, BASE +basefont, BASEFONT +bgsound, BGSOUND +big, BIG +blockquote, BLOCKQUOTE +body, BODY +br, BR +button, BUTTON +caption, CAPTION +center, CENTER +col, COL +colgroup, COLGROUP +command, COMMAND +dd, DD +desc, DESC +details, DETAILS +dialog, DIALOG +dir, DIR +div, DIV +dl, DL +dt, DT +em, EM +embed, EMBED +fieldset, FIELDSET +figcaption, FIGCAPTION +figure, FIGURE +font, FONT +footer, FOOTER +foreignobject, FOREIGNOBJECT +form, FORM +frame, FRAME +frameset, FRAMESET +h1, H1 +h2, H2 +h3, H3 +h4, H4 +h5, H5 +h6, H6 +head, HEAD +hr, HR +html, HTML +i, I +iframe, IFRAME +image, IMAGE +img, IMG +input, INPUT +isindex, ISINDEX +li, LI +link, LINK +listing, LISTING +malignmark, MALIGNMARK +marquee, MARQUEE +math, MATH +menu, MENU +meta, META +mglyph, MGLYPH +mi, MI +mn, MN +mo, MO +ms, MS +mtext, MTEXT +nobr, NOBR +noembed, NOEMBED +noframes, NOFRAMES +noscript, NOSCRIPT +object, OBJECT +ol, OL +optgroup, OPTGROUP +option, OPTION +output, OUTPUT +p, P +param, PARAM +plaintext, PLAINTEXT +pre, PRE +s, S +script, SCRIPT +select, SELECT +small, SMALL +spacer, SPACER +strike, STRIKE +strong, STRONG +style, STYLE +summary, SUMMARY +svg, SVG +table, TABLE +tbody, TBODY +td, TD +textarea, TEXTAREA +tfoot, TFOOT +th, TH +thead, THEAD +title, TITLE +tr, TR +tt, TT +u, U +ul, UL +wbr, WBR +xmp, XMP diff --git a/src/treebuilder/element-type.h b/src/treebuilder/element-type.h new file mode 100644 index 0000000..08f58de --- /dev/null +++ 
b/src/treebuilder/element-type.h @@ -0,0 +1,49 @@ +/* + * This file is part of Hubbub. + * Licensed under the MIT License, + * http://www.opensource.org/licenses/mit-license.php + * Copyright 2008 John-Mark Bell + */ + +#ifndef hubbub_treebuilder_element_type_h_ +#define hubbub_treebuilder_element_type_h_ + +#include "treebuilder/treebuilder.h" + +typedef enum +{ +/* Special */ + ADDRESS, AREA, ARTICLE, ASIDE, BASE, BASEFONT, BGSOUND, BLOCKQUOTE, + BODY, BR, CENTER, COL, COLGROUP, COMMAND, DATAGRID, DD, DETAILS, + DIALOG, DIR, DIV, DL, DT, EMBED, FIELDSET, FIGCAPTION, FIGURE, FOOTER, + FORM, FRAME, FRAMESET, H1, H2, H3, H4, H5, H6, HEAD, HEADER, HR, IFRAME, + IMAGE, IMG, INPUT, ISINDEX, LI, LINK, LISTING, MAIN, MENU, META, NAV, + NOEMBED, NOFRAMES, NOSCRIPT, OL, OPTGROUP, OPTION, P, PARAM, PLAINTEXT, + PRE, SCRIPT, SECTION, SELECT, SPACER, STYLE, SUMMARY, TBODY, TEXTAREA, + TFOOT, THEAD, TITLE, TR, UL, WBR, +/* Scoping */ + APPLET, BUTTON, CAPTION, HTML, MARQUEE, OBJECT, TABLE, TD, TH, +/* Formatting */ + A, B, BIG, CODE, EM, FONT, I, NOBR, S, SMALL, STRIKE, STRONG, TT, U, +/* Phrasing */ + /**< \todo Enumerate phrasing elements */ + LABEL, OUTPUT, RP, RT, RUBY, SPAN, SUB, SUP, VAR, XMP, +/* MathML */ + MATH, MGLYPH, MALIGNMARK, MI, MO, MN, MS, MTEXT, ANNOTATION_XML, +/* SVG */ + SVG, FOREIGNOBJECT, /* foreignobject is scoping, but only in SVG ns */ + DESC, + UNKNOWN +} element_type; + +struct element_type_map { + const char *name; + element_type type; +}; + +const struct element_type_map *hubbub_element_type_lookup( + register const char *str, + register size_t len); + +#endif + diff --git a/src/treebuilder/internal.h b/src/treebuilder/internal.h index 3c112c9..d9e1a00 100644 --- a/src/treebuilder/internal.h +++ b/src/treebuilder/internal.h @@ -9,32 +9,7 @@ #define hubbub_treebuilder_internal_h_ #include "treebuilder/treebuilder.h" - -typedef enum -{ -/* Special */ - ADDRESS, AREA, ARTICLE, ASIDE, BASE, BASEFONT, BGSOUND, BLOCKQUOTE, - BODY, BR, CENTER, COL, 
COLGROUP, COMMAND, DATAGRID, DD, DETAILS, - DIALOG, DIR, DIV, DL, DT, EMBED, FIELDSET, FIGCAPTION, FIGURE, FOOTER, - FORM, FRAME, FRAMESET, H1, H2, H3, H4, H5, H6, HEAD, HEADER, HR, IFRAME, - IMAGE, IMG, INPUT, ISINDEX, LI, LINK, LISTING, MAIN, MENU, META, NAV, - NOEMBED, NOFRAMES, NOSCRIPT, OL, OPTGROUP, OPTION, P, PARAM, PLAINTEXT, - PRE, SCRIPT, SECTION, SELECT, SPACER, STYLE, SUMMARY, TBODY, TEXTAREA, - TFOOT, THEAD, TITLE, TR, UL, WBR, -/* Scoping */ - APPLET, BUTTON, CAPTION, HTML, MARQUEE, OBJECT, TABLE, TD, TH, -/* Formatting */ - A, B, BIG, CODE, EM, FONT, I, NOBR, S, SMALL, STRIKE, STRONG, TT, U, -/* Phrasing */ - /**< \todo Enumerate phrasing elements */ - LABEL, OUTPUT, RP, RT, RUBY, SPAN, SUB, SUP, VAR, XMP, -/* MathML */ - MATH, MGLYPH, MALIGNMARK, MI, MO, MN, MS, MTEXT, ANNOTATION_XML, -/* SVG */ - SVG, FOREIGNOBJECT, /* foreignobject is scoping, but only in SVG ns */ - DESC, - UNKNOWN -} element_type; +#include "treebuilder/element-type.h" /** * Item on the element stack diff --git a/src/treebuilder/treebuilder.c b/src/treebuilder/treebuilder.c index b84ca11..d2a186d 100644 --- a/src/treebuilder/treebuilder.c +++ b/src/treebuilder/treebuilder.c @@ -17,127 +17,6 @@ #include "utils/utils.h" #include "utils/string.h" - -#define S(x) x, SLEN(x) - -static const struct { - const char *name; - size_t len; - element_type type; -} name_type_map[] = { - { S("address"), ADDRESS }, - { S("area"), AREA }, - { S("article"), ARTICLE }, - { S("aside"), ASIDE }, - { S("base"), BASE }, - { S("basefont"), BASEFONT }, - { S("bgsound"), BGSOUND }, - { S("blockquote"), BLOCKQUOTE }, - { S("body"), BODY }, - { S("br"), BR }, - { S("center"), CENTER }, - { S("col"), COL }, - { S("colgroup"), COLGROUP }, - { S("command"), COMMAND }, - { S("dd"), DD }, - { S("details"), DETAILS }, - { S("dialog"), DIALOG }, - { S("dir"), DIR }, - { S("div"), DIV }, - { S("dl"), DL }, - { S("dt"), DT }, - { S("embed"), EMBED }, - { S("fieldset"), FIELDSET }, - { S("figcaption"), FIGCAPTION }, 
- { S("figure"), FIGURE }, - { S("footer"), FOOTER }, - { S("form"), FORM }, - { S("frame"), FRAME }, - { S("frameset"), FRAMESET }, - { S("h1"), H1 }, - { S("h2"), H2 }, - { S("h3"), H3 }, - { S("h4"), H4 }, - { S("h5"), H5 }, - { S("h6"), H6 }, - { S("head"), HEAD }, - { S("hr"), HR }, - { S("iframe"), IFRAME }, - { S("image"), IMAGE }, - { S("img"), IMG }, - { S("input"), INPUT }, - { S("isindex"), ISINDEX }, - { S("li"), LI }, - { S("link"), LINK }, - { S("listing"), LISTING }, - { S("menu"), MENU }, - { S("meta"), META }, - { S("noembed"), NOEMBED }, - { S("noframes"), NOFRAMES }, - { S("noscript"), NOSCRIPT }, - { S("ol"), OL }, - { S("optgroup"), OPTGROUP }, - { S("option"), OPTION }, - { S("output"), OUTPUT }, - { S("p"), P }, - { S("param"), PARAM }, - { S("plaintext"), PLAINTEXT }, - { S("pre"), PRE }, - { S("script"), SCRIPT }, - { S("select"), SELECT }, - { S("spacer"), SPACER }, - { S("style"), STYLE }, - { S("summary"), SUMMARY }, - { S("tbody"), TBODY }, - { S("textarea"), TEXTAREA }, - { S("tfoot"), TFOOT }, - { S("thead"), THEAD }, - { S("title"), TITLE }, - { S("tr"), TR }, - { S("ul"), UL }, - { S("wbr"), WBR }, - - { S("applet"), APPLET }, - { S("button"), BUTTON }, - { S("caption"), CAPTION }, - { S("html"), HTML }, - { S("marquee"), MARQUEE }, - { S("object"), OBJECT }, - { S("table"), TABLE }, - { S("td"), TD }, - { S("th"), TH }, - - { S("a"), A }, - { S("b"), B }, - { S("big"), BIG }, - { S("em"), EM }, - { S("font"), FONT }, - { S("i"), I }, - { S("nobr"), NOBR }, - { S("s"), S }, - { S("small"), SMALL }, - { S("strike"), STRIKE }, - { S("strong"), STRONG }, - { S("tt"), TT }, - { S("u"), U }, - - { S("xmp"), XMP }, - - { S("math"), MATH }, - { S("mglyph"), MGLYPH }, - { S("malignmark"), MALIGNMARK }, - { S("mi"), MI }, - { S("mo"), MO }, - { S("mn"), MN }, - { S("ms"), MS }, - { S("mtext"), MTEXT }, - { S("annotation-xml"), ANNOTATION_XML }, - - { S("svg"), SVG }, - { S("desc"), DESC }, - { S("foreignobject"), FOREIGNOBJECT }, -}; - 
static bool is_form_associated(element_type type); /** @@ -1045,24 +924,17 @@ hubbub_error append_text(hubbub_treebuilder *treebuilder, element_type element_type_from_name(hubbub_treebuilder *treebuilder, const hubbub_string *tag_name) { - const uint8_t *name = tag_name->ptr; - size_t len = tag_name->len; - uint32_t i; + const struct element_type_map *value; UNUSED(treebuilder); - /** \todo optimise this */ - - for (i = 0; i < N_ELEMENTS(name_type_map); i++) { - if (name_type_map[i].len != len) - continue; - - if (strncasecmp(name_type_map[i].name, - (const char *) name, len) == 0) - return name_type_map[i].type; + value = hubbub_element_type_lookup((const char *)tag_name->ptr, + tag_name->len); + if (value == NULL) { + return UNKNOWN; } - return UNKNOWN; + return value->type; } /** -- cgit v1.2.1