diff options
-rw-r--r-- | Makefile | 9 | ||||
-rw-r--r-- | src/html_blocks.h | 205 | ||||
-rwxr-xr-x | src/markdown.c | 104 |
3 files changed, 231 insertions, 87 deletions
@@ -23,7 +23,7 @@ CFLAGS=-c -g -O3 -fPIC -Wall -Werror -Isrc -Ihtml $(MFLAGS) LDFLAGS=-g -O3 -Wall -Werror $(MFLAGS) CC=gcc -all: libsundown.so sundown smartypants +all: libsundown.so sundown smartypants html_blocks .PHONY: all clean @@ -43,6 +43,13 @@ sundown: examples/sundown.o src/markdown.o src/array.o src/autolink.o src/buffer smartypants: examples/smartypants.o src/buffer.o html/html_smartypants.o html/html.o src/autolink.o $(CC) $(LDFLAGS) $^ -o $@ +# perfect hashing +html_blocks: src/html_blocks.h + +src/html_blocks.h: html_block_names.txt + gperf -N find_block_tag -H hash_block_tag -C -c -E --ignore-case $^ > $@ + + # housekeeping clean: rm -f src/*.o html/*.o examples/*.o diff --git a/src/html_blocks.h b/src/html_blocks.h new file mode 100644 index 0000000..bbd0c76 --- /dev/null +++ b/src/html_blocks.h @@ -0,0 +1,205 @@ +/* C code produced by gperf version 3.0.3 */ +/* Command-line: gperf -N find_block_tag -H hash_block_tag -C -c -E --ignore-case html_block_names.txt */ +/* Computed positions: -k'1-2' */ + +#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ + && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \ + && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \ + && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \ + && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \ + && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \ + && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \ + && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \ + && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \ + && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \ + && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \ + && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \ + && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \ + && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \ + && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \ + && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \ + && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \ + && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \ + && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \ + && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \ + && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \ + && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \ + && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126)) +/* The character set is not based on ISO-646. */ +error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>." +#endif + +/* maximum key range = 37, duplicates = 0 */ + +#ifndef GPERF_DOWNCASE +#define GPERF_DOWNCASE 1 +static unsigned char gperf_downcase[256] = + { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, + 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, + 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, + 122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, + 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, + 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, + 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, + 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, + 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, + 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, + 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, + 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, + 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, + 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, + 255 + }; +#endif + +#ifndef GPERF_CASE_STRNCMP +#define GPERF_CASE_STRNCMP 1 +static int +gperf_case_strncmp (s1, s2, n) + register const char *s1; + register const char *s2; + register unsigned int n; +{ + for (; n > 0;) + { + unsigned char c1 = gperf_downcase[(unsigned char)*s1++]; + unsigned char c2 = gperf_downcase[(unsigned char)*s2++]; + if (c1 != 0 && c1 == c2) + { + n--; + continue; + } + return (int)c1 - (int)c2; + } + return 0; +} +#endif + +#ifdef __GNUC__ +__inline +#else +#ifdef __cplusplus +inline +#endif +#endif +static unsigned int +hash_block_tag (str, len) + register const char *str; + register unsigned int len; +{ + static const unsigned char asso_values[] = + { + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 8, 30, 25, 20, 15, 10, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 0, 38, 0, 38, + 5, 5, 5, 15, 0, 38, 38, 0, 15, 10, + 0, 38, 38, 15, 0, 5, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, 0, 38, + 0, 38, 5, 5, 5, 15, 0, 38, 38, 0, + 15, 10, 0, 38, 38, 15, 0, 5, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38 + }; + register int hval = len; + + switch (hval) + { + default: + hval += asso_values[(unsigned char)str[1]+1]; + /*FALLTHROUGH*/ + case 1: + hval += asso_values[(unsigned char)str[0]]; + break; + } + return hval; +} + +#ifdef __GNUC__ +__inline +#ifdef __GNUC_STDC_INLINE__ +__attribute__ ((__gnu_inline__)) +#endif +#endif +const char * +find_block_tag (str, len) + register const char *str; + register unsigned int len; +{ + enum + { + TOTAL_KEYWORDS = 23, + MIN_WORD_LENGTH = 1, + MAX_WORD_LENGTH = 10, + MIN_HASH_VALUE = 1, + MAX_HASH_VALUE = 37 + }; + + static const char * const wordlist[] = + { + "", + "p", + "dl", + "div", + "math", + "table", + "", + "ul", + "del", + "form", + "blockquote", + "figure", + "ol", + "fieldset", + "", + "h1", + "", + "h6", + "pre", + "", "", + "script", + "h5", + "noscript", + "", "", + "iframe", + "h4", + "ins", + "", "", "", + "h3", + "", "", "", "", + "h2" + }; + + if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) + { + register int key = hash_block_tag (str, len); + + if (key <= MAX_HASH_VALUE && key >= 0) + { + register const char *s = wordlist[key]; + + if ((((unsigned char)*str ^ (unsigned char)*s) & ~32) == 0 && !gperf_case_strncmp (str, s, len) && s[len] == '\0') + return s; + } + } + return 0; +} diff --git a/src/markdown.c b/src/markdown.c index fa28eef..f769a8b 100755 --- a/src/markdown.c +++ b/src/markdown.c @@ -31,6 +31,11 @@ #define MKD_LI_END 8 /* internal list flag */ +#define gperf_case_strncmp(s1, s2, n) strncasecmp(s1, s2, n) +#define GPERF_DOWNCASE 1 +#define GPERF_CASE_STRNCMP 1 +#include "html_blocks.h" + /*************** * LOCAL TYPES * ***************/ @@ -104,11 +109,9 @@ struct render { size_t max_nesting; }; -/* html_tag • structure for quick HTML tag search (inspired from discount) */ -struct html_tag { - const char *text; - size_t size; -}; +/*************************** + * HELPER FUNCTIONS * + ***************************/ static inline struct buf * rndr_newbuf(struct render *rndr, int type) @@ -134,43 +137,6 @@ rndr_popbuf(struct render *rndr, int type) rndr->work_bufs[type].size--; } -/******************** - * GLOBAL VARIABLES * - ********************/ - -/* block_tags • recognised block tags, sorted by cmp_html_tag */ -static struct html_tag block_tags[] = { -/*0*/ { "p", 1 }, - { "dl", 2 }, - { "h1", 2 }, - { "h2", 2 }, - { "h3", 2 }, - { "h4", 2 }, - { "h5", 2 }, - { "h6", 2 }, - { "ol", 2 }, - { "ul", 2 }, - { "del", 3 }, /* 10 */ - { "div", 3 }, - { "ins", 3 }, /* 12 */ - { "pre", 3 }, - { "form", 4 }, - { "math", 4 }, - { "table", 5 }, - { "figure", 6 }, - { "iframe", 6 }, - { "script", 6 }, - { "fieldset", 8 }, - { "noscript", 8 }, - { "blockquote", 10 } -}; - -#define INS_TAG (block_tags + 12) -#define DEL_TAG (block_tags + 10) - -/*************************** - * HELPER FUNCTIONS * - ***************************/ static void unscape_text(struct buf *ob, struct buf *src) { @@ -208,39 +174,6 @@ cmp_link_ref_sort(const void *a, const void *b) return bufcasecmp(lra->id, lrb->id); } -/* cmp_html_tag • comparison function for bsearch() (stolen from discount) */ -static int -cmp_html_tag(const void *a, const void *b) -{ - const struct html_tag *hta = a; - const struct html_tag *htb = b; - if (hta->size != htb->size) return (int)(hta->size - htb->size); - return strncasecmp(hta->text, htb->text, hta->size); -} - - -/* find_block_tag • returns the current block tag */ -static struct html_tag * -find_block_tag(char *data, size_t size) -{ - size_t i = 0; - struct html_tag key; - - /* looking for the word end */ - while (i < size && ((data[i] >= '0' && data[i] <= '9') - || (data[i] >= 'A' && data[i] <= 'Z') - || (data[i] >= 'a' && data[i] <= 'z'))) - i++; - if (i >= size) return 0; - - /* binary search of the tag */ - key.text = data; - key.size = i; - return bsearch(&key, block_tags, - sizeof block_tags / sizeof block_tags[0], - sizeof block_tags[0], cmp_html_tag); -} - /**************************** * INLINE PARSING FUNCTIONS * ****************************/ @@ -1680,20 +1613,18 @@ parse_atxheader(struct buf *ob, struct render *rndr, char *data, size_t size) /* htmlblock_end • checking end of HTML block : </tag>[ \t]*\n[ \t*]\n */ /* returns the length on match, 0 otherwise */ static size_t -htmlblock_end(struct html_tag *tag, struct render *rndr, char *data, size_t size) +htmlblock_end(const char *tag, size_t tag_len, struct render *rndr, char *data, size_t size) { size_t i, w; - /* assuming data[0] == '<' && data[1] == '/' already tested */ - /* checking if tag is a match */ - if (tag->size + 3 >= size - || strncasecmp(data + 2, tag->text, tag->size) - || data[tag->size + 2] != '>') + if (tag_len + 3 >= size || + strncasecmp(data + 2, tag, tag_len) != 0 || + data[tag_len + 2] != '>') return 0; /* checking white lines */ - i = tag->size + 3; + i = tag_len + 3; w = 0; if (i < size && (w = is_empty(data + i, size - i)) == 0) return 0; /* non-blank after tag */ @@ -1717,7 +1648,7 @@ static size_t parse_htmlblock(struct buf *ob, struct render *rndr, char *data, size_t size, int do_render) { size_t i, j = 0; - struct html_tag *curtag; + const char *curtag; int found; struct buf work = { data, 0, 0, 0, 0 }; @@ -1777,17 +1708,18 @@ parse_htmlblock(struct buf *ob, struct render *rndr, char *data, size_t size, in /* if not found, trying a second pass looking for indented match */ /* but not if tag is "ins" or "del" (following original Markdown.pl) */ - if (curtag != INS_TAG && curtag != DEL_TAG) { + if (strcmp(curtag, "ins") != 0 && strcmp(curtag, "del") != 0) { + size_t tag_size = strlen(curtag); i = 1; while (i < size) { i++; while (i < size && !(data[i - 1] == '<' && data[i] == '/')) i++; - if (i + 2 + curtag->size >= size) + if (i + 2 + tag_size >= size) break; - j = htmlblock_end(curtag, rndr, data + i - 1, size - i + 1); + j = htmlblock_end(curtag, tag_size, rndr, data + i - 1, size - i + 1); if (j) { i += j - 1; |