summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile9
-rw-r--r--src/html_blocks.h205
-rwxr-xr-xsrc/markdown.c104
3 files changed, 231 insertions, 87 deletions
diff --git a/Makefile b/Makefile
index 56dc501..de38f2a 100644
--- a/Makefile
+++ b/Makefile
@@ -23,7 +23,7 @@ CFLAGS=-c -g -O3 -fPIC -Wall -Werror -Isrc -Ihtml $(MFLAGS)
LDFLAGS=-g -O3 -Wall -Werror $(MFLAGS)
CC=gcc
-all: libsundown.so sundown smartypants
+all: libsundown.so sundown smartypants html_blocks
.PHONY: all clean
@@ -43,6 +43,13 @@ sundown: examples/sundown.o src/markdown.o src/array.o src/autolink.o src/buffer
smartypants: examples/smartypants.o src/buffer.o html/html_smartypants.o html/html.o src/autolink.o
$(CC) $(LDFLAGS) $^ -o $@
+# perfect hashing: src/html_blocks.h is generated by gperf from the list of
+# block-level tag names. "html_blocks" is only a convenience alias, so it is
+# declared phony to keep a stray file of that name from masking the rule.
+.PHONY: html_blocks
+html_blocks: src/html_blocks.h
+
+# Generate into a temporary file and rename it on success, so that a failed
+# or interrupted gperf run cannot leave behind a truncated header that make
+# would then treat as up to date.
+src/html_blocks.h: html_block_names.txt
+	gperf -N find_block_tag -H hash_block_tag -C -c -E --ignore-case $^ > $@.tmp
+	mv $@.tmp $@
+
+
# housekeeping
clean:
rm -f src/*.o html/*.o examples/*.o
diff --git a/src/html_blocks.h b/src/html_blocks.h
new file mode 100644
index 0000000..bbd0c76
--- /dev/null
+++ b/src/html_blocks.h
@@ -0,0 +1,205 @@
+/* C code produced by gperf version 3.0.3 */
+/* Command-line: gperf -N find_block_tag -H hash_block_tag -C -c -E --ignore-case html_block_names.txt */
+/* Computed positions: -k'1-2' */
+
+/*
+ * Compile-time guard: the lookup code in this header indexes its tables
+ * directly by character code, so it is only valid when the execution
+ * character set gives every character tested here its ISO-646 (ASCII) value.
+ *
+ * NOTE(review): the failing branch holds a bare `error "..."` line, not a
+ * `#error` directive. It is therefore not handled by the preprocessor; on a
+ * mismatching character set it would reach the compiler proper and fail
+ * there. Presumably this is deliberate gperf output -- confirm before
+ * changing it by hand.
+ */
+#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
+ && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
+ && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
+ && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
+ && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
+ && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
+ && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
+ && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
+ && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
+ && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
+ && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
+ && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
+ && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
+ && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
+ && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
+ && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
+ && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
+ && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
+ && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
+ && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
+ && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
+ && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
+ && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
+/* The character set is not based on ISO-646. */
+error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>."
+#endif
+
+/* maximum key range = 37, duplicates = 0 */
+
+/*
+ * Case-folding table used by gperf_case_strncmp(): every byte value maps to
+ * itself, except that 'A'..'Z' (65..90) map to 'a'..'z' (97..122).
+ * The whole definition is skipped when the includer has already defined
+ * GPERF_DOWNCASE.
+ */
+#ifndef GPERF_DOWNCASE
+#define GPERF_DOWNCASE 1
+static unsigned char gperf_downcase[256] =
+ {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
+ 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+ 60, 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106,
+ 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
+ 122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
+ 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
+ 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
+ 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
+ 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
+ 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
+ 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
+ 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
+ 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
+ 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
+ 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
+ 255
+ };
+#endif
+
+/*
+ * gperf_case_strncmp - compare at most n bytes of s1 and s2, ignoring the
+ * case of ASCII letters (bytes are folded through gperf_downcase[]).
+ *
+ * Returns 0 when the first n bytes match, or when both strings hold a NUL
+ * at the same position before n bytes were compared; otherwise returns the
+ * difference between the first pair of folded bytes that differ.
+ *
+ * The definition is skipped when the includer has already defined
+ * GPERF_CASE_STRNCMP.
+ */
+#ifndef GPERF_CASE_STRNCMP
+#define GPERF_CASE_STRNCMP 1
+static int
+gperf_case_strncmp (s1, s2, n)
+ register const char *s1;
+ register const char *s2;
+ register unsigned int n;
+{
+ for (; n > 0;)
+ {
+ unsigned char c1 = gperf_downcase[(unsigned char)*s1++];
+ unsigned char c2 = gperf_downcase[(unsigned char)*s2++];
+ /* keep going only while the folded bytes agree and s1 has not ended */
+ if (c1 != 0 && c1 == c2)
+ {
+ n--;
+ continue;
+ }
+ /* mismatch, or NUL reached in s1: 0 here only if s2 ended too */
+ return (int)c1 - (int)c2;
+ }
+ return 0;
+}
+#endif
+
+/*
+ * hash_block_tag - gperf-generated hash over the block-tag keyword set.
+ *
+ * str: candidate tag name; str[0] is always read, and str[1] is read for
+ *      every len other than 1.
+ * len: number of bytes in the candidate; it seeds the hash value.
+ *
+ * Returns len plus the association value of the first byte and, when len
+ * is not 1, of the second byte (looked up at index str[1] + 1, which is why
+ * asso_values has 257 entries rather than 256).
+ *
+ * Upper- and lower-case ASCII letters carry identical association values,
+ * so the hash does not depend on letter case. Every other entry is 38, one
+ * more than the largest keyword hash (37), so a byte that no keyword uses
+ * in a hashed position pushes the result past the valid range.
+ *
+ * The function does not validate len itself; note that the `default` arm
+ * also handles len == 0 and would still read str[0] and str[1].
+ */
+#ifdef __GNUC__
+__inline
+#else
+#ifdef __cplusplus
+inline
+#endif
+#endif
+static unsigned int
+hash_block_tag (str, len)
+ register const char *str;
+ register unsigned int len;
+{
+ static const unsigned char asso_values[] =
+ {
+ 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
+ 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
+ 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
+ 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
+ 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
+ 8, 30, 25, 20, 15, 10, 38, 38, 38, 38,
+ 38, 38, 38, 38, 38, 38, 0, 38, 0, 38,
+ 5, 5, 5, 15, 0, 38, 38, 0, 15, 10,
+ 0, 38, 38, 15, 0, 5, 38, 38, 38, 38,
+ 38, 38, 38, 38, 38, 38, 38, 38, 0, 38,
+ 0, 38, 5, 5, 5, 15, 0, 38, 38, 0,
+ 15, 10, 0, 38, 38, 15, 0, 5, 38, 38,
+ 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
+ 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
+ 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
+ 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
+ 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
+ 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
+ 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
+ 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
+ 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
+ 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
+ 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
+ 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
+ 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
+ 38, 38, 38, 38, 38, 38, 38
+ };
+ register int hval = len;
+
+ switch (hval)
+ {
+ default:
+ hval += asso_values[(unsigned char)str[1]+1];
+ /*FALLTHROUGH*/
+ case 1:
+ hval += asso_values[(unsigned char)str[0]];
+ break;
+ }
+ return hval;
+}
+
+/*
+ * find_block_tag - case-insensitive lookup of a block-level HTML tag name.
+ *
+ * str: candidate tag name (need not be NUL-terminated at len).
+ * len: exact number of bytes of the candidate name.
+ *
+ * Returns a pointer to the matching lower-case entry of the static keyword
+ * table, or 0 when the candidate is not one of the 23 known tags. The
+ * returned string is the table's own spelling, not a pointer into str.
+ *
+ * NOTE(review): unlike hash_block_tag this function is not declared
+ * `static`, and without __GNUC__ it carries no inline qualifier at all, so
+ * including this header from more than one translation unit would define
+ * the symbol more than once -- confirm it is included from a single file.
+ */
+#ifdef __GNUC__
+__inline
+#ifdef __GNUC_STDC_INLINE__
+__attribute__ ((__gnu_inline__))
+#endif
+#endif
+const char *
+find_block_tag (str, len)
+ register const char *str;
+ register unsigned int len;
+{
+ enum
+ {
+ TOTAL_KEYWORDS = 23,
+ MIN_WORD_LENGTH = 1,
+ MAX_WORD_LENGTH = 10,
+ MIN_HASH_VALUE = 1,
+ MAX_HASH_VALUE = 37
+ };
+
+ /* indexed by hash value; "" marks hash values that no keyword maps to */
+ static const char * const wordlist[] =
+ {
+ "",
+ "p",
+ "dl",
+ "div",
+ "math",
+ "table",
+ "",
+ "ul",
+ "del",
+ "form",
+ "blockquote",
+ "figure",
+ "ol",
+ "fieldset",
+ "",
+ "h1",
+ "",
+ "h6",
+ "pre",
+ "", "",
+ "script",
+ "h5",
+ "noscript",
+ "", "",
+ "iframe",
+ "h4",
+ "ins",
+ "", "", "",
+ "h3",
+ "", "", "", "",
+ "h2"
+ };
+
+ /* reject lengths no keyword has; this also guarantees len >= 1 for the hash */
+ if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
+ {
+ register int key = hash_block_tag (str, len);
+
+ if (key <= MAX_HASH_VALUE && key >= 0)
+ {
+ register const char *s = wordlist[key];
+
+ /* cheap first-byte test (equal apart from bit 5, the ASCII case bit),
+  * then a full case-insensitive compare of len bytes; s[len] == '\0'
+  * rejects keywords that are longer than the candidate */
+ if ((((unsigned char)*str ^ (unsigned char)*s) & ~32) == 0 && !gperf_case_strncmp (str, s, len) && s[len] == '\0')
+ return s;
+ }
+ }
+ return 0;
+}
diff --git a/src/markdown.c b/src/markdown.c
index fa28eef..f769a8b 100755
--- a/src/markdown.c
+++ b/src/markdown.c
@@ -31,6 +31,11 @@
#define MKD_LI_END 8 /* internal list flag */
+/*
+ * Configure the gperf-generated lookup before including it: route
+ * gperf_case_strncmp() to strncasecmp(), and pre-define the two guard
+ * macros so that html_blocks.h skips its own case-folding table and
+ * comparison function. These definitions must precede the #include.
+ */
+#define gperf_case_strncmp(s1, s2, n) strncasecmp(s1, s2, n)
+#define GPERF_DOWNCASE 1
+#define GPERF_CASE_STRNCMP 1
+#include "html_blocks.h"
+
/***************
* LOCAL TYPES *
***************/
@@ -104,11 +109,9 @@ struct render {
size_t max_nesting;
};
-/* html_tag • structure for quick HTML tag search (inspired from discount) */
-struct html_tag {
- const char *text;
- size_t size;
-};
+/***************************
+ * HELPER FUNCTIONS *
+ ***************************/
static inline struct buf *
rndr_newbuf(struct render *rndr, int type)
@@ -134,43 +137,6 @@ rndr_popbuf(struct render *rndr, int type)
rndr->work_bufs[type].size--;
}
-/********************
- * GLOBAL VARIABLES *
- ********************/
-
-/* block_tags • recognised block tags, sorted by cmp_html_tag */
-static struct html_tag block_tags[] = {
-/*0*/ { "p", 1 },
- { "dl", 2 },
- { "h1", 2 },
- { "h2", 2 },
- { "h3", 2 },
- { "h4", 2 },
- { "h5", 2 },
- { "h6", 2 },
- { "ol", 2 },
- { "ul", 2 },
- { "del", 3 }, /* 10 */
- { "div", 3 },
- { "ins", 3 }, /* 12 */
- { "pre", 3 },
- { "form", 4 },
- { "math", 4 },
- { "table", 5 },
- { "figure", 6 },
- { "iframe", 6 },
- { "script", 6 },
- { "fieldset", 8 },
- { "noscript", 8 },
- { "blockquote", 10 }
-};
-
-#define INS_TAG (block_tags + 12)
-#define DEL_TAG (block_tags + 10)
-
-/***************************
- * HELPER FUNCTIONS *
- ***************************/
static void
unscape_text(struct buf *ob, struct buf *src)
{
@@ -208,39 +174,6 @@ cmp_link_ref_sort(const void *a, const void *b)
return bufcasecmp(lra->id, lrb->id);
}
-/* cmp_html_tag • comparison function for bsearch() (stolen from discount) */
-static int
-cmp_html_tag(const void *a, const void *b)
-{
- const struct html_tag *hta = a;
- const struct html_tag *htb = b;
- if (hta->size != htb->size) return (int)(hta->size - htb->size);
- return strncasecmp(hta->text, htb->text, hta->size);
-}
-
-
-/* find_block_tag • returns the current block tag */
-static struct html_tag *
-find_block_tag(char *data, size_t size)
-{
- size_t i = 0;
- struct html_tag key;
-
- /* looking for the word end */
- while (i < size && ((data[i] >= '0' && data[i] <= '9')
- || (data[i] >= 'A' && data[i] <= 'Z')
- || (data[i] >= 'a' && data[i] <= 'z')))
- i++;
- if (i >= size) return 0;
-
- /* binary search of the tag */
- key.text = data;
- key.size = i;
- return bsearch(&key, block_tags,
- sizeof block_tags / sizeof block_tags[0],
- sizeof block_tags[0], cmp_html_tag);
-}
-
/****************************
* INLINE PARSING FUNCTIONS *
****************************/
@@ -1680,20 +1613,18 @@ parse_atxheader(struct buf *ob, struct render *rndr, char *data, size_t size)
/* htmlblock_end • checking end of HTML block : </tag>[ \t]*\n[ \t]*\n */
/* returns the length on match, 0 otherwise */
static size_t
-htmlblock_end(struct html_tag *tag, struct render *rndr, char *data, size_t size)
+htmlblock_end(const char *tag, size_t tag_len, struct render *rndr, char *data, size_t size)
{
size_t i, w;
- /* assuming data[0] == '<' && data[1] == '/' already tested */
-
/* checking if tag is a match */
- if (tag->size + 3 >= size
- || strncasecmp(data + 2, tag->text, tag->size)
- || data[tag->size + 2] != '>')
+ if (tag_len + 3 >= size ||
+ strncasecmp(data + 2, tag, tag_len) != 0 ||
+ data[tag_len + 2] != '>')
return 0;
/* checking white lines */
- i = tag->size + 3;
+ i = tag_len + 3;
w = 0;
if (i < size && (w = is_empty(data + i, size - i)) == 0)
return 0; /* non-blank after tag */
@@ -1717,7 +1648,7 @@ static size_t
parse_htmlblock(struct buf *ob, struct render *rndr, char *data, size_t size, int do_render)
{
size_t i, j = 0;
- struct html_tag *curtag;
+ const char *curtag;
int found;
struct buf work = { data, 0, 0, 0, 0 };
@@ -1777,17 +1708,18 @@ parse_htmlblock(struct buf *ob, struct render *rndr, char *data, size_t size, in
/* if not found, trying a second pass looking for indented match */
/* but not if tag is "ins" or "del" (following original Markdown.pl) */
- if (curtag != INS_TAG && curtag != DEL_TAG) {
+ if (strcmp(curtag, "ins") != 0 && strcmp(curtag, "del") != 0) {
+ size_t tag_size = strlen(curtag);
i = 1;
while (i < size) {
i++;
while (i < size && !(data[i - 1] == '<' && data[i] == '/'))
i++;
- if (i + 2 + curtag->size >= size)
+ if (i + 2 + tag_size >= size)
break;
- j = htmlblock_end(curtag, rndr, data + i - 1, size - i + 1);
+ j = htmlblock_end(curtag, tag_size, rndr, data + i - 1, size - i + 1);
if (j) {
i += j - 1;