diff options
author | H. Peter Anvin <hpa@zytor.com> | 2011-05-02 23:36:41 -0700 |
---|---|---|
committer | H. Peter Anvin <hpa@zytor.com> | 2011-05-02 23:39:43 -0700 |
commit | 40068cfcbf723c9363194bf20a80b80a53c249fd (patch) | |
tree | b5e0b3aaec9981ac7d97a030e07d32d9de6836de /core/fs | |
parent | 2da6006eedc7a1481b669090dc112411fbf8b384 (diff) | |
download | syslinux-40068cfcbf723c9363194bf20a80b80a53c249fd.tar.gz |
pxe, http: support readdir (ls) over http (tag: syslinux-4.10-pre12)
Use a heuristic http index parser (which is assumed to work with most
webserver-generated indices) to support ls over http.
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Diffstat (limited to 'core/fs')
-rw-r--r-- | core/fs/pxe/http.c | 8 | ||||
-rw-r--r-- | core/fs/pxe/http_readdir.c | 515 | ||||
-rw-r--r-- | core/fs/pxe/pxe.c | 18 | ||||
-rw-r--r-- | core/fs/pxe/pxe.h | 4 |
4 files changed, 542 insertions, 3 deletions
diff --git a/core/fs/pxe/http.c b/core/fs/pxe/http.c
index 3f2bb15a..1fd87aa6 100644
--- a/core/fs/pxe/http.c
+++ b/core/fs/pxe/http.c
@@ -145,6 +145,12 @@ void http_bake_cookies(void)
     http_do_bake_cookies(cookie_buf);
 }
 
+static const struct pxe_conn_ops http_conn_ops = {
+    .fill_buffer    = tcp_fill_buffer,
+    .close          = tcp_close_file,
+    .readdir        = http_readdir,
+};
+
 void http_open(struct url_info *url, int flags, struct inode *inode,
                const char **redir)
 {
@@ -179,7 +185,7 @@ void http_open(struct url_info *url, int flags, struct inode *inode,
         return;                 /* http is broken... */
 
     /* This is a straightforward TCP connection after headers */
-    socket->ops = &tcp_conn_ops;
+    socket->ops = &http_conn_ops;
 
     /* Reset all of the variables */
     inode->size = content_length = -1;
diff --git a/core/fs/pxe/http_readdir.c b/core/fs/pxe/http_readdir.c
new file mode 100644
index 00000000..b6e480e7
--- /dev/null
+++ b/core/fs/pxe/http_readdir.c
@@ -0,0 +1,515 @@
+/* ----------------------------------------------------------------------- *
+ *
+ *   Copyright 2011 Intel Corporation; author: H. Peter Anvin
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ *   Boston MA 02110-1301, USA; either version 2 of the License, or
+ *   (at your option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+#include <inttypes.h>
+#include <string.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <dprintf.h>
+#include "pxe.h"
+
+/*
+ * States of the scanner that picks href values out of an HTML index
+ * page.  Each value is also the row index into statemachine[].
+ */
+enum http_readdir_state {
+    st_start,           /*  0 Initial state */
+    st_open,            /*  1 "<" */
+    st_a,               /*  2 "<a" */
+    st_attribute,       /*  3 "<a " */
+    st_h,               /*  4 "<a h" */
+    st_hr,              /*  5 "<a hr" */
+    st_hre,             /*  6 "<a hre" */
+    st_href,            /*  7 "<a href" */
+    st_hrefeq,          /*  8 "<a href=", unquoted value follows */
+    st_hrefqu,          /*  9 "<a href=" followed by an opening quote */
+    st_badtag,          /* 10 In a tag other than <a> */
+    st_badtagqu,        /* 11 Quoted string in such a tag */
+    st_badattr,         /* 12 In an <a> attribute other than href */
+    st_badattrqu,       /* 13 Quoted string in such an attribute */
+};
+
+/*
+ * One row of the transition table.  The lowercased input is compared
+ * with xchar first; failing that the input is classified as '<', '>',
+ * whitespace or other, and the matching st_* member is the next state.
+ */
+struct machine {
+    char xchar;
+    uint8_t st_xchar;
+    uint8_t st_left;            /* < */
+    uint8_t st_right;           /* > */
+    uint8_t st_space;           /* white */
+    uint8_t st_other;           /* anything else */
+};
+
+static const struct machine statemachine[] = {
+    /* xchar st_xchar      st_left       st_right      st_space      st_other */
+    { 0,    0,            st_open,      st_start,     st_start,     st_start },
+    { 'a',  st_a,         st_badtag,    st_start,     st_open,      st_badtag },
+    { 0,    0,            st_open,      st_open,      st_attribute, st_badtag },
+    { 'h',  st_h,         st_open,      st_start,     st_attribute, st_badattr },
+    { 'r',  st_hr,        st_open,      st_start,     st_attribute, st_badattr },
+    { 'e',  st_hre,       st_open,      st_start,     st_attribute, st_badattr },
+    { 'f',  st_href,      st_open,      st_start,     st_attribute, st_badattr },
+    { '=',  st_hrefeq,    st_open,      st_start,     st_attribute, st_badattr },
+    { '\"', st_hrefqu,    st_open,      st_start,     st_attribute, st_hrefeq },
+    { '\"', st_attribute, st_hrefqu,    st_hrefqu,    st_hrefqu,    st_hrefqu },
+    { '\"', st_badtagqu,  st_open,      st_start,     st_badtag,    st_badtag },
+    { '\"', st_badtag,    st_badtagqu,  st_badtagqu,  st_badtagqu,  st_badtagqu },
+    { '\"', st_badattrqu, st_open,      st_start,     st_attribute, st_badattr },
+    { '\"', st_attribute, st_badattrqu, st_badattrqu, st_badattrqu, st_badattrqu },
+};
+
+/* A named HTML character entity and the code point it stands for */
+struct html_entity {
+    uint16_t ucs;
+    const char entity[9];
+};
+
+/* Entity table; the end is marked by an entry with ucs == 0 */
+static const struct html_entity entities[] = {
+    {   34, "quot" },
+    {   38, "amp" },
+    {   60, "lt" },
+    {   62, "gt" },
+#ifdef HTTP_ALL_ENTITIES
+    {  160, "nbsp" },
+    {  161, "iexcl" },
+    {  162, "cent" },
+    {  163, "pound" },
+    {  164, "curren" },
+    {  165, "yen" },
+    {  166, "brvbar" },
+    {  167, "sect" },
+    {  168, "uml" },
+    {  169, "copy" },
+    {  170, "ordf" },
+    {  171, "laquo" },
+    {  172, "not" },
+    {  173, "shy" },
+    {  174, "reg" },
+    {  175, "macr" },
+    {  176, "deg" },
+    {  177, "plusmn" },
+    {  178, "sup2" },
+    {  179, "sup3" },
+    {  180, "acute" },
+    {  181, "micro" },
+    {  182, "para" },
+    {  183, "middot" },
+    {  184, "cedil" },
+    {  185, "sup1" },
+    {  186, "ordm" },
+    {  187, "raquo" },
+    {  188, "frac14" },
+    {  189, "frac12" },
+    {  190, "frac34" },
+    {  191, "iquest" },
+    {  192, "Agrave" },
+    {  193, "Aacute" },
+    {  194, "Acirc" },
+    {  195, "Atilde" },
+    {  196, "Auml" },
+    {  197, "Aring" },
+    {  198, "AElig" },
+    {  199, "Ccedil" },
+    {  200, "Egrave" },
+    {  201, "Eacute" },
+    {  202, "Ecirc" },
+    {  203, "Euml" },
+    {  204, "Igrave" },
+    {  205, "Iacute" },
+    {  206, "Icirc" },
+    {  207, "Iuml" },
+    {  208, "ETH" },
+    {  209, "Ntilde" },
+    {  210, "Ograve" },
+    {  211, "Oacute" },
+    {  212, "Ocirc" },
+    {  213, "Otilde" },
+    {  214, "Ouml" },
+    {  215, "times" },
+    {  216, "Oslash" },
+    {  217, "Ugrave" },
+    {  218, "Uacute" },
+    {  219, "Ucirc" },
+    {  220, "Uuml" },
+    {  221, "Yacute" },
+    {  222, "THORN" },
+    {  223, "szlig" },
+    {  224, "agrave" },
+    {  225, "aacute" },
+    {  226, "acirc" },
+    {  227, "atilde" },
+    {  228, "auml" },
+    {  229, "aring" },
+    {  230, "aelig" },
+    {  231, "ccedil" },
+    {  232, "egrave" },
+    {  233, "eacute" },
+    {  234, "ecirc" },
+    {  235, "euml" },
+    {  236, "igrave" },
+    {  237, "iacute" },
+    {  238, "icirc" },
+    {  239, "iuml" },
+    {  240, "eth" },
+    {  241, "ntilde" },
+    {  242, "ograve" },
+    {  243, "oacute" },
+    {  244, "ocirc" },
+    {  245, "otilde" },
+    {  246, "ouml" },
+    {  247, "divide" },
+    {  248, "oslash" },
+    {  249, "ugrave" },
+    {  250, "uacute" },
+    {  251, "ucirc" },
+    {  252, "uuml" },
+    {  253, "yacute" },
+    {  254, "thorn" },
+    {  255, "yuml" },
+    {  338, "OElig" },
+    {  339, "oelig" },
+    {  352, "Scaron" },
+    {  353, "scaron" },
+    {  376, "Yuml" },
+    {  402, "fnof" },
+    {  710, "circ" },
+    {  732, "tilde" },
+    {  913, "Alpha" },
+    {  914, "Beta" },
+    {  915, "Gamma" },
+    {  916, "Delta" },
+    {  917, "Epsilon" },
+    {  918, "Zeta" },
+    {  919, "Eta" },
+    {  920, "Theta" },
+    {  921, "Iota" },
+    {  922, "Kappa" },
+    {  923, "Lambda" },
+    {  924, "Mu" },
+    {  925, "Nu" },
+    {  926, "Xi" },
+    {  927, "Omicron" },
+    {  928, "Pi" },
+    {  929, "Rho" },
+    {  931, "Sigma" },
+    {  932, "Tau" },
+    {  933, "Upsilon" },
+    {  934, "Phi" },
+    {  935, "Chi" },
+    {  936, "Psi" },
+    {  937, "Omega" },
+    {  945, "alpha" },
+    {  946, "beta" },
+    {  947, "gamma" },
+    {  948, "delta" },
+    {  949, "epsilon" },
+    {  950, "zeta" },
+    {  951, "eta" },
+    {  952, "theta" },
+    {  953, "iota" },
+    {  954, "kappa" },
+    {  955, "lambda" },
+    {  956, "mu" },
+    {  957, "nu" },
+    {  958, "xi" },
+    {  959, "omicron" },
+    {  960, "pi" },
+    {  961, "rho" },
+    {  962, "sigmaf" },
+    {  963, "sigma" },
+    {  964, "tau" },
+    {  965, "upsilon" },
+    {  966, "phi" },
+    {  967, "chi" },
+    {  968, "psi" },
+    {  969, "omega" },
+    {  977, "thetasym" },
+    {  978, "upsih" },
+    {  982, "piv" },
+    { 8194, "ensp" },
+    { 8195, "emsp" },
+    { 8201, "thinsp" },
+    { 8204, "zwnj" },
+    { 8205, "zwj" },
+    { 8206, "lrm" },
+    { 8207, "rlm" },
+    { 8211, "ndash" },
+    { 8212, "mdash" },
+    { 8216, "lsquo" },
+    { 8217, "rsquo" },
+    { 8218, "sbquo" },
+    { 8220, "ldquo" },
+    { 8221, "rdquo" },
+    { 8222, "bdquo" },
+    { 8224, "dagger" },
+    { 8225, "Dagger" },
+    { 8226, "bull" },
+    { 8230, "hellip" },
+    { 8240, "permil" },
+    { 8242, "prime" },
+    { 8243, "Prime" },
+    { 8249, "lsaquo" },
+    { 8250, "rsaquo" },
+    { 8254, "oline" },
+    { 8260, "frasl" },
+    { 8364, "euro" },
+    { 8465, "image" },
+    { 8472, "weierp" },
+    { 8476, "real" },
+    { 8482, "trade" },
+    { 8501, "alefsym" },
+    { 8592, "larr" },
+    { 8593, "uarr" },
+    { 8594, "rarr" },
+    { 8595, "darr" },
+    { 8596, "harr" },
+    { 8629, "crarr" },
+    { 8656, "lArr" },
+    { 8657, "uArr" },
+    { 8658, "rArr" },
+    { 8659, "dArr" },
+    { 8660, "hArr" },
+    { 8704, "forall" },
+    { 8706, "part" },
+    { 8707, "exist" },
+    { 8709, "empty" },
+    { 8711, "nabla" },
+    { 8712, "isin" },
+    { 8713, "notin" },
+    { 8715, "ni" },
+    { 8719, "prod" },
+    { 8721, "sum" },
+    { 8722, "minus" },
+    { 8727, "lowast" },
+    { 8730, "radic" },
+    { 8733, "prop" },
+    { 8734, "infin" },
+    { 8736, "ang" },
+    { 8743, "and" },
+    { 8744, "or" },
+    { 8745, "cap" },
+    { 8746, "cup" },
+    { 8747, "int" },
+    { 8756, "there4" },
+    { 8764, "sim" },
+    { 8773, "cong" },
+    { 8776, "asymp" },
+    { 8800, "ne" },
+    { 8801, "equiv" },
+    { 8804, "le" },
+    { 8805, "ge" },
+    { 8834, "sub" },
+    { 8835, "sup" },
+    { 8836, "nsub" },
+    { 8838, "sube" },
+    { 8839, "supe" },
+    { 8853, "oplus" },
+    { 8855, "otimes" },
+    { 8869, "perp" },
+    { 8901, "sdot" },
+    { 8968, "lceil" },
+    { 8969, "rceil" },
+    { 8970, "lfloor" },
+    { 8971, "rfloor" },
+    { 9001, "lang" },
+    { 9002, "rang" },
+    { 9674, "loz" },
+    { 9824, "spades" },
+    { 9827, "clubs" },
+    { 9829, "hearts" },
+    { 9830, "diams" },
+#endif /* HTTP_ALL_ENTITIES */
+    { 0, "" }
+};
+
+/*
+ * Decoder state for emit(): ep is NULL outside a character reference,
+ * otherwise it points to the next free byte of entity_buf.
+ */
+struct entity_state {
+    char entity_buf[16];
+    char *ep;
+};
+
+/*
+ * Append one character of an href value to the output at p, decoding
+ * HTML character references (&name; &#NN; &#xHH;) to UTF-8 on the way.
+ * Characters between '&' and ';' are collected in st (anything past 15
+ * characters is ignored) and produce no output; a reference that is
+ * unknown or not a valid code point is dropped.
+ *
+ * Returns the updated output pointer.  At most four bytes are written
+ * per call.
+ */
+static char *emit(char *p, int c, struct entity_state *st)
+{
+    const struct html_entity *ent;
+    unsigned int ucs;
+
+    if (!st->ep) {
+        if (c == '&') {
+            /* Entity open */
+            st->ep = st->entity_buf;
+        } else {
+            *p++ = c;
+        }
+    } else {
+        if (c == ';') {
+            /* Terminate the collected name, not the output string */
+            *st->ep = '\0';
+            st->ep = NULL;
+            if (st->entity_buf[0] == '#') {
+                if ((st->entity_buf[1] | 0x20) == 'x') {
+                    ucs = strtoul(st->entity_buf + 2, NULL, 16);
+                } else {
+                    ucs = strtoul(st->entity_buf + 1, NULL, 10);
+                }
+            } else {
+                for (ent = entities; ent->ucs; ent++) {
+                    if (!strcmp(st->entity_buf, ent->entity))
+                        break;
+                }
+                ucs = ent->ucs;
+            }
+            if (ucs < 32 || ucs > 0x10ffff ||
+                (ucs >= 0xd800 && ucs <= 0xdfff))
+                return p;       /* Bogus */
+            if (ucs >= 0x10000) {
+                *p++ = 0xf0 + (ucs >> 18);
+                *p++ = 0x80 + ((ucs >> 12) & 0x3f);
+                *p++ = 0x80 + ((ucs >> 6) & 0x3f);
+                *p++ = 0x80 + (ucs & 0x3f);
+            } else if (ucs >= 0x800) {
+                *p++ = 0xe0 + (ucs >> 12);
+                *p++ = 0x80 + ((ucs >> 6) & 0x3f);
+                *p++ = 0x80 + (ucs & 0x3f);
+            } else if (ucs >= 0x80) {
+                *p++ = 0xc0 + (ucs >> 6);
+                *p++ = 0x80 + (ucs & 0x3f);
+            } else {
+                *p++ = ucs;
+            }
+        } else if (st->ep < st->entity_buf + sizeof st->entity_buf - 1) {
+            *st->ep++ = c;
+        }
+    }
+    return p;
+}
+
+/*
+ * Scan the HTML stream on inode for the next <a href=...> value and
+ * store it, entity-decoded and NUL-terminated, in buf.  buf must hold
+ * at least FILENAME_MAX + 4 bytes (http_readdir passes FILENAME_MAX + 6).
+ *
+ * Returns buf, or NULL when pxe_getc() returns -1.
+ */
+static const char *http_get_filename(struct inode *inode, char *buf)
+{
+    int c, lc;
+    char *p;
+    const struct machine *sm;
+    struct entity_state es;
+    enum http_readdir_state state = st_start;
+    enum http_readdir_state pstate = st_start;
+
+    memset(&es, 0, sizeof es);
+
+    p = buf;
+    for (;;) {
+        c = pxe_getc(inode);
+        if (c == -1)
+            return NULL;
+
+        lc = tolower(c);
+
+        sm = &statemachine[state];
+
+        if (lc == sm->xchar)
+            state = sm->st_xchar;
+        else if (c == '<')
+            state = sm->st_left;
+        else if (c == '>')
+            state = sm->st_right;
+        else if (isspace(c))
+            state = sm->st_space;
+        else
+            state = sm->st_other;
+
+        if (state == st_hrefeq || state == st_hrefqu) {
+            if (state != pstate) {
+                /* New value: drop old output and any unfinished entity */
+                p = buf;
+                es.ep = NULL;
+            } else if (p < buf + FILENAME_MAX) {
+                p = emit(p, c, &es);
+            }
+            pstate = state;
+        } else {
+            if (pstate != st_start)
+                pstate = st_start;
+            if (p != buf && state == st_start) {
+                *p = '\0';
+                return buf;
+            }
+        }
+    }
+}
+
+/*
+ * Return the next directory entry parsed from an HTTP index page.
+ * Links containing '#' or '?', links that start with '/' or have a '/'
+ * before their last character, and names longer than NAME_MAX are
+ * skipped.  A trailing '/' marks a directory and is not copied to d_name.
+ *
+ * Returns 0 with *dirent filled in, or -1 at end of directory.
+ */
+int http_readdir(struct inode *inode, struct dirent *dirent)
+{
+    char buf[FILENAME_MAX + 6];
+    const char *fn, *sp;
+
+    for (;;) {
+        fn = http_get_filename(inode, buf);
+
+        if (!fn)
+            return -1;          /* End of directory */
+
+        /* Ignore entries with http special characters */
+        if (strchr(fn, '#'))
+            continue;
+        if (strchr(fn, '?'))
+            continue;
+
+        /* A slash if present has to be the last character, and not the first */
+        sp = strchr(fn, '/');
+        if (sp) {
+            if (sp == fn || sp[1])
+                continue;
+        } else {
+            sp = strchr(fn, '\0');
+        }
+
+        if (sp > fn + NAME_MAX)
+            continue;
+
+        dirent->d_ino = 0;      /* Not applicable */
+        dirent->d_off = 0;      /* Not applicable */
+        dirent->d_reclen = offsetof(struct dirent, d_name) + (sp-fn) + 1;
+        dirent->d_type = *sp == '/' ? DT_DIR : DT_REG;
+        memcpy(dirent->d_name, fn, sp-fn);
+        dirent->d_name[sp-fn] = '\0';
+        return 0;
+    }
+}
diff --git a/core/fs/pxe/pxe.c b/core/fs/pxe/pxe.c
index f54e595e..acefac99 100644
--- a/core/fs/pxe/pxe.c
+++ b/core/fs/pxe/pxe.c
@@ -367,10 +367,12 @@ static void __pxe_searchdir(const char *filename, int flags, struct file *file)
 #endif
     }
 
-    if (inode->size)
+    if (inode->size) {
         file->inode = inode;
-    else
+        file->inode->mode = (flags & O_DIRECTORY) ? DT_DIR : DT_REG;
+    } else {
         free_socket(inode);
+    }
 
     return;
 }
@@ -1091,6 +1093,17 @@ cant_free:
     return;
 }
 
+static int pxe_readdir(struct file *file, struct dirent *dirent)
+{
+    struct inode *inode = file->inode;
+    struct pxe_pvt_inode *socket = PVT(inode);
+
+    if (socket->ops->readdir)
+        return socket->ops->readdir(inode, dirent);
+    else
+        return -1;              /* No such operation */
+}
+
 const struct fs_ops pxe_fs_ops = {
     .fs_name       = "pxe",
     .fs_flags      = FS_NODEV,
@@ -1102,4 +1115,5 @@ const struct fs_ops pxe_fs_ops = {
     .close_file    = pxe_close_file,
     .mangle_name   = pxe_mangle_name,
     .load_config   = pxe_load_config,
+    .readdir       = pxe_readdir,
 };
diff --git a/core/fs/pxe/pxe.h b/core/fs/pxe/pxe.h
index 6c84d8be..4c7e8e6e 100644
--- a/core/fs/pxe/pxe.h
+++ b/core/fs/pxe/pxe.h
@@ -111,6 +111,7 @@ struct netbuf;
 struct pxe_conn_ops {
     void (*fill_buffer)(struct inode *inode);
     void (*close)(struct inode *inode);
+    int (*readdir)(struct inode *inode, struct dirent *dirent);
 };
 
 struct pxe_pvt_inode {
@@ -222,6 +223,9 @@ void http_open(struct url_info *url, int flags, struct inode *inode,
                const char **redir);
 void http_bake_cookies(void);
 
+/* http_readdir.c */
+int http_readdir(struct inode *inode, struct dirent *dirent);
+
 /* ftp.c */
 void ftp_open(struct url_info *url, int flags, struct inode *inode,
                const char **redir);