diff options
author | Glenn Strauss <gstrauss@gluelogic.com> | 2022-06-10 13:15:18 -0400 |
---|---|---|
committer | Glenn Strauss <gstrauss@gluelogic.com> | 2022-06-10 13:15:18 -0400 |
commit | 91ce3b085738f9279e903ec8eb0d1339f7a6a0f0 (patch) | |
tree | 371e1d3e394d91c3fd4e2704ab230b50476a60d1 | |
parent | ca407dca5d367cf5778449634cc63687630efa7d (diff) | |
download | lighttpd-git-91ce3b085738f9279e903ec8eb0d1339f7a6a0f0.tar.gz |
[core] buffer_append_bs_escaped_json()
separate func from buffer_append_bs_escaped() so that both
buffer_append_bs_escaped() and buffer_append_bs_escaped_json()
can be slightly more specialized and optimized
-rw-r--r-- | src/buffer.c | 82 | ||||
-rw-r--r-- | src/buffer.h | 8 | ||||
-rw-r--r-- | src/mod_accesslog.c | 38 | ||||
-rw-r--r-- | src/mod_dirlisting.c | 2 | ||||
-rw-r--r-- | src/mod_magnet.c | 11 | ||||
-rw-r--r-- | src/t/test_buffer.c | 36 |
6 files changed, 125 insertions, 52 deletions
diff --git a/src/buffer.c b/src/buffer.c index 35fed054..a0eae151 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -715,12 +715,10 @@ void buffer_append_string_c_escaped(buffer * const restrict b, const char * cons void buffer_append_bs_escaped (buffer * const restrict b, - const char * restrict s, const size_t len, - const buffer_bs_escape_t esc) + const char * restrict s, const size_t len) { /* replaces non-printable chars with escaped string * default: \xHH where HH is the hex representation of the byte - * json: \u00HH where HH is the hex representation of the byte * exceptions: " => \", \ => \\, whitespace chars => \n \t etc. */ /* Intended for use escaping string to be surrounded by double-quotes */ /* Performs single pass over string and is optimized for ASCII; @@ -755,29 +753,61 @@ buffer_append_bs_escaped (buffer * const restrict b, d[1] = c; break; default: - if (0 == esc) { /* BS_ESCAPE_DEFAULT */ - /* non printable char => \xHH */ - d = buffer_extend(b, 4); - d[0] = '\\'; - d[1] = 'x'; - d += 2; - } - else { /* BS_ESCAPE_JSON */ - /*(technically do not have to escape DEL (\127) or higher)*/ - /*(would be faster if handled in tighter do/while loop above)*/ - if (c >= 127) { - buffer_append_char(b, (char)c); - break; - } - d = buffer_extend(b, 6); - d[0] = '\\'; - d[1] = 'u'; - d[2] = '0'; - d[3] = '0'; - d += 4; - } - d[0] = hex_chars_uc[c >> 4]; - d[1] = hex_chars_uc[c & 0xF]; + /* non printable char => \xHH */ + d = buffer_extend(b, 4); + d[0] = '\\'; + d[1] = 'x'; + d[2] = hex_chars_uc[c >> 4]; + d[3] = hex_chars_uc[c & 0xF]; + break; + } + } +} + + +void +buffer_append_bs_escaped_json (buffer * const restrict b, + const char * restrict s, const size_t len) +{ + /* replaces non-printable chars with escaped string + * json: \u00HH where HH is the hex representation of the byte + * exceptions: " => \", \ => \\, whitespace chars => \n \t etc. 
*/ + /* Intended for use escaping string to be surrounded by double-quotes */ + buffer_string_prepare_append(b, len); + for (const char * const end = s+len; s < end; ++s) { + unsigned int c; + const char * const ptr = s; + do { + c = *(const unsigned char *)s; + } while (c >= ' ' && c != '"' && c != '\\' && ++s < end); + if (s - ptr) buffer_append_string_len(b, ptr, s - ptr); + + if (s == end) + return; + + /* ('\a', '\v' shortcuts are technically not json-escaping) */ + /* ('\0' is also omitted due to the possibility of string corruption if + * the receiver supports decoding octal escapes (\000) and the escaped + * string contains \0 followed by two digits not part of escaping)*/ + + char *d; + switch (c) { + case '\a':case '\b':case '\t':case '\n':case '\v':case '\f':case '\r': + c = "0000000abtnvfr"[c]; + __attribute_fallthrough__ + case '"': case '\\': + d = buffer_extend(b, 2); + d[0] = '\\'; + d[1] = c; + break; + default: + d = buffer_extend(b, 6); + d[0] = '\\'; + d[1] = 'u'; + d[2] = '0'; + d[3] = '0'; + d[4] = hex_chars_uc[c >> 4]; + d[5] = hex_chars_uc[c & 0xF]; break; } } diff --git a/src/buffer.h b/src/buffer.h index 14fef70a..14c04bf7 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -183,13 +183,9 @@ void buffer_append_string_encoded(buffer * restrict b, const char * restrict s, __attribute_nonnull__() void buffer_append_string_c_escaped(buffer * restrict b, const char * restrict s, size_t s_len); -typedef enum { - BS_ESCAPE_DEFAULT - ,BS_ESCAPE_JSON -} buffer_bs_escape_t; - /* escape non-printable chars, '"', '\\', and chars which high bit set */ -void buffer_append_bs_escaped (buffer * restrict b, const char * restrict s, size_t len, buffer_bs_escape_t esc); +void buffer_append_bs_escaped (buffer * restrict b, const char * restrict s, size_t len); +void buffer_append_bs_escaped_json (buffer * restrict b, const char * restrict s, size_t len); __attribute_nonnull__() void buffer_urldecode_path(buffer *b); diff --git a/src/mod_accesslog.c 
b/src/mod_accesslog.c index 7ba1308a..fc3ce9e8 100644 --- a/src/mod_accesslog.c +++ b/src/mod_accesslog.c @@ -164,6 +164,13 @@ typedef struct { format_fields *default_format;/* allocated if default format */ } plugin_data; +typedef void(esc_fn_t)(buffer * restrict b, const char * restrict s, size_t len); + +typedef enum { + BS_ESCAPE_DEFAULT + ,BS_ESCAPE_JSON +} buffer_bs_escape_t; + INIT_FUNC(mod_accesslog_init) { return calloc(1, sizeof(plugin_data)); } @@ -584,14 +591,12 @@ TRIGGER_FUNC(log_access_periodic_flush) { return HANDLER_GO_ON; } -#define accesslog_append_escaped buffer_append_bs_escaped - static void accesslog_append_buffer (buffer * const restrict dest, - const buffer * const restrict b, const int esc) + const buffer * const restrict b, esc_fn_t esc_fn) { if (!buffer_string_is_empty(b)) - accesslog_append_escaped(dest, BUF_PTR_LEN(b), esc); + esc_fn(dest, BUF_PTR_LEN(b)); else buffer_append_char(dest, '-'); } @@ -610,7 +615,8 @@ __attribute_noinline__ static void accesslog_append_cookie (buffer * const restrict dest, const request_st * const restrict r, - const buffer * const restrict name, const int esc) + const buffer * const restrict name, + esc_fn_t esc_fn) { const buffer * const vb = http_header_request_get(r, HTTP_HEADER_COOKIE, CONST_STR_LEN("Cookie")); @@ -625,7 +631,7 @@ accesslog_append_cookie (buffer * const restrict dest, for (str = v; *str != '\0' && *str != ';'; ++str) ; if (str == v) break; do { --str; } while (str > v && (*str == ' ' || *str == '\t')); - accesslog_append_escaped(dest, v, str - v + 1, esc); + esc_fn(dest, v, str - v + 1); break; } else { @@ -758,7 +764,7 @@ __attribute_cold__ __attribute_noinline__ static void log_access_record_cold (buffer * const b, const request_st * const r, - const format_field * const f, const int esc) + const format_field * const f, esc_fn_t esc_fn) { connection * const con = r->con; switch (f->field) { @@ -797,16 +803,15 @@ log_access_record_cold (buffer * const b, const request_st * const r, { 
const uint32_t len = buffer_clen(&r->target); const char * const qmark = memchr(r->target.ptr, '?', len); - accesslog_append_escaped(b, r->target.ptr, - qmark ? (uint32_t)(qmark - r->target.ptr) - : len, esc); + esc_fn(b, r->target.ptr, + qmark ? (uint32_t)(qmark - r->target.ptr) : len); } break; case FORMAT_QUERY_STRING: - accesslog_append_escaped(b, BUF_PTR_LEN(&r->uri.query), esc); + esc_fn(b, BUF_PTR_LEN(&r->uri.query)); break; case FORMAT_FILENAME: - accesslog_append_buffer(b, &r->physical.path, esc); + accesslog_append_buffer(b, &r->physical.path, esc_fn); break; case FORMAT_CONNECTION_STATUS: buffer_append_char(b, (r->state == CON_STATE_RESPONSE_END) @@ -829,7 +834,7 @@ log_access_record_cold (buffer * const b, const request_st * const r, } } -static int log_access_record (const request_st * const r, buffer * const b, format_fields * const parsed_format, const buffer_bs_escape_t esc) { +static int log_access_record (const request_st * const r, buffer * const b, format_fields * const parsed_format, esc_fn_t esc) { const buffer *vb; unix_timespec64_t ts = { 0, 0 }; int flush = 0; @@ -880,7 +885,7 @@ static int log_access_record (const request_st * const r, buffer * const b, form /*(attempt to reconstruct request line)*/ http_method_append(b, r->http_method); buffer_append_char(b, ' '); - accesslog_append_escaped(b, BUF_PTR_LEN(&r->target_orig), esc); + esc(b, BUF_PTR_LEN(&r->target_orig)); buffer_append_char(b, ' '); http_version_append(b, r->http_version); break; @@ -930,8 +935,11 @@ REQUESTDONE_FUNC(log_access_write) { ? (buffer_clear(r->tmp_buf), r->tmp_buf) : &fdlog->b; + esc_fn_t * const esc_fn = !p->conf.escaping + ? 
buffer_append_bs_escaped + : buffer_append_bs_escaped_json; const int flush = - log_access_record(r, b, p->conf.parsed_format, p->conf.escaping); + log_access_record(r, b, p->conf.parsed_format, esc_fn); #ifdef HAVE_SYSLOG_H if (p->conf.use_syslog) { diff --git a/src/mod_dirlisting.c b/src/mod_dirlisting.c index e513ad50..80b6bd7d 100644 --- a/src/mod_dirlisting.c +++ b/src/mod_dirlisting.c @@ -1098,7 +1098,7 @@ static int http_read_directory(handler_ctx * const p) { p->jcomma = 1; buffer_append_string_len(p->jb, CONST_STR_LEN( "{\"name\":\"")); } - buffer_append_bs_escaped(p->jb, d_name, dsz, BS_ESCAPE_JSON); + buffer_append_bs_escaped_json(p->jb, d_name, dsz); const char *t; size_t tlen; diff --git a/src/mod_magnet.c b/src/mod_magnet.c index 2a1000eb..db2a2cb0 100644 --- a/src/mod_magnet.c +++ b/src/mod_magnet.c @@ -1162,7 +1162,7 @@ static int magnet_bsdec(lua_State *L) { return 1; } -static int magnet_bsenc(lua_State *L, const buffer_bs_escape_t esc) { +static int magnet_bsenc(lua_State *L, const int esc_json) { if (lua_isnoneornil(L, -1)) { lua_pushlstring(L, "", 0); return 1; @@ -1173,18 +1173,21 @@ static int magnet_bsenc(lua_State *L, const buffer_bs_escape_t esc) { return 1; } buffer * const b = magnet_tmpbuf_acquire(L); - buffer_append_bs_escaped(b, s.ptr, s.len, esc); + if (esc_json) + buffer_append_bs_escaped_json(b, s.ptr, s.len); + else + buffer_append_bs_escaped(b, s.ptr, s.len); lua_pushlstring(L, BUF_PTR_LEN(b)); magnet_tmpbuf_release(b); return 1; } static int magnet_bsenc_default(lua_State *L) { - return magnet_bsenc(L, BS_ESCAPE_DEFAULT); + return magnet_bsenc(L, 0); } static int magnet_bsenc_json(lua_State *L) { - return magnet_bsenc(L, BS_ESCAPE_JSON); + return magnet_bsenc(L, 1); } static int magnet_xmlenc(lua_State *L) { diff --git a/src/t/test_buffer.c b/src/t/test_buffer.c index 2855a59f..2d1e9a35 100644 --- a/src/t/test_buffer.c +++ b/src/t/test_buffer.c @@ -144,6 +144,41 @@ static void test_buffer_append_path_len(void) { buffer_free(b); 
} +static void test_buffer_append_bs_escaped(void) { + buffer *b = buffer_init(); + + buffer_append_bs_escaped_json(b, CONST_STR_LEN(" ")); + assert(buffer_eq_slen(b, CONST_STR_LEN(" "))); + buffer_clear(b); + buffer_append_bs_escaped_json(b, CONST_STR_LEN("\0")); + assert(buffer_eq_slen(b, CONST_STR_LEN("\\u0000"))); + buffer_clear(b); + buffer_append_bs_escaped_json(b, CONST_STR_LEN("\1")); + assert(buffer_eq_slen(b, CONST_STR_LEN("\\u0001"))); + buffer_clear(b); + buffer_append_bs_escaped_json(b, CONST_STR_LEN("\n")); + assert(buffer_eq_slen(b, CONST_STR_LEN("\\n"))); + buffer_clear(b); + buffer_append_bs_escaped_json(b, CONST_STR_LEN("é")); + assert(buffer_eq_slen(b, CONST_STR_LEN("é"))); + buffer_clear(b); + buffer_append_bs_escaped_json(b, CONST_STR_LEN("ö")); + assert(buffer_eq_slen(b, CONST_STR_LEN("ö"))); + + #if 0 + buffer_clear(b); + magnet_buffer_append_bsdec(b, CONST_STR_LEN("\\u00E9")); + assert(buffer_eq_slen(b, CONST_STR_LEN("é"))); + buffer_clear(b); + magnet_buffer_append_bsdec(b, CONST_STR_LEN("\\u00F6")); + assert(buffer_eq_slen(b, CONST_STR_LEN("ö"))); + #endif + + /* TODO: more */ + + buffer_free(b); +} + void test_buffer (void); void test_buffer (void) { @@ -151,4 +186,5 @@ void test_buffer (void) test_buffer_to_lower_upper(); test_buffer_string_space(); test_buffer_append_path_len(); + test_buffer_append_bs_escaped(); } |