/* Origin: APACHE_1_3_42/src/main/gen_test_char.c
 * (blob 8cd4cb7259456780bada6bb10e6a7312580528b4)
 */
/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* we need some of the portability definitions... for strchr */
#include "httpd.h"

/* A bunch of functions in util.c scan strings looking for certain characters.
 * To make that more efficient we encode a lookup table.
 *
 * Each T_* constant below is one bit.  main() ORs the applicable bits
 * together for every character code 0..255 and prints the results as the
 * entries of test_char_table; it also prints these same constants as
 * #define lines at the top of the generated output.
 *
 *   T_ESCAPE_SHELL_CMD     chars with special meaning in the shell
 *   T_ESCAPE_PATH_SEGMENT  chars to escape in a path segment
 *   T_OS_ESCAPE_PATH       chars to escape in a path or uri
 *   T_HTTP_TOKEN_STOP      chars that end an http token
 *   T_ESCAPE_LOGITEM       chars to filter from the log file
 *   T_ESCAPE_FORENSIC      chars to filter from the forensic log
 */
#define T_ESCAPE_SHELL_CMD	(0x01)
#define T_ESCAPE_PATH_SEGMENT	(0x02)
#define T_OS_ESCAPE_PATH	(0x04)
#define T_HTTP_TOKEN_STOP	(0x08)
#define T_ESCAPE_LOGITEM	(0x10)
#define T_ESCAPE_FORENSIC       (0x20)

/* Compute the set of T_* bits that apply to one character code.
 *
 * ch is the character code being classified; main() calls this for every
 * value in 0..255.  The return value is the OR of every T_* flag whose
 * rule matches ch, or 0 when none does.
 */
static unsigned char classify_char(unsigned ch)
{
#if defined(WIN32) || defined(OS2)
    /* Win32 and OS/2 share most of the dangerous characters of the Unix
     * shell and add carriage return and percent.  Escaping works
     * differently there (carets or doubled double-quotes, and neither
     * lf nor cr can be escaped), but the Unix-specific characters are
     * flagged too so that cross-compiled Unix applications behave
     * similarly when invoked on win32/os2.
     */
    static const char shell_meta[] = "&;`'\"|*?~<>^()[]{}$\\\n\r%";
#else
    static const char shell_meta[] = "&;`'\"|*?~<>^()[]{}$\\\n";
#endif
    const int alnum = ap_isalnum(ch);
    const int printable = ap_isprint(ch);
    const int control = ap_iscntrl(ch);
    unsigned char bits = 0;

    /* escape_shell_cmd: the explicit ch test keeps NUL from matching
     * the string terminator inside strchr().
     */
    if (ch != 0 && strchr(shell_meta, ch)) {
        bits |= T_ESCAPE_SHELL_CMD;
    }

    /* Path escaping: anything that is neither alphanumeric nor in the
     * listed safe set is flagged.  The two sets differ only in '/',
     * which is additionally safe for T_OS_ESCAPE_PATH.  No NUL guard
     * here: for ch == 0 strchr() finds the terminator, so NUL receives
     * neither bit.
     */
    if (!alnum) {
        if (!strchr("$-_.+!*'(),:@&=~", ch)) {
            bits |= T_ESCAPE_PATH_SEGMENT;
        }
        if (!strchr("$-_.+!*'(),:@&=/~", ch)) {
            bits |= T_OS_ESCAPE_PATH;
        }
    }

    /* HTTP token stop: control characters plus the "tspecials" from
     * RFC2068.
     */
    if (ch != 0 && (control || strchr(" \t()<>@,;:\\/[]?={}", ch))) {
        bits |= T_HTTP_TOKEN_STOP;
    }

    /* Log items: escape everything non-printable (control characters
     * and 8-bit characters with the high bit set), double quotes
     * (they delimit the request in the log file) and backslashes
     * (the escape character itself).
     */
    if (ch != 0 && (!printable || control || ch == '"' || ch == '\\')) {
        bits |= T_ESCAPE_LOGITEM;
    }

    /* Forensic log: same idea, but NUL is included, and the characters
     * flagged are ':' and '|' (used as delimiters) and '%' (used for
     * escaping).
     */
    if (ch == 0 || !printable || control
        || ch == ':' || ch == '|' || ch == '%') {
        bits |= T_ESCAPE_FORENSIC;
    }

    return bits;
}

/* Write the generated lookup-table source to stdout: first the T_*
 * #define lines, then a 256-entry test_char_table initializer laid out
 * eight entries per row, each row followed by a comment giving the range
 * of character codes it covers.  argc and argv are not used.  Always
 * returns 0.
 */
int main(int argc, char *argv[])
{
    unsigned code;

    printf(
"/* this file is automatically generated by gen_test_char, do not edit */\n"
"#define T_ESCAPE_SHELL_CMD	0x%02x /* chars with special meaning in the shell */\n"
"#define T_ESCAPE_PATH_SEGMENT	0x%02x /* find path segment, as defined in RFC1808 */\n"
"#define T_OS_ESCAPE_PATH	0x%02x /* escape characters in a path or uri */\n"
"#define T_HTTP_TOKEN_STOP	0x%02x /* find http tokens, as defined in RFC2616 */\n"
"#define T_ESCAPE_LOGITEM	0x%02x /* filter what should go in the log file */\n"
"#define T_ESCAPE_FORENSIC	0x%02x /* filter what should go in the forensic log */\n"
"\n",
        T_ESCAPE_SHELL_CMD,
        T_ESCAPE_PATH_SEGMENT,
        T_OS_ESCAPE_PATH,
        T_HTTP_TOKEN_STOP,
        T_ESCAPE_LOGITEM,
        T_ESCAPE_FORENSIC
        );

    printf("static const unsigned char test_char_table[256] = {\n"
           "    ");

    for (code = 0; code < 256; ++code) {
        /* Every entry but the last is followed by a comma; the last
         * gets two blanks so the row comments stay aligned.
         */
        const char *separator = ", ";

        if (code == 255) {
            separator = "  ";
        }
        printf("0x%02x%s", classify_char(code), separator);

        /* End of a row of eight: annotate it and indent the next row. */
        if ((code & 7) == 7) {
            printf(" /*0x%02x...0x%02x*/\n    ", code - 7, code);
        }
    }
    printf("\n};\n");

    return 0;
}