summaryrefslogtreecommitdiff
path: root/src/server/wsgi_validate.c
blob: a37528c0194bfda588864112d514f0672a4683ba (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
/* ------------------------------------------------------------------------- */

/*
 * Copyright 2007-2022 GRAHAM DUMPLETON
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* ------------------------------------------------------------------------- */

#include "wsgi_validate.h"

#include "wsgi_convert.h"

#include <ctype.h>

/* ------------------------------------------------------------------------- */

/*
 * Validate a WSGI response status line.
 *
 * A status line is a status code followed by a reason phrase, with at
 * least one space character between them. The status code must be
 * exactly 3 decimal digits. The reason phrase may be any text that
 * contains no control characters, which in particular excludes carriage
 * return and line feed. The reason phrase may be empty provided the
 * space after the status code is still present, e.g. "200 ".
 *
 * The value must be a Python byte string. It is examined as a C string,
 * so only the bytes up to the first NUL byte are checked.
 *
 * Returns 1 if the status line is acceptable. Returns 0 with a Python
 * exception set otherwise: TypeError if the value is not a byte string,
 * ValueError if the content is malformed.
 */

int wsgi_validate_status_line(PyObject *value)
{
    const char *s;

    if (!PyBytes_Check(value)) {
        PyErr_Format(PyExc_TypeError, "expected byte string object for "
                     "status line, value of type %.200s found",
                     value->ob_type->tp_name);
        return 0;
    }

    s = PyBytes_AsString(value);

    /*
     * The <ctype.h> classification functions have undefined behaviour
     * when passed a negative value other than EOF, which is what a byte
     * >= 0x80 becomes on platforms where plain char is signed. Always
     * convert to unsigned char before classifying.
     *
     * The short circuit evaluation guarantees we never read beyond the
     * terminating NUL byte, as a NUL is not a digit.
     */

    if (!isdigit((unsigned char)s[0]) ||
        !isdigit((unsigned char)s[1]) ||
        !isdigit((unsigned char)s[2])) {
        PyErr_SetString(PyExc_ValueError,
                        "status code is not a 3 digit integer");
        return 0;
    }

    s += 3;

    /* A fourth digit means the status code is too long. */

    if (isdigit((unsigned char)*s)) {
        PyErr_SetString(PyExc_ValueError,
                        "status code is not a 3 digit integer");
        return 0;
    }

    /*
     * This also rejects a status line which ends immediately after the
     * status code, as the terminating NUL byte is not a space. Nothing
     * further is required after the space since an empty reason phrase
     * is permitted.
     */

    if (*s != ' ') {
        PyErr_SetString(PyExc_ValueError, "no space following status code");
        return 0;
    }

    /* Everything from the separating space onwards is the reason phrase. */

    for (; *s; s++) {
        if (iscntrl((unsigned char)*s)) {
            PyErr_SetString(PyExc_ValueError,
                            "control character present in reason phrase");
            return 0;
        }
    }

    return 1;
}

/* ------------------------------------------------------------------------- */

/*
 * Validate a WSGI response header name.
 *
 * Strictly, a header name is a token of one or more characters which
 * excludes control characters, the separator characters "(", ")", "<",
 * ">", "@", ",", ";", ":", "\", <">, "/", "[", "]", "?", "=", "{", "}"
 * and the space character. Only control characters and the space
 * character are actually checked for here, as it is only carriage
 * return, line feed, and leading and trailing white space that are
 * really a problem.
 *
 * The value must be a Python byte string. It is examined as a C string,
 * so only the bytes up to the first NUL byte are checked.
 *
 * Returns 1 if the header name is acceptable. Returns 0 with a Python
 * exception set otherwise: TypeError if the value is not a byte string,
 * ValueError if it is empty or contains a disallowed character.
 */

int wsgi_validate_header_name(PyObject *value)
{
    const char *s;

    if (!PyBytes_Check(value)) {
        PyErr_Format(PyExc_TypeError, "expected byte string object for "
                     "header name, value of type %.200s found",
                     value->ob_type->tp_name);
        return 0;
    }

    s = PyBytes_AsString(value);

    if (!*s) {
        PyErr_SetString(PyExc_ValueError, "header name is empty");
        return 0;
    }

    for (; *s; s++) {
        /*
         * Convert to unsigned char before classifying. Passing a plain
         * char holding a byte >= 0x80 to iscntrl() is undefined
         * behaviour on platforms where char is signed.
         */

        if (iscntrl((unsigned char)*s)) {
            PyErr_SetString(PyExc_ValueError,
                            "control character present in header name");
            return 0;
        }

        if (*s == ' ') {
            PyErr_SetString(PyExc_ValueError,
                            "space character present in header name");
            return 0;
        }
    }

    return 1;
}

/* ------------------------------------------------------------------------- */

/*
 * Validate a WSGI response header value.
 *
 * Formally a header value may hold any number of characters other than
 * control characters. As applications cannot be trusted to avoid
 * control characters in general, the only ones rejected here are
 * carriage return and line feed. The aim is to ensure the header value
 * sits entirely on one line, so that an application cannot attempt line
 * continuation, which may cause problems with some hosting mechanisms.
 *
 * The value must be a Python byte string. It is scanned as a C string,
 * so the scan ends at the first NUL byte.
 *
 * Returns 1 if the header value is acceptable. Returns 0 with a Python
 * exception set otherwise: TypeError if the value is not a byte string,
 * ValueError if a carriage return or line feed is found.
 */

int wsgi_validate_header_value(PyObject *value)
{
    const char *cursor;

    if (!PyBytes_Check(value)) {
        PyErr_Format(PyExc_TypeError, "expected byte string object for "
                     "header value, value of type %.200s found",
                     value->ob_type->tp_name);
        return 0;
    }

    for (cursor = PyBytes_AsString(value); *cursor != '\0'; cursor++) {
        switch (*cursor) {
        case '\r':
        case '\n':
            PyErr_SetString(PyExc_ValueError, "carriage return/line "
                            "feed character present in header value");
            return 0;

        default:
            break;
        }
    }

    return 1;
}

/* ------------------------------------------------------------------------- */

/* vi: set sw=4 expandtab : */