1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
|
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef NET_BASE_ESCAPE_H_
#define NET_BASE_ESCAPE_H_
#include <stdint.h>
#include <set>
#include <string>
#include "base/strings/string16.h"
#include "base/strings/string_piece.h"
#include "base/strings/utf_offset_string_conversions.h"
#include "net/base/net_export.h"
namespace net {
// Escaping --------------------------------------------------------------------
// Escapes all characters except unreserved characters. Unreserved characters,
// as defined in RFC 3986, include alphanumerics and -._~
NET_EXPORT std::string EscapeAllExceptUnreserved(base::StringPiece text);
// Escapes characters in text suitable for use as a query parameter value.
// We %XX everything except alphanumerics and -_.!~*'()
// Spaces change to "+" unless you pass usePlus=false.
// This is basically the same as encodeURIComponent in javascript.
NET_EXPORT std::string EscapeQueryParamValue(base::StringPiece text,
bool use_plus);
// Escapes a partial or complete file/pathname. This includes:
// non-printable, non-7bit, and (including space) "#%:<>?[\]^`{|}
NET_EXPORT std::string EscapePath(base::StringPiece path);
#if defined(OS_MACOSX)
// Escapes characters as per expectations of NSURL. This includes:
// non-printable, non-7bit, and (including space) "#%<>[\]^`{|}
NET_EXPORT std::string EscapeNSURLPrecursor(base::StringPiece precursor);
#endif // defined(OS_MACOSX)
// Escapes application/x-www-form-urlencoded content. This includes:
// non-printable, non-7bit, and (including space) ?>=<;+'&%$#"![\]^`{|}
// Space is escaped as + (if use_plus is true) and other special characters
// as %XX (hex).
NET_EXPORT std::string EscapeUrlEncodedData(base::StringPiece path,
bool use_plus);
// Escapes all non-ASCII input, as well as escaping % to %25.
NET_EXPORT std::string EscapeNonASCIIAndPercent(base::StringPiece input);
// Escapes all non-ASCII input. Note this function leaves % unescaped, which
// means the unescaping the resulting string will not give back the original
// input.
NET_EXPORT std::string EscapeNonASCII(base::StringPiece input);
// Escapes characters in text suitable for use as an external protocol handler
// command.
// We %XX everything except alphanumerics and -_.!~*'() and the restricted
// characters (;/?:@&=+$,#[]) and a valid percent escape sequence (%XX).
NET_EXPORT std::string EscapeExternalHandlerValue(base::StringPiece text);
// Appends the given character to the output string, escaping the character if
// the character would be interpreted as an HTML delimiter.
NET_EXPORT void AppendEscapedCharForHTML(char c, std::string* output);
// Escapes chars that might cause this text to be interpreted as HTML tags.
NET_EXPORT std::string EscapeForHTML(base::StringPiece text);
NET_EXPORT base::string16 EscapeForHTML(base::StringPiece16 text);
// Unescaping ------------------------------------------------------------------
class UnescapeRule {
public:
// A combination of the following flags that is passed to the unescaping
// functions.
typedef uint32_t Type;
enum {
// Don't unescape anything at all.
NONE = 0,
// Don't unescape anything special, but all normal unescaping will happen.
// This is a placeholder and can't be combined with other flags (since it's
// just the absence of them). All other unescape rules imply "normal" in
// addition to their special meaning. Things like escaped letters, digits,
// and most symbols will get unescaped with this mode.
NORMAL = 1 << 0,
// Convert %20 to spaces. In some places where we're showing URLs, we may
// want this. In places where the URL may be copied and pasted out, then
// you wouldn't want this since it might not be interpreted in one piece
// by other applications. Other UTF-8 spaces will not be unescaped.
SPACES = 1 << 1,
// Unescapes '/' and '\\'. If these characters were unescaped, the resulting
// URL won't be the same as the source one. Moreover, they are dangerous to
// unescape in strings that will be used as file paths or names. This value
// should only be used when slashes don't have special meaning, like data
// URLs.
PATH_SEPARATORS = 1 << 2,
// Unescapes various characters that will change the meaning of URLs,
// including '%', '+', '&', '#'. Does not unescape path separators.
// If these characters were unescaped, the resulting URL won't be the same
// as the source one. This flag is used when generating final output like
// filenames for URLs where we won't be interpreting as a URL and want to do
// as much unescaping as possible.
URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS = 1 << 3,
// URL queries use "+" for space. This flag controls that replacement.
REPLACE_PLUS_WITH_SPACE = 1 << 4,
};
};
// Unescapes |escaped_text| and returns the result.
// Unescaping consists of looking for the exact pattern "%XX", where each X is
// a hex digit, and converting to the character with the numerical value of
// those digits. Thus "i%20=%203%3b" unescapes to "i = 3;", if the
// "UnescapeRule::SPACES" used.
//
// This method does not ensure that the output is a valid string using any
// character encoding. However, it does leave escaped certain byte sequences
// that would be dangerous to display to the user, because if interpreted as
// UTF-8, they could be used to mislead the user. Callers that want to
// unconditionally unescape everything for uses other than displaying data to
// the user should use UnescapeBinaryURLComponent().
NET_EXPORT std::string UnescapeURLComponent(base::StringPiece escaped_text,
UnescapeRule::Type rules);
// Unescapes the given substring as a URL, and then tries to interpret the
// result as being encoded as UTF-8. If the result is convertible into UTF-8, it
// will be returned as converted. If it is not, the original escaped string will
// be converted into a base::string16 and returned. |adjustments| provides
// information on how the original string was adjusted to get the string
// returned.
NET_EXPORT base::string16 UnescapeAndDecodeUTF8URLComponentWithAdjustments(
base::StringPiece text,
UnescapeRule::Type rules,
base::OffsetAdjuster::Adjustments* adjustments);
// Unescapes a component of a URL for use as binary data. Unlike
// UnescapeURLComponent, leaves nothing unescaped, including nulls, invalid
// characters, characters that are unsafe to display, etc. This should *not*
// be used when displaying the decoded data to the user.
//
// Only the NORMAL and REPLACE_PLUS_WITH_SPACE rules are allowed.
NET_EXPORT std::string UnescapeBinaryURLComponent(
base::StringPiece escaped_text,
UnescapeRule::Type rules = UnescapeRule::NORMAL);
// Variant of UnescapeBinaryURLComponent(). Writes output to |unescaped_text|.
// Returns true on success, returns false and clears |unescaped_text| on
// failure. Fails on characters escaped that are unsafe to unescape in some
// contexts, which are defined as characters "\0" through "\x1F" (Which includes
// CRLF but not space), and optionally path separators. Path separators include
// both forward and backward slashes on all platforms. Does not fail if any of
// those characters appear unescaped in the input string.
NET_EXPORT bool UnescapeBinaryURLComponentSafe(base::StringPiece escaped_text,
bool fail_on_path_separators,
std::string* unescaped_text);
// Unescapes the following ampersand character codes from |text|:
// < > & " '
NET_EXPORT base::string16 UnescapeForHTML(base::StringPiece16 text);
// Returns true if |escaped_text| contains any element of |bytes| in
// percent-encoded form.
//
// For example, if |bytes| is {'%', '/'}, returns true if |escaped_text|
// contains "%25" or "%2F", but not if it just contains bare '%' or '/'
// characters.
NET_EXPORT bool ContainsEncodedBytes(base::StringPiece escaped_text,
const std::set<unsigned char>& bytes);
} // namespace net
#endif // NET_BASE_ESCAPE_H_
|