summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl> 2019-02-26 12:37:40 +0100
committer: Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl> 2019-02-26 12:37:40 +0100
commit: 92e068b465299d5d88fe480bc5cf2bf4bd750516 (patch)
tree: c1156c20ae1252a6482996a3f6bc6bfb6a0da9de /src
parent: 84319aa76e744c501d62052cc233e44dc48ece8a (diff)
download: systemd-92e068b465299d5d88fe480bc5cf2bf4bd750516.tar.gz
basic/utf8: do not read past end of string when looking for a multi-byte character
Fixes https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=9341.
Diffstat (limited to 'src')
-rw-r--r-- src/basic/device-nodes.c | 2
-rw-r--r-- src/basic/utf8.c | 25
-rw-r--r-- src/basic/utf8.h | 2
-rw-r--r-- src/libudev/libudev-util.c | 2
-rw-r--r-- src/shared/ask-password-api.c | 12
-rw-r--r-- src/shared/json.c | 5
-rw-r--r-- src/test/test-utf8.c | 20
7 files changed, 42 insertions, 26 deletions
diff --git a/src/basic/device-nodes.c b/src/basic/device-nodes.c
index 5fcdf24bd2..5ebe5b2483 100644
--- a/src/basic/device-nodes.c
+++ b/src/basic/device-nodes.c
@@ -28,7 +28,7 @@ int encode_devnode_name(const char *str, char *str_enc, size_t len) {
for (i = 0, j = 0; str[i] != '\0'; i++) {
int seqlen;
- seqlen = utf8_encoded_valid_unichar(&str[i]);
+ seqlen = utf8_encoded_valid_unichar(str + i, (size_t) -1);
if (seqlen > 1) {
if (len-j < (size_t)seqlen)
diff --git a/src/basic/utf8.c b/src/basic/utf8.c
index ffe0adb33c..090c69d140 100644
--- a/src/basic/utf8.c
+++ b/src/basic/utf8.c
@@ -128,14 +128,14 @@ bool utf8_is_printable_newline(const char* str, size_t length, bool newline) {
assert(str);
- for (p = str; length;) {
+ for (p = str; length > 0;) {
int encoded_len, r;
char32_t val;
- encoded_len = utf8_encoded_valid_unichar(p);
- if (encoded_len < 0 ||
- (size_t) encoded_len > length)
+ encoded_len = utf8_encoded_valid_unichar(p, length);
+ if (encoded_len < 0)
return false;
+ assert(encoded_len > 0 && (size_t) encoded_len <= length);
r = utf8_encoded_to_unichar(p, &val);
if (r < 0 ||
@@ -159,7 +159,7 @@ char *utf8_is_valid(const char *str) {
while (*p) {
int len;
- len = utf8_encoded_valid_unichar(p);
+ len = utf8_encoded_valid_unichar(p, (size_t) -1);
if (len < 0)
return NULL;
@@ -181,7 +181,7 @@ char *utf8_escape_invalid(const char *str) {
while (*str) {
int len;
- len = utf8_encoded_valid_unichar(str);
+ len = utf8_encoded_valid_unichar(str, (size_t) -1);
if (len > 0) {
s = mempcpy(s, str, len);
str += len;
@@ -208,7 +208,7 @@ char *utf8_escape_non_printable(const char *str) {
while (*str) {
int len;
- len = utf8_encoded_valid_unichar(str);
+ len = utf8_encoded_valid_unichar(str, (size_t) -1);
if (len > 0) {
if (utf8_is_printable(str, len)) {
s = mempcpy(s, str, len);
@@ -452,17 +452,24 @@ static int utf8_unichar_to_encoded_len(char32_t unichar) {
}
/* validate one encoded unicode char and return its length */
-int utf8_encoded_valid_unichar(const char *str) {
+int utf8_encoded_valid_unichar(const char *str, size_t length /* bytes */) {
char32_t unichar;
size_t len, i;
int r;
assert(str);
+ assert(length > 0);
+
+ /* We read until NUL, at most length bytes. (size_t) -1 may be used to disable the length check. */
len = utf8_encoded_expected_len(str[0]);
if (len == 0)
return -EINVAL;
+ /* Do we have a truncated multi-byte character? */
+ if (len > length)
+ return -EINVAL;
+
/* ascii is valid */
if (len == 1)
return 1;
@@ -495,7 +502,7 @@ size_t utf8_n_codepoints(const char *str) {
while (*str != 0) {
int k;
- k = utf8_encoded_valid_unichar(str);
+ k = utf8_encoded_valid_unichar(str, (size_t) -1);
if (k < 0)
return (size_t) -1;
diff --git a/src/basic/utf8.h b/src/basic/utf8.h
index 628456936e..6df70921db 100644
--- a/src/basic/utf8.h
+++ b/src/basic/utf8.h
@@ -32,7 +32,7 @@ char16_t *utf8_to_utf16(const char *s, size_t length);
size_t char16_strlen(const char16_t *s); /* returns the number of 16bit words in the string (not bytes!) */
-int utf8_encoded_valid_unichar(const char *str);
+int utf8_encoded_valid_unichar(const char *str, size_t length);
int utf8_encoded_to_unichar(const char *str, char32_t *ret_unichar);
static inline bool utf16_is_surrogate(char16_t c) {
diff --git a/src/libudev/libudev-util.c b/src/libudev/libudev-util.c
index 7e21719fbf..37660d0313 100644
--- a/src/libudev/libudev-util.c
+++ b/src/libudev/libudev-util.c
@@ -175,7 +175,7 @@ size_t util_replace_chars(char *str, const char *white) {
}
/* accept valid utf8 */
- len = utf8_encoded_valid_unichar(&str[i]);
+ len = utf8_encoded_valid_unichar(str + i, (size_t) -1);
if (len > 1) {
i += len;
continue;
diff --git a/src/shared/ask-password-api.c b/src/shared/ask-password-api.c
index 072bf72c56..bc5e1cf669 100644
--- a/src/shared/ask-password-api.c
+++ b/src/shared/ask-password-api.c
@@ -385,13 +385,13 @@ int ask_password_tty(
if (!(flags & ASK_PASSWORD_SILENT))
backspace_chars(ttyfd, 1);
- /* Remove a full UTF-8 codepoint from the end. For that, figure out where the last one
- * begins */
+ /* Remove a full UTF-8 codepoint from the end. For that, figure out where the
+ * last one begins */
q = 0;
for (;;) {
size_t z;
- z = utf8_encoded_valid_unichar(passphrase + q);
+ z = utf8_encoded_valid_unichar(passphrase + q, (size_t) -1);
if (z == 0) {
q = (size_t) -1; /* Invalid UTF8! */
break;
@@ -410,8 +410,8 @@ int ask_password_tty(
flags |= ASK_PASSWORD_SILENT;
- /* There are two ways to enter silent mode. Either by pressing backspace as first key
- * (and only as first key), or ... */
+ /* There are two ways to enter silent mode. Either by pressing backspace as
+ * first key (and only as first key), or ... */
if (ttyfd >= 0)
(void) loop_write(ttyfd, "(no echo) ", 10, false);
@@ -440,7 +440,7 @@ int ask_password_tty(
if (!(flags & ASK_PASSWORD_SILENT) && ttyfd >= 0) {
/* Check if we got a complete UTF-8 character now. If so, let's output one '*'. */
- n = utf8_encoded_valid_unichar(passphrase + codepoint);
+ n = utf8_encoded_valid_unichar(passphrase + codepoint, (size_t) -1);
if (n >= 0) {
codepoint = p;
(void) loop_write(ttyfd, (flags & ASK_PASSWORD_ECHO) ? &c : "*", 1, false);
diff --git a/src/shared/json.c b/src/shared/json.c
index 3786ff12b8..7ae1ffb1b2 100644
--- a/src/shared/json.c
+++ b/src/shared/json.c
@@ -1756,7 +1756,6 @@ static void inc_lines_columns(unsigned *line, unsigned *column, const char *s, s
assert(s || n == 0);
while (n > 0) {
-
if (*s == '\n') {
(*line)++;
*column = 1;
@@ -1765,7 +1764,7 @@ static void inc_lines_columns(unsigned *line, unsigned *column, const char *s, s
else {
int w;
- w = utf8_encoded_valid_unichar(s);
+ w = utf8_encoded_valid_unichar(s, n);
if (w < 0) /* count invalid unichars as normal characters */
w = 1;
else if ((size_t) w > n) /* never read more than the specified number of characters */
@@ -1930,7 +1929,7 @@ static int json_parse_string(const char **p, char **ret) {
continue;
}
- len = utf8_encoded_valid_unichar(c);
+ len = utf8_encoded_valid_unichar(c, (size_t) -1);
if (len < 0)
return len;
diff --git a/src/test/test-utf8.c b/src/test/test-utf8.c
index 9849530ac8..d1e48da2a6 100644
--- a/src/test/test-utf8.c
+++ b/src/test/test-utf8.c
@@ -36,11 +36,21 @@ static void test_ascii_is_valid_n(void) {
}
static void test_utf8_encoded_valid_unichar(void) {
- assert_se(utf8_encoded_valid_unichar("\342\204\242") == 3);
- assert_se(utf8_encoded_valid_unichar("\302\256") == 2);
- assert_se(utf8_encoded_valid_unichar("a") == 1);
- assert_se(utf8_encoded_valid_unichar("\341\204") < 0);
- assert_se(utf8_encoded_valid_unichar("\341\204\341\204") < 0);
+ assert_se(utf8_encoded_valid_unichar("\342\204\242", 1) == -EINVAL); /* truncated */
+ assert_se(utf8_encoded_valid_unichar("\342\204\242", 2) == -EINVAL); /* truncated */
+ assert_se(utf8_encoded_valid_unichar("\342\204\242", 3) == 3);
+ assert_se(utf8_encoded_valid_unichar("\342\204\242", 4) == 3);
+ assert_se(utf8_encoded_valid_unichar("\302\256", 1) == -EINVAL); /* truncated */
+ assert_se(utf8_encoded_valid_unichar("\302\256", 2) == 2);
+ assert_se(utf8_encoded_valid_unichar("\302\256", 3) == 2);
+ assert_se(utf8_encoded_valid_unichar("\302\256", (size_t) -1) == 2);
+ assert_se(utf8_encoded_valid_unichar("a", 1) == 1);
+ assert_se(utf8_encoded_valid_unichar("a", 2) == 1);
+ assert_se(utf8_encoded_valid_unichar("\341\204", 1) == -EINVAL); /* truncated, potentially valid */
+ assert_se(utf8_encoded_valid_unichar("\341\204", 2) == -EINVAL); /* truncated, potentially valid */
+ assert_se(utf8_encoded_valid_unichar("\341\204", 3) == -EINVAL);
+ assert_se(utf8_encoded_valid_unichar("\341\204\341\204", 4) == -EINVAL);
+ assert_se(utf8_encoded_valid_unichar("\341\204\341\204", 5) == -EINVAL);
}
static void test_utf8_escaping(void) {