summary | refs | log | tree | commit | diff
path: root/lib/charset.c
diff options
context:
space:
mode:
author: Heinrich Schuchardt <xypron.glpk@gmx.de> 2021-02-27 14:08:37 +0100
committer: Heinrich Schuchardt <xypron.glpk@gmx.de> 2021-03-07 17:37:13 +0100
commit: ddbaff53da5b99563fa371db0b09544e139fdabb (patch)
tree: 6f471341b3c6e6ae0fb98ad1193553dacf08825e /lib/charset.c
parent: 73bb90cabcdffcd528d1002a12779779196bf200 (diff)
download: u-boot-ddbaff53da5b99563fa371db0b09544e139fdabb.tar.gz
lib/charset: utf8_get() should return error
utf8_get() should return an error when it hits an illegal UTF-8 sequence instead of silently converting the input to a question mark. Correct utf8_get() and its unit test. console_read_unicode() will now ignore illegal UTF-8 sequences. Signed-off-by: Heinrich Schuchardt <xypron.glpk@gmx.de>
Diffstat (limited to 'lib/charset.c')
-rw-r--r-- lib/charset.c 25
1 files changed, 16 insertions, 9 deletions
diff --git a/lib/charset.c b/lib/charset.c
index 1345c8f9f0..946d5ee23e 100644
--- a/lib/charset.c
+++ b/lib/charset.c
@@ -32,7 +32,7 @@ static struct capitalization_table capitalization_table[] =
*
* @read_u8: - stream reader
* @src: - string buffer passed to stream reader, optional
- * Return: - Unicode code point
+ * Return: - Unicode code point, or -1
*/
static int get_code(u8 (*read_u8)(void *data), void *data)
{
@@ -78,7 +78,7 @@ static int get_code(u8 (*read_u8)(void *data), void *data)
}
return ch;
error:
- return '?';
+ return -1;
}
/**
@@ -120,14 +120,21 @@ static u8 read_console(void *data)
int console_read_unicode(s32 *code)
{
- if (!tstc()) {
- /* No input available */
- return 1;
- }
+ for (;;) {
+ s32 c;
- /* Read Unicode code */
- *code = get_code(read_console, NULL);
- return 0;
+ if (!tstc()) {
+ /* No input available */
+ return 1;
+ }
+
+ /* Read Unicode code */
+ c = get_code(read_console, NULL);
+ if (c > 0) {
+ *code = c;
+ return 0;
+ }
+ }
}
s32 utf8_get(const char **src)