summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Christoph M. Becker <cmbecker69@gmx.de> 2018-02-24 22:34:32 +0100
committer: Christoph M. Becker <cmbecker69@gmx.de> 2018-03-11 17:38:28 +0100
commit: 01ea314e8cfd6c1c6a2c7db9e13be1f581f5d0a1 (patch)
tree: 055d1fdb673f6dee94177ef200b9afb679e86d6f
parent: 76fc73cbfc1961fcb78d19cc4070423a9d9d1b39 (diff)
download: php-git-01ea314e8cfd6c1c6a2c7db9e13be1f581f5d0a1.tar.gz
Fix #62545: wrong unicode mapping in some charsets
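Byte values that are undefined in a charset are best mapped to the Unicode
REPLACEMENT CHARACTER (U+FFFD), rather than to "?" (U+003F, as CP1251 did
for 0x98) or to the noncharacter U+FFFE (as CP1252 did for 0x81, 0x8D, 0x8F,
0x90 and 0x9D).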
-rw-r--r-- NEWS 3
-rw-r--r-- ext/mbstring/libmbfl/filters/unicode_table_cp1251.h 2
-rw-r--r-- ext/mbstring/libmbfl/filters/unicode_table_cp1252.h 8
-rw-r--r-- ext/mbstring/tests/bug62545.phpt 18
4 files changed, 26 insertions, 5 deletions
diff --git a/NEWS b/NEWS
index 079722fbc9..4b27bb5363 100644
--- a/NEWS
+++ b/NEWS
@@ -100,6 +100,9 @@ PHP NEWS
- IMAP:
. Fixed bug #75774 (imap_append HeapCorruction). (Anatol)
+- Mbstring:
+ . Fixed bug #62545 (wrong unicode mapping in some charsets). (cmb)
+
- Opcache:
. Fixed bug #75720 (File cache not populated after SHM runs full). (Dmitry)
. Fixed bug #75579 (Interned strings buffer overflow may cause crash).
diff --git a/ext/mbstring/libmbfl/filters/unicode_table_cp1251.h b/ext/mbstring/libmbfl/filters/unicode_table_cp1251.h
index 9be88c587f..f504713805 100644
--- a/ext/mbstring/libmbfl/filters/unicode_table_cp1251.h
+++ b/ext/mbstring/libmbfl/filters/unicode_table_cp1251.h
@@ -30,7 +30,7 @@ static const unsigned short cp1251_ucs_table[] = {
0x0402, 0x0403, 0x201a, 0x0453, 0x201e, 0x2026, 0x2020, 0x2021,
0x20ac, 0x2030, 0x0409, 0x2039, 0x040a, 0x040c, 0x040b, 0x040f,
0x0452, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
- 0x003f, 0x2122, 0x0459, 0x203a, 0x045a, 0x045c, 0x045b, 0x045f,
+ 0xfffd, 0x2122, 0x0459, 0x203a, 0x045a, 0x045c, 0x045b, 0x045f,
0x00a0, 0x040e, 0x045e, 0x0408, 0x00a4, 0x0490, 0x00a6, 0x00a7,
0x0401, 0x00a9, 0x0404, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x0407,
0x00b0, 0x00b1, 0x0406, 0x0456, 0x0491, 0x00b5, 0x00b6, 0x00b7,
diff --git a/ext/mbstring/libmbfl/filters/unicode_table_cp1252.h b/ext/mbstring/libmbfl/filters/unicode_table_cp1252.h
index eb48af55e3..92d20893cd 100644
--- a/ext/mbstring/libmbfl/filters/unicode_table_cp1252.h
+++ b/ext/mbstring/libmbfl/filters/unicode_table_cp1252.h
@@ -32,9 +32,9 @@
* as it only covers this range, while the rest cover 0xa0 onwards */
static const unsigned short cp1252_ucs_table[] = {
- 0x20ac,0xfffe,0x201a,0x0192,0x201e,0x2026,0x2020,0x2021,
- 0x02c6,0x2030,0x0160,0x2039,0x0152,0xfffe,0x017d,0xfffe,
- 0xfffe,0x2018,0x2019,0x201c,0x201d,0x2022,0x2013,0x2014,
- 0x02dc,0x2122,0x0161,0x203a,0x0153,0xfffe,0x017e,0x0178
+ 0x20ac,0xfffd,0x201a,0x0192,0x201e,0x2026,0x2020,0x2021,
+ 0x02c6,0x2030,0x0160,0x2039,0x0152,0xfffd,0x017d,0xfffd,
+ 0xfffd,0x2018,0x2019,0x201c,0x201d,0x2022,0x2013,0x2014,
+ 0x02dc,0x2122,0x0161,0x203a,0x0153,0xfffd,0x017e,0x0178
};
#endif /* UNICODE_TABLE_CP1252_H */
diff --git a/ext/mbstring/tests/bug62545.phpt b/ext/mbstring/tests/bug62545.phpt
new file mode 100644
index 0000000000..6a68d0dee8
--- /dev/null
+++ b/ext/mbstring/tests/bug62545.phpt
@@ -0,0 +1,18 @@
+--TEST--
+Bug #62545 (wrong unicode mapping in some charsets)
+--SKIPIF--
+<?php
+if (!extension_loaded('mbstring')) die('skip mbstring extension not available');
+?>
+--FILE--
+<?php
+var_dump(
+ bin2hex(mb_convert_encoding("\x98", 'UTF-8', 'Windows-1251')),
+ bin2hex(mb_convert_encoding("\x81\x8d\x8f\x90\x9d", 'UTF-8', 'Windows-1252'))
+);
+?>
+===DONE===
+--EXPECT--
+string(6) "efbfbd"
+string(30) "efbfbdefbfbdefbfbdefbfbdefbfbd"
+===DONE===