summaryrefslogtreecommitdiff
path: root/ext/mbstring/libmbfl/filters
diff options
context:
space:
mode:
author: Alex Dowad <alexinbeijing@gmail.com> 2020-09-19 16:26:04 +0200
committer: Alex Dowad <alexinbeijing@gmail.com> 2021-01-14 22:26:24 +0200
commit: c11e12ffe001dd7f4a57c39a8ebd5de48a72cb42 (patch)
tree: 7096961946e4dab342df36497cd5d3fff9168fe0 /ext/mbstring/libmbfl/filters
parent: 4b95fdf2cac7269a38d035141e7321a295e19b29 (diff)
download: php-git-c11e12ffe001dd7f4a57c39a8ebd5de48a72cb42.tar.gz
Add comment explaining why strings in ISO-2022-JP-2004 (etc.) end with ESC ( B
These encodings have multiple modes which can be selected via escape sequences. The default starting mode is ASCII. If a string _ends_ in a different mode, we emit a 'redundant' escape sequence to switch back to ASCII. If the output string is never concatenated with other strings, that extra escape sequence serves no purpose. But if the output string is concatenated with other strings of the same encoding, the escape sequence ensures that the concatenated string will be valid, because each following string can again assume that it starts in ASCII mode.
Diffstat (limited to 'ext/mbstring/libmbfl/filters')
-rw-r--r-- ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c 4
1 files changed, 3 insertions, 1 deletions
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c b/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c
index c4f6fc3ac2..f52eebce1d 100644
--- a/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c
+++ b/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c
@@ -723,7 +723,9 @@ mbfl_filt_conv_wchar_jis2004_flush(mbfl_convert_filter *filter)
CK((*filter->output_function)(s2, filter->data));
}
- /* back to latin */
+ /* If we had switched to a different charset, go back to ASCII mode
+ * This makes it possible to concatenate arbitrary valid strings
+ * together and get a valid string */
if ((filter->status & 0xff00) != 0) {
CK((*filter->output_function)(0x1b, filter->data)); /* ESC */
CK((*filter->output_function)(0x28, filter->data)); /* '(' */