From 31a5586d1e007d04cfa10548d9bfb42b2787a7a0 Mon Sep 17 00:00:00 2001
From: Jean Boussier
Date: Thu, 21 Jul 2022 17:08:51 +0200
Subject: rb_str_buf_append: add a fast path for ENC_CODERANGE_VALID

If the RHS has valid encoding, and both strings have the same
encoding, we can use the fast path.

However we need to update the LHS coderange.

```
compare-ruby: ruby 3.2.0dev (2022-07-21T14:46:32Z master cdbb9b8555) [arm64-darwin21]
built-ruby: ruby 3.2.0dev (2022-07-25T07:25:41Z string-concat-vali.. 11a2772bdd) [arm64-darwin21]
warming up...

|                    |compare-ruby|built-ruby|
|:-------------------|-----------:|---------:|
|binary_concat_7bit  |    554.816k|  556.460k|
|                    |           -|     1.00x|
|utf8_concat_7bit    |    556.367k|  555.101k|
|                    |       1.00x|         -|
|utf8_concat_UTF8    |    412.555k|  556.824k|
|                    |           -|     1.35x|
```
---
 string.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

(limited to 'string.c')

diff --git a/string.c b/string.c
index f3f5ea92cc..f5e089aa21 100644
--- a/string.c
+++ b/string.c
@@ -3329,9 +3329,24 @@ VALUE
 rb_str_buf_append(VALUE str, VALUE str2)
 {
     int str2_cr = rb_enc_str_coderange(str2);
-    if (str2_cr == ENC_CODERANGE_7BIT && str_enc_fastpath(str)) {
-        str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2), true);
-        return str;
+
+    if (str_enc_fastpath(str)) {
+        switch (str2_cr) {
+          case ENC_CODERANGE_7BIT:
+            // If RHS is 7bit we can do simple concatenation
+            str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2), true);
+            return str;
+          case ENC_CODERANGE_VALID:
+            // If RHS is valid, we can do simple concatenation if encodings are the same
+            if (ENCODING_GET_INLINED(str) == ENCODING_GET_INLINED(str2)) {
+                str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2), true);
+                int str_cr = ENC_CODERANGE(str);
+                if (UNLIKELY(str_cr != ENC_CODERANGE_VALID)) {
+                    ENC_CODERANGE_SET(str, RB_ENC_CODERANGE_AND(str_cr, str2_cr));
+                }
+                return str;
+            }
+        }
     }
 
     rb_enc_cr_str_buf_cat(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
-- 
cgit v1.2.1