summaryrefslogtreecommitdiff
path: root/string.c
diff options
context:
space:
mode:
authorPeter Zhu <peter@peterzhu.ca>2023-01-11 14:25:34 -0500
committerPeter Zhu <peter@peterzhu.ca>2023-01-12 09:06:15 -0500
commitbfc887f391fde6de9d088039509f6e3eaa40b3ca (patch)
tree9e5604354b561dbb8ce350aac40bb6c36e235148 /string.c
parent207f8d0027d679780d4f3962c305d36885feb652 (diff)
downloadruby-bfc887f391fde6de9d088039509f6e3eaa40b3ca.tar.gz
Add str_enc_copy_direct
This commit adds str_enc_copy_direct, which is like str_enc_copy but does not check the frozen status of str1 and does not check the validity of the encoding of str2. This makes certain string operations ~5% faster. ```ruby puts(Benchmark.measure do 100_000_000.times do "a".downcase end end) ``` Before this patch: ``` 7.587598 0.040858 7.628456 ( 7.669022) ``` After this patch: ``` 7.133128 0.039809 7.172937 ( 7.183124) ```
Diffstat (limited to 'string.c')
-rw-r--r--string.c24
1 files changed, 19 insertions, 5 deletions
diff --git a/string.c b/string.c
index 6a62647820..295871e526 100644
--- a/string.c
+++ b/string.c
@@ -768,6 +768,20 @@ str_enc_copy(VALUE str1, VALUE str2)
rb_enc_set_index(str1, ENCODING_GET(str2));
}
+/* Like str_enc_copy, but does not check frozen status of str1.
+ * You should use this only if you're certain that str1 is not frozen. */
+static inline void
+str_enc_copy_direct(VALUE str1, VALUE str2)
+{
+ int inlined_encoding = RB_ENCODING_GET_INLINED(str2);
+ if (inlined_encoding == ENCODING_INLINE_MAX) {
+ rb_enc_set_index(str1, rb_enc_get_index(str2));
+ }
+ else {
+ ENCODING_SET_INLINED(str1, inlined_encoding);
+ }
+}
+
static void
rb_enc_cr_str_copy_for_substr(VALUE dest, VALUE src)
{
@@ -6156,7 +6170,7 @@ str_byte_substr(VALUE str, long beg, long len, int empty)
VALUE str2 = str_subseq(str, beg, len);
- str_enc_copy(str2, str);
+ str_enc_copy_direct(str2, str);
if (RSTRING_LEN(str2) == 0) {
if (!rb_enc_asciicompat(STR_ENC_GET(str)))
@@ -6382,7 +6396,7 @@ rb_str_reverse(VALUE str)
}
}
STR_SET_LEN(rev, RSTRING_LEN(str));
- str_enc_copy(rev, str);
+ str_enc_copy_direct(rev, str);
ENC_CODERANGE_SET(rev, cr);
return rev;
@@ -7337,7 +7351,7 @@ rb_str_casemap(VALUE source, OnigCaseFoldType *flags, rb_encoding *enc)
RB_GC_GUARD(buffer_anchor);
/* TODO: check about string terminator character */
- str_enc_copy(target, source);
+ str_enc_copy_direct(target, source);
/*ENC_CODERANGE_SET(mapped, cr);*/
return target;
@@ -7468,7 +7482,7 @@ rb_str_upcase(int argc, VALUE *argv, VALUE str)
enc = str_true_enc(str);
if (case_option_single_p(flags, enc, str)) {
ret = rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
- str_enc_copy(ret, str);
+ str_enc_copy_direct(ret, str);
upcase_single(ret);
}
else if (flags&ONIGENC_CASE_ASCII_ONLY) {
@@ -7570,7 +7584,7 @@ rb_str_downcase(int argc, VALUE *argv, VALUE str)
enc = str_true_enc(str);
if (case_option_single_p(flags, enc, str)) {
ret = rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
- str_enc_copy(ret, str);
+ str_enc_copy_direct(ret, str);
downcase_single(ret);
}
else if (flags&ONIGENC_CASE_ASCII_ONLY) {