summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Dragan <bulk88@hotmail.com>2012-10-24 16:15:51 -0400
committerFather Chrysostomos <sprout@cpan.org>2012-10-25 20:02:55 -0700
commitf019c49e380f764c1ead36fe3602184804292711 (patch)
treea4a94ed46a4110e81d72daab995701db6efa1e6d
parent3ed356df9354193bbcc5202f066f3c07ae84b443 (diff)
downloadperl-f019c49e380f764c1ead36fe3602184804292711.tar.gz
optimize memory wrap croaks, often used in MEM_WRAP_CHECK
Most perls are built with PERL_MALLOC_WRAP. This causes MEM_WRAP_CHECK macro to perform some checks on the requested allocation size in macro Newx. The checks are performed at the caller, not in the callee (for me on Win32 perl the callee in Newx is Perl_safesysmalloc) of Newx. If the check fails a "Perl_croak_nocontext("%s",PL_memory_wrap)" is done. In x86 machine code, "if(bad_alloc) Perl_croak_nocontext("%s",PL_memory_wrap); will be written as "cond jmp ahead ~15 bytes", "push const pointer", "push const pointer", "call const pointer". For each Newx where the allocation amount was not a constant (constant folding would remove the croak memory wrap branch compleatly), the branch takes 15-19 bytes depending on x86 compiler. There are about 80 Newx'es in the interp (win32 dynamic linking perl) that do the memory wrap check and have a "Perl_croak_nocontext("%s",PL_memory_wrap)" in them after all optimizations by the compiler. This patch reduces the memory wrap branch from 15-19 to 5 bytes on x86. Since croak_memory_wrap is a static and a noreturn, a compiler with IPO may optimize the whole branch to "cond jmp 32 bits relative" at each callsite. A less optimal complier may do "cond jmp 8 bits relative (jump past the "call S_croak_memory_wrap" instruction), then "call S_croak_memory_wrap". Both ways are better than the current situation. The reason why croak_memory_wrap is a static and not an export is that the compiler has more opportunity to optimize/reduce the impact of the memory wrap branch at the call site if the target is in the same image rather than in a different image, which would require using the platform specific dynamic linking mechanism/export table/etc, which often requires a new stack frame per ABI of the platform. If a dynamic linked XS module does not use S_croak_memory_wrap it will be removed from the image by the C compiler. If it is included in the XS image, it is a very small block of code and a 3 byte string litteral. A CPU cache line is typically 32 or 64 bytes and a memory read is typically 16. Cutting the instructions by 10 to 16 bytes out of "hot code" (10 of the ~80 call sites are pp_*) is a worthy goal. In a few places the memory wrap croak is used explictly, not from a MEM_WRAP_CHECK, this patch converts those to use the static. If PERL_MALLOC_WRAP is undef, there are still a couple uses of croak memory wrap, so do not keep S_croak_memory_wrap in a ifdef PERL_MALLOC_WRAP. Also see http://www.nntp.perl.org/group/perl.perl5.porters/2012/10/msg194383.html and [perl #115456].
-rw-r--r--embed.fnc1
-rw-r--r--embed.h1
-rw-r--r--handy.h6
-rw-r--r--inline.h9
-rw-r--r--proto.h3
-rw-r--r--sv.c4
-rw-r--r--util.c6
7 files changed, 22 insertions, 8 deletions
diff --git a/embed.fnc b/embed.fnc
index b7a843134c..9fab558b96 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1553,6 +1553,7 @@ Anpa |Malloc_t|safesysmalloc |MEM_SIZE nbytes
Anpa |Malloc_t|safesyscalloc |MEM_SIZE elements|MEM_SIZE size
Anpa |Malloc_t|safesysrealloc|Malloc_t where|MEM_SIZE nbytes
Anp |Free_t |safesysfree |Malloc_t where
+Asrnx |void |croak_memory_wrap
#if defined(PERL_GLOBAL_STRUCT)
Ap |struct perl_vars *|GetVars
Ap |struct perl_vars*|init_global_struct
diff --git a/embed.h b/embed.h
index b21078cbd4..32987bd791 100644
--- a/embed.h
+++ b/embed.h
@@ -76,6 +76,7 @@
#ifndef PERL_IMPLICIT_CONTEXT
#define croak Perl_croak
#endif
+#define croak_memory_wrap S_croak_memory_wrap
#define croak_no_modify() Perl_croak_no_modify(aTHX)
#define croak_sv(a) Perl_croak_sv(aTHX_ a)
#define croak_xs_usage(a,b) Perl_croak_xs_usage(aTHX_ a,b)
diff --git a/handy.h b/handy.h
index 841dd9066c..3b18a0f72c 100644
--- a/handy.h
+++ b/handy.h
@@ -1141,13 +1141,13 @@ PoisonWith(0xEF) for catching access to freed memory.
* overly eager compilers that will bleat about e.g.
* (U16)n > (size_t)~0/sizeof(U16) always being false. */
#ifdef PERL_MALLOC_WRAP
-#define MEM_WRAP_CHECK(n,t) MEM_WRAP_CHECK_1(n,t,PL_memory_wrap)
+#define MEM_WRAP_CHECK(n,t) \
+ (void)(sizeof(t) > 1 && ((MEM_SIZE)(n)+0.0) > MEM_SIZE_MAX/sizeof(t) && (croak_memory_wrap(),0))
#define MEM_WRAP_CHECK_1(n,t,a) \
(void)(sizeof(t) > 1 && ((MEM_SIZE)(n)+0.0) > MEM_SIZE_MAX/sizeof(t) && (Perl_croak_nocontext("%s",(a)),0))
#define MEM_WRAP_CHECK_(n,t) MEM_WRAP_CHECK(n,t),
-#define PERL_STRLEN_ROUNDUP(n) ((void)(((n) > MEM_SIZE_MAX - 2 * PERL_STRLEN_ROUNDUP_QUANTUM) ? (Perl_croak_nocontext("%s",PL_memory_wrap),0):0),((n-1+PERL_STRLEN_ROUNDUP_QUANTUM)&~((MEM_SIZE)PERL_STRLEN_ROUNDUP_QUANTUM-1)))
-
+#define PERL_STRLEN_ROUNDUP(n) ((void)(((n) > MEM_SIZE_MAX - 2 * PERL_STRLEN_ROUNDUP_QUANTUM) ? (croak_memory_wrap(),0):0),((n-1+PERL_STRLEN_ROUNDUP_QUANTUM)&~((MEM_SIZE)PERL_STRLEN_ROUNDUP_QUANTUM-1)))
#else
#define MEM_WRAP_CHECK(n,t)
diff --git a/inline.h b/inline.h
index f0d45f6736..69807f1fb7 100644
--- a/inline.h
+++ b/inline.h
@@ -104,3 +104,12 @@ S_sv_or_pv_pos_u2b(pTHX_ SV *sv, const char *pv, STRLEN pos, STRLEN *lenp)
return sv_pos_u2b_flags(sv,pos,lenp,SV_CONST_RETURN);
}
#endif
+
+/* ------------------------------- handy.h ------------------------------- */
+
+/* saves machine code for a common noreturn idiom typically used in Newx*() */
+static void
+S_croak_memory_wrap(void)
+{
+ Perl_croak_nocontext("%s",PL_memory_wrap);
+}
diff --git a/proto.h b/proto.h
index d756e1c28c..d069792e28 100644
--- a/proto.h
+++ b/proto.h
@@ -640,6 +640,9 @@ PERL_CALLCONV_NO_RET void Perl_croak(pTHX_ const char* pat, ...)
__attribute__noreturn__
__attribute__format__null_ok__(__printf__,pTHX_1,pTHX_2);
+PERL_STATIC_NO_RET void S_croak_memory_wrap(void)
+ __attribute__noreturn__;
+
PERL_CALLCONV_NO_RET void Perl_croak_no_modify(pTHX)
__attribute__noreturn__;
diff --git a/sv.c b/sv.c
index 1bebb81050..050763e23b 100644
--- a/sv.c
+++ b/sv.c
@@ -11098,13 +11098,13 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
have = esignlen + zeros + elen;
if (have < zeros)
- Perl_croak_nocontext("%s", PL_memory_wrap);
+ croak_memory_wrap();
need = (have > width ? have : width);
gap = need - have;
if (need >= (((STRLEN)~0) - SvCUR(sv) - dotstrlen - 1))
- Perl_croak_nocontext("%s", PL_memory_wrap);
+ croak_memory_wrap();
SvGROW(sv, SvCUR(sv) + need + dotstrlen + 1);
p = SvEND(sv);
if (esignlen && fill == '0') {
diff --git a/util.c b/util.c
index 26330349b5..a8cd6fe70c 100644
--- a/util.c
+++ b/util.c
@@ -307,12 +307,12 @@ Perl_safesyscalloc(MEM_SIZE count, MEM_SIZE size)
#endif
}
else
- Perl_croak_nocontext("%s", PL_memory_wrap);
+ croak_memory_wrap();
#ifdef PERL_TRACK_MEMPOOL
if (sTHX <= MEM_SIZE_MAX - (MEM_SIZE)total_size)
total_size += sTHX;
else
- Perl_croak_nocontext("%s", PL_memory_wrap);
+ croak_memory_wrap();
#endif
#ifdef HAS_64K_LIMIT
if (total_size > 0xffff) {
@@ -3257,7 +3257,7 @@ Perl_repeatcpy(register char *to, register const char *from, I32 len, register I
PERL_ARGS_ASSERT_REPEATCPY;
if (count < 0)
- Perl_croak_nocontext("%s",PL_memory_wrap);
+ croak_memory_wrap();
if (len == 1)
memset(to, *from, count);