summaryrefslogtreecommitdiff
path: root/src/pkg/runtime/asm_386.s
diff options
context:
space:
mode:
Diffstat (limited to 'src/pkg/runtime/asm_386.s')
-rw-r--r--src/pkg/runtime/asm_386.s2292
1 files changed, 0 insertions, 2292 deletions
diff --git a/src/pkg/runtime/asm_386.s b/src/pkg/runtime/asm_386.s
deleted file mode 100644
index 25026417b..000000000
--- a/src/pkg/runtime/asm_386.s
+++ /dev/null
@@ -1,2292 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "zasm_GOOS_GOARCH.h"
-#include "funcdata.h"
-#include "textflag.h"
-
-TEXT runtime·rt0_go(SB),NOSPLIT,$0
- // copy arguments forward on an even stack
- MOVL argc+0(FP), AX
- MOVL argv+4(FP), BX
- SUBL $128, SP // plenty of scratch
- ANDL $~15, SP
- MOVL AX, 120(SP) // save argc, argv away
- MOVL BX, 124(SP)
-
- // set default stack bounds.
- // _cgo_init may update stackguard.
- MOVL $runtime·g0(SB), BP
- LEAL (-64*1024+104)(SP), BX
- MOVL BX, g_stackguard(BP)
- MOVL BX, g_stackguard0(BP)
- MOVL SP, g_stackbase(BP)
-
- // find out information about the processor we're on
- MOVL $0, AX
- CPUID
- CMPL AX, $0
- JE nocpuinfo
- MOVL $1, AX
- CPUID
- MOVL CX, runtime·cpuid_ecx(SB)
- MOVL DX, runtime·cpuid_edx(SB)
-nocpuinfo:
-
- // if there is an _cgo_init, call it to let it
- // initialize and to set up GS. if not,
- // we set up GS ourselves.
- MOVL _cgo_init(SB), AX
- TESTL AX, AX
- JZ needtls
- MOVL $setg_gcc<>(SB), BX
- MOVL BX, 4(SP)
- MOVL BP, 0(SP)
- CALL AX
- // update stackguard after _cgo_init
- MOVL $runtime·g0(SB), CX
- MOVL g_stackguard0(CX), AX
- MOVL AX, g_stackguard(CX)
- // skip runtime·ldt0setup(SB) and tls test after _cgo_init for non-windows
- CMPL runtime·iswindows(SB), $0
- JEQ ok
-needtls:
- // skip runtime·ldt0setup(SB) and tls test on Plan 9 in all cases
- CMPL runtime·isplan9(SB), $1
- JEQ ok
-
- // set up %gs
- CALL runtime·ldt0setup(SB)
-
- // store through it, to make sure it works
- get_tls(BX)
- MOVL $0x123, g(BX)
- MOVL runtime·tls0(SB), AX
- CMPL AX, $0x123
- JEQ ok
- MOVL AX, 0 // abort
-ok:
- // set up m and g "registers"
- get_tls(BX)
- LEAL runtime·g0(SB), CX
- MOVL CX, g(BX)
- LEAL runtime·m0(SB), AX
-
- // save m->g0 = g0
- MOVL CX, m_g0(AX)
- // save g0->m = m0
- MOVL AX, g_m(CX)
-
- CALL runtime·emptyfunc(SB) // fault if stack check is wrong
-
- // convention is D is always cleared
- CLD
-
- CALL runtime·check(SB)
-
- // saved argc, argv
- MOVL 120(SP), AX
- MOVL AX, 0(SP)
- MOVL 124(SP), AX
- MOVL AX, 4(SP)
- CALL runtime·args(SB)
- CALL runtime·osinit(SB)
- CALL runtime·schedinit(SB)
-
- // create a new goroutine to start program
- PUSHL $runtime·main·f(SB) // entry
- PUSHL $0 // arg size
- ARGSIZE(8)
- CALL runtime·newproc(SB)
- ARGSIZE(-1)
- POPL AX
- POPL AX
-
- // start this M
- CALL runtime·mstart(SB)
-
- INT $3
- RET
-
-DATA runtime·main·f+0(SB)/4,$runtime·main(SB)
-GLOBL runtime·main·f(SB),RODATA,$4
-
-TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
- INT $3
- RET
-
-TEXT runtime·asminit(SB),NOSPLIT,$0-0
- // Linux and MinGW start the FPU in extended double precision.
- // Other operating systems use double precision.
- // Change to double precision to match them,
- // and to match other hardware that only has double.
- PUSHL $0x27F
- FLDCW 0(SP)
- POPL AX
- RET
-
-/*
- * go-routine
- */
-
-// void gosave(Gobuf*)
-// save state in Gobuf; setjmp
-TEXT runtime·gosave(SB), NOSPLIT, $0-4
- MOVL buf+0(FP), AX // gobuf
- LEAL buf+0(FP), BX // caller's SP
- MOVL BX, gobuf_sp(AX)
- MOVL 0(SP), BX // caller's PC
- MOVL BX, gobuf_pc(AX)
- MOVL $0, gobuf_ret(AX)
- MOVL $0, gobuf_ctxt(AX)
- get_tls(CX)
- MOVL g(CX), BX
- MOVL BX, gobuf_g(AX)
- RET
-
-// void gogo(Gobuf*)
-// restore state from Gobuf; longjmp
-TEXT runtime·gogo(SB), NOSPLIT, $0-4
- MOVL buf+0(FP), BX // gobuf
- MOVL gobuf_g(BX), DX
- MOVL 0(DX), CX // make sure g != nil
- get_tls(CX)
- MOVL DX, g(CX)
- MOVL gobuf_sp(BX), SP // restore SP
- MOVL gobuf_ret(BX), AX
- MOVL gobuf_ctxt(BX), DX
- MOVL $0, gobuf_sp(BX) // clear to help garbage collector
- MOVL $0, gobuf_ret(BX)
- MOVL $0, gobuf_ctxt(BX)
- MOVL gobuf_pc(BX), BX
- JMP BX
-
-// func mcall(fn func(*g))
-// Switch to m->g0's stack, call fn(g).
-// Fn must never return. It should gogo(&g->sched)
-// to keep running g.
-TEXT runtime·mcall(SB), NOSPLIT, $0-4
- MOVL fn+0(FP), DI
-
- get_tls(CX)
- MOVL g(CX), AX // save state in g->sched
- MOVL 0(SP), BX // caller's PC
- MOVL BX, (g_sched+gobuf_pc)(AX)
- LEAL fn+0(FP), BX // caller's SP
- MOVL BX, (g_sched+gobuf_sp)(AX)
- MOVL AX, (g_sched+gobuf_g)(AX)
-
- // switch to m->g0 & its stack, call fn
- MOVL g(CX), BX
- MOVL g_m(BX), BX
- MOVL m_g0(BX), SI
- CMPL SI, AX // if g == m->g0 call badmcall
- JNE 3(PC)
- MOVL $runtime·badmcall(SB), AX
- JMP AX
- MOVL SI, g(CX) // g = m->g0
- MOVL (g_sched+gobuf_sp)(SI), SP // sp = m->g0->sched.sp
- PUSHL AX
- MOVL DI, DX
- MOVL 0(DI), DI
- CALL DI
- POPL AX
- MOVL $runtime·badmcall2(SB), AX
- JMP AX
- RET
-
-// switchtoM is a dummy routine that onM leaves at the bottom
-// of the G stack. We need to distinguish the routine that
-// lives at the bottom of the G stack from the one that lives
-// at the top of the M stack because the one at the top of
-// the M stack terminates the stack walk (see topofstack()).
-TEXT runtime·switchtoM(SB), NOSPLIT, $0-4
- RET
-
-// func onM(fn func())
-TEXT runtime·onM(SB), NOSPLIT, $0-4
- MOVL fn+0(FP), DI // DI = fn
- get_tls(CX)
- MOVL g(CX), AX // AX = g
- MOVL g_m(AX), BX // BX = m
-
- MOVL m_g0(BX), DX // DX = g0
- CMPL AX, DX
- JEQ onm
-
- MOVL m_curg(BX), BP
- CMPL AX, BP
- JEQ oncurg
-
- // Not g0, not curg. Must be gsignal, but that's not allowed.
- // Hide call from linker nosplit analysis.
- MOVL $runtime·badonm(SB), AX
- CALL AX
-
-oncurg:
- // save our state in g->sched. Pretend to
- // be switchtoM if the G stack is scanned.
- MOVL $runtime·switchtoM(SB), (g_sched+gobuf_pc)(AX)
- MOVL SP, (g_sched+gobuf_sp)(AX)
- MOVL AX, (g_sched+gobuf_g)(AX)
-
- // switch to g0
- MOVL DX, g(CX)
- MOVL (g_sched+gobuf_sp)(DX), BX
- // make it look like mstart called onM on g0, to stop traceback
- SUBL $4, BX
- MOVL $runtime·mstart(SB), DX
- MOVL DX, 0(BX)
- MOVL BX, SP
-
- // call target function
- ARGSIZE(0)
- MOVL DI, DX
- MOVL 0(DI), DI
- CALL DI
-
- // switch back to g
- get_tls(CX)
- MOVL g(CX), AX
- MOVL g_m(AX), BX
- MOVL m_curg(BX), AX
- MOVL AX, g(CX)
- MOVL (g_sched+gobuf_sp)(AX), SP
- MOVL $0, (g_sched+gobuf_sp)(AX)
- RET
-
-onm:
- // already on m stack, just call directly
- MOVL DI, DX
- MOVL 0(DI), DI
- CALL DI
- RET
-
-/*
- * support for morestack
- */
-
-// Called during function prolog when more stack is needed.
-//
-// The traceback routines see morestack on a g0 as being
-// the top of a stack (for example, morestack calling newstack
-// calling the scheduler calling newm calling gc), so we must
-// record an argument size. For that purpose, it has no arguments.
-TEXT runtime·morestack(SB),NOSPLIT,$0-0
- // Cannot grow scheduler stack (m->g0).
- get_tls(CX)
- MOVL g(CX), BX
- MOVL g_m(BX), BX
- MOVL m_g0(BX), SI
- CMPL g(CX), SI
- JNE 2(PC)
- INT $3
-
- // Cannot grow signal stack.
- MOVL m_gsignal(BX), SI
- CMPL g(CX), SI
- JNE 2(PC)
- INT $3
-
- // frame size in DI
- // arg size in AX
- // Save in m.
- MOVL DI, m_moreframesize(BX)
- MOVL AX, m_moreargsize(BX)
-
- // Called from f.
- // Set m->morebuf to f's caller.
- MOVL 4(SP), DI // f's caller's PC
- MOVL DI, (m_morebuf+gobuf_pc)(BX)
- LEAL 8(SP), CX // f's caller's SP
- MOVL CX, (m_morebuf+gobuf_sp)(BX)
- MOVL CX, m_moreargp(BX)
- get_tls(CX)
- MOVL g(CX), SI
- MOVL SI, (m_morebuf+gobuf_g)(BX)
-
- // Set g->sched to context in f.
- MOVL 0(SP), AX // f's PC
- MOVL AX, (g_sched+gobuf_pc)(SI)
- MOVL SI, (g_sched+gobuf_g)(SI)
- LEAL 4(SP), AX // f's SP
- MOVL AX, (g_sched+gobuf_sp)(SI)
- MOVL DX, (g_sched+gobuf_ctxt)(SI)
-
- // Call newstack on m->g0's stack.
- MOVL m_g0(BX), BP
- MOVL BP, g(CX)
- MOVL (g_sched+gobuf_sp)(BP), AX
- MOVL -4(AX), BX // fault if CALL would, before smashing SP
- MOVL AX, SP
- CALL runtime·newstack(SB)
- MOVL $0, 0x1003 // crash if newstack returns
- RET
-
-TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0-0
- MOVL $0, DX
- JMP runtime·morestack(SB)
-
-// reflect·call: call a function with the given argument list
-// func call(f *FuncVal, arg *byte, argsize, retoffset uint32).
-// we don't have variable-sized frames, so we use a small number
-// of constant-sized-frame functions to encode a few bits of size in the pc.
-// Caution: ugly multiline assembly macros in your future!
-
-#define DISPATCH(NAME,MAXSIZE) \
- CMPL CX, $MAXSIZE; \
- JA 3(PC); \
- MOVL $NAME(SB), AX; \
- JMP AX
-// Note: can't just "JMP NAME(SB)" - bad inlining results.
-
-TEXT reflect·call(SB), NOSPLIT, $0-16
- MOVL argsize+8(FP), CX
- DISPATCH(runtime·call16, 16)
- DISPATCH(runtime·call32, 32)
- DISPATCH(runtime·call64, 64)
- DISPATCH(runtime·call128, 128)
- DISPATCH(runtime·call256, 256)
- DISPATCH(runtime·call512, 512)
- DISPATCH(runtime·call1024, 1024)
- DISPATCH(runtime·call2048, 2048)
- DISPATCH(runtime·call4096, 4096)
- DISPATCH(runtime·call8192, 8192)
- DISPATCH(runtime·call16384, 16384)
- DISPATCH(runtime·call32768, 32768)
- DISPATCH(runtime·call65536, 65536)
- DISPATCH(runtime·call131072, 131072)
- DISPATCH(runtime·call262144, 262144)
- DISPATCH(runtime·call524288, 524288)
- DISPATCH(runtime·call1048576, 1048576)
- DISPATCH(runtime·call2097152, 2097152)
- DISPATCH(runtime·call4194304, 4194304)
- DISPATCH(runtime·call8388608, 8388608)
- DISPATCH(runtime·call16777216, 16777216)
- DISPATCH(runtime·call33554432, 33554432)
- DISPATCH(runtime·call67108864, 67108864)
- DISPATCH(runtime·call134217728, 134217728)
- DISPATCH(runtime·call268435456, 268435456)
- DISPATCH(runtime·call536870912, 536870912)
- DISPATCH(runtime·call1073741824, 1073741824)
- MOVL $runtime·badreflectcall(SB), AX
- JMP AX
-
-// Argument map for the callXX frames. Each has one stack map.
-DATA gcargs_reflectcall<>+0x00(SB)/4, $1 // 1 stackmap
-DATA gcargs_reflectcall<>+0x04(SB)/4, $8 // 4 words
-DATA gcargs_reflectcall<>+0x08(SB)/1, $(const_BitsPointer+(const_BitsPointer<<2)+(const_BitsScalar<<4)+(const_BitsScalar<<6))
-GLOBL gcargs_reflectcall<>(SB),RODATA,$12
-
-// callXX frames have no locals
-DATA gclocals_reflectcall<>+0x00(SB)/4, $1 // 1 stackmap
-DATA gclocals_reflectcall<>+0x04(SB)/4, $0 // 0 locals
-GLOBL gclocals_reflectcall<>(SB),RODATA,$8
-
-#define CALLFN(NAME,MAXSIZE) \
-TEXT NAME(SB), WRAPPER, $MAXSIZE-16; \
- FUNCDATA $FUNCDATA_ArgsPointerMaps,gcargs_reflectcall<>(SB); \
- FUNCDATA $FUNCDATA_LocalsPointerMaps,gclocals_reflectcall<>(SB);\
- /* copy arguments to stack */ \
- MOVL argptr+4(FP), SI; \
- MOVL argsize+8(FP), CX; \
- MOVL SP, DI; \
- REP;MOVSB; \
- /* call function */ \
- MOVL f+0(FP), DX; \
- MOVL (DX), AX; \
- PCDATA $PCDATA_StackMapIndex, $0; \
- CALL AX; \
- /* copy return values back */ \
- MOVL argptr+4(FP), DI; \
- MOVL argsize+8(FP), CX; \
- MOVL retoffset+12(FP), BX; \
- MOVL SP, SI; \
- ADDL BX, DI; \
- ADDL BX, SI; \
- SUBL BX, CX; \
- REP;MOVSB; \
- RET
-
-CALLFN(runtime·call16, 16)
-CALLFN(runtime·call32, 32)
-CALLFN(runtime·call64, 64)
-CALLFN(runtime·call128, 128)
-CALLFN(runtime·call256, 256)
-CALLFN(runtime·call512, 512)
-CALLFN(runtime·call1024, 1024)
-CALLFN(runtime·call2048, 2048)
-CALLFN(runtime·call4096, 4096)
-CALLFN(runtime·call8192, 8192)
-CALLFN(runtime·call16384, 16384)
-CALLFN(runtime·call32768, 32768)
-CALLFN(runtime·call65536, 65536)
-CALLFN(runtime·call131072, 131072)
-CALLFN(runtime·call262144, 262144)
-CALLFN(runtime·call524288, 524288)
-CALLFN(runtime·call1048576, 1048576)
-CALLFN(runtime·call2097152, 2097152)
-CALLFN(runtime·call4194304, 4194304)
-CALLFN(runtime·call8388608, 8388608)
-CALLFN(runtime·call16777216, 16777216)
-CALLFN(runtime·call33554432, 33554432)
-CALLFN(runtime·call67108864, 67108864)
-CALLFN(runtime·call134217728, 134217728)
-CALLFN(runtime·call268435456, 268435456)
-CALLFN(runtime·call536870912, 536870912)
-CALLFN(runtime·call1073741824, 1073741824)
-
-// Return point when leaving stack.
-//
-// Lessstack can appear in stack traces for the same reason
-// as morestack; in that context, it has 0 arguments.
-TEXT runtime·lessstack(SB), NOSPLIT, $0-0
- // Save return value in m->cret
- get_tls(CX)
- MOVL g(CX), BX
- MOVL g_m(BX), BX
- MOVL AX, m_cret(BX)
-
- // Call oldstack on m->g0's stack.
- MOVL m_g0(BX), BP
- MOVL BP, g(CX)
- MOVL (g_sched+gobuf_sp)(BP), SP
- CALL runtime·oldstack(SB)
- MOVL $0, 0x1004 // crash if oldstack returns
- RET
-
-// bool cas(int32 *val, int32 old, int32 new)
-// Atomically:
-// if(*val == old){
-// *val = new;
-// return 1;
-// }else
-// return 0;
-TEXT runtime·cas(SB), NOSPLIT, $0-13
- MOVL ptr+0(FP), BX
- MOVL old+4(FP), AX
- MOVL new+8(FP), CX
- LOCK
- CMPXCHGL CX, 0(BX)
- JZ 4(PC)
- MOVL $0, AX
- MOVB AX, ret+12(FP)
- RET
- MOVL $1, AX
- MOVB AX, ret+12(FP)
- RET
-
-TEXT runtime·casuintptr(SB), NOSPLIT, $0-13
- JMP runtime·cas(SB)
-
-TEXT runtime·atomicloaduintptr(SB), NOSPLIT, $0-8
- JMP runtime·atomicload(SB)
-
-TEXT runtime·atomicloaduint(SB), NOSPLIT, $0-8
- JMP runtime·atomicload(SB)
-
-// bool runtime·cas64(uint64 *val, uint64 old, uint64 new)
-// Atomically:
-// if(*val == *old){
-// *val = new;
-// return 1;
-// } else {
-// return 0;
-// }
-TEXT runtime·cas64(SB), NOSPLIT, $0-21
- MOVL ptr+0(FP), BP
- MOVL old_lo+4(FP), AX
- MOVL old_hi+8(FP), DX
- MOVL new_lo+12(FP), BX
- MOVL new_hi+16(FP), CX
- LOCK
- CMPXCHG8B 0(BP)
- JNZ cas64_fail
- MOVL $1, AX
- MOVB AX, ret+20(FP)
- RET
-cas64_fail:
- MOVL $0, AX
- MOVB AX, ret+20(FP)
- RET
-
-// bool casp(void **p, void *old, void *new)
-// Atomically:
-// if(*p == old){
-// *p = new;
-// return 1;
-// }else
-// return 0;
-TEXT runtime·casp(SB), NOSPLIT, $0-13
- MOVL ptr+0(FP), BX
- MOVL old+4(FP), AX
- MOVL new+8(FP), CX
- LOCK
- CMPXCHGL CX, 0(BX)
- JZ 4(PC)
- MOVL $0, AX
- MOVB AX, ret+12(FP)
- RET
- MOVL $1, AX
- MOVB AX, ret+12(FP)
- RET
-
-// uint32 xadd(uint32 volatile *val, int32 delta)
-// Atomically:
-// *val += delta;
-// return *val;
-TEXT runtime·xadd(SB), NOSPLIT, $0-12
- MOVL ptr+0(FP), BX
- MOVL delta+4(FP), AX
- MOVL AX, CX
- LOCK
- XADDL AX, 0(BX)
- ADDL CX, AX
- MOVL AX, ret+8(FP)
- RET
-
-TEXT runtime·xchg(SB), NOSPLIT, $0-12
- MOVL ptr+0(FP), BX
- MOVL new+4(FP), AX
- XCHGL AX, 0(BX)
- MOVL AX, ret+8(FP)
- RET
-
-TEXT runtime·xchgp(SB), NOSPLIT, $0-12
- MOVL ptr+0(FP), BX
- MOVL new+4(FP), AX
- XCHGL AX, 0(BX)
- MOVL AX, ret+8(FP)
- RET
-
-TEXT runtime·xchguintptr(SB), NOSPLIT, $0-12
- JMP runtime·xchg(SB)
-
-TEXT runtime·procyield(SB),NOSPLIT,$0-0
- MOVL cycles+0(FP), AX
-again:
- PAUSE
- SUBL $1, AX
- JNZ again
- RET
-
-TEXT runtime·atomicstorep(SB), NOSPLIT, $0-8
- MOVL ptr+0(FP), BX
- MOVL val+4(FP), AX
- XCHGL AX, 0(BX)
- RET
-
-TEXT runtime·atomicstore(SB), NOSPLIT, $0-8
- MOVL ptr+0(FP), BX
- MOVL val+4(FP), AX
- XCHGL AX, 0(BX)
- RET
-
-// uint64 atomicload64(uint64 volatile* addr);
-TEXT runtime·atomicload64(SB), NOSPLIT, $0-12
- MOVL ptr+0(FP), AX
- LEAL ret_lo+4(FP), BX
- // MOVQ (%EAX), %MM0
- BYTE $0x0f; BYTE $0x6f; BYTE $0x00
- // MOVQ %MM0, 0(%EBX)
- BYTE $0x0f; BYTE $0x7f; BYTE $0x03
- // EMMS
- BYTE $0x0F; BYTE $0x77
- RET
-
-// void runtime·atomicstore64(uint64 volatile* addr, uint64 v);
-TEXT runtime·atomicstore64(SB), NOSPLIT, $0-12
- MOVL ptr+0(FP), AX
- // MOVQ and EMMS were introduced on the Pentium MMX.
- // MOVQ 0x8(%ESP), %MM0
- BYTE $0x0f; BYTE $0x6f; BYTE $0x44; BYTE $0x24; BYTE $0x08
- // MOVQ %MM0, (%EAX)
- BYTE $0x0f; BYTE $0x7f; BYTE $0x00
- // EMMS
- BYTE $0x0F; BYTE $0x77
- // This is essentially a no-op, but it provides required memory fencing.
- // It can be replaced with MFENCE, but MFENCE was introduced only on the Pentium4 (SSE2).
- MOVL $0, AX
- LOCK
- XADDL AX, (SP)
- RET
-
-// void runtime·atomicor8(byte volatile*, byte);
-TEXT runtime·atomicor8(SB), NOSPLIT, $0-5
- MOVL ptr+0(FP), AX
- MOVB val+4(FP), BX
- LOCK
- ORB BX, (AX)
- RET
-
-// void jmpdefer(fn, sp);
-// called from deferreturn.
-// 1. pop the caller
-// 2. sub 5 bytes from the callers return
-// 3. jmp to the argument
-TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
- MOVL fv+0(FP), DX // fn
- MOVL argp+4(FP), BX // caller sp
- LEAL -4(BX), SP // caller sp after CALL
- SUBL $5, (SP) // return to CALL again
- MOVL 0(DX), BX
- JMP BX // but first run the deferred function
-
-// Save state of caller into g->sched.
-TEXT gosave<>(SB),NOSPLIT,$0
- PUSHL AX
- PUSHL BX
- get_tls(BX)
- MOVL g(BX), BX
- LEAL arg+0(FP), AX
- MOVL AX, (g_sched+gobuf_sp)(BX)
- MOVL -4(AX), AX
- MOVL AX, (g_sched+gobuf_pc)(BX)
- MOVL $0, (g_sched+gobuf_ret)(BX)
- MOVL $0, (g_sched+gobuf_ctxt)(BX)
- POPL BX
- POPL AX
- RET
-
-// asmcgocall(void(*fn)(void*), void *arg)
-// Call fn(arg) on the scheduler stack,
-// aligned appropriately for the gcc ABI.
-// See cgocall.c for more details.
-TEXT runtime·asmcgocall(SB),NOSPLIT,$0-8
- MOVL fn+0(FP), AX
- MOVL arg+4(FP), BX
- CALL asmcgocall<>(SB)
- RET
-
-TEXT runtime·asmcgocall_errno(SB),NOSPLIT,$0-12
- MOVL fn+0(FP), AX
- MOVL arg+4(FP), BX
- CALL asmcgocall<>(SB)
- MOVL AX, ret+8(FP)
- RET
-
-TEXT asmcgocall<>(SB),NOSPLIT,$0-12
- // fn in AX, arg in BX
- MOVL SP, DX
-
- // Figure out if we need to switch to m->g0 stack.
- // We get called to create new OS threads too, and those
- // come in on the m->g0 stack already.
- get_tls(CX)
- MOVL g(CX), BP
- MOVL g_m(BP), BP
- MOVL m_g0(BP), SI
- MOVL g(CX), DI
- CMPL SI, DI
- JEQ 4(PC)
- CALL gosave<>(SB)
- MOVL SI, g(CX)
- MOVL (g_sched+gobuf_sp)(SI), SP
-
- // Now on a scheduling stack (a pthread-created stack).
- SUBL $32, SP
- ANDL $~15, SP // alignment, perhaps unnecessary
- MOVL DI, 8(SP) // save g
- MOVL DX, 4(SP) // save SP
- MOVL BX, 0(SP) // first argument in x86-32 ABI
- CALL AX
-
- // Restore registers, g, stack pointer.
- get_tls(CX)
- MOVL 8(SP), DI
- MOVL DI, g(CX)
- MOVL 4(SP), SP
- RET
-
-// cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
-// Turn the fn into a Go func (by taking its address) and call
-// cgocallback_gofunc.
-TEXT runtime·cgocallback(SB),NOSPLIT,$12-12
- LEAL fn+0(FP), AX
- MOVL AX, 0(SP)
- MOVL frame+4(FP), AX
- MOVL AX, 4(SP)
- MOVL framesize+8(FP), AX
- MOVL AX, 8(SP)
- MOVL $runtime·cgocallback_gofunc(SB), AX
- CALL AX
- RET
-
-// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
-// See cgocall.c for more details.
-TEXT runtime·cgocallback_gofunc(SB),NOSPLIT,$12-12
- // If g is nil, Go did not create the current thread.
- // Call needm to obtain one for temporary use.
- // In this case, we're running on the thread stack, so there's
- // lots of space, but the linker doesn't know. Hide the call from
- // the linker analysis by using an indirect call through AX.
- get_tls(CX)
-#ifdef GOOS_windows
- MOVL $0, BP
- CMPL CX, $0
- JEQ 2(PC) // TODO
-#endif
- MOVL g(CX), BP
- CMPL BP, $0
- JEQ needm
- MOVL g_m(BP), BP
- MOVL BP, DX // saved copy of oldm
- JMP havem
-needm:
- MOVL $0, 0(SP)
- MOVL $runtime·needm(SB), AX
- CALL AX
- MOVL 0(SP), DX
- get_tls(CX)
- MOVL g(CX), BP
- MOVL g_m(BP), BP
-
-havem:
- // Now there's a valid m, and we're running on its m->g0.
- // Save current m->g0->sched.sp on stack and then set it to SP.
- // Save current sp in m->g0->sched.sp in preparation for
- // switch back to m->curg stack.
- // NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
- MOVL m_g0(BP), SI
- MOVL (g_sched+gobuf_sp)(SI), AX
- MOVL AX, 0(SP)
- MOVL SP, (g_sched+gobuf_sp)(SI)
-
- // Switch to m->curg stack and call runtime.cgocallbackg.
- // Because we are taking over the execution of m->curg
- // but *not* resuming what had been running, we need to
- // save that information (m->curg->sched) so we can restore it.
- // We can restore m->curg->sched.sp easily, because calling
- // runtime.cgocallbackg leaves SP unchanged upon return.
- // To save m->curg->sched.pc, we push it onto the stack.
- // This has the added benefit that it looks to the traceback
- // routine like cgocallbackg is going to return to that
- // PC (because the frame we allocate below has the same
- // size as cgocallback_gofunc's frame declared above)
- // so that the traceback will seamlessly trace back into
- // the earlier calls.
- //
- // In the new goroutine, 0(SP) holds the saved oldm (DX) register.
- // 4(SP) and 8(SP) are unused.
- MOVL m_curg(BP), SI
- MOVL SI, g(CX)
- MOVL (g_sched+gobuf_sp)(SI), DI // prepare stack as DI
- MOVL (g_sched+gobuf_pc)(SI), BP
- MOVL BP, -4(DI)
- LEAL -(4+12)(DI), SP
- MOVL DX, 0(SP)
- CALL runtime·cgocallbackg(SB)
- MOVL 0(SP), DX
-
- // Restore g->sched (== m->curg->sched) from saved values.
- get_tls(CX)
- MOVL g(CX), SI
- MOVL 12(SP), BP
- MOVL BP, (g_sched+gobuf_pc)(SI)
- LEAL (12+4)(SP), DI
- MOVL DI, (g_sched+gobuf_sp)(SI)
-
- // Switch back to m->g0's stack and restore m->g0->sched.sp.
- // (Unlike m->curg, the g0 goroutine never uses sched.pc,
- // so we do not have to restore it.)
- MOVL g(CX), BP
- MOVL g_m(BP), BP
- MOVL m_g0(BP), SI
- MOVL SI, g(CX)
- MOVL (g_sched+gobuf_sp)(SI), SP
- MOVL 0(SP), AX
- MOVL AX, (g_sched+gobuf_sp)(SI)
-
- // If the m on entry was nil, we called needm above to borrow an m
- // for the duration of the call. Since the call is over, return it with dropm.
- CMPL DX, $0
- JNE 3(PC)
- MOVL $runtime·dropm(SB), AX
- CALL AX
-
- // Done!
- RET
-
-// void setg(G*); set g. for use by needm.
-TEXT runtime·setg(SB), NOSPLIT, $0-4
- MOVL gg+0(FP), BX
-#ifdef GOOS_windows
- CMPL BX, $0
- JNE settls
- MOVL $0, 0x14(FS)
- RET
-settls:
- MOVL g_m(BX), AX
- LEAL m_tls(AX), AX
- MOVL AX, 0x14(FS)
-#endif
- get_tls(CX)
- MOVL BX, g(CX)
- RET
-
-// void setg_gcc(G*); set g. for use by gcc
-TEXT setg_gcc<>(SB), NOSPLIT, $0
- get_tls(AX)
- MOVL gg+0(FP), DX
- MOVL DX, g(AX)
- RET
-
-// check that SP is in range [g->stackbase, g->stackguard)
-TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
- get_tls(CX)
- MOVL g(CX), AX
- CMPL g_stackbase(AX), SP
- JHI 2(PC)
- INT $3
- CMPL SP, g_stackguard(AX)
- JHI 2(PC)
- INT $3
- RET
-
-TEXT runtime·getcallerpc(SB),NOSPLIT,$0-8
- MOVL argp+0(FP),AX // addr of first arg
- MOVL -4(AX),AX // get calling pc
- MOVL AX, ret+4(FP)
- RET
-
-TEXT runtime·gogetcallerpc(SB),NOSPLIT,$0-8
- MOVL p+0(FP),AX // addr of first arg
- MOVL -4(AX),AX // get calling pc
- MOVL AX, ret+4(FP)
- RET
-
-TEXT runtime·setcallerpc(SB),NOSPLIT,$0-8
- MOVL argp+0(FP),AX // addr of first arg
- MOVL pc+4(FP), BX
- MOVL BX, -4(AX) // set calling pc
- RET
-
-TEXT runtime·getcallersp(SB), NOSPLIT, $0-8
- MOVL argp+0(FP), AX
- MOVL AX, ret+4(FP)
- RET
-
-// func gogetcallersp(p unsafe.Pointer) uintptr
-TEXT runtime·gogetcallersp(SB),NOSPLIT,$0-8
- MOVL p+0(FP),AX // addr of first arg
- MOVL AX, ret+4(FP)
- RET
-
-// int64 runtime·cputicks(void), so really
-// void runtime·cputicks(int64 *ticks)
-TEXT runtime·cputicks(SB),NOSPLIT,$0-8
- RDTSC
- MOVL AX, ret_lo+0(FP)
- MOVL DX, ret_hi+4(FP)
- RET
-
-TEXT runtime·gocputicks(SB),NOSPLIT,$0-8
- RDTSC
- MOVL AX, ret_lo+0(FP)
- MOVL DX, ret_hi+4(FP)
- RET
-
-TEXT runtime·ldt0setup(SB),NOSPLIT,$16-0
- // set up ldt 7 to point at tls0
- // ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go.
- // the entry number is just a hint. setldt will set up GS with what it used.
- MOVL $7, 0(SP)
- LEAL runtime·tls0(SB), AX
- MOVL AX, 4(SP)
- MOVL $32, 8(SP) // sizeof(tls array)
- CALL runtime·setldt(SB)
- RET
-
-TEXT runtime·emptyfunc(SB),0,$0-0
- RET
-
-TEXT runtime·abort(SB),NOSPLIT,$0-0
- INT $0x3
-
-TEXT runtime·stackguard(SB),NOSPLIT,$0-8
- MOVL SP, DX
- MOVL DX, sp+0(FP)
- get_tls(CX)
- MOVL g(CX), BX
- MOVL g_stackguard(BX), DX
- MOVL DX, limit+4(FP)
- RET
-
-GLOBL runtime·tls0(SB), $32
-
-// hash function using AES hardware instructions
-TEXT runtime·aeshash(SB),NOSPLIT,$0-16
- MOVL p+0(FP), AX // ptr to data
- MOVL s+4(FP), CX // size
- JMP runtime·aeshashbody(SB)
-
-TEXT runtime·aeshashstr(SB),NOSPLIT,$0-16
- MOVL p+0(FP), AX // ptr to string object
- // s+4(FP) is ignored, it is always sizeof(String)
- MOVL 4(AX), CX // length of string
- MOVL (AX), AX // string data
- JMP runtime·aeshashbody(SB)
-
-// AX: data
-// CX: length
-TEXT runtime·aeshashbody(SB),NOSPLIT,$0-16
- MOVL h+8(FP), X0 // seed to low 32 bits of xmm0
- PINSRD $1, CX, X0 // size to next 32 bits of xmm0
- MOVO runtime·aeskeysched+0(SB), X2
- MOVO runtime·aeskeysched+16(SB), X3
- CMPL CX, $16
- JB aessmall
-aesloop:
- CMPL CX, $16
- JBE aesloopend
- MOVOU (AX), X1
- AESENC X2, X0
- AESENC X1, X0
- SUBL $16, CX
- ADDL $16, AX
- JMP aesloop
-// 1-16 bytes remaining
-aesloopend:
- // This load may overlap with the previous load above.
- // We'll hash some bytes twice, but that's ok.
- MOVOU -16(AX)(CX*1), X1
- JMP partial
-// 0-15 bytes
-aessmall:
- TESTL CX, CX
- JE finalize // 0 bytes
-
- CMPB AX, $0xf0
- JA highpartial
-
- // 16 bytes loaded at this address won't cross
- // a page boundary, so we can load it directly.
- MOVOU (AX), X1
- ADDL CX, CX
- PAND masks<>(SB)(CX*8), X1
- JMP partial
-highpartial:
- // address ends in 1111xxxx. Might be up against
- // a page boundary, so load ending at last byte.
- // Then shift bytes down using pshufb.
- MOVOU -16(AX)(CX*1), X1
- ADDL CX, CX
- PSHUFB shifts<>(SB)(CX*8), X1
-partial:
- // incorporate partial block into hash
- AESENC X3, X0
- AESENC X1, X0
-finalize:
- // finalize hash
- AESENC X2, X0
- AESENC X3, X0
- AESENC X2, X0
- MOVL X0, ret+12(FP)
- RET
-
-TEXT runtime·aeshash32(SB),NOSPLIT,$0-16
- MOVL p+0(FP), AX // ptr to data
- // s+4(FP) is ignored, it is always sizeof(int32)
- MOVL h+8(FP), X0 // seed
- PINSRD $1, (AX), X0 // data
- AESENC runtime·aeskeysched+0(SB), X0
- AESENC runtime·aeskeysched+16(SB), X0
- AESENC runtime·aeskeysched+0(SB), X0
- MOVL X0, ret+12(FP)
- RET
-
-TEXT runtime·aeshash64(SB),NOSPLIT,$0-16
- MOVL p+0(FP), AX // ptr to data
- // s+4(FP) is ignored, it is always sizeof(int64)
- MOVQ (AX), X0 // data
- PINSRD $2, h+8(FP), X0 // seed
- AESENC runtime·aeskeysched+0(SB), X0
- AESENC runtime·aeskeysched+16(SB), X0
- AESENC runtime·aeskeysched+0(SB), X0
- MOVL X0, ret+12(FP)
- RET
-
-// simple mask to get rid of data in the high part of the register.
-DATA masks<>+0x00(SB)/4, $0x00000000
-DATA masks<>+0x04(SB)/4, $0x00000000
-DATA masks<>+0x08(SB)/4, $0x00000000
-DATA masks<>+0x0c(SB)/4, $0x00000000
-
-DATA masks<>+0x10(SB)/4, $0x000000ff
-DATA masks<>+0x14(SB)/4, $0x00000000
-DATA masks<>+0x18(SB)/4, $0x00000000
-DATA masks<>+0x1c(SB)/4, $0x00000000
-
-DATA masks<>+0x20(SB)/4, $0x0000ffff
-DATA masks<>+0x24(SB)/4, $0x00000000
-DATA masks<>+0x28(SB)/4, $0x00000000
-DATA masks<>+0x2c(SB)/4, $0x00000000
-
-DATA masks<>+0x30(SB)/4, $0x00ffffff
-DATA masks<>+0x34(SB)/4, $0x00000000
-DATA masks<>+0x38(SB)/4, $0x00000000
-DATA masks<>+0x3c(SB)/4, $0x00000000
-
-DATA masks<>+0x40(SB)/4, $0xffffffff
-DATA masks<>+0x44(SB)/4, $0x00000000
-DATA masks<>+0x48(SB)/4, $0x00000000
-DATA masks<>+0x4c(SB)/4, $0x00000000
-
-DATA masks<>+0x50(SB)/4, $0xffffffff
-DATA masks<>+0x54(SB)/4, $0x000000ff
-DATA masks<>+0x58(SB)/4, $0x00000000
-DATA masks<>+0x5c(SB)/4, $0x00000000
-
-DATA masks<>+0x60(SB)/4, $0xffffffff
-DATA masks<>+0x64(SB)/4, $0x0000ffff
-DATA masks<>+0x68(SB)/4, $0x00000000
-DATA masks<>+0x6c(SB)/4, $0x00000000
-
-DATA masks<>+0x70(SB)/4, $0xffffffff
-DATA masks<>+0x74(SB)/4, $0x00ffffff
-DATA masks<>+0x78(SB)/4, $0x00000000
-DATA masks<>+0x7c(SB)/4, $0x00000000
-
-DATA masks<>+0x80(SB)/4, $0xffffffff
-DATA masks<>+0x84(SB)/4, $0xffffffff
-DATA masks<>+0x88(SB)/4, $0x00000000
-DATA masks<>+0x8c(SB)/4, $0x00000000
-
-DATA masks<>+0x90(SB)/4, $0xffffffff
-DATA masks<>+0x94(SB)/4, $0xffffffff
-DATA masks<>+0x98(SB)/4, $0x000000ff
-DATA masks<>+0x9c(SB)/4, $0x00000000
-
-DATA masks<>+0xa0(SB)/4, $0xffffffff
-DATA masks<>+0xa4(SB)/4, $0xffffffff
-DATA masks<>+0xa8(SB)/4, $0x0000ffff
-DATA masks<>+0xac(SB)/4, $0x00000000
-
-DATA masks<>+0xb0(SB)/4, $0xffffffff
-DATA masks<>+0xb4(SB)/4, $0xffffffff
-DATA masks<>+0xb8(SB)/4, $0x00ffffff
-DATA masks<>+0xbc(SB)/4, $0x00000000
-
-DATA masks<>+0xc0(SB)/4, $0xffffffff
-DATA masks<>+0xc4(SB)/4, $0xffffffff
-DATA masks<>+0xc8(SB)/4, $0xffffffff
-DATA masks<>+0xcc(SB)/4, $0x00000000
-
-DATA masks<>+0xd0(SB)/4, $0xffffffff
-DATA masks<>+0xd4(SB)/4, $0xffffffff
-DATA masks<>+0xd8(SB)/4, $0xffffffff
-DATA masks<>+0xdc(SB)/4, $0x000000ff
-
-DATA masks<>+0xe0(SB)/4, $0xffffffff
-DATA masks<>+0xe4(SB)/4, $0xffffffff
-DATA masks<>+0xe8(SB)/4, $0xffffffff
-DATA masks<>+0xec(SB)/4, $0x0000ffff
-
-DATA masks<>+0xf0(SB)/4, $0xffffffff
-DATA masks<>+0xf4(SB)/4, $0xffffffff
-DATA masks<>+0xf8(SB)/4, $0xffffffff
-DATA masks<>+0xfc(SB)/4, $0x00ffffff
-
-GLOBL masks<>(SB),RODATA,$256
-
-// these are arguments to pshufb. They move data down from
-// the high bytes of the register to the low bytes of the register.
-// index is how many bytes to move.
-DATA shifts<>+0x00(SB)/4, $0x00000000
-DATA shifts<>+0x04(SB)/4, $0x00000000
-DATA shifts<>+0x08(SB)/4, $0x00000000
-DATA shifts<>+0x0c(SB)/4, $0x00000000
-
-DATA shifts<>+0x10(SB)/4, $0xffffff0f
-DATA shifts<>+0x14(SB)/4, $0xffffffff
-DATA shifts<>+0x18(SB)/4, $0xffffffff
-DATA shifts<>+0x1c(SB)/4, $0xffffffff
-
-DATA shifts<>+0x20(SB)/4, $0xffff0f0e
-DATA shifts<>+0x24(SB)/4, $0xffffffff
-DATA shifts<>+0x28(SB)/4, $0xffffffff
-DATA shifts<>+0x2c(SB)/4, $0xffffffff
-
-DATA shifts<>+0x30(SB)/4, $0xff0f0e0d
-DATA shifts<>+0x34(SB)/4, $0xffffffff
-DATA shifts<>+0x38(SB)/4, $0xffffffff
-DATA shifts<>+0x3c(SB)/4, $0xffffffff
-
-DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c
-DATA shifts<>+0x44(SB)/4, $0xffffffff
-DATA shifts<>+0x48(SB)/4, $0xffffffff
-DATA shifts<>+0x4c(SB)/4, $0xffffffff
-
-DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b
-DATA shifts<>+0x54(SB)/4, $0xffffff0f
-DATA shifts<>+0x58(SB)/4, $0xffffffff
-DATA shifts<>+0x5c(SB)/4, $0xffffffff
-
-DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a
-DATA shifts<>+0x64(SB)/4, $0xffff0f0e
-DATA shifts<>+0x68(SB)/4, $0xffffffff
-DATA shifts<>+0x6c(SB)/4, $0xffffffff
-
-DATA shifts<>+0x70(SB)/4, $0x0c0b0a09
-DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
-DATA shifts<>+0x78(SB)/4, $0xffffffff
-DATA shifts<>+0x7c(SB)/4, $0xffffffff
-
-DATA shifts<>+0x80(SB)/4, $0x0b0a0908
-DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
-DATA shifts<>+0x88(SB)/4, $0xffffffff
-DATA shifts<>+0x8c(SB)/4, $0xffffffff
-
-DATA shifts<>+0x90(SB)/4, $0x0a090807
-DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
-DATA shifts<>+0x98(SB)/4, $0xffffff0f
-DATA shifts<>+0x9c(SB)/4, $0xffffffff
-
-DATA shifts<>+0xa0(SB)/4, $0x09080706
-DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
-DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
-DATA shifts<>+0xac(SB)/4, $0xffffffff
-
-DATA shifts<>+0xb0(SB)/4, $0x08070605
-DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
-DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
-DATA shifts<>+0xbc(SB)/4, $0xffffffff
-
-DATA shifts<>+0xc0(SB)/4, $0x07060504
-DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
-DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
-DATA shifts<>+0xcc(SB)/4, $0xffffffff
-
-DATA shifts<>+0xd0(SB)/4, $0x06050403
-DATA shifts<>+0xd4(SB)/4, $0x0a090807
-DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
-DATA shifts<>+0xdc(SB)/4, $0xffffff0f
-
-DATA shifts<>+0xe0(SB)/4, $0x05040302
-DATA shifts<>+0xe4(SB)/4, $0x09080706
-DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
-DATA shifts<>+0xec(SB)/4, $0xffff0f0e
-
-DATA shifts<>+0xf0(SB)/4, $0x04030201
-DATA shifts<>+0xf4(SB)/4, $0x08070605
-DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
-DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d
-
-GLOBL shifts<>(SB),RODATA,$256
-
-TEXT runtime·memeq(SB),NOSPLIT,$0-13
- MOVL a+0(FP), SI
- MOVL b+4(FP), DI
- MOVL size+8(FP), BX
- CALL runtime·memeqbody(SB)
- MOVB AX, ret+12(FP)
- RET
-
-// eqstring tests whether two strings are equal.
-// See runtime_test.go:eqstring_generic for
-// equivalent Go code.
-TEXT runtime·eqstring(SB),NOSPLIT,$0-17
- MOVL s1len+4(FP), AX
- MOVL s2len+12(FP), BX
- CMPL AX, BX
- JNE different
- MOVL s1str+0(FP), SI
- MOVL s2str+8(FP), DI
- CMPL SI, DI
- JEQ same
- CALL runtime·memeqbody(SB)
- MOVB AX, v+16(FP)
- RET
-same:
- MOVB $1, v+16(FP)
- RET
-different:
- MOVB $0, v+16(FP)
- RET
-
-TEXT bytes·Equal(SB),NOSPLIT,$0-25
- MOVL a_len+4(FP), BX
- MOVL b_len+16(FP), CX
- XORL AX, AX
- CMPL BX, CX
- JNE eqret
- MOVL a+0(FP), SI
- MOVL b+12(FP), DI
- CALL runtime·memeqbody(SB)
-eqret:
- MOVB AX, ret+24(FP)
- RET
-
-// a in SI
-// b in DI
-// count in BX
-TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
- XORL AX, AX
-
- CMPL BX, $4
- JB small
-
- // 64 bytes at a time using xmm registers
-hugeloop:
- CMPL BX, $64
- JB bigloop
- TESTL $0x4000000, runtime·cpuid_edx(SB) // check for sse2
- JE bigloop
- MOVOU (SI), X0
- MOVOU (DI), X1
- MOVOU 16(SI), X2
- MOVOU 16(DI), X3
- MOVOU 32(SI), X4
- MOVOU 32(DI), X5
- MOVOU 48(SI), X6
- MOVOU 48(DI), X7
- PCMPEQB X1, X0
- PCMPEQB X3, X2
- PCMPEQB X5, X4
- PCMPEQB X7, X6
- PAND X2, X0
- PAND X6, X4
- PAND X4, X0
- PMOVMSKB X0, DX
- ADDL $64, SI
- ADDL $64, DI
- SUBL $64, BX
- CMPL DX, $0xffff
- JEQ hugeloop
- RET
-
- // 4 bytes at a time using 32-bit register
-bigloop:
- CMPL BX, $4
- JBE leftover
- MOVL (SI), CX
- MOVL (DI), DX
- ADDL $4, SI
- ADDL $4, DI
- SUBL $4, BX
- CMPL CX, DX
- JEQ bigloop
- RET
-
- // remaining 0-4 bytes
-leftover:
- MOVL -4(SI)(BX*1), CX
- MOVL -4(DI)(BX*1), DX
- CMPL CX, DX
- SETEQ AX
- RET
-
-small:
- CMPL BX, $0
- JEQ equal
-
- LEAL 0(BX*8), CX
- NEGL CX
-
- MOVL SI, DX
- CMPB DX, $0xfc
- JA si_high
-
- // load at SI won't cross a page boundary.
- MOVL (SI), SI
- JMP si_finish
-si_high:
- // address ends in 111111xx. Load up to bytes we want, move to correct position.
- MOVL -4(SI)(BX*1), SI
- SHRL CX, SI
-si_finish:
-
- // same for DI.
- MOVL DI, DX
- CMPB DX, $0xfc
- JA di_high
- MOVL (DI), DI
- JMP di_finish
-di_high:
- MOVL -4(DI)(BX*1), DI
- SHRL CX, DI
-di_finish:
-
- SUBL SI, DI
- SHLL CX, DI
-equal:
- SETEQ AX
- RET
-
-TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
- MOVL s1_base+0(FP), SI
- MOVL s1_len+4(FP), BX
- MOVL s2_base+8(FP), DI
- MOVL s2_len+12(FP), DX
- CALL runtime·cmpbody(SB)
- MOVL AX, ret+16(FP)
- RET
-
-TEXT runtime·cmpbytes(SB),NOSPLIT,$0-28
- MOVL s1+0(FP), SI
- MOVL s1+4(FP), BX
- MOVL s2+12(FP), DI
- MOVL s2+16(FP), DX
- CALL runtime·cmpbody(SB)
- MOVL AX, ret+24(FP)
- RET
-
-TEXT bytes·IndexByte(SB),NOSPLIT,$0
- MOVL s+0(FP), SI
- MOVL s_len+4(FP), CX
- MOVB c+12(FP), AL
- MOVL SI, DI
- CLD; REPN; SCASB
- JZ 3(PC)
- MOVL $-1, ret+16(FP)
- RET
- SUBL SI, DI
- SUBL $1, DI
- MOVL DI, ret+16(FP)
- RET
-
-TEXT strings·IndexByte(SB),NOSPLIT,$0
- MOVL s+0(FP), SI
- MOVL s_len+4(FP), CX
- MOVB c+8(FP), AL
- MOVL SI, DI
- CLD; REPN; SCASB
- JZ 3(PC)
- MOVL $-1, ret+12(FP)
- RET
- SUBL SI, DI
- SUBL $1, DI
- MOVL DI, ret+12(FP)
- RET
-
-// input:
-// SI = a
-// DI = b
-// BX = alen
-// DX = blen
-// output:
-// AX = 1/0/-1
-TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
- CMPL SI, DI
- JEQ cmp_allsame
- CMPL BX, DX
- MOVL DX, BP
- CMOVLLT BX, BP // BP = min(alen, blen)
- CMPL BP, $4
- JB cmp_small
- TESTL $0x4000000, runtime·cpuid_edx(SB) // check for sse2
- JE cmp_mediumloop
-cmp_largeloop:
- CMPL BP, $16
- JB cmp_mediumloop
- MOVOU (SI), X0
- MOVOU (DI), X1
- PCMPEQB X0, X1
- PMOVMSKB X1, AX
- XORL $0xffff, AX // convert EQ to NE
- JNE cmp_diff16 // branch if at least one byte is not equal
- ADDL $16, SI
- ADDL $16, DI
- SUBL $16, BP
- JMP cmp_largeloop
-
-cmp_diff16:
- BSFL AX, BX // index of first byte that differs
- XORL AX, AX
- MOVB (SI)(BX*1), CX
- CMPB CX, (DI)(BX*1)
- SETHI AX
- LEAL -1(AX*2), AX // convert 1/0 to +1/-1
- RET
-
-cmp_mediumloop:
- CMPL BP, $4
- JBE cmp_0through4
- MOVL (SI), AX
- MOVL (DI), CX
- CMPL AX, CX
- JNE cmp_diff4
- ADDL $4, SI
- ADDL $4, DI
- SUBL $4, BP
- JMP cmp_mediumloop
-
-cmp_0through4:
- MOVL -4(SI)(BP*1), AX
- MOVL -4(DI)(BP*1), CX
- CMPL AX, CX
- JEQ cmp_allsame
-
-cmp_diff4:
- BSWAPL AX // reverse order of bytes
- BSWAPL CX
- XORL AX, CX // find bit differences
- BSRL CX, CX // index of highest bit difference
- SHRL CX, AX // move a's bit to bottom
- ANDL $1, AX // mask bit
- LEAL -1(AX*2), AX // 1/0 => +1/-1
- RET
-
- // 0-3 bytes in common
-cmp_small:
- LEAL (BP*8), CX
- NEGL CX
- JEQ cmp_allsame
-
- // load si
- CMPB SI, $0xfc
- JA cmp_si_high
- MOVL (SI), SI
- JMP cmp_si_finish
-cmp_si_high:
- MOVL -4(SI)(BP*1), SI
- SHRL CX, SI
-cmp_si_finish:
- SHLL CX, SI
-
- // same for di
- CMPB DI, $0xfc
- JA cmp_di_high
- MOVL (DI), DI
- JMP cmp_di_finish
-cmp_di_high:
- MOVL -4(DI)(BP*1), DI
- SHRL CX, DI
-cmp_di_finish:
- SHLL CX, DI
-
- BSWAPL SI // reverse order of bytes
- BSWAPL DI
- XORL SI, DI // find bit differences
- JEQ cmp_allsame
- BSRL DI, CX // index of highest bit difference
- SHRL CX, SI // move a's bit to bottom
- ANDL $1, SI // mask bit
- LEAL -1(SI*2), AX // 1/0 => +1/-1
- RET
-
- // all the bytes in common are the same, so we just need
- // to compare the lengths.
-cmp_allsame:
- XORL AX, AX
- XORL CX, CX
- CMPL BX, DX
- SETGT AX // 1 if alen > blen
- SETEQ CX // 1 if alen == blen
- LEAL -1(CX)(AX*2), AX // 1,0,-1 result
- RET
-
-// A Duff's device for zeroing memory.
-// The compiler jumps to computed addresses within
-// this routine to zero chunks of memory. Do not
-// change this code without also changing the code
-// in ../../cmd/8g/ggen.c:clearfat.
-// AX: zero
-// DI: ptr to memory to be zeroed
-// DI is updated as a side effect.
-TEXT runtime·duffzero(SB), NOSPLIT, $0-0
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- STOSL
- RET
-
-// A Duff's device for copying memory.
-// The compiler jumps to computed addresses within
-// this routine to copy chunks of memory. Source
-// and destination must not overlap. Do not
-// change this code without also changing the code
-// in ../../cmd/6g/cgen.c:sgen.
-// SI: ptr to source memory
-// DI: ptr to destination memory
-// SI and DI are updated as a side effect.
-
-// NOTE: this is equivalent to a sequence of MOVSL but
-// for some reason MOVSL is really slow.
-TEXT runtime·duffcopy(SB), NOSPLIT, $0-0
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- MOVL (SI),CX
- ADDL $4,SI
- MOVL CX,(DI)
- ADDL $4,DI
-
- RET
-
-TEXT runtime·timenow(SB), NOSPLIT, $0-0
- JMP time·now(SB)
-
-TEXT runtime·fastrand1(SB), NOSPLIT, $0-4
- get_tls(CX)
- MOVL g(CX), AX
- MOVL g_m(AX), AX
- MOVL m_fastrand(AX), DX
- ADDL DX, DX
- MOVL DX, BX
- XORL $0x88888eef, DX
- CMOVLMI BX, DX
- MOVL DX, m_fastrand(AX)
- MOVL DX, ret+0(FP)
- RET
-
-TEXT runtime·return0(SB), NOSPLIT, $0
- MOVL $0, AX
- RET