diff options
Diffstat (limited to 'src/runtime/memclr_386.s')
-rw-r--r-- | src/runtime/memclr_386.s | 46 |
1 files changed, 23 insertions, 23 deletions
diff --git a/src/runtime/memclr_386.s b/src/runtime/memclr_386.s index 1520aea2e..3f20b69c8 100644 --- a/src/runtime/memclr_386.s +++ b/src/runtime/memclr_386.s @@ -15,31 +15,31 @@ TEXT runtime·memclr(SB), NOSPLIT, $0-8 XORL AX, AX // MOVOU seems always faster than REP STOSL. -clr_tail: +tail: TESTL BX, BX - JEQ clr_0 + JEQ _0 CMPL BX, $2 - JBE clr_1or2 + JBE _1or2 CMPL BX, $4 - JBE clr_3or4 + JBE _3or4 CMPL BX, $8 - JBE clr_5through8 + JBE _5through8 CMPL BX, $16 - JBE clr_9through16 + JBE _9through16 TESTL $0x4000000, runtime·cpuid_edx(SB) // check for sse2 JEQ nosse2 PXOR X0, X0 CMPL BX, $32 - JBE clr_17through32 + JBE _17through32 CMPL BX, $64 - JBE clr_33through64 + JBE _33through64 CMPL BX, $128 - JBE clr_65through128 + JBE _65through128 CMPL BX, $256 - JBE clr_129through256 + JBE _129through256 // TODO: use branch table and BSR to make this just a single dispatch -clr_loop: +loop: MOVOU X0, 0(DI) MOVOU X0, 16(DI) MOVOU X0, 32(DI) @@ -59,40 +59,40 @@ clr_loop: SUBL $256, BX ADDL $256, DI CMPL BX, $256 - JAE clr_loop - JMP clr_tail + JAE loop + JMP tail -clr_1or2: +_1or2: MOVB AX, (DI) MOVB AX, -1(DI)(BX*1) RET -clr_0: +_0: RET -clr_3or4: +_3or4: MOVW AX, (DI) MOVW AX, -2(DI)(BX*1) RET -clr_5through8: +_5through8: MOVL AX, (DI) MOVL AX, -4(DI)(BX*1) RET -clr_9through16: +_9through16: MOVL AX, (DI) MOVL AX, 4(DI) MOVL AX, -8(DI)(BX*1) MOVL AX, -4(DI)(BX*1) RET -clr_17through32: +_17through32: MOVOU X0, (DI) MOVOU X0, -16(DI)(BX*1) RET -clr_33through64: +_33through64: MOVOU X0, (DI) MOVOU X0, 16(DI) MOVOU X0, -32(DI)(BX*1) MOVOU X0, -16(DI)(BX*1) RET -clr_65through128: +_65through128: MOVOU X0, (DI) MOVOU X0, 16(DI) MOVOU X0, 32(DI) @@ -102,7 +102,7 @@ clr_65through128: MOVOU X0, -32(DI)(BX*1) MOVOU X0, -16(DI)(BX*1) RET -clr_129through256: +_129through256: MOVOU X0, (DI) MOVOU X0, 16(DI) MOVOU X0, 32(DI) @@ -126,5 +126,5 @@ nosse2: REP STOSL ANDL $3, BX - JNE clr_tail + JNE tail RET |