diff options
author | Rémy Oudompheng <oudomphe@phare.normalesup.org> | 2014-02-07 23:58:21 +0100 |
---|---|---|
committer | Rémy Oudompheng <oudomphe@phare.normalesup.org> | 2014-02-07 23:58:21 +0100 |
commit | d44065bb5fd796a1a7e77b5395f72eb002a89a18 (patch) | |
tree | 3116da7714020fd4205e739d4564c383e3e5b953 | |
parent | 220082f1da44861400751e8a20731e6b195b534c (diff) | |
download | go-d44065bb5fd796a1a7e77b5395f72eb002a89a18.tar.gz |
cmd/6g: faster memmove/memset-like code using unaligned load/stores.
This change makes sgen and clearfat use unaligned instructions for
the trailing bytes, like the runtime memmove does, resulting in faster
code when manipulating types whose size is not a multiple of 8.
LGTM=khr
R=khr, iant, rsc
CC=golang-codereviews
https://codereview.appspot.com/51740044
-rw-r--r-- | src/cmd/6g/cgen.c | 35 | ||||
-rw-r--r-- | src/cmd/6g/ggen.c | 24 |
2 files changed, 46 insertions, 13 deletions
diff --git a/src/cmd/6g/cgen.c b/src/cmd/6g/cgen.c index da16071a8..76ece93b0 100644 --- a/src/cmd/6g/cgen.c +++ b/src/cmd/6g/cgen.c @@ -1436,14 +1436,33 @@ sgen(Node *n, Node *ns, int64 w) gins(AMOVSQ, N, N); // MOVQ *(SI)+,*(DI)+ q--; } - - if(c >= 4) { - gins(AMOVSL, N, N); // MOVL *(SI)+,*(DI)+ - c -= 4; - } - while(c > 0) { - gins(AMOVSB, N, N); // MOVB *(SI)+,*(DI)+ - c--; + // copy the remaining c bytes + if(w < 4 || c <= 1 || (odst < osrc && osrc < odst+w)) { + while(c > 0) { + gins(AMOVSB, N, N); // MOVB *(SI)+,*(DI)+ + c--; + } + } else if(w < 8 || c <= 4) { + nodsi.op = OINDREG; + noddi.op = OINDREG; + nodsi.type = types[TINT32]; + noddi.type = types[TINT32]; + if(c > 4) { + nodsi.xoffset = 0; + noddi.xoffset = 0; + gmove(&nodsi, &noddi); + } + nodsi.xoffset = c-4; + noddi.xoffset = c-4; + gmove(&nodsi, &noddi); + } else { + nodsi.op = OINDREG; + noddi.op = OINDREG; + nodsi.type = types[TINT64]; + noddi.type = types[TINT64]; + nodsi.xoffset = c-8; + noddi.xoffset = c-8; + gmove(&nodsi, &noddi); } } diff --git a/src/cmd/6g/ggen.c b/src/cmd/6g/ggen.c index 2bdb12bdd..1b8bf7e40 100644 --- a/src/cmd/6g/ggen.c +++ b/src/cmd/6g/ggen.c @@ -1016,7 +1016,8 @@ void clearfat(Node *nl) { int64 w, c, q; - Node n1, oldn1, ax, oldax; + Node n1, oldn1, ax, oldax, di, z; + Prog *p; /* clear a fat object */ if(debug['g']) @@ -1048,10 +1049,23 @@ clearfat(Node *nl) q--; } - if(c >= 4) { - gconreg(AMOVQ, c, D_CX); - gins(AREP, N, N); // repeat - gins(ASTOSB, N, N); // STOB AL,*(DI)+ + z = ax; + di = n1; + if(w >= 8 && c >= 4) { + di.op = OINDREG; + di.type = z.type = types[TINT64]; + p = gins(AMOVQ, &z, &di); + p->to.scale = 1; + p->to.offset = c-8; + } else if(c >= 4) { + di.op = OINDREG; + di.type = z.type = types[TINT32]; + p = gins(AMOVL, &z, &di); + if(c > 4) { + p = gins(AMOVL, &z, &di); + p->to.scale = 1; + p->to.offset = c-4; + } } else while(c > 0) { gins(ASTOSB, N, N); // STOB AL,*(DI)+ |