summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorR?my Oudompheng <oudomphe@phare.normalesup.org>2014-02-07 23:58:21 +0100
committerR?my Oudompheng <oudomphe@phare.normalesup.org>2014-02-07 23:58:21 +0100
commitd44065bb5fd796a1a7e77b5395f72eb002a89a18 (patch)
tree3116da7714020fd4205e739d4564c383e3e5b953
parent220082f1da44861400751e8a20731e6b195b534c (diff)
downloadgo-d44065bb5fd796a1a7e77b5395f72eb002a89a18.tar.gz
cmd/6g: faster memmove/memset-like code using unaligned load/stores.
This changes makes sgen and clearfat use unaligned instructions for the trailing bytes, like the runtime memmove does, resulting in faster code when manipulating types whose size is not a multiple of 8. LGTM=khr R=khr, iant, rsc CC=golang-codereviews https://codereview.appspot.com/51740044
-rw-r--r--src/cmd/6g/cgen.c35
-rw-r--r--src/cmd/6g/ggen.c24
2 files changed, 46 insertions, 13 deletions
diff --git a/src/cmd/6g/cgen.c b/src/cmd/6g/cgen.c
index da16071a8..76ece93b0 100644
--- a/src/cmd/6g/cgen.c
+++ b/src/cmd/6g/cgen.c
@@ -1436,14 +1436,33 @@ sgen(Node *n, Node *ns, int64 w)
gins(AMOVSQ, N, N); // MOVQ *(SI)+,*(DI)+
q--;
}
-
- if(c >= 4) {
- gins(AMOVSL, N, N); // MOVL *(SI)+,*(DI)+
- c -= 4;
- }
- while(c > 0) {
- gins(AMOVSB, N, N); // MOVB *(SI)+,*(DI)+
- c--;
+ // copy the remaining c bytes
+ if(w < 4 || c <= 1 || (odst < osrc && osrc < odst+w)) {
+ while(c > 0) {
+ gins(AMOVSB, N, N); // MOVB *(SI)+,*(DI)+
+ c--;
+ }
+ } else if(w < 8 || c <= 4) {
+ nodsi.op = OINDREG;
+ noddi.op = OINDREG;
+ nodsi.type = types[TINT32];
+ noddi.type = types[TINT32];
+ if(c > 4) {
+ nodsi.xoffset = 0;
+ noddi.xoffset = 0;
+ gmove(&nodsi, &noddi);
+ }
+ nodsi.xoffset = c-4;
+ noddi.xoffset = c-4;
+ gmove(&nodsi, &noddi);
+ } else {
+ nodsi.op = OINDREG;
+ noddi.op = OINDREG;
+ nodsi.type = types[TINT64];
+ noddi.type = types[TINT64];
+ nodsi.xoffset = c-8;
+ noddi.xoffset = c-8;
+ gmove(&nodsi, &noddi);
}
}
diff --git a/src/cmd/6g/ggen.c b/src/cmd/6g/ggen.c
index 2bdb12bdd..1b8bf7e40 100644
--- a/src/cmd/6g/ggen.c
+++ b/src/cmd/6g/ggen.c
@@ -1016,7 +1016,8 @@ void
clearfat(Node *nl)
{
int64 w, c, q;
- Node n1, oldn1, ax, oldax;
+ Node n1, oldn1, ax, oldax, di, z;
+ Prog *p;
/* clear a fat object */
if(debug['g'])
@@ -1048,10 +1049,23 @@ clearfat(Node *nl)
q--;
}
- if(c >= 4) {
- gconreg(AMOVQ, c, D_CX);
- gins(AREP, N, N); // repeat
- gins(ASTOSB, N, N); // STOB AL,*(DI)+
+ z = ax;
+ di = n1;
+ if(w >= 8 && c >= 4) {
+ di.op = OINDREG;
+ di.type = z.type = types[TINT64];
+ p = gins(AMOVQ, &z, &di);
+ p->to.scale = 1;
+ p->to.offset = c-8;
+ } else if(c >= 4) {
+ di.op = OINDREG;
+ di.type = z.type = types[TINT32];
+ p = gins(AMOVL, &z, &di);
+ if(c > 4) {
+ p = gins(AMOVL, &z, &di);
+ p->to.scale = 1;
+ p->to.offset = c-4;
+ }
} else
while(c > 0) {
gins(ASTOSB, N, N); // STOB AL,*(DI)+