From 1f658971dca02d251811968d2d8d7cf0c4ecf6a5 Mon Sep 17 00:00:00 2001 From: Elias Naur Date: Wed, 14 Aug 2013 15:38:54 +0000 Subject: runtime.cmd/ld: Add ARM external linking and implement -shared in terms of external linking This CL is an aggregate of 10271047, 10499043, 9733044. Descriptions of each follow: 10499043 runtime,cmd/ld: Merge TLS symbols and teach 5l about ARM TLS This CL prepares for external linking support to ARM. The pseudo-symbols runtime.g and runtime.m are merged into a single runtime.tlsgm symbol. When linking externally, the offset of a thread local variable is stored at a memory location instead of being embedded into an offset of a ldr instruction. With a single runtime.tlsgm symbol for both g and m, only one such offset is needed. The larger part of this CL moves TLS code from gcc-compiled to internally compiled. The TLS code now uses the modern MRC instruction, and 5l is taught about TLS fallbacks in case the instruction is not available or appropriate. 10271047 This CL adds support for -linkmode external to 5l. For 5l itself, use addrel to allow for D_CALL relocations to be handled by the host linker. Of the cases listed in rsc's comment in issue 4069, only cases 5 and 63 needed an update. One of the TODO: addrel cases has since been replaced, and the rest of the cases are either covered by indirection through addpool (cases with LTO or LFROM flags) or stubs (case 74). The addpool cases are covered because addpool emits AWORD instructions, which in turn are handled by case 11. In the runtime, change the argv argument in the rt0* functions slightly to be a pointer to the argv list, instead of relying on a particular location of argv. 9733044 The -shared flag to 6l outputs a shared library, implemented in Go and callable from non-Go programs such as C. The main part of this CL changes the thread local storage model. Go uses the fastest and least general mode, local exec. 
TLS data in shared libraries normally requires at least the local dynamic mode; however, this CL instead opts for using the initial exec mode. Initial exec mode is faster than local dynamic mode and can be used on Linux since the linker has reserved a limited amount of TLS space for performance sensitive TLS code. Initial exec mode requires an extra load from the GOT table to determine the TLS offset. This penalty will not be paid if ld is not in -shared mode, since TLS accesses will be reduced to local exec. The ELF sections .init_array and .rela.init_array are added to register the Go runtime entry with cgo at library load time. The "hidden" attribute is added to Cgo functions called from Go, since Go does not generate calls through the GOT table, and adding non-GOT relocations for a global function is not supported by gcc. Cgo symbols don't need to be global and avoiding the GOT table is also faster. The changes to 8l only remove code relevant to the old -shared mode where internal linking was used. This CL only addresses the low-level linker work. It can be submitted by itself, but to be useful, the runtime changes in CL 9738047 are also needed. Design discussion at https://groups.google.com/forum/?fromgroups#!topic/golang-nuts/zmjXkGrEx6Q Fixes issue 5590. 
R=rsc CC=golang-dev https://codereview.appspot.com/12871044 Committer: Russ Cox --- src/cmd/5l/5.out.h | 2 +- src/cmd/5l/asm.c | 95 ++++++++++++++++++++++++++++++++++++++++++++++-------- src/cmd/5l/l.h | 4 +-- src/cmd/5l/noop.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++- src/cmd/5l/obj.c | 33 ++++++++++++------- src/cmd/5l/optab.c | 2 +- src/cmd/5l/pass.c | 7 ++++ src/cmd/5l/span.c | 12 +++---- 8 files changed, 201 insertions(+), 37 deletions(-) (limited to 'src/cmd/5l') diff --git a/src/cmd/5l/5.out.h b/src/cmd/5l/5.out.h index b47eee3aa..6b2f6e80c 100644 --- a/src/cmd/5l/5.out.h +++ b/src/cmd/5l/5.out.h @@ -273,7 +273,7 @@ enum as #define D_PLT1 (D_NONE+44) // R_ARM_PLT32, 2nd inst: add ip, ip, #0xNN000 #define D_PLT2 (D_NONE+45) // R_ARM_PLT32, 3rd inst: ldr pc, [ip, #0xNNN]! #define D_CALL (D_NONE+46) // R_ARM_PLT32/R_ARM_CALL/R_ARM_JUMP24, bl xxxxx or b yyyyy -#define D_TLS (D_NONE+47) +#define D_TLS (D_NONE+47) // R_ARM_TLS_LE32 /* * this is the ranlib header diff --git a/src/cmd/5l/asm.c b/src/cmd/5l/asm.c index 774332b46..b88f249e2 100644 --- a/src/cmd/5l/asm.c +++ b/src/cmd/5l/asm.c @@ -93,12 +93,6 @@ braddoff(int32 a, int32 b) return (((uint32)a) & 0xff000000U) | (0x00ffffffU & (uint32)(a + b)); } -Sym * -lookuprel(void) -{ - return lookup(".rel", 0); -} - void adddynrela(Sym *rel, Sym *s, Reloc *r) { @@ -264,6 +258,26 @@ elfreloc1(Reloc *r, vlong sectoff) else return -1; break; + + case D_CALL: + if(r->siz == 4) { + if((r->add & 0xff000000) == 0xeb000000) // BL + LPUT(R_ARM_CALL | elfsym<<8); + else + LPUT(R_ARM_JUMP24 | elfsym<<8); + } else + return -1; + break; + + case D_TLS: + if(r->siz == 4) { + if(flag_shared) + LPUT(R_ARM_TLS_IE32 | elfsym<<8); + else + LPUT(R_ARM_TLS_LE32 | elfsym<<8); + } else + return -1; + break; } return 0; @@ -308,6 +322,34 @@ machoreloc1(Reloc *r, vlong sectoff) int archreloc(Reloc *r, Sym *s, vlong *val) { + Sym *rs; + + if(linkmode == LinkExternal) { + switch(r->type) { + case D_CALL: + r->done = 0; + + // set up 
addend for eventual relocation via outer symbol. + rs = r->sym; + r->xadd = r->add; + if(r->xadd & 0x800000) + r->xadd |= ~0xffffff; + r->xadd *= 4; + while(rs->outer != nil) { + r->xadd += symaddr(rs) - symaddr(rs->outer); + rs = rs->outer; + } + + if(rs->type != SHOSTOBJ && rs->sect == nil) + diag("missing section for %s", rs->name); + r->xsym = rs; + + *val = braddoff((0xff000000U & (uint32)r->add), + (0xffffff & (uint32)(r->xadd / 4))); + return 0; + } + return -1; + } switch(r->type) { case D_CONST: *val = r->add; @@ -766,7 +808,7 @@ nopstat(char *f, Count *c) } void -asmout(Prog *p, Optab *o, int32 *out) +asmout(Prog *p, Optab *o, int32 *out, Sym *gmsym) { int32 o1, o2, o3, o4, o5, o6, v; int r, rf, rt, rt2; @@ -849,11 +891,19 @@ if(debug['G']) print("%ux: %s: arm %d\n", (uint32)(p->pc), p->from.sym->name, p- break; case 5: /* bra s */ + o1 = opbra(p->as, p->scond); v = -8; - // TODO: Use addrel. + if(p->to.sym != S && p->to.sym->type != 0) { + rel = addrel(cursym); + rel->off = pc - cursym->value; + rel->siz = 4; + rel->sym = p->to.sym; + rel->add = o1 | ((v >> 2) & 0xffffff); + rel->type = D_CALL; + break; + } if(p->cond != P) v = (p->cond->pc - pc) - 8; - o1 = opbra(p->as, p->scond); o1 |= (v >> 2) & 0xffffff; break; @@ -911,7 +961,13 @@ if(debug['G']) print("%ux: %s: arm %d\n", (uint32)(p->pc), p->from.sym->name, p- rel->siz = 4; rel->sym = p->to.sym; rel->add = p->to.offset; - if(flag_shared) { + if(rel->sym == gmsym) { + rel->type = D_TLS; + if(flag_shared) + rel->add += pc - p->pcrel->pc - 8 - rel->siz; + rel->xadd = rel->add; + rel->xsym = rel->sym; + } else if(flag_shared) { rel->type = D_PCREL; rel->add += pc - p->pcrel->pc - 8; } else @@ -1242,9 +1298,22 @@ if(debug['G']) print("%ux: %s: arm %d\n", (uint32)(p->pc), p->from.sym->name, p- case 63: /* bcase */ if(p->cond != P) { - o1 = p->cond->pc; - if(flag_shared) - o1 = o1 - p->pcrel->pc - 16; + rel = addrel(cursym); + rel->off = pc - cursym->value; + rel->siz = 4; + if(p->to.sym != S && 
p->to.sym->type != 0) { + rel->sym = p->to.sym; + rel->add = p->to.offset; + } else { + rel->sym = cursym; + rel->add = p->cond->pc - cursym->value; + } + if(o->flag & LPCREL) { + rel->type = D_PCREL; + rel->add += pc - p->pcrel->pc - 16 + rel->siz; + } else + rel->type = D_ADDR; + o1 = 0; } break; diff --git a/src/cmd/5l/l.h b/src/cmd/5l/l.h index 10d8b5bd3..ae4b05ba1 100644 --- a/src/cmd/5l/l.h +++ b/src/cmd/5l/l.h @@ -183,7 +183,6 @@ struct Sym Reloc* r; int32 nr; int32 maxr; - int rel_ro; }; #define SIGNINTERN (1729*325*1729) @@ -293,7 +292,6 @@ EXTERN int32 INITDAT; /* data location */ EXTERN int32 INITRND; /* data round above text location */ EXTERN int32 INITTEXT; /* text location */ EXTERN char* INITENTRY; /* entry point */ -EXTERN char* LIBINITENTRY; /* shared library entry point */ EXTERN int32 autosize; EXTERN Auto* curauto; EXTERN Auto* curhist; @@ -364,7 +362,7 @@ int aclass(Adr*); void addhist(int32, int); Prog* appendp(Prog*); void asmb(void); -void asmout(Prog*, Optab*, int32*); +void asmout(Prog*, Optab*, int32*, Sym*); int32 atolwhex(char*); Prog* brloop(Prog*); void buildop(void); diff --git a/src/cmd/5l/noop.c b/src/cmd/5l/noop.c index e8d09160e..44f4c22cf 100644 --- a/src/cmd/5l/noop.c +++ b/src/cmd/5l/noop.c @@ -60,13 +60,14 @@ noops(void) int o; int32 arg; Prog *pmorestack; - Sym *symmorestack; + Sym *symmorestack, *tlsfallback, *gmsym; /* * find leaf subroutines * strip NOPs * expand RET * expand BECOME pseudo + * fixup TLS */ if(debug['v']) @@ -81,6 +82,10 @@ noops(void) pmorestack = symmorestack->text; pmorestack->reg |= NOSPLIT; + tlsfallback = lookup("runtime.read_tls_fallback", 0); + gmsym = S; + if(linkmode == LinkExternal) + gmsym = lookup("runtime.tlsgm", 0); q = P; for(cursym = textp; cursym != nil; cursym = cursym->next) { for(p = cursym->text; p != P; p = p->link) { @@ -145,6 +150,82 @@ noops(void) } } break; + case AWORD: + // Rewrite TLS register fetch: MRC 15, 0, , C13, C0, 3 + if((p->to.offset & 0xffff0fff) == 0xee1d0f70) { + 
if(HEADTYPE == Hopenbsd) { + p->as = ARET; + } else if(goarm < 7) { + if(tlsfallback->type != STEXT) { + diag("runtime·read_tls_fallback not defined"); + errorexit(); + } + // BL runtime.read_tls_fallback(SB) + p->as = ABL; + p->to.type = D_BRANCH; + p->to.sym = tlsfallback; + p->cond = tlsfallback->text; + p->to.offset = 0; + cursym->text->mark &= ~LEAF; + } + if(linkmode == LinkExternal) { + // runtime.tlsgm is relocated with R_ARM_TLS_LE32 + // and $runtime.tlsgm will contain the TLS offset. + // + // MOV $runtime.tlsgm+tlsoffset(SB), REGTMP + // ADD REGTMP, + // + // In shared mode, runtime.tlsgm is relocated with + // R_ARM_TLS_IE32 and runtime.tlsgm(SB) will point + // to the GOT entry containing the TLS offset. + // + // MOV runtime.tlsgm(SB), REGTMP + // ADD REGTMP, + // SUB -tlsoffset, + // + // The SUB compensates for tlsoffset + // used in runtime.save_gm and runtime.load_gm. + q = p; + p = appendp(p); + p->as = AMOVW; + p->scond = 14; + p->reg = NREG; + if(flag_shared) { + p->from.type = D_OREG; + p->from.offset = 0; + } else { + p->from.type = D_CONST; + p->from.offset = tlsoffset; + } + p->from.sym = gmsym; + p->from.name = D_EXTERN; + p->to.type = D_REG; + p->to.reg = REGTMP; + p->to.offset = 0; + + p = appendp(p); + p->as = AADD; + p->scond = 14; + p->reg = NREG; + p->from.type = D_REG; + p->from.reg = REGTMP; + p->to.type = D_REG; + p->to.reg = (q->to.offset & 0xf000) >> 12; + p->to.offset = 0; + + if(flag_shared) { + p = appendp(p); + p->as = ASUB; + p->scond = 14; + p->reg = NREG; + p->from.type = D_CONST; + p->from.offset = -tlsoffset; + p->to.type = D_REG; + p->to.reg = (q->to.offset & 0xf000) >> 12; + p->to.offset = 0; + } + } + } } q = p; } diff --git a/src/cmd/5l/obj.c b/src/cmd/5l/obj.c index 168cf01de..824a05fd8 100644 --- a/src/cmd/5l/obj.c +++ b/src/cmd/5l/obj.c @@ -81,8 +81,7 @@ main(int argc, char *argv[]) INITDAT = -1; INITRND = -1; INITENTRY = 0; - LIBINITENTRY = 0; - linkmode = LinkInternal; // TODO: LinkAuto once everything works. 
+ linkmode = LinkAuto; nuxiinit(); p = getgoarm(); @@ -126,34 +125,43 @@ main(int argc, char *argv[]) flagstr("r", "dir1:dir2:...: set ELF dynamic linker search path", &rpath); flagcount("race", "enable race detector", &flag_race); flagcount("s", "disable symbol table", &debug['s']); + flagcount("shared", "generate shared object (implies -linkmode external)", &flag_shared); flagstr("tmpdir", "leave temporary files in this directory", &tmpdir); flagcount("u", "reject unsafe packages", &debug['u']); flagcount("v", "print link trace", &debug['v']); flagcount("w", "disable DWARF generation", &debug['w']); - flagcount("shared", "generate shared object", &flag_shared); - // TODO: link mode flag flagparse(&argc, &argv, usage); if(argc != 1) usage(); + if(flag_shared) + linkmode = LinkExternal; + + mywhatsys(); + + if(HEADTYPE == -1) + HEADTYPE = headtype(goos); + // getgoextlinkenabled is based on GO_EXTLINK_ENABLED when // Go was built; see ../../make.bash. if(linkmode == LinkAuto && strcmp(getgoextlinkenabled(), "0") == 0) linkmode = LinkInternal; - if(linkmode == LinkExternal) { - diag("only -linkmode=internal is supported"); - errorexit(); - } else if(linkmode == LinkAuto) { - linkmode = LinkInternal; + switch(HEADTYPE) { + default: + if(linkmode == LinkAuto) + linkmode = LinkInternal; + if(linkmode == LinkExternal && strcmp(getgoextlinkenabled(), "1") != 0) + sysfatal("cannot use -linkmode=external with -H %s", headstr(HEADTYPE)); + break; + case Hlinux: + break; } libinit(); - if(HEADTYPE == -1) - HEADTYPE = headtype(goos); switch(HEADTYPE) { default: diag("unknown -H option"); @@ -208,7 +216,7 @@ main(int argc, char *argv[]) case Hnetbsd: debug['d'] = 0; // with dynamic linking tlsoffset = -8; // hardcoded number, first 4-byte word for g, and then 4-byte word for m - // this number is known to ../../pkg/runtime/cgo/gcc_linux_arm.c + // this number is known to ../../pkg/runtime/rt0_*_arm.s elfinit(); HEADR = ELFRESERVE; if(INITTEXT == -1) @@ -253,6 +261,7 @@ 
main(int argc, char *argv[]) // mark some functions that are only referenced after linker code editing if(debug['F']) mark(rlookup("_sfloat", 0)); + mark(lookup("runtime.read_tls_fallback", 0)); deadcode(); if(textp == nil) { diag("no code"); diff --git a/src/cmd/5l/optab.c b/src/cmd/5l/optab.c index dc9e5e99f..3d05d6d09 100644 --- a/src/cmd/5l/optab.c +++ b/src/cmd/5l/optab.c @@ -191,7 +191,7 @@ Optab optab[] = { AMOVBU, C_REG, C_NONE, C_SHIFT, 61, 4, 0 }, { ACASE, C_REG, C_NONE, C_NONE, 62, 4, 0, LPCREL, 8 }, - { ABCASE, C_NONE, C_NONE, C_SBRA, 63, 4, 0 }, + { ABCASE, C_NONE, C_NONE, C_SBRA, 63, 4, 0, LPCREL, 0 }, { AMOVH, C_REG, C_NONE, C_HAUTO, 70, 4, REGSP, 0 }, { AMOVH, C_REG, C_NONE, C_HOREG, 70, 4, 0, 0 }, diff --git a/src/cmd/5l/pass.c b/src/cmd/5l/pass.c index a7e776845..cd8897989 100644 --- a/src/cmd/5l/pass.c +++ b/src/cmd/5l/pass.c @@ -246,6 +246,13 @@ patch(void) p->cond = q; } } + if(flag_shared) { + s = lookup("init_array", 0); + s->type = SINITARR; + s->reachable = 1; + s->hide = 1; + addaddr(s, lookup(INITENTRY, 0)); + } for(cursym = textp; cursym != nil; cursym = cursym->next) { for(p = cursym->text; p != P; p = p->link) { diff --git a/src/cmd/5l/span.c b/src/cmd/5l/span.c index 7201c006f..e7cc0b4b1 100644 --- a/src/cmd/5l/span.c +++ b/src/cmd/5l/span.c @@ -90,7 +90,7 @@ span(void) int32 c, otxt, out[6]; Section *sect; uchar *bp; - Sym *sub; + Sym *sub, *gmsym; if(debug['v']) Bprint(&bso, "%5.2f span\n", cputime()); @@ -237,6 +237,9 @@ span(void) * code references to be relocated too, and then * perhaps we'd be able to parallelize the span loop above. 
*/ + gmsym = S; + if(linkmode == LinkExternal) + gmsym = lookup("runtime.tlsgm", 0); for(cursym = textp; cursym != nil; cursym = cursym->next) { p = cursym->text; if(p == P || p->link == P) @@ -249,7 +252,7 @@ span(void) pc = p->pc; curp = p; o = oplook(p); - asmout(p, o, out); + asmout(p, o, out, gmsym); for(i=0; isize/4; i++) { v = out[i]; *bp++ = v; @@ -574,10 +577,7 @@ aclass(Adr *a) if(s == S) break; instoffset = 0; // s.b. unused but just in case - if(flag_shared) - return C_LCONADDR; - else - return C_LCON; + return C_LCONADDR; case D_AUTO: instoffset = autosize + a->offset; -- cgit v1.2.1