From d160479b0b146594d35acc6b6f708df197ed2453 Mon Sep 17 00:00:00 2001 From: Mrunal Patel Date: Thu, 2 Oct 2014 11:37:06 -0700 Subject: syscall: support UID/GID map files for Linux user namespaces Fixes issue 8447. LGTM=iant R=golang-codereviews, bradfitz, iant CC=golang-codereviews https://codereview.appspot.com/126190043 Committer: Ian Lance Taylor --- src/syscall/exec_linux.go | 123 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 111 insertions(+), 12 deletions(-) (limited to 'src/syscall') diff --git a/src/syscall/exec_linux.go b/src/syscall/exec_linux.go index f61dfc424..72e2f0b69 100644 --- a/src/syscall/exec_linux.go +++ b/src/syscall/exec_linux.go @@ -10,19 +10,29 @@ import ( "unsafe" ) +// SysProcIDMap holds Container ID to Host ID mappings used for User Namespaces in Linux. +// See user_namespaces(7). +type SysProcIDMap struct { + ContainerID int // Container ID. + HostID int // Host ID. + Size int // Size. +} + type SysProcAttr struct { - Chroot string // Chroot. - Credential *Credential // Credential. - Ptrace bool // Enable tracing. - Setsid bool // Create session. - Setpgid bool // Set process group ID to new pid (SYSV setpgrp) - Setctty bool // Set controlling terminal to fd Ctty (only meaningful if Setsid is set) - Noctty bool // Detach fd 0 from controlling terminal - Ctty int // Controlling TTY fd (Linux only) - Pdeathsig Signal // Signal that the process will get when its parent dies (Linux only) - Cloneflags uintptr // Flags for clone calls (Linux only) - Foreground bool // Set foreground process group to child's pid. (Implies Setpgid. Stdin should be a TTY) - Joinpgrp int // If != 0, child's process group ID. (Setpgid must not be set) + Chroot string // Chroot. + Credential *Credential // Credential. + Ptrace bool // Enable tracing. + Setsid bool // Create session. + Setpgid bool // Set process group ID to new pid (SYSV setpgrp) + Setctty bool // Set controlling terminal to fd Ctty (only meaningful if Setsid is set) + Noctty bool // Detach fd 0 from controlling terminal + Ctty int // Controlling TTY fd (Linux only) + Pdeathsig Signal // Signal that the process will get when its parent dies (Linux only) + Cloneflags uintptr // Flags for clone calls (Linux only) + Foreground bool // Set foreground process group to child's pid. (Implies Setpgid. Stdin should be a TTY) + Joinpgrp int // If != 0, child's process group ID. (Setpgid must not be set) + UidMappings []SysProcIDMap // User ID mappings for user namespaces. + GidMappings []SysProcIDMap // Group ID mappings for user namespaces. } // Implemented in runtime package. @@ -44,8 +54,10 @@ func forkAndExecInChild(argv0 *byte, argv, envv []*byte, chroot, dir *byte, attr var ( r1 uintptr err1 Errno + err2 Errno nextfd int i int + p [2]int ) // Guard against side effects of shuffling fds below. @@ -61,6 +73,14 @@ func forkAndExecInChild(argv0 *byte, argv, envv []*byte, chroot, dir *byte, attr } nextfd++ + // Allocate another pipe for parent to child communication for + // synchronizing writing of User ID/Group ID mappings. + if sys.UidMappings != nil || sys.GidMappings != nil { + if err := forkExecPipe(p[:]); err != nil { + return 0, err.(Errno) + } + } + // About to call fork. // No more allocation or calls of non-assembly functions. runtime_BeforeFork() @@ -75,6 +95,16 @@ func forkAndExecInChild(argv0 *byte, argv, envv []*byte, chroot, dir *byte, attr runtime_AfterFork() pid = int(r1) + if sys.UidMappings != nil || sys.GidMappings != nil { + Close(p[0]) + err := writeUidGidMappings(pid, sys) + if err != nil { + err2 = err.(Errno) + } + RawSyscall(SYS_WRITE, uintptr(p[1]), uintptr(unsafe.Pointer(&err2)), unsafe.Sizeof(err2)) + Close(p[1]) + } + if sys.Joinpgrp != 0 { // Place the child in the specified process group. RawSyscall(SYS_SETPGID, r1, uintptr(sys.Joinpgrp), 0) @@ -93,6 +123,25 @@ func forkAndExecInChild(argv0 *byte, argv, envv []*byte, chroot, dir *byte, attr // Fork succeeded, now in child. + // Wait for User ID/Group ID mappings to be written. + if sys.UidMappings != nil || sys.GidMappings != nil { + if _, _, err1 = RawSyscall(SYS_CLOSE, uintptr(p[1]), 0, 0); err1 != 0 { + goto childerror + } + r1, _, err1 = RawSyscall(SYS_READ, uintptr(p[0]), uintptr(unsafe.Pointer(&err2)), unsafe.Sizeof(err2)) + if err1 != 0 { + goto childerror + } + if r1 != unsafe.Sizeof(err2) { + err1 = EINVAL + goto childerror + } + if err2 != 0 { + err1 = err2 + goto childerror + } + } + // Parent death signal if sys.Pdeathsig != 0 { _, _, err1 = RawSyscall6(SYS_PRCTL, PR_SET_PDEATHSIG, uintptr(sys.Pdeathsig), 0, 0, 0, 0) @@ -296,3 +345,53 @@ func forkExecPipe(p []int) (err error) { } return } + +// writeIDMappings writes the user namespace User ID or Group ID mappings to the specified path. +func writeIDMappings(path string, idMap []SysProcIDMap) error { + fd, err := Open(path, O_RDWR, 0) + if err != nil { + return err + } + + data := "" + for _, im := range idMap { + data = data + itoa(im.ContainerID) + " " + itoa(im.HostID) + " " + itoa(im.Size) + "\n" + } + + bytes, err := ByteSliceFromString(data) + if err != nil { + Close(fd) + return err + } + + if _, err := Write(fd, bytes); err != nil { + Close(fd) + return err + } + + if err := Close(fd); err != nil { + return err + } + + return nil +} + +// writeUidGidMappings writes User ID and Group ID mappings for user namespaces +// for a process and it is called from the parent process. +func writeUidGidMappings(pid int, sys *SysProcAttr) error { + if sys.UidMappings != nil { + uidf := "/proc/" + itoa(pid) + "/uid_map" + if err := writeIDMappings(uidf, sys.UidMappings); err != nil { + return err + } + } + + if sys.GidMappings != nil { + gidf := "/proc/" + itoa(pid) + "/gid_map" + if err := writeIDMappings(gidf, sys.GidMappings); err != nil { + return err + } + } + + return nil +} -- cgit v1.2.1