summary refs log tree commit diff
diff options
context:
space:
mode:
author Kenfe-Mickael Laventure <mickael.laventure@gmail.com> 2017-09-22 06:52:41 -0700
committer Kenfe-Mickael Laventure <mickael.laventure@gmail.com> 2017-10-20 07:11:37 -0700
commit ddae20c032058a0fd42c34c2e9750ee8f6296ac8 (patch)
tree 259188c655005657f8ef3b5dd11f256aca8ba88f
parent 7acea2a243d25c061d12a2a2f8bbd4e5955a85f4 (diff)
download docker-ddae20c032058a0fd42c34c2e9750ee8f6296ac8.tar.gz
Update libcontainerd to use containerd 1.0
Signed-off-by: Kenfe-Mickael Laventure <mickael.laventure@gmail.com>
-rw-r--r--api/server/router/container/exec.go2
-rw-r--r--builder/dockerfile/containerbackend.go2
-rw-r--r--cmd/dockerd/config.go2
-rw-r--r--cmd/dockerd/config_unix.go2
-rw-r--r--cmd/dockerd/config_windows.go1
-rw-r--r--cmd/dockerd/daemon.go17
-rw-r--r--cmd/dockerd/daemon_linux.go2
-rw-r--r--cmd/dockerd/daemon_solaris.go16
-rw-r--r--cmd/dockerd/daemon_unix.go58
-rw-r--r--cmd/dockerd/daemon_windows.go15
-rw-r--r--container/container.go37
-rw-r--r--container/container_unix.go9
-rw-r--r--container/container_windows.go6
-rw-r--r--container/state.go17
-rw-r--r--container/state_unix.go10
-rw-r--r--container/state_windows.go7
-rw-r--r--container/stream/streams.go6
-rw-r--r--daemon/checkpoint.go17
-rw-r--r--daemon/config/config.go5
-rw-r--r--daemon/config/config_common_unix.go2
-rw-r--r--daemon/daemon.go125
-rw-r--r--daemon/daemon_unix.go202
-rw-r--r--daemon/daemon_windows.go88
-rw-r--r--daemon/delete.go1
-rw-r--r--daemon/errors.go5
-rw-r--r--daemon/exec.go40
-rw-r--r--daemon/exec/exec.go66
-rw-r--r--daemon/exec_linux.go14
-rw-r--r--daemon/exec_solaris.go4
-rw-r--r--daemon/exec_windows.go4
-rw-r--r--daemon/info_unix.go29
-rw-r--r--daemon/kill.go5
-rw-r--r--daemon/logger/plugin_unix.go2
-rw-r--r--daemon/monitor.go201
-rw-r--r--daemon/monitor_linux.go10
-rw-r--r--daemon/monitor_solaris.go9
-rw-r--r--daemon/monitor_windows.go50
-rw-r--r--daemon/oci_linux.go9
-rw-r--r--daemon/pause.go15
-rw-r--r--daemon/reload.go4
-rw-r--r--daemon/resize.go5
-rw-r--r--daemon/start.go57
-rw-r--r--daemon/start_unix.go45
-rw-r--r--daemon/start_windows.go13
-rw-r--r--daemon/top_unix.go15
-rw-r--r--daemon/top_unix_test.go10
-rw-r--r--daemon/top_windows.go12
-rw-r--r--daemon/unpause.go13
-rw-r--r--daemon/update.go3
-rw-r--r--daemon/update_linux.go47
-rw-r--r--daemon/update_windows.go6
-rw-r--r--hack/make/.go-autogen6
-rw-r--r--integration-cli/daemon/daemon.go4
-rw-r--r--integration-cli/docker_api_stats_test.go2
-rw-r--r--integration-cli/docker_cli_attach_test.go5
-rw-r--r--integration-cli/docker_cli_build_unix_test.go5
-rw-r--r--integration-cli/docker_cli_daemon_test.go14
-rw-r--r--integration-cli/docker_cli_events_test.go16
-rw-r--r--integration-cli/docker_cli_logs_test.go1
-rw-r--r--integration-cli/docker_cli_network_unix_test.go1
-rw-r--r--integration-cli/docker_cli_run_test.go1
-rw-r--r--integration-cli/docker_deprecated_api_v124_test.go2
-rw-r--r--integration-cli/events_utils_test.go2
-rw-r--r--integration/service/create_test.go12
-rw-r--r--libcontainerd/client.go46
-rw-r--r--libcontainerd/client_daemon.go802
-rw-r--r--libcontainerd/client_daemon_linux.go96
-rw-r--r--libcontainerd/client_daemon_windows.go53
-rw-r--r--libcontainerd/client_linux.go616
-rw-r--r--libcontainerd/client_local_windows.go1340
-rw-r--r--libcontainerd/client_solaris.go104
-rw-r--r--libcontainerd/client_unix.go141
-rw-r--r--libcontainerd/client_windows.go886
-rw-r--r--libcontainerd/container.go13
-rw-r--r--libcontainerd/container_unix.go246
-rw-r--r--libcontainerd/container_windows.go338
-rw-r--r--libcontainerd/errors.go46
-rw-r--r--libcontainerd/io.go36
-rw-r--r--libcontainerd/io_unix.go60
-rw-r--r--libcontainerd/io_windows.go138
-rw-r--r--libcontainerd/oom_linux.go31
-rw-r--r--libcontainerd/oom_solaris.go5
-rw-r--r--libcontainerd/pausemonitor_unix.go42
-rw-r--r--libcontainerd/process.go18
-rw-r--r--libcontainerd/process_unix.go107
-rw-r--r--libcontainerd/process_windows.go14
-rw-r--r--libcontainerd/queue.go (renamed from libcontainerd/queue_unix.go)2
-rw-r--r--libcontainerd/queue_test.go (renamed from libcontainerd/queue_unix_test.go)2
-rw-r--r--libcontainerd/remote.go20
-rw-r--r--libcontainerd/remote_daemon.go317
-rw-r--r--libcontainerd/remote_daemon_options.go141
-rw-r--r--libcontainerd/remote_daemon_options_unix.go36
-rw-r--r--libcontainerd/remote_daemon_process.go56
-rw-r--r--libcontainerd/remote_daemon_process_unix.go61
-rw-r--r--libcontainerd/remote_daemon_unix.go56
-rw-r--r--libcontainerd/remote_daemon_windows.go50
-rw-r--r--libcontainerd/remote_local.go59
-rw-r--r--libcontainerd/remote_unix.go565
-rw-r--r--libcontainerd/remote_windows.go36
-rw-r--r--libcontainerd/types.go126
-rw-r--r--libcontainerd/types_linux.go57
-rw-r--r--libcontainerd/types_solaris.go43
-rw-r--r--libcontainerd/types_windows.go24
-rw-r--r--libcontainerd/utils_linux.go63
-rw-r--r--libcontainerd/utils_solaris.go27
-rw-r--r--libcontainerd/utils_windows.go8
-rw-r--r--oci/defaults.go18
-rw-r--r--pkg/authorization/plugin.go12
-rw-r--r--pkg/mount/mount.go10
-rw-r--r--pkg/system/process_windows.go18
-rw-r--r--pkg/system/rm.go2
-rw-r--r--plugin/executor/containerd/containerd.go103
-rw-r--r--plugin/manager_linux.go15
113 files changed, 4556 insertions, 3962 deletions
diff --git a/api/server/router/container/exec.go b/api/server/router/container/exec.go
index aa2ebb187b..97c27d844f 100644
--- a/api/server/router/container/exec.go
+++ b/api/server/router/container/exec.go
@@ -126,7 +126,7 @@ func (s *containerRouter) postContainerExecStart(ctx context.Context, w http.Res
return err
}
stdout.Write([]byte(err.Error() + "\r\n"))
- logrus.Errorf("Error running exec in container: %v", err)
+ logrus.Errorf("Error running exec %s in container: %v", execName, err)
}
return nil
}
diff --git a/builder/dockerfile/containerbackend.go b/builder/dockerfile/containerbackend.go
index ec1bd6b9f5..add0a876df 100644
--- a/builder/dockerfile/containerbackend.go
+++ b/builder/dockerfile/containerbackend.go
@@ -102,7 +102,7 @@ func (c *containerManager) Run(ctx context.Context, cID string, stdout, stderr i
func logCancellationError(cancelErrCh chan error, msg string) {
if cancelErr := <-cancelErrCh; cancelErr != nil {
- logrus.Debugf("Build cancelled (%v): ", cancelErr, msg)
+ logrus.Debugf("Build cancelled (%v): %s", cancelErr, msg)
}
}
diff --git a/cmd/dockerd/config.go b/cmd/dockerd/config.go
index f80641b1f6..f142b7538c 100644
--- a/cmd/dockerd/config.go
+++ b/cmd/dockerd/config.go
@@ -27,6 +27,8 @@ func installCommonConfigFlags(conf *config.Config, flags *pflag.FlagSet) {
flags.Var(opts.NewNamedListOptsRef("exec-opts", &conf.ExecOptions, nil), "exec-opt", "Runtime execution options")
flags.StringVarP(&conf.Pidfile, "pidfile", "p", defaultPidFile, "Path to use for daemon PID file")
flags.StringVarP(&conf.Root, "graph", "g", defaultDataRoot, "Root of the Docker runtime")
+ flags.StringVar(&conf.ExecRoot, "exec-root", defaultExecRoot, "Root directory for execution state files")
+ flags.StringVar(&conf.ContainerdAddr, "containerd", "", "containerd grpc address")
// "--graph" is "soft-deprecated" in favor of "data-root". This flag was added
// before Docker 1.0, so won't be removed, only hidden, to discourage its usage.
diff --git a/cmd/dockerd/config_unix.go b/cmd/dockerd/config_unix.go
index ad27a46726..dcc7dc5e81 100644
--- a/cmd/dockerd/config_unix.go
+++ b/cmd/dockerd/config_unix.go
@@ -29,13 +29,11 @@ func installConfigFlags(conf *config.Config, flags *pflag.FlagSet) {
flags.BoolVar(&conf.BridgeConfig.EnableIPForward, "ip-forward", true, "Enable net.ipv4.ip_forward")
flags.BoolVar(&conf.BridgeConfig.EnableIPMasq, "ip-masq", true, "Enable IP masquerading")
flags.BoolVar(&conf.BridgeConfig.EnableIPv6, "ipv6", false, "Enable IPv6 networking")
- flags.StringVar(&conf.ExecRoot, "exec-root", defaultExecRoot, "Root directory for execution state files")
flags.StringVar(&conf.BridgeConfig.FixedCIDRv6, "fixed-cidr-v6", "", "IPv6 subnet for fixed IPs")
flags.BoolVar(&conf.BridgeConfig.EnableUserlandProxy, "userland-proxy", true, "Use userland proxy for loopback traffic")
flags.StringVar(&conf.BridgeConfig.UserlandProxyPath, "userland-proxy-path", "", "Path to the userland proxy binary")
flags.StringVar(&conf.CgroupParent, "cgroup-parent", "", "Set parent cgroup for all containers")
flags.StringVar(&conf.RemappedRoot, "userns-remap", "", "User/Group setting for user namespaces")
- flags.StringVar(&conf.ContainerdAddr, "containerd", "", "Path to containerd socket")
flags.BoolVar(&conf.LiveRestoreEnabled, "live-restore", false, "Enable live restore of docker when containers are still running")
flags.IntVar(&conf.OOMScoreAdjust, "oom-score-adjust", -500, "Set the oom_score_adj for the daemon")
flags.BoolVar(&conf.Init, "init", false, "Run an init in the container to forward signals and reap processes")
diff --git a/cmd/dockerd/config_windows.go b/cmd/dockerd/config_windows.go
index 79cdd25048..36af76645f 100644
--- a/cmd/dockerd/config_windows.go
+++ b/cmd/dockerd/config_windows.go
@@ -11,6 +11,7 @@ import (
var (
defaultPidFile string
defaultDataRoot = filepath.Join(os.Getenv("programdata"), "docker")
+ defaultExecRoot = filepath.Join(os.Getenv("programdata"), "docker", "exec-root")
)
// installConfigFlags adds flags to the pflag.FlagSet to configure the daemon
diff --git a/cmd/dockerd/daemon.go b/cmd/dockerd/daemon.go
index c8fcafb300..44e16677e7 100644
--- a/cmd/dockerd/daemon.go
+++ b/cmd/dockerd/daemon.go
@@ -204,7 +204,11 @@ func (cli *DaemonCli) start(opts *daemonOptions) (err error) {
return err
}
- containerdRemote, err := libcontainerd.New(cli.getLibcontainerdRoot(), cli.getPlatformRemoteOptions()...)
+ rOpts, err := cli.getRemoteOptions()
+ if err != nil {
+ return fmt.Errorf("Failed to generate containerd options: %s", err)
+ }
+ containerdRemote, err := libcontainerd.New(filepath.Join(cli.Config.Root, "containerd"), filepath.Join(cli.Config.ExecRoot, "containerd"), rOpts...)
if err != nil {
return err
}
@@ -560,6 +564,17 @@ func (cli *DaemonCli) initMiddlewares(s *apiserver.Server, cfg *apiserver.Config
return nil
}
+func (cli *DaemonCli) getRemoteOptions() ([]libcontainerd.RemoteOption, error) {
+ opts := []libcontainerd.RemoteOption{}
+
+ pOpts, err := cli.getPlatformRemoteOptions()
+ if err != nil {
+ return nil, err
+ }
+ opts = append(opts, pOpts...)
+ return opts, nil
+}
+
// validates that the plugins requested with the --authorization-plugin flag are valid AuthzDriver
// plugins present on the host and available to the daemon
func validateAuthzPlugins(requestedPlugins []string, pg plugingetter.PluginGetter) error {
diff --git a/cmd/dockerd/daemon_linux.go b/cmd/dockerd/daemon_linux.go
index a909ee4fbd..b58f0f08a3 100644
--- a/cmd/dockerd/daemon_linux.go
+++ b/cmd/dockerd/daemon_linux.go
@@ -11,5 +11,5 @@ func preNotifySystem() {
// notifySystem sends a message to the host when the server is ready to be used
func notifySystem() {
// Tell the init daemon we are accepting requests
- go systemdDaemon.SdNotify("READY=1")
+ go systemdDaemon.SdNotify(false, "READY=1")
}
diff --git a/cmd/dockerd/daemon_solaris.go b/cmd/dockerd/daemon_solaris.go
index 9ee18dad7d..6f82421e97 100644
--- a/cmd/dockerd/daemon_solaris.go
+++ b/cmd/dockerd/daemon_solaris.go
@@ -41,20 +41,8 @@ func preNotifySystem() {
func notifySystem() {
}
-func (cli *DaemonCli) getPlatformRemoteOptions() []libcontainerd.RemoteOption {
- opts := []libcontainerd.RemoteOption{}
- if cli.Config.ContainerdAddr != "" {
- opts = append(opts, libcontainerd.WithRemoteAddr(cli.Config.ContainerdAddr))
- } else {
- opts = append(opts, libcontainerd.WithStartDaemon(true))
- }
- return opts
-}
-
-// getLibcontainerdRoot gets the root directory for libcontainerd/containerd to
-// store their state.
-func (cli *DaemonCli) getLibcontainerdRoot() string {
- return filepath.Join(cli.Config.ExecRoot, "libcontainerd")
+func (cli *DaemonCli) getPlatformRemoteOptions() ([]libcontainerd.RemoteOption, error) {
+ return nil, nil
}
// getSwarmRunRoot gets the root directory for swarm to store runtime state
diff --git a/cmd/dockerd/daemon_unix.go b/cmd/dockerd/daemon_unix.go
index 7909d98da5..324b299e18 100644
--- a/cmd/dockerd/daemon_unix.go
+++ b/cmd/dockerd/daemon_unix.go
@@ -10,9 +10,11 @@ import (
"path/filepath"
"strconv"
+ "github.com/containerd/containerd/linux"
"github.com/docker/docker/cmd/dockerd/hack"
"github.com/docker/docker/daemon"
"github.com/docker/docker/libcontainerd"
+ "github.com/docker/docker/pkg/parsers/kernel"
"github.com/docker/libnetwork/portallocator"
"golang.org/x/sys/unix"
)
@@ -35,42 +37,48 @@ func getDaemonConfDir(_ string) string {
return "/etc/docker"
}
-// setupConfigReloadTrap configures the USR2 signal to reload the configuration.
-func (cli *DaemonCli) setupConfigReloadTrap() {
- c := make(chan os.Signal, 1)
- signal.Notify(c, unix.SIGHUP)
- go func() {
- for range c {
- cli.reloadConfig()
- }
- }()
-}
+func (cli *DaemonCli) getPlatformRemoteOptions() ([]libcontainerd.RemoteOption, error) {
+ // On older kernels, putting the containerd-shim in its own mount
+ // namespace effectively prevents operations such as unlink, rename
+ // and remove on mountpoints that were present at the time the shim
+ // namespace was created. This would lead to the famous EBUSY while trying
+ // to remove shm mounts.
+ var noNewNS bool
+ if !kernel.CheckKernelVersion(3, 18, 0) {
+ noNewNS = true
+ }
-func (cli *DaemonCli) getPlatformRemoteOptions() []libcontainerd.RemoteOption {
opts := []libcontainerd.RemoteOption{
- libcontainerd.WithDebugLog(cli.Config.Debug),
libcontainerd.WithOOMScore(cli.Config.OOMScoreAdjust),
+ libcontainerd.WithPlugin("linux", &linux.Config{
+ Shim: daemon.DefaultShimBinary,
+ Runtime: daemon.DefaultRuntimeBinary,
+ RuntimeRoot: filepath.Join(cli.Config.Root, "runc"),
+ ShimDebug: cli.Config.Debug,
+ ShimNoMountNS: noNewNS,
+ }),
+ }
+ if cli.Config.Debug {
+ opts = append(opts, libcontainerd.WithLogLevel("debug"))
}
if cli.Config.ContainerdAddr != "" {
opts = append(opts, libcontainerd.WithRemoteAddr(cli.Config.ContainerdAddr))
} else {
opts = append(opts, libcontainerd.WithStartDaemon(true))
}
- if daemon.UsingSystemd(cli.Config) {
- args := []string{"--systemd-cgroup=true"}
- opts = append(opts, libcontainerd.WithRuntimeArgs(args))
- }
- if cli.Config.LiveRestoreEnabled {
- opts = append(opts, libcontainerd.WithLiveRestore(true))
- }
- opts = append(opts, libcontainerd.WithRuntimePath(daemon.DefaultRuntimeBinary))
- return opts
+
+ return opts, nil
}
-// getLibcontainerdRoot gets the root directory for libcontainerd/containerd to
-// store their state.
-func (cli *DaemonCli) getLibcontainerdRoot() string {
- return filepath.Join(cli.Config.ExecRoot, "libcontainerd")
+// setupConfigReloadTrap configures the SIGHUP signal to reload the configuration.
+func (cli *DaemonCli) setupConfigReloadTrap() {
+ c := make(chan os.Signal, 1)
+ signal.Notify(c, unix.SIGHUP)
+ go func() {
+ for range c {
+ cli.reloadConfig()
+ }
+ }()
}
// getSwarmRunRoot gets the root directory for swarm to store runtime state
diff --git a/cmd/dockerd/daemon_windows.go b/cmd/dockerd/daemon_windows.go
index 77bade2de3..0007ddef22 100644
--- a/cmd/dockerd/daemon_windows.go
+++ b/cmd/dockerd/daemon_windows.go
@@ -48,6 +48,10 @@ func notifyShutdown(err error) {
}
}
+func (cli *DaemonCli) getPlatformRemoteOptions() ([]libcontainerd.RemoteOption, error) {
+ return nil, nil
+}
+
// setupConfigReloadTrap configures a Win32 event to reload the configuration.
func (cli *DaemonCli) setupConfigReloadTrap() {
go func() {
@@ -65,17 +69,6 @@ func (cli *DaemonCli) setupConfigReloadTrap() {
}()
}
-func (cli *DaemonCli) getPlatformRemoteOptions() []libcontainerd.RemoteOption {
- return nil
-}
-
-// getLibcontainerdRoot gets the root directory for libcontainerd to store its
-// state. The Windows libcontainerd implementation does not need to write a spec
-// or state to disk, so this is a no-op.
-func (cli *DaemonCli) getLibcontainerdRoot() string {
- return ""
-}
-
// getSwarmRunRoot gets the root directory for swarm to store runtime state
// For example, the control socket
func (cli *DaemonCli) getSwarmRunRoot() string {
diff --git a/container/container.go b/container/container.go
index f16e6a1bde..10b9bcbe51 100644
--- a/container/container.go
+++ b/container/container.go
@@ -15,6 +15,7 @@ import (
"syscall"
"time"
+ "github.com/containerd/containerd"
containertypes "github.com/docker/docker/api/types/container"
mounttypes "github.com/docker/docker/api/types/mount"
networktypes "github.com/docker/docker/api/types/network"
@@ -61,6 +62,18 @@ var (
errInvalidNetwork = errors.New("invalid network settings while building port map info")
)
+// ExitStatus provides exit reasons for a container.
+type ExitStatus struct {
+ // The exit code with which the container exited.
+ ExitCode int
+
+ // Whether the container encountered an OOM.
+ OOMKilled bool
+
+ // Time at which the container died
+ ExitedAt time.Time
+}
+
// Container holds the structure defining a container object.
type Container struct {
StreamConfig *stream.Config
@@ -996,10 +1009,10 @@ func (container *Container) CloseStreams() error {
}
// InitializeStdio is called by libcontainerd to connect the stdio.
-func (container *Container) InitializeStdio(iop libcontainerd.IOPipe) error {
+func (container *Container) InitializeStdio(iop *libcontainerd.IOPipe) (containerd.IO, error) {
if err := container.startLogging(); err != nil {
container.Reset(false)
- return err
+ return nil, err
}
container.StreamConfig.CopyToPipe(iop)
@@ -1012,7 +1025,7 @@ func (container *Container) InitializeStdio(iop libcontainerd.IOPipe) error {
}
}
- return nil
+ return &cio{IO: iop, sc: container.StreamConfig}, nil
}
// SecretMountPath returns the path of the secret mount for the container
@@ -1069,3 +1082,21 @@ func (container *Container) CreateDaemonEnvironment(tty bool, linkedEnv []string
env = ReplaceOrAppendEnvValues(env, container.Config.Env)
return env
}
+
+type cio struct {
+ containerd.IO
+
+ sc *stream.Config
+}
+
+func (i *cio) Close() error {
+ i.IO.Close()
+
+ return i.sc.CloseStreams()
+}
+
+func (i *cio) Wait() {
+ i.sc.Wait()
+
+ i.IO.Wait()
+}
diff --git a/container/container_unix.go b/container/container_unix.go
index 796c48d984..611bdfd9fb 100644
--- a/container/container_unix.go
+++ b/container/container_unix.go
@@ -24,15 +24,6 @@ const (
containerSecretMountPath = "/run/secrets"
)
-// ExitStatus provides exit reasons for a container.
-type ExitStatus struct {
- // The exit code with which the container exited.
- ExitCode int
-
- // Whether the container encountered an OOM.
- OOMKilled bool
-}
-
// TrySetNetworkMount attempts to set the network mounts given a provided destination and
// the path to use for it; return true if the given destination was a network mount file
func (container *Container) TrySetNetworkMount(destination string, path string) bool {
diff --git a/container/container_windows.go b/container/container_windows.go
index 2dbea5905e..45d51e4f23 100644
--- a/container/container_windows.go
+++ b/container/container_windows.go
@@ -18,12 +18,6 @@ const (
containerInternalConfigsDirPath = `C:\ProgramData\Docker\internal\configs`
)
-// ExitStatus provides exit reasons for a container.
-type ExitStatus struct {
- // The exit code with which the container exited.
- ExitCode int
-}
-
// UnmountIpcMount unmounts Ipc related mounts.
// This is a NOOP on windows.
func (container *Container) UnmountIpcMount(unmount func(pth string) error) error {
diff --git a/container/state.go b/container/state.go
index cdf51d37d2..1a4c45cbab 100644
--- a/container/state.go
+++ b/container/state.go
@@ -276,6 +276,7 @@ func (s *State) SetExitCode(ec int) {
// SetRunning sets the state of the container to "running".
func (s *State) SetRunning(pid int, initial bool) {
s.ErrorMsg = ""
+ s.Paused = false
s.Running = true
s.Restarting = false
if initial {
@@ -294,9 +295,14 @@ func (s *State) SetStopped(exitStatus *ExitStatus) {
s.Paused = false
s.Restarting = false
s.Pid = 0
- s.FinishedAt = time.Now().UTC()
- s.setFromExitStatus(exitStatus)
- close(s.waitStop) // Fire waiters for stop
+ if exitStatus.ExitedAt.IsZero() {
+ s.FinishedAt = time.Now().UTC()
+ } else {
+ s.FinishedAt = exitStatus.ExitedAt
+ }
+ s.ExitCodeValue = exitStatus.ExitCode
+ s.OOMKilled = exitStatus.OOMKilled
+ close(s.waitStop) // fire waiters for stop
s.waitStop = make(chan struct{})
}
@@ -310,8 +316,9 @@ func (s *State) SetRestarting(exitStatus *ExitStatus) {
s.Paused = false
s.Pid = 0
s.FinishedAt = time.Now().UTC()
- s.setFromExitStatus(exitStatus)
- close(s.waitStop) // Fire waiters for stop
+ s.ExitCodeValue = exitStatus.ExitCode
+ s.OOMKilled = exitStatus.OOMKilled
+ close(s.waitStop) // fire waiters for stop
s.waitStop = make(chan struct{})
}
diff --git a/container/state_unix.go b/container/state_unix.go
deleted file mode 100644
index a2fa5afc28..0000000000
--- a/container/state_unix.go
+++ /dev/null
@@ -1,10 +0,0 @@
-// +build linux freebsd
-
-package container
-
-// setFromExitStatus is a platform specific helper function to set the state
-// based on the ExitStatus structure.
-func (s *State) setFromExitStatus(exitStatus *ExitStatus) {
- s.ExitCodeValue = exitStatus.ExitCode
- s.OOMKilled = exitStatus.OOMKilled
-}
diff --git a/container/state_windows.go b/container/state_windows.go
deleted file mode 100644
index 1229650efa..0000000000
--- a/container/state_windows.go
+++ /dev/null
@@ -1,7 +0,0 @@
-package container
-
-// setFromExitStatus is a platform specific helper function to set the state
-// based on the ExitStatus structure.
-func (s *State) setFromExitStatus(exitStatus *ExitStatus) {
- s.ExitCodeValue = exitStatus.ExitCode
-}
diff --git a/container/stream/streams.go b/container/stream/streams.go
index 7e734d81c4..106e2b1814 100644
--- a/container/stream/streams.go
+++ b/container/stream/streams.go
@@ -114,12 +114,12 @@ func (c *Config) CloseStreams() error {
}
// CopyToPipe connects streamconfig with a libcontainerd.IOPipe
-func (c *Config) CopyToPipe(iop libcontainerd.IOPipe) {
+func (c *Config) CopyToPipe(iop *libcontainerd.IOPipe) {
copyFunc := func(w io.Writer, r io.ReadCloser) {
c.Add(1)
go func() {
if _, err := pools.Copy(w, r); err != nil {
- logrus.Errorf("stream copy error: %+v", err)
+ logrus.Errorf("stream copy error: %v", err)
}
r.Close()
c.Done()
@@ -138,7 +138,7 @@ func (c *Config) CopyToPipe(iop libcontainerd.IOPipe) {
go func() {
pools.Copy(iop.Stdin, stdin)
if err := iop.Stdin.Close(); err != nil {
- logrus.Warnf("failed to close stdin: %+v", err)
+ logrus.Warnf("failed to close stdin: %v", err)
}
}()
}
diff --git a/daemon/checkpoint.go b/daemon/checkpoint.go
index 7bdcae5154..5765af7c5a 100644
--- a/daemon/checkpoint.go
+++ b/daemon/checkpoint.go
@@ -1,6 +1,7 @@
package daemon
import (
+ "context"
"encoding/json"
"fmt"
"io/ioutil"
@@ -17,7 +18,7 @@ var (
)
// getCheckpointDir verifies checkpoint directory for create,remove, list options and checks if checkpoint already exists
-func getCheckpointDir(checkDir, checkpointID string, ctrName string, ctrID string, ctrCheckpointDir string, create bool) (string, error) {
+func getCheckpointDir(checkDir, checkpointID, ctrName, ctrID, ctrCheckpointDir string, create bool) (string, error) {
var checkpointDir string
var err2 error
if checkDir != "" {
@@ -32,7 +33,10 @@ func getCheckpointDir(checkDir, checkpointID string, ctrName string, ctrID strin
case err == nil && stat.IsDir():
err2 = fmt.Errorf("checkpoint with name %s already exists for container %s", checkpointID, ctrName)
case err != nil && os.IsNotExist(err):
- err2 = nil
+ err2 = os.MkdirAll(checkpointAbsDir, 0700)
+ if os.IsExist(err2) {
+ err2 = nil
+ }
case err != nil:
err2 = err
case err == nil:
@@ -48,7 +52,7 @@ func getCheckpointDir(checkDir, checkpointID string, ctrName string, ctrID strin
err2 = fmt.Errorf("%s exists and is not a directory", checkpointAbsDir)
}
}
- return checkpointDir, err2
+ return checkpointAbsDir, err2
}
// CheckpointCreate checkpoints the process running in a container with CRIU
@@ -62,6 +66,10 @@ func (daemon *Daemon) CheckpointCreate(name string, config types.CheckpointCreat
return fmt.Errorf("Container %s not running", name)
}
+ if container.Config.Tty {
+ return fmt.Errorf("checkpoint not support on containers with tty")
+ }
+
if !validCheckpointNamePattern.MatchString(config.CheckpointID) {
return fmt.Errorf("Invalid checkpoint ID (%s), only %s are allowed", config.CheckpointID, validCheckpointNameChars)
}
@@ -71,8 +79,9 @@ func (daemon *Daemon) CheckpointCreate(name string, config types.CheckpointCreat
return fmt.Errorf("cannot checkpoint container %s: %s", name, err)
}
- err = daemon.containerd.CreateCheckpoint(container.ID, config.CheckpointID, checkpointDir, config.Exit)
+ err = daemon.containerd.CreateCheckpoint(context.Background(), container.ID, checkpointDir, config.Exit)
if err != nil {
+ os.RemoveAll(checkpointDir)
return fmt.Errorf("Cannot checkpoint container %s: %s", name, err)
}
diff --git a/daemon/config/config.go b/daemon/config/config.go
index 501c07af76..3408e4dc50 100644
--- a/daemon/config/config.go
+++ b/daemon/config/config.go
@@ -101,6 +101,7 @@ type CommonConfig struct {
RawLogs bool `json:"raw-logs,omitempty"`
RootDeprecated string `json:"graph,omitempty"`
Root string `json:"data-root,omitempty"`
+ ExecRoot string `json:"exec-root,omitempty"`
SocketGroup string `json:"group,omitempty"`
CorsHeaders string `json:"api-cors-header,omitempty"`
@@ -172,6 +173,10 @@ type CommonConfig struct {
NodeGenericResources string `json:"node-generic-resources,omitempty"`
// NetworkControlPlaneMTU allows to specify the control plane MTU, this will allow to optimize the network use in some components
NetworkControlPlaneMTU int `json:"network-control-plane-mtu,omitempty"`
+
+ // ContainerdAddr is the address used to connect to containerd if we're
+ // not starting it ourselves
+ ContainerdAddr string `json:"containerd,omitempty"`
}
// IsValueSet returns true if a configuration value
diff --git a/daemon/config/config_common_unix.go b/daemon/config/config_common_unix.go
index d11cceba20..cea3fffdda 100644
--- a/daemon/config/config_common_unix.go
+++ b/daemon/config/config_common_unix.go
@@ -11,8 +11,6 @@ import (
// CommonUnixConfig defines configuration of a docker daemon that is
// common across Unix platforms.
type CommonUnixConfig struct {
- ExecRoot string `json:"exec-root,omitempty"`
- ContainerdAddr string `json:"containerd,omitempty"`
Runtimes map[string]types.Runtime `json:"runtimes,omitempty"`
DefaultRuntime string `json:"default-runtime,omitempty"`
DefaultInitBinary string `json:"default-init,omitempty"`
diff --git a/daemon/daemon.go b/daemon/daemon.go
index fb6ac1feb3..ece1f2a8fe 100644
--- a/daemon/daemon.go
+++ b/daemon/daemon.go
@@ -18,7 +18,7 @@ import (
"sync"
"time"
- containerd "github.com/containerd/containerd/api/grpc/types"
+ "github.com/docker/docker/api/errdefs"
"github.com/docker/docker/api/types"
containertypes "github.com/docker/docker/api/types/container"
"github.com/docker/docker/api/types/swarm"
@@ -62,11 +62,10 @@ import (
"github.com/pkg/errors"
)
-var (
- // DefaultRuntimeBinary is the default runtime to be used by
- // containerd if none is specified
- DefaultRuntimeBinary = "docker-runc"
+// MainNamespace is the name of the namespace used for user containers
+const MainNamespace = "moby"
+var (
errSystemNotSupported = errors.New("the Docker daemon is not supported on this platform")
)
@@ -170,7 +169,7 @@ func (daemon *Daemon) restore() error {
continue
}
container.RWLayer = rwlayer
- logrus.Debugf("Loaded container %v", container.ID)
+ logrus.Debugf("Loaded container %v, isRunning: %v", container.ID, container.IsRunning())
containers[container.ID] = container
} else {
@@ -209,8 +208,10 @@ func (daemon *Daemon) restore() error {
}
}
- var wg sync.WaitGroup
- var mapLock sync.Mutex
+ var (
+ wg sync.WaitGroup
+ mapLock sync.Mutex
+ )
for _, c := range containers {
wg.Add(1)
go func(c *container.Container) {
@@ -221,11 +222,74 @@ func (daemon *Daemon) restore() error {
}
daemon.setStateCounter(c)
+
+ logrus.WithFields(logrus.Fields{
+ "container": c.ID,
+ "running": c.IsRunning(),
+ "paused": c.IsPaused(),
+ }).Debug("restoring container")
+
+ var (
+ err error
+ alive bool
+ ec uint32
+ exitedAt time.Time
+ )
+
+ alive, _, err = daemon.containerd.Restore(context.Background(), c.ID, c.InitializeStdio)
+ if err != nil && !errdefs.IsNotFound(err) {
+ logrus.Errorf("Failed to restore container %s with containerd: %s", c.ID, err)
+ return
+ }
+ if !alive {
+ ec, exitedAt, err = daemon.containerd.DeleteTask(context.Background(), c.ID)
+ if err != nil && !errdefs.IsNotFound(err) {
+ logrus.WithError(err).Errorf("Failed to delete container %s from containerd", c.ID)
+ return
+ }
+ }
+
if c.IsRunning() || c.IsPaused() {
c.RestartManager().Cancel() // manually start containers because some need to wait for swarm networking
- if err := daemon.containerd.Restore(c.ID, c.InitializeStdio); err != nil {
- logrus.Errorf("Failed to restore %s with containerd: %s", c.ID, err)
- return
+
+ if c.IsPaused() && alive {
+ s, err := daemon.containerd.Status(context.Background(), c.ID)
+ if err != nil {
+ logrus.WithError(err).WithField("container", c.ID).
+ Errorf("Failed to get container status")
+ } else {
+ logrus.WithField("container", c.ID).WithField("state", s).
+ Info("restored container paused")
+ switch s {
+ case libcontainerd.StatusPaused, libcontainerd.StatusPausing:
+ // nothing to do
+ case libcontainerd.StatusStopped:
+ alive = false
+ case libcontainerd.StatusUnknown:
+ logrus.WithField("container", c.ID).
+ Error("Unknown status for container during restore")
+ default:
+ // running
+ c.Lock()
+ c.Paused = false
+ daemon.setStateCounter(c)
+ if err := c.CheckpointTo(daemon.containersReplica); err != nil {
+ logrus.WithError(err).WithField("container", c.ID).
+ Error("Failed to update stopped container state")
+ }
+ c.Unlock()
+ }
+ }
+ }
+
+ if !alive {
+ c.Lock()
+ c.SetStopped(&container.ExitStatus{ExitCode: int(ec), ExitedAt: exitedAt})
+ daemon.Cleanup(c)
+ if err := c.CheckpointTo(daemon.containersReplica); err != nil {
+ logrus.Errorf("Failed to update stopped container %s state: %v", c.ID, err)
+ }
+ c.Unlock()
}
// we call Mount and then Unmount to get BaseFs of the container
@@ -253,11 +317,9 @@ func (daemon *Daemon) restore() error {
activeSandboxes[c.NetworkSettings.SandboxID] = options
mapLock.Unlock()
}
+ } else {
+ // get list of containers we need to restart
- }
- // fixme: only if not running
- // get list of containers we need to restart
- if !c.IsRunning() && !c.IsPaused() {
// Do not autostart containers which
// has endpoints in a swarm scope
// network yet since the cluster is
@@ -289,7 +351,7 @@ func (daemon *Daemon) restore() error {
c.RemovalInProgress = false
c.Dead = true
if err := c.CheckpointTo(daemon.containersReplica); err != nil {
- logrus.Errorf("Failed to update container %s state: %v", c.ID, err)
+ logrus.Errorf("Failed to update RemovalInProgress container %s state: %v", c.ID, err)
}
}
c.Unlock()
@@ -559,6 +621,7 @@ func NewDaemon(config *config.Config, registryService registry.Service, containe
d := &Daemon{
configStore: config,
+ PluginStore: pluginStore,
startupDone: make(chan struct{}),
}
// Ensure the daemon is properly shutdown if there is a failure during
@@ -606,6 +669,16 @@ func NewDaemon(config *config.Config, registryService registry.Service, containe
return nil, err
}
+ // Create the directory where we'll store the runtime scripts (i.e. in
+ // order to support runtimeArgs)
+ daemonRuntimes := filepath.Join(config.Root, "runtimes")
+ if err := system.MkdirAll(daemonRuntimes, 0700, ""); err != nil && !os.IsExist(err) {
+ return nil, err
+ }
+ if err := d.loadRuntimes(); err != nil {
+ return nil, err
+ }
+
if runtime.GOOS == "windows" {
if err := system.MkdirAll(filepath.Join(config.Root, "credentialspecs"), 0, ""); err != nil && !os.IsExist(err) {
return nil, err
@@ -635,7 +708,6 @@ func NewDaemon(config *config.Config, registryService registry.Service, containe
}
d.RegistryService = registryService
- d.PluginStore = pluginStore
logger.RegisterPluginGetter(d.PluginStore)
metricsSockPath, err := d.listenMetricsSock()
@@ -645,7 +717,7 @@ func NewDaemon(config *config.Config, registryService registry.Service, containe
registerMetricsPluginCallback(d.PluginStore, metricsSockPath)
createPluginExec := func(m *plugin.Manager) (plugin.Executor, error) {
- return pluginexec.New(containerdRemote, m)
+ return pluginexec.New(getPluginExecRoot(config.Root), containerdRemote, m)
}
// Plugin system initialization should happen before restore. Do not change order.
@@ -802,13 +874,13 @@ func NewDaemon(config *config.Config, registryService registry.Service, containe
d.idMappings = idMappings
d.seccompEnabled = sysInfo.Seccomp
d.apparmorEnabled = sysInfo.AppArmor
+ d.containerdRemote = containerdRemote
d.linkIndex = newLinkIndex()
- d.containerdRemote = containerdRemote
go d.execCommandGC()
- d.containerd, err = containerdRemote.Client(d)
+ d.containerd, err = containerdRemote.NewClient(MainNamespace, d)
if err != nil {
return nil, err
}
@@ -1171,19 +1243,6 @@ func (daemon *Daemon) networkOptions(dconfig *config.Config, pg plugingetter.Plu
return options, nil
}
-func copyBlkioEntry(entries []*containerd.BlkioStatsEntry) []types.BlkioStatEntry {
- out := make([]types.BlkioStatEntry, len(entries))
- for i, re := range entries {
- out[i] = types.BlkioStatEntry{
- Major: re.Major,
- Minor: re.Minor,
- Op: re.Op,
- Value: re.Value,
- }
- }
- return out
-}
-
// GetCluster returns the cluster
func (daemon *Daemon) GetCluster() Cluster {
return daemon.cluster
diff --git a/daemon/daemon_unix.go b/daemon/daemon_unix.go
index 2b0e206629..b0b624772a 100644
--- a/daemon/daemon_unix.go
+++ b/daemon/daemon_unix.go
@@ -5,6 +5,7 @@ package daemon
import (
"bufio"
"bytes"
+ "context"
"fmt"
"io/ioutil"
"net"
@@ -16,6 +17,7 @@ import (
"strings"
"time"
+ containerd_cgroups "github.com/containerd/cgroups"
"github.com/docker/docker/api/types"
"github.com/docker/docker/api/types/blkiodev"
pblkiodev "github.com/docker/docker/api/types/blkiodev"
@@ -26,6 +28,7 @@ import (
"github.com/docker/docker/opts"
"github.com/docker/docker/pkg/containerfs"
"github.com/docker/docker/pkg/idtools"
+ "github.com/docker/docker/pkg/ioutils"
"github.com/docker/docker/pkg/parsers"
"github.com/docker/docker/pkg/parsers/kernel"
"github.com/docker/docker/pkg/sysinfo"
@@ -38,7 +41,6 @@ import (
"github.com/docker/libnetwork/netutils"
"github.com/docker/libnetwork/options"
lntypes "github.com/docker/libnetwork/types"
- "github.com/golang/protobuf/ptypes"
"github.com/opencontainers/runc/libcontainer/cgroups"
rsystem "github.com/opencontainers/runc/libcontainer/system"
specs "github.com/opencontainers/runtime-spec/specs-go"
@@ -50,6 +52,14 @@ import (
)
const (
+ // DefaultShimBinary is the default shim to be used by containerd if none
+ // is specified
+ DefaultShimBinary = "docker-containerd-shim"
+
+ // DefaultRuntimeBinary is the default runtime to be used by
+ // containerd if none is specified
+ DefaultRuntimeBinary = "docker-runc"
+
// See https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git/tree/kernel/sched/sched.h?id=8cd9234c64c584432f6992fe944ca9e46ca8ea76#n269
linuxMinCPUShares = 2
linuxMaxCPUShares = 262144
@@ -63,6 +73,10 @@ const (
// constant for cgroup drivers
cgroupFsDriver = "cgroupfs"
cgroupSystemdDriver = "systemd"
+
+ // DefaultRuntimeName is the default runtime to be used by
+ // containerd if none is specified
+ DefaultRuntimeName = "docker-runc"
)
type containerGetter interface {
@@ -623,6 +637,54 @@ func verifyPlatformContainerSettings(daemon *Daemon, hostConfig *containertypes.
return warnings, nil
}
+ // loadRuntimes generates the runtime wrapper scripts for the runtimes listed
+ // in the daemon's current configuration.
+ func (daemon *Daemon) loadRuntimes() error {
+ 	return daemon.initRuntimes(daemon.configStore.Runtimes)
+ }
+
+ // initRuntimes writes a /bin/sh wrapper script for every runtime in runtimes
+ // that carries extra arguments, then swaps the freshly generated directory in
+ // as <root>/runtimes.
+ //
+ // The scripts are first written to a temporary directory under the daemon
+ // root. The deferred function either discards that directory (when script
+ // generation failed) or renames the current runtimes directory to
+ // "runtimes-old", moves the new one into place and removes the old one.
+ // err is a named result so the deferred function can both inspect the outcome
+ // of the generation step and report a failed rename to the caller.
+ func (daemon *Daemon) initRuntimes(runtimes map[string]types.Runtime) (err error) {
+ 	runtimeDir := filepath.Join(daemon.configStore.Root, "runtimes")
+ 	oldDir := runtimeDir + "-old"
+ 	// Remove old temp directory if any (best effort: a leftover from a
+ 	// previous run is not fatal at this point).
+ 	os.RemoveAll(oldDir)
+ 	tmpDir, err := ioutils.TempDir(daemon.configStore.Root, "gen-runtimes")
+ 	if err != nil {
+ 		return errors.Wrapf(err, "failed to get temp dir to generate runtime scripts")
+ 	}
+ 	defer func() {
+ 		if err != nil {
+ 			// Generation failed: drop the partially populated directory.
+ 			if err1 := os.RemoveAll(tmpDir); err1 != nil {
+ 				logrus.WithError(err1).WithField("dir", tmpDir).
+ 					Warnf("failed to remove tmp dir")
+ 			}
+ 			return
+ 		}
+
+ 		if err = os.Rename(runtimeDir, oldDir); err != nil {
+ 			return
+ 		}
+ 		if err = os.Rename(tmpDir, runtimeDir); err != nil {
+ 			err = errors.Wrapf(err, "failed to setup runtimes dir, new containers may not start")
+ 			return
+ 		}
+ 		// The new directory is in place; failing to delete the old one is
+ 		// only worth a warning. A local variable is used on purpose so the
+ 		// named result stays nil and the caller does not see a failure.
+ 		if rmErr := os.RemoveAll(oldDir); rmErr != nil {
+ 			logrus.WithError(rmErr).WithField("dir", oldDir).
+ 				Warnf("failed to remove old runtimes dir")
+ 		}
+ 	}()
+
+ 	for name, rt := range runtimes {
+ 		// Runtimes without extra arguments need no wrapper script.
+ 		if len(rt.Args) == 0 {
+ 			continue
+ 		}
+
+ 		script := filepath.Join(tmpDir, name)
+ 		content := fmt.Sprintf("#!/bin/sh\n%s %s $@\n", rt.Path, strings.Join(rt.Args, " "))
+ 		if err := ioutil.WriteFile(script, []byte(content), 0700); err != nil {
+ 			return err
+ 		}
+ 	}
+ 	return nil
+ }
+
// reloadPlatform updates configuration with platform specific options
// and updates the passed attributes
func (daemon *Daemon) reloadPlatform(conf *config.Config, attributes map[string]string) error {
@@ -631,9 +693,12 @@ func (daemon *Daemon) reloadPlatform(conf *config.Config, attributes map[string]
}
if conf.IsValueSet("runtimes") {
- daemon.configStore.Runtimes = conf.Runtimes
// Always set the default one
- daemon.configStore.Runtimes[config.StockRuntimeName] = types.Runtime{Path: DefaultRuntimeBinary}
+ conf.Runtimes[config.StockRuntimeName] = types.Runtime{Path: DefaultRuntimeBinary}
+ if err := daemon.initRuntimes(conf.Runtimes); err != nil {
+ return err
+ }
+ daemon.configStore.Runtimes = conf.Runtimes
}
if conf.DefaultRuntime != "" {
@@ -692,7 +757,7 @@ func verifyDaemonSettings(conf *config.Config) error {
if conf.Runtimes == nil {
conf.Runtimes = make(map[string]types.Runtime)
}
- conf.Runtimes[config.StockRuntimeName] = types.Runtime{Path: DefaultRuntimeBinary}
+ conf.Runtimes[config.StockRuntimeName] = types.Runtime{Path: DefaultRuntimeName}
return nil
}
@@ -1214,11 +1279,24 @@ func (daemon *Daemon) conditionalUnmountOnCleanup(container *container.Container
return daemon.Unmount(container)
}
+ // copyBlkioEntry converts the blkio entries reported through the containerd
+ // cgroups package into their API representation, one output element per
+ // input entry and in the same order.
+ func copyBlkioEntry(entries []*containerd_cgroups.BlkIOEntry) []types.BlkioStatEntry {
+ 	converted := make([]types.BlkioStatEntry, len(entries))
+ 	for idx := range entries {
+ 		src := entries[idx]
+ 		dst := &converted[idx]
+ 		dst.Major = src.Major
+ 		dst.Minor = src.Minor
+ 		dst.Op = src.Op
+ 		dst.Value = src.Value
+ 	}
+ 	return converted
+ }
+
func (daemon *Daemon) stats(c *container.Container) (*types.StatsJSON, error) {
if !c.IsRunning() {
return nil, errNotRunning(c.ID)
}
- stats, err := daemon.containerd.Stats(c.ID)
+ cs, err := daemon.containerd.Stats(context.Background(), c.ID)
if err != nil {
if strings.Contains(err.Error(), "container not found") {
return nil, containerNotFound(c.ID)
@@ -1226,54 +1304,98 @@ func (daemon *Daemon) stats(c *container.Container) (*types.StatsJSON, error) {
return nil, err
}
s := &types.StatsJSON{}
- cgs := stats.CgroupStats
- if cgs != nil {
+ s.Read = cs.Read
+ stats := cs.Metrics
+ if stats.Blkio != nil {
s.BlkioStats = types.BlkioStats{
- IoServiceBytesRecursive: copyBlkioEntry(cgs.BlkioStats.IoServiceBytesRecursive),
- IoServicedRecursive: copyBlkioEntry(cgs.BlkioStats.IoServicedRecursive),
- IoQueuedRecursive: copyBlkioEntry(cgs.BlkioStats.IoQueuedRecursive),
- IoServiceTimeRecursive: copyBlkioEntry(cgs.BlkioStats.IoServiceTimeRecursive),
- IoWaitTimeRecursive: copyBlkioEntry(cgs.BlkioStats.IoWaitTimeRecursive),
- IoMergedRecursive: copyBlkioEntry(cgs.BlkioStats.IoMergedRecursive),
- IoTimeRecursive: copyBlkioEntry(cgs.BlkioStats.IoTimeRecursive),
- SectorsRecursive: copyBlkioEntry(cgs.BlkioStats.SectorsRecursive),
- }
- cpu := cgs.CpuStats
+ IoServiceBytesRecursive: copyBlkioEntry(stats.Blkio.IoServiceBytesRecursive),
+ IoServicedRecursive: copyBlkioEntry(stats.Blkio.IoServicedRecursive),
+ IoQueuedRecursive: copyBlkioEntry(stats.Blkio.IoQueuedRecursive),
+ IoServiceTimeRecursive: copyBlkioEntry(stats.Blkio.IoServiceTimeRecursive),
+ IoWaitTimeRecursive: copyBlkioEntry(stats.Blkio.IoWaitTimeRecursive),
+ IoMergedRecursive: copyBlkioEntry(stats.Blkio.IoMergedRecursive),
+ IoTimeRecursive: copyBlkioEntry(stats.Blkio.IoTimeRecursive),
+ SectorsRecursive: copyBlkioEntry(stats.Blkio.SectorsRecursive),
+ }
+ }
+ if stats.CPU != nil {
s.CPUStats = types.CPUStats{
CPUUsage: types.CPUUsage{
- TotalUsage: cpu.CpuUsage.TotalUsage,
- PercpuUsage: cpu.CpuUsage.PercpuUsage,
- UsageInKernelmode: cpu.CpuUsage.UsageInKernelmode,
- UsageInUsermode: cpu.CpuUsage.UsageInUsermode,
+ TotalUsage: stats.CPU.Usage.Total,
+ PercpuUsage: stats.CPU.Usage.PerCPU,
+ UsageInKernelmode: stats.CPU.Usage.Kernel,
+ UsageInUsermode: stats.CPU.Usage.User,
},
ThrottlingData: types.ThrottlingData{
- Periods: cpu.ThrottlingData.Periods,
- ThrottledPeriods: cpu.ThrottlingData.ThrottledPeriods,
- ThrottledTime: cpu.ThrottlingData.ThrottledTime,
+ Periods: stats.CPU.Throttling.Periods,
+ ThrottledPeriods: stats.CPU.Throttling.ThrottledPeriods,
+ ThrottledTime: stats.CPU.Throttling.ThrottledTime,
},
}
- mem := cgs.MemoryStats.Usage
- s.MemoryStats = types.MemoryStats{
- Usage: mem.Usage,
- MaxUsage: mem.MaxUsage,
- Stats: cgs.MemoryStats.Stats,
- Failcnt: mem.Failcnt,
- Limit: mem.Limit,
+ }
+
+ if stats.Memory != nil {
+ raw := make(map[string]uint64)
+ raw["cache"] = stats.Memory.Cache
+ raw["rss"] = stats.Memory.RSS
+ raw["rss_huge"] = stats.Memory.RSSHuge
+ raw["mapped_file"] = stats.Memory.MappedFile
+ raw["dirty"] = stats.Memory.Dirty
+ raw["writeback"] = stats.Memory.Writeback
+ raw["pgpgin"] = stats.Memory.PgPgIn
+ raw["pgpgout"] = stats.Memory.PgPgOut
+ raw["pgfault"] = stats.Memory.PgFault
+ raw["pgmajfault"] = stats.Memory.PgMajFault
+ raw["inactive_anon"] = stats.Memory.InactiveAnon
+ raw["active_anon"] = stats.Memory.ActiveAnon
+ raw["inactive_file"] = stats.Memory.InactiveFile
+ raw["active_file"] = stats.Memory.ActiveFile
+ raw["unevictable"] = stats.Memory.Unevictable
+ raw["hierarchical_memory_limit"] = stats.Memory.HierarchicalMemoryLimit
+ raw["hierarchical_memsw_limit"] = stats.Memory.HierarchicalSwapLimit
+ raw["total_cache"] = stats.Memory.TotalCache
+ raw["total_rss"] = stats.Memory.TotalRSS
+ raw["total_rss_huge"] = stats.Memory.TotalRSSHuge
+ raw["total_mapped_file"] = stats.Memory.TotalMappedFile
+ raw["total_dirty"] = stats.Memory.TotalDirty
+ raw["total_writeback"] = stats.Memory.TotalWriteback
+ raw["total_pgpgin"] = stats.Memory.TotalPgPgIn
+ raw["total_pgpgout"] = stats.Memory.TotalPgPgOut
+ raw["total_pgfault"] = stats.Memory.TotalPgFault
+ raw["total_pgmajfault"] = stats.Memory.TotalPgMajFault
+ raw["total_inactive_anon"] = stats.Memory.TotalInactiveAnon
+ raw["total_active_anon"] = stats.Memory.TotalActiveAnon
+ raw["total_inactive_file"] = stats.Memory.TotalInactiveFile
+ raw["total_active_file"] = stats.Memory.TotalActiveFile
+ raw["total_unevictable"] = stats.Memory.TotalUnevictable
+
+ if stats.Memory.Usage != nil {
+ s.MemoryStats = types.MemoryStats{
+ Stats: raw,
+ Usage: stats.Memory.Usage.Usage,
+ MaxUsage: stats.Memory.Usage.Max,
+ Limit: stats.Memory.Usage.Limit,
+ Failcnt: stats.Memory.Usage.Failcnt,
+ }
+ } else {
+ s.MemoryStats = types.MemoryStats{
+ Stats: raw,
+ }
}
+
// if the container does not set memory limit, use the machineMemory
- if mem.Limit > daemon.machineMemory && daemon.machineMemory > 0 {
+ if s.MemoryStats.Limit > daemon.machineMemory && daemon.machineMemory > 0 {
s.MemoryStats.Limit = daemon.machineMemory
}
- if cgs.PidsStats != nil {
- s.PidsStats = types.PidsStats{
- Current: cgs.PidsStats.Current,
- }
- }
}
- s.Read, err = ptypes.Timestamp(stats.Timestamp)
- if err != nil {
- return nil, err
+
+ if stats.Pids != nil {
+ s.PidsStats = types.PidsStats{
+ Current: stats.Pids.Current,
+ Limit: stats.Pids.Limit,
+ }
}
+
return s, nil
}
diff --git a/daemon/daemon_windows.go b/daemon/daemon_windows.go
index 3c179ccc3e..a79ed4f071 100644
--- a/daemon/daemon_windows.go
+++ b/daemon/daemon_windows.go
@@ -1,6 +1,7 @@
package daemon
import (
+ "context"
"fmt"
"os"
"path/filepath"
@@ -532,7 +533,7 @@ func (daemon *Daemon) stats(c *container.Container) (*types.StatsJSON, error) {
}
// Obtain the stats from HCS via libcontainerd
- stats, err := daemon.containerd.Stats(c.ID)
+ stats, err := daemon.containerd.Stats(context.Background(), c.ID)
if err != nil {
if strings.Contains(err.Error(), "container not found") {
return nil, containerNotFound(c.ID)
@@ -542,49 +543,48 @@ func (daemon *Daemon) stats(c *container.Container) (*types.StatsJSON, error) {
// Start with an empty structure
s := &types.StatsJSON{}
+ s.Stats.Read = stats.Read
+ s.Stats.NumProcs = platform.NumProcs()
- // Populate the CPU/processor statistics
- s.CPUStats = types.CPUStats{
- CPUUsage: types.CPUUsage{
- TotalUsage: stats.Processor.TotalRuntime100ns,
- UsageInKernelmode: stats.Processor.RuntimeKernel100ns,
- UsageInUsermode: stats.Processor.RuntimeKernel100ns,
- },
- }
-
- // Populate the memory statistics
- s.MemoryStats = types.MemoryStats{
- Commit: stats.Memory.UsageCommitBytes,
- CommitPeak: stats.Memory.UsageCommitPeakBytes,
- PrivateWorkingSet: stats.Memory.UsagePrivateWorkingSetBytes,
- }
-
- // Populate the storage statistics
- s.StorageStats = types.StorageStats{
- ReadCountNormalized: stats.Storage.ReadCountNormalized,
- ReadSizeBytes: stats.Storage.ReadSizeBytes,
- WriteCountNormalized: stats.Storage.WriteCountNormalized,
- WriteSizeBytes: stats.Storage.WriteSizeBytes,
- }
-
- // Populate the network statistics
- s.Networks = make(map[string]types.NetworkStats)
-
- for _, nstats := range stats.Network {
- s.Networks[nstats.EndpointId] = types.NetworkStats{
- RxBytes: nstats.BytesReceived,
- RxPackets: nstats.PacketsReceived,
- RxDropped: nstats.DroppedPacketsIncoming,
- TxBytes: nstats.BytesSent,
- TxPackets: nstats.PacketsSent,
- TxDropped: nstats.DroppedPacketsOutgoing,
+ if stats.HCSStats != nil {
+ hcss := stats.HCSStats
+ 	// Populate the CPU/processor statistics. User-mode time comes from
+ 	// the user runtime counter, not the kernel one.
+ 	s.CPUStats = types.CPUStats{
+ 		CPUUsage: types.CPUUsage{
+ 			TotalUsage:        hcss.Processor.TotalRuntime100ns,
+ 			UsageInKernelmode: hcss.Processor.RuntimeKernel100ns,
+ 			UsageInUsermode:   hcss.Processor.RuntimeUser100ns,
+ 		},
+ 	}
- }
- // Set the timestamp
- s.Stats.Read = stats.Timestamp
- s.Stats.NumProcs = platform.NumProcs()
+ // Populate the memory statistics
+ s.MemoryStats = types.MemoryStats{
+ Commit: hcss.Memory.UsageCommitBytes,
+ CommitPeak: hcss.Memory.UsageCommitPeakBytes,
+ PrivateWorkingSet: hcss.Memory.UsagePrivateWorkingSetBytes,
+ }
+ // Populate the storage statistics
+ s.StorageStats = types.StorageStats{
+ ReadCountNormalized: hcss.Storage.ReadCountNormalized,
+ ReadSizeBytes: hcss.Storage.ReadSizeBytes,
+ WriteCountNormalized: hcss.Storage.WriteCountNormalized,
+ WriteSizeBytes: hcss.Storage.WriteSizeBytes,
+ }
+
+ // Populate the network statistics
+ s.Networks = make(map[string]types.NetworkStats)
+ for _, nstats := range hcss.Network {
+ s.Networks[nstats.EndpointId] = types.NetworkStats{
+ RxBytes: nstats.BytesReceived,
+ RxPackets: nstats.PacketsReceived,
+ RxDropped: nstats.DroppedPacketsIncoming,
+ TxBytes: nstats.BytesSent,
+ TxPackets: nstats.PacketsSent,
+ TxDropped: nstats.DroppedPacketsOutgoing,
+ }
+ }
+ }
return s, nil
}
@@ -664,3 +664,11 @@ func getRealPath(path string) (string, error) {
}
return fileutils.ReadSymlinkedDirectory(path)
}
+
+func (daemon *Daemon) loadRuntimes() error {
+ return nil
+}
+
+func (daemon *Daemon) initRuntimes(_ map[string]types.Runtime) error {
+ return nil
+}
diff --git a/daemon/delete.go b/daemon/delete.go
index 3009400c09..6db08f38cd 100644
--- a/daemon/delete.go
+++ b/daemon/delete.go
@@ -141,6 +141,7 @@ func (daemon *Daemon) cleanupContainer(container *container.Container, forceRemo
}
container.SetRemoved()
stateCtr.del(container.ID)
+
daemon.LogContainerEvent(container, "destroy")
return nil
}
diff --git a/daemon/errors.go b/daemon/errors.go
index 9a9d45598d..889261fa35 100644
--- a/daemon/errors.go
+++ b/daemon/errors.go
@@ -64,6 +64,11 @@ func errExecPaused(id string) error {
return stateConflictError{cause}
}
+ // errNotPaused returns a state-conflict error reporting that the container
+ // identified by id is not paused.
+ func errNotPaused(id string) error {
+ 	cause := errors.Errorf("Container %s is not paused", id)
+ 	return stateConflictError{cause}
+ }
+
type nameConflictError struct {
id string
name string
diff --git a/daemon/exec.go b/daemon/exec.go
index 9b3e583bf9..afdfc9c2bf 100644
--- a/daemon/exec.go
+++ b/daemon/exec.go
@@ -13,10 +13,10 @@ import (
"github.com/docker/docker/container"
"github.com/docker/docker/container/stream"
"github.com/docker/docker/daemon/exec"
- "github.com/docker/docker/libcontainerd"
"github.com/docker/docker/pkg/pools"
"github.com/docker/docker/pkg/signal"
"github.com/docker/docker/pkg/term"
+ specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)
@@ -31,6 +31,14 @@ func (d *Daemon) registerExecCommand(container *container.Container, config *exe
d.execCommands.Add(config.ID, config)
}
+ // registerExecPidUnlocked records the pid of an exec in both the container's
+ // and the daemon's exec stores through their SetPidUnlocked methods.
+ // NOTE(review): presumably the caller is expected to hold the locks of the
+ // stores being updated — confirm against every call site.
+ func (d *Daemon) registerExecPidUnlocked(container *container.Container, config *exec.Config) {
+ 	logrus.Debugf("registering pid %v for exec %v", config.Pid, config.ID)
+ 	execID, pid := config.ID, config.Pid
+ 	// The container keeps its execs so they can be killed gracefully
+ 	// whenever the container is stopped or removed.
+ 	container.ExecCommands.SetPidUnlocked(execID, pid)
+ 	// The daemon keeps them for easy access via the Engine API.
+ 	d.execCommands.SetPidUnlocked(execID, pid)
+ }
+
// ExecExists looks up the exec instance and returns a bool if it exists or not.
// It will also return the error produced by `getConfig`
func (d *Daemon) ExecExists(name string) (bool, error) {
@@ -70,8 +78,8 @@ func (d *Daemon) getExecConfig(name string) (*exec.Config, error) {
}
func (d *Daemon) unregisterExecCommand(container *container.Container, execConfig *exec.Config) {
- container.ExecCommands.Delete(execConfig.ID)
- d.execCommands.Delete(execConfig.ID)
+ container.ExecCommands.Delete(execConfig.ID, execConfig.Pid)
+ d.execCommands.Delete(execConfig.ID, execConfig.Pid)
}
func (d *Daemon) getActiveContainer(name string) (*container.Container, error) {
@@ -181,7 +189,7 @@ func (d *Daemon) ContainerExecStart(ctx context.Context, name string, stdin io.R
logrus.Errorf("failed to cleanup exec %s streams: %s", c.ID, err)
}
ec.Unlock()
- c.ExecCommands.Delete(ec.ID)
+ c.ExecCommands.Delete(ec.ID, ec.Pid)
}
}()
@@ -207,13 +215,17 @@ func (d *Daemon) ContainerExecStart(ctx context.Context, name string, stdin io.R
ec.StreamConfig.NewNopInputPipe()
}
- p := libcontainerd.Process{
+ p := &specs.Process{
Args: append([]string{ec.Entrypoint}, ec.Args...),
Env: ec.Env,
Terminal: ec.Tty,
+ Cwd: c.Config.WorkingDir,
+ }
+ if p.Cwd == "" {
+ p.Cwd = "/"
}
- if err := execSetPlatformOpt(c, ec, &p); err != nil {
+ if err := d.execSetPlatformOpt(c, ec, p); err != nil {
return err
}
@@ -231,22 +243,28 @@ func (d *Daemon) ContainerExecStart(ctx context.Context, name string, stdin io.R
ec.StreamConfig.AttachStreams(&attachConfig)
attachErr := ec.StreamConfig.CopyStreams(ctx, &attachConfig)
- systemPid, err := d.containerd.AddProcess(ctx, c.ID, name, p, ec.InitializeStdio)
+ // Synchronize with libcontainerd event loop
+ ec.Lock()
+ c.ExecCommands.Lock()
+ systemPid, err := d.containerd.Exec(ctx, c.ID, ec.ID, p, cStdin != nil, ec.InitializeStdio)
if err != nil {
+ c.ExecCommands.Unlock()
+ ec.Unlock()
return translateContainerdStartErr(ec.Entrypoint, ec.SetExitCode, err)
}
- ec.Lock()
ec.Pid = systemPid
+ d.registerExecPidUnlocked(c, ec)
+ c.ExecCommands.Unlock()
ec.Unlock()
select {
case <-ctx.Done():
logrus.Debugf("Sending TERM signal to process %v in container %v", name, c.ID)
- d.containerd.SignalProcess(c.ID, name, int(signal.SignalMap["TERM"]))
+ d.containerd.SignalProcess(ctx, c.ID, name, int(signal.SignalMap["TERM"]))
select {
case <-time.After(termProcessTimeout * time.Second):
logrus.Infof("Container %v, process %v failed to exit within %d seconds of signal TERM - using the force", c.ID, name, termProcessTimeout)
- d.containerd.SignalProcess(c.ID, name, int(signal.SignalMap["KILL"]))
+ d.containerd.SignalProcess(ctx, c.ID, name, int(signal.SignalMap["KILL"]))
case <-attachErr:
// TERM signal worked
}
@@ -273,7 +291,7 @@ func (d *Daemon) execCommandGC() {
for id, config := range d.execCommands.Commands() {
if config.CanRemove {
cleaned++
- d.execCommands.Delete(id)
+ d.execCommands.Delete(id, config.Pid)
} else {
if _, exists := liveExecCommands[id]; !exists {
config.CanRemove = true
diff --git a/daemon/exec/exec.go b/daemon/exec/exec.go
index f4efb4d54e..7aa2383e32 100644
--- a/daemon/exec/exec.go
+++ b/daemon/exec/exec.go
@@ -4,6 +4,7 @@ import (
"runtime"
"sync"
+ "github.com/containerd/containerd"
"github.com/docker/docker/container/stream"
"github.com/docker/docker/libcontainerd"
"github.com/docker/docker/pkg/stringid"
@@ -42,8 +43,26 @@ func NewConfig() *Config {
}
}
+ // cio couples a containerd IO with the exec's stream configuration so that
+ // closing or waiting on the IO also closes or waits on the streams.
+ type cio struct {
+ 	containerd.IO
+
+ 	sc *stream.Config
+ }
+
+ // Close closes the wrapped IO and then the exec streams. Both are always
+ // closed; an error from closing the streams takes precedence, otherwise the
+ // error from closing the wrapped IO (if any) is returned instead of being
+ // dropped.
+ func (i *cio) Close() error {
+ 	ioErr := i.IO.Close()
+
+ 	if err := i.sc.CloseStreams(); err != nil {
+ 		return err
+ 	}
+ 	return ioErr
+ }
+
+ // Wait blocks on the exec streams first and then on the wrapped IO.
+ func (i *cio) Wait() {
+ 	i.sc.Wait()
+
+ 	i.IO.Wait()
+ }
+
// InitializeStdio is called by libcontainerd to connect the stdio.
-func (c *Config) InitializeStdio(iop libcontainerd.IOPipe) error {
+func (c *Config) InitializeStdio(iop *libcontainerd.IOPipe) (containerd.IO, error) {
c.StreamConfig.CopyToPipe(iop)
if c.StreamConfig.Stdin() == nil && !c.Tty && runtime.GOOS == "windows" {
@@ -54,7 +73,7 @@ func (c *Config) InitializeStdio(iop libcontainerd.IOPipe) error {
}
}
- return nil
+ return &cio{IO: iop, sc: c.StreamConfig}, nil
}
// CloseStreams closes the stdio streams for the exec
@@ -69,45 +88,66 @@ func (c *Config) SetExitCode(code int) {
// Store keeps track of the exec configurations.
type Store struct {
- commands map[string]*Config
+ byID map[string]*Config
+ byPid map[int]*Config
sync.RWMutex
}
// NewStore initializes a new exec store.
func NewStore() *Store {
- return &Store{commands: make(map[string]*Config)}
+ return &Store{
+ byID: make(map[string]*Config),
+ byPid: make(map[int]*Config),
+ }
}
// Commands returns the exec configurations in the store.
func (e *Store) Commands() map[string]*Config {
e.RLock()
- commands := make(map[string]*Config, len(e.commands))
- for id, config := range e.commands {
- commands[id] = config
+ byID := make(map[string]*Config, len(e.byID))
+ for id, config := range e.byID {
+ byID[id] = config
}
e.RUnlock()
- return commands
+ return byID
}
// Add adds a new exec configuration to the store.
func (e *Store) Add(id string, Config *Config) {
e.Lock()
- e.commands[id] = Config
+ e.byID[id] = Config
e.Unlock()
}
+// SetPidUnlocked associates pid with the config registered under id (no-op if
+// id is unknown). It is not synchronized with other operations.
+func (e *Store) SetPidUnlocked(id string, pid int) {
+ if config, ok := e.byID[id]; ok {
+ e.byPid[pid] = config
+ }
+}
+
// Get returns an exec configuration by its id.
func (e *Store) Get(id string) *Config {
e.RLock()
- res := e.commands[id]
+ res := e.byID[id]
+ e.RUnlock()
+ return res
+}
+
+ // ByPid looks up an exec configuration by its pid under the store's read
+ // lock. The result is nil when no configuration is associated with pid.
+ func (e *Store) ByPid(pid int) *Config {
+ 	e.RLock()
+ 	defer e.RUnlock()
+ 	return e.byPid[pid]
+ }
// Delete removes an exec configuration from the store.
-func (e *Store) Delete(id string) {
+func (e *Store) Delete(id string, pid int) {
e.Lock()
- delete(e.commands, id)
+ delete(e.byPid, pid)
+ delete(e.byID, id)
e.Unlock()
}
@@ -115,7 +155,7 @@ func (e *Store) Delete(id string) {
func (e *Store) List() []string {
var IDs []string
e.RLock()
- for id := range e.commands {
+ for id := range e.byID {
IDs = append(IDs, id)
}
e.RUnlock()
diff --git a/daemon/exec_linux.go b/daemon/exec_linux.go
index bb11c11e44..525ce01050 100644
--- a/daemon/exec_linux.go
+++ b/daemon/exec_linux.go
@@ -4,25 +4,30 @@ import (
"github.com/docker/docker/container"
"github.com/docker/docker/daemon/caps"
"github.com/docker/docker/daemon/exec"
- "github.com/docker/docker/libcontainerd"
"github.com/opencontainers/runc/libcontainer/apparmor"
"github.com/opencontainers/runtime-spec/specs-go"
)
-func execSetPlatformOpt(c *container.Container, ec *exec.Config, p *libcontainerd.Process) error {
+func (daemon *Daemon) execSetPlatformOpt(c *container.Container, ec *exec.Config, p *specs.Process) error {
if len(ec.User) > 0 {
uid, gid, additionalGids, err := getUser(c, ec.User)
if err != nil {
return err
}
- p.User = &specs.User{
+ p.User = specs.User{
UID: uid,
GID: gid,
AdditionalGids: additionalGids,
}
}
if ec.Privileged {
- p.Capabilities = caps.GetAllCapabilities()
+ if p.Capabilities == nil {
+ p.Capabilities = &specs.LinuxCapabilities{}
+ }
+ p.Capabilities.Bounding = caps.GetAllCapabilities()
+ p.Capabilities.Permitted = p.Capabilities.Bounding
+ p.Capabilities.Inheritable = p.Capabilities.Bounding
+ p.Capabilities.Effective = p.Capabilities.Bounding
}
if apparmor.IsEnabled() {
var appArmorProfile string
@@ -46,5 +51,6 @@ func execSetPlatformOpt(c *container.Container, ec *exec.Config, p *libcontainer
}
}
}
+ daemon.setRlimits(&specs.Spec{Process: p}, c)
return nil
}
diff --git a/daemon/exec_solaris.go b/daemon/exec_solaris.go
index 7003355d91..7c1fc20a0c 100644
--- a/daemon/exec_solaris.go
+++ b/daemon/exec_solaris.go
@@ -3,9 +3,9 @@ package daemon
import (
"github.com/docker/docker/container"
"github.com/docker/docker/daemon/exec"
- "github.com/docker/docker/libcontainerd"
+ specs "github.com/opencontainers/runtime-spec/specs-go"
)
-func execSetPlatformOpt(c *container.Container, ec *exec.Config, p *libcontainerd.Process) error {
+func (daemon *Daemon) execSetPlatformOpt(_ *container.Container, _ *exec.Config, _ *specs.Process) error {
return nil
}
diff --git a/daemon/exec_windows.go b/daemon/exec_windows.go
index 03246d91cd..d8754eb18d 100644
--- a/daemon/exec_windows.go
+++ b/daemon/exec_windows.go
@@ -3,10 +3,10 @@ package daemon
import (
"github.com/docker/docker/container"
"github.com/docker/docker/daemon/exec"
- "github.com/docker/docker/libcontainerd"
+ specs "github.com/opencontainers/runtime-spec/specs-go"
)
-func execSetPlatformOpt(c *container.Container, ec *exec.Config, p *libcontainerd.Process) error {
+func (daemon *Daemon) execSetPlatformOpt(c *container.Container, ec *exec.Config, p *specs.Process) error {
// Process arguments need to be escaped before sending to OCI.
if c.OS == "windows" {
p.Args = escapeArgs(p.Args)
diff --git a/daemon/info_unix.go b/daemon/info_unix.go
index f43af6274f..fd2bbb45c3 100644
--- a/daemon/info_unix.go
+++ b/daemon/info_unix.go
@@ -3,7 +3,6 @@
package daemon
import (
- "context"
"os/exec"
"strings"
@@ -28,16 +27,8 @@ func (daemon *Daemon) FillPlatformInfo(v *types.Info, sysInfo *sysinfo.SysInfo)
v.DefaultRuntime = daemon.configStore.GetDefaultRuntimeName()
v.InitBinary = daemon.configStore.GetInitPath()
- v.ContainerdCommit.Expected = dockerversion.ContainerdCommitID
- if sv, err := daemon.containerd.GetServerVersion(context.Background()); err == nil {
- v.ContainerdCommit.ID = sv.Revision
- } else {
- logrus.Warnf("failed to retrieve containerd version: %v", err)
- v.ContainerdCommit.ID = "N/A"
- }
-
v.RuncCommit.Expected = dockerversion.RuncCommitID
- defaultRuntimeBinary := daemon.configStore.GetRuntime(daemon.configStore.GetDefaultRuntimeName()).Path
+ defaultRuntimeBinary := daemon.configStore.GetRuntime(v.DefaultRuntime).Path
if rv, err := exec.Command(defaultRuntimeBinary, "--version").Output(); err == nil {
parts := strings.Split(strings.TrimSpace(string(rv)), "\n")
if len(parts) == 3 {
@@ -56,6 +47,24 @@ func (daemon *Daemon) FillPlatformInfo(v *types.Info, sysInfo *sysinfo.SysInfo)
v.RuncCommit.ID = "N/A"
}
+ 	// Report the containerd commit by parsing the output of
+ 	// `docker-containerd --version`; the third space-separated field is
+ 	// taken as the version/commit identifier.
+ 	v.ContainerdCommit.Expected = dockerversion.ContainerdCommitID
+ 	if rv, err := exec.Command("docker-containerd", "--version").Output(); err == nil {
+ 		parts := strings.Split(strings.TrimSpace(string(rv)), " ")
+ 		if len(parts) == 3 {
+ 			v.ContainerdCommit.ID = parts[2]
+ 		}
+ 		switch id := v.ContainerdCommit.ID; {
+ 		case id == "":
+ 			logrus.Warnf("failed to retrieve docker-containerd version: unknown output format: %s", string(rv))
+ 			v.ContainerdCommit.ID = "N/A"
+ 		// An identifier ending in "-g" followed by its last 7 characters
+ 		// is reported as the expected commit. The length guard keeps the
+ 		// slice expression from panicking on short version strings.
+ 		case len(id) >= 7 && strings.HasSuffix(id, "-g"+id[len(id)-7:]):
+ 			v.ContainerdCommit.ID = v.ContainerdCommit.Expected
+ 		}
+ 	} else {
+ 		logrus.Warnf("failed to retrieve docker-containerd version: %v", err)
+ 		v.ContainerdCommit.ID = "N/A"
+ 	}
+
defaultInitBinary := daemon.configStore.GetInitPath()
if rv, err := exec.Command(defaultInitBinary, "--version").Output(); err == nil {
ver, err := parseInitVersion(string(rv))
diff --git a/daemon/kill.go b/daemon/kill.go
index bb3e87cae3..a230eaa76e 100644
--- a/daemon/kill.go
+++ b/daemon/kill.go
@@ -9,6 +9,7 @@ import (
"time"
containerpkg "github.com/docker/docker/container"
+ "github.com/docker/docker/libcontainerd"
"github.com/docker/docker/pkg/signal"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
@@ -108,7 +109,7 @@ func (daemon *Daemon) killWithSignal(container *containerpkg.Container, sig int)
if unpause {
// above kill signal will be sent once resume is finished
- if err := daemon.containerd.Resume(container.ID); err != nil {
+ if err := daemon.containerd.Resume(context.Background(), container.ID); err != nil {
logrus.Warn("Cannot unpause container %s: %s", container.ID, err)
}
}
@@ -177,5 +178,5 @@ func (daemon *Daemon) killPossiblyDeadProcess(container *containerpkg.Container,
}
func (daemon *Daemon) kill(c *containerpkg.Container, sig int) error {
- return daemon.containerd.Signal(c.ID, sig)
+ return daemon.containerd.SignalProcess(context.Background(), c.ID, libcontainerd.InitProcessName, sig)
}
diff --git a/daemon/logger/plugin_unix.go b/daemon/logger/plugin_unix.go
index f254c9c57d..f93d7af0ee 100644
--- a/daemon/logger/plugin_unix.go
+++ b/daemon/logger/plugin_unix.go
@@ -6,8 +6,8 @@ import (
"context"
"io"
+ "github.com/containerd/fifo"
"github.com/pkg/errors"
- "github.com/tonistiigi/fifo"
"golang.org/x/sys/unix"
)
diff --git a/daemon/monitor.go b/daemon/monitor.go
index 3946e7aaec..c0a265dac5 100644
--- a/daemon/monitor.go
+++ b/daemon/monitor.go
@@ -1,6 +1,7 @@
package daemon
import (
+ "context"
"errors"
"fmt"
"runtime"
@@ -25,15 +26,15 @@ func (daemon *Daemon) setStateCounter(c *container.Container) {
}
}
-// StateChanged updates daemon state changes from containerd
-func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
- c := daemon.containers.Get(id)
- if c == nil {
+// ProcessEvent is called by libcontainerd whenever an event occurs
+func (daemon *Daemon) ProcessEvent(id string, e libcontainerd.EventType, ei libcontainerd.EventInfo) error {
+ c, err := daemon.GetContainer(id)
+ if c == nil || err != nil {
return fmt.Errorf("no such container: %s", id)
}
- switch e.State {
- case libcontainerd.StateOOM:
+ switch e {
+ case libcontainerd.EventOOM:
// StateOOM is Linux specific and should never be hit on Windows
if runtime.GOOS == "windows" {
return errors.New("received StateOOM from libcontainerd on Windows. This should never happen")
@@ -43,63 +44,72 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
return err
}
daemon.LogContainerEvent(c, "oom")
- case libcontainerd.StateExit:
+ case libcontainerd.EventExit:
+ if int(ei.Pid) == c.Pid {
+ _, _, err := daemon.containerd.DeleteTask(context.Background(), c.ID)
+ if err != nil {
+ logrus.WithError(err).Warnf("failed to delete container %s from containerd", c.ID)
+ }
- c.Lock()
- c.StreamConfig.Wait()
- c.Reset(false)
-
- // If daemon is being shutdown, don't let the container restart
- restart, wait, err := c.RestartManager().ShouldRestart(e.ExitCode, daemon.IsShuttingDown() || c.HasBeenManuallyStopped, time.Since(c.StartedAt))
- if err == nil && restart {
- c.RestartCount++
- c.SetRestarting(platformConstructExitStatus(e))
- } else {
- c.SetStopped(platformConstructExitStatus(e))
- defer daemon.autoRemove(c)
- }
+ c.Lock()
+ c.StreamConfig.Wait()
+ c.Reset(false)
- // cancel healthcheck here, they will be automatically
- // restarted if/when the container is started again
- daemon.stopHealthchecks(c)
- attributes := map[string]string{
- "exitCode": strconv.Itoa(int(e.ExitCode)),
- }
- daemon.LogContainerEventWithAttributes(c, "die", attributes)
- daemon.Cleanup(c)
-
- if err == nil && restart {
- go func() {
- err := <-wait
- if err == nil {
- // daemon.netController is initialized when daemon is restoring containers.
- // But containerStart will use daemon.netController segment.
- // So to avoid panic at startup process, here must wait util daemon restore done.
- daemon.waitForStartupDone()
- if err = daemon.containerStart(c, "", "", false); err != nil {
- logrus.Debugf("failed to restart container: %+v", err)
+ exitStatus := container.ExitStatus{
+ ExitCode: int(ei.ExitCode),
+ ExitedAt: ei.ExitedAt,
+ OOMKilled: ei.OOMKilled,
+ }
+ restart, wait, err := c.RestartManager().ShouldRestart(ei.ExitCode, daemon.IsShuttingDown() || c.HasBeenManuallyStopped, time.Since(c.StartedAt))
+ if err == nil && restart {
+ c.RestartCount++
+ c.SetRestarting(&exitStatus)
+ } else {
+ c.SetStopped(&exitStatus)
+ defer daemon.autoRemove(c)
+ }
+
+ // cancel healthcheck here, they will be automatically
+ // restarted if/when the container is started again
+ daemon.stopHealthchecks(c)
+ attributes := map[string]string{
+ "exitCode": strconv.Itoa(int(ei.ExitCode)),
+ }
+ daemon.LogContainerEventWithAttributes(c, "die", attributes)
+ daemon.Cleanup(c)
+
+ if err == nil && restart {
+ go func() {
+ err := <-wait
+ if err == nil {
+ // daemon.netController is initialized while the daemon is restoring containers,
+ // but containerStart uses daemon.netController.
+ // So, to avoid a panic during startup, we must wait until the daemon restore is done.
+ daemon.waitForStartupDone()
+ if err = daemon.containerStart(c, "", "", false); err != nil {
+ logrus.Debugf("failed to restart container: %+v", err)
+ }
}
- }
- if err != nil {
- c.SetStopped(platformConstructExitStatus(e))
- defer daemon.autoRemove(c)
- if err != restartmanager.ErrRestartCanceled {
- logrus.Errorf("restartmanger wait error: %+v", err)
+ if err != nil {
+ c.SetStopped(&exitStatus)
+ defer daemon.autoRemove(c)
+ if err != restartmanager.ErrRestartCanceled {
+ logrus.Errorf("restartmanger wait error: %+v", err)
+ }
}
- }
- }()
- }
-
- daemon.setStateCounter(c)
+ }()
+ }
- defer c.Unlock()
- if err := c.CheckpointTo(daemon.containersReplica); err != nil {
- return err
+ daemon.setStateCounter(c)
+ defer c.Unlock()
+ if err := c.CheckpointTo(daemon.containersReplica); err != nil {
+ return err
+ }
+ return daemon.postRunProcessing(c, ei)
}
- return daemon.postRunProcessing(c, e)
- case libcontainerd.StateExitProcess:
- if execConfig := c.ExecCommands.Get(e.ProcessID); execConfig != nil {
- ec := int(e.ExitCode)
+
+ if execConfig := c.ExecCommands.ByPid(int(ei.Pid)); execConfig != nil {
+ ec := int(ei.ExitCode)
execConfig.Lock()
defer execConfig.Unlock()
execConfig.ExitCode = &ec
@@ -111,42 +121,59 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
// remove the exec command from the container's store only and not the
// daemon's store so that the exec command can be inspected.
- c.ExecCommands.Delete(execConfig.ID)
+ c.ExecCommands.Delete(execConfig.ID, execConfig.Pid)
} else {
- logrus.Warnf("Ignoring StateExitProcess for %v but no exec command found", e)
+ logrus.WithFields(logrus.Fields{
+ "container": c.ID,
+ "exec-pid": ei.Pid,
+ }).Warnf("Ignoring Exit Event, no such exec command found")
}
- case libcontainerd.StateStart, libcontainerd.StateRestore:
- // Container is already locked in this case
- c.SetRunning(int(e.Pid), e.State == libcontainerd.StateStart)
- c.HasBeenManuallyStopped = false
- c.HasBeenStartedBefore = true
- daemon.setStateCounter(c)
-
- daemon.initHealthMonitor(c)
- if err := c.CheckpointTo(daemon.containersReplica); err != nil {
- c.Reset(false)
- return err
+ case libcontainerd.EventStart:
+ c.Lock()
+ defer c.Unlock()
+
+ // This is here to handle start events that were not generated by docker
+ if !c.Running {
+ c.SetRunning(int(ei.Pid), false)
+ c.HasBeenManuallyStopped = false
+ c.HasBeenStartedBefore = true
+ daemon.setStateCounter(c)
+
+ daemon.initHealthMonitor(c)
+
+ if err := c.CheckpointTo(daemon.containersReplica); err != nil {
+ return err
+ }
+ daemon.LogContainerEvent(c, "start")
}
- daemon.LogContainerEvent(c, "start")
- case libcontainerd.StatePause:
- // Container is already locked in this case
- c.Paused = true
- daemon.setStateCounter(c)
- daemon.updateHealthMonitor(c)
- if err := c.CheckpointTo(daemon.containersReplica); err != nil {
- return err
+ case libcontainerd.EventPaused:
+ c.Lock()
+ defer c.Unlock()
+
+ if !c.Paused {
+ c.Paused = true
+ daemon.setStateCounter(c)
+ daemon.updateHealthMonitor(c)
+ if err := c.CheckpointTo(daemon.containersReplica); err != nil {
+ return err
+ }
+ daemon.LogContainerEvent(c, "pause")
}
- daemon.LogContainerEvent(c, "pause")
- case libcontainerd.StateResume:
- // Container is already locked in this case
- c.Paused = false
- daemon.setStateCounter(c)
- daemon.updateHealthMonitor(c)
- if err := c.CheckpointTo(daemon.containersReplica); err != nil {
- return err
+ case libcontainerd.EventResumed:
+ c.Lock()
+ defer c.Unlock()
+
+ if c.Paused {
+ c.Paused = false
+ daemon.setStateCounter(c)
+ daemon.updateHealthMonitor(c)
+
+ if err := c.CheckpointTo(daemon.containersReplica); err != nil {
+ return err
+ }
+ daemon.LogContainerEvent(c, "unpause")
}
- daemon.LogContainerEvent(c, "unpause")
}
return nil
}
diff --git a/daemon/monitor_linux.go b/daemon/monitor_linux.go
index 09f5af50c6..0995758000 100644
--- a/daemon/monitor_linux.go
+++ b/daemon/monitor_linux.go
@@ -5,15 +5,7 @@ import (
"github.com/docker/docker/libcontainerd"
)
-// platformConstructExitStatus returns a platform specific exit status structure
-func platformConstructExitStatus(e libcontainerd.StateInfo) *container.ExitStatus {
- return &container.ExitStatus{
- ExitCode: int(e.ExitCode),
- OOMKilled: e.OOMKilled,
- }
-}
-
// postRunProcessing perfoms any processing needed on the container after it has stopped.
-func (daemon *Daemon) postRunProcessing(container *container.Container, e libcontainerd.StateInfo) error {
+func (daemon *Daemon) postRunProcessing(_ *container.Container, _ libcontainerd.EventInfo) error {
return nil
}
diff --git a/daemon/monitor_solaris.go b/daemon/monitor_solaris.go
index 5ccfada76a..0995758000 100644
--- a/daemon/monitor_solaris.go
+++ b/daemon/monitor_solaris.go
@@ -5,14 +5,7 @@ import (
"github.com/docker/docker/libcontainerd"
)
-// platformConstructExitStatus returns a platform specific exit status structure
-func platformConstructExitStatus(e libcontainerd.StateInfo) *container.ExitStatus {
- return &container.ExitStatus{
- ExitCode: int(e.ExitCode),
- }
-}
-
// postRunProcessing perfoms any processing needed on the container after it has stopped.
-func (daemon *Daemon) postRunProcessing(container *container.Container, e libcontainerd.StateInfo) error {
+func (daemon *Daemon) postRunProcessing(_ *container.Container, _ libcontainerd.EventInfo) error {
return nil
}
diff --git a/daemon/monitor_windows.go b/daemon/monitor_windows.go
index 15d656de0e..dd5a09ada8 100644
--- a/daemon/monitor_windows.go
+++ b/daemon/monitor_windows.go
@@ -1,40 +1,52 @@
package daemon
import (
- "fmt"
+ "context"
"github.com/docker/docker/container"
"github.com/docker/docker/libcontainerd"
+ "github.com/pkg/errors"
+ "github.com/sirupsen/logrus"
)
-// platformConstructExitStatus returns a platform specific exit status structure
-func platformConstructExitStatus(e libcontainerd.StateInfo) *container.ExitStatus {
- return &container.ExitStatus{
- ExitCode: int(e.ExitCode),
- }
-}
-
-// postRunProcessing perfoms any processing needed on the container after it has stopped.
-func (daemon *Daemon) postRunProcessing(container *container.Container, e libcontainerd.StateInfo) error {
- if e.ExitCode == 0 && e.UpdatePending {
- spec, err := daemon.createSpec(container)
+// postRunProcessing starts a servicing container if required
+func (daemon *Daemon) postRunProcessing(c *container.Container, ei libcontainerd.EventInfo) error {
+ if ei.ExitCode == 0 && ei.UpdatePending {
+ spec, err := daemon.createSpec(c)
if err != nil {
return err
}
-
// Turn on servicing
spec.Windows.Servicing = true
- copts, err := daemon.getLibcontainerdCreateOptions(container)
+ copts, err := daemon.getLibcontainerdCreateOptions(c)
if err != nil {
return err
}
- // Create a new servicing container, which will start, complete the update, and merge back the
- // results if it succeeded, all as part of the below function call.
- if err := daemon.containerd.Create((container.ID + "_servicing"), "", "", *spec, container.InitializeStdio, copts...); err != nil {
- container.SetExitCode(-1)
- return fmt.Errorf("Post-run update servicing failed: %s", err)
+ // Create a new servicing container, which will start, complete the
+ // update, and merge back the results if it succeeded, all as part of
+ // the below function call.
+ ctx := context.Background()
+ svcID := c.ID + "_servicing"
+ logger := logrus.WithField("container", svcID)
+ if err := daemon.containerd.Create(ctx, svcID, spec, copts); err != nil {
+ c.SetExitCode(-1)
+ return errors.Wrap(err, "post-run update servicing failed")
+ }
+ _, err = daemon.containerd.Start(ctx, svcID, "", false, nil)
+ if err != nil {
+ logger.WithError(err).Warn("failed to run servicing container")
+ if err := daemon.containerd.Delete(ctx, svcID); err != nil {
+ logger.WithError(err).Warn("failed to delete servicing container")
+ }
+ } else {
+ if _, _, err := daemon.containerd.DeleteTask(ctx, svcID); err != nil {
+ logger.WithError(err).Warn("failed to delete servicing container task")
+ }
+ if err := daemon.containerd.Delete(ctx, svcID); err != nil {
+ logger.WithError(err).Warn("failed to delete servicing container")
+ }
}
}
return nil
diff --git a/daemon/oci_linux.go b/daemon/oci_linux.go
index 89ac627ff0..b4a6bf60d2 100644
--- a/daemon/oci_linux.go
+++ b/daemon/oci_linux.go
@@ -156,7 +156,7 @@ func setDevices(s *specs.Spec, c *container.Container) error {
return nil
}
-func setRlimits(daemon *Daemon, s *specs.Spec, c *container.Container) error {
+func (daemon *Daemon) setRlimits(s *specs.Spec, c *container.Container) error {
var rlimits []specs.POSIXRlimit
// We want to leave the original HostConfig alone so make a copy here
@@ -755,6 +755,7 @@ func (daemon *Daemon) createSpec(c *container.Container) (*specs.Spec, error) {
if err := setResources(&s, c.HostConfig.Resources); err != nil {
return nil, fmt.Errorf("linux runtime spec resources: %v", err)
}
+ s.Process.OOMScoreAdj = &c.HostConfig.OomScoreAdj
s.Linux.Sysctl = c.HostConfig.Sysctls
p := s.Linux.CgroupsPath
@@ -763,11 +764,11 @@ func (daemon *Daemon) createSpec(c *container.Container) (*specs.Spec, error) {
if err != nil {
return nil, err
}
- p, _ = cgroups.GetOwnCgroup("cpu")
+ _, err = cgroups.GetOwnCgroup("cpu")
if err != nil {
return nil, err
}
- p = filepath.Join(initPath, p)
+ p = filepath.Join(initPath, s.Linux.CgroupsPath)
}
// Clean path to guard against things like ../../../BAD
@@ -782,7 +783,7 @@ func (daemon *Daemon) createSpec(c *container.Container) (*specs.Spec, error) {
if err := setDevices(&s, c); err != nil {
return nil, fmt.Errorf("linux runtime spec devices: %v", err)
}
- if err := setRlimits(daemon, &s, c); err != nil {
+ if err := daemon.setRlimits(&s, c); err != nil {
return nil, fmt.Errorf("linux runtime spec rlimits: %v", err)
}
if err := setUser(&s, c); err != nil {
diff --git a/daemon/pause.go b/daemon/pause.go
index 3fecea59c9..b751cc4880 100644
--- a/daemon/pause.go
+++ b/daemon/pause.go
@@ -1,9 +1,11 @@
package daemon
import (
+ "context"
"fmt"
"github.com/docker/docker/container"
+ "github.com/sirupsen/logrus"
)
// ContainerPause pauses a container
@@ -33,7 +35,7 @@ func (daemon *Daemon) containerPause(container *container.Container) error {
// We cannot Pause the container which is already paused
if container.Paused {
- return fmt.Errorf("Container %s is already paused", container.ID)
+ return errNotPaused(container.ID)
}
// We cannot Pause the container which is restarting
@@ -41,9 +43,18 @@ func (daemon *Daemon) containerPause(container *container.Container) error {
return errContainerIsRestarting(container.ID)
}
- if err := daemon.containerd.Pause(container.ID); err != nil {
+ if err := daemon.containerd.Pause(context.Background(), container.ID); err != nil {
return fmt.Errorf("Cannot pause container %s: %s", container.ID, err)
}
+ container.Paused = true
+ daemon.setStateCounter(container)
+ daemon.updateHealthMonitor(container)
+ daemon.LogContainerEvent(container, "pause")
+
+ if err := container.CheckpointTo(daemon.containersReplica); err != nil {
+ logrus.WithError(err).Warn("could not save container to disk")
+ }
+
return nil
}
diff --git a/daemon/reload.go b/daemon/reload.go
index a6674ec951..0d16bc8d02 100644
--- a/daemon/reload.go
+++ b/daemon/reload.go
@@ -6,7 +6,6 @@ import (
"github.com/docker/docker/daemon/config"
"github.com/docker/docker/daemon/discovery"
- "github.com/docker/docker/libcontainerd"
"github.com/sirupsen/logrus"
)
@@ -303,9 +302,6 @@ func (daemon *Daemon) reloadLiveRestore(conf *config.Config, attributes map[stri
// update corresponding configuration
if conf.IsValueSet("live-restore") {
daemon.configStore.LiveRestoreEnabled = conf.LiveRestoreEnabled
- if err := daemon.containerdRemote.UpdateOptions(libcontainerd.WithLiveRestore(conf.LiveRestoreEnabled)); err != nil {
- return err
- }
}
// prepare reload event attributes with updatable configurations
diff --git a/daemon/resize.go b/daemon/resize.go
index 0923d0fe12..a992a073a5 100644
--- a/daemon/resize.go
+++ b/daemon/resize.go
@@ -1,6 +1,7 @@
package daemon
import (
+ "context"
"fmt"
"github.com/docker/docker/libcontainerd"
@@ -18,7 +19,7 @@ func (daemon *Daemon) ContainerResize(name string, height, width int) error {
return errNotRunning(container.ID)
}
- if err = daemon.containerd.Resize(container.ID, libcontainerd.InitFriendlyName, width, height); err == nil {
+ if err = daemon.containerd.ResizeTerminal(context.Background(), container.ID, libcontainerd.InitProcessName, width, height); err == nil {
attributes := map[string]string{
"height": fmt.Sprintf("%d", height),
"width": fmt.Sprintf("%d", width),
@@ -36,5 +37,5 @@ func (daemon *Daemon) ContainerExecResize(name string, height, width int) error
if err != nil {
return err
}
- return daemon.containerd.Resize(ec.ContainerID, ec.ID, width, height)
+ return daemon.containerd.ResizeTerminal(context.Background(), ec.ContainerID, ec.ID, width, height)
}
diff --git a/daemon/start.go b/daemon/start.go
index ab8443c855..3b9f0f9f63 100644
--- a/daemon/start.go
+++ b/daemon/start.go
@@ -1,6 +1,7 @@
package daemon
import (
+ "context"
"runtime"
"time"
@@ -113,6 +114,11 @@ func (daemon *Daemon) containerStart(container *container.Container, checkpoint
return stateConflictError{errors.New("container is marked for removal and cannot be started")}
}
+ if checkpointDir != "" {
+ // TODO(mlaventure): how would we support that?
+ return notAllowedError{errors.New("custom checkpointdir is not supported")}
+ }
+
// if we encounter an error during start we need to ensure that any other
// setup has been cleaned up properly
defer func() {
@@ -152,28 +158,56 @@ func (daemon *Daemon) containerStart(container *container.Container, checkpoint
return systemError{err}
}
- createOptions, err := daemon.getLibcontainerdCreateOptions(container)
- if err != nil {
- return err
- }
-
if resetRestartManager {
container.ResetRestartManager(true)
}
- if checkpointDir == "" {
- checkpointDir = container.CheckpointDir()
+ if daemon.saveApparmorConfig(container); err != nil {
+ return err
}
- if daemon.saveApparmorConfig(container); err != nil {
+ if checkpoint != "" {
+ checkpointDir, err = getCheckpointDir(checkpointDir, checkpoint, container.Name, container.ID, container.CheckpointDir(), false)
+ if err != nil {
+ return err
+ }
+ }
+
+ createOptions, err := daemon.getLibcontainerdCreateOptions(container)
+ if err != nil {
return err
}
- if err := daemon.containerd.Create(container.ID, checkpoint, checkpointDir, *spec, container.InitializeStdio, createOptions...); err != nil {
+ err = daemon.containerd.Create(context.Background(), container.ID, spec, createOptions)
+ if err != nil {
+ return translateContainerdStartErr(container.Path, container.SetExitCode, err)
+ }
+
+ // TODO(mlaventure): we need to specify checkpoint options here
+ pid, err := daemon.containerd.Start(context.Background(), container.ID, checkpointDir,
+ container.StreamConfig.Stdin() != nil || container.Config.Tty,
+ container.InitializeStdio)
+ if err != nil {
+ if err := daemon.containerd.Delete(context.Background(), container.ID); err != nil {
+ logrus.WithError(err).WithField("container", container.ID).
+ Error("failed to delete failed start container")
+ }
return translateContainerdStartErr(container.Path, container.SetExitCode, err)
+ }
+
+ container.SetRunning(pid, true)
+ container.HasBeenManuallyStopped = false
+ container.HasBeenStartedBefore = true
+ daemon.setStateCounter(container)
+
+ daemon.initHealthMonitor(container)
+ if err := container.CheckpointTo(daemon.containersReplica); err != nil {
+ logrus.WithError(err).WithField("container", container.ID).
+ Errorf("failed to store container")
}
+ daemon.LogContainerEvent(container, "start")
containerActions.WithValues("start").UpdateSince(start)
return nil
@@ -209,5 +243,10 @@ func (daemon *Daemon) Cleanup(container *container.Container) {
logrus.Warnf("%s cleanup: Failed to umount volumes: %v", container.ID, err)
}
}
+
container.CancelAttachContext()
+
+ if err := daemon.containerd.Delete(context.Background(), container.ID); err != nil {
+ logrus.Errorf("%s cleanup: failed to delete container from containerd: %v", container.ID, err)
+ }
}
diff --git a/daemon/start_unix.go b/daemon/start_unix.go
index 87ab0850c2..a8402bb303 100644
--- a/daemon/start_unix.go
+++ b/daemon/start_unix.go
@@ -3,29 +3,54 @@
package daemon
import (
+ "fmt"
+ "os/exec"
+ "path/filepath"
+
+ "github.com/containerd/containerd/linux/runcopts"
"github.com/docker/docker/container"
- "github.com/docker/docker/libcontainerd"
"github.com/pkg/errors"
)
-// getLibcontainerdCreateOptions callers must hold a lock on the container
-func (daemon *Daemon) getLibcontainerdCreateOptions(container *container.Container) ([]libcontainerd.CreateOption, error) {
- createOptions := []libcontainerd.CreateOption{}
+func (daemon *Daemon) getRuntimeScript(container *container.Container) (string, error) {
+ name := container.HostConfig.Runtime
+ rt := daemon.configStore.GetRuntime(name)
+ if rt == nil {
+ return "", validationError{errors.Errorf("no such runtime '%s'", name)}
+ }
+ if len(rt.Args) > 0 {
+ // First check that the target exists, as using it in a script won't
+ // give us the right error
+ if _, err := exec.LookPath(rt.Path); err != nil {
+ return "", translateContainerdStartErr(container.Path, container.SetExitCode, err)
+ }
+ return filepath.Join(daemon.configStore.Root, "runtimes", name), nil
+ }
+ return rt.Path, nil
+}
+
+// getLibcontainerdCreateOptions callers must hold a lock on the container
+func (daemon *Daemon) getLibcontainerdCreateOptions(container *container.Container) (interface{}, error) {
// Ensure a runtime has been assigned to this container
if container.HostConfig.Runtime == "" {
container.HostConfig.Runtime = daemon.configStore.GetDefaultRuntimeName()
container.CheckpointTo(daemon.containersReplica)
}
- rt := daemon.configStore.GetRuntime(container.HostConfig.Runtime)
- if rt == nil {
- return nil, validationError{errors.Errorf("no such runtime '%s'", container.HostConfig.Runtime)}
+ path, err := daemon.getRuntimeScript(container)
+ if err != nil {
+ return nil, err
}
+ opts := &runcopts.RuncOptions{
+ Runtime: path,
+ RuntimeRoot: filepath.Join(daemon.configStore.ExecRoot,
+ fmt.Sprintf("runtime-%s", container.HostConfig.Runtime)),
+ }
+
if UsingSystemd(daemon.configStore) {
- rt.Args = append(rt.Args, "--systemd-cgroup=true")
+ opts.SystemdCgroup = true
}
- createOptions = append(createOptions, libcontainerd.WithRuntime(rt.Path, rt.Args))
- return createOptions, nil
+ return opts, nil
}
diff --git a/daemon/start_windows.go b/daemon/start_windows.go
index 3de6391eae..55588be6ca 100644
--- a/daemon/start_windows.go
+++ b/daemon/start_windows.go
@@ -3,12 +3,9 @@ package daemon
import (
"github.com/Microsoft/opengcs/client"
"github.com/docker/docker/container"
- "github.com/docker/docker/libcontainerd"
)
-func (daemon *Daemon) getLibcontainerdCreateOptions(container *container.Container) ([]libcontainerd.CreateOption, error) {
- createOptions := []libcontainerd.CreateOption{}
-
+func (daemon *Daemon) getLibcontainerdCreateOptions(container *container.Container) (interface{}, error) {
// LCOW options.
if container.OS == "linux" {
config := &client.Config{}
@@ -33,11 +30,9 @@ func (daemon *Daemon) getLibcontainerdCreateOptions(container *container.Contain
if err := config.Validate(); err != nil {
return nil, err
}
- lcowOpts := &libcontainerd.LCOWOption{
- Config: config,
- }
- createOptions = append(createOptions, lcowOpts)
+
+ return config, nil
}
- return createOptions, nil
+ return nil, nil
}
diff --git a/daemon/top_unix.go b/daemon/top_unix.go
index 22e88b702e..cbb993f658 100644
--- a/daemon/top_unix.go
+++ b/daemon/top_unix.go
@@ -3,6 +3,7 @@
package daemon
import (
+ "context"
"fmt"
"os/exec"
"regexp"
@@ -50,16 +51,16 @@ func appendProcess2ProcList(procList *container.ContainerTopOKBody, fields []str
procList.Processes = append(procList.Processes, process)
}
-func hasPid(pids []int, pid int) bool {
- for _, i := range pids {
- if i == pid {
+func hasPid(procs []uint32, pid int) bool {
+ for _, p := range procs {
+ if int(p) == pid {
return true
}
}
return false
}
-func parsePSOutput(output []byte, pids []int) (*container.ContainerTopOKBody, error) {
+func parsePSOutput(output []byte, procs []uint32) (*container.ContainerTopOKBody, error) {
procList := &container.ContainerTopOKBody{}
lines := strings.Split(string(output), "\n")
@@ -101,7 +102,7 @@ func parsePSOutput(output []byte, pids []int) (*container.ContainerTopOKBody, er
return nil, fmt.Errorf("Unexpected pid '%s': %s", fields[pidIndex], err)
}
- if hasPid(pids, p) {
+ if hasPid(procs, p) {
preContainedPidFlag = true
appendProcess2ProcList(procList, fields)
continue
@@ -138,7 +139,7 @@ func (daemon *Daemon) ContainerTop(name string, psArgs string) (*container.Conta
return nil, errContainerIsRestarting(container.ID)
}
- pids, err := daemon.containerd.GetPidsForContainer(container.ID)
+ procs, err := daemon.containerd.ListPids(context.Background(), container.ID)
if err != nil {
return nil, err
}
@@ -147,7 +148,7 @@ func (daemon *Daemon) ContainerTop(name string, psArgs string) (*container.Conta
if err != nil {
return nil, fmt.Errorf("Error running ps: %v", err)
}
- procList, err := parsePSOutput(output, pids)
+ procList, err := parsePSOutput(output, procs)
if err != nil {
return nil, err
}
diff --git a/daemon/top_unix_test.go b/daemon/top_unix_test.go
index 9a3749f711..4cc4a20700 100644
--- a/daemon/top_unix_test.go
+++ b/daemon/top_unix_test.go
@@ -36,7 +36,7 @@ func TestContainerTopValidatePSArgs(t *testing.T) {
func TestContainerTopParsePSOutput(t *testing.T) {
tests := []struct {
output []byte
- pids []int
+ pids []uint32
errExpected bool
}{
{[]byte(` PID COMMAND
@@ -44,26 +44,26 @@ func TestContainerTopParsePSOutput(t *testing.T) {
43 bar
- -
100 baz
-`), []int{42, 43}, false},
+`), []uint32{42, 43}, false},
{[]byte(` UID COMMAND
42 foo
43 bar
- -
100 baz
-`), []int{42, 43}, true},
+`), []uint32{42, 43}, true},
// unicode space (U+2003, 0xe2 0x80 0x83)
{[]byte(` PID COMMAND
42 foo
43 bar
- -
100 baz
-`), []int{42, 43}, true},
+`), []uint32{42, 43}, true},
// the first space is U+2003, the second one is ascii.
{[]byte(` PID COMMAND
42 foo
43 bar
100 baz
-`), []int{42, 43}, true},
+`), []uint32{42, 43}, true},
}
for _, f := range tests {
diff --git a/daemon/top_windows.go b/daemon/top_windows.go
index 000720b004..40828ffb81 100644
--- a/daemon/top_windows.go
+++ b/daemon/top_windows.go
@@ -1,6 +1,7 @@
package daemon
import (
+ "context"
"errors"
"fmt"
"time"
@@ -34,7 +35,15 @@ func (daemon *Daemon) ContainerTop(name string, psArgs string) (*containertypes.
return nil, err
}
- s, err := daemon.containerd.Summary(container.ID)
+ if !container.IsRunning() {
+ return nil, errNotRunning(container.ID)
+ }
+
+ if container.IsRestarting() {
+ return nil, errContainerIsRestarting(container.ID)
+ }
+
+ s, err := daemon.containerd.Summary(context.Background(), container.ID)
if err != nil {
return nil, err
}
@@ -49,5 +58,6 @@ func (daemon *Daemon) ContainerTop(name string, psArgs string) (*containertypes.
fmt.Sprintf("%02d:%02d:%02d.%03d", int(d.Hours()), int(d.Minutes())%60, int(d.Seconds())%60, int(d.Nanoseconds()/1000000)%1000),
units.HumanSize(float64(j.MemoryWorkingSetPrivateBytes))})
}
+
return procList, nil
}
diff --git a/daemon/unpause.go b/daemon/unpause.go
index e66b3868dc..2e41f2070b 100644
--- a/daemon/unpause.go
+++ b/daemon/unpause.go
@@ -1,9 +1,11 @@
package daemon
import (
+ "context"
"fmt"
"github.com/docker/docker/container"
+ "github.com/sirupsen/logrus"
)
// ContainerUnpause unpauses a container
@@ -30,9 +32,18 @@ func (daemon *Daemon) containerUnpause(container *container.Container) error {
return fmt.Errorf("Container %s is not paused", container.ID)
}
- if err := daemon.containerd.Resume(container.ID); err != nil {
+ if err := daemon.containerd.Resume(context.Background(), container.ID); err != nil {
return fmt.Errorf("Cannot unpause container %s: %s", container.ID, err)
}
+ container.Paused = false
+ daemon.setStateCounter(container)
+ daemon.updateHealthMonitor(container)
+ daemon.LogContainerEvent(container, "unpause")
+
+ if err := container.CheckpointTo(daemon.containersReplica); err != nil {
+ logrus.WithError(err).Warnf("could not save container to disk")
+ }
+
return nil
}
diff --git a/daemon/update.go b/daemon/update.go
index c969ebb21c..0a79c199f3 100644
--- a/daemon/update.go
+++ b/daemon/update.go
@@ -1,6 +1,7 @@
package daemon
import (
+ "context"
"fmt"
"github.com/docker/docker/api/types/container"
@@ -76,7 +77,7 @@ func (daemon *Daemon) update(name string, hostConfig *container.HostConfig) erro
// If container is running (including paused), we need to update configs
// to the real world.
if container.IsRunning() && !container.IsRestarting() {
- if err := daemon.containerd.UpdateResources(container.ID, toContainerdResources(hostConfig.Resources)); err != nil {
+ if err := daemon.containerd.UpdateResources(context.Background(), container.ID, toContainerdResources(hostConfig.Resources)); err != nil {
restoreConfig = true
// TODO: it would be nice if containerd responded with better errors here so we can classify this better.
return errCannotUpdate(container.ID, systemError{err})
diff --git a/daemon/update_linux.go b/daemon/update_linux.go
index c128967218..41d3b5324a 100644
--- a/daemon/update_linux.go
+++ b/daemon/update_linux.go
@@ -7,26 +7,43 @@ import (
"github.com/docker/docker/api/types/container"
"github.com/docker/docker/libcontainerd"
+ specs "github.com/opencontainers/runtime-spec/specs-go"
)
-func toContainerdResources(resources container.Resources) libcontainerd.Resources {
+func toContainerdResources(resources container.Resources) *libcontainerd.Resources {
var r libcontainerd.Resources
- r.BlkioWeight = uint64(resources.BlkioWeight)
- r.CpuShares = uint64(resources.CPUShares)
+
+ r.BlockIO = &specs.LinuxBlockIO{
+ Weight: &resources.BlkioWeight,
+ }
+
+ shares := uint64(resources.CPUShares)
+ r.CPU = &specs.LinuxCPU{
+ Shares: &shares,
+ Cpus: resources.CpusetCpus,
+ Mems: resources.CpusetMems,
+ }
+
+ var (
+ period uint64
+ quota int64
+ )
if resources.NanoCPUs != 0 {
- r.CpuPeriod = uint64(100 * time.Millisecond / time.Microsecond)
- r.CpuQuota = uint64(resources.NanoCPUs) * r.CpuPeriod / 1e9
- } else {
- r.CpuPeriod = uint64(resources.CPUPeriod)
- r.CpuQuota = uint64(resources.CPUQuota)
+ period = uint64(100 * time.Millisecond / time.Microsecond)
+ quota = resources.NanoCPUs * int64(period) / 1e9
}
- r.CpusetCpus = resources.CpusetCpus
- r.CpusetMems = resources.CpusetMems
- r.MemoryLimit = uint64(resources.Memory)
+ r.CPU.Period = &period
+ r.CPU.Quota = &quota
+
+ r.Memory = &specs.LinuxMemory{
+ Limit: &resources.Memory,
+ Reservation: &resources.MemoryReservation,
+ Kernel: &resources.KernelMemory,
+ }
+
if resources.MemorySwap > 0 {
- r.MemorySwap = uint64(resources.MemorySwap)
+ r.Memory.Swap = &resources.MemorySwap
}
- r.MemoryReservation = uint64(resources.MemoryReservation)
- r.KernelMemoryLimit = uint64(resources.KernelMemory)
- return r
+
+ return &r
}
diff --git a/daemon/update_windows.go b/daemon/update_windows.go
index 01466260bb..4f85f41dda 100644
--- a/daemon/update_windows.go
+++ b/daemon/update_windows.go
@@ -7,7 +7,7 @@ import (
"github.com/docker/docker/libcontainerd"
)
-func toContainerdResources(resources container.Resources) libcontainerd.Resources {
- var r libcontainerd.Resources
- return r
+func toContainerdResources(resources container.Resources) *libcontainerd.Resources {
+ // We don't support update, so do nothing
+ return nil
}
diff --git a/hack/make/.go-autogen b/hack/make/.go-autogen
index ec20180672..b68e3a7534 100644
--- a/hack/make/.go-autogen
+++ b/hack/make/.go-autogen
@@ -17,6 +17,7 @@ const (
Version string = "$VERSION"
BuildTime string = "$BUILDTIME"
IAmStatic string = "${IAMSTATIC:-true}"
+ ContainerdCommitID string = "${CONTAINERD_COMMIT}"
)
// AUTOGENERATED FILE; see /go/src/github.com/docker/docker/hack/make/.go-autogen
@@ -31,9 +32,8 @@ package dockerversion
// Default build-time variable for library-import.
// This file is overridden on build with build-time informations.
const (
- ContainerdCommitID string = "${CONTAINERD_COMMIT}"
- RuncCommitID string = "${RUNC_COMMIT}"
- InitCommitID string = "${TINI_COMMIT}"
+ RuncCommitID string = "${RUNC_COMMIT}"
+ InitCommitID string = "${TINI_COMMIT}"
)
// AUTOGENERATED FILE; see /go/src/github.com/docker/docker/hack/make/.go-autogen
diff --git a/integration-cli/daemon/daemon.go b/integration-cli/daemon/daemon.go
index 06bf504fa6..f6ad6559b2 100644
--- a/integration-cli/daemon/daemon.go
+++ b/integration-cli/daemon/daemon.go
@@ -222,7 +222,7 @@ func (d *Daemon) StartWithLogFile(out *os.File, providedArgs ...string) error {
return errors.Wrapf(err, "[%s] could not find docker binary in $PATH", d.id)
}
args := append(d.GlobalFlags,
- "--containerd", "/var/run/docker/libcontainerd/docker-containerd.sock",
+ "--containerd", "/var/run/docker/containerd/docker-containerd.sock",
"--data-root", d.Root,
"--exec-root", d.execRoot,
"--pidfile", fmt.Sprintf("%s/docker.pid", d.Folder),
@@ -457,6 +457,8 @@ out2:
return err
}
+ d.cmd.Wait()
+
if err := os.Remove(fmt.Sprintf("%s/docker.pid", d.Folder)); err != nil {
return err
}
diff --git a/integration-cli/docker_api_stats_test.go b/integration-cli/docker_api_stats_test.go
index 2e8515a3f6..7c9de1c083 100644
--- a/integration-cli/docker_api_stats_test.go
+++ b/integration-cli/docker_api_stats_test.go
@@ -285,7 +285,7 @@ func (s *DockerSuite) TestAPIStatsNoStreamConnectedContainers(c *check.C) {
id2 := strings.TrimSpace(out2)
c.Assert(waitRun(id2), checker.IsNil)
- ch := make(chan error)
+ ch := make(chan error, 1)
go func() {
resp, body, err := request.Get(fmt.Sprintf("/containers/%s/stats?stream=false", id2))
defer body.Close()
diff --git a/integration-cli/docker_cli_attach_test.go b/integration-cli/docker_cli_attach_test.go
index db43beb7d2..353cb65e5d 100644
--- a/integration-cli/docker_cli_attach_test.go
+++ b/integration-cli/docker_cli_attach_test.go
@@ -147,7 +147,10 @@ func (s *DockerSuite) TestAttachDisconnect(c *check.C) {
c.Assert(err, check.IsNil)
defer stdout.Close()
c.Assert(cmd.Start(), check.IsNil)
- defer cmd.Process.Kill()
+ defer func() {
+ cmd.Process.Kill()
+ cmd.Wait()
+ }()
_, err = stdin.Write([]byte("hello\n"))
c.Assert(err, check.IsNil)
diff --git a/integration-cli/docker_cli_build_unix_test.go b/integration-cli/docker_cli_build_unix_test.go
index 91a329fae8..d857bd2f2c 100644
--- a/integration-cli/docker_cli_build_unix_test.go
+++ b/integration-cli/docker_cli_build_unix_test.go
@@ -149,6 +149,11 @@ func (s *DockerSuite) TestBuildCancellationKillsSleep(c *check.C) {
if err := buildCmd.Start(); err != nil {
c.Fatalf("failed to run build: %s", err)
}
+ // always clean up
+ defer func() {
+ buildCmd.Process.Kill()
+ buildCmd.Wait()
+ }()
matchCID := regexp.MustCompile("Running in (.+)")
scanner := bufio.NewScanner(stdoutBuild)
diff --git a/integration-cli/docker_cli_daemon_test.go b/integration-cli/docker_cli_daemon_test.go
index ccf50543e9..a974b6fede 100644
--- a/integration-cli/docker_cli_daemon_test.go
+++ b/integration-cli/docker_cli_daemon_test.go
@@ -28,6 +28,7 @@ import (
"github.com/docker/docker/api"
"github.com/docker/docker/api/types"
"github.com/docker/docker/client"
+ moby_daemon "github.com/docker/docker/daemon"
"github.com/docker/docker/integration-cli/checker"
"github.com/docker/docker/integration-cli/cli"
"github.com/docker/docker/integration-cli/daemon"
@@ -1448,7 +1449,8 @@ func (s *DockerDaemonSuite) TestCleanupMountsAfterDaemonAndContainerKill(c *chec
c.Assert(strings.Contains(string(mountOut), id), check.Equals, true, comment)
// kill the container
- icmd.RunCommand(ctrBinary, "--address", "unix:///var/run/docker/libcontainerd/docker-containerd.sock", "containers", "kill", id).Assert(c, icmd.Success)
+ icmd.RunCommand(ctrBinary, "--address", "/var/run/docker/containerd/docker-containerd.sock",
+ "--namespace", moby_daemon.MainNamespace, "tasks", "kill", id).Assert(c, icmd.Success)
// restart daemon.
d.Restart(c)
@@ -1987,7 +1989,6 @@ func (s *DockerDaemonSuite) TestDaemonRestartWithNames(c *check.C) {
// TestDaemonRestartWithKilledRunningContainer requires live restore of running containers
func (s *DockerDaemonSuite) TestDaemonRestartWithKilledRunningContainer(t *check.C) {
- // TODO(mlaventure): Not sure what would the exit code be on windows
testRequires(t, DaemonIsLinux)
s.d.StartWithBusybox(t)
@@ -2008,7 +2009,8 @@ func (s *DockerDaemonSuite) TestDaemonRestartWithKilledRunningContainer(t *check
}
// kill the container
- icmd.RunCommand(ctrBinary, "--address", "unix:///var/run/docker/libcontainerd/docker-containerd.sock", "containers", "kill", cid).Assert(t, icmd.Success)
+ icmd.RunCommand(ctrBinary, "--address", "/var/run/docker/containerd/docker-containerd.sock",
+ "--namespace", moby_daemon.MainNamespace, "tasks", "kill", cid).Assert(t, icmd.Success)
// Give time to containerd to process the command if we don't
// the exit event might be received after we do the inspect
@@ -2076,7 +2078,6 @@ func (s *DockerDaemonSuite) TestCleanupMountsAfterDaemonCrash(c *check.C) {
// TestDaemonRestartWithUnpausedRunningContainer requires live restore of running containers.
func (s *DockerDaemonSuite) TestDaemonRestartWithUnpausedRunningContainer(t *check.C) {
- // TODO(mlaventure): Not sure what would the exit code be on windows
testRequires(t, DaemonIsLinux)
s.d.StartWithBusybox(t, "--live-restore")
@@ -2103,8 +2104,9 @@ func (s *DockerDaemonSuite) TestDaemonRestartWithUnpausedRunningContainer(t *che
// resume the container
result := icmd.RunCommand(
ctrBinary,
- "--address", "unix:///var/run/docker/libcontainerd/docker-containerd.sock",
- "containers", "resume", cid)
+ "--address", "/var/run/docker/containerd/docker-containerd.sock",
+ "--namespace", moby_daemon.MainNamespace,
+ "tasks", "resume", cid)
result.Assert(t, icmd.Success)
// Give time to containerd to process the command if we don't
diff --git a/integration-cli/docker_cli_events_test.go b/integration-cli/docker_cli_events_test.go
index e179a0ebd3..dff54a4463 100644
--- a/integration-cli/docker_cli_events_test.go
+++ b/integration-cli/docker_cli_events_test.go
@@ -86,6 +86,7 @@ func (s *DockerSuite) TestEventsLimit(c *check.C) {
// timeouts creating so many containers simultaneously. This is a due to
// a bug in the Windows platform. It will be fixed in a Windows Update.
numContainers := 17
+ eventPerContainer := 7 // create, attach, network connect, start, die, network disconnect, destroy
numConcurrentContainers := numContainers
if testEnv.DaemonPlatform() == "windows" {
numConcurrentContainers = 4
@@ -93,17 +94,19 @@ func (s *DockerSuite) TestEventsLimit(c *check.C) {
sem := make(chan bool, numConcurrentContainers)
errChan := make(chan error, numContainers)
+ startTime := daemonUnixTime(c)
+
args := []string{"run", "--rm", "busybox", "true"}
for i := 0; i < numContainers; i++ {
sem <- true
- go func() {
+ go func(i int) {
defer func() { <-sem }()
out, err := exec.Command(dockerBinary, args...).CombinedOutput()
if err != nil {
err = fmt.Errorf("%v: %s", err, string(out))
}
errChan <- err
- }()
+ }(i)
}
// Wait for all goroutines to finish
@@ -116,10 +119,10 @@ func (s *DockerSuite) TestEventsLimit(c *check.C) {
c.Assert(err, checker.IsNil, check.Commentf("%q failed with error", strings.Join(args, " ")))
}
- out, _ := dockerCmd(c, "events", "--since=0", "--until", daemonUnixTime(c))
+ out, _ := dockerCmd(c, "events", "--since="+startTime, "--until", daemonUnixTime(c))
events := strings.Split(out, "\n")
nEvents := len(events) - 1
- c.Assert(nEvents, checker.Equals, 256, check.Commentf("events should be limited to 256, but received %d", nEvents))
+ c.Assert(nEvents, checker.Equals, numContainers*eventPerContainer, check.Commentf("events should be limited to 256, but received %d", nEvents))
}
func (s *DockerSuite) TestEventsContainerEvents(c *check.C) {
@@ -533,7 +536,10 @@ func (s *DockerSuite) TestEventsAttach(c *check.C) {
c.Assert(err, checker.IsNil)
defer stdout.Close()
c.Assert(cmd.Start(), checker.IsNil)
- defer cmd.Process.Kill()
+ defer func() {
+ cmd.Process.Kill()
+ cmd.Wait()
+ }()
// Make sure we're done attaching by writing/reading some stuff
_, err = stdin.Write([]byte("hello\n"))
diff --git a/integration-cli/docker_cli_logs_test.go b/integration-cli/docker_cli_logs_test.go
index f75da1849c..41927a2806 100644
--- a/integration-cli/docker_cli_logs_test.go
+++ b/integration-cli/docker_cli_logs_test.go
@@ -230,6 +230,7 @@ func (s *DockerSuite) TestLogsFollowSlowStdoutConsumer(c *check.C) {
stdout, err := logCmd.StdoutPipe()
c.Assert(err, checker.IsNil)
c.Assert(logCmd.Start(), checker.IsNil)
+ defer func() { go logCmd.Wait() }()
// First read slowly
bytes1, err := ConsumeWithSpeed(stdout, 10, 50*time.Millisecond, stopSlowRead)
diff --git a/integration-cli/docker_cli_network_unix_test.go b/integration-cli/docker_cli_network_unix_test.go
index 4762e3993c..4bb542386b 100644
--- a/integration-cli/docker_cli_network_unix_test.go
+++ b/integration-cli/docker_cli_network_unix_test.go
@@ -1625,6 +1625,7 @@ func (s *DockerSuite) TestEmbeddedDNSInvalidInput(c *check.C) {
func (s *DockerSuite) TestDockerNetworkConnectFailsNoInspectChange(c *check.C) {
dockerCmd(c, "run", "-d", "--name=bb", "busybox", "top")
c.Assert(waitRun("bb"), check.IsNil)
+ defer dockerCmd(c, "stop", "bb")
ns0 := inspectField(c, "bb", "NetworkSettings.Networks.bridge")
diff --git a/integration-cli/docker_cli_run_test.go b/integration-cli/docker_cli_run_test.go
index 8198fded72..67bf585d04 100644
--- a/integration-cli/docker_cli_run_test.go
+++ b/integration-cli/docker_cli_run_test.go
@@ -2249,6 +2249,7 @@ func (s *DockerSuite) TestRunSlowStdoutConsumer(c *check.C) {
if err := cont.Start(); err != nil {
c.Fatal(err)
}
+ defer func() { go cont.Wait() }()
n, err := ConsumeWithSpeed(stdout, 10000, 5*time.Millisecond, nil)
if err != nil {
c.Fatal(err)
diff --git a/integration-cli/docker_deprecated_api_v124_test.go b/integration-cli/docker_deprecated_api_v124_test.go
index edf3e570f5..214ae08667 100644
--- a/integration-cli/docker_deprecated_api_v124_test.go
+++ b/integration-cli/docker_deprecated_api_v124_test.go
@@ -206,8 +206,10 @@ func (s *DockerSuite) TestDeprecatedPostContainersStartWithLinksInHostConfigIdLi
testRequires(c, DaemonIsLinux)
name := "test-host-config-links"
out, _ := dockerCmd(c, "run", "--name", "link0", "-d", "busybox", "top")
+ defer dockerCmd(c, "stop", "link0")
id := strings.TrimSpace(out)
dockerCmd(c, "create", "--name", name, "--link", id, "busybox", "top")
+ defer dockerCmd(c, "stop", name)
hc := inspectFieldJSON(c, name, "HostConfig")
config := `{"HostConfig":` + hc + `}`
diff --git a/integration-cli/events_utils_test.go b/integration-cli/events_utils_test.go
index 580188950a..356b2c326d 100644
--- a/integration-cli/events_utils_test.go
+++ b/integration-cli/events_utils_test.go
@@ -69,7 +69,7 @@ func (e *eventObserver) Start() error {
// Stop stops the events command.
func (e *eventObserver) Stop() {
e.command.Process.Kill()
- e.command.Process.Release()
+ e.command.Wait()
}
// Match tries to match the events output with a given matcher.
diff --git a/integration/service/create_test.go b/integration/service/create_test.go
index cb0823dfbd..e94185a542 100644
--- a/integration/service/create_test.go
+++ b/integration/service/create_test.go
@@ -1,6 +1,7 @@
package service
import (
+ "runtime"
"testing"
"time"
@@ -42,8 +43,15 @@ func TestCreateWithLBSandbox(t *testing.T) {
})
require.NoError(t, err)
+ pollSettings := func(config *poll.Settings) {
+ if runtime.GOARCH == "arm" {
+ config.Timeout = 30 * time.Second
+ config.Delay = 100 * time.Millisecond
+ }
+ }
+
serviceID := serviceResp.ID
- poll.WaitOn(t, serviceRunningTasksCount(client, serviceID, instances))
+ poll.WaitOn(t, serviceRunningTasksCount(client, serviceID, instances), pollSettings)
_, _, err = client.ServiceInspectWithRaw(context.Background(), serviceID, types.ServiceInspectOptions{})
require.NoError(t, err)
@@ -55,7 +63,7 @@ func TestCreateWithLBSandbox(t *testing.T) {
err = client.ServiceRemove(context.Background(), serviceID)
require.NoError(t, err)
- poll.WaitOn(t, serviceIsRemoved(client, serviceID))
+ poll.WaitOn(t, serviceIsRemoved(client, serviceID), pollSettings)
err = client.NetworkRemove(context.Background(), overlayID)
require.NoError(t, err)
diff --git a/libcontainerd/client.go b/libcontainerd/client.go
deleted file mode 100644
index c9004b813b..0000000000
--- a/libcontainerd/client.go
+++ /dev/null
@@ -1,46 +0,0 @@
-package libcontainerd
-
-import (
- "fmt"
- "sync"
-
- "github.com/docker/docker/pkg/locker"
-)
-
-// clientCommon contains the platform agnostic fields used in the client structure
-type clientCommon struct {
- backend Backend
- containers map[string]*container
- locker *locker.Locker
- mapMutex sync.RWMutex // protects read/write operations from containers map
-}
-
-func (clnt *client) lock(containerID string) {
- clnt.locker.Lock(containerID)
-}
-
-func (clnt *client) unlock(containerID string) {
- clnt.locker.Unlock(containerID)
-}
-
-// must hold a lock for cont.containerID
-func (clnt *client) appendContainer(cont *container) {
- clnt.mapMutex.Lock()
- clnt.containers[cont.containerID] = cont
- clnt.mapMutex.Unlock()
-}
-func (clnt *client) deleteContainer(containerID string) {
- clnt.mapMutex.Lock()
- delete(clnt.containers, containerID)
- clnt.mapMutex.Unlock()
-}
-
-func (clnt *client) getContainer(containerID string) (*container, error) {
- clnt.mapMutex.RLock()
- container, ok := clnt.containers[containerID]
- defer clnt.mapMutex.RUnlock()
- if !ok {
- return nil, fmt.Errorf("invalid container: %s", containerID) // fixme: typed error
- }
- return container, nil
-}
diff --git a/libcontainerd/client_daemon.go b/libcontainerd/client_daemon.go
new file mode 100644
index 0000000000..e6514374ce
--- /dev/null
+++ b/libcontainerd/client_daemon.go
@@ -0,0 +1,802 @@
+// +build !windows
+
+package libcontainerd
+
+import (
+ "context"
+ "encoding/json"
+ "fmt"
+ "io"
+ "os"
+ "path/filepath"
+ "reflect"
+ "runtime"
+ "strings"
+ "sync"
+ "syscall"
+ "time"
+
+ "google.golang.org/grpc"
+
+ "github.com/containerd/containerd"
+ eventsapi "github.com/containerd/containerd/api/services/events/v1"
+ "github.com/containerd/containerd/api/types"
+ "github.com/containerd/containerd/archive"
+ "github.com/containerd/containerd/content"
+ "github.com/containerd/containerd/images"
+ "github.com/containerd/containerd/linux/runcopts"
+ "github.com/containerd/typeurl"
+ "github.com/docker/docker/pkg/ioutils"
+ "github.com/opencontainers/image-spec/specs-go/v1"
+ "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/pkg/errors"
+ "github.com/sirupsen/logrus"
+)
+
+// InitProcessName is the process ID used to designate the first (init)
+// process of a container, i.e. the container's task itself as opposed to
+// one of the additional processes exec'd inside it.
+const InitProcessName = "init"
+
+// container is the client-side bookkeeping kept for every container known
+// to this client. The embedded Mutex guards the execs map.
+type container struct {
+ sync.Mutex
+
+ // bundleDir is the on-disk directory holding this container's state.
+ bundleDir string
+ // ctr is the containerd handle for the container.
+ ctr containerd.Container
+ // task is the running task; nil until Start succeeds and reset to nil
+ // once the task has been deleted.
+ task containerd.Task
+ // execs maps a process ID to the matching exec'd process. It is
+ // allocated lazily on the first Exec.
+ execs map[string]containerd.Process
+ // oomKilled is set once an OOM event has been received for the container.
+ oomKilled bool
+}
+
+// client talks to a containerd daemon on behalf of a Backend and tracks
+// the containers it has created or restored.
+type client struct {
+ sync.RWMutex // protects containers map
+
+ // remote is the connection to the containerd daemon.
+ remote *containerd.Client
+ // stateDir is the parent directory of every container bundle dir.
+ stateDir string
+ logger *logrus.Entry
+
+ // namespace is the containerd namespace whose task events are consumed.
+ namespace string
+ // backend is notified of every container event through ProcessEvent.
+ backend Backend
+ // eventQ holds the callbacks delivering events, keyed by container ID.
+ eventQ queue
+ // containers indexes the known containers by ID.
+ containers map[string]*container
+}
+
+// Restore loads the container identified by id from containerd, re-attaches
+// to its task's stdio through attachStdio and registers it with the client.
+//
+// It returns whether the task is still alive (any status but Stopped) and
+// the pid of the task. A container without a running task is not an error:
+// it is registered with a nil task and reported as not alive with pid 0.
+// Errors matching a "not found" condition are converted by wrapError.
+func (c *client) Restore(ctx context.Context, id string, attachStdio StdioCallback) (alive bool, pid int, err error) {
+ // The client lock is held for the whole call, including the remote calls.
+ c.Lock()
+ defer c.Unlock()
+
+ var cio containerd.IO
+ // Registered first so it runs last: normalizes the final error value.
+ defer func() {
+ err = wrapError(err)
+ }()
+
+ ctr, err := c.remote.LoadContainer(ctx, id)
+ if err != nil {
+ return false, -1, errors.WithStack(err)
+ }
+
+ // Release the attached IO if anything fails after it has been created.
+ defer func() {
+ if err != nil && cio != nil {
+ cio.Cancel()
+ cio.Close()
+ }
+ }()
+
+ t, err := ctr.Task(ctx, func(fifos *containerd.FIFOSet) (containerd.IO, error) {
+ // NOTE: this local shadows the io package within the callback.
+ io, err := newIOPipe(fifos)
+ if err != nil {
+ return nil, err
+ }
+
+ // Assign the outer cio so the deferred cleanup above can see it.
+ cio, err = attachStdio(io)
+ return cio, err
+ })
+ // A missing task only means the container is not running; carry on.
+ if err != nil && !strings.Contains(err.Error(), "no running task found") {
+ return false, -1, err
+ }
+
+ if t != nil {
+ s, err := t.Status(ctx)
+ if err != nil {
+ return false, -1, err
+ }
+
+ alive = s.Status != containerd.Stopped
+ pid = int(t.Pid())
+ }
+ c.containers[id] = &container{
+ bundleDir: filepath.Join(c.stateDir, id),
+ ctr: ctr,
+ task: t,
+ // TODO(mlaventure): load execs
+ }
+
+ c.logger.WithFields(logrus.Fields{
+ "container": id,
+ "alive": alive,
+ "pid": pid,
+ }).Debug("restored container")
+
+ return alive, pid, nil
+}
+
+// Create prepares the bundle directory for id, creates the matching
+// container in containerd from ociSpec and registers it with the client.
+// No task is created; that is the job of Start.
+//
+// It fails with a conflict error when id is already known to the client.
+func (c *client) Create(ctx context.Context, id string, ociSpec *specs.Spec, runtimeOptions interface{}) error {
+	if existing := c.getContainer(id); existing != nil {
+		return errors.WithStack(newConflictError("id already in use"))
+	}
+
+	bundle, err := prepareBundleDir(filepath.Join(c.stateDir, id), ociSpec)
+	if err != nil {
+		return wrapSystemError(errors.Wrap(err, "prepare bundle dir failed"))
+	}
+
+	c.logger.WithField("bundle", bundle).WithField("root", ociSpec.Root.Path).Debug("bundle dir created")
+
+	// TODO(mlaventure): when containerd support lcow, revisit runtime value
+	runtimeName := fmt.Sprintf("io.containerd.runtime.v1.%s", runtime.GOOS)
+	created, err := c.remote.NewContainer(ctx, id,
+		containerd.WithSpec(ociSpec),
+		containerd.WithRuntime(runtimeName, runtimeOptions))
+	if err != nil {
+		return err
+	}
+
+	entry := &container{
+		bundleDir: bundle,
+		ctr:       created,
+	}
+	c.Lock()
+	c.containers[id] = entry
+	c.Unlock()
+
+	return nil
+}
+
+// Start creates and starts a task for the container identified by id and
+// returns the pid of the task.
+//
+// When checkpointDir is not empty its content is uploaded to containerd's
+// content store and the task is created from that checkpoint; the uploaded
+// entry is deleted again before returning. attachStdio is invoked to attach
+// the task's stdio; withStdin controls whether a stdin pipe is set up.
+//
+// It fails with a not-found error for an unknown container and with a
+// conflict error if the container already has a task.
+func (c *client) Start(ctx context.Context, id, checkpointDir string, withStdin bool, attachStdio StdioCallback) (int, error) {
+ ctr := c.getContainer(id)
+ switch {
+ case ctr == nil:
+ return -1, errors.WithStack(newNotFoundError("no such container"))
+ case ctr.task != nil:
+ return -1, errors.WithStack(newConflictError("container already started"))
+ }
+
+ var (
+ cp *types.Descriptor
+ t containerd.Task
+ cio containerd.IO
+ err error
+ // Closed once the task is registered (or has failed to be created) so
+ // the stdin closer set up by createIO may proceed.
+ stdinCloseSync = make(chan struct{})
+ )
+
+ if checkpointDir != "" {
+ // write checkpoint to the content store
+ tar := archive.Diff(ctx, "", checkpointDir)
+ cp, err = c.writeContent(ctx, images.MediaTypeContainerd1Checkpoint, checkpointDir, tar)
+ // remove the checkpoint when we're done
+ defer func() {
+ if cp != nil {
+ err := c.remote.ContentStore().Delete(context.Background(), cp.Digest)
+ if err != nil {
+ c.logger.WithError(err).WithFields(logrus.Fields{
+ "ref": checkpointDir,
+ "digest": cp.Digest,
+ }).Warnf("failed to delete temporary checkpoint entry")
+ }
+ }
+ }()
+ // The close error is scoped to this statement; the upload error held
+ // in the outer err is checked right after.
+ if err := tar.Close(); err != nil {
+ return -1, errors.Wrap(err, "failed to close checkpoint tar stream")
+ }
+ if err != nil {
+ return -1, errors.Wrapf(err, "failed to upload checkpoint to containerd")
+ }
+ }
+
+ spec, err := ctr.ctr.Spec(ctx)
+ if err != nil {
+ return -1, errors.Wrap(err, "failed to retrieve spec")
+ }
+ // Owner to use for the task's IO, as derived from the spec.
+ uid, gid := getSpecUser(spec)
+ t, err = ctr.ctr.NewTask(ctx,
+ func(id string) (containerd.IO, error) {
+ // Assign the outer cio so it can be released if NewTask fails.
+ cio, err = c.createIO(ctr.bundleDir, id, InitProcessName, stdinCloseSync, withStdin, spec.Process.Terminal, attachStdio)
+ return cio, err
+ },
+ func(_ context.Context, _ *containerd.Client, info *containerd.TaskInfo) error {
+ info.Checkpoint = cp
+ info.Options = &runcopts.CreateOptions{
+ IoUid: uint32(uid),
+ IoGid: uint32(gid),
+ }
+ return nil
+ })
+ if err != nil {
+ // Unblock any pending stdin closer before releasing the IO.
+ close(stdinCloseSync)
+ if cio != nil {
+ cio.Cancel()
+ cio.Close()
+ }
+ return -1, err
+ }
+
+ // Register the task before starting it so it can be looked up by ID.
+ c.Lock()
+ c.containers[id].task = t
+ c.Unlock()
+
+ // Signal c.createIO that it can call CloseIO
+ close(stdinCloseSync)
+
+ if err := t.Start(ctx); err != nil {
+ // Roll back: delete the task and unregister it.
+ if _, err := t.Delete(ctx); err != nil {
+ c.logger.WithError(err).WithField("container", id).
+ Error("failed to delete task after fail start")
+ }
+ c.Lock()
+ c.containers[id].task = nil
+ c.Unlock()
+ return -1, err
+ }
+
+ return int(t.Pid()), nil
+}
+
+// Exec creates and starts an additional process, identified by processID,
+// inside the running task of containerID, and returns its pid.
+//
+// spec describes the process to run, withStdin controls whether a stdin
+// pipe is set up and attachStdio is invoked to attach the process stdio.
+//
+// It fails with a not-found error for an unknown container, an
+// invalid-parameter error when the container has no task, and a conflict
+// error when processID is already in use for that container.
+func (c *client) Exec(ctx context.Context, containerID, processID string, spec *specs.Process, withStdin bool, attachStdio StdioCallback) (int, error) {
+	ctr := c.getContainer(containerID)
+	switch {
+	case ctr == nil:
+		return -1, errors.WithStack(newNotFoundError("no such container"))
+	case ctr.task == nil:
+		return -1, errors.WithStack(newInvalidParameterError("container is not running"))
+	}
+
+	// execs is guarded by the container mutex everywhere else, so read it
+	// under that lock too. Indexing a nil map is safe and yields nil.
+	ctr.Lock()
+	inUse := ctr.execs[processID] != nil
+	ctr.Unlock()
+	if inUse {
+		return -1, errors.WithStack(newConflictError("id already in use"))
+	}
+
+	var (
+		p   containerd.Process
+		cio containerd.IO
+		err error
+		// Closed once the process is registered (or has failed to be
+		// created) so the stdin closer set up by createIO may proceed.
+		stdinCloseSync = make(chan struct{})
+	)
+	// Single cleanup point for the attached IO: every failure below leaves
+	// err set, so the IO is cancelled and closed exactly once.
+	defer func() {
+		if err != nil && cio != nil {
+			cio.Cancel()
+			cio.Close()
+		}
+	}()
+
+	p, err = ctr.task.Exec(ctx, processID, spec, func(id string) (containerd.IO, error) {
+		cio, err = c.createIO(ctr.bundleDir, containerID, processID, stdinCloseSync, withStdin, spec.Terminal, attachStdio)
+		return cio, err
+	})
+	if err != nil {
+		// Unblock any pending stdin closer; the deferred cleanup above
+		// releases the IO.
+		close(stdinCloseSync)
+		return -1, err
+	}
+
+	ctr.Lock()
+	if ctr.execs == nil {
+		ctr.execs = make(map[string]containerd.Process)
+	}
+	ctr.execs[processID] = p
+	ctr.Unlock()
+
+	// Signal c.createIO that it can call CloseIO
+	close(stdinCloseSync)
+
+	if err = p.Start(ctx); err != nil {
+		// Best-effort rollback: the start error is what gets reported.
+		p.Delete(context.Background())
+		ctr.Lock()
+		delete(ctr.execs, processID)
+		ctr.Unlock()
+		return -1, err
+	}
+
+	return int(p.Pid()), nil
+}
+
+// SignalProcess sends signal to the process identified by processID within
+// the container containerID.
+func (c *client) SignalProcess(ctx context.Context, containerID, processID string, signal int) error {
+	target, lookupErr := c.getProcess(containerID, processID)
+	if lookupErr != nil {
+		return lookupErr
+	}
+
+	sig := syscall.Signal(signal)
+	return target.Kill(ctx, sig)
+}
+
+// ResizeTerminal resizes the terminal of the process identified by
+// processID within the container containerID to width x height.
+func (c *client) ResizeTerminal(ctx context.Context, containerID, processID string, width, height int) error {
+	target, lookupErr := c.getProcess(containerID, processID)
+	if lookupErr != nil {
+		return lookupErr
+	}
+
+	w, h := uint32(width), uint32(height)
+	return target.Resize(ctx, w, h)
+}
+
+// CloseStdin asks containerd to close the stdin of the process identified
+// by processID within the container containerID.
+func (c *client) CloseStdin(ctx context.Context, containerID, processID string) error {
+	target, lookupErr := c.getProcess(containerID, processID)
+	if lookupErr != nil {
+		return lookupErr
+	}
+
+	return target.CloseIO(ctx, containerd.WithStdinCloser)
+}
+
+// Pause pauses the task of the container identified by containerID.
+func (c *client) Pause(ctx context.Context, containerID string) error {
+	initProc, lookupErr := c.getProcess(containerID, InitProcessName)
+	if lookupErr != nil {
+		return lookupErr
+	}
+
+	// The init process returned by getProcess is the container's task.
+	task := initProc.(containerd.Task)
+	return task.Pause(ctx)
+}
+
+// Resume resumes the task of the container identified by containerID.
+func (c *client) Resume(ctx context.Context, containerID string) error {
+	initProc, lookupErr := c.getProcess(containerID, InitProcessName)
+	if lookupErr != nil {
+		return lookupErr
+	}
+
+	// The init process returned by getProcess is the container's task.
+	task := initProc.(containerd.Task)
+	return task.Resume(ctx)
+}
+
+// Stats fetches the current metrics of the container's task from
+// containerd and converts them into a Stats value.
+func (c *client) Stats(ctx context.Context, containerID string) (*Stats, error) {
+	initProc, err := c.getProcess(containerID, InitProcessName)
+	if err != nil {
+		return nil, err
+	}
+
+	// The init process returned by getProcess is the container's task.
+	task := initProc.(containerd.Task)
+	metrics, err := task.Metrics(ctx)
+	if err != nil {
+		return nil, err
+	}
+
+	// The metrics payload is a typed Any; decode it before converting.
+	decoded, err := typeurl.UnmarshalAny(metrics.Data)
+	if err != nil {
+		return nil, err
+	}
+
+	return interfaceToStats(metrics.Timestamp, decoded), nil
+}
+
+// ListPids returns the pids of the processes reported by containerd for the
+// task of the container identified by containerID.
+func (c *client) ListPids(ctx context.Context, containerID string) ([]uint32, error) {
+	initProc, err := c.getProcess(containerID, InitProcessName)
+	if err != nil {
+		return nil, err
+	}
+
+	// The init process returned by getProcess is the container's task.
+	task := initProc.(containerd.Task)
+	procInfos, err := task.Pids(ctx)
+	if err != nil {
+		return nil, err
+	}
+
+	// Left nil when the task reports no process.
+	var result []uint32
+	for idx := range procInfos {
+		result = append(result, procInfos[idx].Pid)
+	}
+
+	return result, nil
+}
+
+// Summary returns one Summary per process reported by containerd for the
+// task of the container identified by containerID. It stops at the first
+// process whose details cannot be decoded or converted.
+func (c *client) Summary(ctx context.Context, containerID string) ([]Summary, error) {
+	initProc, err := c.getProcess(containerID, InitProcessName)
+	if err != nil {
+		return nil, err
+	}
+
+	// The init process returned by getProcess is the container's task.
+	task := initProc.(containerd.Task)
+	procInfos, err := task.Pids(ctx)
+	if err != nil {
+		return nil, err
+	}
+
+	// Left nil when the task reports no process.
+	var summaries []Summary
+	for idx := range procInfos {
+		details, err := typeurl.UnmarshalAny(procInfos[idx].Info)
+		if err != nil {
+			return nil, errors.Wrap(err, "unable to decode process details")
+		}
+
+		summary, err := summaryFromInterface(details)
+		if err != nil {
+			return nil, err
+		}
+		summaries = append(summaries, *summary)
+	}
+
+	return summaries, nil
+}
+
+// DeleteTask deletes the task of the container identified by containerID
+// and returns its exit code and exit time.
+//
+// Failures are not reported: when the task cannot be found or deleted, an
+// exit code of 255 and the current time are returned with a nil error.
+func (c *client) DeleteTask(ctx context.Context, containerID string) (uint32, time.Time, error) {
+	initProc, lookupErr := c.getProcess(containerID, InitProcessName)
+	if lookupErr != nil {
+		return 255, time.Now(), nil
+	}
+
+	exit, deleteErr := initProc.(containerd.Task).Delete(ctx)
+	if deleteErr != nil {
+		return 255, time.Now(), nil
+	}
+
+	// Forget the task so the container may be started again.
+	c.Lock()
+	if known, found := c.containers[containerID]; found {
+		known.task = nil
+	}
+	c.Unlock()
+
+	return exit.ExitCode(), exit.ExitTime(), nil
+}
+
+// Delete removes the container identified by containerID from containerd,
+// removes its bundle directory and forgets it.
+//
+// Setting the LIBCONTAINERD_NOCLEAN environment variable to "1" keeps the
+// bundle directory on disk (useful for debugging). A failure to remove the
+// directory is logged but does not fail the call.
+//
+// It fails with a not-found error for an unknown container, or with the
+// error returned by containerd when the container cannot be deleted.
+func (c *client) Delete(ctx context.Context, containerID string) error {
+	ctr := c.getContainer(containerID)
+	if ctr == nil {
+		return errors.WithStack(newNotFoundError("no such container"))
+	}
+
+	if err := ctr.ctr.Delete(ctx); err != nil {
+		return err
+	}
+
+	// Clean up unless explicitly asked not to.
+	if os.Getenv("LIBCONTAINERD_NOCLEAN") != "1" {
+		if err := os.RemoveAll(ctr.bundleDir); err != nil {
+			c.logger.WithError(err).WithFields(logrus.Fields{
+				"container": containerID,
+				"bundle":    ctr.bundleDir,
+			}).Error("failed to remove state dir")
+		}
+	}
+
+	c.removeContainer(containerID)
+
+	return nil
+}
+
+// Status returns the status of the task of the container identified by
+// containerID, as reported by containerd.
+//
+// It returns StatusUnknown together with a not-found error when the
+// container is unknown or has no task (created but not started, or already
+// deleted), and with containerd's error when the status cannot be fetched.
+func (c *client) Status(ctx context.Context, containerID string) (Status, error) {
+	ctr := c.getContainer(containerID)
+	switch {
+	case ctr == nil:
+		return StatusUnknown, errors.WithStack(newNotFoundError("no such container"))
+	case ctr.task == nil:
+		// Calling a method on the nil task interface would panic.
+		return StatusUnknown, errors.WithStack(newNotFoundError("container is not running"))
+	}
+
+	s, err := ctr.task.Status(ctx)
+	if err != nil {
+		return StatusUnknown, err
+	}
+
+	return Status(s.Status), nil
+}
+
+// CreateCheckpoint checkpoints the task of the container identified by
+// containerID and extracts the checkpoint data into checkpointDir.
+//
+// The checkpoint image created in containerd is only temporary: it is
+// deleted again before returning, whatever the outcome.
+//
+// NOTE(review): the exit parameter is currently not used by this
+// implementation.
+func (c *client) CreateCheckpoint(ctx context.Context, containerID, checkpointDir string, exit bool) error {
+	p, err := c.getProcess(containerID, InitProcessName)
+	if err != nil {
+		return err
+	}
+
+	img, err := p.(containerd.Task).Checkpoint(ctx)
+	if err != nil {
+		return err
+	}
+	// Whatever happens, delete the checkpoint from containerd
+	defer func() {
+		err := c.remote.ImageService().Delete(context.Background(), img.Name())
+		if err != nil {
+			c.logger.WithError(err).WithField("digest", img.Target().Digest).
+				Warnf("failed to delete checkpoint image")
+		}
+	}()
+
+	// The image target is an index listing the blobs of the checkpoint.
+	b, err := content.ReadBlob(ctx, c.remote.ContentStore(), img.Target().Digest)
+	if err != nil {
+		return wrapSystemError(errors.Wrapf(err, "failed to retrieve checkpoint data"))
+	}
+	var index v1.Index
+	if err := json.Unmarshal(b, &index); err != nil {
+		return wrapSystemError(errors.Wrapf(err, "failed to decode checkpoint data"))
+	}
+
+	// Point at the slice element rather than at a copy held by a loop
+	// variable.
+	var cpDesc *v1.Descriptor
+	for i := range index.Manifests {
+		if index.Manifests[i].MediaType == images.MediaTypeContainerd1Checkpoint {
+			cpDesc = &index.Manifests[i]
+			break
+		}
+	}
+	if cpDesc == nil {
+		// err is nil at this point and wrapping a nil error yields nil, so
+		// a fresh error is needed for the failure to be reported.
+		return wrapSystemError(errors.New("invalid checkpoint"))
+	}
+
+	rat, err := c.remote.ContentStore().ReaderAt(ctx, cpDesc.Digest)
+	if err != nil {
+		return wrapSystemError(errors.Wrapf(err, "failed to get checkpoint reader"))
+	}
+	defer rat.Close()
+	if _, err := archive.Apply(ctx, checkpointDir, content.NewReader(rat)); err != nil {
+		return wrapSystemError(errors.Wrapf(err, "failed to read checkpoint reader"))
+	}
+
+	return nil
+}
+
+// getContainer returns the container registered under id, or nil when
+// there is none.
+func (c *client) getContainer(id string) *container {
+	c.RLock()
+	defer c.RUnlock()
+
+	return c.containers[id]
+}
+
+// removeContainer forgets the container registered under id, if any.
+func (c *client) removeContainer(id string) {
+	c.Lock()
+	defer c.Unlock()
+
+	delete(c.containers, id)
+}
+
+// getProcess looks up the process identified by processID within the
+// container containerID. InitProcessName designates the container's task;
+// any other ID is looked up among the container's exec'd processes.
+//
+// A not-found error is returned when the container is unknown, has no
+// task, or has no exec registered under processID.
+func (c *client) getProcess(containerID, processID string) (containerd.Process, error) {
+	ctr := c.getContainer(containerID)
+	if ctr == nil {
+		return nil, errors.WithStack(newNotFoundError("no such container"))
+	}
+	if ctr.task == nil {
+		return nil, errors.WithStack(newNotFoundError("container is not running"))
+	}
+	if processID == InitProcessName {
+		return ctr.task, nil
+	}
+
+	// Anything else is an exec; the map is guarded by the container mutex.
+	ctr.Lock()
+	defer ctr.Unlock()
+
+	if ctr.execs == nil {
+		return nil, errors.WithStack(newNotFoundError("no execs"))
+	}
+
+	proc := ctr.execs[processID]
+	if proc == nil {
+		return nil, errors.WithStack(newNotFoundError("no such exec"))
+	}
+
+	return proc, nil
+}
+
+// createIO creates the io to be used by a process and hands it over to
+// attachStdio.
+//
+// When the process has a stdin pipe, the pipe is wrapped so that closing it
+// also asks containerd to close the process' stdin. As the process may not
+// have been registered yet at that time, that request waits for
+// stdinCloseSync to be closed before looking the process up by its IDs.
+//
+// If attachStdio fails, the freshly created pipes are cancelled and closed.
+func (c *client) createIO(bundleDir, containerID, processID string, stdinCloseSync chan struct{}, withStdin, withTerminal bool, attachStdio StdioCallback) (containerd.IO, error) {
+ fifos := newFIFOSet(bundleDir, containerID, processID, withStdin, withTerminal)
+ // NOTE: this local shadows the io package for the rest of the function.
+ io, err := newIOPipe(fifos)
+ if err != nil {
+ return nil, err
+ }
+
+ if io.Stdin != nil {
+ var (
+ // err holds the result of closing the pipe; it is what the wrapper's
+ // close function returns on every call.
+ err error
+ // stdinOnce makes sure the pipe is only closed once.
+ stdinOnce sync.Once
+ )
+ pipe := io.Stdin
+ io.Stdin = ioutils.NewWriteCloserWrapper(pipe, func() error {
+ stdinOnce.Do(func() {
+ err = pipe.Close()
+ // Do the rest in a new routine to avoid a deadlock if the
+ // Exec/Start call failed.
+ go func() {
+ <-stdinCloseSync
+ // These errors are local to the goroutine and are not reported.
+ p, err := c.getProcess(containerID, processID)
+ if err == nil {
+ err = p.CloseIO(context.Background(), containerd.WithStdinCloser)
+ if err != nil && strings.Contains(err.Error(), "transport is closing") {
+ err = nil
+ }
+ }
+ }()
+ })
+ return err
+ })
+ }
+
+ cio, err := attachStdio(io)
+ if err != nil {
+ io.Cancel()
+ io.Close()
+ }
+ return cio, err
+}
+
+// processEvent queues the delivery of the event (et, ei) to the backend on
+// the event queue of the container the event belongs to.
+//
+// Once the backend has been notified of the exit of an exec'd process (an
+// exit event whose process ID differs from the container ID), that process
+// is deleted from containerd and removed from the container's execs.
+// Failures are logged; nothing is returned to the caller.
+func (c *client) processEvent(ctr *container, et EventType, ei EventInfo) {
+	c.eventQ.append(ei.ContainerID, func() {
+		err := c.backend.ProcessEvent(ei.ContainerID, et, ei)
+		if err != nil {
+			c.logger.WithError(err).WithFields(logrus.Fields{
+				"container":  ei.ContainerID,
+				"event":      et,
+				"event-info": ei,
+			}).Error("failed to process event")
+		}
+
+		if et == EventExit && ei.ProcessID != ei.ContainerID {
+			var p containerd.Process
+			ctr.Lock()
+			if ctr.execs != nil {
+				p = ctr.execs[ei.ProcessID]
+			}
+			ctr.Unlock()
+			if p == nil {
+				c.logger.WithError(errors.New("no such process")).
+					WithFields(logrus.Fields{
+						"container": ei.ContainerID,
+						"process":   ei.ProcessID,
+					}).Error("exit event")
+				return
+			}
+			_, err = p.Delete(context.Background())
+			if err != nil {
+				c.logger.WithError(err).WithFields(logrus.Fields{
+					"container": ei.ContainerID,
+					"process":   ei.ProcessID,
+				}).Warn("failed to delete process")
+			}
+			// execs is guarded by the container mutex, not by the client
+			// lock.
+			ctr.Lock()
+			delete(ctr.execs, ei.ProcessID)
+			ctr.Unlock()
+		}
+	})
+}
+
+// processEventStream subscribes to the task events of the client's
+// namespace and dispatches each of them through processEvent until the
+// stream fails.
+//
+// When the subscription or a receive fails, a new processEventStream is
+// started in its own goroutine, unless ctx is done, in which case the
+// function simply returns. Events that cannot be decoded, are of an unknown
+// type, or refer to an unknown container are logged and skipped.
+func (c *client) processEventStream(ctx context.Context) {
+	var (
+		err         error
+		eventStream eventsapi.Events_SubscribeClient
+		ev          *eventsapi.Envelope
+		et          EventType
+		ei          EventInfo
+		ctr         *container
+	)
+	defer func() {
+		if err != nil {
+			select {
+			case <-ctx.Done():
+				c.logger.WithError(ctx.Err()).
+					Info("stopping event stream following graceful shutdown")
+			default:
+				go c.processEventStream(ctx)
+			}
+		}
+	}()
+
+	eventStream, err = c.remote.EventService().Subscribe(ctx, &eventsapi.SubscribeRequest{
+		Filters: []string{"namespace==" + c.namespace + ",topic~=/tasks/.+"},
+	}, grpc.FailFast(false))
+	if err != nil {
+		return
+	}
+
+	for {
+		// Scoped to a single event: an OOM notification must only ever be
+		// recorded on the container it was received for, even when that
+		// container turns out to be unknown.
+		var oomKilled bool
+
+		ev, err = eventStream.Recv()
+		if err != nil {
+			c.logger.WithError(err).Error("failed to get event")
+			return
+		}
+
+		if ev.Event == nil {
+			c.logger.WithField("event", ev).Warn("invalid event")
+			continue
+		}
+
+		// This err is local to the iteration: a decoding failure must not
+		// trigger the resubscription done by the deferred function.
+		v, err := typeurl.UnmarshalAny(ev.Event)
+		if err != nil {
+			c.logger.WithError(err).WithField("event", ev).Warn("failed to unmarshal event")
+			continue
+		}
+
+		c.logger.WithField("topic", ev.Topic).Debug("event")
+
+		switch t := v.(type) {
+		case *eventsapi.TaskCreate:
+			et = EventCreate
+			ei = EventInfo{
+				ContainerID: t.ContainerID,
+				ProcessID:   t.ContainerID,
+				Pid:         t.Pid,
+			}
+		case *eventsapi.TaskStart:
+			et = EventStart
+			ei = EventInfo{
+				ContainerID: t.ContainerID,
+				ProcessID:   t.ContainerID,
+				Pid:         t.Pid,
+			}
+		case *eventsapi.TaskExit:
+			et = EventExit
+			ei = EventInfo{
+				ContainerID: t.ContainerID,
+				ProcessID:   t.ID,
+				Pid:         t.Pid,
+				ExitCode:    t.ExitStatus,
+				ExitedAt:    t.ExitedAt,
+			}
+		case *eventsapi.TaskOOM:
+			et = EventOOM
+			ei = EventInfo{
+				ContainerID: t.ContainerID,
+				OOMKilled:   true,
+			}
+			oomKilled = true
+		case *eventsapi.TaskExecAdded:
+			et = EventExecAdded
+			ei = EventInfo{
+				ContainerID: t.ContainerID,
+				ProcessID:   t.ExecID,
+			}
+		case *eventsapi.TaskExecStarted:
+			et = EventExecStarted
+			ei = EventInfo{
+				ContainerID: t.ContainerID,
+				ProcessID:   t.ExecID,
+				Pid:         t.Pid,
+			}
+		case *eventsapi.TaskPaused:
+			et = EventPaused
+			ei = EventInfo{
+				ContainerID: t.ContainerID,
+			}
+		case *eventsapi.TaskResumed:
+			et = EventResumed
+			ei = EventInfo{
+				ContainerID: t.ContainerID,
+			}
+		default:
+			c.logger.WithFields(logrus.Fields{
+				"topic": ev.Topic,
+				"type":  reflect.TypeOf(t)},
+			).Info("ignoring event")
+			continue
+		}
+
+		ctr = c.getContainer(ei.ContainerID)
+		if ctr == nil {
+			c.logger.WithField("container", ei.ContainerID).Warn("unknown container")
+			continue
+		}
+
+		// Remember the OOM on the container so that later events for it
+		// (e.g. its exit) carry the information as well.
+		if oomKilled {
+			ctr.oomKilled = true
+		}
+		ei.OOMKilled = ctr.oomKilled
+
+		c.processEvent(ctr, et, ei)
+	}
+}
+
+// writeContent streams r into containerd's content store under the
+// reference ref and returns a descriptor of the committed blob, tagged
+// with mediaType. The blob is committed with a "containerd.io/gc.root"
+// label set to the current UTC time.
+func (c *client) writeContent(ctx context.Context, mediaType, ref string, r io.Reader) (*types.Descriptor, error) {
+	w, err := c.remote.ContentStore().Writer(ctx, ref, 0, "")
+	if err != nil {
+		return nil, err
+	}
+	defer w.Close()
+
+	written, err := io.Copy(w, r)
+	if err != nil {
+		return nil, err
+	}
+
+	gcLabels := map[string]string{
+		"containerd.io/gc.root": time.Now().UTC().Format(time.RFC3339),
+	}
+	if err := w.Commit(ctx, 0, "", content.WithLabels(gcLabels)); err != nil {
+		return nil, err
+	}
+
+	desc := &types.Descriptor{
+		MediaType: mediaType,
+		Digest:    w.Digest(),
+		Size_:     written,
+	}
+	return desc, nil
+}
+
+// wrapError converts err into a not-found error when its message contains
+// one of the known "not found" markers. Any other error, including nil, is
+// returned unchanged.
+func wrapError(err error) error {
+	if err == nil {
+		return err
+	}
+
+	text := err.Error()
+	markers := []string{"container does not exist", "not found", "no such container"}
+	for idx := range markers {
+		if strings.Contains(text, markers[idx]) {
+			return wrapNotFoundError(err)
+		}
+	}
+
+	return err
+}
diff --git a/libcontainerd/client_daemon_linux.go b/libcontainerd/client_daemon_linux.go
new file mode 100644
index 0000000000..03371954cc
--- /dev/null
+++ b/libcontainerd/client_daemon_linux.go
@@ -0,0 +1,96 @@
+package libcontainerd
+
+import (
+ "context"
+ "fmt"
+ "os"
+ "path/filepath"
+ "strings"
+
+ "github.com/containerd/containerd"
+ "github.com/docker/docker/pkg/idtools"
+ specs "github.com/opencontainers/runtime-spec/specs-go"
+)
+
+func summaryFromInterface(i interface{}) (*Summary, error) {
+ return &Summary{}, nil
+}
+
+ // UpdateResources looks up the init process of containerID and applies
+ // resources to it through the task's Update call. It returns the lookup error
+ // or whatever Update returns.
+ func (c *client) UpdateResources(ctx context.Context, containerID string, resources *Resources) error {
+ 	initProc, err := c.getProcess(containerID, InitProcessName)
+ 	if err != nil {
+ 		return err
+ 	}
+
+ 	// go doesn't like the alias in 1.8, this means this need to be
+ 	// platform specific
+ 	task := initProc.(containerd.Task)
+ 	linuxResources := (*specs.LinuxResources)(resources)
+ 	return task.Update(ctx, containerd.WithResources(linuxResources))
+ }
+
+ // hostIDFromMap translates a container-side id into the matching host-side id
+ // using the mappings in mp. The first mapping whose range
+ // [ContainerID, ContainerID+Size) contains id is used. When no mapping covers
+ // id, 0 is returned.
+ func hostIDFromMap(id uint32, mp []specs.LinuxIDMapping) int {
+ 	for _, m := range mp {
+ 		// Compare the offset against Size rather than computing
+ 		// ContainerID+Size-1: that expression wraps around in uint32 when
+ 		// Size is 0 (an empty mapping at ContainerID 0 would match every
+ 		// id) and when the range reaches the top of the uint32 space (the
+ 		// mapping would never match).
+ 		if id >= m.ContainerID && id-m.ContainerID < m.Size {
+ 			return int(m.HostID + id - m.ContainerID)
+ 		}
+ 	}
+ 	return 0
+ }
+
+func getSpecUser(ociSpec *specs.Spec) (int, int) {
+ var (
+ uid int
+ gid int
+ )
+
+ for _, ns := range ociSpec.Linux.Namespaces {
+ if ns.Type == specs.UserNamespace {
+ uid = hostIDFromMap(0, ociSpec.Linux.UIDMappings)
+ gid = hostIDFromMap(0, ociSpec.Linux.GIDMappings)
+ break
+ }
+ }
+
+ return uid, gid
+}
+
+ // prepareBundleDir makes sure a bundle directory usable by the spec's user
+ // exists and returns the path that was actually prepared.
+ //
+ // When getSpecUser reports uid 0 and gid 0, bundleDir itself is created
+ // (mode 0755, owned by 0:0) and returned.
+ //
+ // Otherwise the path is walked one component at a time from the filesystem
+ // root. Every component that does not exist, or that lacks the "others
+ // execute" permission bit, is replaced by a sibling named
+ // "<component>.<uid>.<gid>" created with mode 0700 and owned by uid:gid. The
+ // walk then continues below that replacement, so the returned path may differ
+ // from bundleDir.
+ //
+ // NOTE(review): the walk skips the first element of the split path, which
+ // assumes bundleDir is absolute — confirm against callers.
+ func prepareBundleDir(bundleDir string, ociSpec *specs.Spec) (string, error) {
+ 	uid, gid := getSpecUser(ociSpec)
+ 	if uid == 0 && gid == 0 {
+ 		return bundleDir, idtools.MkdirAllAndChownNew(bundleDir, 0755, idtools.IDPair{UID: 0, GID: 0})
+ 	}
+
+ 	p := string(filepath.Separator)
+ 	components := strings.Split(bundleDir, string(filepath.Separator))
+ 	for _, d := range components[1:] {
+ 		p = filepath.Join(p, d)
+ 		fi, err := os.Stat(p)
+ 		if err != nil && !os.IsNotExist(err) {
+ 			return "", err
+ 		}
+ 		// Mode()&1 is the "others execute" bit: without it the mapped user
+ 		// cannot traverse this directory, so a private one is used instead.
+ 		if os.IsNotExist(err) || fi.Mode()&1 == 0 {
+ 			p = fmt.Sprintf("%s.%d.%d", p, uid, gid)
+ 			// An already existing private directory is fine and reused.
+ 			if err := idtools.MkdirAndChown(p, 0700, idtools.IDPair{UID: uid, GID: gid}); err != nil && !os.IsExist(err) {
+ 				return "", err
+ 			}
+ 		}
+ 	}
+
+ 	return p, nil
+ }
+
+ // newFIFOSet builds the set of stdio FIFO paths for processID, all located
+ // inside bundleDir and named "<processID>-<stream>". Stdout is always set,
+ // stdin only when withStdin is true, and stderr only when no terminal is
+ // requested. containerID is not used by this implementation.
+ func newFIFOSet(bundleDir, containerID, processID string, withStdin, withTerminal bool) *containerd.FIFOSet {
+ 	fifoPath := func(stream string) string {
+ 		return filepath.Join(bundleDir, processID+"-"+stream)
+ 	}
+
+ 	set := &containerd.FIFOSet{Terminal: withTerminal}
+ 	set.Out = fifoPath("stdout")
+ 	if withStdin {
+ 		set.In = fifoPath("stdin")
+ 	}
+ 	if !withTerminal {
+ 		set.Err = fifoPath("stderr")
+ 	}
+ 	return set
+ }
diff --git a/libcontainerd/client_daemon_windows.go b/libcontainerd/client_daemon_windows.go
new file mode 100644
index 0000000000..9bb5d86f44
--- /dev/null
+++ b/libcontainerd/client_daemon_windows.go
@@ -0,0 +1,53 @@
+package libcontainerd
+
+import (
+ "fmt"
+
+ "github.com/containerd/containerd"
+ "github.com/containerd/containerd/windows/hcsshimtypes"
+ specs "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/pkg/errors"
+)
+
+func summaryFromInterface(i interface{}) (*Summary, error) {
+ switch pd := i.(type) {
+ case *hcsshimtypes.ProcessDetails:
+ return &Summary{
+ CreateTimestamp: pd.CreatedAt,
+ ImageName: pd.ImageName,
+ KernelTime100ns: pd.KernelTime_100Ns,
+ MemoryCommitBytes: pd.MemoryCommitBytes,
+ MemoryWorkingSetPrivateBytes: pd.MemoryWorkingSetPrivateBytes,
+ MemoryWorkingSetSharedBytes: pd.MemoryWorkingSetSharedBytes,
+ ProcessId: pd.ProcessID,
+ UserTime100ns: pd.UserTime_100Ns,
+ }, nil
+ default:
+ return nil, errors.Errorf("Unknown process details type %T", pd)
+ }
+}
+
+ // prepareBundleDir returns bundleDir unchanged with a nil error; ociSpec is
+ // not consulted by this implementation.
+ func prepareBundleDir(bundleDir string, ociSpec *specs.Spec) (string, error) {
+ return bundleDir, nil
+ }
+
+func pipeName(containerID, processID, name string) string {
+ return fmt.Sprintf(`\\.\pipe\containerd-%s-%s-%s`, containerID, processID, name)
+}
+
+ // newFIFOSet builds the set of stdio named pipes for a process, using
+ // pipeName for every stream. Stdout is always set, stdin only when withStdin
+ // is true, and stderr only when no terminal is requested. bundleDir is not
+ // used by this implementation.
+ func newFIFOSet(bundleDir, containerID, processID string, withStdin, withTerminal bool) *containerd.FIFOSet {
+ 	streamPipe := func(stream string) string {
+ 		return pipeName(containerID, processID, stream)
+ 	}
+
+ 	set := &containerd.FIFOSet{Terminal: withTerminal}
+ 	set.Out = streamPipe("stdout")
+ 	if withStdin {
+ 		set.In = streamPipe("stdin")
+ 	}
+ 	if !withTerminal {
+ 		set.Err = streamPipe("stderr")
+ 	}
+ 	return set
+ }
diff --git a/libcontainerd/client_linux.go b/libcontainerd/client_linux.go
deleted file mode 100644
index 12808fd0c1..0000000000
--- a/libcontainerd/client_linux.go
+++ /dev/null
@@ -1,616 +0,0 @@
-package libcontainerd
-
-import (
- "fmt"
- "os"
- "strings"
- "sync"
- "time"
-
- containerd "github.com/containerd/containerd/api/grpc/types"
- containerd_runtime_types "github.com/containerd/containerd/runtime"
- "github.com/docker/docker/pkg/ioutils"
- "github.com/docker/docker/pkg/mount"
- "github.com/golang/protobuf/ptypes"
- "github.com/golang/protobuf/ptypes/timestamp"
- specs "github.com/opencontainers/runtime-spec/specs-go"
- "github.com/sirupsen/logrus"
- "golang.org/x/net/context"
- "golang.org/x/sys/unix"
-)
-
-type client struct {
- clientCommon
-
- // Platform specific properties below here.
- remote *remote
- q queue
- exitNotifiers map[string]*exitNotifier
- liveRestore bool
-}
-
-// GetServerVersion returns the connected server version information
-func (clnt *client) GetServerVersion(ctx context.Context) (*ServerVersion, error) {
- resp, err := clnt.remote.apiClient.GetServerVersion(ctx, &containerd.GetServerVersionRequest{})
- if err != nil {
- return nil, err
- }
-
- sv := &ServerVersion{
- GetServerVersionResponse: *resp,
- }
-
- return sv, nil
-}
-
-// AddProcess is the handler for adding a process to an already running
-// container. It's called through docker exec. It returns the system pid of the
-// exec'd process.
-func (clnt *client) AddProcess(ctx context.Context, containerID, processFriendlyName string, specp Process, attachStdio StdioCallback) (pid int, err error) {
- clnt.lock(containerID)
- defer clnt.unlock(containerID)
- container, err := clnt.getContainer(containerID)
- if err != nil {
- return -1, err
- }
-
- spec, err := container.spec()
- if err != nil {
- return -1, err
- }
- sp := spec.Process
- sp.Args = specp.Args
- sp.Terminal = specp.Terminal
- if len(specp.Env) > 0 {
- sp.Env = specp.Env
- }
- if specp.Cwd != nil {
- sp.Cwd = *specp.Cwd
- }
- if specp.User != nil {
- sp.User = specs.User{
- UID: specp.User.UID,
- GID: specp.User.GID,
- AdditionalGids: specp.User.AdditionalGids,
- }
- }
- if specp.Capabilities != nil {
- sp.Capabilities.Bounding = specp.Capabilities
- sp.Capabilities.Effective = specp.Capabilities
- sp.Capabilities.Inheritable = specp.Capabilities
- sp.Capabilities.Permitted = specp.Capabilities
- }
-
- p := container.newProcess(processFriendlyName)
-
- r := &containerd.AddProcessRequest{
- Args: sp.Args,
- Cwd: sp.Cwd,
- Terminal: sp.Terminal,
- Id: containerID,
- Env: sp.Env,
- User: &containerd.User{
- Uid: sp.User.UID,
- Gid: sp.User.GID,
- AdditionalGids: sp.User.AdditionalGids,
- },
- Pid: processFriendlyName,
- Stdin: p.fifo(unix.Stdin),
- Stdout: p.fifo(unix.Stdout),
- Stderr: p.fifo(unix.Stderr),
- Capabilities: sp.Capabilities.Effective,
- ApparmorProfile: sp.ApparmorProfile,
- SelinuxLabel: sp.SelinuxLabel,
- NoNewPrivileges: sp.NoNewPrivileges,
- Rlimits: convertRlimits(sp.Rlimits),
- }
-
- fifoCtx, cancel := context.WithCancel(context.Background())
- defer func() {
- if err != nil {
- cancel()
- }
- }()
-
- iopipe, err := p.openFifos(fifoCtx, sp.Terminal)
- if err != nil {
- return -1, err
- }
-
- resp, err := clnt.remote.apiClient.AddProcess(ctx, r)
- if err != nil {
- p.closeFifos(iopipe)
- return -1, err
- }
-
- var stdinOnce sync.Once
- stdin := iopipe.Stdin
- iopipe.Stdin = ioutils.NewWriteCloserWrapper(stdin, func() error {
- var err error
- stdinOnce.Do(func() { // on error from attach we don't know if stdin was already closed
- err = stdin.Close()
- if err2 := p.sendCloseStdin(); err == nil {
- err = err2
- }
- })
- return err
- })
-
- container.processes[processFriendlyName] = p
-
- if err := attachStdio(*iopipe); err != nil {
- p.closeFifos(iopipe)
- return -1, err
- }
-
- return int(resp.SystemPid), nil
-}
-
-func (clnt *client) SignalProcess(containerID string, pid string, sig int) error {
- clnt.lock(containerID)
- defer clnt.unlock(containerID)
- _, err := clnt.remote.apiClient.Signal(context.Background(), &containerd.SignalRequest{
- Id: containerID,
- Pid: pid,
- Signal: uint32(sig),
- })
- return err
-}
-
-func (clnt *client) Resize(containerID, processFriendlyName string, width, height int) error {
- clnt.lock(containerID)
- defer clnt.unlock(containerID)
- if _, err := clnt.getContainer(containerID); err != nil {
- return err
- }
- _, err := clnt.remote.apiClient.UpdateProcess(context.Background(), &containerd.UpdateProcessRequest{
- Id: containerID,
- Pid: processFriendlyName,
- Width: uint32(width),
- Height: uint32(height),
- })
- return err
-}
-
-func (clnt *client) Pause(containerID string) error {
- return clnt.setState(containerID, StatePause)
-}
-
-func (clnt *client) setState(containerID, state string) error {
- clnt.lock(containerID)
- container, err := clnt.getContainer(containerID)
- if err != nil {
- clnt.unlock(containerID)
- return err
- }
- if container.systemPid == 0 {
- clnt.unlock(containerID)
- return fmt.Errorf("No active process for container %s", containerID)
- }
- st := "running"
- if state == StatePause {
- st = "paused"
- }
- chstate := make(chan struct{})
- _, err = clnt.remote.apiClient.UpdateContainer(context.Background(), &containerd.UpdateContainerRequest{
- Id: containerID,
- Pid: InitFriendlyName,
- Status: st,
- })
- if err != nil {
- clnt.unlock(containerID)
- return err
- }
- container.pauseMonitor.append(state, chstate)
- clnt.unlock(containerID)
- <-chstate
- return nil
-}
-
-func (clnt *client) Resume(containerID string) error {
- return clnt.setState(containerID, StateResume)
-}
-
-func (clnt *client) Stats(containerID string) (*Stats, error) {
- resp, err := clnt.remote.apiClient.Stats(context.Background(), &containerd.StatsRequest{containerID})
- if err != nil {
- return nil, err
- }
- return (*Stats)(resp), nil
-}
-
-// Take care of the old 1.11.0 behavior in case the version upgrade
-// happened without a clean daemon shutdown
-func (clnt *client) cleanupOldRootfs(containerID string) {
- // Unmount and delete the bundle folder
- if mts, err := mount.GetMounts(); err == nil {
- for _, mts := range mts {
- if strings.HasSuffix(mts.Mountpoint, containerID+"/rootfs") {
- if err := unix.Unmount(mts.Mountpoint, unix.MNT_DETACH); err == nil {
- os.RemoveAll(strings.TrimSuffix(mts.Mountpoint, "/rootfs"))
- }
- break
- }
- }
- }
-}
-
-func (clnt *client) setExited(containerID string, exitCode uint32) error {
- clnt.lock(containerID)
- defer clnt.unlock(containerID)
-
- err := clnt.backend.StateChanged(containerID, StateInfo{
- CommonStateInfo: CommonStateInfo{
- State: StateExit,
- ExitCode: exitCode,
- }})
-
- clnt.cleanupOldRootfs(containerID)
-
- return err
-}
-
-func (clnt *client) GetPidsForContainer(containerID string) ([]int, error) {
- cont, err := clnt.getContainerdContainer(containerID)
- if err != nil {
- return nil, err
- }
- pids := make([]int, len(cont.Pids))
- for i, p := range cont.Pids {
- pids[i] = int(p)
- }
- return pids, nil
-}
-
-// Summary returns a summary of the processes running in a container.
-// This is a no-op on Linux.
-func (clnt *client) Summary(containerID string) ([]Summary, error) {
- return nil, nil
-}
-
-func (clnt *client) getContainerdContainer(containerID string) (*containerd.Container, error) {
- resp, err := clnt.remote.apiClient.State(context.Background(), &containerd.StateRequest{Id: containerID})
- if err != nil {
- return nil, err
- }
- for _, cont := range resp.Containers {
- if cont.Id == containerID {
- return cont, nil
- }
- }
- return nil, fmt.Errorf("invalid state response")
-}
-
-func (clnt *client) UpdateResources(containerID string, resources Resources) error {
- clnt.lock(containerID)
- defer clnt.unlock(containerID)
- container, err := clnt.getContainer(containerID)
- if err != nil {
- return err
- }
- if container.systemPid == 0 {
- return fmt.Errorf("No active process for container %s", containerID)
- }
- _, err = clnt.remote.apiClient.UpdateContainer(context.Background(), &containerd.UpdateContainerRequest{
- Id: containerID,
- Pid: InitFriendlyName,
- Resources: (*containerd.UpdateResource)(&resources),
- })
- return err
-}
-
-func (clnt *client) getExitNotifier(containerID string) *exitNotifier {
- clnt.mapMutex.RLock()
- defer clnt.mapMutex.RUnlock()
- return clnt.exitNotifiers[containerID]
-}
-
-func (clnt *client) getOrCreateExitNotifier(containerID string) *exitNotifier {
- clnt.mapMutex.Lock()
- w, ok := clnt.exitNotifiers[containerID]
- defer clnt.mapMutex.Unlock()
- if !ok {
- w = &exitNotifier{c: make(chan struct{}), client: clnt}
- clnt.exitNotifiers[containerID] = w
- }
- return w
-}
-
-func (clnt *client) restore(cont *containerd.Container, lastEvent *containerd.Event, attachStdio StdioCallback, options ...CreateOption) (err error) {
- clnt.lock(cont.Id)
- defer clnt.unlock(cont.Id)
-
- logrus.Debugf("libcontainerd: restore container %s state %s", cont.Id, cont.Status)
-
- containerID := cont.Id
- if _, err := clnt.getContainer(containerID); err == nil {
- return fmt.Errorf("container %s is already active", containerID)
- }
-
- defer func() {
- if err != nil {
- clnt.deleteContainer(cont.Id)
- }
- }()
-
- container := clnt.newContainer(cont.BundlePath, options...)
- container.systemPid = systemPid(cont)
-
- var terminal bool
- for _, p := range cont.Processes {
- if p.Pid == InitFriendlyName {
- terminal = p.Terminal
- }
- }
-
- fifoCtx, cancel := context.WithCancel(context.Background())
- defer func() {
- if err != nil {
- cancel()
- }
- }()
-
- iopipe, err := container.openFifos(fifoCtx, terminal)
- if err != nil {
- return err
- }
- var stdinOnce sync.Once
- stdin := iopipe.Stdin
- iopipe.Stdin = ioutils.NewWriteCloserWrapper(stdin, func() error {
- var err error
- stdinOnce.Do(func() { // on error from attach we don't know if stdin was already closed
- err = stdin.Close()
- })
- return err
- })
-
- if err := attachStdio(*iopipe); err != nil {
- container.closeFifos(iopipe)
- return err
- }
-
- clnt.appendContainer(container)
-
- err = clnt.backend.StateChanged(containerID, StateInfo{
- CommonStateInfo: CommonStateInfo{
- State: StateRestore,
- Pid: container.systemPid,
- }})
-
- if err != nil {
- container.closeFifos(iopipe)
- return err
- }
-
- if lastEvent != nil {
- // This should only be a pause or resume event
- if lastEvent.Type == StatePause || lastEvent.Type == StateResume {
- return clnt.backend.StateChanged(containerID, StateInfo{
- CommonStateInfo: CommonStateInfo{
- State: lastEvent.Type,
- Pid: container.systemPid,
- }})
- }
-
- logrus.Warnf("libcontainerd: unexpected backlog event: %#v", lastEvent)
- }
-
- return nil
-}
-
-func (clnt *client) getContainerLastEventSinceTime(id string, tsp *timestamp.Timestamp) (*containerd.Event, error) {
- er := &containerd.EventsRequest{
- Timestamp: tsp,
- StoredOnly: true,
- Id: id,
- }
- events, err := clnt.remote.apiClient.Events(context.Background(), er)
- if err != nil {
- logrus.Errorf("libcontainerd: failed to get container events stream for %s: %q", er.Id, err)
- return nil, err
- }
-
- var ev *containerd.Event
- for {
- e, err := events.Recv()
- if err != nil {
- if err.Error() == "EOF" {
- break
- }
- logrus.Errorf("libcontainerd: failed to get container event for %s: %q", id, err)
- return nil, err
- }
- ev = e
- logrus.Debugf("libcontainerd: received past event %#v", ev)
- }
-
- return ev, nil
-}
-
-func (clnt *client) getContainerLastEvent(id string) (*containerd.Event, error) {
- ev, err := clnt.getContainerLastEventSinceTime(id, clnt.remote.restoreFromTimestamp)
- if err == nil && ev == nil {
- // If ev is nil and the container is running in containerd,
- // we already consumed all the event of the
- // container, included the "exit" one.
- // Thus, we request all events containerd has in memory for
- // this container in order to get the last one (which should
- // be an exit event)
- logrus.Warnf("libcontainerd: client is out of sync, restore was called on a fully synced container (%s).", id)
- // Request all events since beginning of time
- t := time.Unix(0, 0)
- tsp, err := ptypes.TimestampProto(t)
- if err != nil {
- logrus.Errorf("libcontainerd: getLastEventSinceTime() failed to convert timestamp: %q", err)
- return nil, err
- }
-
- return clnt.getContainerLastEventSinceTime(id, tsp)
- }
-
- return ev, err
-}
-
-func (clnt *client) Restore(containerID string, attachStdio StdioCallback, options ...CreateOption) error {
- // Synchronize with live events
- clnt.remote.Lock()
- defer clnt.remote.Unlock()
- // Check that containerd still knows this container.
- //
- // In the unlikely event that Restore for this container process
- // the its past event before the main loop, the event will be
- // processed twice. However, this is not an issue as all those
- // events will do is change the state of the container to be
- // exactly the same.
- cont, err := clnt.getContainerdContainer(containerID)
- // Get its last event
- ev, eerr := clnt.getContainerLastEvent(containerID)
- if err != nil || containerd_runtime_types.State(cont.Status) == containerd_runtime_types.Stopped {
- if err != nil {
- logrus.Warnf("libcontainerd: failed to retrieve container %s state: %v", containerID, err)
- }
- if ev != nil && (ev.Pid != InitFriendlyName || ev.Type != StateExit) {
- // Wait a while for the exit event
- timeout := time.NewTimer(10 * time.Second)
- tick := time.NewTicker(100 * time.Millisecond)
- stop:
- for {
- select {
- case <-timeout.C:
- break stop
- case <-tick.C:
- ev, eerr = clnt.getContainerLastEvent(containerID)
- if eerr != nil {
- break stop
- }
- if ev != nil && ev.Pid == InitFriendlyName && ev.Type == StateExit {
- break stop
- }
- }
- }
- timeout.Stop()
- tick.Stop()
- }
-
- // get the exit status for this container, if we don't have
- // one, indicate an error
- ec := uint32(255)
- if eerr == nil && ev != nil && ev.Pid == InitFriendlyName && ev.Type == StateExit {
- ec = ev.Status
- }
- clnt.setExited(containerID, ec)
-
- return nil
- }
-
- // container is still alive
- if clnt.liveRestore {
- if err := clnt.restore(cont, ev, attachStdio, options...); err != nil {
- logrus.Errorf("libcontainerd: error restoring %s: %v", containerID, err)
- }
- return nil
- }
-
- // Kill the container if liveRestore == false
- w := clnt.getOrCreateExitNotifier(containerID)
- clnt.lock(cont.Id)
- container := clnt.newContainer(cont.BundlePath)
- container.systemPid = systemPid(cont)
- clnt.appendContainer(container)
- clnt.unlock(cont.Id)
-
- container.discardFifos()
-
- if err := clnt.Signal(containerID, int(unix.SIGTERM)); err != nil {
- logrus.Errorf("libcontainerd: error sending sigterm to %v: %v", containerID, err)
- }
-
- // Let the main loop handle the exit event
- clnt.remote.Unlock()
-
- if ev != nil && ev.Type == StatePause {
- // resume container, it depends on the main loop, so we do it after Unlock()
- logrus.Debugf("libcontainerd: %s was paused, resuming it so it can die", containerID)
- if err := clnt.Resume(containerID); err != nil {
- return fmt.Errorf("failed to resume container: %v", err)
- }
- }
-
- select {
- case <-time.After(10 * time.Second):
- if err := clnt.Signal(containerID, int(unix.SIGKILL)); err != nil {
- logrus.Errorf("libcontainerd: error sending sigkill to %v: %v", containerID, err)
- }
- select {
- case <-time.After(2 * time.Second):
- case <-w.wait():
- // relock because of the defer
- clnt.remote.Lock()
- return nil
- }
- case <-w.wait():
- // relock because of the defer
- clnt.remote.Lock()
- return nil
- }
- // relock because of the defer
- clnt.remote.Lock()
-
- clnt.deleteContainer(containerID)
-
- return clnt.setExited(containerID, uint32(255))
-}
-
-func (clnt *client) CreateCheckpoint(containerID string, checkpointID string, checkpointDir string, exit bool) error {
- clnt.lock(containerID)
- defer clnt.unlock(containerID)
- if _, err := clnt.getContainer(containerID); err != nil {
- return err
- }
-
- _, err := clnt.remote.apiClient.CreateCheckpoint(context.Background(), &containerd.CreateCheckpointRequest{
- Id: containerID,
- Checkpoint: &containerd.Checkpoint{
- Name: checkpointID,
- Exit: exit,
- Tcp: true,
- UnixSockets: true,
- Shell: false,
- EmptyNS: []string{"network"},
- },
- CheckpointDir: checkpointDir,
- })
- return err
-}
-
-func (clnt *client) DeleteCheckpoint(containerID string, checkpointID string, checkpointDir string) error {
- clnt.lock(containerID)
- defer clnt.unlock(containerID)
- if _, err := clnt.getContainer(containerID); err != nil {
- return err
- }
-
- _, err := clnt.remote.apiClient.DeleteCheckpoint(context.Background(), &containerd.DeleteCheckpointRequest{
- Id: containerID,
- Name: checkpointID,
- CheckpointDir: checkpointDir,
- })
- return err
-}
-
-func (clnt *client) ListCheckpoints(containerID string, checkpointDir string) (*Checkpoints, error) {
- clnt.lock(containerID)
- defer clnt.unlock(containerID)
- if _, err := clnt.getContainer(containerID); err != nil {
- return nil, err
- }
-
- resp, err := clnt.remote.apiClient.ListCheckpoint(context.Background(), &containerd.ListCheckpointRequest{
- Id: containerID,
- CheckpointDir: checkpointDir,
- })
- if err != nil {
- return nil, err
- }
- return (*Checkpoints)(resp), nil
-}
diff --git a/libcontainerd/client_local_windows.go b/libcontainerd/client_local_windows.go
new file mode 100644
index 0000000000..209b00db67
--- /dev/null
+++ b/libcontainerd/client_local_windows.go
@@ -0,0 +1,1340 @@
+package libcontainerd
+
+import (
+ "context"
+ "encoding/json"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "os"
+ "path"
+ "path/filepath"
+ "regexp"
+ "strings"
+ "sync"
+ "syscall"
+ "time"
+
+ "github.com/Microsoft/hcsshim"
+ opengcs "github.com/Microsoft/opengcs/client"
+ "github.com/docker/docker/pkg/sysinfo"
+ "github.com/docker/docker/pkg/system"
+ specs "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/pkg/errors"
+ "github.com/sirupsen/logrus"
+ "golang.org/x/sys/windows"
+)
+
+ // InitProcessName is the process identifier used to refer to a container's
+ // init process.
+ const InitProcessName = "init"
+
+ // process describes one process tracked for a container: either its init
+ // process or an entry in the container's execs map.
+ type process struct {
+ id string
+ pid int
+ hcsProcess hcsshim.Process // presumably the HCS handle for this process — confirm where it is assigned
+ }
+
+ // container is the client-side record kept for each container. It embeds a
+ // sync.Mutex and holds the container's spec, HCS handle, status and processes.
+ type container struct {
+ sync.Mutex
+
+ // The ociSpec is required, as client.Create() needs a spec, but can
+ // be called from the RestartManager context which does not otherwise
+ // have access to the Spec
+ ociSpec *specs.Spec
+
+ isWindows bool // set to true by createWindows
+ manualStopRequested bool
+ hcsContainer hcsshim.Container // handle returned by hcsshim.CreateContainer
+
+ id string
+ status Status // StatusCreated when the object is first built
+ exitedAt time.Time
+ exitCode uint32
+ waitCh chan struct{}
+ init *process
+ execs map[string]*process // presumably keyed by exec/process ID — TODO confirm
+ updatePending bool
+ }
+
+ // Win32 error codes that are used for various workarounds.
+ // These really should be ALL_CAPS to match Go's syscall library and standard
+ // Win32 error conventions, but golint insists on CamelCase.
+ const (
+ CoEClassstring = syscall.Errno(0x800401F3) // Invalid class string
+ ErrorNoNetwork = syscall.Errno(1222) // The network is not present or not started
+ ErrorBadPathname = syscall.Errno(161) // The specified path is invalid
+ ErrorInvalidObject = syscall.Errno(0x800710D8) // The object identifier does not represent a valid object
+ )
+
+ // defaultOwner is a tag passed to HCS to allow it to differentiate between
+ // container creator management stacks. For docker it is hard-coded to
+ // "docker".
+ const defaultOwner = "docker"
+
+// Create is the entrypoint to create a container from a spec.
+// Table below shows the fields required for HCS JSON calling parameters,
+// where if not populated, is omitted.
+// +-----------------+--------------------------------------------+---------------------------------------------------+
+// | | Isolation=Process | Isolation=Hyper-V |
+// +-----------------+--------------------------------------------+---------------------------------------------------+
+// | VolumePath | \\?\\Volume{GUIDa} | |
+// | LayerFolderPath | %root%\windowsfilter\containerID | %root%\windowsfilter\containerID (servicing only) |
+// | Layers[] | ID=GUIDb;Path=%root%\windowsfilter\layerID | ID=GUIDb;Path=%root%\windowsfilter\layerID |
+// | HvRuntime | | ImagePath=%root%\BaseLayerID\UtilityVM |
+// +-----------------+--------------------------------------------+---------------------------------------------------+
+//
+// Isolation=Process example:
+//
+// {
+// "SystemType": "Container",
+// "Name": "5e0055c814a6005b8e57ac59f9a522066e0af12b48b3c26a9416e23907698776",
+// "Owner": "docker",
+// "VolumePath": "\\\\\\\\?\\\\Volume{66d1ef4c-7a00-11e6-8948-00155ddbef9d}",
+// "IgnoreFlushesDuringBoot": true,
+// "LayerFolderPath": "C:\\\\control\\\\windowsfilter\\\\5e0055c814a6005b8e57ac59f9a522066e0af12b48b3c26a9416e23907698776",
+// "Layers": [{
+// "ID": "18955d65-d45a-557b-bf1c-49d6dfefc526",
+// "Path": "C:\\\\control\\\\windowsfilter\\\\65bf96e5760a09edf1790cb229e2dfb2dbd0fcdc0bf7451bae099106bfbfea0c"
+// }],
+// "HostName": "5e0055c814a6",
+// "MappedDirectories": [],
+// "HvPartition": false,
+// "EndpointList": ["eef2649d-bb17-4d53-9937-295a8efe6f2c"],
+// "Servicing": false
+//}
+//
+// Isolation=Hyper-V example:
+//
+//{
+// "SystemType": "Container",
+// "Name": "475c2c58933b72687a88a441e7e0ca4bd72d76413c5f9d5031fee83b98f6045d",
+// "Owner": "docker",
+// "IgnoreFlushesDuringBoot": true,
+// "Layers": [{
+// "ID": "18955d65-d45a-557b-bf1c-49d6dfefc526",
+// "Path": "C:\\\\control\\\\windowsfilter\\\\65bf96e5760a09edf1790cb229e2dfb2dbd0fcdc0bf7451bae099106bfbfea0c"
+// }],
+// "HostName": "475c2c58933b",
+// "MappedDirectories": [],
+// "HvPartition": true,
+// "EndpointList": ["e1bb1e61-d56f-405e-b75d-fd520cefa0cb"],
+// "DNSSearchList": "a.com,b.com,c.com",
+// "HvRuntime": {
+// "ImagePath": "C:\\\\control\\\\windowsfilter\\\\65bf96e5760a09edf1790cb229e2dfb2dbd0fcdc0bf7451bae099106bfbfea0c\\\\UtilityVM"
+// },
+// "Servicing": false
+//}
+ func (c *client) Create(_ context.Context, id string, spec *specs.Spec, runtimeOptions interface{}) error {
+ 	if existing := c.getContainer(id); existing != nil {
+ 		return errors.WithStack(newConflictError("id already in use"))
+ 	}
+
+ 	// spec.Linux must be nil for Windows containers, but spec.Windows
+ 	// will be filled in regardless of container platform. This is a
+ 	// temporary workaround due to LCOW requiring layer folder paths,
+ 	// which are stored under spec.Windows.
+ 	//
+ 	// TODO: @darrenstahlmsft fix this once the OCI spec is updated to
+ 	// support layer folder paths for LCOW
+ 	if spec.Linux != nil {
+ 		return c.createLinux(id, spec, runtimeOptions)
+ 	}
+ 	return c.createWindows(id, spec, runtimeOptions)
+ }
+
+ // createWindows builds an HCS container configuration from spec, creates the
+ // container through hcsshim, starts it, and registers it in c.containers
+ // under id.
+ //
+ // The spec is validated along the way: at least two layer folders are
+ // required; for process isolation Root.Path must be a volume GUID path of the
+ // form \\?\Volume{GUID}\; for Hyper-V isolation Root.Path must be empty and a
+ // UtilityVM directory must exist in one of the lower layers; Root.Readonly
+ // and Mount.Type must not be set. If starting the container fails, it is
+ // terminated again and the start error is returned. runtimeOptions is not
+ // used here.
+ func (c *client) createWindows(id string, spec *specs.Spec, runtimeOptions interface{}) error {
+ 	logger := c.logger.WithField("container", id)
+ 	configuration := &hcsshim.ContainerConfig{
+ 		SystemType:              "Container",
+ 		Name:                    id,
+ 		Owner:                   defaultOwner,
+ 		IgnoreFlushesDuringBoot: spec.Windows.IgnoreFlushesDuringBoot,
+ 		HostName:                spec.Hostname,
+ 		HvPartition:             false,
+ 		Servicing:               spec.Windows.Servicing,
+ 	}
+
+ 	if spec.Windows.Resources != nil {
+ 		if spec.Windows.Resources.CPU != nil {
+ 			if spec.Windows.Resources.CPU.Count != nil {
+ 				// This check is being done here rather than in adaptContainerSettings
+ 				// because we don't want to update the HostConfig in case this container
+ 				// is moved to a host with more CPUs than this one.
+ 				cpuCount := *spec.Windows.Resources.CPU.Count
+ 				hostCPUCount := uint64(sysinfo.NumCPU())
+ 				if cpuCount > hostCPUCount {
+ 					c.logger.Warnf("Changing requested CPUCount of %d to current number of processors, %d", cpuCount, hostCPUCount)
+ 					cpuCount = hostCPUCount
+ 				}
+ 				configuration.ProcessorCount = uint32(cpuCount)
+ 			}
+ 			if spec.Windows.Resources.CPU.Shares != nil {
+ 				configuration.ProcessorWeight = uint64(*spec.Windows.Resources.CPU.Shares)
+ 			}
+ 			if spec.Windows.Resources.CPU.Maximum != nil {
+ 				configuration.ProcessorMaximum = int64(*spec.Windows.Resources.CPU.Maximum)
+ 			}
+ 		}
+ 		if spec.Windows.Resources.Memory != nil {
+ 			if spec.Windows.Resources.Memory.Limit != nil {
+ 				// The spec limit is in bytes; HCS takes megabytes.
+ 				configuration.MemoryMaximumInMB = int64(*spec.Windows.Resources.Memory.Limit) / 1024 / 1024
+ 			}
+ 		}
+ 		if spec.Windows.Resources.Storage != nil {
+ 			if spec.Windows.Resources.Storage.Bps != nil {
+ 				configuration.StorageBandwidthMaximum = *spec.Windows.Resources.Storage.Bps
+ 			}
+ 			if spec.Windows.Resources.Storage.Iops != nil {
+ 				configuration.StorageIOPSMaximum = *spec.Windows.Resources.Storage.Iops
+ 			}
+ 		}
+ 	}
+
+ 	if spec.Windows.HyperV != nil {
+ 		configuration.HvPartition = true
+ 	}
+
+ 	if spec.Windows.Network != nil {
+ 		configuration.EndpointList = spec.Windows.Network.EndpointList
+ 		configuration.AllowUnqualifiedDNSQuery = spec.Windows.Network.AllowUnqualifiedDNSQuery
+ 		if spec.Windows.Network.DNSSearchList != nil {
+ 			configuration.DNSSearchList = strings.Join(spec.Windows.Network.DNSSearchList, ",")
+ 		}
+ 		configuration.NetworkSharedContainerName = spec.Windows.Network.NetworkSharedContainerName
+ 	}
+
+ 	if cs, ok := spec.Windows.CredentialSpec.(string); ok {
+ 		configuration.Credentials = cs
+ 	}
+
+ 	// We must have least two layers in the spec, the bottom one being a
+ 	// base image, the top one being the RW layer.
+ 	if len(spec.Windows.LayerFolders) < 2 {
+ 		return fmt.Errorf("OCI spec is invalid - at least two LayerFolders must be supplied to the runtime")
+ 	}
+
+ 	// Strip off the top-most layer as that's passed in separately to HCS
+ 	configuration.LayerFolderPath = spec.Windows.LayerFolders[len(spec.Windows.LayerFolders)-1]
+ 	layerFolders := spec.Windows.LayerFolders[:len(spec.Windows.LayerFolders)-1]
+
+ 	if configuration.HvPartition {
+ 		// We don't currently support setting the utility VM image explicitly.
+ 		// TODO @swernli/jhowardmsft circa RS3/4, this may be re-locatable.
+ 		if spec.Windows.HyperV.UtilityVMPath != "" {
+ 			return errors.New("runtime does not support an explicit utility VM path for Hyper-V containers")
+ 		}
+
+ 		// Find the upper-most utility VM image.
+ 		var uvmImagePath string
+ 		for _, folder := range layerFolders {
+ 			fullPath := filepath.Join(folder, "UtilityVM")
+ 			_, err := os.Stat(fullPath)
+ 			if err == nil {
+ 				uvmImagePath = fullPath
+ 				break
+ 			}
+ 			if !os.IsNotExist(err) {
+ 				return err
+ 			}
+ 		}
+ 		if uvmImagePath == "" {
+ 			return errors.New("utility VM image could not be found")
+ 		}
+ 		configuration.HvRuntime = &hcsshim.HvRuntime{ImagePath: uvmImagePath}
+
+ 		if spec.Root.Path != "" {
+ 			return errors.New("OCI spec is invalid - Root.Path must be omitted for a Hyper-V container")
+ 		}
+ 	} else {
+ 		const volumeGUIDRegex = `^\\\\\?\\(Volume)\{{0,1}[0-9a-fA-F]{8}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{12}(\}){0,1}\}\\$`
+ 		// MatchString only returns an error for an invalid pattern, so the
+ 		// match result itself must be checked to reject a malformed path.
+ 		// This also guarantees the path is non-empty before it is sliced.
+ 		if matched, err := regexp.MatchString(volumeGUIDRegex, spec.Root.Path); err != nil || !matched {
+ 			return fmt.Errorf(`OCI spec is invalid - Root.Path '%s' must be a volume GUID path in the format '\\?\Volume{GUID}\'`, spec.Root.Path)
+ 		}
+ 		// HCS API requires the trailing backslash to be removed
+ 		configuration.VolumePath = spec.Root.Path[:len(spec.Root.Path)-1]
+ 	}
+
+ 	if spec.Root.Readonly {
+ 		return errors.New(`OCI spec is invalid - Root.Readonly must not be set on Windows`)
+ 	}
+
+ 	for _, layerPath := range layerFolders {
+ 		_, filename := filepath.Split(layerPath)
+ 		g, err := hcsshim.NameToGuid(filename)
+ 		if err != nil {
+ 			return err
+ 		}
+ 		configuration.Layers = append(configuration.Layers, hcsshim.Layer{
+ 			ID:   g.ToString(),
+ 			Path: layerPath,
+ 		})
+ 	}
+
+ 	// Add the mounts (volumes, bind mounts etc) to the structure
+ 	var mds []hcsshim.MappedDir
+ 	var mps []hcsshim.MappedPipe
+ 	for _, mount := range spec.Mounts {
+ 		const pipePrefix = `\\.\pipe\`
+ 		if mount.Type != "" {
+ 			return fmt.Errorf("OCI spec is invalid - Mount.Type '%s' must not be set", mount.Type)
+ 		}
+ 		if strings.HasPrefix(mount.Destination, pipePrefix) {
+ 			mp := hcsshim.MappedPipe{
+ 				HostPath:          mount.Source,
+ 				ContainerPipeName: mount.Destination[len(pipePrefix):],
+ 			}
+ 			mps = append(mps, mp)
+ 		} else {
+ 			md := hcsshim.MappedDir{
+ 				HostPath:      mount.Source,
+ 				ContainerPath: mount.Destination,
+ 				ReadOnly:      false,
+ 			}
+ 			for _, o := range mount.Options {
+ 				if strings.ToLower(o) == "ro" {
+ 					md.ReadOnly = true
+ 				}
+ 			}
+ 			mds = append(mds, md)
+ 		}
+ 	}
+ 	configuration.MappedDirectories = mds
+ 	if len(mps) > 0 && system.GetOSVersion().Build < 16210 { // replace with Win10 RS3 build number at RTM
+ 		return errors.New("named pipe mounts are not supported on this version of Windows")
+ 	}
+ 	configuration.MappedPipes = mps
+
+ 	hcsContainer, err := hcsshim.CreateContainer(id, configuration)
+ 	if err != nil {
+ 		return err
+ 	}
+
+ 	// Construct a container object for calling start on it.
+ 	ctr := &container{
+ 		id:           id,
+ 		execs:        make(map[string]*process),
+ 		isWindows:    true,
+ 		ociSpec:      spec,
+ 		hcsContainer: hcsContainer,
+ 		status:       StatusCreated,
+ 		waitCh:       make(chan struct{}),
+ 	}
+
+ 	// Start the container. If this is a servicing container, this call
+ 	// will block until the container is done with the servicing
+ 	// execution.
+ 	logger.Debug("starting container")
+ 	if err = hcsContainer.Start(); err != nil {
+ 		c.logger.WithError(err).Error("failed to start container")
+ 		ctr.debugGCS()
+ 		if err := c.terminateContainer(ctr); err != nil {
+ 			c.logger.WithError(err).Error("failed to cleanup after a failed Start")
+ 		} else {
+ 			c.logger.Debug("cleaned up after failed Start by calling Terminate")
+ 		}
+ 		return err
+ 	}
+ 	ctr.debugGCS()
+
+ 	// Only a successfully started container is published in the map.
+ 	c.Lock()
+ 	c.containers[id] = ctr
+ 	c.Unlock()
+
+ 	logger.Debug("createWindows() completed successfully")
+ 	return nil
+ }
+
+// createLinux creates and starts the HCS compute system backing a Linux
+// container on Windows (LCOW), registers it with the client and queues an
+// EventCreate for the backend.
+//
+// runtimeOptions must be a non-nil *opengcs.Config describing how the utility
+// VM is booted (VHDX or kernel/initrd). spec.Windows must be set and carry at
+// least one layer folder; the last entry is the read-write layer and is
+// handed to HCS separately. On success spec.Mounts is replaced by the
+// translated mount list (see the comment above the mount loop).
+func (c *client) createLinux(id string, spec *specs.Spec, runtimeOptions interface{}) error {
+	logrus.Debugf("libcontainerd: createLinux(): containerId %s ", id)
+	logger := c.logger.WithField("container", id)
+
+	if runtimeOptions == nil {
+		return fmt.Errorf("lcow option must be supplied to the runtime")
+	}
+	lcowConfig, ok := runtimeOptions.(*opengcs.Config)
+	if !ok {
+		return fmt.Errorf("lcow option must be supplied to the runtime")
+	}
+
+	configuration := &hcsshim.ContainerConfig{
+		HvPartition:                 true,
+		Name:                        id,
+		SystemType:                  "container",
+		ContainerType:               "linux",
+		Owner:                       defaultOwner,
+		TerminateOnLastHandleClosed: true,
+	}
+
+	if lcowConfig.ActualMode == opengcs.ModeActualVhdx {
+		configuration.HvRuntime = &hcsshim.HvRuntime{
+			ImagePath:          lcowConfig.Vhdx,
+			BootSource:         "Vhd",
+			WritableBootSource: false,
+		}
+	} else {
+		configuration.HvRuntime = &hcsshim.HvRuntime{
+			ImagePath:           lcowConfig.KirdPath,
+			LinuxKernelFile:     lcowConfig.KernelFile,
+			LinuxInitrdFile:     lcowConfig.InitrdFile,
+			LinuxBootParameters: lcowConfig.BootParameters,
+		}
+	}
+
+	if spec.Windows == nil {
+		return fmt.Errorf("spec.Windows must not be nil for LCOW containers")
+	}
+
+	// We must have least one layer in the spec
+	if len(spec.Windows.LayerFolders) == 0 {
+		return fmt.Errorf("OCI spec is invalid - at least one LayerFolders must be supplied to the runtime")
+	}
+
+	// Strip off the top-most layer as that's passed in separately to HCS
+	configuration.LayerFolderPath = spec.Windows.LayerFolders[len(spec.Windows.LayerFolders)-1]
+	layerFolders := spec.Windows.LayerFolders[:len(spec.Windows.LayerFolders)-1]
+
+	for _, layerPath := range layerFolders {
+		_, filename := filepath.Split(layerPath)
+		g, err := hcsshim.NameToGuid(filename)
+		if err != nil {
+			return err
+		}
+		configuration.Layers = append(configuration.Layers, hcsshim.Layer{
+			ID:   g.ToString(),
+			Path: filepath.Join(layerPath, "layer.vhd"),
+		})
+	}
+
+	if spec.Windows.Network != nil {
+		configuration.EndpointList = spec.Windows.Network.EndpointList
+		configuration.AllowUnqualifiedDNSQuery = spec.Windows.Network.AllowUnqualifiedDNSQuery
+		if spec.Windows.Network.DNSSearchList != nil {
+			configuration.DNSSearchList = strings.Join(spec.Windows.Network.DNSSearchList, ",")
+		}
+		configuration.NetworkSharedContainerName = spec.Windows.Network.NetworkSharedContainerName
+	}
+
+	// Add the mounts (volumes, bind mounts etc) to the structure. We have to do
+	// some translation for both the mapped directories passed into HCS and in
+	// the spec.
+	//
+	// For HCS, we only pass in the mounts from the spec which are type "bind".
+	// Further, the "ContainerPath" field (which is a little mis-leadingly
+	// named when it applies to the utility VM rather than the container in the
+	// utility VM) is moved to under /tmp/gcs/<ID>/binds, where this is passed
+	// by the caller through a 'uvmpath' option.
+	//
+	// We do similar translation for the mounts in the spec by stripping out
+	// the uvmpath option, and translating the Source path to the location in the
+	// utility VM calculated above.
+	//
+	// From inside the utility VM, you would see a 9p mount such as in the following
+	// where a host folder has been mapped to /target. The line with /tmp/gcs/<ID>/binds
+	// specifically:
+	//
+	// / # mount
+	// rootfs on / type rootfs (rw,size=463736k,nr_inodes=115934)
+	// proc on /proc type proc (rw,relatime)
+	// sysfs on /sys type sysfs (rw,relatime)
+	// udev on /dev type devtmpfs (rw,relatime,size=498100k,nr_inodes=124525,mode=755)
+	// tmpfs on /run type tmpfs (rw,relatime)
+	// cgroup on /sys/fs/cgroup type cgroup (rw,relatime,cpuset,cpu,cpuacct,blkio,memory,devices,freezer,net_cls,perf_event,net_prio,hugetlb,pids,rdma)
+	// mqueue on /dev/mqueue type mqueue (rw,relatime)
+	// devpts on /dev/pts type devpts (rw,relatime,mode=600,ptmxmode=000)
+	// /binds/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/target on /binds/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/target type 9p (rw,sync,dirsync,relatime,trans=fd,rfdno=6,wfdno=6)
+	// /dev/pmem0 on /tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/layer0 type ext4 (ro,relatime,block_validity,delalloc,norecovery,barrier,dax,user_xattr,acl)
+	// /dev/sda on /tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/scratch type ext4 (rw,relatime,block_validity,delalloc,barrier,user_xattr,acl)
+	// overlay on /tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/rootfs type overlay (rw,relatime,lowerdir=/tmp/base/:/tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/layer0,upperdir=/tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/scratch/upper,workdir=/tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/scratch/work)
+	//
+	// /tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc # ls -l
+	// total 16
+	// drwx------ 3 0 0 60 Sep 7 18:54 binds
+	// -rw-r--r-- 1 0 0 3345 Sep 7 18:54 config.json
+	// drwxr-xr-x 10 0 0 4096 Sep 6 17:26 layer0
+	// drwxr-xr-x 1 0 0 4096 Sep 7 18:54 rootfs
+	// drwxr-xr-x 5 0 0 4096 Sep 7 18:54 scratch
+	//
+	// /tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc # ls -l binds
+	// total 0
+	// drwxrwxrwt 2 0 0 4096 Sep 7 16:51 target
+
+	mds := []hcsshim.MappedDir{}
+	specMounts := []specs.Mount{}
+	for _, mount := range spec.Mounts {
+		specMount := mount
+		if mount.Type == "bind" {
+			// Strip out the uvmpath from the options
+			updatedOptions := []string{}
+			uvmPath := ""
+			readonly := false
+			for _, opt := range mount.Options {
+				dropOption := false
+				elements := strings.SplitN(opt, "=", 2)
+				switch elements[0] {
+				case "uvmpath":
+					// A bare "uvmpath" (no "=value") leaves uvmPath empty so
+					// it is rejected below instead of indexing out of range.
+					if len(elements) == 2 {
+						uvmPath = elements[1]
+					}
+					dropOption = true
+				case "rw":
+				case "ro":
+					readonly = true
+				case "rbind":
+				default:
+					return fmt.Errorf("unsupported option %q", opt)
+				}
+				if !dropOption {
+					updatedOptions = append(updatedOptions, opt)
+				}
+			}
+			mount.Options = updatedOptions
+			// specMount is the copy that ends up in the spec, so the
+			// stripped option list has to be applied to it as well.
+			specMount.Options = updatedOptions
+			if uvmPath == "" {
+				return fmt.Errorf("no uvmpath for bind mount %+v", mount)
+			}
+			md := hcsshim.MappedDir{
+				HostPath:          mount.Source,
+				ContainerPath:     path.Join(uvmPath, mount.Destination),
+				CreateInUtilityVM: true,
+				ReadOnly:          readonly,
+			}
+			mds = append(mds, md)
+			specMount.Source = path.Join(uvmPath, mount.Destination)
+		}
+		specMounts = append(specMounts, specMount)
+	}
+	configuration.MappedDirectories = mds
+
+	hcsContainer, err := hcsshim.CreateContainer(id, configuration)
+	if err != nil {
+		return err
+	}
+
+	spec.Mounts = specMounts
+
+	// Construct a container object for calling start on it. isWindows is
+	// false: Start and Exec key off it to pass the raw OCI spec and the
+	// argument vector (rather than a Windows command line) through HCS.
+	ctr := &container{
+		id:           id,
+		execs:        make(map[string]*process),
+		isWindows:    false,
+		ociSpec:      spec,
+		hcsContainer: hcsContainer,
+		status:       StatusCreated,
+		waitCh:       make(chan struct{}),
+	}
+
+	// Start the container. If this is a servicing container, this call
+	// will block until the container is done with the servicing
+	// execution.
+	logger.Debug("starting container")
+	if err = hcsContainer.Start(); err != nil {
+		c.logger.WithError(err).Error("failed to start container")
+		ctr.debugGCS()
+		if err := c.terminateContainer(ctr); err != nil {
+			c.logger.WithError(err).Error("failed to cleanup after a failed Start")
+		} else {
+			c.logger.Debug("cleaned up after failed Start by calling Terminate")
+		}
+		return err
+	}
+	ctr.debugGCS()
+
+	c.Lock()
+	c.containers[id] = ctr
+	c.Unlock()
+
+	c.eventQ.append(id, func() {
+		ei := EventInfo{
+			ContainerID: id,
+		}
+		c.logger.WithFields(logrus.Fields{
+			"container": ctr.id,
+			"event":     EventCreate,
+		}).Info("sending event")
+		err := c.backend.ProcessEvent(id, EventCreate, ei)
+		if err != nil {
+			c.logger.WithError(err).WithFields(logrus.Fields{
+				"container": id,
+				"event":     EventCreate,
+			}).Error("failed to process event")
+		}
+	})
+
+	logger.Debug("createLinux() completed successfully")
+	return nil
+}
+
+// Start creates the init process of a previously created container, hands
+// its stdio to attachStdio, and returns the pid of the new process.
+//
+// It fails with a not-found error if the container is unknown, a conflict
+// error if the container already has an init process, and an
+// invalid-parameter error if the container's OCI spec has no Process section.
+// For a servicing container the process is reaped synchronously and a
+// non-zero exit code is reported as an error. Otherwise a goroutine is
+// started to reap the process and an EventStart is queued for the backend.
+func (c *client) Start(_ context.Context, id, _ string, withStdin bool, attachStdio StdioCallback) (int, error) {
+	ctr := c.getContainer(id)
+	switch {
+	case ctr == nil:
+		return -1, errors.WithStack(newNotFoundError("no such container"))
+	case ctr.init != nil:
+		return -1, errors.WithStack(newConflictError("container already started"))
+	case ctr.ociSpec.Process == nil:
+		// Everything below reads the process section of the spec; reject a
+		// spec without one up front instead of dereferencing a nil pointer.
+		return -1, errors.WithStack(newInvalidParameterError("OCI spec does not define a process"))
+	}
+
+	logger := c.logger.WithField("container", id)
+	procSpec := ctr.ociSpec.Process
+	servicing := ctr.ociSpec.Windows.Servicing
+
+	// Note we always tell HCS to create stdout as it's required
+	// regardless of '-i' or '-t' options, so that docker can always grab
+	// the output through logs. We also tell HCS to always create stdin,
+	// even if it's not used - it will be closed shortly. Stderr is only
+	// created if it we're not -t.
+	createProcessParms := &hcsshim.ProcessConfig{
+		EmulateConsole:   procSpec.Terminal,
+		WorkingDirectory: procSpec.Cwd,
+		CreateStdInPipe:  !servicing,
+		CreateStdOutPipe: !servicing,
+		CreateStdErrPipe: !procSpec.Terminal && !servicing,
+	}
+
+	if procSpec.ConsoleSize != nil {
+		createProcessParms.ConsoleSize[0] = uint(procSpec.ConsoleSize.Height)
+		createProcessParms.ConsoleSize[1] = uint(procSpec.ConsoleSize.Width)
+	}
+
+	// Configure the environment for the process
+	createProcessParms.Environment = setupEnvironmentVariables(procSpec.Env)
+	if ctr.isWindows {
+		createProcessParms.CommandLine = strings.Join(procSpec.Args, " ")
+	} else {
+		createProcessParms.CommandArgs = procSpec.Args
+	}
+	createProcessParms.User = procSpec.User.Username
+
+	// LCOW requires the raw OCI spec passed through HCS and onwards to
+	// GCS for the utility VM.
+	if !ctr.isWindows {
+		ociBuf, err := json.Marshal(ctr.ociSpec)
+		if err != nil {
+			return -1, err
+		}
+		ociRaw := json.RawMessage(ociBuf)
+		createProcessParms.OCISpecification = &ociRaw
+	}
+
+	ctr.Lock()
+	defer ctr.Unlock()
+
+	// Start the command running in the container.
+	newProcess, err := ctr.hcsContainer.CreateProcess(createProcessParms)
+	if err != nil {
+		logger.WithError(err).Error("CreateProcess() failed")
+		return -1, err
+	}
+	// If any later step assigns a non-nil err, kill the process and release
+	// its resources in the background once it has exited.
+	defer func() {
+		if err != nil {
+			if err := newProcess.Kill(); err != nil {
+				logger.WithError(err).Error("failed to kill process")
+			}
+			go func() {
+				if err := newProcess.Wait(); err != nil {
+					logger.WithError(err).Error("failed to wait for process")
+				}
+				if err := newProcess.Close(); err != nil {
+					logger.WithError(err).Error("failed to clean process resources")
+				}
+			}()
+		}
+	}()
+	p := &process{
+		hcsProcess: newProcess,
+		id:         InitProcessName,
+		pid:        newProcess.Pid(),
+	}
+	logger.WithField("pid", p.pid).Debug("init process started")
+
+	// If this is a servicing container, wait on the process synchronously here and
+	// if it succeeds, wait for it cleanly shutdown and merge into the parent container.
+	if servicing {
+		// reapProcess takes the lock, so drop it for the duration of the
+		// call and re-take it before the deferred Unlock above runs.
+		ctr.Unlock()
+		defer ctr.Lock()
+		exitCode := c.reapProcess(ctr, p)
+
+		if exitCode != 0 {
+			return -1, errors.Errorf("libcontainerd: servicing container %s returned non-zero exit code %d", ctr.id, exitCode)
+		}
+
+		return p.pid, nil
+	}
+
+	var (
+		stdout, stderr io.ReadCloser
+		stdin          io.WriteCloser
+	)
+	stdin, stdout, stderr, err = newProcess.Stdio()
+	if err != nil {
+		logger.WithError(err).Error("failed to get stdio pipes")
+		return -1, err
+	}
+
+	iopipe := &IOPipe{Terminal: procSpec.Terminal}
+	iopipe.Stdin = createStdInCloser(stdin, newProcess)
+
+	// Convert io.ReadClosers to io.Readers
+	if stdout != nil {
+		iopipe.Stdout = ioutil.NopCloser(&autoClosingReader{ReadCloser: stdout})
+	}
+	if stderr != nil {
+		iopipe.Stderr = ioutil.NopCloser(&autoClosingReader{ReadCloser: stderr})
+	}
+
+	_, err = attachStdio(iopipe)
+	if err != nil {
+		logger.WithError(err).Error("failed to attache stdio")
+		return -1, err
+	}
+	ctr.status = StatusRunning
+	ctr.init = p
+
+	// Spin up a go routine waiting for exit to handle cleanup
+	go c.reapProcess(ctr, p)
+
+	// Generate the associated event
+	c.eventQ.append(id, func() {
+		ei := EventInfo{
+			ContainerID: id,
+			ProcessID:   InitProcessName,
+			Pid:         uint32(p.pid),
+		}
+		c.logger.WithFields(logrus.Fields{
+			"container":  ctr.id,
+			"event":      EventStart,
+			"event-info": ei,
+		}).Info("sending event")
+		err := c.backend.ProcessEvent(ei.ContainerID, EventStart, ei)
+		if err != nil {
+			c.logger.WithError(err).WithFields(logrus.Fields{
+				"container":  id,
+				"event":      EventStart,
+				"event-info": ei,
+			}).Error("failed to process event")
+		}
+	})
+	logger.Debug("start() completed")
+	return p.pid, nil
+}
+
+// Exec adds a process to a running container.
+//
+// The process described by spec is created inside the container identified
+// by containerID and tracked under processID. Its stdio is handed to
+// attachStdio, a goroutine is started to reap it, and EventExecAdded followed
+// by EventExecStarted are queued for the backend. The pid of the new process
+// is returned. It fails with a not-found error for an unknown container, an
+// invalid-parameter error if the container has no HCS handle, and a conflict
+// error if processID is already in use.
+func (c *client) Exec(ctx context.Context, containerID, processID string, spec *specs.Process, withStdin bool, attachStdio StdioCallback) (int, error) {
+	ctr := c.getContainer(containerID)
+	switch {
+	case ctr == nil:
+		return -1, errors.WithStack(newNotFoundError("no such container"))
+	case ctr.hcsContainer == nil:
+		return -1, errors.WithStack(newInvalidParameterError("container is not running"))
+	}
+
+	// ctr.execs is mutated under the container lock (below and when a process
+	// is reaped), so it must be read under that lock as well.
+	ctr.Lock()
+	inUse := ctr.execs[processID] != nil
+	ctr.Unlock()
+	if inUse {
+		return -1, errors.WithStack(newConflictError("id already in use"))
+	}
+
+	logger := c.logger.WithFields(logrus.Fields{
+		"container": containerID,
+		"exec":      processID,
+	})
+
+	// Note we always tell HCS to
+	// create stdout as it's required regardless of '-i' or '-t' options, so that
+	// docker can always grab the output through logs. We also tell HCS to always
+	// create stdin, even if it's not used - it will be closed shortly. Stderr
+	// is only created if it we're not -t.
+	createProcessParms := hcsshim.ProcessConfig{
+		CreateStdInPipe:  true,
+		CreateStdOutPipe: true,
+		CreateStdErrPipe: !spec.Terminal,
+	}
+	if spec.Terminal {
+		createProcessParms.EmulateConsole = true
+		if spec.ConsoleSize != nil {
+			createProcessParms.ConsoleSize[0] = uint(spec.ConsoleSize.Height)
+			createProcessParms.ConsoleSize[1] = uint(spec.ConsoleSize.Width)
+		}
+	}
+
+	// Take working directory from the process to add if it is defined,
+	// otherwise take from the first process (when the container spec has one).
+	if spec.Cwd != "" {
+		createProcessParms.WorkingDirectory = spec.Cwd
+	} else if ctr.ociSpec.Process != nil {
+		createProcessParms.WorkingDirectory = ctr.ociSpec.Process.Cwd
+	}
+
+	// Configure the environment for the process
+	createProcessParms.Environment = setupEnvironmentVariables(spec.Env)
+	if ctr.isWindows {
+		createProcessParms.CommandLine = strings.Join(spec.Args, " ")
+	} else {
+		createProcessParms.CommandArgs = spec.Args
+	}
+	createProcessParms.User = spec.User.Username
+
+	logger.Debugf("exec commandLine: %s", createProcessParms.CommandLine)
+
+	// Start the command running in the container.
+	var (
+		stdout, stderr io.ReadCloser
+		stdin          io.WriteCloser
+	)
+	newProcess, err := ctr.hcsContainer.CreateProcess(&createProcessParms)
+	if err != nil {
+		logger.WithError(err).Errorf("exec's CreateProcess() failed")
+		return -1, err
+	}
+	pid := newProcess.Pid()
+	// If any later step assigns a non-nil err, kill the process and release
+	// its resources in the background once it has exited.
+	defer func() {
+		if err != nil {
+			if err := newProcess.Kill(); err != nil {
+				logger.WithError(err).Error("failed to kill process")
+			}
+			go func() {
+				if err := newProcess.Wait(); err != nil {
+					logger.WithError(err).Error("failed to wait for process")
+				}
+				if err := newProcess.Close(); err != nil {
+					logger.WithError(err).Error("failed to clean process resources")
+				}
+			}()
+		}
+	}()
+
+	stdin, stdout, stderr, err = newProcess.Stdio()
+	if err != nil {
+		logger.WithError(err).Error("getting std pipes failed")
+		return -1, err
+	}
+
+	iopipe := &IOPipe{Terminal: spec.Terminal}
+	iopipe.Stdin = createStdInCloser(stdin, newProcess)
+
+	// Convert io.ReadClosers to io.Readers
+	if stdout != nil {
+		iopipe.Stdout = ioutil.NopCloser(&autoClosingReader{ReadCloser: stdout})
+	}
+	if stderr != nil {
+		iopipe.Stderr = ioutil.NopCloser(&autoClosingReader{ReadCloser: stderr})
+	}
+
+	// Tell the engine to attach streams back to the client
+	_, err = attachStdio(iopipe)
+	if err != nil {
+		return -1, err
+	}
+
+	p := &process{
+		id:         processID,
+		pid:        pid,
+		hcsProcess: newProcess,
+	}
+
+	// Add the process to the container's list of processes
+	ctr.Lock()
+	ctr.execs[processID] = p
+	ctr.Unlock()
+
+	// Spin up a go routine waiting for exit to handle cleanup
+	go c.reapProcess(ctr, p)
+
+	c.eventQ.append(ctr.id, func() {
+		ei := EventInfo{
+			ContainerID: ctr.id,
+			ProcessID:   p.id,
+			Pid:         uint32(p.pid),
+		}
+		c.logger.WithFields(logrus.Fields{
+			"container":  ctr.id,
+			"event":      EventExecAdded,
+			"event-info": ei,
+		}).Info("sending event")
+		err := c.backend.ProcessEvent(ctr.id, EventExecAdded, ei)
+		if err != nil {
+			c.logger.WithError(err).WithFields(logrus.Fields{
+				"container":  ctr.id,
+				"event":      EventExecAdded,
+				"event-info": ei,
+			}).Error("failed to process event")
+		}
+		err = c.backend.ProcessEvent(ctr.id, EventExecStarted, ei)
+		if err != nil {
+			c.logger.WithError(err).WithFields(logrus.Fields{
+				"container":  ctr.id,
+				"event":      EventExecStarted,
+				"event-info": ei,
+			}).Error("failed to process event")
+		}
+	})
+
+	return pid, nil
+}
+
+// SignalProcess handles `docker stop` on Windows. While Linux has support for
+// the full range of signals, signals aren't really implemented on Windows.
+// For the init process, SIGKILL terminates the compute system and any other
+// signal requests a shutdown; failures of either are only logged. Any other
+// process is killed regardless of the signal, and that error is returned.
+func (c *client) SignalProcess(_ context.Context, containerID, processID string, signal int) error {
+	ctr, p, err := c.getProcess(containerID, processID)
+	if err != nil {
+		return err
+	}
+
+	ctr.manualStopRequested = true
+
+	logger := c.logger.WithFields(logrus.Fields{
+		"container": containerID,
+		"process":   processID,
+		"pid":       p.pid,
+		"signal":    signal,
+	})
+	logger.Debug("Signal()")
+
+	// Exec'd processes are simply killed.
+	if processID != InitProcessName {
+		return p.hcsProcess.Kill()
+	}
+
+	if syscall.Signal(signal) == syscall.SIGKILL {
+		// Terminate the compute system; a pending terminate is not an error.
+		termErr := ctr.hcsContainer.Terminate()
+		if termErr != nil && !hcsshim.IsPending(termErr) {
+			logger.WithError(termErr).Error("failed to terminate hccshim container")
+		}
+		return nil
+	}
+
+	// Shut down the container; pending and already-stopped are not errors.
+	shutdownErr := ctr.hcsContainer.Shutdown()
+	if shutdownErr != nil && !hcsshim.IsPending(shutdownErr) && !hcsshim.IsAlreadyStopped(shutdownErr) {
+		logger.WithError(shutdownErr).Error("failed to shutdown hccshim container")
+	}
+	return nil
+}
+
+// ResizeTerminal handles a CLI event to resize an interactive docker run or
+// docker exec window. It returns the lookup error if the container or process
+// cannot be found, otherwise the result of resizing the process console.
+func (c *client) ResizeTerminal(_ context.Context, containerID, processID string, width, height int) error {
+	_, p, err := c.getProcess(containerID, processID)
+	if err != nil {
+		return err
+	}
+
+	c.logger.WithFields(logrus.Fields{
+		"container": containerID,
+		"process":   processID,
+		"height":    height,
+		"width":     width,
+		"pid":       p.pid,
+	}).Debug("resizing")
+	// hcsshim's ResizeConsole takes (width, height), in that order.
+	return p.hcsProcess.ResizeConsole(uint16(width), uint16(height))
+}
+
+// CloseStdin closes the stdin of the given process. It returns the lookup
+// error if the container or process cannot be found.
+func (c *client) CloseStdin(_ context.Context, containerID, processID string) error {
+	_, p, err := c.getProcess(containerID, processID)
+	if err == nil {
+		err = p.hcsProcess.CloseStdin()
+	}
+	return err
+}
+
+// Pause handles pause requests for containers. It is refused when the
+// container spec has no Windows.HyperV section. On success the container is
+// marked StatusPaused and an EventPaused is queued for the backend.
+func (c *client) Pause(_ context.Context, containerID string) error {
+	ctr, _, err := c.getProcess(containerID, InitProcessName)
+	if err != nil {
+		return err
+	}
+
+	if ctr.ociSpec.Windows.HyperV == nil {
+		return errors.New("cannot pause Windows Server Containers")
+	}
+
+	ctr.Lock()
+	defer ctr.Unlock()
+
+	if err = ctr.hcsContainer.Pause(); err != nil {
+		return err
+	}
+
+	ctr.status = StatusPaused
+
+	c.eventQ.append(containerID, func() {
+		// Log before dispatching, like the other event closures in this file,
+		// so the log order matches what actually happened.
+		c.logger.WithFields(logrus.Fields{
+			"container": ctr.id,
+			"event":     EventPaused,
+		}).Info("sending event")
+		err := c.backend.ProcessEvent(containerID, EventPaused, EventInfo{
+			ContainerID: containerID,
+			ProcessID:   InitProcessName,
+		})
+		if err != nil {
+			c.logger.WithError(err).WithFields(logrus.Fields{
+				"container": containerID,
+				"event":     EventPaused,
+			}).Error("failed to process event")
+		}
+	})
+
+	return nil
+}
+
+// Resume handles resume requests for containers. It is refused when the
+// container spec has no Windows.HyperV section. On success the container is
+// marked StatusRunning and an EventResumed is queued for the backend.
+func (c *client) Resume(_ context.Context, containerID string) error {
+	ctr, _, err := c.getProcess(containerID, InitProcessName)
+	if err != nil {
+		return err
+	}
+
+	if ctr.ociSpec.Windows.HyperV == nil {
+		return errors.New("cannot resume Windows Server Containers")
+	}
+
+	ctr.Lock()
+	defer ctr.Unlock()
+
+	if err = ctr.hcsContainer.Resume(); err != nil {
+		return err
+	}
+
+	ctr.status = StatusRunning
+
+	c.eventQ.append(containerID, func() {
+		// Log before dispatching, like the other event closures in this file,
+		// so the log order matches what actually happened.
+		c.logger.WithFields(logrus.Fields{
+			"container": ctr.id,
+			"event":     EventResumed,
+		}).Info("sending event")
+		err := c.backend.ProcessEvent(containerID, EventResumed, EventInfo{
+			ContainerID: containerID,
+			ProcessID:   InitProcessName,
+		})
+		if err != nil {
+			c.logger.WithError(err).WithFields(logrus.Fields{
+				"container": containerID,
+				"event":     EventResumed,
+			}).Error("failed to process event")
+		}
+	})
+
+	return nil
+}
+
+// Stats handles stats requests for containers. The returned Stats carries the
+// HCS statistics of the container together with the time, taken just before
+// the query, at which they were read.
+func (c *client) Stats(_ context.Context, containerID string) (*Stats, error) {
+	ctr, _, err := c.getProcess(containerID, InitProcessName)
+	if err != nil {
+		return nil, err
+	}
+
+	result := &Stats{Read: time.Now()}
+	hcsStats, err := ctr.hcsContainer.Statistics()
+	if err != nil {
+		return nil, err
+	}
+	result.HCSStats = &hcsStats
+	return result, nil
+}
+
+// Restore is the handler for restoring a container. No re-attach is done
+// here: if HCS can open the container it is terminated, and the function
+// always reports (false, -1). A terminate failure is logged at debug level
+// and returned to the caller.
+func (c *client) Restore(ctx context.Context, id string, attachStdio StdioCallback) (bool, int, error) {
+	c.logger.WithField("container", id).Debug("restore()")
+
+	// TODO Windows: On RS1, a re-attach isn't possible.
+	// However, there is a scenario in which there is an issue.
+	// Consider a background container. The daemon dies unexpectedly.
+	// HCS will still have the compute service alive and running.
+	// For consistence, we call in to shoot it regardless if HCS knows about it.
+	hc, openErr := hcsshim.OpenContainer(id)
+	if openErr != nil {
+		// The open error is deliberately ignored: nothing to terminate.
+		return false, -1, nil
+	}
+
+	const terminateTimeout = time.Minute * 2
+	err := hc.Terminate()
+	switch {
+	case hcsshim.IsPending(err):
+		err = hc.WaitTimeout(terminateTimeout)
+	case hcsshim.IsAlreadyStopped(err):
+		err = nil
+	}
+
+	if err != nil {
+		c.logger.WithField("container", id).WithError(err).Debug("terminate failed on restore")
+		return false, -1, err
+	}
+	return false, -1, nil
+}
+
+// ListPids would return the process IDs running in a container. It is not
+// implemented on Windows and always returns an error.
+func (c *client) ListPids(_ context.Context, _ string) ([]uint32, error) {
+ return nil, errors.New("not implemented on Windows")
+}
+
+// Summary returns a summary of the processes running in a container.
+// This is present in Windows to support docker top. In linux, the
+// engine shells out to ps to get process information. On Windows, as
+// the containers could be Hyper-V containers, they would not be
+// visible on the container host. However, libcontainerd does have
+// that information.
+func (c *client) Summary(_ context.Context, containerID string) ([]Summary, error) {
+	ctr, _, err := c.getProcess(containerID, InitProcessName)
+	if err != nil {
+		return nil, err
+	}
+
+	procs, err := ctr.hcsContainer.ProcessList()
+	if err != nil {
+		return nil, err
+	}
+
+	// Convert each HCS process entry to the libcontainerd Summary type.
+	summaries := make([]Summary, 0, len(procs))
+	for _, proc := range procs {
+		summaries = append(summaries, Summary(proc))
+	}
+	return summaries, nil
+}
+
+// DeleteTask returns the exit code and exit time recorded for a stopped
+// container. It does not wait: if the container's wait channel is not yet
+// closed it fails with "container is not stopped", and if ctx is done it may
+// fail with the context error. On failure the exit code is uint32(-1) and the
+// time is the current time.
+func (c *client) DeleteTask(ctx context.Context, containerID string) (uint32, time.Time, error) {
+	// Same value as converting an int -1 to uint32.
+	failedExitCode := ^uint32(0)
+
+	ctr := c.getContainer(containerID)
+	if ctr == nil {
+		return failedExitCode, time.Now(), errors.WithStack(newNotFoundError("no such container"))
+	}
+
+	select {
+	case <-ctx.Done():
+		return failedExitCode, time.Now(), errors.WithStack(ctx.Err())
+	case <-ctr.waitCh:
+		// The init process has been reaped; exit details are available.
+	default:
+		return failedExitCode, time.Now(), errors.New("container is not stopped")
+	}
+
+	ctr.Lock()
+	code, when := ctr.exitCode, ctr.exitedAt
+	ctr.Unlock()
+	return code, when, nil
+}
+
+// Delete removes a container from the client's table. A container in
+// StatusCreated is shut down first; a container in StatusStopped is removed
+// directly. Any other status is rejected with an invalid-parameter error, and
+// an unknown container with a not-found error.
+func (c *client) Delete(_ context.Context, containerID string) error {
+	c.Lock()
+	defer c.Unlock()
+
+	ctr := c.containers[containerID]
+	if ctr == nil {
+		return errors.WithStack(newNotFoundError("no such container"))
+	}
+
+	ctr.Lock()
+	defer ctr.Unlock()
+
+	if ctr.status != StatusCreated && ctr.status != StatusStopped {
+		return errors.WithStack(newInvalidParameterError("container is not stopped"))
+	}
+
+	// A created-but-never-stopped container still has to be shut down.
+	if ctr.status == StatusCreated {
+		if err := c.shutdownContainer(ctr); err != nil {
+			return err
+		}
+	}
+
+	delete(c.containers, containerID)
+	return nil
+}
+
+// Status returns the current status recorded for a container, or
+// StatusUnknown with a not-found error if the container is not known.
+func (c *client) Status(ctx context.Context, containerID string) (Status, error) {
+	c.Lock()
+	defer c.Unlock()
+
+	if ctr := c.containers[containerID]; ctr != nil {
+		ctr.Lock()
+		defer ctr.Unlock()
+		return ctr.status, nil
+	}
+
+	return StatusUnknown, errors.WithStack(newNotFoundError("no such container"))
+}
+
+// UpdateResources is a no-op on Windows: it ignores its arguments and always
+// returns nil.
+func (c *client) UpdateResources(ctx context.Context, containerID string, resources *Resources) error {
+ // Updating resource isn't supported on Windows
+ // but we should return nil for enabling updating container
+ return nil
+}
+
+// CreateCheckpoint always fails: checkpoints are not supported for Windows
+// containers.
+func (c *client) CreateCheckpoint(ctx context.Context, containerID, checkpointDir string, exit bool) error {
+ return errors.New("Windows: Containers do not support checkpoints")
+}
+
+// getContainer looks up a container by id under the client lock. It returns
+// nil if no container is registered under that id.
+func (c *client) getContainer(id string) *container {
+	c.Lock()
+	defer c.Unlock()
+	return c.containers[id]
+}
+
+// getProcess resolves a container and one of its processes. InitProcessName
+// selects the container's init process; any other id is looked up among the
+// container's execs under the container lock. A not-found error is returned
+// if the container is unknown, has no init process, or has no such exec.
+func (c *client) getProcess(containerID, processID string) (*container, *process, error) {
+	ctr := c.getContainer(containerID)
+	if ctr == nil {
+		return nil, nil, errors.WithStack(newNotFoundError("no such container"))
+	}
+	if ctr.init == nil {
+		return nil, nil, errors.WithStack(newNotFoundError("container is not running"))
+	}
+	if processID == InitProcessName {
+		return ctr, ctr.init, nil
+	}
+
+	// Exec lookup: the lock is held until the function returns.
+	ctr.Lock()
+	defer ctr.Unlock()
+
+	if ctr.execs == nil {
+		return nil, nil, errors.WithStack(newNotFoundError("no execs"))
+	}
+	found := ctr.execs[processID]
+	if found == nil {
+		return nil, nil, errors.WithStack(newNotFoundError("no such exec"))
+	}
+	return ctr, found, nil
+}
+
+// shutdownContainer asks HCS to shut the container down, waiting up to five
+// minutes if the request is pending. An already-stopped container counts as
+// success. If the shutdown fails, the container is terminated instead and the
+// result of that termination is returned.
+func (c *client) shutdownContainer(ctr *container) error {
+	const shutdownTimeout = time.Minute * 5
+
+	err := ctr.hcsContainer.Shutdown()
+	switch {
+	case hcsshim.IsPending(err):
+		err = ctr.hcsContainer.WaitTimeout(shutdownTimeout)
+	case hcsshim.IsAlreadyStopped(err):
+		err = nil
+	}
+
+	if err == nil {
+		return nil
+	}
+
+	c.logger.WithError(err).WithField("container", ctr.id).
+		Debug("failed to shutdown container, terminating it")
+	return c.terminateContainer(ctr)
+}
+
+// terminateContainer asks HCS to terminate the container, waiting up to five
+// minutes if the request is pending. An already-stopped container counts as
+// success. Any remaining error is logged at debug level and returned.
+func (c *client) terminateContainer(ctr *container) error {
+	const terminateTimeout = time.Minute * 5
+
+	err := ctr.hcsContainer.Terminate()
+	switch {
+	case hcsshim.IsPending(err):
+		err = ctr.hcsContainer.WaitTimeout(terminateTimeout)
+	case hcsshim.IsAlreadyStopped(err):
+		err = nil
+	}
+
+	if err == nil {
+		return nil
+	}
+
+	c.logger.WithError(err).WithField("container", ctr.id).
+		Debug("failed to terminate container")
+	return err
+}
+
+// reapProcess blocks until process p of container ctr has exited, releases
+// its HCS resources and returns its exit code (-1 if the exit code could not
+// be retrieved).
+//
+// When p is the init process the container is marked StatusStopped, its exit
+// time and code are recorded, waitCh is closed, and the container is shut
+// down and its HCS handle closed. Unless this is a Windows servicing
+// container, an EventExit is queued for the backend; for exec processes the
+// queued function also removes the process from ctr.execs.
+func (c *client) reapProcess(ctr *container, p *process) int {
+ logger := c.logger.WithFields(logrus.Fields{
+ "container": ctr.id,
+ "process": p.id,
+ })
+
+ // Block indefinitely for the process to exit.
+ if err := p.hcsProcess.Wait(); err != nil {
+ if herr, ok := err.(*hcsshim.ProcessError); ok && herr.Err != windows.ERROR_BROKEN_PIPE {
+ logger.WithError(err).Warnf("Wait() failed (container may have been killed)")
+ }
+ // Fall through here, do not return. This ensures we attempt to
+ // continue the shutdown in HCS and tell the docker engine that the
+ // process/container has exited to avoid a container being dropped on
+ // the floor.
+ }
+ exitedAt := time.Now()
+
+ exitCode, err := p.hcsProcess.ExitCode()
+ if err != nil {
+ if herr, ok := err.(*hcsshim.ProcessError); ok && herr.Err != windows.ERROR_BROKEN_PIPE {
+ logger.WithError(err).Warnf("unable to get exit code for process")
+ }
+ // Since we got an error retrieving the exit code, make sure that the
+ // code we return doesn't incorrectly indicate success.
+ exitCode = -1
+
+ // Fall through here, do not return. This ensures we attempt to
+ // continue the shutdown in HCS and tell the docker engine that the
+ // process/container has exited to avoid a container being dropped on
+ // the floor.
+ }
+
+ if err := p.hcsProcess.Close(); err != nil {
+ logger.WithError(err).Warnf("failed to cleanup hcs process resources")
+ }
+
+ var pendingUpdates bool
+ if p.id == InitProcessName {
+ // Update container status
+ ctr.Lock()
+ ctr.status = StatusStopped
+ ctr.exitedAt = exitedAt
+ ctr.exitCode = uint32(exitCode)
+ // Closing waitCh is what DeleteTask checks to see the container as stopped.
+ close(ctr.waitCh)
+ ctr.Unlock()
+
+ // Handle any servicing
+ if exitCode == 0 && ctr.isWindows && !ctr.ociSpec.Windows.Servicing {
+ pendingUpdates, err = ctr.hcsContainer.HasPendingUpdates()
+ logger.Infof("Pending updates: %v", pendingUpdates)
+ if err != nil {
+ logger.WithError(err).
+ Warnf("failed to check for pending updates (container may have been killed)")
+ }
+ }
+
+ if err := c.shutdownContainer(ctr); err != nil {
+ logger.WithError(err).Warn("failed to shutdown container")
+ } else {
+ logger.Debug("completed container shutdown")
+ }
+
+ if err := ctr.hcsContainer.Close(); err != nil {
+ logger.WithError(err).Error("failed to clean hcs container resources")
+ }
+ }
+
+ // No exit event is queued for Windows servicing containers.
+ if !(ctr.isWindows && ctr.ociSpec.Windows.Servicing) {
+ c.eventQ.append(ctr.id, func() {
+ ei := EventInfo{
+ ContainerID: ctr.id,
+ ProcessID: p.id,
+ Pid: uint32(p.pid),
+ ExitCode: uint32(exitCode),
+ ExitedAt: exitedAt,
+ UpdatePending: pendingUpdates,
+ }
+ c.logger.WithFields(logrus.Fields{
+ "container": ctr.id,
+ "event": EventExit,
+ "event-info": ei,
+ }).Info("sending event")
+ err := c.backend.ProcessEvent(ctr.id, EventExit, ei)
+ if err != nil {
+ c.logger.WithError(err).WithFields(logrus.Fields{
+ "container": ctr.id,
+ "event": EventExit,
+ "event-info": ei,
+ }).Error("failed to process event")
+ }
+ if p.id != InitProcessName {
+ ctr.Lock()
+ delete(ctr.execs, p.id)
+ ctr.Unlock()
+ }
+ })
+ }
+
+ return exitCode
+}
diff --git a/libcontainerd/client_solaris.go b/libcontainerd/client_solaris.go
deleted file mode 100644
index c54cea3bfa..0000000000
--- a/libcontainerd/client_solaris.go
+++ /dev/null
@@ -1,104 +0,0 @@
-package libcontainerd
-
-import (
- containerd "github.com/containerd/containerd/api/grpc/types"
- "golang.org/x/net/context"
-)
-
-type client struct {
- clientCommon
-
- // Platform specific properties below here.
- remote *remote
- q queue
- exitNotifiers map[string]*exitNotifier
- liveRestore bool
-}
-
-// GetServerVersion returns the connected server version information
-func (clnt *client) GetServerVersion(ctx context.Context) (*ServerVersion, error) {
- resp, err := clnt.remote.apiClient.GetServerVersion(ctx, &containerd.GetServerVersionRequest{})
- if err != nil {
- return nil, err
- }
-
- sv := &ServerVersion{
- GetServerVersionResponse: *resp,
- }
-
- return sv, nil
-}
-
-func (clnt *client) AddProcess(ctx context.Context, containerID, processFriendlyName string, specp Process, attachStdio StdioCallback) (int, error) {
- return -1, nil
-}
-
-func (clnt *client) SignalProcess(containerID string, pid string, sig int) error {
- return nil
-}
-
-func (clnt *client) Resize(containerID, processFriendlyName string, width, height int) error {
- return nil
-}
-
-func (clnt *client) Pause(containerID string) error {
- return nil
-}
-
-func (clnt *client) Resume(containerID string) error {
- return nil
-}
-
-func (clnt *client) Stats(containerID string) (*Stats, error) {
- return nil, nil
-}
-
-func (clnt *client) getExitNotifier(containerID string) *exitNotifier {
- clnt.mapMutex.RLock()
- defer clnt.mapMutex.RUnlock()
- return clnt.exitNotifiers[containerID]
-}
-
-func (clnt *client) getOrCreateExitNotifier(containerID string) *exitNotifier {
- clnt.mapMutex.Lock()
- defer clnt.mapMutex.Unlock()
- w, ok := clnt.exitNotifiers[containerID]
- if !ok {
- w = &exitNotifier{c: make(chan struct{}), client: clnt}
- clnt.exitNotifiers[containerID] = w
- }
- return w
-}
-
-// Restore is the handler for restoring a container
-func (clnt *client) Restore(containerID string, attachStdio StdioCallback, options ...CreateOption) error {
- return nil
-}
-
-func (clnt *client) GetPidsForContainer(containerID string) ([]int, error) {
- return nil, nil
-}
-
-// Summary returns a summary of the processes running in a container.
-func (clnt *client) Summary(containerID string) ([]Summary, error) {
- return nil, nil
-}
-
-// UpdateResources updates resources for a running container.
-func (clnt *client) UpdateResources(containerID string, resources Resources) error {
- // Updating resource isn't supported on Solaris
- // but we should return nil for enabling updating container
- return nil
-}
-
-func (clnt *client) CreateCheckpoint(containerID string, checkpointID string, checkpointDir string, exit bool) error {
- return nil
-}
-
-func (clnt *client) DeleteCheckpoint(containerID string, checkpointID string, checkpointDir string) error {
- return nil
-}
-
-func (clnt *client) ListCheckpoints(containerID string, checkpointDir string) (*Checkpoints, error) {
- return nil, nil
-}
diff --git a/libcontainerd/client_unix.go b/libcontainerd/client_unix.go
deleted file mode 100644
index 202a5b09b1..0000000000
--- a/libcontainerd/client_unix.go
+++ /dev/null
@@ -1,141 +0,0 @@
-// +build linux solaris
-
-package libcontainerd
-
-import (
- "encoding/json"
- "fmt"
- "os"
- "path/filepath"
- "strings"
- "sync"
-
- containerd "github.com/containerd/containerd/api/grpc/types"
- "github.com/docker/docker/pkg/idtools"
- specs "github.com/opencontainers/runtime-spec/specs-go"
- "github.com/sirupsen/logrus"
- "golang.org/x/net/context"
-)
-
-func (clnt *client) prepareBundleDir(uid, gid int) (string, error) {
- root, err := filepath.Abs(clnt.remote.stateDir)
- if err != nil {
- return "", err
- }
- if uid == 0 && gid == 0 {
- return root, nil
- }
- p := string(filepath.Separator)
- for _, d := range strings.Split(root, string(filepath.Separator))[1:] {
- p = filepath.Join(p, d)
- fi, err := os.Stat(p)
- if err != nil && !os.IsNotExist(err) {
- return "", err
- }
- if os.IsNotExist(err) || fi.Mode()&1 == 0 {
- p = fmt.Sprintf("%s.%d.%d", p, uid, gid)
- if err := idtools.MkdirAndChown(p, 0700, idtools.IDPair{uid, gid}); err != nil && !os.IsExist(err) {
- return "", err
- }
- }
- }
- return p, nil
-}
-
-func (clnt *client) Create(containerID string, checkpoint string, checkpointDir string, spec specs.Spec, attachStdio StdioCallback, options ...CreateOption) (err error) {
- clnt.lock(containerID)
- defer clnt.unlock(containerID)
-
- if _, err := clnt.getContainer(containerID); err == nil {
- return fmt.Errorf("Container %s is already active", containerID)
- }
-
- uid, gid, err := getRootIDs(spec)
- if err != nil {
- return err
- }
- dir, err := clnt.prepareBundleDir(uid, gid)
- if err != nil {
- return err
- }
-
- container := clnt.newContainer(filepath.Join(dir, containerID), options...)
- if err := container.clean(); err != nil {
- return err
- }
-
- defer func() {
- if err != nil {
- container.clean()
- clnt.deleteContainer(containerID)
- }
- }()
-
- if err := idtools.MkdirAllAndChown(container.dir, 0700, idtools.IDPair{uid, gid}); err != nil && !os.IsExist(err) {
- return err
- }
-
- f, err := os.Create(filepath.Join(container.dir, configFilename))
- if err != nil {
- return err
- }
- defer f.Close()
- if err := json.NewEncoder(f).Encode(spec); err != nil {
- return err
- }
- return container.start(&spec, checkpoint, checkpointDir, attachStdio)
-}
-
-func (clnt *client) Signal(containerID string, sig int) error {
- clnt.lock(containerID)
- defer clnt.unlock(containerID)
- _, err := clnt.remote.apiClient.Signal(context.Background(), &containerd.SignalRequest{
- Id: containerID,
- Pid: InitFriendlyName,
- Signal: uint32(sig),
- })
- return err
-}
-
-func (clnt *client) newContainer(dir string, options ...CreateOption) *container {
- container := &container{
- containerCommon: containerCommon{
- process: process{
- dir: dir,
- processCommon: processCommon{
- containerID: filepath.Base(dir),
- client: clnt,
- friendlyName: InitFriendlyName,
- },
- },
- processes: make(map[string]*process),
- },
- }
- for _, option := range options {
- if err := option.Apply(container); err != nil {
- logrus.Errorf("libcontainerd: newContainer(): %v", err)
- }
- }
- return container
-}
-
-type exitNotifier struct {
- id string
- client *client
- c chan struct{}
- once sync.Once
-}
-
-func (en *exitNotifier) close() {
- en.once.Do(func() {
- close(en.c)
- en.client.mapMutex.Lock()
- if en == en.client.exitNotifiers[en.id] {
- delete(en.client.exitNotifiers, en.id)
- }
- en.client.mapMutex.Unlock()
- })
-}
-func (en *exitNotifier) wait() <-chan struct{} {
- return en.c
-}
diff --git a/libcontainerd/client_windows.go b/libcontainerd/client_windows.go
deleted file mode 100644
index df9e40ea3c..0000000000
--- a/libcontainerd/client_windows.go
+++ /dev/null
@@ -1,886 +0,0 @@
-package libcontainerd
-
-import (
- "encoding/json"
- "errors"
- "fmt"
- "io"
- "io/ioutil"
- "os"
- "path"
- "path/filepath"
- "regexp"
- "strings"
- "syscall"
- "time"
-
- "golang.org/x/net/context"
-
- "github.com/Microsoft/hcsshim"
- opengcs "github.com/Microsoft/opengcs/client"
- "github.com/docker/docker/pkg/sysinfo"
- "github.com/docker/docker/pkg/system"
- specs "github.com/opencontainers/runtime-spec/specs-go"
- "github.com/sirupsen/logrus"
-)
-
-type client struct {
- clientCommon
-
- // Platform specific properties below here (none presently on Windows)
-}
-
-// Win32 error codes that are used for various workarounds
-// These really should be ALL_CAPS to match golangs syscall library and standard
-// Win32 error conventions, but golint insists on CamelCase.
-const (
- CoEClassstring = syscall.Errno(0x800401F3) // Invalid class string
- ErrorNoNetwork = syscall.Errno(1222) // The network is not present or not started
- ErrorBadPathname = syscall.Errno(161) // The specified path is invalid
- ErrorInvalidObject = syscall.Errno(0x800710D8) // The object identifier does not represent a valid object
-)
-
-// defaultOwner is a tag passed to HCS to allow it to differentiate between
-// container creator management stacks. We hard code "docker" in the case
-// of docker.
-const defaultOwner = "docker"
-
-// Create is the entrypoint to create a container from a spec, and if successfully
-// created, start it too. Table below shows the fields required for HCS JSON calling parameters,
-// where if not populated, is omitted.
-// +-----------------+--------------------------------------------+---------------------------------------------------+
-// | | Isolation=Process | Isolation=Hyper-V |
-// +-----------------+--------------------------------------------+---------------------------------------------------+
-// | VolumePath | \\?\\Volume{GUIDa} | |
-// | LayerFolderPath | %root%\windowsfilter\containerID | %root%\windowsfilter\containerID (servicing only) |
-// | Layers[] | ID=GUIDb;Path=%root%\windowsfilter\layerID | ID=GUIDb;Path=%root%\windowsfilter\layerID |
-// | HvRuntime | | ImagePath=%root%\BaseLayerID\UtilityVM |
-// +-----------------+--------------------------------------------+---------------------------------------------------+
-//
-// Isolation=Process example:
-//
-// {
-// "SystemType": "Container",
-// "Name": "5e0055c814a6005b8e57ac59f9a522066e0af12b48b3c26a9416e23907698776",
-// "Owner": "docker",
-// "VolumePath": "\\\\\\\\?\\\\Volume{66d1ef4c-7a00-11e6-8948-00155ddbef9d}",
-// "IgnoreFlushesDuringBoot": true,
-// "LayerFolderPath": "C:\\\\control\\\\windowsfilter\\\\5e0055c814a6005b8e57ac59f9a522066e0af12b48b3c26a9416e23907698776",
-// "Layers": [{
-// "ID": "18955d65-d45a-557b-bf1c-49d6dfefc526",
-// "Path": "C:\\\\control\\\\windowsfilter\\\\65bf96e5760a09edf1790cb229e2dfb2dbd0fcdc0bf7451bae099106bfbfea0c"
-// }],
-// "HostName": "5e0055c814a6",
-// "MappedDirectories": [],
-// "HvPartition": false,
-// "EndpointList": ["eef2649d-bb17-4d53-9937-295a8efe6f2c"],
-// "Servicing": false
-//}
-//
-// Isolation=Hyper-V example:
-//
-//{
-// "SystemType": "Container",
-// "Name": "475c2c58933b72687a88a441e7e0ca4bd72d76413c5f9d5031fee83b98f6045d",
-// "Owner": "docker",
-// "IgnoreFlushesDuringBoot": true,
-// "Layers": [{
-// "ID": "18955d65-d45a-557b-bf1c-49d6dfefc526",
-// "Path": "C:\\\\control\\\\windowsfilter\\\\65bf96e5760a09edf1790cb229e2dfb2dbd0fcdc0bf7451bae099106bfbfea0c"
-// }],
-// "HostName": "475c2c58933b",
-// "MappedDirectories": [],
-// "HvPartition": true,
-// "EndpointList": ["e1bb1e61-d56f-405e-b75d-fd520cefa0cb"],
-// "DNSSearchList": "a.com,b.com,c.com",
-// "HvRuntime": {
-// "ImagePath": "C:\\\\control\\\\windowsfilter\\\\65bf96e5760a09edf1790cb229e2dfb2dbd0fcdc0bf7451bae099106bfbfea0c\\\\UtilityVM"
-// },
-// "Servicing": false
-//}
-func (clnt *client) Create(containerID string, checkpoint string, checkpointDir string, spec specs.Spec, attachStdio StdioCallback, options ...CreateOption) error {
- clnt.lock(containerID)
- defer clnt.unlock(containerID)
- if b, err := json.Marshal(spec); err == nil {
- logrus.Debugln("libcontainerd: client.Create() with spec", string(b))
- }
-
- // spec.Linux must be nil for Windows containers, but spec.Windows will be filled in regardless of container platform.
- // This is a temporary workaround due to LCOW requiring layer folder paths, which are stored under spec.Windows.
- // TODO: @darrenstahlmsft fix this once the OCI spec is updated to support layer folder paths for LCOW
- if spec.Linux == nil {
- return clnt.createWindows(containerID, checkpoint, checkpointDir, spec, attachStdio, options...)
- }
- return clnt.createLinux(containerID, checkpoint, checkpointDir, spec, attachStdio, options...)
-}
-
-func (clnt *client) createWindows(containerID string, checkpoint string, checkpointDir string, spec specs.Spec, attachStdio StdioCallback, options ...CreateOption) error {
- configuration := &hcsshim.ContainerConfig{
- SystemType: "Container",
- Name: containerID,
- Owner: defaultOwner,
- IgnoreFlushesDuringBoot: spec.Windows.IgnoreFlushesDuringBoot,
- HostName: spec.Hostname,
- HvPartition: false,
- Servicing: spec.Windows.Servicing,
- }
-
- if spec.Windows.Resources != nil {
- if spec.Windows.Resources.CPU != nil {
- if spec.Windows.Resources.CPU.Count != nil {
- // This check is being done here rather than in adaptContainerSettings
- // because we don't want to update the HostConfig in case this container
- // is moved to a host with more CPUs than this one.
- cpuCount := *spec.Windows.Resources.CPU.Count
- hostCPUCount := uint64(sysinfo.NumCPU())
- if cpuCount > hostCPUCount {
- logrus.Warnf("Changing requested CPUCount of %d to current number of processors, %d", cpuCount, hostCPUCount)
- cpuCount = hostCPUCount
- }
- configuration.ProcessorCount = uint32(cpuCount)
- }
- if spec.Windows.Resources.CPU.Shares != nil {
- configuration.ProcessorWeight = uint64(*spec.Windows.Resources.CPU.Shares)
- }
- if spec.Windows.Resources.CPU.Maximum != nil {
- configuration.ProcessorMaximum = int64(*spec.Windows.Resources.CPU.Maximum)
- }
- }
- if spec.Windows.Resources.Memory != nil {
- if spec.Windows.Resources.Memory.Limit != nil {
- configuration.MemoryMaximumInMB = int64(*spec.Windows.Resources.Memory.Limit) / 1024 / 1024
- }
- }
- if spec.Windows.Resources.Storage != nil {
- if spec.Windows.Resources.Storage.Bps != nil {
- configuration.StorageBandwidthMaximum = *spec.Windows.Resources.Storage.Bps
- }
- if spec.Windows.Resources.Storage.Iops != nil {
- configuration.StorageIOPSMaximum = *spec.Windows.Resources.Storage.Iops
- }
- }
- }
-
- if spec.Windows.HyperV != nil {
- configuration.HvPartition = true
- }
-
- if spec.Windows.Network != nil {
- configuration.EndpointList = spec.Windows.Network.EndpointList
- configuration.AllowUnqualifiedDNSQuery = spec.Windows.Network.AllowUnqualifiedDNSQuery
- if spec.Windows.Network.DNSSearchList != nil {
- configuration.DNSSearchList = strings.Join(spec.Windows.Network.DNSSearchList, ",")
- }
- configuration.NetworkSharedContainerName = spec.Windows.Network.NetworkSharedContainerName
- }
-
- if cs, ok := spec.Windows.CredentialSpec.(string); ok {
- configuration.Credentials = cs
- }
-
- // We must have least two layers in the spec, the bottom one being a base image,
- // the top one being the RW layer.
- if spec.Windows.LayerFolders == nil || len(spec.Windows.LayerFolders) < 2 {
- return fmt.Errorf("OCI spec is invalid - at least two LayerFolders must be supplied to the runtime")
- }
-
- // Strip off the top-most layer as that's passed in separately to HCS
- configuration.LayerFolderPath = spec.Windows.LayerFolders[len(spec.Windows.LayerFolders)-1]
- layerFolders := spec.Windows.LayerFolders[:len(spec.Windows.LayerFolders)-1]
-
- if configuration.HvPartition {
- // We don't currently support setting the utility VM image explicitly.
- // TODO @swernli/jhowardmsft circa RS3/4, this may be re-locatable.
- if spec.Windows.HyperV.UtilityVMPath != "" {
- return errors.New("runtime does not support an explicit utility VM path for Hyper-V containers")
- }
-
- // Find the upper-most utility VM image.
- var uvmImagePath string
- for _, path := range layerFolders {
- fullPath := filepath.Join(path, "UtilityVM")
- _, err := os.Stat(fullPath)
- if err == nil {
- uvmImagePath = fullPath
- break
- }
- if !os.IsNotExist(err) {
- return err
- }
- }
- if uvmImagePath == "" {
- return errors.New("utility VM image could not be found")
- }
- configuration.HvRuntime = &hcsshim.HvRuntime{ImagePath: uvmImagePath}
-
- if spec.Root.Path != "" {
- return errors.New("OCI spec is invalid - Root.Path must be omitted for a Hyper-V container")
- }
- } else {
- const volumeGUIDRegex = `^\\\\\?\\(Volume)\{{0,1}[0-9a-fA-F]{8}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{12}(\}){0,1}\}\\$`
- if _, err := regexp.MatchString(volumeGUIDRegex, spec.Root.Path); err != nil {
- return fmt.Errorf(`OCI spec is invalid - Root.Path '%s' must be a volume GUID path in the format '\\?\Volume{GUID}\'`, spec.Root.Path)
- }
- // HCS API requires the trailing backslash to be removed
- configuration.VolumePath = spec.Root.Path[:len(spec.Root.Path)-1]
- }
-
- if spec.Root.Readonly {
- return errors.New(`OCI spec is invalid - Root.Readonly must not be set on Windows`)
- }
-
- for _, layerPath := range layerFolders {
- _, filename := filepath.Split(layerPath)
- g, err := hcsshim.NameToGuid(filename)
- if err != nil {
- return err
- }
- configuration.Layers = append(configuration.Layers, hcsshim.Layer{
- ID: g.ToString(),
- Path: layerPath,
- })
- }
-
- // Add the mounts (volumes, bind mounts etc) to the structure
- var mds []hcsshim.MappedDir
- var mps []hcsshim.MappedPipe
- for _, mount := range spec.Mounts {
- const pipePrefix = `\\.\pipe\`
- if mount.Type != "" {
- return fmt.Errorf("OCI spec is invalid - Mount.Type '%s' must not be set", mount.Type)
- }
- if strings.HasPrefix(mount.Destination, pipePrefix) {
- mp := hcsshim.MappedPipe{
- HostPath: mount.Source,
- ContainerPipeName: mount.Destination[len(pipePrefix):],
- }
- mps = append(mps, mp)
- } else {
- md := hcsshim.MappedDir{
- HostPath: mount.Source,
- ContainerPath: mount.Destination,
- ReadOnly: false,
- }
- for _, o := range mount.Options {
- if strings.ToLower(o) == "ro" {
- md.ReadOnly = true
- }
- }
- mds = append(mds, md)
- }
- }
- configuration.MappedDirectories = mds
- if len(mps) > 0 && system.GetOSVersion().Build < 16210 { // replace with Win10 RS3 build number at RTM
- return errors.New("named pipe mounts are not supported on this version of Windows")
- }
- configuration.MappedPipes = mps
-
- hcsContainer, err := hcsshim.CreateContainer(containerID, configuration)
- if err != nil {
- return err
- }
-
- // Construct a container object for calling start on it.
- container := &container{
- containerCommon: containerCommon{
- process: process{
- processCommon: processCommon{
- containerID: containerID,
- client: clnt,
- friendlyName: InitFriendlyName,
- },
- },
- processes: make(map[string]*process),
- },
- isWindows: true,
- ociSpec: spec,
- hcsContainer: hcsContainer,
- }
-
- container.options = options
- for _, option := range options {
- if err := option.Apply(container); err != nil {
- logrus.Errorf("libcontainerd: %v", err)
- }
- }
-
- // Call start, and if it fails, delete the container from our
- // internal structure, start will keep HCS in sync by deleting the
- // container there.
- logrus.Debugf("libcontainerd: createWindows() id=%s, Calling start()", containerID)
- if err := container.start(attachStdio); err != nil {
- clnt.deleteContainer(containerID)
- return err
- }
-
- logrus.Debugf("libcontainerd: createWindows() id=%s completed successfully", containerID)
- return nil
-
-}
-
-func (clnt *client) createLinux(containerID string, checkpoint string, checkpointDir string, spec specs.Spec, attachStdio StdioCallback, options ...CreateOption) error {
- logrus.Debugf("libcontainerd: createLinux(): containerId %s ", containerID)
-
- var lcowOpt *LCOWOption
- for _, option := range options {
- if lcow, ok := option.(*LCOWOption); ok {
- lcowOpt = lcow
- }
- }
- if lcowOpt == nil || lcowOpt.Config == nil {
- return fmt.Errorf("lcow option must be supplied to the runtime")
- }
-
- configuration := &hcsshim.ContainerConfig{
- HvPartition: true,
- Name: containerID,
- SystemType: "container",
- ContainerType: "linux",
- Owner: defaultOwner,
- TerminateOnLastHandleClosed: true,
- }
-
- if lcowOpt.Config.ActualMode == opengcs.ModeActualVhdx {
- configuration.HvRuntime = &hcsshim.HvRuntime{
- ImagePath: lcowOpt.Config.Vhdx,
- BootSource: "Vhd",
- WritableBootSource: false,
- }
- } else {
- configuration.HvRuntime = &hcsshim.HvRuntime{
- ImagePath: lcowOpt.Config.KirdPath,
- LinuxKernelFile: lcowOpt.Config.KernelFile,
- LinuxInitrdFile: lcowOpt.Config.InitrdFile,
- LinuxBootParameters: lcowOpt.Config.BootParameters,
- }
- }
-
- if spec.Windows == nil {
- return fmt.Errorf("spec.Windows must not be nil for LCOW containers")
- }
-
- // We must have least one layer in the spec
- if spec.Windows.LayerFolders == nil || len(spec.Windows.LayerFolders) == 0 {
- return fmt.Errorf("OCI spec is invalid - at least one LayerFolders must be supplied to the runtime")
- }
-
- // Strip off the top-most layer as that's passed in separately to HCS
- configuration.LayerFolderPath = spec.Windows.LayerFolders[len(spec.Windows.LayerFolders)-1]
- layerFolders := spec.Windows.LayerFolders[:len(spec.Windows.LayerFolders)-1]
-
- for _, layerPath := range layerFolders {
- _, filename := filepath.Split(layerPath)
- g, err := hcsshim.NameToGuid(filename)
- if err != nil {
- return err
- }
- configuration.Layers = append(configuration.Layers, hcsshim.Layer{
- ID: g.ToString(),
- Path: filepath.Join(layerPath, "layer.vhd"),
- })
- }
-
- if spec.Windows.Network != nil {
- configuration.EndpointList = spec.Windows.Network.EndpointList
- configuration.AllowUnqualifiedDNSQuery = spec.Windows.Network.AllowUnqualifiedDNSQuery
- if spec.Windows.Network.DNSSearchList != nil {
- configuration.DNSSearchList = strings.Join(spec.Windows.Network.DNSSearchList, ",")
- }
- configuration.NetworkSharedContainerName = spec.Windows.Network.NetworkSharedContainerName
- }
-
- // Add the mounts (volumes, bind mounts etc) to the structure. We have to do
- // some translation for both the mapped directories passed into HCS and in
- // the spec.
- //
- // For HCS, we only pass in the mounts from the spec which are type "bind".
- // Further, the "ContainerPath" field (which is a little mis-leadingly
- // named when it applies to the utility VM rather than the container in the
- // utility VM) is moved to under /tmp/gcs/<ID>/binds, where this is passed
- // by the caller through a 'uvmpath' option.
- //
- // We do similar translation for the mounts in the spec by stripping out
- // the uvmpath option, and translating the Source path to the location in the
- // utility VM calculated above.
- //
- // From inside the utility VM, you would see a 9p mount such as in the following
- // where a host folder has been mapped to /target. The line with /tmp/gcs/<ID>/binds
- // specifically:
- //
- // / # mount
- // rootfs on / type rootfs (rw,size=463736k,nr_inodes=115934)
- // proc on /proc type proc (rw,relatime)
- // sysfs on /sys type sysfs (rw,relatime)
- // udev on /dev type devtmpfs (rw,relatime,size=498100k,nr_inodes=124525,mode=755)
- // tmpfs on /run type tmpfs (rw,relatime)
- // cgroup on /sys/fs/cgroup type cgroup (rw,relatime,cpuset,cpu,cpuacct,blkio,memory,devices,freezer,net_cls,perf_event,net_prio,hugetlb,pids,rdma)
- // mqueue on /dev/mqueue type mqueue (rw,relatime)
- // devpts on /dev/pts type devpts (rw,relatime,mode=600,ptmxmode=000)
- // /binds/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/target on /binds/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/target type 9p (rw,sync,dirsync,relatime,trans=fd,rfdno=6,wfdno=6)
- // /dev/pmem0 on /tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/layer0 type ext4 (ro,relatime,block_validity,delalloc,norecovery,barrier,dax,user_xattr,acl)
- // /dev/sda on /tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/scratch type ext4 (rw,relatime,block_validity,delalloc,barrier,user_xattr,acl)
- // overlay on /tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/rootfs type overlay (rw,relatime,lowerdir=/tmp/base/:/tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/layer0,upperdir=/tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/scratch/upper,workdir=/tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/scratch/work)
- //
- // /tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc # ls -l
- // total 16
- // drwx------ 3 0 0 60 Sep 7 18:54 binds
- // -rw-r--r-- 1 0 0 3345 Sep 7 18:54 config.json
- // drwxr-xr-x 10 0 0 4096 Sep 6 17:26 layer0
- // drwxr-xr-x 1 0 0 4096 Sep 7 18:54 rootfs
- // drwxr-xr-x 5 0 0 4096 Sep 7 18:54 scratch
- //
- // /tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc # ls -l binds
- // total 0
- // drwxrwxrwt 2 0 0 4096 Sep 7 16:51 target
-
- mds := []hcsshim.MappedDir{}
- specMounts := []specs.Mount{}
- for _, mount := range spec.Mounts {
- specMount := mount
- if mount.Type == "bind" {
- // Strip out the uvmpath from the options
- updatedOptions := []string{}
- uvmPath := ""
- readonly := false
- for _, opt := range mount.Options {
- dropOption := false
- elements := strings.SplitN(opt, "=", 2)
- switch elements[0] {
- case "uvmpath":
- uvmPath = elements[1]
- dropOption = true
- case "rw":
- case "ro":
- readonly = true
- case "rbind":
- default:
- return fmt.Errorf("unsupported option %q", opt)
- }
- if !dropOption {
- updatedOptions = append(updatedOptions, opt)
- }
- }
- mount.Options = updatedOptions
- if uvmPath == "" {
- return fmt.Errorf("no uvmpath for bind mount %+v", mount)
- }
- md := hcsshim.MappedDir{
- HostPath: mount.Source,
- ContainerPath: path.Join(uvmPath, mount.Destination),
- CreateInUtilityVM: true,
- ReadOnly: readonly,
- }
- mds = append(mds, md)
- specMount.Source = path.Join(uvmPath, mount.Destination)
- }
- specMounts = append(specMounts, specMount)
- }
- configuration.MappedDirectories = mds
-
- hcsContainer, err := hcsshim.CreateContainer(containerID, configuration)
- if err != nil {
- return err
- }
-
- spec.Mounts = specMounts
-
- // Construct a container object for calling start on it.
- container := &container{
- containerCommon: containerCommon{
- process: process{
- processCommon: processCommon{
- containerID: containerID,
- client: clnt,
- friendlyName: InitFriendlyName,
- },
- },
- processes: make(map[string]*process),
- },
- ociSpec: spec,
- hcsContainer: hcsContainer,
- }
-
- container.options = options
- for _, option := range options {
- if err := option.Apply(container); err != nil {
- logrus.Errorf("libcontainerd: createLinux() %v", err)
- }
- }
-
- // Call start, and if it fails, delete the container from our
- // internal structure, start will keep HCS in sync by deleting the
- // container there.
- logrus.Debugf("libcontainerd: createLinux() id=%s, Calling start()", containerID)
- if err := container.start(attachStdio); err != nil {
- clnt.deleteContainer(containerID)
- return err
- }
-
- logrus.Debugf("libcontainerd: createLinux() id=%s completed successfully", containerID)
- return nil
-}
-
-// AddProcess is the handler for adding a process to an already running
-// container. It's called through docker exec. It returns the system pid of the
-// exec'd process.
-func (clnt *client) AddProcess(ctx context.Context, containerID, processFriendlyName string, procToAdd Process, attachStdio StdioCallback) (int, error) {
- clnt.lock(containerID)
- defer clnt.unlock(containerID)
- container, err := clnt.getContainer(containerID)
- if err != nil {
- return -1, err
- }
-
- defer container.debugGCS()
-
- // Note we always tell HCS to
- // create stdout as it's required regardless of '-i' or '-t' options, so that
- // docker can always grab the output through logs. We also tell HCS to always
- // create stdin, even if it's not used - it will be closed shortly. Stderr
- // is only created if it we're not -t.
- createProcessParms := hcsshim.ProcessConfig{
- CreateStdInPipe: true,
- CreateStdOutPipe: true,
- CreateStdErrPipe: !procToAdd.Terminal,
- }
- if procToAdd.Terminal {
- createProcessParms.EmulateConsole = true
- if procToAdd.ConsoleSize != nil {
- createProcessParms.ConsoleSize[0] = uint(procToAdd.ConsoleSize.Height)
- createProcessParms.ConsoleSize[1] = uint(procToAdd.ConsoleSize.Width)
- }
- }
-
- // Take working directory from the process to add if it is defined,
- // otherwise take from the first process.
- if procToAdd.Cwd != "" {
- createProcessParms.WorkingDirectory = procToAdd.Cwd
- } else {
- createProcessParms.WorkingDirectory = container.ociSpec.Process.Cwd
- }
-
- // Configure the environment for the process
- createProcessParms.Environment = setupEnvironmentVariables(procToAdd.Env)
- if container.isWindows {
- createProcessParms.CommandLine = strings.Join(procToAdd.Args, " ")
- } else {
- createProcessParms.CommandArgs = procToAdd.Args
- }
- createProcessParms.User = procToAdd.User.Username
-
- logrus.Debugf("libcontainerd: commandLine: %s", createProcessParms.CommandLine)
-
- // Start the command running in the container.
- var stdout, stderr io.ReadCloser
- var stdin io.WriteCloser
- newProcess, err := container.hcsContainer.CreateProcess(&createProcessParms)
- if err != nil {
- logrus.Errorf("libcontainerd: AddProcess(%s) CreateProcess() failed %s", containerID, err)
- return -1, err
- }
-
- pid := newProcess.Pid()
-
- stdin, stdout, stderr, err = newProcess.Stdio()
- if err != nil {
- logrus.Errorf("libcontainerd: %s getting std pipes failed %s", containerID, err)
- return -1, err
- }
-
- iopipe := &IOPipe{Terminal: procToAdd.Terminal}
- iopipe.Stdin = createStdInCloser(stdin, newProcess)
-
- // Convert io.ReadClosers to io.Readers
- if stdout != nil {
- iopipe.Stdout = ioutil.NopCloser(&autoClosingReader{ReadCloser: stdout})
- }
- if stderr != nil {
- iopipe.Stderr = ioutil.NopCloser(&autoClosingReader{ReadCloser: stderr})
- }
-
- proc := &process{
- processCommon: processCommon{
- containerID: containerID,
- friendlyName: processFriendlyName,
- client: clnt,
- systemPid: uint32(pid),
- },
- hcsProcess: newProcess,
- }
-
- // Add the process to the container's list of processes
- container.processes[processFriendlyName] = proc
-
- // Tell the engine to attach streams back to the client
- if err := attachStdio(*iopipe); err != nil {
- return -1, err
- }
-
- // Spin up a go routine waiting for exit to handle cleanup
- go container.waitExit(proc, false)
-
- return pid, nil
-}
-
-// Signal handles `docker stop` on Windows. While Linux has support for
-// the full range of signals, signals aren't really implemented on Windows.
-// We fake supporting regular stop and -9 to force kill.
-func (clnt *client) Signal(containerID string, sig int) error {
- var (
- cont *container
- err error
- )
-
- // Get the container as we need it to get the container handle.
- clnt.lock(containerID)
- defer clnt.unlock(containerID)
- if cont, err = clnt.getContainer(containerID); err != nil {
- return err
- }
-
- cont.manualStopRequested = true
-
- logrus.Debugf("libcontainerd: Signal() containerID=%s sig=%d pid=%d", containerID, sig, cont.systemPid)
-
- if syscall.Signal(sig) == syscall.SIGKILL {
- // Terminate the compute system
- if err := cont.hcsContainer.Terminate(); err != nil {
- if !hcsshim.IsPending(err) {
- logrus.Errorf("libcontainerd: failed to terminate %s - %q", containerID, err)
- }
- }
- } else {
- // Shut down the container
- if err := cont.hcsContainer.Shutdown(); err != nil {
- if !hcsshim.IsPending(err) && !hcsshim.IsAlreadyStopped(err) {
- // ignore errors
- logrus.Warnf("libcontainerd: failed to shutdown container %s: %q", containerID, err)
- }
- }
- }
-
- return nil
-}
-
-// While Linux has support for the full range of signals, signals aren't really implemented on Windows.
-// We try to terminate the specified process whatever signal is requested.
-func (clnt *client) SignalProcess(containerID string, processFriendlyName string, sig int) error {
- clnt.lock(containerID)
- defer clnt.unlock(containerID)
- cont, err := clnt.getContainer(containerID)
- if err != nil {
- return err
- }
-
- for _, p := range cont.processes {
- if p.friendlyName == processFriendlyName {
- return p.hcsProcess.Kill()
- }
- }
-
- return fmt.Errorf("SignalProcess could not find process %s in %s", processFriendlyName, containerID)
-}
-
-// Resize handles a CLI event to resize an interactive docker run or docker exec
-// window.
-func (clnt *client) Resize(containerID, processFriendlyName string, width, height int) error {
- // Get the libcontainerd container object
- clnt.lock(containerID)
- defer clnt.unlock(containerID)
- cont, err := clnt.getContainer(containerID)
- if err != nil {
- return err
- }
-
- h, w := uint16(height), uint16(width)
-
- if processFriendlyName == InitFriendlyName {
- logrus.Debugln("libcontainerd: resizing systemPID in", containerID, cont.process.systemPid)
- return cont.process.hcsProcess.ResizeConsole(w, h)
- }
-
- for _, p := range cont.processes {
- if p.friendlyName == processFriendlyName {
- logrus.Debugln("libcontainerd: resizing exec'd process", containerID, p.systemPid)
- return p.hcsProcess.ResizeConsole(w, h)
- }
- }
-
- return fmt.Errorf("Resize could not find containerID %s to resize", containerID)
-
-}
-
-// Pause handles pause requests for containers
-func (clnt *client) Pause(containerID string) error {
- unlockContainer := true
- // Get the libcontainerd container object
- clnt.lock(containerID)
- defer func() {
- if unlockContainer {
- clnt.unlock(containerID)
- }
- }()
- container, err := clnt.getContainer(containerID)
- if err != nil {
- return err
- }
-
- if container.ociSpec.Windows.HyperV == nil {
- return errors.New("cannot pause Windows Server Containers")
- }
-
- err = container.hcsContainer.Pause()
- if err != nil {
- return err
- }
-
- // Unlock container before calling back into the daemon
- unlockContainer = false
- clnt.unlock(containerID)
-
- return clnt.backend.StateChanged(containerID, StateInfo{
- CommonStateInfo: CommonStateInfo{
- State: StatePause,
- }})
-}
-
-// Resume handles resume requests for containers
-func (clnt *client) Resume(containerID string) error {
- unlockContainer := true
- // Get the libcontainerd container object
- clnt.lock(containerID)
- defer func() {
- if unlockContainer {
- clnt.unlock(containerID)
- }
- }()
- container, err := clnt.getContainer(containerID)
- if err != nil {
- return err
- }
-
- // This should never happen, since Windows Server Containers cannot be paused
-
- if container.ociSpec.Windows.HyperV == nil {
- return errors.New("cannot resume Windows Server Containers")
- }
-
- err = container.hcsContainer.Resume()
- if err != nil {
- return err
- }
-
- // Unlock container before calling back into the daemon
- unlockContainer = false
- clnt.unlock(containerID)
-
- return clnt.backend.StateChanged(containerID, StateInfo{
- CommonStateInfo: CommonStateInfo{
- State: StateResume,
- }})
-}
-
-// Stats handles stats requests for containers
-func (clnt *client) Stats(containerID string) (*Stats, error) {
- // Get the libcontainerd container object
- clnt.lock(containerID)
- defer clnt.unlock(containerID)
- container, err := clnt.getContainer(containerID)
- if err != nil {
- return nil, err
- }
- s, err := container.hcsContainer.Statistics()
- if err != nil {
- return nil, err
- }
- st := Stats(s)
- return &st, nil
-}
-
-// Restore is the handler for restoring a container
-func (clnt *client) Restore(containerID string, _ StdioCallback, unusedOnWindows ...CreateOption) error {
- logrus.Debugf("libcontainerd: Restore(%s)", containerID)
-
- // TODO Windows: On RS1, a re-attach isn't possible.
- // However, there is a scenario in which there is an issue.
- // Consider a background container. The daemon dies unexpectedly.
- // HCS will still have the compute service alive and running.
- // For consistence, we call in to shoot it regardless if HCS knows about it
- // We explicitly just log a warning if the terminate fails.
- // Then we tell the backend the container exited.
- if hc, err := hcsshim.OpenContainer(containerID); err == nil {
- const terminateTimeout = time.Minute * 2
- err := hc.Terminate()
-
- if hcsshim.IsPending(err) {
- err = hc.WaitTimeout(terminateTimeout)
- } else if hcsshim.IsAlreadyStopped(err) {
- err = nil
- }
-
- if err != nil {
- logrus.Warnf("libcontainerd: failed to terminate %s on restore - %q", containerID, err)
- return err
- }
- }
- return clnt.backend.StateChanged(containerID, StateInfo{
- CommonStateInfo: CommonStateInfo{
- State: StateExit,
- ExitCode: 1 << 31,
- }})
-}
-
-// GetPidsForContainer returns a list of process IDs running in a container.
-// Not used on Windows.
-func (clnt *client) GetPidsForContainer(containerID string) ([]int, error) {
- return nil, errors.New("not implemented on Windows")
-}
-
-// Summary returns a summary of the processes running in a container.
-// This is present in Windows to support docker top. In linux, the
-// engine shells out to ps to get process information. On Windows, as
-// the containers could be Hyper-V containers, they would not be
-// visible on the container host. However, libcontainerd does have
-// that information.
-func (clnt *client) Summary(containerID string) ([]Summary, error) {
-
- // Get the libcontainerd container object
- clnt.lock(containerID)
- defer clnt.unlock(containerID)
- container, err := clnt.getContainer(containerID)
- if err != nil {
- return nil, err
- }
- p, err := container.hcsContainer.ProcessList()
- if err != nil {
- return nil, err
- }
- pl := make([]Summary, len(p))
- for i := range p {
- pl[i] = Summary(p[i])
- }
- return pl, nil
-}
-
-// UpdateResources updates resources for a running container.
-func (clnt *client) UpdateResources(containerID string, resources Resources) error {
- // Updating resource isn't supported on Windows
- // but we should return nil for enabling updating container
- return nil
-}
-
-func (clnt *client) CreateCheckpoint(containerID string, checkpointID string, checkpointDir string, exit bool) error {
- return errors.New("Windows: Containers do not support checkpoints")
-}
-
-func (clnt *client) DeleteCheckpoint(containerID string, checkpointID string, checkpointDir string) error {
- return errors.New("Windows: Containers do not support checkpoints")
-}
-
-func (clnt *client) ListCheckpoints(containerID string, checkpointDir string) (*Checkpoints, error) {
- return nil, errors.New("Windows: Containers do not support checkpoints")
-}
-
-func (clnt *client) GetServerVersion(ctx context.Context) (*ServerVersion, error) {
- return &ServerVersion{}, nil
-}
diff --git a/libcontainerd/container.go b/libcontainerd/container.go
deleted file mode 100644
index b40321389a..0000000000
--- a/libcontainerd/container.go
+++ /dev/null
@@ -1,13 +0,0 @@
-package libcontainerd
-
-const (
- // InitFriendlyName is the name given in the lookup map of processes
- // for the first process started in a container.
- InitFriendlyName = "init"
- configFilename = "config.json"
-)
-
-type containerCommon struct {
- process
- processes map[string]*process
-}
diff --git a/libcontainerd/container_unix.go b/libcontainerd/container_unix.go
deleted file mode 100644
index 9a7dbf01cd..0000000000
--- a/libcontainerd/container_unix.go
+++ /dev/null
@@ -1,246 +0,0 @@
-// +build linux solaris
-
-package libcontainerd
-
-import (
- "encoding/json"
- "io"
- "io/ioutil"
- "os"
- "path/filepath"
- "sync"
- "time"
-
- containerd "github.com/containerd/containerd/api/grpc/types"
- "github.com/docker/docker/pkg/ioutils"
- specs "github.com/opencontainers/runtime-spec/specs-go"
- "github.com/sirupsen/logrus"
- "github.com/tonistiigi/fifo"
- "golang.org/x/net/context"
- "golang.org/x/sys/unix"
-)
-
-type container struct {
- containerCommon
-
- // Platform specific fields are below here.
- pauseMonitor
- oom bool
- runtime string
- runtimeArgs []string
-}
-
-type runtime struct {
- path string
- args []string
-}
-
-// WithRuntime sets the runtime to be used for the created container
-func WithRuntime(path string, args []string) CreateOption {
- return runtime{path, args}
-}
-
-func (rt runtime) Apply(p interface{}) error {
- if pr, ok := p.(*container); ok {
- pr.runtime = rt.path
- pr.runtimeArgs = rt.args
- }
- return nil
-}
-
-func (ctr *container) clean() error {
- if os.Getenv("LIBCONTAINERD_NOCLEAN") == "1" {
- return nil
- }
- if _, err := os.Lstat(ctr.dir); err != nil {
- if os.IsNotExist(err) {
- return nil
- }
- return err
- }
-
- if err := os.RemoveAll(ctr.dir); err != nil {
- return err
- }
- return nil
-}
-
-// cleanProcess removes the fifos used by an additional process.
-// Caller needs to lock container ID before calling this method.
-func (ctr *container) cleanProcess(id string) {
- if p, ok := ctr.processes[id]; ok {
- for _, i := range []int{unix.Stdin, unix.Stdout, unix.Stderr} {
- if err := os.Remove(p.fifo(i)); err != nil && !os.IsNotExist(err) {
- logrus.Warnf("libcontainerd: failed to remove %v for process %v: %v", p.fifo(i), id, err)
- }
- }
- }
- delete(ctr.processes, id)
-}
-
-func (ctr *container) spec() (*specs.Spec, error) {
- var spec specs.Spec
- dt, err := ioutil.ReadFile(filepath.Join(ctr.dir, configFilename))
- if err != nil {
- return nil, err
- }
- if err := json.Unmarshal(dt, &spec); err != nil {
- return nil, err
- }
- return &spec, nil
-}
-
-func (ctr *container) start(spec *specs.Spec, checkpoint, checkpointDir string, attachStdio StdioCallback) (err error) {
- ctx, cancel := context.WithCancel(context.Background())
- defer cancel()
- ready := make(chan struct{})
-
- fifoCtx, cancel := context.WithCancel(context.Background())
- defer func() {
- if err != nil {
- cancel()
- }
- }()
-
- iopipe, err := ctr.openFifos(fifoCtx, spec.Process.Terminal)
- if err != nil {
- return err
- }
-
- var stdinOnce sync.Once
-
- // we need to delay stdin closure after container start or else "stdin close"
- // event will be rejected by containerd.
- // stdin closure happens in attachStdio
- stdin := iopipe.Stdin
- iopipe.Stdin = ioutils.NewWriteCloserWrapper(stdin, func() error {
- var err error
- stdinOnce.Do(func() { // on error from attach we don't know if stdin was already closed
- err = stdin.Close()
- go func() {
- select {
- case <-ready:
- case <-ctx.Done():
- }
- select {
- case <-ready:
- if err := ctr.sendCloseStdin(); err != nil {
- logrus.Warnf("failed to close stdin: %+v", err)
- }
- default:
- }
- }()
- })
- return err
- })
-
- r := &containerd.CreateContainerRequest{
- Id: ctr.containerID,
- BundlePath: ctr.dir,
- Stdin: ctr.fifo(unix.Stdin),
- Stdout: ctr.fifo(unix.Stdout),
- Stderr: ctr.fifo(unix.Stderr),
- Checkpoint: checkpoint,
- CheckpointDir: checkpointDir,
- // check to see if we are running in ramdisk to disable pivot root
- NoPivotRoot: os.Getenv("DOCKER_RAMDISK") != "",
- Runtime: ctr.runtime,
- RuntimeArgs: ctr.runtimeArgs,
- }
- ctr.client.appendContainer(ctr)
-
- if err := attachStdio(*iopipe); err != nil {
- ctr.closeFifos(iopipe)
- return err
- }
-
- resp, err := ctr.client.remote.apiClient.CreateContainer(context.Background(), r)
- if err != nil {
- ctr.closeFifos(iopipe)
- return err
- }
- ctr.systemPid = systemPid(resp.Container)
- close(ready)
-
- return ctr.client.backend.StateChanged(ctr.containerID, StateInfo{
- CommonStateInfo: CommonStateInfo{
- State: StateStart,
- Pid: ctr.systemPid,
- }})
-
-}
-
-func (ctr *container) newProcess(friendlyName string) *process {
- return &process{
- dir: ctr.dir,
- processCommon: processCommon{
- containerID: ctr.containerID,
- friendlyName: friendlyName,
- client: ctr.client,
- },
- }
-}
-
-func (ctr *container) handleEvent(e *containerd.Event) error {
- ctr.client.lock(ctr.containerID)
- defer ctr.client.unlock(ctr.containerID)
- switch e.Type {
- case StateExit, StatePause, StateResume, StateOOM:
- st := StateInfo{
- CommonStateInfo: CommonStateInfo{
- State: e.Type,
- ExitCode: e.Status,
- },
- OOMKilled: e.Type == StateExit && ctr.oom,
- }
- if e.Type == StateOOM {
- ctr.oom = true
- }
- if e.Type == StateExit && e.Pid != InitFriendlyName {
- st.ProcessID = e.Pid
- st.State = StateExitProcess
- }
-
- // Remove process from list if we have exited
- switch st.State {
- case StateExit:
- ctr.clean()
- ctr.client.deleteContainer(e.Id)
- case StateExitProcess:
- ctr.cleanProcess(st.ProcessID)
- }
- ctr.client.q.append(e.Id, func() {
- if err := ctr.client.backend.StateChanged(e.Id, st); err != nil {
- logrus.Errorf("libcontainerd: backend.StateChanged(): %v", err)
- }
- if e.Type == StatePause || e.Type == StateResume {
- ctr.pauseMonitor.handle(e.Type)
- }
- if e.Type == StateExit {
- if en := ctr.client.getExitNotifier(e.Id); en != nil {
- en.close()
- }
- }
- })
-
- default:
- logrus.Debugf("libcontainerd: event unhandled: %+v", e)
- }
- return nil
-}
-
-// discardFifos attempts to fully read the container fifos to unblock processes
-// that may be blocked on the writer side.
-func (ctr *container) discardFifos() {
- ctx, _ := context.WithTimeout(context.Background(), 3*time.Second)
- for _, i := range []int{unix.Stdout, unix.Stderr} {
- f, err := fifo.OpenFifo(ctx, ctr.fifo(i), unix.O_RDONLY|unix.O_NONBLOCK, 0)
- if err != nil {
- logrus.Warnf("error opening fifo %v for discarding: %+v", f, err)
- continue
- }
- go func() {
- io.Copy(ioutil.Discard, f)
- }()
- }
-}
diff --git a/libcontainerd/container_windows.go b/libcontainerd/container_windows.go
deleted file mode 100644
index 73fc6bd41b..0000000000
--- a/libcontainerd/container_windows.go
+++ /dev/null
@@ -1,338 +0,0 @@
-package libcontainerd
-
-import (
- "encoding/json"
- "fmt"
- "io"
- "io/ioutil"
- "strings"
- "time"
-
- "github.com/Microsoft/hcsshim"
- "github.com/opencontainers/runtime-spec/specs-go"
- "github.com/sirupsen/logrus"
- "golang.org/x/sys/windows"
-)
-
-type container struct {
- containerCommon
-
- // Platform specific fields are below here. There are none presently on Windows.
- options []CreateOption
-
- // The ociSpec is required, as client.Create() needs a spec,
- // but can be called from the RestartManager context which does not
- // otherwise have access to the Spec
- ociSpec specs.Spec
-
- isWindows bool
- manualStopRequested bool
- hcsContainer hcsshim.Container
-}
-
-func (ctr *container) newProcess(friendlyName string) *process {
- return &process{
- processCommon: processCommon{
- containerID: ctr.containerID,
- friendlyName: friendlyName,
- client: ctr.client,
- },
- }
-}
-
-// start starts a created container.
-// Caller needs to lock container ID before calling this method.
-func (ctr *container) start(attachStdio StdioCallback) error {
- var err error
-
- // Start the container. If this is a servicing container, this call will block
- // until the container is done with the servicing execution.
- logrus.Debugln("libcontainerd: starting container ", ctr.containerID)
- if err = ctr.hcsContainer.Start(); err != nil {
- logrus.Errorf("libcontainerd: failed to start container: %s", err)
- ctr.debugGCS() // Before terminating!
- if err := ctr.terminate(); err != nil {
- logrus.Errorf("libcontainerd: failed to cleanup after a failed Start. %s", err)
- } else {
- logrus.Debugln("libcontainerd: cleaned up after failed Start by calling Terminate")
- }
- return err
- }
-
- defer ctr.debugGCS()
-
- // Note we always tell HCS to
- // create stdout as it's required regardless of '-i' or '-t' options, so that
- // docker can always grab the output through logs. We also tell HCS to always
- // create stdin, even if it's not used - it will be closed shortly. Stderr
- // is only created if it we're not -t.
- var (
- emulateConsole bool
- createStdErrPipe bool
- )
- if ctr.ociSpec.Process != nil {
- emulateConsole = ctr.ociSpec.Process.Terminal
- createStdErrPipe = !ctr.ociSpec.Process.Terminal && !ctr.ociSpec.Windows.Servicing
- }
-
- createProcessParms := &hcsshim.ProcessConfig{
- EmulateConsole: emulateConsole,
- WorkingDirectory: ctr.ociSpec.Process.Cwd,
- CreateStdInPipe: !ctr.ociSpec.Windows.Servicing,
- CreateStdOutPipe: !ctr.ociSpec.Windows.Servicing,
- CreateStdErrPipe: createStdErrPipe,
- }
-
- if ctr.ociSpec.Process != nil && ctr.ociSpec.Process.ConsoleSize != nil {
- createProcessParms.ConsoleSize[0] = uint(ctr.ociSpec.Process.ConsoleSize.Height)
- createProcessParms.ConsoleSize[1] = uint(ctr.ociSpec.Process.ConsoleSize.Width)
- }
-
- // Configure the environment for the process
- createProcessParms.Environment = setupEnvironmentVariables(ctr.ociSpec.Process.Env)
- if ctr.isWindows {
- createProcessParms.CommandLine = strings.Join(ctr.ociSpec.Process.Args, " ")
- } else {
- createProcessParms.CommandArgs = ctr.ociSpec.Process.Args
- }
- createProcessParms.User = ctr.ociSpec.Process.User.Username
-
- // LCOW requires the raw OCI spec passed through HCS and onwards to GCS for the utility VM.
- if !ctr.isWindows {
- ociBuf, err := json.Marshal(ctr.ociSpec)
- if err != nil {
- return err
- }
- ociRaw := json.RawMessage(ociBuf)
- createProcessParms.OCISpecification = &ociRaw
- }
-
- // Start the command running in the container.
- newProcess, err := ctr.hcsContainer.CreateProcess(createProcessParms)
- if err != nil {
- logrus.Errorf("libcontainerd: CreateProcess() failed %s", err)
- if err := ctr.terminate(); err != nil {
- logrus.Errorf("libcontainerd: failed to cleanup after a failed CreateProcess. %s", err)
- } else {
- logrus.Debugln("libcontainerd: cleaned up after failed CreateProcess by calling Terminate")
- }
- return err
- }
-
- pid := newProcess.Pid()
-
- // Save the hcs Process and PID
- ctr.process.friendlyName = InitFriendlyName
- ctr.process.hcsProcess = newProcess
-
- // If this is a servicing container, wait on the process synchronously here and
- // if it succeeds, wait for it cleanly shutdown and merge into the parent container.
- if ctr.ociSpec.Windows.Servicing {
- exitCode := ctr.waitProcessExitCode(&ctr.process)
-
- if exitCode != 0 {
- if err := ctr.terminate(); err != nil {
- logrus.Warnf("libcontainerd: terminating servicing container %s failed: %s", ctr.containerID, err)
- }
- return fmt.Errorf("libcontainerd: servicing container %s returned non-zero exit code %d", ctr.containerID, exitCode)
- }
-
- return ctr.hcsContainer.WaitTimeout(time.Minute * 5)
- }
-
- var stdout, stderr io.ReadCloser
- var stdin io.WriteCloser
- stdin, stdout, stderr, err = newProcess.Stdio()
- if err != nil {
- logrus.Errorf("libcontainerd: failed to get stdio pipes: %s", err)
- if err := ctr.terminate(); err != nil {
- logrus.Errorf("libcontainerd: failed to cleanup after a failed Stdio. %s", err)
- }
- return err
- }
-
- iopipe := &IOPipe{Terminal: ctr.ociSpec.Process.Terminal}
-
- iopipe.Stdin = createStdInCloser(stdin, newProcess)
-
- // Convert io.ReadClosers to io.Readers
- if stdout != nil {
- iopipe.Stdout = ioutil.NopCloser(&autoClosingReader{ReadCloser: stdout})
- }
- if stderr != nil {
- iopipe.Stderr = ioutil.NopCloser(&autoClosingReader{ReadCloser: stderr})
- }
-
- // Save the PID
- logrus.Debugf("libcontainerd: process started - PID %d", pid)
- ctr.systemPid = uint32(pid)
-
- // Spin up a go routine waiting for exit to handle cleanup
- go ctr.waitExit(&ctr.process, true)
-
- ctr.client.appendContainer(ctr)
-
- if err := attachStdio(*iopipe); err != nil {
- // OK to return the error here, as waitExit will handle tear-down in HCS
- return err
- }
-
- // Tell the docker engine that the container has started.
- si := StateInfo{
- CommonStateInfo: CommonStateInfo{
- State: StateStart,
- Pid: ctr.systemPid, // Not sure this is needed? Double-check monitor.go in daemon BUGBUG @jhowardmsft
- }}
- logrus.Debugf("libcontainerd: start() completed OK, %+v", si)
- return ctr.client.backend.StateChanged(ctr.containerID, si)
-
-}
-
-// waitProcessExitCode will wait for the given process to exit and return its error code.
-func (ctr *container) waitProcessExitCode(process *process) int {
- // Block indefinitely for the process to exit.
- err := process.hcsProcess.Wait()
- if err != nil {
- if herr, ok := err.(*hcsshim.ProcessError); ok && herr.Err != windows.ERROR_BROKEN_PIPE {
- logrus.Warnf("libcontainerd: Wait() failed (container may have been killed): %s", err)
- }
- // Fall through here, do not return. This ensures we attempt to continue the
- // shutdown in HCS and tell the docker engine that the process/container
- // has exited to avoid a container being dropped on the floor.
- }
-
- exitCode, err := process.hcsProcess.ExitCode()
- if err != nil {
- if herr, ok := err.(*hcsshim.ProcessError); ok && herr.Err != windows.ERROR_BROKEN_PIPE {
- logrus.Warnf("libcontainerd: unable to get exit code from container %s", ctr.containerID)
- }
- // Since we got an error retrieving the exit code, make sure that the code we return
- // doesn't incorrectly indicate success.
- exitCode = -1
-
- // Fall through here, do not return. This ensures we attempt to continue the
- // shutdown in HCS and tell the docker engine that the process/container
- // has exited to avoid a container being dropped on the floor.
- }
-
- return exitCode
-}
-
-// waitExit runs as a goroutine waiting for the process to exit. It's
-// equivalent to (in the linux containerd world) where events come in for
-// state change notifications from containerd.
-func (ctr *container) waitExit(process *process, isFirstProcessToStart bool) error {
- logrus.Debugln("libcontainerd: waitExit() on pid", process.systemPid)
-
- exitCode := ctr.waitProcessExitCode(process)
- // Lock the container while removing the process/container from the list
- ctr.client.lock(ctr.containerID)
-
- if !isFirstProcessToStart {
- ctr.cleanProcess(process.friendlyName)
- } else {
- ctr.client.deleteContainer(ctr.containerID)
- }
-
- // Unlock here so other threads are unblocked
- ctr.client.unlock(ctr.containerID)
-
- // Assume the container has exited
- si := StateInfo{
- CommonStateInfo: CommonStateInfo{
- State: StateExit,
- ExitCode: uint32(exitCode),
- Pid: process.systemPid,
- ProcessID: process.friendlyName,
- },
- UpdatePending: false,
- }
-
- // But it could have been an exec'd process which exited
- if !isFirstProcessToStart {
- si.State = StateExitProcess
- } else {
- // Pending updates is only applicable for WCOW
- if ctr.isWindows {
- updatePending, err := ctr.hcsContainer.HasPendingUpdates()
- if err != nil {
- logrus.Warnf("libcontainerd: HasPendingUpdates() failed (container may have been killed): %s", err)
- } else {
- si.UpdatePending = updatePending
- }
- }
-
- logrus.Debugf("libcontainerd: shutting down container %s", ctr.containerID)
- if err := ctr.shutdown(); err != nil {
- logrus.Debugf("libcontainerd: failed to shutdown container %s", ctr.containerID)
- } else {
- logrus.Debugf("libcontainerd: completed shutting down container %s", ctr.containerID)
- }
- if err := ctr.hcsContainer.Close(); err != nil {
- logrus.Error(err)
- }
- }
-
- if err := process.hcsProcess.Close(); err != nil {
- logrus.Errorf("libcontainerd: hcsProcess.Close(): %v", err)
- }
-
- // Call into the backend to notify it of the state change.
- logrus.Debugf("libcontainerd: waitExit() calling backend.StateChanged %+v", si)
- if err := ctr.client.backend.StateChanged(ctr.containerID, si); err != nil {
- logrus.Error(err)
- }
-
- logrus.Debugf("libcontainerd: waitExit() completed OK, %+v", si)
-
- return nil
-}
-
-// cleanProcess removes process from the map.
-// Caller needs to lock container ID before calling this method.
-func (ctr *container) cleanProcess(id string) {
- delete(ctr.processes, id)
-}
-
-// shutdown shuts down the container in HCS
-// Caller needs to lock container ID before calling this method.
-func (ctr *container) shutdown() error {
- const shutdownTimeout = time.Minute * 5
- err := ctr.hcsContainer.Shutdown()
- if hcsshim.IsPending(err) {
- // Explicit timeout to avoid a (remote) possibility that shutdown hangs indefinitely.
- err = ctr.hcsContainer.WaitTimeout(shutdownTimeout)
- } else if hcsshim.IsAlreadyStopped(err) {
- err = nil
- }
-
- if err != nil {
- logrus.Debugf("libcontainerd: error shutting down container %s %v calling terminate", ctr.containerID, err)
- if err := ctr.terminate(); err != nil {
- return err
- }
- return err
- }
-
- return nil
-}
-
-// terminate terminates the container in HCS
-// Caller needs to lock container ID before calling this method.
-func (ctr *container) terminate() error {
- const terminateTimeout = time.Minute * 5
- err := ctr.hcsContainer.Terminate()
-
- if hcsshim.IsPending(err) {
- err = ctr.hcsContainer.WaitTimeout(terminateTimeout)
- } else if hcsshim.IsAlreadyStopped(err) {
- err = nil
- }
-
- if err != nil {
- logrus.Debugf("libcontainerd: error terminating container %s %v", ctr.containerID, err)
- return err
- }
-
- return nil
-}
diff --git a/libcontainerd/errors.go b/libcontainerd/errors.go
new file mode 100644
index 0000000000..db59ea878c
--- /dev/null
+++ b/libcontainerd/errors.go
@@ -0,0 +1,46 @@
+package libcontainerd
+
+import "errors"
+
+// liberr is the base type embedded by the categorized error types in this
+// file. It stores the underlying error and exposes it unchanged.
+type liberr struct {
+ err error
+}
+
+// Error returns the message of the wrapped error. It calls e.err.Error()
+// directly, so the wrapped error must not be nil.
+func (e liberr) Error() string {
+ return e.err.Error()
+}
+
+// Cause returns the wrapped error itself.
+// NOTE(review): presumably provided for "causer"-style unwrapping as done by
+// github.com/pkg/errors — confirm against the callers.
+func (e liberr) Cause() error {
+ return e.err
+}
+
+// notFoundErr tags an error as belonging to the "not found" category.
+type notFoundErr struct {
+ liberr
+}
+
+// NotFound is an empty marker method: it carries no behavior and exists only
+// so the category can be detected through an interface assertion.
+func (notFoundErr) NotFound() {}
+
+// newNotFoundError builds a not-found error whose message is err.
+func newNotFoundError(err string) error { return notFoundErr{liberr{errors.New(err)}} }
+// wrapNotFoundError tags an existing error as not-found, keeping it as the cause.
+func wrapNotFoundError(err error) error { return notFoundErr{liberr{err}} }
+
+// invalidParamErr tags an error as belonging to the "invalid parameter"
+// category.
+type invalidParamErr struct {
+ liberr
+}
+
+// InvalidParameter is an empty marker method: it exists only so the category
+// can be detected through an interface assertion.
+func (invalidParamErr) InvalidParameter() {}
+
+// newInvalidParameterError builds an invalid-parameter error whose message is err.
+func newInvalidParameterError(err string) error { return invalidParamErr{liberr{errors.New(err)}} }
+
+// conflictErr tags an error as belonging to the "conflict" category.
+type conflictErr struct {
+ liberr
+}
+
+// ConflictErr is an empty marker method: it exists only so the category can
+// be detected through an interface assertion.
+// NOTE(review): the sibling markers in this file are named after the category
+// alone (NotFound, InvalidParameter) while this one carries an "Err" suffix —
+// confirm that this is the exact method name the consuming interface expects,
+// otherwise the assertion will silently never match.
+func (conflictErr) ConflictErr() {}
+
+// newConflictError builds a conflict error whose message is err.
+func newConflictError(err string) error { return conflictErr{liberr{errors.New(err)}} }
+
+// sysErr wraps an error in a distinct "system error" type.
+// NOTE(review): unlike the other error types in this file, sysErr declares no
+// marker method, so it can only be recognized by its concrete (unexported)
+// type — confirm whether a marker is expected by the callers.
+type sysErr struct {
+ liberr
+}
+
+// wrapSystemError wraps err in a sysErr, keeping it as the cause.
+func wrapSystemError(err error) error { return sysErr{liberr{err}} }
diff --git a/libcontainerd/io.go b/libcontainerd/io.go
new file mode 100644
index 0000000000..2c4af58ce9
--- /dev/null
+++ b/libcontainerd/io.go
@@ -0,0 +1,36 @@
+package libcontainerd
+
+import "github.com/containerd/containerd"
+
+// Config returns the containerd.IOConfig stored in this pipe set. The value
+// is returned as-is; no pipe is opened or inspected by this call.
+func (p *IOPipe) Config() containerd.IOConfig {
+ return p.config
+}
+
+// Cancel aborts ongoing operations if they have not completed yet by invoking
+// the cancel function stored in the pipe set. It does not close the pipes;
+// use Close for that.
+func (p *IOPipe) Cancel() {
+ p.cancel()
+}
+
+// Wait is a no-op: the body is empty, so it returns immediately and does not
+// block on any outstanding I/O.
+// NOTE(review): presumably present only to satisfy an interface that requires
+// a Wait method — confirm.
+func (p *IOPipe) Wait() {
+}
+
+// Close cancels any pending pipe operations and then closes every stream
+// (Stdin, Stdout, Stderr) that is set; nil streams are skipped.
+//
+// Closing is best-effort: the errors returned by the individual streams are
+// discarded and Close always returns nil.
+func (p *IOPipe) Close() error {
+ // Cancel first so that operations still blocked on the pipes are released
+ // before the streams are closed.
+ p.cancel()
+
+ if p.Stdin != nil {
+ p.Stdin.Close()
+ }
+
+ if p.Stdout != nil {
+ p.Stdout.Close()
+ }
+
+ if p.Stderr != nil {
+ p.Stderr.Close()
+ }
+
+ return nil
+}
diff --git a/libcontainerd/io_unix.go b/libcontainerd/io_unix.go
new file mode 100644
index 0000000000..0c08b20136
--- /dev/null
+++ b/libcontainerd/io_unix.go
@@ -0,0 +1,60 @@
+// +build !windows
+
+package libcontainerd
+
+import (
+ "context"
+ "io"
+ "syscall"
+
+ "github.com/containerd/containerd"
+ "github.com/containerd/fifo"
+ "github.com/pkg/errors"
+)
+
+// newIOPipe opens the FIFOs named in fifos and returns them as an IOPipe.
+//
+// Stdin is opened write-only, stdout and stderr read-only; every FIFO is
+// opened with O_CREAT|O_NONBLOCK and mode 0700. An empty path in fifos skips
+// that stream and leaves the corresponding IOPipe field nil.
+//
+// All opens share one cancellable context whose cancel function is stored in
+// the returned IOPipe. If any open fails, the context is cancelled, the
+// streams opened so far are closed, and the error is returned wrapped by
+// errors.WithStack.
+func newIOPipe(fifos *containerd.FIFOSet) (*IOPipe, error) {
+ var (
+ err error
+ ctx, cancel = context.WithCancel(context.Background())
+ f io.ReadWriteCloser
+ iop = &IOPipe{
+ Terminal: fifos.Terminal,
+ cancel: cancel,
+ config: containerd.IOConfig{
+ Terminal: fifos.Terminal,
+ Stdin: fifos.In,
+ Stdout: fifos.Out,
+ Stderr: fifos.Err,
+ },
+ }
+ )
+ // Cleanup on failure. This relies on the opens below assigning to the
+ // function-scoped err with '=' (not ':='); shadowing err would disable it.
+ // NOTE(review): FIFO files already created by earlier successful opens are
+ // not removed here — confirm the caller takes care of them.
+ defer func() {
+ if err != nil {
+ cancel()
+ iop.Close()
+ }
+ }()
+
+ if fifos.In != "" {
+ if f, err = fifo.OpenFifo(ctx, fifos.In, syscall.O_WRONLY|syscall.O_CREAT|syscall.O_NONBLOCK, 0700); err != nil {
+ return nil, errors.WithStack(err)
+ }
+ iop.Stdin = f
+ }
+
+ if fifos.Out != "" {
+ if f, err = fifo.OpenFifo(ctx, fifos.Out, syscall.O_RDONLY|syscall.O_CREAT|syscall.O_NONBLOCK, 0700); err != nil {
+ return nil, errors.WithStack(err)
+ }
+ iop.Stdout = f
+ }
+
+ if fifos.Err != "" {
+ if f, err = fifo.OpenFifo(ctx, fifos.Err, syscall.O_RDONLY|syscall.O_CREAT|syscall.O_NONBLOCK, 0700); err != nil {
+ return nil, errors.WithStack(err)
+ }
+ iop.Stderr = f
+ }
+
+ return iop, nil
+}
diff --git a/libcontainerd/io_windows.go b/libcontainerd/io_windows.go
new file mode 100644
index 0000000000..312bdbd8cf
--- /dev/null
+++ b/libcontainerd/io_windows.go
@@ -0,0 +1,138 @@
+package libcontainerd
+
+import (
+ "context"
+ "io"
+ "net"
+ "sync"
+
+ winio "github.com/Microsoft/go-winio"
+ "github.com/containerd/containerd"
+ "github.com/pkg/errors"
+)
+
+// winpipe is one named-pipe endpoint exposing Read, Write and Close.
+// newWinpipe listens on the pipe and accepts a single connection in the
+// background; Read and Write block until that accept attempt has finished or
+// ctx is done.
+type winpipe struct {
+ // The embedded mutex guards readyErr.
+ sync.Mutex
+
+ // ctx aborts the pending accept and any blocked Read/Write when cancelled.
+ ctx context.Context
+ // listener is the listening side of the named pipe.
+ listener net.Listener
+ // readyCh is closed once the accept attempt has finished, whether it
+ // succeeded or not.
+ readyCh chan struct{}
+ // readyErr records why no connection was obtained: the Accept error or
+ // ctx.Err(), whichever was stored first.
+ readyErr error
+
+ // client is the accepted connection. It is assigned before readyCh is
+ // closed and stays nil when Accept failed.
+ client net.Conn
+}
+
+// newWinpipe starts listening on the named pipe at path pipe and returns a
+// winpipe that becomes usable once a single client has connected.
+//
+// The function returns as soon as the listener exists; the connection is
+// accepted in the background. Cancelling ctx before a client connects closes
+// the listener, which makes the pending Accept fail.
+//
+// An error is returned only when the listener cannot be created.
+func newWinpipe(ctx context.Context, pipe string) (*winpipe, error) {
+ l, err := winio.ListenPipe(pipe, nil)
+ if err != nil {
+ return nil, errors.Wrapf(err, "%q pipe creation failed", pipe)
+ }
+ wp := &winpipe{
+ ctx: ctx,
+ listener: l,
+ readyCh: make(chan struct{}),
+ }
+ // Outer goroutine: supervises the accept and reacts to ctx cancellation.
+ go func() {
+ // Inner goroutine: performs the one blocking Accept.
+ go func() {
+ // Deferred calls run in reverse order: the listener is closed first,
+ // then readyCh. readyCh is therefore closed on every exit path,
+ // including Accept failure, in which case wp.client is still nil.
+ defer close(wp.readyCh)
+ defer wp.listener.Close()
+ c, err := wp.listener.Accept()
+ if err != nil {
+ wp.Lock()
+ // Keep an error already stored by the cancellation branch below.
+ if wp.readyErr == nil {
+ wp.readyErr = err
+ }
+ wp.Unlock()
+ return
+ }
+ wp.client = c
+ }()
+
+ select {
+ case <-wp.readyCh:
+ case <-ctx.Done():
+ wp.Lock()
+ // Only act if the accept goroutine has not recorded a failure itself.
+ // Closing the listener is what unblocks the pending Accept.
+ if wp.readyErr == nil {
+ wp.listener.Close()
+ wp.readyErr = ctx.Err()
+ }
+ wp.Unlock()
+ }
+ }()
+
+ return wp, nil
+}
+
+// Read blocks until the pipe has finished its accept attempt or the pipe's
+// context is done, then reads from the accepted connection into b.
+//
+// It returns the context error when the context is done, and the recorded
+// accept error when no client connection was ever established.
+func (wp *winpipe) Read(b []byte) (int, error) {
+	select {
+	case <-wp.ctx.Done():
+		return 0, wp.ctx.Err()
+	case <-wp.readyCh:
+		// readyCh is also closed when Accept failed, in which case client was
+		// never assigned. Report the stored error instead of calling a method
+		// on a nil net.Conn, which would panic.
+		if wp.client == nil {
+			wp.Lock()
+			err := wp.readyErr
+			wp.Unlock()
+			return 0, err
+		}
+		return wp.client.Read(b)
+	}
+}
+
+// Write blocks until the pipe has finished its accept attempt or the pipe's
+// context is done, then writes b to the accepted connection.
+//
+// It returns the context error when the context is done, and the recorded
+// accept error when no client connection was ever established.
+func (wp *winpipe) Write(b []byte) (int, error) {
+	select {
+	case <-wp.ctx.Done():
+		return 0, wp.ctx.Err()
+	case <-wp.readyCh:
+		// readyCh is also closed when Accept failed, in which case client was
+		// never assigned. Report the stored error instead of calling a method
+		// on a nil net.Conn, which would panic.
+		if wp.client == nil {
+			wp.Lock()
+			err := wp.readyErr
+			wp.Unlock()
+			return 0, err
+		}
+		return wp.client.Write(b)
+	}
+}
+
+// Close closes the accepted client connection, if there is one.
+//
+// It never blocks: when the accept attempt has not finished yet, or finished
+// without a connection, there is nothing to close and nil is returned.
+func (wp *winpipe) Close() error {
+	select {
+	case <-wp.readyCh:
+		// readyCh is closed even when Accept failed; client is nil in that
+		// case and the listener has already been closed by the accept
+		// goroutine, so calling client.Close() would only panic.
+		if wp.client == nil {
+			return nil
+		}
+		return wp.client.Close()
+	default:
+		return nil
+	}
+}
+
+// newIOPipe creates one named pipe per non-empty path in fifos and returns
+// them as an IOPipe. An empty path skips that stream and leaves the
+// corresponding IOPipe field nil.
+//
+// All pipes share one cancellable context whose cancel function is stored in
+// the returned IOPipe. If creating any pipe fails, the context is cancelled,
+// the pipes created so far are closed, and the error from newWinpipe is
+// returned unchanged.
+func newIOPipe(fifos *containerd.FIFOSet) (*IOPipe, error) {
+ var (
+ err error
+ ctx, cancel = context.WithCancel(context.Background())
+ p io.ReadWriteCloser
+ iop = &IOPipe{
+ Terminal: fifos.Terminal,
+ cancel: cancel,
+ config: containerd.IOConfig{
+ Terminal: fifos.Terminal,
+ Stdin: fifos.In,
+ Stdout: fifos.Out,
+ Stderr: fifos.Err,
+ },
+ }
+ )
+ // Cleanup on failure. This relies on the calls below assigning to the
+ // function-scoped err with '=' (not ':='); shadowing err would disable it.
+ defer func() {
+ if err != nil {
+ cancel()
+ iop.Close()
+ }
+ }()
+
+ if fifos.In != "" {
+ if p, err = newWinpipe(ctx, fifos.In); err != nil {
+ return nil, err
+ }
+ iop.Stdin = p
+ }
+
+ if fifos.Out != "" {
+ if p, err = newWinpipe(ctx, fifos.Out); err != nil {
+ return nil, err
+ }
+ iop.Stdout = p
+ }
+
+ if fifos.Err != "" {
+ if p, err = newWinpipe(ctx, fifos.Err); err != nil {
+ return nil, err
+ }
+ iop.Stderr = p
+ }
+
+ return iop, nil
+}
diff --git a/libcontainerd/oom_linux.go b/libcontainerd/oom_linux.go
deleted file mode 100644
index 70f0daca5f..0000000000
--- a/libcontainerd/oom_linux.go
+++ /dev/null
@@ -1,31 +0,0 @@
-package libcontainerd
-
-import (
- "fmt"
- "os"
- "strconv"
-
- "github.com/opencontainers/runc/libcontainer/system"
- "github.com/sirupsen/logrus"
-)
-
-func setOOMScore(pid, score int) error {
- oomScoreAdjPath := fmt.Sprintf("/proc/%d/oom_score_adj", pid)
- f, err := os.OpenFile(oomScoreAdjPath, os.O_WRONLY, 0)
- if err != nil {
- return err
- }
- stringScore := strconv.Itoa(score)
- _, err = f.WriteString(stringScore)
- f.Close()
- if os.IsPermission(err) {
- // Setting oom_score_adj does not work in an
- // unprivileged container. Ignore the error, but log
- // it if we appear not to be in that situation.
- if !system.RunningInUserNS() {
- logrus.Debugf("Permission denied writing %q to %s", stringScore, oomScoreAdjPath)
- }
- return nil
- }
- return err
-}
diff --git a/libcontainerd/oom_solaris.go b/libcontainerd/oom_solaris.go
deleted file mode 100644
index 2ebe5e87cf..0000000000
--- a/libcontainerd/oom_solaris.go
+++ /dev/null
@@ -1,5 +0,0 @@
-package libcontainerd
-
-func setOOMScore(pid, score int) error {
- return nil
-}
diff --git a/libcontainerd/pausemonitor_unix.go b/libcontainerd/pausemonitor_unix.go
deleted file mode 100644
index 4f3766d95c..0000000000
--- a/libcontainerd/pausemonitor_unix.go
+++ /dev/null
@@ -1,42 +0,0 @@
-// +build !windows
-
-package libcontainerd
-
-import (
- "sync"
-)
-
-// pauseMonitor is helper to get notifications from pause state changes.
-type pauseMonitor struct {
- sync.Mutex
- waiters map[string][]chan struct{}
-}
-
-func (m *pauseMonitor) handle(t string) {
- m.Lock()
- defer m.Unlock()
- if m.waiters == nil {
- return
- }
- q, ok := m.waiters[t]
- if !ok {
- return
- }
- if len(q) > 0 {
- close(q[0])
- m.waiters[t] = q[1:]
- }
-}
-
-func (m *pauseMonitor) append(t string, waiter chan struct{}) {
- m.Lock()
- defer m.Unlock()
- if m.waiters == nil {
- m.waiters = make(map[string][]chan struct{})
- }
- _, ok := m.waiters[t]
- if !ok {
- m.waiters[t] = make([]chan struct{}, 0)
- }
- m.waiters[t] = append(m.waiters[t], waiter)
-}
diff --git a/libcontainerd/process.go b/libcontainerd/process.go
deleted file mode 100644
index 57562c8789..0000000000
--- a/libcontainerd/process.go
+++ /dev/null
@@ -1,18 +0,0 @@
-package libcontainerd
-
-// processCommon are the platform common fields as part of the process structure
-// which keeps the state for the main container process, as well as any exec
-// processes.
-type processCommon struct {
- client *client
-
- // containerID is the Container ID
- containerID string
-
- // friendlyName is an identifier for the process (or `InitFriendlyName`
- // for the first process)
- friendlyName string
-
- // systemPid is the PID of the main container process
- systemPid uint32
-}
diff --git a/libcontainerd/process_unix.go b/libcontainerd/process_unix.go
deleted file mode 100644
index 3b54e325b5..0000000000
--- a/libcontainerd/process_unix.go
+++ /dev/null
@@ -1,107 +0,0 @@
-// +build linux solaris
-
-package libcontainerd
-
-import (
- "io"
- "io/ioutil"
- "os"
- "path/filepath"
- goruntime "runtime"
- "strings"
-
- containerd "github.com/containerd/containerd/api/grpc/types"
- "github.com/tonistiigi/fifo"
- "golang.org/x/net/context"
- "golang.org/x/sys/unix"
-)
-
-var fdNames = map[int]string{
- unix.Stdin: "stdin",
- unix.Stdout: "stdout",
- unix.Stderr: "stderr",
-}
-
-// process keeps the state for both main container process and exec process.
-type process struct {
- processCommon
-
- // Platform specific fields are below here.
- dir string
-}
-
-func (p *process) openFifos(ctx context.Context, terminal bool) (pipe *IOPipe, err error) {
- if err := os.MkdirAll(p.dir, 0700); err != nil {
- return nil, err
- }
-
- io := &IOPipe{}
-
- io.Stdin, err = fifo.OpenFifo(ctx, p.fifo(unix.Stdin), unix.O_WRONLY|unix.O_CREAT|unix.O_NONBLOCK, 0700)
- if err != nil {
- return nil, err
- }
-
- defer func() {
- if err != nil {
- io.Stdin.Close()
- }
- }()
-
- io.Stdout, err = fifo.OpenFifo(ctx, p.fifo(unix.Stdout), unix.O_RDONLY|unix.O_CREAT|unix.O_NONBLOCK, 0700)
- if err != nil {
- return nil, err
- }
-
- defer func() {
- if err != nil {
- io.Stdout.Close()
- }
- }()
-
- if goruntime.GOOS == "solaris" || !terminal {
- // For Solaris terminal handling is done exclusively by the runtime therefore we make no distinction
- // in the processing for terminal and !terminal cases.
- io.Stderr, err = fifo.OpenFifo(ctx, p.fifo(unix.Stderr), unix.O_RDONLY|unix.O_CREAT|unix.O_NONBLOCK, 0700)
- if err != nil {
- return nil, err
- }
- defer func() {
- if err != nil {
- io.Stderr.Close()
- }
- }()
- } else {
- io.Stderr = ioutil.NopCloser(emptyReader{})
- }
-
- return io, nil
-}
-
-func (p *process) sendCloseStdin() error {
- _, err := p.client.remote.apiClient.UpdateProcess(context.Background(), &containerd.UpdateProcessRequest{
- Id: p.containerID,
- Pid: p.friendlyName,
- CloseStdin: true,
- })
- if err != nil && (strings.Contains(err.Error(), "container not found") || strings.Contains(err.Error(), "process not found")) {
- return nil
- }
- return err
-}
-
-func (p *process) closeFifos(io *IOPipe) {
- io.Stdin.Close()
- io.Stdout.Close()
- io.Stderr.Close()
-}
-
-type emptyReader struct{}
-
-func (r emptyReader) Read(b []byte) (int, error) {
- return 0, io.EOF
-}
-
-func (p *process) fifo(index int) string {
- return filepath.Join(p.dir, p.friendlyName+"-"+fdNames[index])
-}
diff --git a/libcontainerd/process_windows.go b/libcontainerd/process_windows.go
index 854c4dd1f0..76b92a6ceb 100644
--- a/libcontainerd/process_windows.go
+++ b/libcontainerd/process_windows.go
@@ -8,14 +8,6 @@ import (
"github.com/docker/docker/pkg/ioutils"
)
-// process keeps the state for both main container process and exec process.
-type process struct {
- processCommon
-
- // Platform specific fields are below here.
- hcsProcess hcsshim.Process
-}
-
type autoClosingReader struct {
io.ReadCloser
sync.Once
@@ -23,7 +15,7 @@ type autoClosingReader struct {
func (r *autoClosingReader) Read(b []byte) (n int, err error) {
n, err = r.ReadCloser.Read(b)
- if err == io.EOF {
+ if err != nil {
r.Once.Do(func() { r.ReadCloser.Close() })
}
return
@@ -46,3 +38,7 @@ func createStdInCloser(pipe io.WriteCloser, process hcsshim.Process) io.WriteClo
return nil
})
}
+
+// Cleanup is a no-op in this file and always returns nil.
+func (p *process) Cleanup() error {
+ return nil
+}
diff --git a/libcontainerd/queue_unix.go b/libcontainerd/queue.go
index 66765f75ec..38d74a0a46 100644
--- a/libcontainerd/queue_unix.go
+++ b/libcontainerd/queue.go
@@ -1,5 +1,3 @@
-// +build linux solaris
-
package libcontainerd
import "sync"
diff --git a/libcontainerd/queue_unix_test.go b/libcontainerd/queue_test.go
index bb49a5d4c2..902f48aef2 100644
--- a/libcontainerd/queue_unix_test.go
+++ b/libcontainerd/queue_test.go
@@ -1,5 +1,3 @@
-// +build linux solaris
-
package libcontainerd
import (
diff --git a/libcontainerd/remote.go b/libcontainerd/remote.go
deleted file mode 100644
index 9031e3ae7d..0000000000
--- a/libcontainerd/remote.go
+++ /dev/null
@@ -1,20 +0,0 @@
-package libcontainerd
-
-// Remote on Linux defines the accesspoint to the containerd grpc API.
-// Remote on Windows is largely an unimplemented interface as there is
-// no remote containerd.
-type Remote interface {
- // Client returns a new Client instance connected with given Backend.
- Client(Backend) (Client, error)
- // Cleanup stops containerd if it was started by libcontainerd.
- // Note this is not used on Windows as there is no remote containerd.
- Cleanup()
- // UpdateOptions allows various remote options to be updated at runtime.
- UpdateOptions(...RemoteOption) error
-}
-
-// RemoteOption allows to configure parameters of remotes.
-// This is unused on Windows.
-type RemoteOption interface {
- Apply(Remote) error
-}
diff --git a/libcontainerd/remote_daemon.go b/libcontainerd/remote_daemon.go
new file mode 100644
index 0000000000..e6fd05f08a
--- /dev/null
+++ b/libcontainerd/remote_daemon.go
@@ -0,0 +1,317 @@
+// +build !windows
+
+package libcontainerd
+
+import (
+ "context"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "strconv"
+ "strings"
+ "sync"
+ "syscall"
+ "time"
+
+ "github.com/BurntSushi/toml"
+ "github.com/containerd/containerd"
+ "github.com/containerd/containerd/server"
+ "github.com/docker/docker/pkg/system"
+ "github.com/pkg/errors"
+ "github.com/sirupsen/logrus"
+)
+
+const (
+ maxConnectionRetryCount = 3
+ healthCheckTimeout = 3 * time.Second
+ shutdownTimeout = 15 * time.Second
+ configFile = "containerd.toml"
+ binaryName = "docker-containerd"
+ pidFile = "docker-containerd.pid"
+)
+
+// pluginConfigs holds the per-plugin configuration values, keyed by plugin
+// name. It is encoded under the "plugins" key of the generated TOML config.
+type pluginConfigs struct {
+ Plugins map[string]interface{} `toml:"plugins"`
+}
+
+// remote manages the connection to a containerd daemon and, when startDaemon
+// is set, the lifecycle of the containerd process itself. The embedded
+// server.Config is the configuration written out for that daemon.
+type remote struct {
+ sync.RWMutex
+ server.Config
+
+ // daemonPid is the pid of the managed containerd process, or -1 when no
+ // process is being managed.
+ daemonPid int
+ logger *logrus.Entry
+
+ // daemonWaitCh is closed once the containerd process spawned by
+ // startContainerd has been reaped. It is left nil when an already running
+ // daemon was picked up from the pid file.
+ daemonWaitCh chan struct{}
+ // clients lists every client created through NewClient (guarded by the
+ // embedded RWMutex).
+ clients []*client
+ // shutdownContext is cancelled by Cleanup (through shutdownCancel) to stop
+ // the health check and the clients' event streams.
+ shutdownContext context.Context
+ shutdownCancel context.CancelFunc
+ // NOTE(review): shutdown is not read or written in the code visible here.
+ shutdown bool
+
+ // Options
+ startDaemon bool
+ rootDir string
+ stateDir string
+ snapshotter string
+ pluginConfs pluginConfigs
+}
+
+// New creates a fresh instance of libcontainerd remote.
+//
+// The "daemon" sub-directories of rootDir and stateDir become containerd's
+// Root and State directories. The given options are applied first, then the
+// platform defaults are filled in for whatever is still unset. When the
+// WithStartDaemon option is enabled a containerd process is started (or an
+// already running one is adopted) and it is cleaned up again if a later step
+// fails. Finally a dedicated client is created to health-check the daemon in
+// the background.
+//
+// Any error returned is wrapped with "Failed to connect to containerd".
+func New(rootDir, stateDir string, options ...RemoteOption) (rem Remote, err error) {
+	defer func() {
+		if err != nil {
+			err = errors.Wrap(err, "Failed to connect to containerd")
+		}
+	}()
+
+	r := &remote{
+		rootDir:  rootDir,
+		stateDir: stateDir,
+		Config: server.Config{
+			Root:  filepath.Join(rootDir, "daemon"),
+			State: filepath.Join(stateDir, "daemon"),
+		},
+		pluginConfs: pluginConfigs{make(map[string]interface{})},
+		daemonPid:   -1,
+		logger:      logrus.WithField("module", "libcontainerd"),
+	}
+	r.shutdownContext, r.shutdownCancel = context.WithCancel(context.Background())
+
+	rem = r
+	for _, option := range options {
+		if err = option.Apply(r); err != nil {
+			return
+		}
+	}
+	r.setDefaults()
+
+	if err = system.MkdirAll(stateDir, 0700, ""); err != nil {
+		return
+	}
+
+	if r.startDaemon {
+		// Remove any stale socket left behind by a previous instance.
+		os.Remove(r.GRPC.Address)
+		if err = r.startContainerd(); err != nil {
+			return
+		}
+		defer func() {
+			if err != nil {
+				r.Cleanup()
+			}
+		}()
+	}
+
+	// This connection is just used to monitor the connection
+	client, err := containerd.New(r.GRPC.Address)
+	if err != nil {
+		return
+	}
+	if _, err := client.Version(context.Background()); err != nil {
+		// The monitoring client is not handed to anyone on this path, so
+		// release it here.
+		client.Close()
+		// daemonPid is still -1 when no daemon is managed by this remote.
+		// Never hand that value to KillProcess: assuming it forwards the pid
+		// to kill(2), -1 addresses every process we are allowed to signal.
+		if r.daemonPid != -1 {
+			system.KillProcess(r.daemonPid)
+		}
+		return nil, errors.Wrapf(err, "unable to get containerd version")
+	}
+
+	go r.monitorConnection(client)
+
+	return r, nil
+}
+
+// NewClient returns a Client bound to the containerd namespace ns that
+// reports to backend b. It opens its own connection to the daemon's GRPC
+// address, starts a goroutine processing the event stream (handed the
+// remote's shutdown context), and registers the client with the remote.
+func (r *remote) NewClient(ns string, b Backend) (Client, error) {
+ c := &client{
+ stateDir: r.stateDir,
+ logger: r.logger.WithField("namespace", ns),
+ namespace: ns,
+ backend: b,
+ containers: make(map[string]*container),
+ }
+
+ rclient, err := containerd.New(r.GRPC.Address, containerd.WithDefaultNamespace(ns))
+ if err != nil {
+ return nil, err
+ }
+ c.remote = rclient
+
+ go c.processEventStream(r.shutdownContext)
+
+ r.Lock()
+ r.clients = append(r.clients, c)
+ r.Unlock()
+ return c, nil
+}
+
+// Cleanup cancels the shutdown context and stops the containerd daemon when
+// one is being managed (daemonPid != -1), then removes the pid file and runs
+// the platform specific cleanup.
+func (r *remote) Cleanup() {
+ if r.daemonPid != -1 {
+ // Cancel first so the health check sees the shutdown as intentional
+ // and does not try to restart the daemon.
+ r.shutdownCancel()
+ r.stopDaemon()
+ }
+
+ // cleanup some files
+ os.Remove(filepath.Join(r.stateDir, pidFile))
+
+ r.platformCleanup()
+}
+
+// getContainerdPid reads the pid file found in the state directory and
+// returns the pid it holds if that process is still alive. It returns -1
+// without error when the file is missing or empty, or when the recorded
+// process is gone. Read and parse failures are returned alongside -1.
+func (r *remote) getContainerdPid() (int, error) {
+	path := filepath.Join(r.stateDir, pidFile)
+	fh, err := os.OpenFile(path, os.O_RDWR, 0600)
+	switch {
+	case err == nil:
+	case os.IsNotExist(err):
+		// No pid file: nothing was started previously.
+		return -1, nil
+	default:
+		return -1, err
+	}
+	defer fh.Close()
+
+	buf := make([]byte, 8)
+	count, err := fh.Read(buf)
+	if err != nil && err != io.EOF {
+		return -1, err
+	}
+	if count <= 0 {
+		return -1, nil
+	}
+
+	parsed, err := strconv.ParseUint(string(buf[:count]), 10, 64)
+	if err != nil {
+		return -1, err
+	}
+	if pid := int(parsed); system.IsProcessAlive(pid) {
+		return pid, nil
+	}
+
+	return -1, nil
+}
+
+// getContainerdConfig (re)generates the containerd configuration file in the
+// state directory and returns its path. The file is truncated on every call,
+// then the general server configuration is written followed by the plugin
+// configurations.
+func (r *remote) getContainerdConfig() (string, error) {
+ path := filepath.Join(r.stateDir, configFile)
+ f, err := os.OpenFile(path, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0600)
+ if err != nil {
+ return "", errors.Wrapf(err, "failed to open containerd config file at %s", path)
+ }
+ defer f.Close()
+
+ // Both documents are encoded back to back into the same file.
+ enc := toml.NewEncoder(f)
+ if err = enc.Encode(r.Config); err != nil {
+ return "", errors.Wrapf(err, "failed to encode general config")
+ }
+ if err = enc.Encode(r.pluginConfs); err != nil {
+ return "", errors.Wrapf(err, "failed to encode plugin configs")
+ }
+
+ return path, nil
+}
+
+// startContainerd makes sure a containerd daemon is running and records its
+// pid in r.daemonPid. If the pid file points at a live process, that process
+// is adopted as is. Otherwise the configuration file is regenerated, a new
+// daemon is spawned and its pid is saved to the pid file.
+func (r *remote) startContainerd() error {
+ pid, err := r.getContainerdPid()
+ if err != nil {
+ return err
+ }
+
+ if pid != -1 {
+ // Adopted daemon: it is not our child, so no wait goroutine is set up
+ // and daemonWaitCh is not (re)created on this path.
+ r.daemonPid = pid
+ logrus.WithField("pid", pid).
+ Infof("libcontainerd: %s is still running", binaryName)
+ return nil
+ }
+
+ configFile, err := r.getContainerdConfig()
+ if err != nil {
+ return err
+ }
+
+ args := []string{"--config", configFile}
+ cmd := exec.Command(binaryName, args...)
+ // redirect containerd logs to docker logs
+ cmd.Stdout = os.Stdout
+ cmd.Stderr = os.Stderr
+ cmd.SysProcAttr = containerdSysProcAttr()
+ // clear the NOTIFY_SOCKET from the env when starting containerd
+ cmd.Env = nil
+ for _, e := range os.Environ() {
+ if !strings.HasPrefix(e, "NOTIFY_SOCKET") {
+ cmd.Env = append(cmd.Env, e)
+ }
+ }
+ if err := cmd.Start(); err != nil {
+ return err
+ }
+
+ // daemonWaitCh is closed once the child has exited and been reaped.
+ r.daemonWaitCh = make(chan struct{})
+ go func() {
+ // Reap our child when needed
+ if err := cmd.Wait(); err != nil {
+ r.logger.WithError(err).Errorf("containerd did not exit successfully")
+ }
+ close(r.daemonWaitCh)
+ }()
+
+ r.daemonPid = cmd.Process.Pid
+
+ err = ioutil.WriteFile(filepath.Join(r.stateDir, pidFile), []byte(fmt.Sprintf("%d", r.daemonPid)), 0660)
+ if err != nil {
+ // Without a pid file the daemon could not be found again later, so
+ // do not leave it running.
+ system.KillProcess(r.daemonPid)
+ return errors.Wrap(err, "libcontainerd: failed to save daemon pid to disk")
+ }
+
+ logrus.WithField("pid", r.daemonPid).
+ Infof("libcontainerd: started new %s process", binaryName)
+
+ return nil
+}
+
+// monitorConnection health-checks containerd through client every 500ms
+// until the remote's shutdown context is cancelled.
+//
+// When a daemon is being managed (daemonPid != -1) and either
+// maxConnectionRetryCount consecutive checks failed or the process is gone,
+// the daemon is killed if still alive, restarted, and a new monitoring client
+// replaces the previous one. Restart failures are logged and retried on later
+// ticks.
+func (r *remote) monitorConnection(client *containerd.Client) {
+	var transientFailureCount = 0
+
+	ticker := time.NewTicker(500 * time.Millisecond)
+	defer ticker.Stop()
+
+	for {
+		<-ticker.C
+		ctx, cancel := context.WithTimeout(r.shutdownContext, healthCheckTimeout)
+		_, err := client.IsServing(ctx)
+		cancel()
+		if err == nil {
+			transientFailureCount = 0
+			continue
+		}
+
+		// A failure after the shutdown context was cancelled is expected:
+		// stop monitoring instead of restarting the daemon.
+		select {
+		case <-r.shutdownContext.Done():
+			r.logger.Info("stopping healtcheck following graceful shutdown")
+			client.Close()
+			return
+		default:
+		}
+
+		r.logger.WithError(err).WithField("binary", binaryName).Debug("daemon is not responding")
+
+		if r.daemonPid == -1 {
+			// Not managing a daemon: nothing to restart.
+			continue
+		}
+
+		transientFailureCount++
+		if transientFailureCount < maxConnectionRetryCount && system.IsProcessAlive(r.daemonPid) {
+			continue
+		}
+
+		transientFailureCount = 0
+		if system.IsProcessAlive(r.daemonPid) {
+			r.logger.WithField("pid", r.daemonPid).Info("killing and restarting containerd")
+			// Try to get a stack trace
+			syscall.Kill(r.daemonPid, syscall.SIGUSR1)
+			<-time.After(100 * time.Millisecond)
+			system.KillProcess(r.daemonPid)
+		}
+		// daemonWaitCh only exists when startContainerd spawned the daemon.
+		// For a daemon adopted from the pid file it is nil, and receiving
+		// from a nil channel would block this goroutine forever.
+		if r.daemonWaitCh != nil {
+			<-r.daemonWaitCh
+		}
+		client.Close()
+		os.Remove(r.GRPC.Address)
+		if err := r.startContainerd(); err != nil {
+			r.logger.WithError(err).Error("failed restarting containerd")
+			continue
+		}
+		newClient, err := containerd.New(r.GRPC.Address)
+		if err != nil {
+			r.logger.WithError(err).Error("failed connect to containerd")
+			continue
+		}
+		client = newClient
+	}
+}
diff --git a/libcontainerd/remote_daemon_options.go b/libcontainerd/remote_daemon_options.go
new file mode 100644
index 0000000000..b167f64c8b
--- /dev/null
+++ b/libcontainerd/remote_daemon_options.go
@@ -0,0 +1,141 @@
+// +build !windows
+
+package libcontainerd
+
+import "fmt"
+
+// WithRemoteAddr returns a RemoteOption that makes the remote use addr as the
+// containerd GRPC address (an external containerd socket).
+func WithRemoteAddr(addr string) RemoteOption {
+	return rpcAddr(addr)
+}
+
+// rpcAddr is the RemoteOption behind WithRemoteAddr.
+type rpcAddr string
+
+// Apply stores the address in r's GRPC configuration. It returns an error
+// when r is not a *remote.
+func (a rpcAddr) Apply(r Remote) error {
+	target, ok := r.(*remote)
+	if !ok {
+		return fmt.Errorf("WithRemoteAddr option not supported for this remote")
+	}
+	target.GRPC.Address = string(a)
+	return nil
+}
+
+// WithRemoteAddrUser sets the uid and gid to create the RPC address with.
+func WithRemoteAddrUser(uid, gid int) RemoteOption {
+	return rpcUser{uid, gid}
+}
+
+// rpcUser is the RemoteOption behind WithRemoteAddrUser.
+type rpcUser struct {
+	uid int
+	gid int
+}
+
+// Apply stores the uid and gid in r's GRPC configuration. It returns an error
+// when r is not a *remote.
+func (u rpcUser) Apply(r Remote) error {
+	target, ok := r.(*remote)
+	if !ok {
+		// Name the option that was actually used; the message previously
+		// pointed at WithRemoteAddr.
+		return fmt.Errorf("WithRemoteAddrUser option not supported for this remote")
+	}
+	target.GRPC.Uid = u.uid
+	target.GRPC.Gid = u.gid
+	return nil
+}
+
+// WithStartDaemon returns a RemoteOption controlling whether libcontainerd
+// should also run the containerd daemon itself.
+func WithStartDaemon(start bool) RemoteOption {
+	return startDaemon(start)
+}
+
+// startDaemon is the RemoteOption behind WithStartDaemon.
+type startDaemon bool
+
+// Apply records the flag on r. It returns an error when r is not a *remote.
+func (s startDaemon) Apply(r Remote) error {
+	target, ok := r.(*remote)
+	if !ok {
+		return fmt.Errorf("WithStartDaemon option not supported for this remote")
+	}
+	target.startDaemon = bool(s)
+	return nil
+}
+
+// WithLogLevel defines which log level to start containerd with.
+// This only makes sense if WithStartDaemon() was set to true.
+func WithLogLevel(lvl string) RemoteOption {
+	return logLevel(lvl)
+}
+
+// logLevel is the RemoteOption behind WithLogLevel.
+type logLevel string
+
+// Apply stores the level in r's debug configuration. It returns an error when
+// r is not a *remote.
+func (l logLevel) Apply(r Remote) error {
+	target, ok := r.(*remote)
+	if !ok {
+		// Name the option that was actually used; the message previously
+		// mentioned a WithDebugLog option that does not exist here.
+		return fmt.Errorf("WithLogLevel option not supported for this remote")
+	}
+	target.Debug.Level = string(l)
+	return nil
+}
+
+// WithDebugAddress returns a RemoteOption setting the address at which the
+// debug GRPC connection should be made.
+func WithDebugAddress(addr string) RemoteOption {
+	return debugAddress(addr)
+}
+
+// debugAddress is the RemoteOption behind WithDebugAddress.
+type debugAddress string
+
+// Apply stores the address in r's debug configuration. It returns an error
+// when r is not a *remote.
+func (d debugAddress) Apply(r Remote) error {
+	target, ok := r.(*remote)
+	if !ok {
+		return fmt.Errorf("WithDebugAddress option not supported for this remote")
+	}
+	target.Debug.Address = string(d)
+	return nil
+}
+
+// WithMetricsAddress returns a RemoteOption setting the address used in the
+// metrics section of the containerd configuration.
+func WithMetricsAddress(addr string) RemoteOption {
+	return metricsAddress(addr)
+}
+
+// metricsAddress is the RemoteOption behind WithMetricsAddress.
+type metricsAddress string
+
+// Apply stores the address in r's metrics configuration. It returns an error
+// when r is not a *remote.
+func (m metricsAddress) Apply(r Remote) error {
+	target, ok := r.(*remote)
+	if !ok {
+		return fmt.Errorf("WithMetricsAddress option not supported for this remote")
+	}
+	target.Metrics.Address = string(m)
+	return nil
+}
+
+// WithSnapshotter returns a RemoteOption selecting which snapshotter driver
+// should be used.
+func WithSnapshotter(name string) RemoteOption {
+	return snapshotter(name)
+}
+
+// snapshotter is the RemoteOption behind WithSnapshotter.
+type snapshotter string
+
+// Apply records the snapshotter name on r. It returns an error when r is not
+// a *remote.
+func (s snapshotter) Apply(r Remote) error {
+	target, ok := r.(*remote)
+	if !ok {
+		return fmt.Errorf("WithSnapshotter option not supported for this remote")
+	}
+	target.snapshotter = string(s)
+	return nil
+}
+
+// WithPlugin returns a RemoteOption that configures the containerd plugin
+// called name. Configuration values passed in need to be quoted if quotes
+// are needed in the toml format.
+func WithPlugin(name string, conf interface{}) RemoteOption {
+	return pluginConf{name: name, conf: conf}
+}
+
+// pluginConf is the RemoteOption behind WithPlugin.
+type pluginConf struct {
+	// name is the name of the plugin
+	name string
+	// conf is the configuration value registered for that plugin
+	conf interface{}
+}
+
+// Apply registers the configuration under the plugin's name in r's plugin
+// configurations. It returns an error when r is not a *remote.
+func (p pluginConf) Apply(r Remote) error {
+	target, ok := r.(*remote)
+	if !ok {
+		return fmt.Errorf("WithPlugin option not supported for this remote")
+	}
+	target.pluginConfs.Plugins[p.name] = p.conf
+	return nil
+}
diff --git a/libcontainerd/remote_daemon_options_unix.go b/libcontainerd/remote_daemon_options_unix.go
new file mode 100644
index 0000000000..e97789c4e5
--- /dev/null
+++ b/libcontainerd/remote_daemon_options_unix.go
@@ -0,0 +1,36 @@
+// +build linux solaris
+
+package libcontainerd
+
+import "fmt"
+
+// WithOOMScore returns a RemoteOption defining the oom_score_adj to set for
+// the containerd process.
+func WithOOMScore(score int) RemoteOption {
+	return oomScore(score)
+}
+
+// oomScore is the RemoteOption behind WithOOMScore.
+type oomScore int
+
+// Apply stores the score in r's configuration. It returns an error when r is
+// not a *remote.
+func (o oomScore) Apply(r Remote) error {
+	target, ok := r.(*remote)
+	if !ok {
+		return fmt.Errorf("WithOOMScore option not supported for this remote")
+	}
+	target.OOMScore = int(o)
+	return nil
+}
+
+// WithSubreaper returns a RemoteOption setting whether containerd should
+// register itself as a subreaper.
+func WithSubreaper(reap bool) RemoteOption {
+	return subreaper(reap)
+}
+
+// subreaper is the RemoteOption behind WithSubreaper.
+type subreaper bool
+
+// Apply stores the flag in r's configuration. It returns an error when r is
+// not a *remote.
+func (s subreaper) Apply(r Remote) error {
+	target, ok := r.(*remote)
+	if !ok {
+		return fmt.Errorf("WithSubreaper option not supported for this remote")
+	}
+	target.Subreaper = bool(s)
+	return nil
+}
diff --git a/libcontainerd/remote_daemon_process.go b/libcontainerd/remote_daemon_process.go
new file mode 100644
index 0000000000..a00406e150
--- /dev/null
+++ b/libcontainerd/remote_daemon_process.go
@@ -0,0 +1,56 @@
+// +build !windows
+
+package libcontainerd
+
+import "github.com/pkg/errors"
+
+// process represents the state for the main container process or an exec.
+type process struct {
+ // id is the logical name of the process; it is also used as the prefix of
+ // the names of its stdio pipes
+ id string
+
+ // cid is the container id to which this process belongs
+ cid string
+
+ // pid is the identifier of the process; zero means it has not been set
+ // yet (see SetPid)
+ pid uint32
+
+ // io holds the io reader/writer associated with the process
+ io *IOPipe
+
+ // root is the state directory for the process, in which its stdio pipes
+ // are located
+ root string
+}
+
+// ID returns the logical name of the process.
+func (p *process) ID() string {
+ return p.id
+}
+
+// Pid returns the pid recorded for the process, or zero if none was set.
+func (p *process) Pid() uint32 {
+ return p.pid
+}
+
+// SetPid records pid as the identifier of the process. The pid can only be
+// set once: if a non-zero pid is already recorded it is left untouched and an
+// error reporting that existing value is returned.
+func (p *process) SetPid(pid uint32) error {
+	if p.pid != 0 {
+		// Report the pid that is already in place, not the rejected one.
+		return errors.Errorf("pid is already set to %d", p.pid)
+	}
+
+	p.pid = pid
+	return nil
+}
+
+// IOPipe returns the io reader/writer associated with the process.
+func (p *process) IOPipe() *IOPipe {
+ return p.io
+}
+
+// CloseIO closes whichever of the process' stdin, stdout and stderr streams
+// are set. Errors returned by Close are ignored.
+// NOTE(review): p.io itself is dereferenced without a nil check — confirm
+// callers always set it.
+func (p *process) CloseIO() {
+ if p.io.Stdin != nil {
+ p.io.Stdin.Close()
+ }
+ if p.io.Stdout != nil {
+ p.io.Stdout.Close()
+ }
+ if p.io.Stderr != nil {
+ p.io.Stderr.Close()
+ }
+}
diff --git a/libcontainerd/remote_daemon_process_unix.go b/libcontainerd/remote_daemon_process_unix.go
new file mode 100644
index 0000000000..38533df35f
--- /dev/null
+++ b/libcontainerd/remote_daemon_process_unix.go
@@ -0,0 +1,61 @@
+// +build linux solaris
+
+package libcontainerd
+
+import (
+ "os"
+ "path/filepath"
+
+ "github.com/pkg/errors"
+ "golang.org/x/sys/unix"
+)
+
+// fdNames maps the standard file descriptors to the suffix used when naming
+// the corresponding pipe of a process.
+var fdNames = map[int]string{
+ unix.Stdin: "stdin",
+ unix.Stdout: "stdout",
+ unix.Stderr: "stderr",
+}
+
+// pipeName returns the path of the pipe for the given standard file
+// descriptor: "<root>/<id>-<stdin|stdout|stderr>".
+func (p *process) pipeName(index int) string {
+ return filepath.Join(p.root, p.id+"-"+fdNames[index])
+}
+
+// IOPaths returns the paths of the stdin, stdout and stderr pipes of the
+// process, in that order. The stdin and stderr paths are empty when the
+// matching stream is not set on the process; stdout is always returned.
+func (p *process) IOPaths() (string, string, string) {
+	var stdin, stderr string
+
+	// TODO: debug why we're having zombies when I don't unset those
+	if p.io.Stdin != nil {
+		stdin = p.pipeName(unix.Stdin)
+	}
+	if p.io.Stderr != nil {
+		stderr = p.pipeName(unix.Stderr)
+	}
+
+	return stdin, p.pipeName(unix.Stdout), stderr
+}
+
+// Cleanup closes the process' streams and removes its stdin, stdout and
+// stderr pipes from disk. A pipe that does not exist is not an error: stdin
+// and stderr pipes are not necessarily in use (see IOPaths). It returns an
+// error if any other removal failed.
+func (p *process) Cleanup() error {
+	var retErr error
+
+	// Ensure everything was closed
+	p.CloseIO()
+
+	for _, i := range [3]string{
+		p.pipeName(unix.Stdin),
+		p.pipeName(unix.Stdout),
+		p.pipeName(unix.Stderr),
+	} {
+		err := os.Remove(i)
+		if err == nil || os.IsNotExist(err) {
+			// Removed, or never there in the first place.
+			continue
+		}
+		if retErr == nil {
+			retErr = errors.Wrapf(err, "failed to remove %s", i)
+		} else {
+			retErr = errors.Wrapf(retErr, "failed to remove %s", i)
+		}
+	}
+
+	return retErr
+}
diff --git a/libcontainerd/remote_daemon_unix.go b/libcontainerd/remote_daemon_unix.go
new file mode 100644
index 0000000000..e0c56e83aa
--- /dev/null
+++ b/libcontainerd/remote_daemon_unix.go
@@ -0,0 +1,56 @@
+// +build linux solaris
+
+package libcontainerd
+
+import (
+ "os"
+ "path/filepath"
+ "syscall"
+ "time"
+
+ "github.com/docker/docker/pkg/system"
+)
+
+const (
+ sockFile = "docker-containerd.sock"
+ debugSockFile = "docker-containerd-debug.sock"
+)
+
+// setDefaults fills in every setting left unset by the options: the GRPC and
+// debug sockets are placed in the state directory, the log level defaults to
+// "info", the OOM score to -999 and the snapshotter to "overlay".
+func (r *remote) setDefaults() {
+ if r.GRPC.Address == "" {
+ r.GRPC.Address = filepath.Join(r.stateDir, sockFile)
+ }
+ if r.Debug.Address == "" {
+ r.Debug.Address = filepath.Join(r.stateDir, debugSockFile)
+ }
+ if r.Debug.Level == "" {
+ r.Debug.Level = "info"
+ }
+ // An OOM score of 0 is indistinguishable from "unset" here, so it is
+ // replaced by the default as well.
+ if r.OOMScore == 0 {
+ r.OOMScore = -999
+ }
+ if r.snapshotter == "" {
+ r.snapshotter = "overlay"
+ }
+}
+
+// stopDaemon sends SIGTERM to the daemon, polls once per second until it is
+// gone or shutdownTimeout has elapsed, and sends SIGKILL if it is still alive
+// afterwards.
+func (r *remote) stopDaemon() {
+	// Ask the daemon to quit
+	syscall.Kill(r.daemonPid, syscall.SIGTERM)
+
+	// Wait up to 15secs for it to stop
+	for waited := time.Duration(0); waited < shutdownTimeout && system.IsProcessAlive(r.daemonPid); waited += time.Second {
+		time.Sleep(time.Second)
+	}
+
+	if !system.IsProcessAlive(r.daemonPid) {
+		return
+	}
+	r.logger.WithField("pid", r.daemonPid).Warn("daemon didn't stop within 15 secs, killing it")
+	syscall.Kill(r.daemonPid, syscall.SIGKILL)
+}
+
+// platformCleanup removes the default GRPC socket file from the state
+// directory. Errors are ignored.
+// NOTE(review): a custom address set through WithRemoteAddr is not removed
+// here — confirm this is intended.
+func (r *remote) platformCleanup() {
+ os.Remove(filepath.Join(r.stateDir, sockFile))
+}
diff --git a/libcontainerd/remote_daemon_windows.go b/libcontainerd/remote_daemon_windows.go
new file mode 100644
index 0000000000..44b5fc0837
--- /dev/null
+++ b/libcontainerd/remote_daemon_windows.go
@@ -0,0 +1,50 @@
+// +build remote_daemon
+
+package libcontainerd
+
+import (
+ "os"
+)
+
+const (
+ grpcPipeName = `\\.\pipe\docker-containerd-containerd`
+ debugPipeName = `\\.\pipe\docker-containerd-debug`
+)
+
+// setDefaults fills in every setting left unset by the options: the GRPC and
+// debug addresses default to fixed named pipes, the log level to "info" and
+// the snapshotter to "naive".
+func (r *remote) setDefaults() {
+ if r.GRPC.Address == "" {
+ r.GRPC.Address = grpcPipeName
+ }
+ if r.Debug.Address == "" {
+ r.Debug.Address = debugPipeName
+ }
+ if r.Debug.Level == "" {
+ r.Debug.Level = "info"
+ }
+ if r.snapshotter == "" {
+ r.snapshotter = "naive" // TODO(mlaventure): switch to "windows" once implemented
+ }
+}
+
+// stopDaemon looks up the daemon process by pid, kills it and waits for it to
+// exit. Each failure is logged as a warning and ends the attempt.
+func (r *remote) stopDaemon() {
+	proc, findErr := os.FindProcess(r.daemonPid)
+	if findErr != nil {
+		r.logger.WithField("pid", r.daemonPid).Warn("could not find daemon process")
+		return
+	}
+
+	if killErr := proc.Kill(); killErr != nil {
+		r.logger.WithError(killErr).WithField("pid", r.daemonPid).Warn("could not kill daemon process")
+		return
+	}
+
+	if _, waitErr := proc.Wait(); waitErr != nil {
+		r.logger.WithError(waitErr).WithField("pid", r.daemonPid).Warn("wait for daemon process")
+	}
+}
+
+// platformCleanup has no platform specific resources to release here.
+func (r *remote) platformCleanup() {
+ // Nothing to do
+}
diff --git a/libcontainerd/remote_local.go b/libcontainerd/remote_local.go
new file mode 100644
index 0000000000..ad3be03abe
--- /dev/null
+++ b/libcontainerd/remote_local.go
@@ -0,0 +1,59 @@
+// +build windows
+
+package libcontainerd
+
+import (
+ "sync"
+
+ "github.com/sirupsen/logrus"
+)
+
+// remote is the Windows implementation of the libcontainerd remote. It only
+// keeps track of the clients it created and of the directories handed to New.
+type remote struct {
+ sync.RWMutex
+
+ logger *logrus.Entry
+ // clients lists every client created through NewClient (guarded by the
+ // embedded RWMutex).
+ clients []*client
+
+ // Options
+ rootDir string
+ stateDir string
+}
+
+// New creates a fresh instance of libcontainerd remote.
+// The options argument is not used by this implementation, and the returned
+// error is always nil.
+func New(rootDir, stateDir string, options ...RemoteOption) (Remote, error) {
+ return &remote{
+ logger: logrus.WithField("module", "libcontainerd"),
+ rootDir: rootDir,
+ stateDir: stateDir,
+ }, nil
+}
+
+// client is the Windows libcontainerd client. It is created by
+// remote.NewClient and tracks its containers by id.
+type client struct {
+ sync.Mutex
+
+ rootDir string
+ stateDir string
+ // backend is the Backend handed to NewClient.
+ backend Backend
+ logger *logrus.Entry
+ eventQ queue
+ // containers holds the containers known to this client.
+ // NOTE(review): presumably keyed by container id — confirm.
+ containers map[string]*container
+}
+
+// NewClient returns a Client reporting to backend b and registers it with the
+// remote. The namespace ns is only used to tag the client's logger. The
+// returned error is always nil.
+func (r *remote) NewClient(ns string, b Backend) (Client, error) {
+ c := &client{
+ rootDir: r.rootDir,
+ stateDir: r.stateDir,
+ backend: b,
+ logger: r.logger.WithField("namespace", ns),
+ containers: make(map[string]*container),
+ }
+ r.Lock()
+ r.clients = append(r.clients, c)
+ r.Unlock()
+
+ return c, nil
+}
+
+// Cleanup is a no-op for this implementation of the remote.
+func (r *remote) Cleanup() {
+ // Nothing to do
+}
diff --git a/libcontainerd/remote_unix.go b/libcontainerd/remote_unix.go
deleted file mode 100644
index 7bab53e796..0000000000
--- a/libcontainerd/remote_unix.go
+++ /dev/null
@@ -1,565 +0,0 @@
-// +build linux solaris
-
-package libcontainerd
-
-import (
- "fmt"
- "io"
- "io/ioutil"
- "log"
- "net"
- "os"
- "os/exec"
- "path/filepath"
- goruntime "runtime"
- "strconv"
- "strings"
- "sync"
- "time"
-
- containerd "github.com/containerd/containerd/api/grpc/types"
- "github.com/docker/docker/pkg/locker"
- "github.com/docker/docker/pkg/system"
- "github.com/golang/protobuf/ptypes"
- "github.com/golang/protobuf/ptypes/timestamp"
- "github.com/sirupsen/logrus"
- "golang.org/x/net/context"
- "golang.org/x/sys/unix"
- "google.golang.org/grpc"
- "google.golang.org/grpc/grpclog"
- "google.golang.org/grpc/health/grpc_health_v1"
- "google.golang.org/grpc/transport"
-)
-
-const (
- maxConnectionRetryCount = 3
- containerdHealthCheckTimeout = 3 * time.Second
- containerdShutdownTimeout = 15 * time.Second
- containerdBinary = "docker-containerd"
- containerdPidFilename = "docker-containerd.pid"
- containerdSockFilename = "docker-containerd.sock"
- containerdStateDir = "containerd"
- eventTimestampFilename = "event.ts"
-)
-
-type remote struct {
- sync.RWMutex
- apiClient containerd.APIClient
- daemonPid int
- stateDir string
- rpcAddr string
- startDaemon bool
- closedManually bool
- debugLog bool
- rpcConn *grpc.ClientConn
- clients []*client
- eventTsPath string
- runtime string
- runtimeArgs []string
- daemonWaitCh chan struct{}
- liveRestore bool
- oomScore int
- restoreFromTimestamp *timestamp.Timestamp
-}
-
-// New creates a fresh instance of libcontainerd remote.
-func New(stateDir string, options ...RemoteOption) (_ Remote, err error) {
- defer func() {
- if err != nil {
- err = fmt.Errorf("Failed to connect to containerd. Please make sure containerd is installed in your PATH or you have specified the correct address. Got error: %v", err)
- }
- }()
- r := &remote{
- stateDir: stateDir,
- daemonPid: -1,
- eventTsPath: filepath.Join(stateDir, eventTimestampFilename),
- }
- for _, option := range options {
- if err := option.Apply(r); err != nil {
- return nil, err
- }
- }
-
- if err := system.MkdirAll(stateDir, 0700, ""); err != nil {
- return nil, err
- }
-
- if r.rpcAddr == "" {
- r.rpcAddr = filepath.Join(stateDir, containerdSockFilename)
- }
-
- if r.startDaemon {
- if err := r.runContainerdDaemon(); err != nil {
- return nil, err
- }
- }
-
- // don't output the grpc reconnect logging
- grpclog.SetLogger(log.New(ioutil.Discard, "", log.LstdFlags))
- dialOpts := []grpc.DialOption{
- grpc.WithInsecure(),
- grpc.WithBackoffMaxDelay(2 * time.Second),
- grpc.WithDialer(func(addr string, timeout time.Duration) (net.Conn, error) {
- return net.DialTimeout("unix", addr, timeout)
- }),
- }
- conn, err := grpc.Dial(r.rpcAddr, dialOpts...)
- if err != nil {
- return nil, fmt.Errorf("error connecting to containerd: %v", err)
- }
-
- r.rpcConn = conn
- r.apiClient = containerd.NewAPIClient(conn)
-
- // Get the timestamp to restore from
- t := r.getLastEventTimestamp()
- tsp, err := ptypes.TimestampProto(t)
- if err != nil {
- logrus.Errorf("libcontainerd: failed to convert timestamp: %q", err)
- }
- r.restoreFromTimestamp = tsp
-
- go r.handleConnectionChange()
-
- if err := r.startEventsMonitor(); err != nil {
- return nil, err
- }
-
- return r, nil
-}
-
-func (r *remote) UpdateOptions(options ...RemoteOption) error {
- for _, option := range options {
- if err := option.Apply(r); err != nil {
- return err
- }
- }
- return nil
-}
-
-func (r *remote) handleConnectionChange() {
- var transientFailureCount = 0
-
- ticker := time.NewTicker(500 * time.Millisecond)
- defer ticker.Stop()
- healthClient := grpc_health_v1.NewHealthClient(r.rpcConn)
-
- for {
- <-ticker.C
- ctx, cancel := context.WithTimeout(context.Background(), containerdHealthCheckTimeout)
- _, err := healthClient.Check(ctx, &grpc_health_v1.HealthCheckRequest{})
- cancel()
- if err == nil {
- continue
- }
-
- logrus.Debugf("libcontainerd: containerd health check returned error: %v", err)
-
- if r.daemonPid != -1 {
- if r.closedManually {
- // Well, we asked for it to stop, just return
- return
- }
- // all other errors are transient
- // Reset state to be notified of next failure
- transientFailureCount++
- if transientFailureCount >= maxConnectionRetryCount {
- transientFailureCount = 0
- if system.IsProcessAlive(r.daemonPid) {
- system.KillProcess(r.daemonPid)
- }
- <-r.daemonWaitCh
- if err := r.runContainerdDaemon(); err != nil { //FIXME: Handle error
- logrus.Errorf("libcontainerd: error restarting containerd: %v", err)
- }
- continue
- }
- }
- }
-}
-
-func (r *remote) Cleanup() {
- if r.daemonPid == -1 {
- return
- }
- r.closedManually = true
- r.rpcConn.Close()
- // Ask the daemon to quit
- unix.Kill(r.daemonPid, unix.SIGTERM)
-
- // Wait up to 15secs for it to stop
- for i := time.Duration(0); i < containerdShutdownTimeout; i += time.Second {
- if !system.IsProcessAlive(r.daemonPid) {
- break
- }
- time.Sleep(time.Second)
- }
-
- if system.IsProcessAlive(r.daemonPid) {
- logrus.Warnf("libcontainerd: containerd (%d) didn't stop within 15 secs, killing it\n", r.daemonPid)
- unix.Kill(r.daemonPid, unix.SIGKILL)
- }
-
- // cleanup some files
- os.Remove(filepath.Join(r.stateDir, containerdPidFilename))
- os.Remove(filepath.Join(r.stateDir, containerdSockFilename))
-}
-
-func (r *remote) Client(b Backend) (Client, error) {
- c := &client{
- clientCommon: clientCommon{
- backend: b,
- containers: make(map[string]*container),
- locker: locker.New(),
- },
- remote: r,
- exitNotifiers: make(map[string]*exitNotifier),
- liveRestore: r.liveRestore,
- }
-
- r.Lock()
- r.clients = append(r.clients, c)
- r.Unlock()
- return c, nil
-}
-
-func (r *remote) updateEventTimestamp(t time.Time) {
- f, err := os.OpenFile(r.eventTsPath, unix.O_CREAT|unix.O_WRONLY|unix.O_TRUNC, 0600)
- if err != nil {
- logrus.Warnf("libcontainerd: failed to open event timestamp file: %v", err)
- return
- }
- defer f.Close()
-
- b, err := t.MarshalText()
- if err != nil {
- logrus.Warnf("libcontainerd: failed to encode timestamp: %v", err)
- return
- }
-
- n, err := f.Write(b)
- if err != nil || n != len(b) {
- logrus.Warnf("libcontainerd: failed to update event timestamp file: %v", err)
- f.Truncate(0)
- return
- }
-}
-
-func (r *remote) getLastEventTimestamp() time.Time {
- t := time.Now()
-
- fi, err := os.Stat(r.eventTsPath)
- if os.IsNotExist(err) || fi.Size() == 0 {
- return t
- }
-
- f, err := os.Open(r.eventTsPath)
- if err != nil {
- logrus.Warnf("libcontainerd: Unable to access last event ts: %v", err)
- return t
- }
- defer f.Close()
-
- b := make([]byte, fi.Size())
- n, err := f.Read(b)
- if err != nil || n != len(b) {
- logrus.Warnf("libcontainerd: Unable to read last event ts: %v", err)
- return t
- }
-
- t.UnmarshalText(b)
-
- return t
-}
-
-func (r *remote) startEventsMonitor() error {
- // First, get past events
- t := r.getLastEventTimestamp()
- tsp, err := ptypes.TimestampProto(t)
- if err != nil {
- logrus.Errorf("libcontainerd: failed to convert timestamp: %q", err)
- }
- er := &containerd.EventsRequest{
- Timestamp: tsp,
- }
-
- var events containerd.API_EventsClient
- for {
- events, err = r.apiClient.Events(context.Background(), er, grpc.FailFast(false))
- if err == nil {
- break
- }
- logrus.Warnf("libcontainerd: failed to get events from containerd: %q", err)
-
- if r.closedManually {
- // ignore error if grpc remote connection is closed manually
- return nil
- }
-
- <-time.After(100 * time.Millisecond)
- }
-
- go r.handleEventStream(events)
- return nil
-}
-
-func (r *remote) handleEventStream(events containerd.API_EventsClient) {
- for {
- e, err := events.Recv()
- if err != nil {
- if grpc.ErrorDesc(err) == transport.ErrConnClosing.Desc &&
- r.closedManually {
- // ignore error if grpc remote connection is closed manually
- return
- }
- logrus.Errorf("libcontainerd: failed to receive event from containerd: %v", err)
- go r.startEventsMonitor()
- return
- }
-
- logrus.Debugf("libcontainerd: received containerd event: %#v", e)
-
- var container *container
- var c *client
- r.RLock()
- for _, c = range r.clients {
- container, err = c.getContainer(e.Id)
- if err == nil {
- break
- }
- }
- r.RUnlock()
- if container == nil {
- logrus.Warnf("libcontainerd: unknown container %s", e.Id)
- continue
- }
-
- if err := container.handleEvent(e); err != nil {
- logrus.Errorf("libcontainerd: error processing state change for %s: %v", e.Id, err)
- }
-
- tsp, err := ptypes.Timestamp(e.Timestamp)
- if err != nil {
- logrus.Errorf("libcontainerd: failed to convert event timestamp: %q", err)
- continue
- }
-
- r.updateEventTimestamp(tsp)
- }
-}
-
-func (r *remote) runContainerdDaemon() error {
- pidFilename := filepath.Join(r.stateDir, containerdPidFilename)
- f, err := os.OpenFile(pidFilename, os.O_RDWR|os.O_CREATE, 0600)
- if err != nil {
- return err
- }
- defer f.Close()
-
- // File exist, check if the daemon is alive
- b := make([]byte, 8)
- n, err := f.Read(b)
- if err != nil && err != io.EOF {
- return err
- }
-
- if n > 0 {
- pid, err := strconv.ParseUint(string(b[:n]), 10, 64)
- if err != nil {
- return err
- }
- if system.IsProcessAlive(int(pid)) {
- logrus.Infof("libcontainerd: previous instance of containerd still alive (%d)", pid)
- r.daemonPid = int(pid)
- return nil
- }
- }
-
- // rewind the file
- _, err = f.Seek(0, os.SEEK_SET)
- if err != nil {
- return err
- }
-
- // Truncate it
- err = f.Truncate(0)
- if err != nil {
- return err
- }
-
- // Start a new instance
- args := []string{
- "-l", fmt.Sprintf("unix://%s", r.rpcAddr),
- "--metrics-interval=0",
- "--start-timeout", "2m",
- "--state-dir", filepath.Join(r.stateDir, containerdStateDir),
- }
- if goruntime.GOOS == "solaris" {
- args = append(args, "--shim", "containerd-shim", "--runtime", "runc")
- } else {
- args = append(args, "--shim", "docker-containerd-shim")
- if r.runtime != "" {
- args = append(args, "--runtime")
- args = append(args, r.runtime)
- }
- }
- if r.debugLog {
- args = append(args, "--debug")
- }
- if len(r.runtimeArgs) > 0 {
- for _, v := range r.runtimeArgs {
- args = append(args, "--runtime-args")
- args = append(args, v)
- }
- logrus.Debugf("libcontainerd: runContainerdDaemon: runtimeArgs: %s", args)
- }
-
- cmd := exec.Command(containerdBinary, args...)
- // redirect containerd logs to docker logs
- cmd.Stdout = os.Stdout
- cmd.Stderr = os.Stderr
- cmd.SysProcAttr = setSysProcAttr(true)
- cmd.Env = nil
- // clear the NOTIFY_SOCKET from the env when starting containerd
- for _, e := range os.Environ() {
- if !strings.HasPrefix(e, "NOTIFY_SOCKET") {
- cmd.Env = append(cmd.Env, e)
- }
- }
- if err := cmd.Start(); err != nil {
- return err
- }
-
- // unless strictly necessary, do not add anything in between here
- // as the reaper goroutine below needs to kick in as soon as possible
- // and any "return" from code paths added here will defeat the reaper
- // process.
-
- r.daemonWaitCh = make(chan struct{})
- go func() {
- cmd.Wait()
- close(r.daemonWaitCh)
- }() // Reap our child when needed
-
- logrus.Infof("libcontainerd: new containerd process, pid: %d", cmd.Process.Pid)
- if err := setOOMScore(cmd.Process.Pid, r.oomScore); err != nil {
- system.KillProcess(cmd.Process.Pid)
- return err
- }
- if _, err := f.WriteString(fmt.Sprintf("%d", cmd.Process.Pid)); err != nil {
- system.KillProcess(cmd.Process.Pid)
- return err
- }
-
- r.daemonPid = cmd.Process.Pid
- return nil
-}
-
-// WithRemoteAddr sets the external containerd socket to connect to.
-func WithRemoteAddr(addr string) RemoteOption {
- return rpcAddr(addr)
-}
-
-type rpcAddr string
-
-func (a rpcAddr) Apply(r Remote) error {
- if remote, ok := r.(*remote); ok {
- remote.rpcAddr = string(a)
- return nil
- }
- return fmt.Errorf("WithRemoteAddr option not supported for this remote")
-}
-
-// WithRuntimePath sets the path of the runtime to be used as the
-// default by containerd
-func WithRuntimePath(rt string) RemoteOption {
- return runtimePath(rt)
-}
-
-type runtimePath string
-
-func (rt runtimePath) Apply(r Remote) error {
- if remote, ok := r.(*remote); ok {
- remote.runtime = string(rt)
- return nil
- }
- return fmt.Errorf("WithRuntime option not supported for this remote")
-}
-
-// WithRuntimeArgs sets the list of runtime args passed to containerd
-func WithRuntimeArgs(args []string) RemoteOption {
- return runtimeArgs(args)
-}
-
-type runtimeArgs []string
-
-func (rt runtimeArgs) Apply(r Remote) error {
- if remote, ok := r.(*remote); ok {
- remote.runtimeArgs = rt
- return nil
- }
- return fmt.Errorf("WithRuntimeArgs option not supported for this remote")
-}
-
-// WithStartDaemon defines if libcontainerd should also run containerd daemon.
-func WithStartDaemon(start bool) RemoteOption {
- return startDaemon(start)
-}
-
-type startDaemon bool
-
-func (s startDaemon) Apply(r Remote) error {
- if remote, ok := r.(*remote); ok {
- remote.startDaemon = bool(s)
- return nil
- }
- return fmt.Errorf("WithStartDaemon option not supported for this remote")
-}
-
-// WithDebugLog defines if containerd debug logs will be enabled for daemon.
-func WithDebugLog(debug bool) RemoteOption {
- return debugLog(debug)
-}
-
-type debugLog bool
-
-func (d debugLog) Apply(r Remote) error {
- if remote, ok := r.(*remote); ok {
- remote.debugLog = bool(d)
- return nil
- }
- return fmt.Errorf("WithDebugLog option not supported for this remote")
-}
-
-// WithLiveRestore defines if containers are stopped on shutdown or restored.
-func WithLiveRestore(v bool) RemoteOption {
- return liveRestore(v)
-}
-
-type liveRestore bool
-
-func (l liveRestore) Apply(r Remote) error {
- if remote, ok := r.(*remote); ok {
- remote.liveRestore = bool(l)
- for _, c := range remote.clients {
- c.liveRestore = bool(l)
- }
- return nil
- }
- return fmt.Errorf("WithLiveRestore option not supported for this remote")
-}
-
-// WithOOMScore defines the oom_score_adj to set for the containerd process.
-func WithOOMScore(score int) RemoteOption {
- return oomScore(score)
-}
-
-type oomScore int
-
-func (o oomScore) Apply(r Remote) error {
- if remote, ok := r.(*remote); ok {
- remote.oomScore = int(o)
- return nil
- }
- return fmt.Errorf("WithOOMScore option not supported for this remote")
-}
diff --git a/libcontainerd/remote_windows.go b/libcontainerd/remote_windows.go
deleted file mode 100644
index 74c10447bb..0000000000
--- a/libcontainerd/remote_windows.go
+++ /dev/null
@@ -1,36 +0,0 @@
-package libcontainerd
-
-import "github.com/docker/docker/pkg/locker"
-
-type remote struct {
-}
-
-func (r *remote) Client(b Backend) (Client, error) {
- c := &client{
- clientCommon: clientCommon{
- backend: b,
- containers: make(map[string]*container),
- locker: locker.New(),
- },
- }
- return c, nil
-}
-
-// Cleanup is a no-op on Windows. It is here to implement the interface.
-func (r *remote) Cleanup() {
-}
-
-func (r *remote) UpdateOptions(opts ...RemoteOption) error {
- return nil
-}
-
-// New creates a fresh instance of libcontainerd remote. On Windows,
-// this is not used as there is no remote containerd process.
-func New(_ string, _ ...RemoteOption) (Remote, error) {
- return &remote{}, nil
-}
-
-// WithLiveRestore is a noop on windows.
-func WithLiveRestore(v bool) RemoteOption {
- return nil
-}
diff --git a/libcontainerd/types.go b/libcontainerd/types.go
index c7ade6b188..9e05c16bf8 100644
--- a/libcontainerd/types.go
+++ b/libcontainerd/types.go
@@ -1,64 +1,110 @@
package libcontainerd
import (
+ "context"
"io"
+ "time"
- containerd "github.com/containerd/containerd/api/grpc/types"
+ "github.com/containerd/containerd"
"github.com/opencontainers/runtime-spec/specs-go"
- "golang.org/x/net/context"
)
-// State constants used in state change reporting.
+// EventType represents a possible event from libcontainerd
+type EventType string
+
+// Event constants used when reporting events
+const (
+ EventUnknown EventType = "unknown"
+ EventExit EventType = "exit"
+ EventOOM EventType = "oom"
+ EventCreate EventType = "create"
+ EventStart EventType = "start"
+ EventExecAdded EventType = "exec-added"
+ EventExecStarted EventType = "exec-started"
+ EventPaused EventType = "paused"
+ EventResumed EventType = "resumed"
+)
+
+// Status represents the current status of a container
+type Status string
+
+// Possible container statuses
const (
- StateStart = "start-container"
- StatePause = "pause"
- StateResume = "resume"
- StateExit = "exit"
- StateRestore = "restore"
- StateExitProcess = "exit-process"
- StateOOM = "oom" // fake state
+ // StatusRunning indicates the process is currently executing
+ StatusRunning Status = "running"
+ // StatusCreated indicates the process has been created within containerd but the
+ // user-defined process has not started
+ StatusCreated Status = "created"
+ // StatusStopped indicates that the process has run and exited
+ StatusStopped Status = "stopped"
+ // StatusPaused indicates that the process is currently paused
+ StatusPaused Status = "paused"
+ // StatusPausing indicates that the process is currently switching from a
+ // running state into a paused state
+ StatusPausing Status = "pausing"
+ // StatusUnknown indicates that we could not determine the status from the runtime
+ StatusUnknown Status = "unknown"
)
-// CommonStateInfo contains the state info common to all platforms.
-type CommonStateInfo struct { // FIXME: event?
- State string
- Pid uint32
- ExitCode uint32
- ProcessID string
+// Remote on Linux defines the access point to the containerd gRPC API.
+// Remote on Windows is largely an unimplemented interface as there is
+// no remote containerd.
+type Remote interface {
+ // NewClient returns a new Client instance connected with the given Backend.
+ NewClient(namespace string, backend Backend) (Client, error)
+ // Cleanup stops containerd if it was started by libcontainerd.
+ // Note this is not used on Windows as there is no remote containerd.
+ Cleanup()
+}
+
+// RemoteOption allows configuring parameters of remotes.
+// This is unused on Windows.
+type RemoteOption interface {
+ Apply(Remote) error
+}
+
+// EventInfo contains the event info
+type EventInfo struct {
+ ContainerID string
+ ProcessID string
+ Pid uint32
+ ExitCode uint32
+ ExitedAt time.Time
+ OOMKilled bool
+ // Windows Only field
+ UpdatePending bool
}
// Backend defines callbacks that the client of the library needs to implement.
type Backend interface {
- StateChanged(containerID string, state StateInfo) error
+ ProcessEvent(containerID string, event EventType, ei EventInfo) error
}
// Client provides access to containerd features.
type Client interface {
- GetServerVersion(ctx context.Context) (*ServerVersion, error)
- Create(containerID string, checkpoint string, checkpointDir string, spec specs.Spec, attachStdio StdioCallback, options ...CreateOption) error
- Signal(containerID string, sig int) error
- SignalProcess(containerID string, processFriendlyName string, sig int) error
- AddProcess(ctx context.Context, containerID, processFriendlyName string, process Process, attachStdio StdioCallback) (int, error)
- Resize(containerID, processFriendlyName string, width, height int) error
- Pause(containerID string) error
- Resume(containerID string) error
- Restore(containerID string, attachStdio StdioCallback, options ...CreateOption) error
- Stats(containerID string) (*Stats, error)
- GetPidsForContainer(containerID string) ([]int, error)
- Summary(containerID string) ([]Summary, error)
- UpdateResources(containerID string, resources Resources) error
- CreateCheckpoint(containerID string, checkpointID string, checkpointDir string, exit bool) error
- DeleteCheckpoint(containerID string, checkpointID string, checkpointDir string) error
- ListCheckpoints(containerID string, checkpointDir string) (*Checkpoints, error)
-}
+ Restore(ctx context.Context, containerID string, attachStdio StdioCallback) (alive bool, pid int, err error)
+
+ Create(ctx context.Context, containerID string, spec *specs.Spec, runtimeOptions interface{}) error
+ Start(ctx context.Context, containerID, checkpointDir string, withStdin bool, attachStdio StdioCallback) (pid int, err error)
+ SignalProcess(ctx context.Context, containerID, processID string, signal int) error
+ Exec(ctx context.Context, containerID, processID string, spec *specs.Process, withStdin bool, attachStdio StdioCallback) (int, error)
+ ResizeTerminal(ctx context.Context, containerID, processID string, width, height int) error
+ CloseStdin(ctx context.Context, containerID, processID string) error
+ Pause(ctx context.Context, containerID string) error
+ Resume(ctx context.Context, containerID string) error
+ Stats(ctx context.Context, containerID string) (*Stats, error)
+ ListPids(ctx context.Context, containerID string) ([]uint32, error)
+ Summary(ctx context.Context, containerID string) ([]Summary, error)
+ DeleteTask(ctx context.Context, containerID string) (uint32, time.Time, error)
+ Delete(ctx context.Context, containerID string) error
+ Status(ctx context.Context, containerID string) (Status, error)
-// CreateOption allows to configure parameters of container creation.
-type CreateOption interface {
- Apply(interface{}) error
+ UpdateResources(ctx context.Context, containerID string, resources *Resources) error
+ CreateCheckpoint(ctx context.Context, containerID, checkpointDir string, exit bool) error
}
// StdioCallback is called to connect a container or process stdio.
-type StdioCallback func(IOPipe) error
+type StdioCallback func(*IOPipe) (containerd.IO, error)
// IOPipe contains the stdio streams.
type IOPipe struct {
@@ -66,10 +112,12 @@ type IOPipe struct {
Stdout io.ReadCloser
Stderr io.ReadCloser
Terminal bool // Whether stderr is connected on Windows
+
+ cancel context.CancelFunc
+ config containerd.IOConfig
}
// ServerVersion contains version information as retrieved from the
// server
type ServerVersion struct {
- containerd.GetServerVersionResponse
}
diff --git a/libcontainerd/types_linux.go b/libcontainerd/types_linux.go
index f21a85eec9..b63efcb15c 100644
--- a/libcontainerd/types_linux.go
+++ b/libcontainerd/types_linux.go
@@ -1,49 +1,30 @@
package libcontainerd
import (
- containerd "github.com/containerd/containerd/api/grpc/types"
- "github.com/opencontainers/runtime-spec/specs-go"
-)
+ "time"
-// Process contains information to start a specific application inside the container.
-type Process struct {
- // Terminal creates an interactive terminal for the container.
- Terminal bool `json:"terminal"`
- // User specifies user information for the process.
- User *specs.User `json:"user"`
- // Args specifies the binary and arguments for the application to execute.
- Args []string `json:"args"`
- // Env populates the process environment for the process.
- Env []string `json:"env,omitempty"`
- // Cwd is the current working directory for the process and must be
- // relative to the container's root.
- Cwd *string `json:"cwd"`
- // Capabilities are linux capabilities that are kept for the container.
- Capabilities []string `json:"capabilities,omitempty"`
- // Rlimits specifies rlimit options to apply to the process.
- Rlimits []specs.POSIXRlimit `json:"rlimits,omitempty"`
- // ApparmorProfile specifies the apparmor profile for the container.
- ApparmorProfile *string `json:"apparmorProfile,omitempty"`
- // SelinuxLabel specifies the selinux context that the container process is run as.
- SelinuxLabel *string `json:"selinuxLabel,omitempty"`
-}
+ "github.com/containerd/cgroups"
+ specs "github.com/opencontainers/runtime-spec/specs-go"
+)
-// StateInfo contains description about the new state container has entered.
-type StateInfo struct {
- CommonStateInfo
+// Summary is not used on linux
+type Summary struct{}
- // Platform specific StateInfo
- OOMKilled bool
+// Stats holds metrics properties as returned by containerd
+type Stats struct {
+ Read time.Time
+ Metrics *cgroups.Metrics
}
-// Stats contains a stats properties from containerd.
-type Stats containerd.StatsResponse
-
-// Summary contains a container summary from containerd
-type Summary struct{}
+func interfaceToStats(read time.Time, v interface{}) *Stats {
+ return &Stats{
+ Metrics: v.(*cgroups.Metrics),
+ Read: read,
+ }
+}
-// Resources defines updatable container resource values.
-type Resources containerd.UpdateResource
+// Resources defines updatable container resource values. TODO: it must match containerd upcoming API
+type Resources specs.LinuxResources
// Checkpoints contains the details of a checkpoint
-type Checkpoints containerd.ListCheckpointResponse
+type Checkpoints struct{}
diff --git a/libcontainerd/types_solaris.go b/libcontainerd/types_solaris.go
deleted file mode 100644
index 2ab18eb0da..0000000000
--- a/libcontainerd/types_solaris.go
+++ /dev/null
@@ -1,43 +0,0 @@
-package libcontainerd
-
-import (
- containerd "github.com/containerd/containerd/api/grpc/types"
- "github.com/opencontainers/runtime-spec/specs-go"
-)
-
-// Process contains information to start a specific application inside the container.
-type Process struct {
- // Terminal creates an interactive terminal for the container.
- Terminal bool `json:"terminal"`
- // User specifies user information for the process.
- User *specs.User `json:"user"`
- // Args specifies the binary and arguments for the application to execute.
- Args []string `json:"args"`
- // Env populates the process environment for the process.
- Env []string `json:"env,omitempty"`
- // Cwd is the current working directory for the process and must be
- // relative to the container's root.
- Cwd *string `json:"cwd"`
- // Capabilities are linux capabilities that are kept for the container.
- Capabilities []string `json:"capabilities,omitempty"`
-}
-
-// Stats contains a stats properties from containerd.
-type Stats struct{}
-
-// Summary contains a container summary from containerd
-type Summary struct{}
-
-// StateInfo contains description about the new state container has entered.
-type StateInfo struct {
- CommonStateInfo
-
- // Platform specific StateInfo
- OOMKilled bool
-}
-
-// Resources defines updatable container resource values.
-type Resources struct{}
-
-// Checkpoints contains the details of a checkpoint
-type Checkpoints containerd.ListCheckpointResponse
diff --git a/libcontainerd/types_windows.go b/libcontainerd/types_windows.go
index f271ecd479..aab8079f3b 100644
--- a/libcontainerd/types_windows.go
+++ b/libcontainerd/types_windows.go
@@ -1,27 +1,27 @@
package libcontainerd
import (
+ "time"
+
"github.com/Microsoft/hcsshim"
opengcs "github.com/Microsoft/opengcs/client"
- "github.com/opencontainers/runtime-spec/specs-go"
)
-// Process contains information to start a specific application inside the container.
-type Process specs.Process
-
// Summary contains a ProcessList item from HCS to support `top`
type Summary hcsshim.ProcessListItem
-// StateInfo contains description about the new state container has entered.
-type StateInfo struct {
- CommonStateInfo
-
- // Platform specific StateInfo
- UpdatePending bool // Indicates that there are some update operations pending that should be completed by a servicing container.
+// Stats contains statistics from HCS
+type Stats struct {
+ Read time.Time
+ HCSStats *hcsshim.Statistics
}
-// Stats contains statistics from HCS
-type Stats hcsshim.Statistics
+func interfaceToStats(read time.Time, v interface{}) *Stats {
+ return &Stats{
+ HCSStats: v.(*hcsshim.Statistics),
+ Read: read,
+ }
+}
// Resources defines updatable container resource values.
type Resources struct{}
diff --git a/libcontainerd/utils_linux.go b/libcontainerd/utils_linux.go
index 5372b886cf..0f0adf322d 100644
--- a/libcontainerd/utils_linux.go
+++ b/libcontainerd/utils_linux.go
@@ -1,63 +1,12 @@
package libcontainerd
-import (
- "syscall"
+import "syscall"
- containerd "github.com/containerd/containerd/api/grpc/types"
- "github.com/opencontainers/runtime-spec/specs-go"
- "golang.org/x/sys/unix"
-)
-
-func getRootIDs(s specs.Spec) (int, int, error) {
- var hasUserns bool
- for _, ns := range s.Linux.Namespaces {
- if ns.Type == specs.UserNamespace {
- hasUserns = true
- break
- }
- }
- if !hasUserns {
- return 0, 0, nil
- }
- uid := hostIDFromMap(0, s.Linux.UIDMappings)
- gid := hostIDFromMap(0, s.Linux.GIDMappings)
- return uid, gid, nil
-}
-
-func hostIDFromMap(id uint32, mp []specs.LinuxIDMapping) int {
- for _, m := range mp {
- if id >= m.ContainerID && id <= m.ContainerID+m.Size-1 {
- return int(m.HostID + id - m.ContainerID)
- }
- }
- return 0
-}
-
-func systemPid(ctr *containerd.Container) uint32 {
- var pid uint32
- for _, p := range ctr.Processes {
- if p.Pid == InitFriendlyName {
- pid = p.SystemPid
- }
- }
- return pid
-}
-
-func convertRlimits(sr []specs.POSIXRlimit) (cr []*containerd.Rlimit) {
- for _, r := range sr {
- cr = append(cr, &containerd.Rlimit{
- Type: r.Type,
- Hard: r.Hard,
- Soft: r.Soft,
- })
- }
- return
-}
-
-// setPDeathSig sets the parent death signal to SIGKILL
-func setSysProcAttr(sid bool) *syscall.SysProcAttr {
+// containerdSysProcAttr returns the SysProcAttr to use when exec'ing
+// containerd
+func containerdSysProcAttr() *syscall.SysProcAttr {
return &syscall.SysProcAttr{
- Setsid: sid,
- Pdeathsig: unix.SIGKILL,
+ Setsid: true,
+ Pdeathsig: syscall.SIGKILL,
}
}
diff --git a/libcontainerd/utils_solaris.go b/libcontainerd/utils_solaris.go
deleted file mode 100644
index 10ae599808..0000000000
--- a/libcontainerd/utils_solaris.go
+++ /dev/null
@@ -1,27 +0,0 @@
-package libcontainerd
-
-import (
- "syscall"
-
- containerd "github.com/containerd/containerd/api/grpc/types"
- "github.com/opencontainers/runtime-spec/specs-go"
-)
-
-func getRootIDs(s specs.Spec) (int, int, error) {
- return 0, 0, nil
-}
-
-func systemPid(ctr *containerd.Container) uint32 {
- var pid uint32
- for _, p := range ctr.Processes {
- if p.Pid == InitFriendlyName {
- pid = p.SystemPid
- }
- }
- return pid
-}
-
-// setPDeathSig sets the parent death signal to SIGKILL
-func setSysProcAttr(sid bool) *syscall.SysProcAttr {
- return nil
-}
diff --git a/libcontainerd/utils_windows.go b/libcontainerd/utils_windows.go
index bca9fa2086..1347ff2b95 100644
--- a/libcontainerd/utils_windows.go
+++ b/libcontainerd/utils_windows.go
@@ -3,6 +3,8 @@ package libcontainerd
import (
"strings"
+ "syscall"
+
opengcs "github.com/Microsoft/opengcs/client"
)
@@ -36,3 +38,9 @@ func (c *container) debugGCS() {
}
cfg.DebugGCS()
}
+
+// containerdSysProcAttr returns the SysProcAttr to use when exec'ing
+// containerd
+func containerdSysProcAttr() *syscall.SysProcAttr {
+ return nil
+}
diff --git a/oci/defaults.go b/oci/defaults.go
index d706fafcc0..bf74ef4eeb 100644
--- a/oci/defaults.go
+++ b/oci/defaults.go
@@ -69,8 +69,14 @@ func DefaultSolarisSpec() specs.Spec {
func DefaultLinuxSpec() specs.Spec {
s := specs.Spec{
Version: specs.Version,
- Process: &specs.Process{},
- Root: &specs.Root{},
+ Process: &specs.Process{
+ Capabilities: &specs.LinuxCapabilities{
+ Bounding: defaultCapabilities(),
+ Permitted: defaultCapabilities(),
+ Inheritable: defaultCapabilities(),
+ Effective: defaultCapabilities(),
+ },
+ },
}
s.Mounts = []specs.Mount{
{
@@ -116,14 +122,6 @@ func DefaultLinuxSpec() specs.Spec {
Options: []string{"nosuid", "noexec", "nodev", "mode=1777"},
},
}
- s.Process = &specs.Process{
- Capabilities: &specs.LinuxCapabilities{
- Bounding: defaultCapabilities(),
- Permitted: defaultCapabilities(),
- Inheritable: defaultCapabilities(),
- Effective: defaultCapabilities(),
- },
- }
s.Linux = &specs.Linux{
MaskedPaths: []string{
diff --git a/pkg/authorization/plugin.go b/pkg/authorization/plugin.go
index 939f926744..2797a72d58 100644
--- a/pkg/authorization/plugin.go
+++ b/pkg/authorization/plugin.go
@@ -48,9 +48,10 @@ func GetPluginGetter() plugingetter.PluginGetter {
// authorizationPlugin is an internal adapter to docker plugin system
type authorizationPlugin struct {
- plugin *plugins.Client
- name string
- once sync.Once
+ initErr error
+ plugin *plugins.Client
+ name string
+ once sync.Once
}
func newAuthorizationPlugin(name string) Plugin {
@@ -95,7 +96,6 @@ func (a *authorizationPlugin) AuthZResponse(authReq *Request) (*Response, error)
// initPlugin initializes the authorization plugin if needed
func (a *authorizationPlugin) initPlugin() error {
// Lazy loading of plugins
- var err error
a.once.Do(func() {
if a.plugin == nil {
var plugin plugingetter.CompatPlugin
@@ -108,11 +108,11 @@ func (a *authorizationPlugin) initPlugin() error {
plugin, e = plugins.Get(a.name, AuthZApiImplements)
}
if e != nil {
- err = e
+ a.initErr = e
return
}
a.plugin = plugin.Client()
}
})
- return err
+ return a.initErr
}
diff --git a/pkg/mount/mount.go b/pkg/mount/mount.go
index c9fdfd6942..eced0219fd 100644
--- a/pkg/mount/mount.go
+++ b/pkg/mount/mount.go
@@ -3,6 +3,8 @@ package mount
import (
"sort"
"strings"
+
+ "github.com/sirupsen/logrus"
)
// GetMounts retrieves a list of mounts for the current running process.
@@ -74,12 +76,18 @@ func RecursiveUnmount(target string) error {
if !strings.HasPrefix(m.Mountpoint, target) {
continue
}
- if err := Unmount(m.Mountpoint); err != nil && i == len(mounts)-1 {
+ logrus.Debugf("Trying to unmount %s", m.Mountpoint)
+ err = Unmount(m.Mountpoint)
+ if err != nil && i == len(mounts)-1 {
if mounted, err := Mounted(m.Mountpoint); err != nil || mounted {
return err
}
// Ignore errors for submounts and continue trying to unmount others
// The final unmount should fail if there ane any submounts remaining
+ } else if err != nil {
+ logrus.Errorf("Failed to unmount %s: %v", m.Mountpoint, err)
+ } else if err == nil {
+ logrus.Debugf("Unmounted %s", m.Mountpoint)
}
}
return nil
diff --git a/pkg/system/process_windows.go b/pkg/system/process_windows.go
new file mode 100644
index 0000000000..5973c46de9
--- /dev/null
+++ b/pkg/system/process_windows.go
@@ -0,0 +1,18 @@
+package system
+
+import "os"
+
+// IsProcessAlive returns true if process with a given pid is running.
+func IsProcessAlive(pid int) bool {
+ _, err := os.FindProcess(pid)
+
+ return err == nil
+}
+
+// KillProcess force-stops a process.
+func KillProcess(pid int) {
+ p, err := os.FindProcess(pid)
+ if err == nil {
+ p.Kill()
+ }
+}
diff --git a/pkg/system/rm.go b/pkg/system/rm.go
index 101b569a56..c453adcdb9 100644
--- a/pkg/system/rm.go
+++ b/pkg/system/rm.go
@@ -26,7 +26,7 @@ func EnsureRemoveAll(dir string) error {
// track retries
exitOnErr := make(map[string]int)
- maxRetry := 5
+ maxRetry := 50
// Attempt to unmount anything beneath this dir first
mount.RecursiveUnmount(dir)
diff --git a/plugin/executor/containerd/containerd.go b/plugin/executor/containerd/containerd.go
index 74cf530cf1..d93b8b75ec 100644
--- a/plugin/executor/containerd/containerd.go
+++ b/plugin/executor/containerd/containerd.go
@@ -1,22 +1,35 @@
package containerd
import (
+ "context"
"io"
+ "path/filepath"
+ "sync"
+ "github.com/containerd/containerd"
+ "github.com/containerd/containerd/linux/runcopts"
+ "github.com/docker/docker/api/errdefs"
"github.com/docker/docker/libcontainerd"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
+ "github.com/sirupsen/logrus"
)
+// PluginNamespace is the name used for the plugins namespace
+var PluginNamespace = "moby-plugins"
+
// ExitHandler represents an object that is called when the exit event is received from containerd
type ExitHandler interface {
HandleExitEvent(id string) error
}
// New creates a new containerd plugin executor
-func New(remote libcontainerd.Remote, exitHandler ExitHandler) (*Executor, error) {
- e := &Executor{exitHandler: exitHandler}
- client, err := remote.Client(e)
+func New(rootDir string, remote libcontainerd.Remote, exitHandler ExitHandler) (*Executor, error) {
+ e := &Executor{
+ rootDir: rootDir,
+ exitHandler: exitHandler,
+ }
+ client, err := remote.NewClient(PluginNamespace, e)
if err != nil {
return nil, errors.Wrap(err, "error creating containerd exec client")
}
@@ -26,52 +39,108 @@ func New(remote libcontainerd.Remote, exitHandler ExitHandler) (*Executor, error
// Executor is the containerd client implementation of a plugin executor
type Executor struct {
+ rootDir string
client libcontainerd.Client
exitHandler ExitHandler
}
// Create creates a new container
func (e *Executor) Create(id string, spec specs.Spec, stdout, stderr io.WriteCloser) error {
- return e.client.Create(id, "", "", spec, attachStreamsFunc(stdout, stderr))
+ opts := runcopts.RuncOptions{
+ RuntimeRoot: filepath.Join(e.rootDir, "runtime-root"),
+ }
+ ctx := context.Background()
+ err := e.client.Create(ctx, id, &spec, &opts)
+ if err != nil {
+ return err
+ }
+
+ _, err = e.client.Start(ctx, id, "", false, attachStreamsFunc(stdout, stderr))
+ return err
}
// Restore restores a container
func (e *Executor) Restore(id string, stdout, stderr io.WriteCloser) error {
- return e.client.Restore(id, attachStreamsFunc(stdout, stderr))
+ alive, _, err := e.client.Restore(context.Background(), id, attachStreamsFunc(stdout, stderr))
+ if err != nil && !errdefs.IsNotFound(err) {
+ return err
+ }
+ if !alive {
+ _, _, err = e.client.DeleteTask(context.Background(), id)
+ if err != nil && !errdefs.IsNotFound(err) {
+ logrus.WithError(err).Errorf("failed to delete container plugin %s task from containerd", id)
+ return err
+ }
+
+ err = e.client.Delete(context.Background(), id)
+ if err != nil && !errdefs.IsNotFound(err) {
+ logrus.WithError(err).Errorf("failed to delete container plugin %s from containerd", id)
+ return err
+ }
+ }
+ return nil
}
// IsRunning returns if the container with the given id is running
func (e *Executor) IsRunning(id string) (bool, error) {
- pids, err := e.client.GetPidsForContainer(id)
- return len(pids) > 0, err
+ status, err := e.client.Status(context.Background(), id)
+ return status == libcontainerd.StatusRunning, err
}
// Signal sends the specified signal to the container
func (e *Executor) Signal(id string, signal int) error {
- return e.client.Signal(id, signal)
+ return e.client.SignalProcess(context.Background(), id, libcontainerd.InitProcessName, signal)
}
-// StateChanged handles state changes from containerd
+// ProcessEvent handles events from containerd
// All events are ignored except the exit event, which is sent of to the stored handler
-func (e *Executor) StateChanged(id string, event libcontainerd.StateInfo) error {
- switch event.State {
- case libcontainerd.StateExit:
- return e.exitHandler.HandleExitEvent(id)
+func (e *Executor) ProcessEvent(id string, et libcontainerd.EventType, ei libcontainerd.EventInfo) error {
+ switch et {
+ case libcontainerd.EventExit:
+ // delete task and container
+ if _, _, err := e.client.DeleteTask(context.Background(), id); err != nil {
+ logrus.WithError(err).Errorf("failed to delete container plugin %s task from containerd", id)
+ }
+
+ if err := e.client.Delete(context.Background(), id); err != nil {
+ logrus.WithError(err).Errorf("failed to delete container plugin %s from containerd", id)
+ }
+ return e.exitHandler.HandleExitEvent(ei.ContainerID)
}
return nil
}
-func attachStreamsFunc(stdout, stderr io.WriteCloser) func(libcontainerd.IOPipe) error {
- return func(iop libcontainerd.IOPipe) error {
- iop.Stdin.Close()
+type cio struct {
+ containerd.IO
+
+ wg sync.WaitGroup
+}
+
+func (c *cio) Wait() {
+ c.wg.Wait()
+ c.IO.Wait()
+}
+
+func attachStreamsFunc(stdout, stderr io.WriteCloser) libcontainerd.StdioCallback {
+ return func(iop *libcontainerd.IOPipe) (containerd.IO, error) {
+ if iop.Stdin != nil {
+ iop.Stdin.Close()
+ // closing stdin shouldn't be needed here, it should never be open
+ panic("plugin stdin shouldn't have been created!")
+ }
+
+ cio := &cio{IO: iop}
+ cio.wg.Add(2)
go func() {
io.Copy(stdout, iop.Stdout)
stdout.Close()
+ cio.wg.Done()
}()
go func() {
io.Copy(stderr, iop.Stderr)
stderr.Close()
+ cio.wg.Done()
}()
- return nil
+ return cio, nil
}
}
diff --git a/plugin/manager_linux.go b/plugin/manager_linux.go
index beefc3dfba..eff21e1d05 100644
--- a/plugin/manager_linux.go
+++ b/plugin/manager_linux.go
@@ -23,7 +23,7 @@ import (
"golang.org/x/sys/unix"
)
-func (pm *Manager) enable(p *v2.Plugin, c *controller, force bool) error {
+func (pm *Manager) enable(p *v2.Plugin, c *controller, force bool) (err error) {
p.Rootfs = filepath.Join(pm.config.Root, p.PluginObj.ID, "rootfs")
if p.IsEnabled() && !force {
return errors.Wrap(enabledError(p.Name()), "plugin already enabled")
@@ -44,15 +44,15 @@ func (pm *Manager) enable(p *v2.Plugin, c *controller, force bool) error {
if p.PropagatedMount != "" {
propRoot = filepath.Join(filepath.Dir(p.Rootfs), "propagated-mount")
- if err := os.MkdirAll(propRoot, 0755); err != nil {
+ if err = os.MkdirAll(propRoot, 0755); err != nil {
logrus.Errorf("failed to create PropagatedMount directory at %s: %v", propRoot, err)
}
- if err := mount.MakeRShared(propRoot); err != nil {
+ if err = mount.MakeRShared(propRoot); err != nil {
return errors.Wrap(err, "error setting up propagated mount dir")
}
- if err := mount.Mount(propRoot, p.PropagatedMount, "none", "rbind"); err != nil {
+ if err = mount.Mount(propRoot, p.PropagatedMount, "none", "rbind"); err != nil {
return errors.Wrap(err, "error creating mount for propagated mount")
}
}
@@ -72,7 +72,6 @@ func (pm *Manager) enable(p *v2.Plugin, c *controller, force bool) error {
logrus.Warnf("Could not unmount %s: %v", propRoot, err)
}
}
- return errors.WithStack(err)
}
return pm.pluginPostStart(p, c)
@@ -159,6 +158,12 @@ func shutdownPlugin(p *v2.Plugin, c *controller, executor Executor) {
if err := executor.Signal(pluginID, int(unix.SIGKILL)); err != nil {
logrus.Errorf("Sending SIGKILL to plugin failed with error: %v", err)
}
+ select {
+ case <-c.exitChan:
+ logrus.Debug("SIGKILL plugin shutdown")
+ case <-time.After(time.Second * 10):
+ logrus.Debug("Force shutdown plugin FAILED")
+ }
}
}
}