summaryrefslogtreecommitdiff
path: root/libnetwork/cmd
diff options
context:
space:
mode:
authorFlavio Crisciani <flavio.crisciani@docker.com>2018-02-07 09:20:55 -0800
committerFlavio Crisciani <flavio.crisciani@docker.com>2018-02-07 09:20:55 -0800
commita16d469867ba740fca19cd7082a20384bd8a2339 (patch)
tree27d5442a9b165f77eee8033e2e0b03d08a4b3b53 /libnetwork/cmd
parent2459e6fbd384fad28c605bc10b6d49de9e0a5198 (diff)
downloaddocker-a16d469867ba740fca19cd7082a20384bd8a2339.tar.gz
Import the ssd tool in libnetwork
Signed-off-by: Flavio Crisciani <flavio.crisciani@docker.com>
Diffstat (limited to 'libnetwork/cmd')
-rwxr-xr-xlibnetwork/cmd/ssd/Dockerfile34
-rwxr-xr-xlibnetwork/cmd/ssd/README.md47
-rwxr-xr-xlibnetwork/cmd/ssd/ssd.py180
3 files changed, 261 insertions, 0 deletions
diff --git a/libnetwork/cmd/ssd/Dockerfile b/libnetwork/cmd/ssd/Dockerfile
new file mode 100755
index 0000000000..0542247920
--- /dev/null
+++ b/libnetwork/cmd/ssd/Dockerfile
@@ -0,0 +1,34 @@
# Image for the ssd (Swarm Service Driller) troubleshooting tool.
# Bundles Python 2 plus the networking utilities the script shells out to
# (nsenter from util-linux, ipvsadm, bash) and general debugging tools.
FROM alpine:3.7
ENV PACKAGES="\
    musl \
    linux-headers \
    build-base \
    util-linux \
    bash \
    git \
    ca-certificates \
    python2 \
    python2-dev \
    py-setuptools \
    iproute2 \
    curl \
    strace \
    drill \
    ipvsadm \
    iperf \
    ethtool \
"

# Install the packages and make sure unversioned python/pip entry points exist.
RUN echo \
    && apk add --no-cache $PACKAGES \
    && if [[ ! -e /usr/bin/python ]]; then ln -sf /usr/bin/python2.7 /usr/bin/python; fi \
    && if [[ ! -e /usr/bin/python-config ]]; then ln -sf /usr/bin/python2.7-config /usr/bin/python-config; fi \
    && if [[ ! -e /usr/bin/easy_install ]]; then ln -sf /usr/bin/easy_install-2.7 /usr/bin/easy_install; fi \
    && easy_install pip \
    && pip install --upgrade pip \
    && if [[ ! -e /usr/bin/pip ]]; then ln -sf /usr/bin/pip2.7 /usr/bin/pip; fi \
    && echo

# Fetch the Docker SDK over HTTPS: the unauthenticated git:// protocol is
# unencrypted and is no longer served by GitHub. Installing it before copying
# the script keeps this layer cached when only ssd.py changes.
RUN pip install git+https://github.com/docker/docker-py.git

# COPY is preferred over ADD for a plain local file (no archive extraction
# or remote fetch semantics are needed).
COPY ssd.py /
ENTRYPOINT [ "python", "/ssd.py"]
diff --git a/libnetwork/cmd/ssd/README.md b/libnetwork/cmd/ssd/README.md
new file mode 100755
index 0000000000..a0a0048da5
--- /dev/null
+++ b/libnetwork/cmd/ssd/README.md
@@ -0,0 +1,47 @@
+# Docker Swarm Service Driller(ssd)
+
+ssd is a troubleshooting utility for Docker swarm networks.
+
+### control-plane and datapath consistency check on a node
+ssd checks for the consistency between docker network control-plane (from the docker daemon in-memory state) and kernel data path programming. Currently the tool checks only for the consistency of the Load balancer (implemented using IPVS).
+
+The following shows the ssd status in a three-node swarm cluster for an overlay network `ov2` that has three services running, each replicated to 3 instances.
+
+````bash
+vagrant@net-1:~/code/go/src/github.com/docker/docker-e2e/tests$ docker run -v /var/run/docker.sock:/var/run/docker.sock -v /var/run/docker/netns:/var/run/docker/netns --privileged --net=host sanimej/ssd ov2
+Verifying LB programming for containers on network ov2
+Verifying container /s2.3.ltrdwef0iqf90rqauw3ehcs56...
+service s2... OK
+service s3... OK
+service s1... OK
+Verifying container /s3.3.nyhwvdvnocb4wftyhb8dr4fj8...
+service s2... OK
+service s3... OK
+service s1... OK
+Verifying container /s1.3.wwx5tuxhnvoz5vrb8ohphby0r...
+service s2... OK
+service s3... OK
+service s1... OK
+Verifying LB programming for containers on network ingress
+Verifying container Ingress...
+service web... OK
+````
+
+ssd checks the required iptables programming to direct an incoming packet with the <host ip>:<published port> to the right <backend ip>:<target port>
+
+### control-plane consistency check across nodes in a cluster
+
+Docker networking uses a gossip protocol to synchronize networking state across nodes in a cluster. ssd's `gossip-consistency` command verifies whether the state maintained by all the nodes is consistent.
+
+````bash
+In a three-node cluster with services running on an overlay network ov2, ssd gossip-consistency shows:
+
+vagrant@net-1:~/code/go/src/github.com/docker/docker-e2e/tests$ docker run -v /var/run/docker.sock:/var/run/docker.sock -v /var/run/docker/netns:/var/run/docker/netns --privileged sanimej/ssd ov2 gossip-consistency
+Node id: sjfp0ca8f43rvnab6v7f21gq0 gossip hash c57d89094dbb574a37930393278dc282
+
+Node id: bg228r3q9095grj4wxkqs80oe gossip hash c57d89094dbb574a37930393278dc282
+
+Node id: 6jylcraipcv2pxdricqe77j5q gossip hash c57d89094dbb574a37930393278dc282
+````
+
+This is the hash digest of the control-plane state for the network `ov2` from all the cluster nodes. If the values do not match, `docker network inspect --verbose` on the individual nodes can help in identifying what the specific difference is.
diff --git a/libnetwork/cmd/ssd/ssd.py b/libnetwork/cmd/ssd/ssd.py
new file mode 100755
index 0000000000..2cfc039bd3
--- /dev/null
+++ b/libnetwork/cmd/ssd/ssd.py
@@ -0,0 +1,180 @@
+#!/usr/bin/python
+
+import sys, signal, time
+import docker
+import re
+import subprocess
+import json
+import hashlib
+
# Matches a dotted-quad IPv4 address; each octet is captured in its own group.
# The separators are escaped so that only a literal '.' is accepted between
# octets (an unescaped '.' would match any character, e.g. "1a2b3c4").
_IPV4_OCTET = r'(25[0-5]|2[0-4][0-9]|[01]?[0-9]?[0-9])'
ipv4match = re.compile(r'\.'.join([_IPV4_OCTET] * 4))
+
def check_iptables(name, plist):
    """Report missing host DNAT rules for a service's ports.

    name  -- service name, used only in the diagnostic message.
    plist -- iterable of port description strings. Each string is split on
             ':' , ',' and whitespace and the 2nd and 4th tokens are kept
             (presumably the target and published port -- verify against
             the daemon's "Ports" format).

    For every entry, `iptables -t nat -C DOCKER-INGRESS ...` is run on the
    host; a message is printed when the check command fails. Nothing is
    returned.
    """
    port_pairs = []
    for spec in plist:
        tokens = spec.replace(':', ' ').replace(',', ' ').split()
        port_pairs.append((tokens[1], tokens[3]))

    # get the ingress sandbox's docker_gwbridge network IP.
    # published ports get DNAT'ed to this IP.
    sandbox_cmd = [
        '/usr/bin/nsenter',
        '--net=/var/run/docker/netns/ingress_sbox',
        '/bin/bash',
        '-c',
        'ifconfig eth1 | grep \"inet\\ addr\" | cut -d: -f2 | cut -d\" \" -f1',
    ]
    ip = subprocess.check_output(sandbox_cmd).rstrip()

    # Only the second element of each pair is used: it is both the matched
    # destination port and the port of the DNAT target.
    for _, dport in port_pairs:
        rule = '/sbin/iptables -t nat -C DOCKER-INGRESS -p tcp --dport {0} -j DNAT --to {1}:{2}'.format(dport, ip, dport)
        try:
            subprocess.check_output(["/bin/bash", "-c", rule])
        except subprocess.CalledProcessError:
            print("Service {0}: host iptables DNAT rule for port {1} -> ingress sandbox {2}:{3} missing".format(name, dport, ip, dport))
+
def get_namespaces(data, ingress=False):
    """Return a mapping of display name -> network namespace path.

    data    -- network inspect result; only its "Containers" mapping is read,
               and only when `ingress` is not True.
    ingress -- when exactly True, the fixed ingress sandbox namespace is
               returned and `data` is not consulted.

    For a regular network every key of data["Containers"] is inspected once
    through the module-level `cli` client, and the container's "Name" is
    mapped to its NetworkSettings "SandboxKey". A network with no (or a
    missing/None) "Containers" entry yields an empty dict instead of failing.
    """
    if ingress is True:
        return {"Ingress": "/var/run/docker/netns/ingress_sbox"}

    containers = {}
    for container_id in data.get("Containers") or {}:
        # One inspect call per container provides both the name and the sandbox.
        inspect = cli.inspect_container(str(container_id))
        containers[str(inspect["Name"])] = str(inspect["NetworkSettings"]["SandboxKey"])
    return containers
+
+
def _parse_ipvs_backends(ipvs_output):
    """Group the backend IPs listed in `ipvsadm -ln` output by firewall mark.

    A line containing "FWM" starts a new virtual service (its first number is
    the mark); following lines containing "->" contribute the first IPv4
    address found on them. Any other line ends the current service.
    """
    backends = {}
    mark = ""
    for line in ipvs_output.splitlines():
        if "FWM" in line:
            mark = str(re.findall("[0-9]+", line)[0])
            backends[mark] = []
        elif "->" in line:
            # "->" lines seen before any FWM line belong to no service.
            if mark == "":
                continue
            ip = ipv4match.search(line)
            if ip is not None:
                backends[mark].append(format(ip.group(0)))
        else:
            mark = ""
    return backends


def check_network(nw_name, ingress=False):
    """Compare the daemon's load-balancer state with the kernel IPVS state.

    nw_name -- name of the network to inspect through the module-level `cli`.
    ingress -- when exactly True, also verify the host iptables rules of each
               service and check the ingress sandbox instead of the
               containers attached to the network.

    For every namespace returned by get_namespaces(), `ipvsadm -ln` is run
    inside it and the backends per firewall mark are compared with the task
    endpoint IPs the daemon reports per "LocalLBIndex". Findings are printed;
    nothing is returned.
    """
    print("Verifying LB programming for containers on network %s" % nw_name)

    data = cli.inspect_network(nw_name, verbose=True)

    services = data["Services"]
    fwmarks = {str(service): str(svalue["LocalLBIndex"]) for service, svalue in services.items()}
    # Reverse lookup used to name the service that owns a given LB index.
    names_by_mark = {idx: sname for sname, idx in fwmarks.items()}

    # Control-plane view: LB index -> list of task endpoint IPs.
    stasks = {}
    for service, svalue in services.items():
        if service == "":
            continue
        stasks[fwmarks[str(service)]] = [str(task["EndpointIP"]) for task in svalue["Tasks"]]

        # for services in ingress network verify the iptables rules
        # that direct ingress (published port) to backend (target port)
        if ingress is True:
            check_iptables(service, svalue["Ports"])

    containers = get_namespaces(data, ingress)
    for container, namespace in containers.items():
        print("Verifying container %s..." % container)
        ipvs = subprocess.check_output(['/usr/bin/nsenter', '--net=%s' % namespace, '/usr/sbin/ipvsadm', '-ln'])
        realmark = _parse_ipvs_backends(ipvs)

        # Mismatches are only reported here. `stasks` is deliberately left
        # untouched so that every container is compared against the complete
        # control-plane state, not one trimmed by an earlier container.
        for key in realmark:
            if key not in stasks:
                print("LB Index %s present in IPVS but missing in docker daemon" % key)

        for key in stasks:
            if key not in realmark:
                print("LB Index %s present in docker daemon but missing in IPVS" % key)

        for key in realmark:
            if key not in stasks:
                continue
            service = names_by_mark.get(key, "--Invalid--")
            # NOTE(review): only the number of distinct backends is compared,
            # not the addresses themselves -- confirm whether that is intended.
            if len(set(realmark[key])) != len(set(stasks[key])):
                print("Incorrect LB Programming for service %s" % service)
                print("control-plane backend tasks:")
                for task in stasks[key]:
                    print(task)
                print("kernel IPVS backend tasks:")
                for task in realmark[key]:
                    print(task)
            else:
                print("service %s... OK" % service)
+
# Script entry point.
#   ssd.py <network>                     verify LB programming for <network>
#                                        and for the ingress network
#   ssd.py <network> gossip-consistency  print a per-node hash of the
#                                        control-plane state of <network>
#   ssd.py <network> gossip-hash         internal: run by the tasks of the
#                                        temporary global service
if __name__ == '__main__':
    if len(sys.argv) < 2:
        print('Usage: ssd.py network-name [gossip-consistency]')
        # A missing argument is an error; do not report success to the caller.
        sys.exit(1)

    # `cli` is intentionally a module-level name: the helper functions in
    # this file use it directly.
    cli = docker.APIClient(base_url='unix://var/run/docker.sock', version='auto')
    if len(sys.argv) == 3:
        command = sys.argv[2]
    else:
        command = 'default'

    if command == 'gossip-consistency':
        cspec = docker.types.ContainerSpec(
            image='sanimej/ssd',
            args=[sys.argv[1], 'gossip-hash'],
            mounts=[docker.types.Mount('/var/run/docker.sock', '/var/run/docker.sock', type='bind')]
        )
        mode = docker.types.ServiceMode(
            mode='global'
        )
        task_template = docker.types.TaskTemplate(cspec)

        cli.create_service(task_template, name='gossip-hash', mode=mode)
        try:
            #TODO change to a deterministic way to check if the service is up.
            time.sleep(5)
            output = cli.service_logs('gossip-hash', stdout=True, stderr=True, details=True)
            for line in output:
                # The node id is the text between the first '=' and the first
                # ','; the hash is everything after the first space.
                print("Node id: %s gossip hash %s" % (line[line.find("=")+1:line.find(",")], line[line.find(" ")+1:]))
        finally:
            # Always remove the temporary service, even when reading the logs
            # failed, so it is not left running on the cluster.
            if cli.remove_service('gossip-hash') is not True:
                print("Deleting gossip-hash service failed")
    elif command == 'gossip-hash':
        data = cli.inspect_network(sys.argv[1], verbose=True)
        services = data["Services"]
        md5 = hashlib.md5()
        # Collect service names, VIPs and all task field values (one level of
        # nested dicts is flattened), then hash them in sorted order so the
        # digest does not depend on iteration order.
        entries = []
        for service, value in services.items():
            entries.append(service)
            entries.append(value["VIP"])
            for task in value["Tasks"]:
                for key, val in task.items():
                    if isinstance(val, dict):
                        entries.extend(val.values())
                    else:
                        entries.append(val)
        entries.sort()
        for e in entries:
            md5.update(e)
        print(md5.hexdigest())
        sys.stdout.flush()
        # Block until the task is stopped; the hash is collected from the
        # service logs by the 'gossip-consistency' command.
        while True:
            signal.pause()
    elif command == 'default':
        if sys.argv[1] == "ingress":
            check_network("ingress", ingress=True)
        else:
            check_network(sys.argv[1])
            check_network("ingress", ingress=True)