summaryrefslogtreecommitdiff
path: root/utilities
diff options
context:
space:
mode:
authorEelco Chaudron <echaudro@redhat.com>2021-12-22 10:18:23 +0100
committerIlya Maximets <i.maximets@ovn.org>2022-01-18 00:46:30 +0100
commit51ec98635e3f193fc8b0b4fd5636dd831e4d3073 (patch)
treefe69e438cef779841c8381b57d2b0b33239fc884 /utilities
parentff4c712d45d7b696039061b1821a39dcd72d88fb (diff)
downloadopenvswitch-51ec98635e3f193fc8b0b4fd5636dd831e4d3073.tar.gz
utilities: Add upcall USDT probe and associated script.
Added the dpif_recv:recv_upcall USDT probe, which is used by the included upcall_monitor.py script. This script receives all upcall packets sent by the kernel to ovs-vswitchd. By default, it will show all upcall events, which looks something like this: TIME CPU COMM PID DPIF_NAME TYPE PKT_LEN FLOW_KEY_LEN 5952147.003848809 2 handler4 1381158 system@ovs-system 0 98 132 5952147.003879643 2 handler4 1381158 system@ovs-system 0 70 160 5952147.003914924 2 handler4 1381158 system@ovs-system 0 98 152 It can also dump the packet and NetLink content, and if required, the packets can also be written to a pcap file. Signed-off-by: Eelco Chaudron <echaudro@redhat.com> Acked-by: Paolo Valerio <pvalerio@redhat.com> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
Diffstat (limited to 'utilities')
-rw-r--r--utilities/automake.mk6
-rwxr-xr-xutilities/usdt-scripts/upcall_monitor.py533
2 files changed, 537 insertions, 2 deletions
diff --git a/utilities/automake.mk b/utilities/automake.mk
index 2be8423d6..9da729404 100644
--- a/utilities/automake.mk
+++ b/utilities/automake.mk
@@ -62,7 +62,8 @@ EXTRA_DIST += \
utilities/docker/create_ovs_db.sh \
utilities/docker/debian/Dockerfile \
utilities/docker/debian/build-kernel-modules.sh \
- utilities/usdt-scripts/bridge_loop.bt
+ utilities/usdt-scripts/bridge_loop.bt \
+ utilities/usdt-scripts/upcall_monitor.py
MAN_ROOTS += \
utilities/ovs-testcontroller.8.in \
utilities/ovs-dpctl.8.in \
@@ -130,6 +131,7 @@ FLAKE8_PYFILES += utilities/ovs-pcap.in \
utilities/gdb/ovs_gdb.py \
utilities/ovs-check-dead-ifs.in \
utilities/ovs-tcpdump.in \
- utilities/ovs-pipegen.py
+ utilities/ovs-pipegen.py \
+ utilities/usdt-scripts/upcall_monitor.py
include utilities/bugtool/automake.mk
diff --git a/utilities/usdt-scripts/upcall_monitor.py b/utilities/usdt-scripts/upcall_monitor.py
new file mode 100755
index 000000000..d856547f0
--- /dev/null
+++ b/utilities/usdt-scripts/upcall_monitor.py
@@ -0,0 +1,533 @@
+#!/usr/bin/env python3
+#
+# Copyright (c) 2021 Red Hat, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Script information:
+# -------------------
+# upcall_monitor.py uses the dpif_recv:recv_upcall USDT to receive all upcall
+# packets sent by the kernel to ovs-vswitchd. By default, it will show all
+# upcall events, which looks something like this:
+#
+# TIME CPU COMM PID DPIF_NAME TYPE PKT_LEN...
+# 5952147.003848809 2 handler4 1381158 system@ovs-system 0 98 132
+# 5952147.003879643 2 handler4 1381158 system@ovs-system 0 70 160
+# 5952147.003914924 2 handler4 1381158 system@ovs-system 0 98 152
+#
+# In addition, the packet and flow key data can be dumped. This can be done
+# using the --packet-decode and --flow-key decode options (see below).
+#
+# Note that by default only 64 bytes of the packet and flow key are retrieved.
+# If you would like to capture all or more of the packet and/or flow key data,
+# the ----packet-size and --flow-key-size options can be used.
+#
+# If required, the received packets can also be stored in a pcap file using the
+# --pcap option.
+#
+# The following are the available options:
+#
+# usage: upcall_monitor.py [-h] [-D [DEBUG]] [-d {none,hex,decode}]
+# [-f [64-2048]] [-k {none,hex,nlraw}]
+# [-p VSWITCHD_PID] [-s [64-2048]] [-w PCAP_FILE]
+#
+# optional arguments:
+# -h, --help show this help message and exit
+# -D [DEBUG], --debug [DEBUG]
+# Enable eBPF debugging
+# -d {none,hex,decode}, --packet-decode {none,hex,decode}
+# Display packet content in selected mode,
+# default none
+# -f [64-2048], --flow-key-size [64-2048]
+# Set maximum flow key size to capture, default 64
+# -k {none,hex,nlraw}, --flow-key-decode {none,hex,nlraw}
+# Display flow-key content in selected mode, default
+# none
+# -p VSWITCHD_PID, --pid VSWITCHD_PID
+# ovs-vswitch's PID
+# -s [64-2048], --packet-size [64-2048]
+# Set maximum packet size to capture, default 64
+# -w PCAP_FILE, --pcap PCAP_FILE
+# Write upcall packets to specified pcap file.
+#
+# The following is an example of how to use the script on the running
+# ovs-vswitchd process with a packet and flow key dump enabled:
+#
+# $ ./upcall_monitor.py --packet-decode decode --flow-key-decode nlraw \
+# --packet-size 128 --flow-key-size 256
+# TIME CPU COMM PID DPIF_NAME ...
+# 5953013.333214231 2 handler4 1381158 system@ovs-system ...
+# Flow key size 132 bytes, size captured 132 bytes.
+# nla_len 8, nla_type OVS_KEY_ATTR_RECIRC_ID[20], data: 00 00 00 00
+# nla_len 8, nla_type OVS_KEY_ATTR_DP_HASH[19], data: 00 00 00 00
+# nla_len 8, nla_type OVS_KEY_ATTR_PRIORITY[2], data: 00 00 00 00
+# nla_len 8, nla_type OVS_KEY_ATTR_IN_PORT[3], data: 02 00 00 00
+# nla_len 8, nla_type OVS_KEY_ATTR_SKB_MARK[15], data: 00 00 00 00
+# nla_len 8, nla_type OVS_KEY_ATTR_CT_STATE[22], data: 00 00 00 00
+# nla_len 6, nla_type OVS_KEY_ATTR_CT_ZONE[23], data: 00 00
+# nla_len 8, nla_type OVS_KEY_ATTR_CT_MARK[24], data: 00 00 00 00
+# nla_len 20, nla_type OVS_KEY_ATTR_CT_LABELS[25], data: 00 00 00 00 ...
+# nla_len 16, nla_type OVS_KEY_ATTR_ETHERNET[4], data: 04 f4 bc 28 57 ...
+# nla_len 6, nla_type OVS_KEY_ATTR_ETHERTYPE[6], data: 08 00
+# nla_len 16, nla_type OVS_KEY_ATTR_IPV4[7], data: 01 01 01 64 01 01 ...
+# nla_len 6, nla_type OVS_KEY_ATTR_ICMP[11], data: 00 00
+# 1: Receive dp_port 2, packet size 98 bytes, size captured 98 bytes.
+# ###[ Ethernet ]###
+# dst = 3c:fd:fe:9e:7f:68
+# src = 04:f4:bc:28:57:01
+# type = IPv4
+# ###[ IP ]###
+# version = 4
+# ihl = 5
+# tos = 0x0
+# len = 84
+# id = 41404
+# flags = DF
+# frag = 0
+# ttl = 64
+# proto = icmp
+# chksum = 0x940c
+# src = 1.1.1.100
+# dst = 1.1.1.123
+# \options \
+# ###[ ICMP ]###
+# type = echo-reply
+# code = 0
+# chksum = 0x2f55
+# id = 0x90e6
+# seq = 0x1
+# ###[ Raw ]###
+# load = 'GBTa\x00\x00\x00\x00\xd8L\r\x00\x00\x00\x00\...
+#
+
+from bcc import BPF, USDT, USDTException
+from os.path import exists
+from scapy.all import hexdump, wrpcap
+from scapy.layers.l2 import Ether
+
+import argparse
+import psutil
+import re
+import struct
+import sys
+import time
+
+#
+# Actual eBPF source code
+#
+ebpf_source = """
+#include <linux/sched.h>
+
+#define MAX_PACKET <MAX_PACKET_VAL>
+#define MAX_KEY <MAX_KEY_VAL>
+
+struct event_t {
+ u32 cpu;
+ u32 pid;
+ u32 upcall_type;
+ u64 ts;
+ u32 pkt_size;
+ u64 key_size;
+ char comm[TASK_COMM_LEN];
+ char dpif_name[32];
+ unsigned char pkt[MAX_PACKET];
+ unsigned char key[MAX_KEY];
+};
+BPF_RINGBUF_OUTPUT(events, <BUFFER_PAGE_CNT>);
+BPF_TABLE("percpu_array", uint32_t, uint64_t, dropcnt, 1);
+
+int do_trace(struct pt_regs *ctx) {
+ uint64_t addr;
+ uint64_t size;
+
+ struct event_t *event = events.ringbuf_reserve(sizeof(struct event_t));
+ if (!event) {
+ uint32_t type = 0;
+ uint64_t *value = dropcnt.lookup(&type);
+ if (value)
+ __sync_fetch_and_add(value, 1);
+
+ return 1;
+ }
+
+ event->ts = bpf_ktime_get_ns();
+ event->cpu = bpf_get_smp_processor_id();
+ event->pid = bpf_get_current_pid_tgid();
+ bpf_get_current_comm(&event->comm, sizeof(event->comm));
+
+ bpf_usdt_readarg(1, ctx, &addr);
+ bpf_probe_read_str(&event->dpif_name, sizeof(event->dpif_name),
+ (void *)addr);
+
+ bpf_usdt_readarg(2, ctx, &event->upcall_type);
+ bpf_usdt_readarg(4, ctx, &event->pkt_size);
+ bpf_usdt_readarg(6, ctx, &event->key_size);
+
+ if (event->pkt_size > MAX_PACKET)
+ size = MAX_PACKET;
+ else
+ size = event->pkt_size;
+ bpf_usdt_readarg(3, ctx, &addr);
+ bpf_probe_read(&event->pkt, size, (void *)addr);
+
+ if (event->key_size > MAX_KEY)
+ size = MAX_KEY;
+ else
+ size = event->key_size;
+ bpf_usdt_readarg(5, ctx, &addr);
+ bpf_probe_read(&event->key, size, (void *)addr);
+
+ events.ringbuf_submit(event, 0);
+ return 0;
+};
+"""
+
+
+#
+# print_event()
+#
+def print_event(ctx, data, size):
+ event = b['events'].event(data)
+ print("{:<18.9f} {:<4} {:<16} {:<10} {:<32} {:<4} {:<10} {:<10}".
+ format(event.ts / 1000000000,
+ event.cpu,
+ event.comm.decode("utf-8"),
+ event.pid,
+ event.dpif_name.decode("utf-8"),
+ event.upcall_type,
+ event.pkt_size,
+ event.key_size))
+
+ #
+ # Dump flow key information
+ #
+ if event.key_size < options.flow_key_size:
+ key_len = event.key_size
+ else:
+ key_len = options.flow_key_size
+
+ if options.flow_key_decode != 'none':
+ print(" Flow key size {} bytes, size captured {} bytes.".
+ format(event.key_size, key_len))
+
+ if options.flow_key_decode == 'hex':
+ #
+ # Abuse scapy's hex dump to dump flow key
+ #
+ print(re.sub('^', ' ' * 4, hexdump(Ether(bytes(event.key)[:key_len]),
+ dump=True),
+ flags=re.MULTILINE))
+
+ if options.flow_key_decode == 'nlraw':
+ nla = decode_nlm(bytes(event.key)[:key_len])
+ else:
+ nla = decode_nlm(bytes(event.key)[:key_len], dump=False)
+
+ if "OVS_KEY_ATTR_IN_PORT" in nla:
+ port = struct.unpack("=I", nla["OVS_KEY_ATTR_IN_PORT"])[0]
+ else:
+ port = "Unknown"
+
+ #
+ # Decode packet only if there is data
+ #
+ if event.pkt_size <= 0:
+ return
+
+ pkt_id = get_pkt_id()
+
+ if event.pkt_size < options.packet_size:
+ pkt_len = event.pkt_size
+ pkt_data = bytes(event.pkt)[:event.pkt_size]
+ else:
+ pkt_len = options.packet_size
+ pkt_data = bytes(event.pkt)
+
+ if options.packet_decode != 'none' or options.pcap is not None:
+ print(" {}: Receive dp_port {}, packet size {} bytes, size "
+ "captured {} bytes.".format(pkt_id, port, event.pkt_size,
+ pkt_len))
+
+ if options.packet_decode == 'hex':
+ print(re.sub('^', ' ' * 4, hexdump(pkt_data, dump=True),
+ flags=re.MULTILINE))
+
+ packet = Ether(pkt_data)
+ packet.wirelen = event.pkt_size
+
+ if options.packet_decode == 'decode':
+ print(re.sub('^', ' ' * 4, packet.show(dump=True), flags=re.MULTILINE))
+
+ if options.pcap is not None:
+ wrpcap(options.pcap, packet, append=True, snaplen=options.packet_size)
+
+
+#
+# decode_nlm()
+#
+def decode_nlm(msg, indent=4, dump=True):
+ bytes_left = len(msg)
+ result = {}
+
+ while bytes_left:
+ if bytes_left < 4:
+ if dump:
+ print("{}WARN: decode truncated; can't read header".format(
+ ' ' * indent))
+ break
+
+ nla_len, nla_type = struct.unpack("=HH", msg[:4])
+
+ if nla_len < 4:
+ if dump:
+ print("{}WARN: decode truncated; nla_len < 4".format(
+ ' ' * indent))
+ break
+
+ nla_data = msg[4:nla_len]
+ trunc = ""
+
+ if nla_len > bytes_left:
+ trunc = "..."
+ nla_data = nla_data[:(bytes_left - 4)]
+ else:
+ result[get_ovs_key_attr_str(nla_type)] = nla_data
+
+ if dump:
+ print("{}nla_len {}, nla_type {}[{}], data: {}{}".format(
+ ' ' * indent, nla_len, get_ovs_key_attr_str(nla_type),
+ nla_type,
+ "".join("{:02x} ".format(b) for b in nla_data), trunc))
+
+ if trunc != "":
+ if dump:
+ print("{}WARN: decode truncated; nla_len > msg_len[{}] ".
+ format(' ' * indent, bytes_left))
+ break
+
+ # update next offset, but make sure it's aligned correctly
+ next_offset = (nla_len + 3) & ~(3)
+ msg = msg[next_offset:]
+ bytes_left -= next_offset
+
+ return result
+
+
+#
+# get_ovs_key_attr_str()
+#
+def get_ovs_key_attr_str(attr):
+ ovs_key_attr = ["OVS_KEY_ATTR_UNSPEC",
+ "OVS_KEY_ATTR_ENCAP",
+ "OVS_KEY_ATTR_PRIORITY",
+ "OVS_KEY_ATTR_IN_PORT",
+ "OVS_KEY_ATTR_ETHERNET",
+ "OVS_KEY_ATTR_VLAN",
+ "OVS_KEY_ATTR_ETHERTYPE",
+ "OVS_KEY_ATTR_IPV4",
+ "OVS_KEY_ATTR_IPV6",
+ "OVS_KEY_ATTR_TCP",
+ "OVS_KEY_ATTR_UDP",
+ "OVS_KEY_ATTR_ICMP",
+ "OVS_KEY_ATTR_ICMPV6",
+ "OVS_KEY_ATTR_ARP",
+ "OVS_KEY_ATTR_ND",
+ "OVS_KEY_ATTR_SKB_MARK",
+ "OVS_KEY_ATTR_TUNNEL",
+ "OVS_KEY_ATTR_SCTP",
+ "OVS_KEY_ATTR_TCP_FLAGS",
+ "OVS_KEY_ATTR_DP_HASH",
+ "OVS_KEY_ATTR_RECIRC_ID",
+ "OVS_KEY_ATTR_MPLS",
+ "OVS_KEY_ATTR_CT_STATE",
+ "OVS_KEY_ATTR_CT_ZONE",
+ "OVS_KEY_ATTR_CT_MARK",
+ "OVS_KEY_ATTR_CT_LABELS",
+ "OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4",
+ "OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6",
+ "OVS_KEY_ATTR_NSH"]
+
+ if attr < 0 or attr > len(ovs_key_attr):
+ return "<UNKNOWN>"
+
+ return ovs_key_attr[attr]
+
+
+#
+# get_pkt_id()
+#
+def get_pkt_id():
+ if not hasattr(get_pkt_id, "counter"):
+ get_pkt_id.counter = 0
+ get_pkt_id.counter += 1
+ return get_pkt_id.counter
+
+
+#
+# buffer_size_type()
+#
+def buffer_size_type(astr, min=64, max=2048):
+ value = int(astr)
+ if min <= value <= max:
+ return value
+ else:
+ raise argparse.ArgumentTypeError(
+ 'value not in range {}-{}'.format(min, max))
+
+
+#
+# next_power_of_two()
+#
+def next_power_of_two(val):
+ np = 1
+ while np < val:
+ np *= 2
+ return np
+
+
+#
+# main()
+#
+def main():
+ #
+ # Don't like these globals, but ctx passing does not seem to work with the
+ # existing open_ring_buffer() API :(
+ #
+ global b
+ global options
+
+ #
+ # Argument parsing
+ #
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--buffer-page-count",
+ help="Number of BPF ring buffer pages, default 1024",
+ type=int, default=1024, metavar="NUMBER")
+ parser.add_argument("-D", "--debug",
+ help="Enable eBPF debugging",
+ type=int, const=0x3f, default=0, nargs='?')
+ parser.add_argument('-d', '--packet-decode',
+ help='Display packet content in selected mode, '
+ 'default none',
+ choices=['none', 'hex', 'decode'], default='none')
+ parser.add_argument("-f", "--flow-key-size",
+ help="Set maximum flow key size to capture, "
+ "default 64", type=buffer_size_type, default=64,
+ metavar="[64-2048]")
+ parser.add_argument('-k', '--flow-key-decode',
+ help='Display flow-key content in selected mode, '
+ 'default none',
+ choices=['none', 'hex', 'nlraw'], default='none')
+ parser.add_argument("-p", "--pid", metavar="VSWITCHD_PID",
+ help="ovs-vswitch's PID",
+ type=int, default=None)
+ parser.add_argument("-s", "--packet-size",
+ help="Set maximum packet size to capture, "
+ "default 64", type=buffer_size_type, default=64,
+ metavar="[64-2048]")
+ parser.add_argument("-w", "--pcap", metavar="PCAP_FILE",
+ help="Write upcall packets to specified pcap file.",
+ type=str, default=None)
+
+ options = parser.parse_args()
+
+ #
+ # Find the PID of the ovs-vswitchd daemon if not specified.
+ #
+ if options.pid is None:
+ for proc in psutil.process_iter():
+ if 'ovs-vswitchd' in proc.name():
+ if options.pid is not None:
+ print("ERROR: Multiple ovs-vswitchd daemons running, "
+ "use the -p option!")
+ sys.exit(-1)
+
+ options.pid = proc.pid
+
+ #
+ # Error checking on input parameters
+ #
+ if options.pid is None:
+ print("ERROR: Failed to find ovs-vswitchd's PID!")
+ sys.exit(-1)
+
+ if options.pcap is not None:
+ if exists(options.pcap):
+ print("ERROR: Destination capture file \"{}\" already exists!".
+ format(options.pcap))
+ sys.exit(-1)
+
+ options.buffer_page_count = next_power_of_two(options.buffer_page_count)
+
+ #
+ # Attach the usdt probe
+ #
+ u = USDT(pid=int(options.pid))
+ try:
+ u.enable_probe(probe="recv_upcall", fn_name="do_trace")
+ except USDTException as e:
+ print("ERROR: {}"
+ "ovs-vswitchd!".format(
+ (re.sub('^', ' ' * 7, str(e), flags=re.MULTILINE)).strip().
+ replace("--with-dtrace or --enable-dtrace",
+ "--enable-usdt-probes")))
+ sys.exit(-1)
+
+ #
+ # Uncomment to see how arguments are decoded.
+ # print(u.get_text())
+ #
+
+ #
+ # Attach probe to running process
+ #
+ source = ebpf_source.replace("<MAX_PACKET_VAL>", str(options.packet_size))
+ source = source.replace("<MAX_KEY_VAL>", str(options.flow_key_size))
+ source = source.replace("<BUFFER_PAGE_CNT>",
+ str(options.buffer_page_count))
+
+ b = BPF(text=source, usdt_contexts=[u], debug=options.debug)
+
+ #
+ # Print header
+ #
+ print("{:<18} {:<4} {:<16} {:<10} {:<32} {:<4} {:<10} {:<10}".format(
+ "TIME", "CPU", "COMM", "PID", "DPIF_NAME", "TYPE", "PKT_LEN",
+ "FLOW_KEY_LEN"))
+
+ #
+ # Dump out all events
+ #
+ b['events'].open_ring_buffer(print_event)
+ while 1:
+ try:
+ b.ring_buffer_poll()
+ time.sleep(0.5)
+ except KeyboardInterrupt:
+ break
+
+ dropcnt = b.get_table("dropcnt")
+ for k in dropcnt.keys():
+ count = dropcnt.sum(k).value
+ if k.value == 0 and count > 0:
+ print("\nWARNING: Not all upcalls were captured, {} were dropped!"
+ "\n Increase the BPF ring buffer size with the "
+ "--buffer-page-count option.".format(count))
+
+
+#
+# Start main() as the default entry point...
+#
+if __name__ == '__main__':
+ main()