summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/trace/ftrace-design.txt233
-rw-r--r--Documentation/trace/ftrace.txt6
-rw-r--r--include/linux/kprobes.h4
-rw-r--r--include/trace/ftrace.h8
-rw-r--r--kernel/trace/Kconfig28
-rw-r--r--kernel/trace/ftrace.c4
-rw-r--r--kernel/trace/trace_clock.c24
-rw-r--r--kernel/trace/trace_entries.h27
-rw-r--r--kernel/trace/trace_events.c14
-rw-r--r--kernel/trace/trace_export.c71
10 files changed, 349 insertions, 70 deletions
diff --git a/Documentation/trace/ftrace-design.txt b/Documentation/trace/ftrace-design.txt
new file mode 100644
index 000000000000..7003e10f10f5
--- /dev/null
+++ b/Documentation/trace/ftrace-design.txt
@@ -0,0 +1,233 @@
+ function tracer guts
+ ====================
+
+Introduction
+------------
+
+Here we will cover the architecture pieces that the common function tracing
+code relies on for proper functioning. Things are broken down into increasing
+complexity so that you can start simple and at least get basic functionality.
+
+Note that this focuses on architecture implementation details only. If you
+want more explanation of a feature in terms of common code, review the common
+ftrace.txt file.
+
+
+Prerequisites
+-------------
+
+Ftrace relies on these features being implemented:
+ STACKTRACE_SUPPORT - implement save_stack_trace()
+ TRACE_IRQFLAGS_SUPPORT - implement include/asm/irqflags.h
+
+
+HAVE_FUNCTION_TRACER
+--------------------
+
+You will need to implement the mcount and the ftrace_stub functions.
+
+The exact mcount symbol name will depend on your toolchain. Some call it
+"mcount", "_mcount", or even "__mcount". You can probably figure it out by
+running something like:
+ $ echo 'main(){}' | gcc -x c -S -o - - -pg | grep mcount
+ call mcount
+We'll make the assumption below that the symbol is "mcount" just to keep things
+nice and simple in the examples.
+
+Keep in mind that the ABI that is in effect inside of the mcount function is
+*highly* architecture/toolchain specific. We cannot help you in this regard,
+sorry. Dig up some old documentation and/or find someone more familiar than
+you to bang ideas off of. Typically, register usage (argument/scratch/etc...)
+is a major issue at this point, especially in relation to the location of the
+mcount call (before/after function prologue). You might also want to look at
+how glibc has implemented the mcount function for your architecture. It might
+be (semi-)relevant.
+
+The mcount function should check the function pointer ftrace_trace_function
+to see if it is set to ftrace_stub. If it is, there is nothing for you to do,
+so return immediately. If it isn't, then call that function in the same way
+the mcount function normally calls __mcount_internal -- the first argument is
+the "frompc" while the second argument is the "selfpc" (adjusted to remove the
+size of the mcount call that is embedded in the function).
+
+For example, if the function foo() calls bar(), when the bar() function calls
+mcount(), the arguments mcount() will pass to the tracer are:
+ "frompc" - the address bar() will use to return to foo()
+ "selfpc" - the address bar() (with _mcount() size adjustment)
+
+Also keep in mind that this mcount function will be called *a lot*, so
+optimizing for the default case of no tracer will help the smooth running of
+your system when tracing is disabled. So the start of the mcount function is
+typically the bare min with checking things before returning. That also means
+the code flow should usually kept linear (i.e. no branching in the nop case).
+This is of course an optimization and not a hard requirement.
+
+Here is some pseudo code that should help (these functions should actually be
+implemented in assembly):
+
+void ftrace_stub(void)
+{
+ return;
+}
+
+void mcount(void)
+{
+ /* save any bare state needed in order to do initial checking */
+
+ extern void (*ftrace_trace_function)(unsigned long, unsigned long);
+ if (ftrace_trace_function != ftrace_stub)
+ goto do_trace;
+
+ /* restore any bare state */
+
+ return;
+
+do_trace:
+
+ /* save all state needed by the ABI (see paragraph above) */
+
+ unsigned long frompc = ...;
+ unsigned long selfpc = <return address> - MCOUNT_INSN_SIZE;
+ ftrace_trace_function(frompc, selfpc);
+
+ /* restore all state needed by the ABI */
+}
+
+Don't forget to export mcount for modules !
+extern void mcount(void);
+EXPORT_SYMBOL(mcount);
+
+
+HAVE_FUNCTION_TRACE_MCOUNT_TEST
+-------------------------------
+
+This is an optional optimization for the normal case when tracing is turned off
+in the system. If you do not enable this Kconfig option, the common ftrace
+code will take care of doing the checking for you.
+
+To support this feature, you only need to check the function_trace_stop
+variable in the mcount function. If it is non-zero, there is no tracing to be
+done at all, so you can return.
+
+This additional pseudo code would simply be:
+void mcount(void)
+{
+ /* save any bare state needed in order to do initial checking */
+
++ if (function_trace_stop)
++ return;
+
+ extern void (*ftrace_trace_function)(unsigned long, unsigned long);
+ if (ftrace_trace_function != ftrace_stub)
+...
+
+
+HAVE_FUNCTION_GRAPH_TRACER
+--------------------------
+
+Deep breath ... time to do some real work. Here you will need to update the
+mcount function to check ftrace graph function pointers, as well as implement
+some functions to save (hijack) and restore the return address.
+
+The mcount function should check the function pointers ftrace_graph_return
+(compare to ftrace_stub) and ftrace_graph_entry (compare to
+ftrace_graph_entry_stub). If either of those are not set to the relevant stub
+function, call the arch-specific function ftrace_graph_caller which in turn
+calls the arch-specific function prepare_ftrace_return. Neither of these
+function names are strictly required, but you should use them anyways to stay
+consistent across the architecture ports -- easier to compare & contrast
+things.
+
+The arguments to prepare_ftrace_return are slightly different than what are
+passed to ftrace_trace_function. The second argument "selfpc" is the same,
+but the first argument should be a pointer to the "frompc". Typically this is
+located on the stack. This allows the function to hijack the return address
+temporarily to have it point to the arch-specific function return_to_handler.
+That function will simply call the common ftrace_return_to_handler function and
+that will return the original return address with which, you can return to the
+original call site.
+
+Here is the updated mcount pseudo code:
+void mcount(void)
+{
+...
+ if (ftrace_trace_function != ftrace_stub)
+ goto do_trace;
+
++#ifdef CONFIG_FUNCTION_GRAPH_TRACER
++ extern void (*ftrace_graph_return)(...);
++ extern void (*ftrace_graph_entry)(...);
++ if (ftrace_graph_return != ftrace_stub ||
++ ftrace_graph_entry != ftrace_graph_entry_stub)
++ ftrace_graph_caller();
++#endif
+
+ /* restore any bare state */
+...
+
+Here is the pseudo code for the new ftrace_graph_caller assembly function:
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+void ftrace_graph_caller(void)
+{
+ /* save all state needed by the ABI */
+
+ unsigned long *frompc = &...;
+ unsigned long selfpc = <return address> - MCOUNT_INSN_SIZE;
+ prepare_ftrace_return(frompc, selfpc);
+
+ /* restore all state needed by the ABI */
+}
+#endif
+
+For information on how to implement prepare_ftrace_return(), simply look at
+the x86 version. The only architecture-specific piece in it is the setup of
+the fault recovery table (the asm(...) code). The rest should be the same
+across architectures.
+
+Here is the pseudo code for the new return_to_handler assembly function. Note
+that the ABI that applies here is different from what applies to the mcount
+code. Since you are returning from a function (after the epilogue), you might
+be able to skimp on things saved/restored (usually just registers used to pass
+return values).
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+void return_to_handler(void)
+{
+ /* save all state needed by the ABI (see paragraph above) */
+
+ void (*original_return_point)(void) = ftrace_return_to_handler();
+
+ /* restore all state needed by the ABI */
+
+ /* this is usually either a return or a jump */
+ original_return_point();
+}
+#endif
+
+
+HAVE_FTRACE_NMI_ENTER
+---------------------
+
+If you can't trace NMI functions, then skip this option.
+
+<details to be filled>
+
+
+HAVE_FTRACE_SYSCALLS
+---------------------
+
+<details to be filled>
+
+
+HAVE_FTRACE_MCOUNT_RECORD
+-------------------------
+
+See scripts/recordmcount.pl for more info.
+
+<details to be filled>
+
+
+HAVE_DYNAMIC_FTRACE
+---------------------
+
+<details to be filled>
diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index 355d0f1f8c50..1b6292bbdd6d 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -26,6 +26,12 @@ disabled, and more (ftrace allows for tracer plugins, which
means that the list of tracers can always grow).
+Implementation Details
+----------------------
+
+See ftrace-design.txt for details for arch porters and such.
+
+
The File System
---------------
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index bcd9c07848be..3a46b7b7abb2 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -48,13 +48,13 @@
#define KPROBE_HIT_SSDONE 0x00000008
/* Attach to insert probes on any functions which should be ignored*/
-#define __kprobes __attribute__((__section__(".kprobes.text"))) notrace
+#define __kprobes __attribute__((__section__(".kprobes.text")))
#else /* CONFIG_KPROBES */
typedef int kprobe_opcode_t;
struct arch_specific_insn {
int dummy;
};
-#define __kprobes notrace
+#define __kprobes
#endif /* CONFIG_KPROBES */
struct kprobe;
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 308bafd93325..72a3b437b829 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -239,9 +239,9 @@ ftrace_format_##call(struct ftrace_event_call *unused, \
#undef __print_flags
#define __print_flags(flag, delim, flag_array...) \
({ \
- static const struct trace_print_flags flags[] = \
+ static const struct trace_print_flags __flags[] = \
{ flag_array, { -1, NULL }}; \
- ftrace_print_flags_seq(p, delim, flag, flags); \
+ ftrace_print_flags_seq(p, delim, flag, __flags); \
})
#undef __print_symbolic
@@ -254,7 +254,7 @@ ftrace_format_##call(struct ftrace_event_call *unused, \
#undef TRACE_EVENT
#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \
-enum print_line_t \
+static enum print_line_t \
ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \
{ \
struct trace_seq *s = &iter->seq; \
@@ -317,7 +317,7 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \
#undef TRACE_EVENT
#define TRACE_EVENT(call, proto, args, tstruct, func, print) \
-int \
+static int \
ftrace_define_fields_##call(struct ftrace_event_call *event_call) \
{ \
struct ftrace_raw_##call field; \
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 1ea0d1234f4a..e71634604400 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -11,12 +11,18 @@ config NOP_TRACER
config HAVE_FTRACE_NMI_ENTER
bool
+ help
+ See Documentation/trace/ftrace-implementation.txt
config HAVE_FUNCTION_TRACER
bool
+ help
+ See Documentation/trace/ftrace-implementation.txt
config HAVE_FUNCTION_GRAPH_TRACER
bool
+ help
+ See Documentation/trace/ftrace-implementation.txt
config HAVE_FUNCTION_GRAPH_FP_TEST
bool
@@ -28,21 +34,25 @@ config HAVE_FUNCTION_GRAPH_FP_TEST
config HAVE_FUNCTION_TRACE_MCOUNT_TEST
bool
help
- This gets selected when the arch tests the function_trace_stop
- variable at the mcount call site. Otherwise, this variable
- is tested by the called function.
+ See Documentation/trace/ftrace-implementation.txt
config HAVE_DYNAMIC_FTRACE
bool
+ help
+ See Documentation/trace/ftrace-implementation.txt
config HAVE_FTRACE_MCOUNT_RECORD
bool
+ help
+ See Documentation/trace/ftrace-implementation.txt
config HAVE_HW_BRANCH_TRACER
bool
config HAVE_SYSCALL_TRACEPOINTS
bool
+ help
+ See Documentation/trace/ftrace-implementation.txt
config TRACER_MAX_TRACE
bool
@@ -469,6 +479,18 @@ config FTRACE_STARTUP_TEST
functioning properly. It will do tests on all the configured
tracers of ftrace.
+config EVENT_TRACE_TEST_SYSCALLS
+ bool "Run selftest on syscall events"
+ depends on FTRACE_STARTUP_TEST
+ help
+ This option will also enable testing every syscall event.
+ It only enables the event and disables it and runs various loads
+ with the event enabled. This adds a bit more time for kernel boot
+ up since it runs this on every system call defined.
+
+ TBD - enable a way to actually call the syscalls as we test their
+ events
+
config MMIOTRACE
bool "Memory mapped IO tracing"
depends on HAVE_MMIOTRACE_SUPPORT && PCI
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 8b23d5670088..f7ab7fc162cc 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2062,9 +2062,9 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
int i, len = 0;
char *search;
- if (glob && (strcmp(glob, "*") || !strlen(glob)))
+ if (glob && (strcmp(glob, "*") == 0 || !strlen(glob)))
glob = NULL;
- else {
+ else if (glob) {
int not;
type = ftrace_setup_glob(glob, strlen(glob), &search, &not);
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index b588fd81f7f9..20c5f92e28a8 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -66,10 +66,14 @@ u64 notrace trace_clock(void)
* Used by plugins that need globally coherent timestamps.
*/
-static u64 prev_trace_clock_time;
-
-static raw_spinlock_t trace_clock_lock ____cacheline_aligned_in_smp =
- (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+/* keep prev_time and lock in the same cacheline. */
+static struct {
+ u64 prev_time;
+ raw_spinlock_t lock;
+} trace_clock_struct ____cacheline_aligned_in_smp =
+ {
+ .lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED,
+ };
u64 notrace trace_clock_global(void)
{
@@ -88,19 +92,19 @@ u64 notrace trace_clock_global(void)
if (unlikely(in_nmi()))
goto out;
- __raw_spin_lock(&trace_clock_lock);
+ __raw_spin_lock(&trace_clock_struct.lock);
/*
* TODO: if this happens often then maybe we should reset
- * my_scd->clock to prev_trace_clock_time+1, to make sure
+ * my_scd->clock to prev_time+1, to make sure
* we start ticking with the local clock from now on?
*/
- if ((s64)(now - prev_trace_clock_time) < 0)
- now = prev_trace_clock_time + 1;
+ if ((s64)(now - trace_clock_struct.prev_time) < 0)
+ now = trace_clock_struct.prev_time + 1;
- prev_trace_clock_time = now;
+ trace_clock_struct.prev_time = now;
- __raw_spin_unlock(&trace_clock_lock);
+ __raw_spin_unlock(&trace_clock_struct.lock);
out:
raw_local_irq_restore(flags);
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index c866d34e0144..a431748ddd6e 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -78,7 +78,7 @@ FTRACE_ENTRY(funcgraph_entry, ftrace_graph_ent_entry,
__field_desc( int, graph_ent, depth )
),
- F_printk("--> %lx (%d)", __entry->graph_ent.func, __entry->depth)
+ F_printk("--> %lx (%d)", __entry->func, __entry->depth)
);
/* Function return entry */
@@ -97,8 +97,8 @@ FTRACE_ENTRY(funcgraph_exit, ftrace_graph_ret_entry,
F_printk("<-- %lx (%d) (start: %llx end: %llx) over: %d",
__entry->func, __entry->depth,
- __entry->calltime, __entry->rettim,
- __entrty->depth)
+ __entry->calltime, __entry->rettime,
+ __entry->depth)
);
/*
@@ -116,15 +116,6 @@ FTRACE_ENTRY(funcgraph_exit, ftrace_graph_ret_entry,
__field( unsigned char, next_state ) \
__field( unsigned int, next_cpu )
-#if 0
-FTRACE_ENTRY_STRUCT_ONLY(ctx_switch_entry,
-
- F_STRUCT(
- FTRACE_CTX_FIELDS
- )
-);
-#endif
-
FTRACE_ENTRY(context_switch, ctx_switch_entry,
TRACE_CTX,
@@ -133,7 +124,7 @@ FTRACE_ENTRY(context_switch, ctx_switch_entry,
FTRACE_CTX_FIELDS
),
- F_printk(b"%u:%u:%u ==> %u:%u:%u [%03u]",
+ F_printk("%u:%u:%u ==> %u:%u:%u [%03u]",
__entry->prev_pid, __entry->prev_prio, __entry->prev_state,
__entry->next_pid, __entry->next_prio, __entry->next_state,
__entry->next_cpu
@@ -257,8 +248,8 @@ FTRACE_ENTRY(mmiotrace_rw, trace_mmiotrace_rw,
__field_desc( unsigned char, rw, width )
),
- F_printk("%lx %lx %lx %d %lx %lx",
- __entry->phs, __entry->value, __entry->pc,
+ F_printk("%lx %lx %lx %d %x %x",
+ (unsigned long)__entry->phys, __entry->value, __entry->pc,
__entry->map_id, __entry->opcode, __entry->width)
);
@@ -275,8 +266,8 @@ FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map,
__field_desc( unsigned char, map, opcode )
),
- F_printk("%lx %lx %lx %d %lx",
- __entry->phs, __entry->virt, __entry->len,
+ F_printk("%lx %lx %lx %d %x",
+ (unsigned long)__entry->phys, __entry->virt, __entry->len,
__entry->map_id, __entry->opcode)
);
@@ -370,7 +361,7 @@ FTRACE_ENTRY(kmem_alloc, kmemtrace_alloc_entry,
__field( int, node )
),
- F_printk("type:%u call_site:%lx ptr:%p req:%lu alloc:%lu"
+ F_printk("type:%u call_site:%lx ptr:%p req:%zi alloc:%zi"
" flags:%x node:%d",
__entry->type_id, __entry->call_site, __entry->ptr,
__entry->bytes_req, __entry->bytes_alloc,
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index adbed124c3e7..787f0fb0994e 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1154,7 +1154,7 @@ static int trace_module_notify(struct notifier_block *self,
}
#endif /* CONFIG_MODULES */
-struct notifier_block trace_module_nb = {
+static struct notifier_block trace_module_nb = {
.notifier_call = trace_module_notify,
.priority = 0,
};
@@ -1326,6 +1326,18 @@ static __init void event_trace_self_tests(void)
if (!call->regfunc)
continue;
+/*
+ * Testing syscall events here is pretty useless, but
+ * we still do it if configured. But this is time consuming.
+ * What we really need is a user thread to perform the
+ * syscalls as we test.
+ */
+#ifndef CONFIG_EVENT_TRACE_TEST_SYSCALLS
+ if (call->system &&
+ strcmp(call->system, "syscalls") == 0)
+ continue;
+#endif
+
pr_info("Testing event %s: ", call->name);
/*
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 4cb29d84d73a..9753fcc61bc5 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -23,6 +23,47 @@
#define __field_struct(type, item)
#undef __field
+#define __field(type, item) type item;
+
+#undef __field_desc
+#define __field_desc(type, container, item) type item;
+
+#undef __array
+#define __array(type, item, size) type item[size];
+
+#undef __array_desc
+#define __array_desc(type, container, item, size) type item[size];
+
+#undef __dynamic_array
+#define __dynamic_array(type, item) type item[];
+
+#undef F_STRUCT
+#define F_STRUCT(args...) args
+
+#undef F_printk
+#define F_printk(fmt, args...) fmt, args
+
+#undef FTRACE_ENTRY
+#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
+struct ____ftrace_##name { \
+ tstruct \
+}; \
+static void __used ____ftrace_check_##name(void) \
+{ \
+ struct ____ftrace_##name *__entry = NULL; \
+ \
+ /* force cmpile-time check on F_printk() */ \
+ printk(print); \
+}
+
+#undef FTRACE_ENTRY_DUP
+#define FTRACE_ENTRY_DUP(name, struct_name, id, tstruct, print) \
+ FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print))
+
+#include "trace_entries.h"
+
+
+#undef __field
#define __field(type, item) \
ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
"offset:%zu;\tsize:%zu;\n", \
@@ -88,10 +129,6 @@ ftrace_format_##name(struct ftrace_event_call *unused, \
return ret; \
}
-#undef FTRACE_ENTRY_DUP
-#define FTRACE_ENTRY_DUP(name, struct_name, id, tstruct, print) \
- FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print))
-
#include "trace_entries.h"
@@ -172,32 +209,6 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
#undef __dynamic_array
#define __dynamic_array(type, item)
-
-#undef TRACE_ZERO_CHAR
-#define TRACE_ZERO_CHAR(arg)
-
-#undef TRACE_FIELD
-#define TRACE_FIELD(type, item, assign)\
- entry->item = assign;
-
-#undef TRACE_FIELD
-#define TRACE_FIELD(type, item, assign)\
- entry->item = assign;
-
-#undef TRACE_FIELD_SIGN
-#define TRACE_FIELD_SIGN(type, item, assign, is_signed) \
- TRACE_FIELD(type, item, assign)
-
-#undef TP_CMD
-#define TP_CMD(cmd...) cmd
-
-#undef TRACE_ENTRY
-#define TRACE_ENTRY entry
-
-#undef TRACE_FIELD_SPECIAL
-#define TRACE_FIELD_SPECIAL(type_item, item, len, cmd) \
- cmd;
-
#undef FTRACE_ENTRY
#define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \
static int ftrace_raw_init_event_##call(void); \