Fixed MTP to work with TWRP

This commit is contained in:
awab228 2018-06-19 23:16:04 +02:00
commit f6dfaef42e
50820 changed files with 20846062 additions and 0 deletions

608
kernel/trace/Kconfig Normal file
View file

@ -0,0 +1,608 @@
#
# Architectures that offer an FUNCTION_TRACER implementation should
# select HAVE_FUNCTION_TRACER:
#
config USER_STACKTRACE_SUPPORT
bool
config NOP_TRACER
bool
config HAVE_FTRACE_NMI_ENTER
bool
help
See Documentation/trace/ftrace-design.txt
config HAVE_FUNCTION_TRACER
bool
help
See Documentation/trace/ftrace-design.txt
config HAVE_FUNCTION_GRAPH_TRACER
bool
help
See Documentation/trace/ftrace-design.txt
config HAVE_FUNCTION_GRAPH_FP_TEST
bool
help
See Documentation/trace/ftrace-design.txt
config HAVE_DYNAMIC_FTRACE
bool
help
See Documentation/trace/ftrace-design.txt
config HAVE_DYNAMIC_FTRACE_WITH_REGS
bool
config HAVE_FTRACE_MCOUNT_RECORD
bool
help
See Documentation/trace/ftrace-design.txt
config HAVE_SYSCALL_TRACEPOINTS
bool
help
See Documentation/trace/ftrace-design.txt
config HAVE_FENTRY
bool
help
Arch supports the gcc options -pg with -mfentry
config HAVE_C_RECORDMCOUNT
bool
help
C version of recordmcount available?
config TRACER_MAX_TRACE
bool
config TRACE_CLOCK
bool
config RING_BUFFER
bool
select TRACE_CLOCK
select IRQ_WORK
config FTRACE_NMI_ENTER
bool
depends on HAVE_FTRACE_NMI_ENTER
default y
config EVENT_TRACING
select CONTEXT_SWITCH_TRACER
bool
config GPU_TRACEPOINTS
bool
config CONTEXT_SWITCH_TRACER
bool
config RING_BUFFER_ALLOW_SWAP
bool
help
Allow the use of ring_buffer_swap_cpu.
Adds a very slight overhead to tracing when enabled.
# All tracer options should select GENERIC_TRACER. For those options that are
# enabled by all tracers (context switch and event tracer) they select TRACING.
# This allows those options to appear when no other tracer is selected. But the
# options do not appear when something else selects it. We need the two options
# GENERIC_TRACER and TRACING to avoid circular dependencies to accomplish the
# hiding of the automatic options.
config TRACING
bool
select DEBUG_FS
select RING_BUFFER
select STACKTRACE if STACKTRACE_SUPPORT
select TRACEPOINTS
select NOP_TRACER
select BINARY_PRINTF
select EVENT_TRACING
select TRACE_CLOCK
config GENERIC_TRACER
bool
select TRACING
#
# Minimum requirements an architecture has to meet for us to
# be able to offer generic tracing facilities:
#
config TRACING_SUPPORT
bool
# PPC32 has no irqflags tracing support, but it can use most of the
# tracers anyway, they were tested to build and work. Note that new
# exceptions to this list aren't welcomed, better implement the
# irqflags tracing for your architecture.
depends on TRACE_IRQFLAGS_SUPPORT || PPC32
depends on STACKTRACE_SUPPORT
default y
if TRACING_SUPPORT
menuconfig FTRACE
bool "Tracers"
default y if DEBUG_KERNEL
help
Enable the kernel tracing infrastructure.
if FTRACE
config FUNCTION_TRACER
bool "Kernel Function Tracer"
depends on HAVE_FUNCTION_TRACER
select KALLSYMS
select GENERIC_TRACER
select CONTEXT_SWITCH_TRACER
help
Enable the kernel to trace every kernel function. This is done
by using a compiler feature to insert a small, 5-byte No-Operation
instruction at the beginning of every kernel function, which NOP
sequence is then dynamically patched into a tracer call when
tracing is enabled by the administrator. If it's runtime disabled
(the bootup default), then the overhead of the instructions is very
small and not measurable even in micro-benchmarks.
config FUNCTION_GRAPH_TRACER
bool "Kernel Function Graph Tracer"
depends on HAVE_FUNCTION_GRAPH_TRACER
depends on FUNCTION_TRACER
depends on !X86_32 || !CC_OPTIMIZE_FOR_SIZE
default y
help
Enable the kernel to trace a function at both its return
and its entry.
Its first purpose is to trace the duration of functions and
draw a call graph for each thread with some information like
the return value. This is done by setting the current return
address on the current task structure into a stack of calls.
config IRQSOFF_TRACER
bool "Interrupts-off Latency Tracer"
default n
depends on TRACE_IRQFLAGS_SUPPORT
depends on !ARCH_USES_GETTIMEOFFSET
select TRACE_IRQFLAGS
select GENERIC_TRACER
select TRACER_MAX_TRACE
select RING_BUFFER_ALLOW_SWAP
select TRACER_SNAPSHOT
select TRACER_SNAPSHOT_PER_CPU_SWAP
help
This option measures the time spent in irqs-off critical
sections, with microsecond accuracy.
The default measurement method is a maximum search, which is
disabled by default and can be runtime (re-)started
via:
echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
(Note that kernel size and overhead increase with this option
enabled. This option and the preempt-off timing option can be
used together or separately.)
config PREEMPT_TRACER
bool "Preemption-off Latency Tracer"
default n
depends on !ARCH_USES_GETTIMEOFFSET
depends on PREEMPT
select GENERIC_TRACER
select TRACER_MAX_TRACE
select RING_BUFFER_ALLOW_SWAP
select TRACER_SNAPSHOT
select TRACER_SNAPSHOT_PER_CPU_SWAP
help
This option measures the time spent in preemption-off critical
sections, with microsecond accuracy.
The default measurement method is a maximum search, which is
disabled by default and can be runtime (re-)started
via:
echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
(Note that kernel size and overhead increase with this option
enabled. This option and the irqs-off timing option can be
used together or separately.)
config SCHED_TRACER
bool "Scheduling Latency Tracer"
select GENERIC_TRACER
select CONTEXT_SWITCH_TRACER
select TRACER_MAX_TRACE
select TRACER_SNAPSHOT
help
This tracer tracks the latency of the highest priority task
to be scheduled in, starting from the point it has woken up.
config ENABLE_DEFAULT_TRACERS
bool "Trace process context switches and events"
depends on !GENERIC_TRACER
select TRACING
help
This tracer hooks to various trace points in the kernel,
allowing the user to pick and choose which trace point they
want to trace. It also includes the sched_switch tracer plugin.
config FTRACE_SYSCALLS
bool "Trace syscalls"
depends on HAVE_SYSCALL_TRACEPOINTS
select GENERIC_TRACER
select KALLSYMS
help
Basic tracer to catch the syscall entry and exit events.
config TRACER_SNAPSHOT
bool "Create a snapshot trace buffer"
select TRACER_MAX_TRACE
help
Allow tracing users to take snapshot of the current buffer using the
ftrace interface, e.g.:
echo 1 > /sys/kernel/debug/tracing/snapshot
cat snapshot
config TRACER_SNAPSHOT_PER_CPU_SWAP
bool "Allow snapshot to swap per CPU"
depends on TRACER_SNAPSHOT
select RING_BUFFER_ALLOW_SWAP
help
Allow doing a snapshot of a single CPU buffer instead of a
full swap (all buffers). If this is set, then the following is
allowed:
echo 1 > /sys/kernel/debug/tracing/per_cpu/cpu2/snapshot
After which, only the tracing buffer for CPU 2 was swapped with
the main tracing buffer, and the other CPU buffers remain the same.
When this is enabled, this adds a little more overhead to the
trace recording, as it needs to add some checks to synchronize
recording with swaps. But this does not affect the performance
of the overall system. This is enabled by default when the preempt
or irq latency tracers are enabled, as those need to swap as well
and already adds the overhead (plus a lot more).
config TRACE_BRANCH_PROFILING
bool
select GENERIC_TRACER
choice
prompt "Branch Profiling"
default BRANCH_PROFILE_NONE
help
The branch profiling is a software profiler. It will add hooks
into the C conditionals to test which path a branch takes.
The likely/unlikely profiler only looks at the conditions that
are annotated with a likely or unlikely macro.
The "all branch" profiler will profile every if-statement in the
kernel. This profiler will also enable the likely/unlikely
profiler.
Either of the above profilers adds a bit of overhead to the system.
If unsure, choose "No branch profiling".
config BRANCH_PROFILE_NONE
bool "No branch profiling"
help
No branch profiling. Branch profiling adds a bit of overhead.
Only enable it if you want to analyse the branching behavior.
Otherwise keep it disabled.
config PROFILE_ANNOTATED_BRANCHES
bool "Trace likely/unlikely profiler"
select TRACE_BRANCH_PROFILING
help
This tracer profiles all likely and unlikely macros
in the kernel. It will display the results in:
/sys/kernel/debug/tracing/trace_stat/branch_annotated
Note: this will add a significant overhead; only turn this
on if you need to profile the system's use of these macros.
config PROFILE_ALL_BRANCHES
bool "Profile all if conditionals"
select TRACE_BRANCH_PROFILING
help
This tracer profiles all branch conditions. Every if ()
taken in the kernel is recorded whether it hit or miss.
The results will be displayed in:
/sys/kernel/debug/tracing/trace_stat/branch_all
This option also enables the likely/unlikely profiler.
This configuration, when enabled, will impose a great overhead
on the system. This should only be enabled when the system
is to be analyzed in much detail.
endchoice
config TRACING_BRANCHES
bool
help
Selected by tracers that will trace the likely and unlikely
conditions. This prevents the tracers themselves from being
profiled. Profiling the tracing infrastructure can only happen
when the likelys and unlikelys are not being traced.
config BRANCH_TRACER
bool "Trace likely/unlikely instances"
depends on TRACE_BRANCH_PROFILING
select TRACING_BRANCHES
help
This traces the events of likely and unlikely condition
calls in the kernel. The difference between this and the
"Trace likely/unlikely profiler" is that this is not a
histogram of the callers, but actually places the calling
events into a running trace buffer to see when and where the
events happened, as well as their results.
Say N if unsure.
config STACK_TRACER
bool "Trace max stack"
depends on HAVE_FUNCTION_TRACER
select FUNCTION_TRACER
select STACKTRACE
select KALLSYMS
help
This special tracer records the maximum stack footprint of the
kernel and displays it in /sys/kernel/debug/tracing/stack_trace.
This tracer works by hooking into every function call that the
kernel executes, and keeping a maximum stack depth value and
stack-trace saved. If this is configured with DYNAMIC_FTRACE
then it will not have any overhead while the stack tracer
is disabled.
To enable the stack tracer on bootup, pass in 'stacktrace'
on the kernel command line.
The stack tracer can also be enabled or disabled via the
sysctl kernel.stack_tracer_enabled
Say N if unsure.
config BLK_DEV_IO_TRACE
bool "Support for tracing block IO actions"
depends on SYSFS
depends on BLOCK
select RELAY
select DEBUG_FS
select TRACEPOINTS
select GENERIC_TRACER
select STACKTRACE
help
Say Y here if you want to be able to trace the block layer actions
on a given queue. Tracing allows you to see any traffic happening
on a block device queue. For more information (and the userspace
support tools needed), fetch the blktrace tools from:
git://git.kernel.dk/blktrace.git
Tracing also is possible using the ftrace interface, e.g.:
echo 1 > /sys/block/sda/sda1/trace/enable
echo blk > /sys/kernel/debug/tracing/current_tracer
cat /sys/kernel/debug/tracing/trace_pipe
If unsure, say N.
config KPROBE_EVENT
depends on KPROBES
depends on HAVE_REGS_AND_STACK_ACCESS_API
bool "Enable kprobes-based dynamic events"
select TRACING
select PROBE_EVENTS
default y
help
This allows the user to add tracing events (similar to tracepoints)
on the fly via the ftrace interface. See
Documentation/trace/kprobetrace.txt for more details.
Those events can be inserted wherever kprobes can probe, and record
various register and memory values.
This option is also required by perf-probe subcommand of perf tools.
If you want to use perf tools, this option is strongly recommended.
config UPROBE_EVENT
bool "Enable uprobes-based dynamic events"
depends on ARCH_SUPPORTS_UPROBES
depends on MMU
depends on PERF_EVENTS
select UPROBES
select PROBE_EVENTS
select TRACING
default n
help
This allows the user to add tracing events on top of userspace
dynamic events (similar to tracepoints) on the fly via the trace
events interface. Those events can be inserted wherever uprobes
can probe, and record various registers.
This option is required if you plan to use perf-probe subcommand
of perf tools on user space applications.
config PROBE_EVENTS
def_bool n
config DYNAMIC_FTRACE
bool "enable/disable function tracing dynamically"
depends on FUNCTION_TRACER
depends on HAVE_DYNAMIC_FTRACE
default y
help
This option will modify all the calls to function tracing
dynamically (will patch them out of the binary image and
replace them with a No-Op instruction) on boot up. During
compile time, a table is made of all the locations that ftrace
can function trace, and this table is linked into the kernel
image. When this is enabled, functions can be individually
enabled, and the functions not enabled will not affect
performance of the system.
See the files in /sys/kernel/debug/tracing:
available_filter_functions
set_ftrace_filter
set_ftrace_notrace
This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but
otherwise has native performance as long as no tracing is active.
config DYNAMIC_FTRACE_WITH_REGS
def_bool y
depends on DYNAMIC_FTRACE
depends on HAVE_DYNAMIC_FTRACE_WITH_REGS
config FUNCTION_PROFILER
bool "Kernel function profiler"
depends on FUNCTION_TRACER
default n
help
This option enables the kernel function profiler. A file is created
in debugfs called function_profile_enabled which defaults to zero.
When a 1 is echoed into this file profiling begins, and when a
zero is entered, profiling stops. A "functions" file is created in
the trace_stats directory; this file shows the list of functions that
have been hit and their counters.
If in doubt, say N.
config FTRACE_MCOUNT_RECORD
def_bool y
depends on DYNAMIC_FTRACE
depends on HAVE_FTRACE_MCOUNT_RECORD
config FTRACE_SELFTEST
bool
config FTRACE_STARTUP_TEST
bool "Perform a startup test on ftrace"
depends on GENERIC_TRACER
select FTRACE_SELFTEST
help
This option performs a series of startup tests on ftrace. On bootup
a series of tests are made to verify that the tracer is
functioning properly. It will do tests on all the configured
tracers of ftrace.
config EVENT_TRACE_TEST_SYSCALLS
bool "Run selftest on syscall events"
depends on FTRACE_STARTUP_TEST
help
This option will also enable testing every syscall event.
It only enables the event and disables it and runs various loads
with the event enabled. This adds a bit more time for kernel boot
up since it runs this on every system call defined.
TBD - enable a way to actually call the syscalls as we test their
events
config MMIOTRACE
bool "Memory mapped IO tracing"
depends on HAVE_MMIOTRACE_SUPPORT && PCI
select GENERIC_TRACER
help
Mmiotrace traces Memory Mapped I/O access and is meant for
debugging and reverse engineering. It is called from the ioremap
implementation and works via page faults. Tracing is disabled by
default and can be enabled at run-time.
See Documentation/trace/mmiotrace.txt.
If you are not helping to develop drivers, say N.
config MMIOTRACE_TEST
tristate "Test module for mmiotrace"
depends on MMIOTRACE && m
help
This is a dumb module for testing mmiotrace. It is very dangerous
as it will write garbage to IO memory starting at a given address.
However, it should be safe to use on e.g. unused portion of VRAM.
Say N, unless you absolutely know what you are doing.
config TRACEPOINT_BENCHMARK
bool "Add tracepoint that benchmarks tracepoints"
help
This option creates the tracepoint "benchmark:benchmark_event".
When the tracepoint is enabled, it kicks off a kernel thread that
goes into an infinite loop (calling cond_sched() to let other tasks
run), and calls the tracepoint. Each iteration will record the time
it took to write to the tracepoint and the next iteration that
data will be passed to the tracepoint itself. That is, the tracepoint
will report the time it took to do the previous tracepoint.
The string written to the tracepoint is a static string of 128 bytes
to keep the time the same. The initial string is simply a write of
"START". The second string records the cold cache time of the first
write which is not added to the rest of the calculations.
As it is a tight loop, it benchmarks as hot cache. That's fine because
we care most about hot paths that are probably in cache already.
An example of the output:
START
first=3672 [COLD CACHED]
last=632 first=3672 max=632 min=632 avg=316 std=446 std^2=199712
last=278 first=3672 max=632 min=278 avg=303 std=316 std^2=100337
last=277 first=3672 max=632 min=277 avg=296 std=258 std^2=67064
last=273 first=3672 max=632 min=273 avg=292 std=224 std^2=50411
last=273 first=3672 max=632 min=273 avg=288 std=200 std^2=40389
last=281 first=3672 max=632 min=273 avg=287 std=183 std^2=33666
config RING_BUFFER_BENCHMARK
tristate "Ring buffer benchmark stress tester"
depends on RING_BUFFER
help
This option creates a test to stress the ring buffer and benchmark it.
It creates its own ring buffer such that it will not interfere with
any other users of the ring buffer (such as ftrace). It then creates
a producer and consumer that will run for 10 seconds and sleep for
10 seconds. Each interval it will print out the number of events
it recorded and give a rough estimate of how long each iteration took.
It does not disable interrupts or raise its priority, so it may be
affected by processes that are running.
If unsure, say N.
config RING_BUFFER_STARTUP_TEST
bool "Ring buffer startup self test"
depends on RING_BUFFER
help
Run a simple self test on the ring buffer on boot up. Late in the
kernel boot sequence, the test will start that kicks off
a thread per cpu. Each thread will write various size events
into the ring buffer. Another thread is created to send IPIs
to each of the threads, where the IPI handler will also write
to the ring buffer, to test/stress the nesting ability.
If any anomalies are discovered, a warning will be displayed
and all ring buffers will be disabled.
The test runs for 10 seconds. This will slow your boot time
by at least 10 more seconds.
At the end of the test, statics and more checks are done.
It will output the stats of each per cpu buffer. What
was written, the sizes, what was read, what was lost, and
other similar details.
If unsure, say N
endif # FTRACE
endif # TRACING_SUPPORT

70
kernel/trace/Makefile Normal file
View file

@ -0,0 +1,70 @@
# Do not instrument the tracer itself:
ifdef CONFIG_FUNCTION_TRACER
ORIG_CFLAGS := $(KBUILD_CFLAGS)
KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS))
ifdef CONFIG_FTRACE_SELFTEST
# selftest needs instrumentation
CFLAGS_trace_selftest_dynamic.o = -pg
obj-y += trace_selftest_dynamic.o
endif
endif
# If unlikely tracing is enabled, do not trace these files
ifdef CONFIG_TRACING_BRANCHES
KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
endif
CFLAGS_trace_benchmark.o := -I$(src)
CFLAGS_trace_events_filter.o := -I$(src)
obj-$(CONFIG_TRACE_CLOCK) += trace_clock.o
obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
obj-$(CONFIG_RING_BUFFER_BENCHMARK) += ring_buffer_benchmark.o
obj-$(CONFIG_TRACING) += trace.o
obj-$(CONFIG_TRACING) += trace_output.o
obj-$(CONFIG_TRACING) += trace_seq.o
obj-$(CONFIG_TRACING) += trace_stat.o
obj-$(CONFIG_TRACING) += trace_printk.o
obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
obj-$(CONFIG_NOP_TRACER) += trace_nop.o
obj-$(CONFIG_STACK_TRACER) += trace_stack.o
obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
ifeq ($(CONFIG_BLOCK),y)
obj-$(CONFIG_EVENT_TRACING) += blktrace.o
endif
obj-$(CONFIG_EVENT_TRACING) += trace_events.o
obj-$(CONFIG_EVENT_TRACING) += trace_export.o
obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
ifeq ($(CONFIG_PERF_EVENTS),y)
obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o
endif
obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
obj-$(CONFIG_EVENT_TRACING) += trace_events_trigger.o
obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
obj-$(CONFIG_TRACEPOINTS) += power-traces.o
ifeq ($(CONFIG_PM_RUNTIME),y)
obj-$(CONFIG_TRACEPOINTS) += rpm-traces.o
endif
ifeq ($(CONFIG_TRACING),y)
obj-$(CONFIG_KGDB_KDB) += trace_kdb.o
endif
obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o
obj-$(CONFIG_UPROBE_EVENT) += trace_uprobe.o
obj-$(CONFIG_GPU_TRACEPOINTS) += gpu-traces.o
obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o
libftrace-y := ftrace.o

1838
kernel/trace/blktrace.c Normal file

File diff suppressed because it is too large Load diff

5679
kernel/trace/ftrace.c Normal file

File diff suppressed because it is too large Load diff

23
kernel/trace/gpu-traces.c Normal file
View file

@ -0,0 +1,23 @@
/*
* GPU tracepoints
*
* Copyright (C) 2013 Google, Inc.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#include <linux/module.h>
#define CREATE_TRACE_POINTS
#include <trace/events/gpu.h>
EXPORT_TRACEPOINT_SYMBOL(gpu_sched_switch);
EXPORT_TRACEPOINT_SYMBOL(gpu_job_enqueue);

View file

@ -0,0 +1,17 @@
/*
* Power trace points
*
* Copyright (C) 2009 Arjan van de Ven <arjan@linux.intel.com>
*/
#include <linux/string.h>
#include <linux/types.h>
#include <linux/workqueue.h>
#include <linux/sched.h>
#include <linux/module.h>
#define CREATE_TRACE_POINTS
#include <trace/events/power.h>
EXPORT_TRACEPOINT_SYMBOL_GPL(cpu_idle);

5031
kernel/trace/ring_buffer.c Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,485 @@
/*
* ring buffer tester and benchmark
*
* Copyright (C) 2009 Steven Rostedt <srostedt@redhat.com>
*/
#include <linux/ring_buffer.h>
#include <linux/completion.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/time.h>
#include <asm/local.h>
struct rb_page {
u64 ts;
local_t commit;
char data[4080];
};
/* run time and sleep time in seconds */
#define RUN_TIME 10
#define SLEEP_TIME 10
/* number of events for writer to wake up the reader */
static int wakeup_interval = 100;
static int reader_finish;
static struct completion read_start;
static struct completion read_done;
static struct ring_buffer *buffer;
static struct task_struct *producer;
static struct task_struct *consumer;
static unsigned long read;
static int disable_reader;
module_param(disable_reader, uint, 0644);
MODULE_PARM_DESC(disable_reader, "only run producer");
static int write_iteration = 50;
module_param(write_iteration, uint, 0644);
MODULE_PARM_DESC(write_iteration, "# of writes between timestamp readings");
static int producer_nice = MAX_NICE;
static int consumer_nice = MAX_NICE;
static int producer_fifo = -1;
static int consumer_fifo = -1;
module_param(producer_nice, uint, 0644);
MODULE_PARM_DESC(producer_nice, "nice prio for producer");
module_param(consumer_nice, uint, 0644);
MODULE_PARM_DESC(consumer_nice, "nice prio for consumer");
module_param(producer_fifo, uint, 0644);
MODULE_PARM_DESC(producer_fifo, "fifo prio for producer");
module_param(consumer_fifo, uint, 0644);
MODULE_PARM_DESC(consumer_fifo, "fifo prio for consumer");
static int read_events;
static int kill_test;
#define KILL_TEST() \
do { \
if (!kill_test) { \
kill_test = 1; \
WARN_ON(1); \
} \
} while (0)
enum event_status {
EVENT_FOUND,
EVENT_DROPPED,
};
static enum event_status read_event(int cpu)
{
struct ring_buffer_event *event;
int *entry;
u64 ts;
event = ring_buffer_consume(buffer, cpu, &ts, NULL);
if (!event)
return EVENT_DROPPED;
entry = ring_buffer_event_data(event);
if (*entry != cpu) {
KILL_TEST();
return EVENT_DROPPED;
}
read++;
return EVENT_FOUND;
}
static enum event_status read_page(int cpu)
{
struct ring_buffer_event *event;
struct rb_page *rpage;
unsigned long commit;
void *bpage;
int *entry;
int ret;
int inc;
int i;
bpage = ring_buffer_alloc_read_page(buffer, cpu);
if (!bpage)
return EVENT_DROPPED;
ret = ring_buffer_read_page(buffer, &bpage, PAGE_SIZE, cpu, 1);
if (ret >= 0) {
rpage = bpage;
/* The commit may have missed event flags set, clear them */
commit = local_read(&rpage->commit) & 0xfffff;
for (i = 0; i < commit && !kill_test; i += inc) {
if (i >= (PAGE_SIZE - offsetof(struct rb_page, data))) {
KILL_TEST();
break;
}
inc = -1;
event = (void *)&rpage->data[i];
switch (event->type_len) {
case RINGBUF_TYPE_PADDING:
/* failed writes may be discarded events */
if (!event->time_delta)
KILL_TEST();
inc = event->array[0] + 4;
break;
case RINGBUF_TYPE_TIME_EXTEND:
inc = 8;
break;
case 0:
entry = ring_buffer_event_data(event);
if (*entry != cpu) {
KILL_TEST();
break;
}
read++;
if (!event->array[0]) {
KILL_TEST();
break;
}
inc = event->array[0] + 4;
break;
default:
entry = ring_buffer_event_data(event);
if (*entry != cpu) {
KILL_TEST();
break;
}
read++;
inc = ((event->type_len + 1) * 4);
}
if (kill_test)
break;
if (inc <= 0) {
KILL_TEST();
break;
}
}
}
ring_buffer_free_read_page(buffer, bpage);
if (ret < 0)
return EVENT_DROPPED;
return EVENT_FOUND;
}
static void ring_buffer_consumer(void)
{
/* toggle between reading pages and events */
read_events ^= 1;
read = 0;
while (!reader_finish && !kill_test) {
int found;
do {
int cpu;
found = 0;
for_each_online_cpu(cpu) {
enum event_status stat;
if (read_events)
stat = read_event(cpu);
else
stat = read_page(cpu);
if (kill_test)
break;
if (stat == EVENT_FOUND)
found = 1;
}
} while (found && !kill_test);
set_current_state(TASK_INTERRUPTIBLE);
if (reader_finish)
break;
schedule();
}
reader_finish = 0;
complete(&read_done);
}
static void ring_buffer_producer(void)
{
struct timeval start_tv;
struct timeval end_tv;
unsigned long long time;
unsigned long long entries;
unsigned long long overruns;
unsigned long missed = 0;
unsigned long hit = 0;
unsigned long avg;
int cnt = 0;
/*
* Hammer the buffer for 10 secs (this may
* make the system stall)
*/
trace_printk("Starting ring buffer hammer\n");
do_gettimeofday(&start_tv);
do {
struct ring_buffer_event *event;
int *entry;
int i;
for (i = 0; i < write_iteration; i++) {
event = ring_buffer_lock_reserve(buffer, 10);
if (!event) {
missed++;
} else {
hit++;
entry = ring_buffer_event_data(event);
*entry = smp_processor_id();
ring_buffer_unlock_commit(buffer, event);
}
}
do_gettimeofday(&end_tv);
cnt++;
if (consumer && !(cnt % wakeup_interval))
wake_up_process(consumer);
#ifndef CONFIG_PREEMPT
/*
* If we are a non preempt kernel, the 10 second run will
* stop everything while it runs. Instead, we will call
* cond_resched and also add any time that was lost by a
* rescedule.
*
* Do a cond resched at the same frequency we would wake up
* the reader.
*/
if (cnt % wakeup_interval)
cond_resched();
#endif
} while (end_tv.tv_sec < (start_tv.tv_sec + RUN_TIME) && !kill_test);
trace_printk("End ring buffer hammer\n");
if (consumer) {
/* Init both completions here to avoid races */
init_completion(&read_start);
init_completion(&read_done);
/* the completions must be visible before the finish var */
smp_wmb();
reader_finish = 1;
/* finish var visible before waking up the consumer */
smp_wmb();
wake_up_process(consumer);
wait_for_completion(&read_done);
}
time = end_tv.tv_sec - start_tv.tv_sec;
time *= USEC_PER_SEC;
time += (long long)((long)end_tv.tv_usec - (long)start_tv.tv_usec);
entries = ring_buffer_entries(buffer);
overruns = ring_buffer_overruns(buffer);
if (kill_test)
trace_printk("ERROR!\n");
if (!disable_reader) {
if (consumer_fifo < 0)
trace_printk("Running Consumer at nice: %d\n",
consumer_nice);
else
trace_printk("Running Consumer at SCHED_FIFO %d\n",
consumer_fifo);
}
if (producer_fifo < 0)
trace_printk("Running Producer at nice: %d\n",
producer_nice);
else
trace_printk("Running Producer at SCHED_FIFO %d\n",
producer_fifo);
/* Let the user know that the test is running at low priority */
if (producer_fifo < 0 && consumer_fifo < 0 &&
producer_nice == MAX_NICE && consumer_nice == MAX_NICE)
trace_printk("WARNING!!! This test is running at lowest priority.\n");
trace_printk("Time: %lld (usecs)\n", time);
trace_printk("Overruns: %lld\n", overruns);
if (disable_reader)
trace_printk("Read: (reader disabled)\n");
else
trace_printk("Read: %ld (by %s)\n", read,
read_events ? "events" : "pages");
trace_printk("Entries: %lld\n", entries);
trace_printk("Total: %lld\n", entries + overruns + read);
trace_printk("Missed: %ld\n", missed);
trace_printk("Hit: %ld\n", hit);
/* Convert time from usecs to millisecs */
do_div(time, USEC_PER_MSEC);
if (time)
hit /= (long)time;
else
trace_printk("TIME IS ZERO??\n");
trace_printk("Entries per millisec: %ld\n", hit);
if (hit) {
/* Calculate the average time in nanosecs */
avg = NSEC_PER_MSEC / hit;
trace_printk("%ld ns per entry\n", avg);
}
if (missed) {
if (time)
missed /= (long)time;
trace_printk("Total iterations per millisec: %ld\n",
hit + missed);
/* it is possible that hit + missed will overflow and be zero */
if (!(hit + missed)) {
trace_printk("hit + missed overflowed and totalled zero!\n");
hit--; /* make it non zero */
}
/* Caculate the average time in nanosecs */
avg = NSEC_PER_MSEC / (hit + missed);
trace_printk("%ld ns per entry\n", avg);
}
}
static void wait_to_die(void)
{
set_current_state(TASK_INTERRUPTIBLE);
while (!kthread_should_stop()) {
schedule();
set_current_state(TASK_INTERRUPTIBLE);
}
__set_current_state(TASK_RUNNING);
}
static int ring_buffer_consumer_thread(void *arg)
{
while (!kthread_should_stop() && !kill_test) {
complete(&read_start);
ring_buffer_consumer();
set_current_state(TASK_INTERRUPTIBLE);
if (kthread_should_stop() || kill_test)
break;
schedule();
}
__set_current_state(TASK_RUNNING);
if (kill_test)
wait_to_die();
return 0;
}
static int ring_buffer_producer_thread(void *arg)
{
init_completion(&read_start);
while (!kthread_should_stop() && !kill_test) {
ring_buffer_reset(buffer);
if (consumer) {
smp_wmb();
wake_up_process(consumer);
wait_for_completion(&read_start);
}
ring_buffer_producer();
trace_printk("Sleeping for 10 secs\n");
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(HZ * SLEEP_TIME);
}
if (kill_test)
wait_to_die();
return 0;
}
static int __init ring_buffer_benchmark_init(void)
{
int ret;
/* make a one meg buffer in overwite mode */
buffer = ring_buffer_alloc(1000000, RB_FL_OVERWRITE);
if (!buffer)
return -ENOMEM;
if (!disable_reader) {
consumer = kthread_create(ring_buffer_consumer_thread,
NULL, "rb_consumer");
ret = PTR_ERR(consumer);
if (IS_ERR(consumer))
goto out_fail;
}
producer = kthread_run(ring_buffer_producer_thread,
NULL, "rb_producer");
ret = PTR_ERR(producer);
if (IS_ERR(producer))
goto out_kill;
/*
* Run them as low-prio background tasks by default:
*/
if (!disable_reader) {
if (consumer_fifo >= 0) {
struct sched_param param = {
.sched_priority = consumer_fifo
};
sched_setscheduler(consumer, SCHED_FIFO, &param);
} else
set_user_nice(consumer, consumer_nice);
}
if (producer_fifo >= 0) {
struct sched_param param = {
.sched_priority = consumer_fifo
};
sched_setscheduler(producer, SCHED_FIFO, &param);
} else
set_user_nice(producer, producer_nice);
return 0;
out_kill:
if (consumer)
kthread_stop(consumer);
out_fail:
ring_buffer_free(buffer);
return ret;
}
static void __exit ring_buffer_benchmark_exit(void)
{
kthread_stop(producer);
if (consumer)
kthread_stop(consumer);
ring_buffer_free(buffer);
}
module_init(ring_buffer_benchmark_init);
module_exit(ring_buffer_benchmark_exit);
MODULE_AUTHOR("Steven Rostedt");
MODULE_DESCRIPTION("ring_buffer_benchmark");
MODULE_LICENSE("GPL");

20
kernel/trace/rpm-traces.c Normal file
View file

@ -0,0 +1,20 @@
/*
* Power trace points
*
* Copyright (C) 2009 Ming Lei <ming.lei@canonical.com>
*/
#include <linux/string.h>
#include <linux/types.h>
#include <linux/workqueue.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/usb.h>
#define CREATE_TRACE_POINTS
#include <trace/events/rpm.h>
EXPORT_TRACEPOINT_SYMBOL_GPL(rpm_return_int);
EXPORT_TRACEPOINT_SYMBOL_GPL(rpm_idle);
EXPORT_TRACEPOINT_SYMBOL_GPL(rpm_suspend);
EXPORT_TRACEPOINT_SYMBOL_GPL(rpm_resume);

6998
kernel/trace/trace.c Normal file

File diff suppressed because it is too large Load diff

1315
kernel/trace/trace.h Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,198 @@
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/trace_clock.h>
#define CREATE_TRACE_POINTS
#include "trace_benchmark.h"
static struct task_struct *bm_event_thread;
static char bm_str[BENCHMARK_EVENT_STRLEN] = "START";
static u64 bm_total;
static u64 bm_totalsq;
static u64 bm_last;
static u64 bm_max;
static u64 bm_min;
static u64 bm_first;
static u64 bm_cnt;
static u64 bm_stddev;
static unsigned int bm_avg;
static unsigned int bm_std;
/*
* This gets called in a loop recording the time it took to write
* the tracepoint. What it writes is the time statistics of the last
* tracepoint write. As there is nothing to write the first time
* it simply writes "START". As the first write is cold cache and
* the rest is hot, we save off that time in bm_first and it is
* reported as "first", which is shown in the second write to the
* tracepoint. The "first" field is writen within the statics from
* then on but never changes.
*/
static void trace_do_benchmark(void)
{
u64 start;
u64 stop;
u64 delta;
u64 stddev;
u64 seed;
u64 last_seed;
unsigned int avg;
unsigned int std = 0;
/* Only run if the tracepoint is actually active */
if (!trace_benchmark_event_enabled())
return;
local_irq_disable();
start = trace_clock_local();
trace_benchmark_event(bm_str);
stop = trace_clock_local();
local_irq_enable();
bm_cnt++;
delta = stop - start;
/*
* The first read is cold cached, keep it separate from the
* other calculations.
*/
if (bm_cnt == 1) {
bm_first = delta;
scnprintf(bm_str, BENCHMARK_EVENT_STRLEN,
"first=%llu [COLD CACHED]", bm_first);
return;
}
bm_last = delta;
if (delta > bm_max)
bm_max = delta;
if (!bm_min || delta < bm_min)
bm_min = delta;
/*
* When bm_cnt is greater than UINT_MAX, it breaks the statistics
* accounting. Freeze the statistics when that happens.
* We should have enough data for the avg and stddev anyway.
*/
if (bm_cnt > UINT_MAX) {
scnprintf(bm_str, BENCHMARK_EVENT_STRLEN,
"last=%llu first=%llu max=%llu min=%llu ** avg=%u std=%d std^2=%lld",
bm_last, bm_first, bm_max, bm_min, bm_avg, bm_std, bm_stddev);
return;
}
bm_total += delta;
bm_totalsq += delta * delta;
if (bm_cnt > 1) {
/*
* Apply Welford's method to calculate standard deviation:
* s^2 = 1 / (n * (n-1)) * (n * \Sum (x_i)^2 - (\Sum x_i)^2)
*/
stddev = (u64)bm_cnt * bm_totalsq - bm_total * bm_total;
do_div(stddev, (u32)bm_cnt);
do_div(stddev, (u32)bm_cnt - 1);
} else
stddev = 0;
delta = bm_total;
do_div(delta, bm_cnt);
avg = delta;
if (stddev > 0) {
int i = 0;
/*
* stddev is the square of standard deviation but
* we want the actualy number. Use the average
* as our seed to find the std.
*
* The next try is:
* x = (x + N/x) / 2
*
* Where N is the squared number to find the square
* root of.
*/
seed = avg;
do {
last_seed = seed;
seed = stddev;
if (!last_seed)
break;
do_div(seed, last_seed);
seed += last_seed;
do_div(seed, 2);
} while (i++ < 10 && last_seed != seed);
std = seed;
}
scnprintf(bm_str, BENCHMARK_EVENT_STRLEN,
"last=%llu first=%llu max=%llu min=%llu avg=%u std=%d std^2=%lld",
bm_last, bm_first, bm_max, bm_min, avg, std, stddev);
bm_std = std;
bm_avg = avg;
bm_stddev = stddev;
}
static int benchmark_event_kthread(void *arg)
{
/* sleep a bit to make sure the tracepoint gets activated */
msleep(100);
while (!kthread_should_stop()) {
trace_do_benchmark();
/*
* We don't go to sleep, but let others
* run as well.
*/
cond_resched();
}
return 0;
}
/*
* When the benchmark tracepoint is enabled, it calls this
* function and the thread that calls the tracepoint is created.
*/
void trace_benchmark_reg(void)
{
bm_event_thread = kthread_run(benchmark_event_kthread,
NULL, "event_benchmark");
WARN_ON(!bm_event_thread);
}
/*
* When the benchmark tracepoint is disabled, it calls this
* function and the thread that calls the tracepoint is deleted
* and all the numbers are reset.
*/
void trace_benchmark_unreg(void)
{
if (!bm_event_thread)
return;
kthread_stop(bm_event_thread);
strcpy(bm_str, "START");
bm_total = 0;
bm_totalsq = 0;
bm_last = 0;
bm_max = 0;
bm_min = 0;
bm_cnt = 0;
/* These don't need to be reset but reset them anyway */
bm_first = 0;
bm_std = 0;
bm_avg = 0;
bm_stddev = 0;
}

View file

@ -0,0 +1,41 @@
#undef TRACE_SYSTEM
#define TRACE_SYSTEM benchmark
#if !defined(_TRACE_BENCHMARK_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_BENCHMARK_H
#include <linux/tracepoint.h>
extern void trace_benchmark_reg(void);
extern void trace_benchmark_unreg(void);
#define BENCHMARK_EVENT_STRLEN 128
TRACE_EVENT_FN(benchmark_event,
TP_PROTO(const char *str),
TP_ARGS(str),
TP_STRUCT__entry(
__array( char, str, BENCHMARK_EVENT_STRLEN )
),
TP_fast_assign(
memcpy(__entry->str, str, BENCHMARK_EVENT_STRLEN);
),
TP_printk("%s", __entry->str),
trace_benchmark_reg, trace_benchmark_unreg
);
#endif /* _TRACE_BENCHMARK_H */
#undef TRACE_INCLUDE_FILE
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH .
#define TRACE_INCLUDE_FILE trace_benchmark
/* This part must be outside protection */
#include <trace/define_trace.h>

413
kernel/trace/trace_branch.c Normal file
View file

@ -0,0 +1,413 @@
/*
* unlikely profiler
*
* Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
*/
#include <linux/kallsyms.h>
#include <linux/seq_file.h>
#include <linux/spinlock.h>
#include <linux/irqflags.h>
#include <linux/debugfs.h>
#include <linux/uaccess.h>
#include <linux/module.h>
#include <linux/ftrace.h>
#include <linux/hash.h>
#include <linux/fs.h>
#include <asm/local.h>
#include "trace.h"
#include "trace_stat.h"
#include "trace_output.h"
#ifdef CONFIG_BRANCH_TRACER
static struct tracer branch_trace;
static int branch_tracing_enabled __read_mostly;
static DEFINE_MUTEX(branch_tracing_mutex);
static struct trace_array *branch_tracer;
static void
probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
{
struct ftrace_event_call *call = &event_branch;
struct trace_array *tr = branch_tracer;
struct trace_array_cpu *data;
struct ring_buffer_event *event;
struct trace_branch *entry;
struct ring_buffer *buffer;
unsigned long flags;
int cpu, pc;
const char *p;
/*
* I would love to save just the ftrace_likely_data pointer, but
* this code can also be used by modules. Ugly things can happen
* if the module is unloaded, and then we go and read the
* pointer. This is slower, but much safer.
*/
if (unlikely(!tr))
return;
local_irq_save(flags);
cpu = raw_smp_processor_id();
data = per_cpu_ptr(tr->trace_buffer.data, cpu);
if (atomic_inc_return(&data->disabled) != 1)
goto out;
pc = preempt_count();
buffer = tr->trace_buffer.buffer;
event = trace_buffer_lock_reserve(buffer, TRACE_BRANCH,
sizeof(*entry), flags, pc);
if (!event)
goto out;
entry = ring_buffer_event_data(event);
/* Strip off the path, only save the file */
p = f->file + strlen(f->file);
while (p >= f->file && *p != '/')
p--;
p++;
strncpy(entry->func, f->func, TRACE_FUNC_SIZE);
strncpy(entry->file, p, TRACE_FILE_SIZE);
entry->func[TRACE_FUNC_SIZE] = 0;
entry->file[TRACE_FILE_SIZE] = 0;
entry->line = f->line;
entry->correct = val == expect;
if (!call_filter_check_discard(call, entry, buffer, event))
__buffer_unlock_commit(buffer, event);
out:
atomic_dec(&data->disabled);
local_irq_restore(flags);
}
static inline
void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect)
{
if (!branch_tracing_enabled)
return;
probe_likely_condition(f, val, expect);
}
int enable_branch_tracing(struct trace_array *tr)
{
mutex_lock(&branch_tracing_mutex);
branch_tracer = tr;
/*
* Must be seen before enabling. The reader is a condition
* where we do not need a matching rmb()
*/
smp_wmb();
branch_tracing_enabled++;
mutex_unlock(&branch_tracing_mutex);
return 0;
}
void disable_branch_tracing(void)
{
mutex_lock(&branch_tracing_mutex);
if (!branch_tracing_enabled)
goto out_unlock;
branch_tracing_enabled--;
out_unlock:
mutex_unlock(&branch_tracing_mutex);
}
static void start_branch_trace(struct trace_array *tr)
{
enable_branch_tracing(tr);
}
static void stop_branch_trace(struct trace_array *tr)
{
disable_branch_tracing();
}
static int branch_trace_init(struct trace_array *tr)
{
start_branch_trace(tr);
return 0;
}
static void branch_trace_reset(struct trace_array *tr)
{
stop_branch_trace(tr);
}
static enum print_line_t trace_branch_print(struct trace_iterator *iter,
int flags, struct trace_event *event)
{
struct trace_branch *field;
trace_assign_type(field, iter->ent);
if (trace_seq_printf(&iter->seq, "[%s] %s:%s:%d\n",
field->correct ? " ok " : " MISS ",
field->func,
field->file,
field->line))
return TRACE_TYPE_PARTIAL_LINE;
return TRACE_TYPE_HANDLED;
}
static void branch_print_header(struct seq_file *s)
{
seq_puts(s, "# TASK-PID CPU# TIMESTAMP CORRECT"
" FUNC:FILE:LINE\n");
seq_puts(s, "# | | | | | "
" |\n");
}
static struct trace_event_functions trace_branch_funcs = {
.trace = trace_branch_print,
};
static struct trace_event trace_branch_event = {
.type = TRACE_BRANCH,
.funcs = &trace_branch_funcs,
};
static struct tracer branch_trace __read_mostly =
{
.name = "branch",
.init = branch_trace_init,
.reset = branch_trace_reset,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_branch,
#endif /* CONFIG_FTRACE_SELFTEST */
.print_header = branch_print_header,
};
__init static int init_branch_tracer(void)
{
int ret;
ret = register_ftrace_event(&trace_branch_event);
if (!ret) {
printk(KERN_WARNING "Warning: could not register "
"branch events\n");
return 1;
}
return register_tracer(&branch_trace);
}
core_initcall(init_branch_tracer);
#else
static inline
void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect)
{
}
#endif /* CONFIG_BRANCH_TRACER */
void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect)
{
/*
* I would love to have a trace point here instead, but the
* trace point code is so inundated with unlikely and likely
* conditions that the recursive nightmare that exists is too
* much to try to get working. At least for now.
*/
trace_likely_condition(f, val, expect);
/* FIXME: Make this atomic! */
if (val == expect)
f->correct++;
else
f->incorrect++;
}
EXPORT_SYMBOL(ftrace_likely_update);
extern unsigned long __start_annotated_branch_profile[];
extern unsigned long __stop_annotated_branch_profile[];
static int annotated_branch_stat_headers(struct seq_file *m)
{
seq_printf(m, " correct incorrect %% ");
seq_printf(m, " Function "
" File Line\n"
" ------- --------- - "
" -------- "
" ---- ----\n");
return 0;
}
static inline long get_incorrect_percent(struct ftrace_branch_data *p)
{
long percent;
if (p->correct) {
percent = p->incorrect * 100;
percent /= p->correct + p->incorrect;
} else
percent = p->incorrect ? 100 : -1;
return percent;
}
static int branch_stat_show(struct seq_file *m, void *v)
{
struct ftrace_branch_data *p = v;
const char *f;
long percent;
/* Only print the file, not the path */
f = p->file + strlen(p->file);
while (f >= p->file && *f != '/')
f--;
f++;
/*
* The miss is overlayed on correct, and hit on incorrect.
*/
percent = get_incorrect_percent(p);
seq_printf(m, "%8lu %8lu ", p->correct, p->incorrect);
if (percent < 0)
seq_printf(m, " X ");
else
seq_printf(m, "%3ld ", percent);
seq_printf(m, "%-30.30s %-20.20s %d\n", p->func, f, p->line);
return 0;
}
static void *annotated_branch_stat_start(struct tracer_stat *trace)
{
return __start_annotated_branch_profile;
}
static void *
annotated_branch_stat_next(void *v, int idx)
{
struct ftrace_branch_data *p = v;
++p;
if ((void *)p >= (void *)__stop_annotated_branch_profile)
return NULL;
return p;
}
static int annotated_branch_stat_cmp(void *p1, void *p2)
{
struct ftrace_branch_data *a = p1;
struct ftrace_branch_data *b = p2;
long percent_a, percent_b;
percent_a = get_incorrect_percent(a);
percent_b = get_incorrect_percent(b);
if (percent_a < percent_b)
return -1;
if (percent_a > percent_b)
return 1;
if (a->incorrect < b->incorrect)
return -1;
if (a->incorrect > b->incorrect)
return 1;
/*
* Since the above shows worse (incorrect) cases
* first, we continue that by showing best (correct)
* cases last.
*/
if (a->correct > b->correct)
return -1;
if (a->correct < b->correct)
return 1;
return 0;
}
static struct tracer_stat annotated_branch_stats = {
.name = "branch_annotated",
.stat_start = annotated_branch_stat_start,
.stat_next = annotated_branch_stat_next,
.stat_cmp = annotated_branch_stat_cmp,
.stat_headers = annotated_branch_stat_headers,
.stat_show = branch_stat_show
};
__init static int init_annotated_branch_stats(void)
{
int ret;
ret = register_stat_tracer(&annotated_branch_stats);
if (!ret) {
printk(KERN_WARNING "Warning: could not register "
"annotated branches stats\n");
return 1;
}
return 0;
}
fs_initcall(init_annotated_branch_stats);
#ifdef CONFIG_PROFILE_ALL_BRANCHES
extern unsigned long __start_branch_profile[];
extern unsigned long __stop_branch_profile[];
static int all_branch_stat_headers(struct seq_file *m)
{
seq_printf(m, " miss hit %% ");
seq_printf(m, " Function "
" File Line\n"
" ------- --------- - "
" -------- "
" ---- ----\n");
return 0;
}
static void *all_branch_stat_start(struct tracer_stat *trace)
{
return __start_branch_profile;
}
static void *
all_branch_stat_next(void *v, int idx)
{
struct ftrace_branch_data *p = v;
++p;
if ((void *)p >= (void *)__stop_branch_profile)
return NULL;
return p;
}
static struct tracer_stat all_branch_stats = {
.name = "branch_all",
.stat_start = all_branch_stat_start,
.stat_next = all_branch_stat_next,
.stat_headers = all_branch_stat_headers,
.stat_show = branch_stat_show
};
__init static int all_annotated_branch_stats(void)
{
int ret;
ret = register_stat_tracer(&all_branch_stats);
if (!ret) {
printk(KERN_WARNING "Warning: could not register "
"all branches stats\n");
return 1;
}
return 0;
}
fs_initcall(all_annotated_branch_stats);
#endif /* CONFIG_PROFILE_ALL_BRANCHES */

137
kernel/trace/trace_clock.c Normal file
View file

@ -0,0 +1,137 @@
/*
* tracing clocks
*
* Copyright (C) 2009 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
*
* Implements 3 trace clock variants, with differing scalability/precision
* tradeoffs:
*
* - local: CPU-local trace clock
* - medium: scalable global clock with some jitter
* - global: globally monotonic, serialized clock
*
* Tracer plugins will chose a default from these clocks.
*/
#include <linux/spinlock.h>
#include <linux/irqflags.h>
#include <linux/hardirq.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/ktime.h>
#include <linux/trace_clock.h>
/*
* trace_clock_local(): the simplest and least coherent tracing clock.
*
* Useful for tracing that does not cross to other CPUs nor
* does it go through idle events.
*/
u64 notrace trace_clock_local(void)
{
u64 clock;
/*
* sched_clock() is an architecture implemented, fast, scalable,
* lockless clock. It is not guaranteed to be coherent across
* CPUs, nor across CPU idle events.
*/
preempt_disable_notrace();
clock = sched_clock();
preempt_enable_notrace();
return clock;
}
EXPORT_SYMBOL_GPL(trace_clock_local);
/*
* trace_clock(): 'between' trace clock. Not completely serialized,
* but not completely incorrect when crossing CPUs either.
*
* This is based on cpu_clock(), which will allow at most ~1 jiffy of
* jitter between CPUs. So it's a pretty scalable clock, but there
* can be offsets in the trace data.
*/
u64 notrace trace_clock(void)
{
return local_clock();
}
/*
* trace_jiffy_clock(): Simply use jiffies as a clock counter.
* Note that this use of jiffies_64 is not completely safe on
* 32-bit systems. But the window is tiny, and the effect if
* we are affected is that we will have an obviously bogus
* timestamp on a trace event - i.e. not life threatening.
*/
u64 notrace trace_clock_jiffies(void)
{
return jiffies_64_to_clock_t(jiffies_64 - INITIAL_JIFFIES);
}
/*
* trace_clock_global(): special globally coherent trace clock
*
* It has higher overhead than the other trace clocks but is still
* an order of magnitude faster than GTOD derived hardware clocks.
*
* Used by plugins that need globally coherent timestamps.
*/
/* keep prev_time and lock in the same cacheline. */
static struct {
u64 prev_time;
arch_spinlock_t lock;
} trace_clock_struct ____cacheline_aligned_in_smp =
{
.lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED,
};
u64 notrace trace_clock_global(void)
{
unsigned long flags;
int this_cpu;
u64 now;
local_irq_save(flags);
this_cpu = raw_smp_processor_id();
now = sched_clock_cpu(this_cpu);
/*
* If in an NMI context then dont risk lockups and return the
* cpu_clock() time:
*/
if (unlikely(in_nmi()))
goto out;
arch_spin_lock(&trace_clock_struct.lock);
/*
* TODO: if this happens often then maybe we should reset
* my_scd->clock to prev_time+1, to make sure
* we start ticking with the local clock from now on?
*/
if ((s64)(now - trace_clock_struct.prev_time) < 0)
now = trace_clock_struct.prev_time + 1;
trace_clock_struct.prev_time = now;
arch_spin_unlock(&trace_clock_struct.lock);
out:
local_irq_restore(flags);
return now;
}
static atomic64_t trace_counter;
/*
* trace_clock_counter(): simply an atomic counter.
* Use the trace_counter "counter" for cases where you do not care
* about timings, but are interested in strict ordering.
*/
u64 notrace trace_clock_counter(void)
{
return atomic64_add_return(1, &trace_counter);
}

View file

@ -0,0 +1,324 @@
/*
* This file defines the trace event structures that go into the ring
* buffer directly. They are created via macros so that changes for them
* appear in the format file. Using macros will automate this process.
*
* The macro used to create a ftrace data structure is:
*
* FTRACE_ENTRY( name, struct_name, id, structure, print )
*
* @name: the name used the event name, as well as the name of
* the directory that holds the format file.
*
* @struct_name: the name of the structure that is created.
*
* @id: The event identifier that is used to detect what event
* this is from the ring buffer.
*
* @structure: the structure layout
*
* - __field( type, item )
* This is equivalent to declaring
* type item;
* in the structure.
* - __array( type, item, size )
* This is equivalent to declaring
* type item[size];
* in the structure.
*
* * for structures within structures, the format of the internal
* structure is laid out. This allows the internal structure
* to be deciphered for the format file. Although these macros
* may become out of sync with the internal structure, they
* will create a compile error if it happens. Since the
* internel structures are just tracing helpers, this is not
* an issue.
*
* When an internal structure is used, it should use:
*
* __field_struct( type, item )
*
* instead of __field. This will prevent it from being shown in
* the output file. The fields in the structure should use.
*
* __field_desc( type, container, item )
* __array_desc( type, container, item, len )
*
* type, item and len are the same as __field and __array, but
* container is added. This is the name of the item in
* __field_struct that this is describing.
*
*
* @print: the print format shown to users in the format file.
*/
/*
* Function trace entry - function address and parent function address:
*/
FTRACE_ENTRY_REG(function, ftrace_entry,
TRACE_FN,
F_STRUCT(
__field( unsigned long, ip )
__field( unsigned long, parent_ip )
),
F_printk(" %lx <-- %lx", __entry->ip, __entry->parent_ip),
FILTER_TRACE_FN,
perf_ftrace_event_register
);
/* Function call entry */
FTRACE_ENTRY(funcgraph_entry, ftrace_graph_ent_entry,
TRACE_GRAPH_ENT,
F_STRUCT(
__field_struct( struct ftrace_graph_ent, graph_ent )
__field_desc( unsigned long, graph_ent, func )
__field_desc( int, graph_ent, depth )
),
F_printk("--> %lx (%d)", __entry->func, __entry->depth),
FILTER_OTHER
);
/* Function return entry */
FTRACE_ENTRY(funcgraph_exit, ftrace_graph_ret_entry,
TRACE_GRAPH_RET,
F_STRUCT(
__field_struct( struct ftrace_graph_ret, ret )
__field_desc( unsigned long, ret, func )
__field_desc( unsigned long long, ret, calltime)
__field_desc( unsigned long long, ret, rettime )
__field_desc( unsigned long, ret, overrun )
__field_desc( int, ret, depth )
),
F_printk("<-- %lx (%d) (start: %llx end: %llx) over: %d",
__entry->func, __entry->depth,
__entry->calltime, __entry->rettime,
__entry->depth),
FILTER_OTHER
);
/*
* Context switch trace entry - which task (and prio) we switched from/to:
*
* This is used for both wakeup and context switches. We only want
* to create one structure, but we need two outputs for it.
*/
#define FTRACE_CTX_FIELDS \
__field( unsigned int, prev_pid ) \
__field( unsigned int, next_pid ) \
__field( unsigned int, next_cpu ) \
__field( unsigned char, prev_prio ) \
__field( unsigned char, prev_state ) \
__field( unsigned char, next_prio ) \
__field( unsigned char, next_state )
FTRACE_ENTRY(context_switch, ctx_switch_entry,
TRACE_CTX,
F_STRUCT(
FTRACE_CTX_FIELDS
),
F_printk("%u:%u:%u ==> %u:%u:%u [%03u]",
__entry->prev_pid, __entry->prev_prio, __entry->prev_state,
__entry->next_pid, __entry->next_prio, __entry->next_state,
__entry->next_cpu),
FILTER_OTHER
);
/*
* FTRACE_ENTRY_DUP only creates the format file, it will not
* create another structure.
*/
FTRACE_ENTRY_DUP(wakeup, ctx_switch_entry,
TRACE_WAKE,
F_STRUCT(
FTRACE_CTX_FIELDS
),
F_printk("%u:%u:%u ==+ %u:%u:%u [%03u]",
__entry->prev_pid, __entry->prev_prio, __entry->prev_state,
__entry->next_pid, __entry->next_prio, __entry->next_state,
__entry->next_cpu),
FILTER_OTHER
);
/*
* Stack-trace entry:
*/
#define FTRACE_STACK_ENTRIES 8
#ifndef CONFIG_64BIT
# define IP_FMT "%08lx"
#else
# define IP_FMT "%016lx"
#endif
FTRACE_ENTRY(kernel_stack, stack_entry,
TRACE_STACK,
F_STRUCT(
__field( int, size )
__dynamic_array(unsigned long, caller )
),
F_printk("\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n"
"\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n"
"\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n",
__entry->caller[0], __entry->caller[1], __entry->caller[2],
__entry->caller[3], __entry->caller[4], __entry->caller[5],
__entry->caller[6], __entry->caller[7]),
FILTER_OTHER
);
FTRACE_ENTRY(user_stack, userstack_entry,
TRACE_USER_STACK,
F_STRUCT(
__field( unsigned int, tgid )
__array( unsigned long, caller, FTRACE_STACK_ENTRIES )
),
F_printk("\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n"
"\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n"
"\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n",
__entry->caller[0], __entry->caller[1], __entry->caller[2],
__entry->caller[3], __entry->caller[4], __entry->caller[5],
__entry->caller[6], __entry->caller[7]),
FILTER_OTHER
);
/*
* trace_printk entry:
*/
FTRACE_ENTRY(bprint, bprint_entry,
TRACE_BPRINT,
F_STRUCT(
__field( unsigned long, ip )
__field( const char *, fmt )
__dynamic_array( u32, buf )
),
F_printk("%pf: %s",
(void *)__entry->ip, __entry->fmt),
FILTER_OTHER
);
FTRACE_ENTRY(print, print_entry,
TRACE_PRINT,
F_STRUCT(
__field( unsigned long, ip )
__dynamic_array( char, buf )
),
F_printk("%pf: %s",
(void *)__entry->ip, __entry->buf),
FILTER_OTHER
);
FTRACE_ENTRY(bputs, bputs_entry,
TRACE_BPUTS,
F_STRUCT(
__field( unsigned long, ip )
__field( const char *, str )
),
F_printk("%pf: %s",
(void *)__entry->ip, __entry->str),
FILTER_OTHER
);
FTRACE_ENTRY(mmiotrace_rw, trace_mmiotrace_rw,
TRACE_MMIO_RW,
F_STRUCT(
__field_struct( struct mmiotrace_rw, rw )
__field_desc( resource_size_t, rw, phys )
__field_desc( unsigned long, rw, value )
__field_desc( unsigned long, rw, pc )
__field_desc( int, rw, map_id )
__field_desc( unsigned char, rw, opcode )
__field_desc( unsigned char, rw, width )
),
F_printk("%lx %lx %lx %d %x %x",
(unsigned long)__entry->phys, __entry->value, __entry->pc,
__entry->map_id, __entry->opcode, __entry->width),
FILTER_OTHER
);
FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map,
TRACE_MMIO_MAP,
F_STRUCT(
__field_struct( struct mmiotrace_map, map )
__field_desc( resource_size_t, map, phys )
__field_desc( unsigned long, map, virt )
__field_desc( unsigned long, map, len )
__field_desc( int, map, map_id )
__field_desc( unsigned char, map, opcode )
),
F_printk("%lx %lx %lx %d %x",
(unsigned long)__entry->phys, __entry->virt, __entry->len,
__entry->map_id, __entry->opcode),
FILTER_OTHER
);
#define TRACE_FUNC_SIZE 30
#define TRACE_FILE_SIZE 20
FTRACE_ENTRY(branch, trace_branch,
TRACE_BRANCH,
F_STRUCT(
__field( unsigned int, line )
__array( char, func, TRACE_FUNC_SIZE+1 )
__array( char, file, TRACE_FILE_SIZE+1 )
__field( char, correct )
),
F_printk("%u:%s:%s (%u)",
__entry->line,
__entry->func, __entry->file, __entry->correct),
FILTER_OTHER
);

View file

@ -0,0 +1,382 @@
/*
* trace event based perf event profiling/tracing
*
* Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com>
* Copyright (C) 2009-2010 Frederic Weisbecker <fweisbec@gmail.com>
*/
#include <linux/module.h>
#include <linux/kprobes.h>
#include "trace.h"
static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS];
/*
* Force it to be aligned to unsigned long to avoid misaligned accesses
* suprises
*/
typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
perf_trace_t;
/* Count the events in use (per event id, not per instance) */
static int total_ref_count;
static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
struct perf_event *p_event)
{
if (tp_event->perf_perm) {
int ret = tp_event->perf_perm(tp_event, p_event);
if (ret)
return ret;
}
/*
* We checked and allowed to create parent,
* allow children without checking.
*/
if (p_event->parent)
return 0;
/*
* It's ok to check current process (owner) permissions in here,
* because code below is called only via perf_event_open syscall.
*/
/* The ftrace function trace is allowed only for root. */
if (ftrace_event_is_function(tp_event)) {
if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
return -EPERM;
/*
* We don't allow user space callchains for function trace
* event, due to issues with page faults while tracing page
* fault handler and its overall trickiness nature.
*/
if (!p_event->attr.exclude_callchain_user)
return -EINVAL;
/*
* Same reason to disable user stack dump as for user space
* callchains above.
*/
if (p_event->attr.sample_type & PERF_SAMPLE_STACK_USER)
return -EINVAL;
}
/* No tracing, just counting, so no obvious leak */
if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW))
return 0;
/* Some events are ok to be traced by non-root users... */
if (p_event->attach_state == PERF_ATTACH_TASK) {
if (tp_event->flags & TRACE_EVENT_FL_CAP_ANY)
return 0;
}
/*
* ...otherwise raw tracepoint data can be a severe data leak,
* only allow root to have these.
*/
if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
return -EPERM;
return 0;
}
static int perf_trace_event_reg(struct ftrace_event_call *tp_event,
struct perf_event *p_event)
{
struct hlist_head __percpu *list;
int ret = -ENOMEM;
int cpu;
p_event->tp_event = tp_event;
if (tp_event->perf_refcount++ > 0)
return 0;
list = alloc_percpu(struct hlist_head);
if (!list)
goto fail;
for_each_possible_cpu(cpu)
INIT_HLIST_HEAD(per_cpu_ptr(list, cpu));
tp_event->perf_events = list;
if (!total_ref_count) {
char __percpu *buf;
int i;
for (i = 0; i < PERF_NR_CONTEXTS; i++) {
buf = (char __percpu *)alloc_percpu(perf_trace_t);
if (!buf)
goto fail;
perf_trace_buf[i] = buf;
}
}
ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER, NULL);
if (ret)
goto fail;
total_ref_count++;
return 0;
fail:
if (!total_ref_count) {
int i;
for (i = 0; i < PERF_NR_CONTEXTS; i++) {
free_percpu(perf_trace_buf[i]);
perf_trace_buf[i] = NULL;
}
}
if (!--tp_event->perf_refcount) {
free_percpu(tp_event->perf_events);
tp_event->perf_events = NULL;
}
return ret;
}
static void perf_trace_event_unreg(struct perf_event *p_event)
{
struct ftrace_event_call *tp_event = p_event->tp_event;
int i;
if (--tp_event->perf_refcount > 0)
goto out;
tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER, NULL);
/*
* Ensure our callback won't be called anymore. The buffers
* will be freed after that.
*/
tracepoint_synchronize_unregister();
free_percpu(tp_event->perf_events);
tp_event->perf_events = NULL;
if (!--total_ref_count) {
for (i = 0; i < PERF_NR_CONTEXTS; i++) {
free_percpu(perf_trace_buf[i]);
perf_trace_buf[i] = NULL;
}
}
out:
module_put(tp_event->mod);
}
static int perf_trace_event_open(struct perf_event *p_event)
{
struct ftrace_event_call *tp_event = p_event->tp_event;
return tp_event->class->reg(tp_event, TRACE_REG_PERF_OPEN, p_event);
}
static void perf_trace_event_close(struct perf_event *p_event)
{
struct ftrace_event_call *tp_event = p_event->tp_event;
tp_event->class->reg(tp_event, TRACE_REG_PERF_CLOSE, p_event);
}
static int perf_trace_event_init(struct ftrace_event_call *tp_event,
struct perf_event *p_event)
{
int ret;
ret = perf_trace_event_perm(tp_event, p_event);
if (ret)
return ret;
ret = perf_trace_event_reg(tp_event, p_event);
if (ret)
return ret;
ret = perf_trace_event_open(p_event);
if (ret) {
perf_trace_event_unreg(p_event);
return ret;
}
return 0;
}
int perf_trace_init(struct perf_event *p_event)
{
struct ftrace_event_call *tp_event;
u64 event_id = p_event->attr.config;
int ret = -EINVAL;
mutex_lock(&event_mutex);
list_for_each_entry(tp_event, &ftrace_events, list) {
if (tp_event->event.type == event_id &&
tp_event->class && tp_event->class->reg &&
try_module_get(tp_event->mod)) {
ret = perf_trace_event_init(tp_event, p_event);
if (ret)
module_put(tp_event->mod);
break;
}
}
mutex_unlock(&event_mutex);
return ret;
}
void perf_trace_destroy(struct perf_event *p_event)
{
mutex_lock(&event_mutex);
perf_trace_event_close(p_event);
perf_trace_event_unreg(p_event);
mutex_unlock(&event_mutex);
}
int perf_trace_add(struct perf_event *p_event, int flags)
{
struct ftrace_event_call *tp_event = p_event->tp_event;
struct hlist_head __percpu *pcpu_list;
struct hlist_head *list;
pcpu_list = tp_event->perf_events;
if (WARN_ON_ONCE(!pcpu_list))
return -EINVAL;
if (!(flags & PERF_EF_START))
p_event->hw.state = PERF_HES_STOPPED;
list = this_cpu_ptr(pcpu_list);
hlist_add_head_rcu(&p_event->hlist_entry, list);
return tp_event->class->reg(tp_event, TRACE_REG_PERF_ADD, p_event);
}
void perf_trace_del(struct perf_event *p_event, int flags)
{
struct ftrace_event_call *tp_event = p_event->tp_event;
hlist_del_rcu(&p_event->hlist_entry);
tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event);
}
void *perf_trace_buf_prepare(int size, unsigned short type,
struct pt_regs *regs, int *rctxp)
{
struct trace_entry *entry;
unsigned long flags;
char *raw_data;
int pc;
BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));
if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
"perf buffer not large enough"))
return NULL;
pc = preempt_count();
*rctxp = perf_swevent_get_recursion_context();
if (*rctxp < 0)
return NULL;
raw_data = this_cpu_ptr(perf_trace_buf[*rctxp]);
/* zero the dead bytes from align to not leak stack to user */
memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));
entry = (struct trace_entry *)raw_data;
local_save_flags(flags);
tracing_generic_entry_update(entry, flags, pc);
entry->type = type;
return raw_data;
}
EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
NOKPROBE_SYMBOL(perf_trace_buf_prepare);
#ifdef CONFIG_FUNCTION_TRACER
static void
perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *ops, struct pt_regs *pt_regs)
{
struct ftrace_entry *entry;
struct hlist_head *head;
struct pt_regs regs;
int rctx;
head = this_cpu_ptr(event_function.perf_events);
if (hlist_empty(head))
return;
#define ENTRY_SIZE (ALIGN(sizeof(struct ftrace_entry) + sizeof(u32), \
sizeof(u64)) - sizeof(u32))
BUILD_BUG_ON(ENTRY_SIZE > PERF_MAX_TRACE_SIZE);
perf_fetch_caller_regs(&regs);
entry = perf_trace_buf_prepare(ENTRY_SIZE, TRACE_FN, NULL, &rctx);
if (!entry)
return;
entry->ip = ip;
entry->parent_ip = parent_ip;
perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, 0,
1, &regs, head, NULL);
#undef ENTRY_SIZE
}
static int perf_ftrace_function_register(struct perf_event *event)
{
struct ftrace_ops *ops = &event->ftrace_ops;
ops->flags |= FTRACE_OPS_FL_CONTROL;
ops->func = perf_ftrace_function_call;
return register_ftrace_function(ops);
}
static int perf_ftrace_function_unregister(struct perf_event *event)
{
struct ftrace_ops *ops = &event->ftrace_ops;
int ret = unregister_ftrace_function(ops);
ftrace_free_filter(ops);
return ret;
}
static void perf_ftrace_function_enable(struct perf_event *event)
{
ftrace_function_local_enable(&event->ftrace_ops);
}
static void perf_ftrace_function_disable(struct perf_event *event)
{
ftrace_function_local_disable(&event->ftrace_ops);
}
int perf_ftrace_event_register(struct ftrace_event_call *call,
enum trace_reg type, void *data)
{
switch (type) {
case TRACE_REG_REGISTER:
case TRACE_REG_UNREGISTER:
break;
case TRACE_REG_PERF_REGISTER:
case TRACE_REG_PERF_UNREGISTER:
return 0;
case TRACE_REG_PERF_OPEN:
return perf_ftrace_function_register(data);
case TRACE_REG_PERF_CLOSE:
return perf_ftrace_function_unregister(data);
case TRACE_REG_PERF_ADD:
perf_ftrace_function_enable(data);
return 0;
case TRACE_REG_PERF_DEL:
perf_ftrace_function_disable(data);
return 0;
}
return -EINVAL;
}
#endif /* CONFIG_FUNCTION_TRACER */

2729
kernel/trace/trace_events.c Normal file

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,50 @@
#undef TRACE_SYSTEM
#define TRACE_SYSTEM test
#if !defined(_TRACE_TEST_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_TEST_H
#include <linux/tracepoint.h>
TRACE_EVENT(ftrace_test_filter,
TP_PROTO(int a, int b, int c, int d, int e, int f, int g, int h),
TP_ARGS(a, b, c, d, e, f, g, h),
TP_STRUCT__entry(
__field(int, a)
__field(int, b)
__field(int, c)
__field(int, d)
__field(int, e)
__field(int, f)
__field(int, g)
__field(int, h)
),
TP_fast_assign(
__entry->a = a;
__entry->b = b;
__entry->c = c;
__entry->d = d;
__entry->e = e;
__entry->f = f;
__entry->g = g;
__entry->h = h;
),
TP_printk("a %d, b %d, c %d, d %d, e %d, f %d, g %d, h %d",
__entry->a, __entry->b, __entry->c, __entry->d,
__entry->e, __entry->f, __entry->g, __entry->h)
);
#endif /* _TRACE_TEST_H || TRACE_HEADER_MULTI_READ */
#undef TRACE_INCLUDE_PATH
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_PATH .
#define TRACE_INCLUDE_FILE trace_events_filter_test
/* This part must be outside protection */
#include <trace/define_trace.h>

File diff suppressed because it is too large Load diff

197
kernel/trace/trace_export.c Normal file
View file

@ -0,0 +1,197 @@
/*
* trace_export.c - export basic ftrace utilities to user space
*
* Copyright (C) 2009 Steven Rostedt <srostedt@redhat.com>
*/
#include <linux/stringify.h>
#include <linux/kallsyms.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/fs.h>
#include "trace_output.h"
#undef TRACE_SYSTEM
#define TRACE_SYSTEM ftrace
/*
* The FTRACE_ENTRY_REG macro allows ftrace entry to define register
* function and thus become accesible via perf.
*/
#undef FTRACE_ENTRY_REG
#define FTRACE_ENTRY_REG(name, struct_name, id, tstruct, print, \
filter, regfn) \
FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \
filter)
/* not needed for this file */
#undef __field_struct
#define __field_struct(type, item)
#undef __field
#define __field(type, item) type item;
#undef __field_desc
#define __field_desc(type, container, item) type item;
#undef __array
#define __array(type, item, size) type item[size];
#undef __array_desc
#define __array_desc(type, container, item, size) type item[size];
#undef __dynamic_array
#define __dynamic_array(type, item) type item[];
#undef F_STRUCT
#define F_STRUCT(args...) args
#undef F_printk
#define F_printk(fmt, args...) fmt, args
#undef FTRACE_ENTRY
#define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter) \
struct ____ftrace_##name { \
tstruct \
}; \
static void __always_unused ____ftrace_check_##name(void) \
{ \
struct ____ftrace_##name *__entry = NULL; \
\
/* force compile-time check on F_printk() */ \
printk(print); \
}
#undef FTRACE_ENTRY_DUP
#define FTRACE_ENTRY_DUP(name, struct_name, id, tstruct, print, filter) \
FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \
filter)
#include "trace_entries.h"
#undef __field
#define __field(type, item) \
ret = trace_define_field(event_call, #type, #item, \
offsetof(typeof(field), item), \
sizeof(field.item), \
is_signed_type(type), filter_type); \
if (ret) \
return ret;
#undef __field_desc
#define __field_desc(type, container, item) \
ret = trace_define_field(event_call, #type, #item, \
offsetof(typeof(field), \
container.item), \
sizeof(field.container.item), \
is_signed_type(type), filter_type); \
if (ret) \
return ret;
#undef __array
#define __array(type, item, len) \
do { \
char *type_str = #type"["__stringify(len)"]"; \
BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \
ret = trace_define_field(event_call, type_str, #item, \
offsetof(typeof(field), item), \
sizeof(field.item), \
is_signed_type(type), filter_type); \
if (ret) \
return ret; \
} while (0);
#undef __array_desc
#define __array_desc(type, container, item, len) \
BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \
ret = trace_define_field(event_call, #type "[" #len "]", #item, \
offsetof(typeof(field), \
container.item), \
sizeof(field.container.item), \
is_signed_type(type), filter_type); \
if (ret) \
return ret;
#undef __dynamic_array
#define __dynamic_array(type, item) \
ret = trace_define_field(event_call, #type, #item, \
offsetof(typeof(field), item), \
0, is_signed_type(type), filter_type);\
if (ret) \
return ret;
#undef FTRACE_ENTRY
#define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter) \
static int __init \
ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
{ \
struct struct_name field; \
int ret; \
int filter_type = filter; \
\
tstruct; \
\
return ret; \
}
#include "trace_entries.h"
#undef __entry
#define __entry REC
#undef __field
#define __field(type, item)
#undef __field_desc
#define __field_desc(type, container, item)
#undef __array
#define __array(type, item, len)
#undef __array_desc
#define __array_desc(type, container, item, len)
#undef __dynamic_array
#define __dynamic_array(type, item)
#undef F_printk
#define F_printk(fmt, args...) __stringify(fmt) ", " __stringify(args)
#undef FTRACE_ENTRY_REG
#define FTRACE_ENTRY_REG(call, struct_name, etype, tstruct, print, filter,\
regfn) \
\
struct ftrace_event_class __refdata event_class_ftrace_##call = { \
.system = __stringify(TRACE_SYSTEM), \
.define_fields = ftrace_define_fields_##call, \
.fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\
.reg = regfn, \
}; \
\
struct ftrace_event_call __used event_##call = { \
.class = &event_class_ftrace_##call, \
{ \
.name = #call, \
}, \
.event.type = etype, \
.print_fmt = print, \
.flags = TRACE_EVENT_FL_IGNORE_ENABLE | TRACE_EVENT_FL_USE_CALL_FILTER, \
}; \
struct ftrace_event_call __used \
__attribute__((section("_ftrace_events"))) *__event_##call = &event_##call;
#undef FTRACE_ENTRY
#define FTRACE_ENTRY(call, struct_name, etype, tstruct, print, filter) \
FTRACE_ENTRY_REG(call, struct_name, etype, \
PARAMS(tstruct), PARAMS(print), filter, NULL)
int ftrace_event_is_function(struct ftrace_event_call *call)
{
return call == &event_function;
}
#include "trace_entries.h"

View file

@ -0,0 +1,614 @@
/*
* ring buffer based function tracer
*
* Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
* Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
*
* Based on code from the latency_tracer, that is:
*
* Copyright (C) 2004-2006 Ingo Molnar
* Copyright (C) 2004 Nadia Yvette Chambers
*/
#include <linux/ring_buffer.h>
#include <linux/debugfs.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include "trace.h"
static void tracing_start_function_trace(struct trace_array *tr);
static void tracing_stop_function_trace(struct trace_array *tr);
static void
function_trace_call(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct pt_regs *pt_regs);
static void
function_stack_trace_call(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct pt_regs *pt_regs);
static struct tracer_flags func_flags;
/* Our option */
enum {
TRACE_FUNC_OPT_STACK = 0x1,
};
static int allocate_ftrace_ops(struct trace_array *tr)
{
struct ftrace_ops *ops;
ops = kzalloc(sizeof(*ops), GFP_KERNEL);
if (!ops)
return -ENOMEM;
/* Currently only the non stack verision is supported */
ops->func = function_trace_call;
ops->flags = FTRACE_OPS_FL_RECURSION_SAFE;
tr->ops = ops;
ops->private = tr;
return 0;
}
int ftrace_create_function_files(struct trace_array *tr,
struct dentry *parent)
{
int ret;
/*
* The top level array uses the "global_ops", and the files are
* created on boot up.
*/
if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
return 0;
ret = allocate_ftrace_ops(tr);
if (ret)
return ret;
ftrace_create_filter_files(tr->ops, parent);
return 0;
}
void ftrace_destroy_function_files(struct trace_array *tr)
{
ftrace_destroy_filter_files(tr->ops);
kfree(tr->ops);
tr->ops = NULL;
}
static int function_trace_init(struct trace_array *tr)
{
ftrace_func_t func;
/*
* Instance trace_arrays get their ops allocated
* at instance creation. Unless it failed
* the allocation.
*/
if (!tr->ops)
return -ENOMEM;
/* Currently only the global instance can do stack tracing */
if (tr->flags & TRACE_ARRAY_FL_GLOBAL &&
func_flags.val & TRACE_FUNC_OPT_STACK)
func = function_stack_trace_call;
else
func = function_trace_call;
ftrace_init_array_ops(tr, func);
tr->trace_buffer.cpu = get_cpu();
put_cpu();
tracing_start_cmdline_record();
tracing_start_function_trace(tr);
return 0;
}
static void function_trace_reset(struct trace_array *tr)
{
tracing_stop_function_trace(tr);
tracing_stop_cmdline_record();
ftrace_reset_array_ops(tr);
}
static void function_trace_start(struct trace_array *tr)
{
tracing_reset_online_cpus(&tr->trace_buffer);
}
static void
function_trace_call(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct pt_regs *pt_regs)
{
struct trace_array *tr = op->private;
struct trace_array_cpu *data;
unsigned long flags;
int bit;
int cpu;
int pc;
if (unlikely(!tr->function_enabled))
return;
pc = preempt_count();
preempt_disable_notrace();
bit = trace_test_and_set_recursion(TRACE_FTRACE_START, TRACE_FTRACE_MAX);
if (bit < 0)
goto out;
cpu = smp_processor_id();
data = per_cpu_ptr(tr->trace_buffer.data, cpu);
if (!atomic_read(&data->disabled)) {
local_save_flags(flags);
trace_function(tr, ip, parent_ip, flags, pc);
}
trace_clear_recursion(bit);
out:
preempt_enable_notrace();
}
static void
function_stack_trace_call(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct pt_regs *pt_regs)
{
struct trace_array *tr = op->private;
struct trace_array_cpu *data;
unsigned long flags;
long disabled;
int cpu;
int pc;
if (unlikely(!tr->function_enabled))
return;
/*
* Need to use raw, since this must be called before the
* recursive protection is performed.
*/
local_irq_save(flags);
cpu = raw_smp_processor_id();
data = per_cpu_ptr(tr->trace_buffer.data, cpu);
disabled = atomic_inc_return(&data->disabled);
if (likely(disabled == 1)) {
pc = preempt_count();
trace_function(tr, ip, parent_ip, flags, pc);
/*
* skip over 5 funcs:
* __ftrace_trace_stack,
* __trace_stack,
* function_stack_trace_call
* ftrace_list_func
* ftrace_call
*/
__trace_stack(tr, flags, 5, pc);
}
atomic_dec(&data->disabled);
local_irq_restore(flags);
}
static struct tracer_opt func_opts[] = {
#ifdef CONFIG_STACKTRACE
{ TRACER_OPT(func_stack_trace, TRACE_FUNC_OPT_STACK) },
#endif
{ } /* Always set a last empty entry */
};
static struct tracer_flags func_flags = {
.val = 0, /* By default: all flags disabled */
.opts = func_opts
};
static void tracing_start_function_trace(struct trace_array *tr)
{
tr->function_enabled = 0;
register_ftrace_function(tr->ops);
tr->function_enabled = 1;
}
static void tracing_stop_function_trace(struct trace_array *tr)
{
tr->function_enabled = 0;
unregister_ftrace_function(tr->ops);
}
static int
func_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
{
switch (bit) {
case TRACE_FUNC_OPT_STACK:
/* do nothing if already set */
if (!!set == !!(func_flags.val & TRACE_FUNC_OPT_STACK))
break;
unregister_ftrace_function(tr->ops);
if (set) {
tr->ops->func = function_stack_trace_call;
register_ftrace_function(tr->ops);
} else {
tr->ops->func = function_trace_call;
register_ftrace_function(tr->ops);
}
break;
default:
return -EINVAL;
}
return 0;
}
static struct tracer function_trace __tracer_data =
{
.name = "function",
.init = function_trace_init,
.reset = function_trace_reset,
.start = function_trace_start,
.flags = &func_flags,
.set_flag = func_set_flag,
.allow_instances = true,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_function,
#endif
};
#ifdef CONFIG_DYNAMIC_FTRACE
static int update_count(void **data)
{
unsigned long *count = (long *)data;
if (!*count)
return 0;
if (*count != -1)
(*count)--;
return 1;
}
static void
ftrace_traceon_count(unsigned long ip, unsigned long parent_ip, void **data)
{
if (tracing_is_on())
return;
if (update_count(data))
tracing_on();
}
static void
ftrace_traceoff_count(unsigned long ip, unsigned long parent_ip, void **data)
{
if (!tracing_is_on())
return;
if (update_count(data))
tracing_off();
}
static void
ftrace_traceon(unsigned long ip, unsigned long parent_ip, void **data)
{
if (tracing_is_on())
return;
tracing_on();
}
static void
ftrace_traceoff(unsigned long ip, unsigned long parent_ip, void **data)
{
if (!tracing_is_on())
return;
tracing_off();
}
/*
* Skip 4:
* ftrace_stacktrace()
* function_trace_probe_call()
* ftrace_ops_list_func()
* ftrace_call()
*/
#define STACK_SKIP 4
static void
ftrace_stacktrace(unsigned long ip, unsigned long parent_ip, void **data)
{
trace_dump_stack(STACK_SKIP);
}
static void
ftrace_stacktrace_count(unsigned long ip, unsigned long parent_ip, void **data)
{
if (!tracing_is_on())
return;
if (update_count(data))
trace_dump_stack(STACK_SKIP);
}
static void
ftrace_dump_probe(unsigned long ip, unsigned long parent_ip, void **data)
{
if (update_count(data))
ftrace_dump(DUMP_ALL);
}
/* Only dump the current CPU buffer. */
static void
ftrace_cpudump_probe(unsigned long ip, unsigned long parent_ip, void **data)
{
if (update_count(data))
ftrace_dump(DUMP_ORIG);
}
static int
ftrace_probe_print(const char *name, struct seq_file *m,
unsigned long ip, void *data)
{
long count = (long)data;
seq_printf(m, "%ps:%s", (void *)ip, name);
if (count == -1)
seq_printf(m, ":unlimited\n");
else
seq_printf(m, ":count=%ld\n", count);
return 0;
}
static int
ftrace_traceon_print(struct seq_file *m, unsigned long ip,
struct ftrace_probe_ops *ops, void *data)
{
return ftrace_probe_print("traceon", m, ip, data);
}
static int
ftrace_traceoff_print(struct seq_file *m, unsigned long ip,
struct ftrace_probe_ops *ops, void *data)
{
return ftrace_probe_print("traceoff", m, ip, data);
}
static int
ftrace_stacktrace_print(struct seq_file *m, unsigned long ip,
struct ftrace_probe_ops *ops, void *data)
{
return ftrace_probe_print("stacktrace", m, ip, data);
}
static int
ftrace_dump_print(struct seq_file *m, unsigned long ip,
struct ftrace_probe_ops *ops, void *data)
{
return ftrace_probe_print("dump", m, ip, data);
}
static int
ftrace_cpudump_print(struct seq_file *m, unsigned long ip,
struct ftrace_probe_ops *ops, void *data)
{
return ftrace_probe_print("cpudump", m, ip, data);
}
static struct ftrace_probe_ops traceon_count_probe_ops = {
.func = ftrace_traceon_count,
.print = ftrace_traceon_print,
};
static struct ftrace_probe_ops traceoff_count_probe_ops = {
.func = ftrace_traceoff_count,
.print = ftrace_traceoff_print,
};
static struct ftrace_probe_ops stacktrace_count_probe_ops = {
.func = ftrace_stacktrace_count,
.print = ftrace_stacktrace_print,
};
static struct ftrace_probe_ops dump_probe_ops = {
.func = ftrace_dump_probe,
.print = ftrace_dump_print,
};
static struct ftrace_probe_ops cpudump_probe_ops = {
.func = ftrace_cpudump_probe,
.print = ftrace_cpudump_print,
};
static struct ftrace_probe_ops traceon_probe_ops = {
.func = ftrace_traceon,
.print = ftrace_traceon_print,
};
static struct ftrace_probe_ops traceoff_probe_ops = {
.func = ftrace_traceoff,
.print = ftrace_traceoff_print,
};
static struct ftrace_probe_ops stacktrace_probe_ops = {
.func = ftrace_stacktrace,
.print = ftrace_stacktrace_print,
};
static int
ftrace_trace_probe_callback(struct ftrace_probe_ops *ops,
struct ftrace_hash *hash, char *glob,
char *cmd, char *param, int enable)
{
void *count = (void *)-1;
char *number;
int ret;
/* hash funcs only work with set_ftrace_filter */
if (!enable)
return -EINVAL;
if (glob[0] == '!') {
unregister_ftrace_function_probe_func(glob+1, ops);
return 0;
}
if (!param)
goto out_reg;
number = strsep(&param, ":");
if (!strlen(number))
goto out_reg;
/*
* We use the callback data field (which is a pointer)
* as our counter.
*/
ret = kstrtoul(number, 0, (unsigned long *)&count);
if (ret)
return ret;
out_reg:
ret = register_ftrace_function_probe(glob, ops, count);
return ret < 0 ? ret : 0;
}
static int
ftrace_trace_onoff_callback(struct ftrace_hash *hash,
char *glob, char *cmd, char *param, int enable)
{
struct ftrace_probe_ops *ops;
/* we register both traceon and traceoff to this callback */
if (strcmp(cmd, "traceon") == 0)
ops = param ? &traceon_count_probe_ops : &traceon_probe_ops;
else
ops = param ? &traceoff_count_probe_ops : &traceoff_probe_ops;
return ftrace_trace_probe_callback(ops, hash, glob, cmd,
param, enable);
}
static int
ftrace_stacktrace_callback(struct ftrace_hash *hash,
char *glob, char *cmd, char *param, int enable)
{
struct ftrace_probe_ops *ops;
ops = param ? &stacktrace_count_probe_ops : &stacktrace_probe_ops;
return ftrace_trace_probe_callback(ops, hash, glob, cmd,
param, enable);
}
static int
ftrace_dump_callback(struct ftrace_hash *hash,
char *glob, char *cmd, char *param, int enable)
{
struct ftrace_probe_ops *ops;
ops = &dump_probe_ops;
/* Only dump once. */
return ftrace_trace_probe_callback(ops, hash, glob, cmd,
"1", enable);
}
static int
ftrace_cpudump_callback(struct ftrace_hash *hash,
char *glob, char *cmd, char *param, int enable)
{
struct ftrace_probe_ops *ops;
ops = &cpudump_probe_ops;
/* Only dump once. */
return ftrace_trace_probe_callback(ops, hash, glob, cmd,
"1", enable);
}
static struct ftrace_func_command ftrace_traceon_cmd = {
.name = "traceon",
.func = ftrace_trace_onoff_callback,
};
static struct ftrace_func_command ftrace_traceoff_cmd = {
.name = "traceoff",
.func = ftrace_trace_onoff_callback,
};
static struct ftrace_func_command ftrace_stacktrace_cmd = {
.name = "stacktrace",
.func = ftrace_stacktrace_callback,
};
static struct ftrace_func_command ftrace_dump_cmd = {
.name = "dump",
.func = ftrace_dump_callback,
};
static struct ftrace_func_command ftrace_cpudump_cmd = {
.name = "cpudump",
.func = ftrace_cpudump_callback,
};
static int __init init_func_cmd_traceon(void)
{
int ret;
ret = register_ftrace_command(&ftrace_traceoff_cmd);
if (ret)
return ret;
ret = register_ftrace_command(&ftrace_traceon_cmd);
if (ret)
goto out_free_traceoff;
ret = register_ftrace_command(&ftrace_stacktrace_cmd);
if (ret)
goto out_free_traceon;
ret = register_ftrace_command(&ftrace_dump_cmd);
if (ret)
goto out_free_stacktrace;
ret = register_ftrace_command(&ftrace_cpudump_cmd);
if (ret)
goto out_free_dump;
return 0;
out_free_dump:
unregister_ftrace_command(&ftrace_dump_cmd);
out_free_stacktrace:
unregister_ftrace_command(&ftrace_stacktrace_cmd);
out_free_traceon:
unregister_ftrace_command(&ftrace_traceon_cmd);
out_free_traceoff:
unregister_ftrace_command(&ftrace_traceoff_cmd);
return ret;
}
#else
static inline int init_func_cmd_traceon(void)
{
return 0;
}
#endif /* CONFIG_DYNAMIC_FTRACE */
static __init int init_function_trace(void)
{
init_func_cmd_traceon();
return register_tracer(&function_trace);
}
core_initcall(init_function_trace);

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,766 @@
/*
* trace irqs off critical timings
*
* Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
* Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
*
* From code in the latency_tracer, that is:
*
* Copyright (C) 2004-2006 Ingo Molnar
* Copyright (C) 2004 Nadia Yvette Chambers
*/
#include <linux/kallsyms.h>
#include <linux/debugfs.h>
#include <linux/uaccess.h>
#include <linux/module.h>
#include <linux/ftrace.h>
#include <linux/fs.h>
#include "trace.h"
static struct trace_array *irqsoff_trace __read_mostly;
static int tracer_enabled __read_mostly;
static DEFINE_PER_CPU(int, tracing_cpu);
static DEFINE_RAW_SPINLOCK(max_trace_lock);
enum {
TRACER_IRQS_OFF = (1 << 1),
TRACER_PREEMPT_OFF = (1 << 2),
};
static int trace_type __read_mostly;
static int save_flags;
static bool function_enabled;
static void stop_irqsoff_tracer(struct trace_array *tr, int graph);
static int start_irqsoff_tracer(struct trace_array *tr, int graph);
#ifdef CONFIG_PREEMPT_TRACER
static inline int
preempt_trace(void)
{
return ((trace_type & TRACER_PREEMPT_OFF) && preempt_count());
}
#else
# define preempt_trace() (0)
#endif
#ifdef CONFIG_IRQSOFF_TRACER
static inline int
irq_trace(void)
{
return ((trace_type & TRACER_IRQS_OFF) &&
irqs_disabled());
}
#else
# define irq_trace() (0)
#endif
#define TRACE_DISPLAY_GRAPH 1
static struct tracer_opt trace_opts[] = {
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/* display latency trace as call graph */
{ TRACER_OPT(display-graph, TRACE_DISPLAY_GRAPH) },
#endif
{ } /* Empty entry */
};
static struct tracer_flags tracer_flags = {
.val = 0,
.opts = trace_opts,
};
#define is_graph() (tracer_flags.val & TRACE_DISPLAY_GRAPH)
/*
* Sequence count - we record it when starting a measurement and
* skip the latency if the sequence has changed - some other section
* did a maximum and could disturb our measurement with serial console
* printouts, etc. Truly coinciding maximum latencies should be rare
* and what happens together happens separately as well, so this doesn't
* decrease the validity of the maximum found:
*/
static __cacheline_aligned_in_smp unsigned long max_sequence;
#ifdef CONFIG_FUNCTION_TRACER
/*
* Prologue for the preempt and irqs off function tracers.
*
* Returns 1 if it is OK to continue, and data->disabled is
* incremented.
* 0 if the trace is to be ignored, and data->disabled
* is kept the same.
*
* Note, this function is also used outside this ifdef but
* inside the #ifdef of the function graph tracer below.
* This is OK, since the function graph tracer is
* dependent on the function tracer.
*/
static int func_prolog_dec(struct trace_array *tr,
struct trace_array_cpu **data,
unsigned long *flags)
{
long disabled;
int cpu;
/*
* Does not matter if we preempt. We test the flags
* afterward, to see if irqs are disabled or not.
* If we preempt and get a false positive, the flags
* test will fail.
*/
cpu = raw_smp_processor_id();
if (likely(!per_cpu(tracing_cpu, cpu)))
return 0;
local_save_flags(*flags);
/* slight chance to get a false positive on tracing_cpu */
if (!irqs_disabled_flags(*flags))
return 0;
*data = per_cpu_ptr(tr->trace_buffer.data, cpu);
disabled = atomic_inc_return(&(*data)->disabled);
if (likely(disabled == 1))
return 1;
atomic_dec(&(*data)->disabled);
return 0;
}
/*
* irqsoff uses its own tracer function to keep the overhead down:
*/
static void
irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct pt_regs *pt_regs)
{
struct trace_array *tr = irqsoff_trace;
struct trace_array_cpu *data;
unsigned long flags;
if (!func_prolog_dec(tr, &data, &flags))
return;
trace_function(tr, ip, parent_ip, flags, preempt_count());
atomic_dec(&data->disabled);
}
#endif /* CONFIG_FUNCTION_TRACER */
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static int
irqsoff_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
{
int cpu;
if (!(bit & TRACE_DISPLAY_GRAPH))
return -EINVAL;
if (!(is_graph() ^ set))
return 0;
stop_irqsoff_tracer(irqsoff_trace, !set);
for_each_possible_cpu(cpu)
per_cpu(tracing_cpu, cpu) = 0;
tr->max_latency = 0;
tracing_reset_online_cpus(&irqsoff_trace->trace_buffer);
return start_irqsoff_tracer(irqsoff_trace, set);
}
static int irqsoff_graph_entry(struct ftrace_graph_ent *trace)
{
struct trace_array *tr = irqsoff_trace;
struct trace_array_cpu *data;
unsigned long flags;
int ret;
int pc;
if (!func_prolog_dec(tr, &data, &flags))
return 0;
pc = preempt_count();
ret = __trace_graph_entry(tr, trace, flags, pc);
atomic_dec(&data->disabled);
return ret;
}
static void irqsoff_graph_return(struct ftrace_graph_ret *trace)
{
struct trace_array *tr = irqsoff_trace;
struct trace_array_cpu *data;
unsigned long flags;
int pc;
if (!func_prolog_dec(tr, &data, &flags))
return;
pc = preempt_count();
__trace_graph_return(tr, trace, flags, pc);
atomic_dec(&data->disabled);
}
static void irqsoff_trace_open(struct trace_iterator *iter)
{
if (is_graph())
graph_trace_open(iter);
}
static void irqsoff_trace_close(struct trace_iterator *iter)
{
if (iter->private)
graph_trace_close(iter);
}
#define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_CPU | \
TRACE_GRAPH_PRINT_PROC | \
TRACE_GRAPH_PRINT_ABS_TIME | \
TRACE_GRAPH_PRINT_DURATION)
static enum print_line_t irqsoff_print_line(struct trace_iterator *iter)
{
/*
* In graph mode call the graph tracer output function,
* otherwise go with the TRACE_FN event handler
*/
if (is_graph())
return print_graph_function_flags(iter, GRAPH_TRACER_FLAGS);
return TRACE_TYPE_UNHANDLED;
}
static void irqsoff_print_header(struct seq_file *s)
{
if (is_graph())
print_graph_headers_flags(s, GRAPH_TRACER_FLAGS);
else
trace_default_header(s);
}
static void
__trace_function(struct trace_array *tr,
unsigned long ip, unsigned long parent_ip,
unsigned long flags, int pc)
{
if (is_graph())
trace_graph_function(tr, ip, parent_ip, flags, pc);
else
trace_function(tr, ip, parent_ip, flags, pc);
}
#else
#define __trace_function trace_function
static int
irqsoff_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
{
return -EINVAL;
}
static int irqsoff_graph_entry(struct ftrace_graph_ent *trace)
{
return -1;
}
static enum print_line_t irqsoff_print_line(struct trace_iterator *iter)
{
return TRACE_TYPE_UNHANDLED;
}
static void irqsoff_graph_return(struct ftrace_graph_ret *trace) { }
static void irqsoff_trace_open(struct trace_iterator *iter) { }
static void irqsoff_trace_close(struct trace_iterator *iter) { }
#ifdef CONFIG_FUNCTION_TRACER
static void irqsoff_print_header(struct seq_file *s)
{
trace_default_header(s);
}
#else
static void irqsoff_print_header(struct seq_file *s)
{
trace_latency_header(s);
}
#endif /* CONFIG_FUNCTION_TRACER */
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
/*
* Should this new latency be reported/recorded?
*/
static int report_latency(struct trace_array *tr, cycle_t delta)
{
if (tracing_thresh) {
if (delta < tracing_thresh)
return 0;
} else {
if (delta <= tr->max_latency)
return 0;
}
return 1;
}
static void
check_critical_timing(struct trace_array *tr,
struct trace_array_cpu *data,
unsigned long parent_ip,
int cpu)
{
cycle_t T0, T1, delta;
unsigned long flags;
int pc;
T0 = data->preempt_timestamp;
T1 = ftrace_now(cpu);
delta = T1-T0;
local_save_flags(flags);
pc = preempt_count();
if (!report_latency(tr, delta))
goto out;
raw_spin_lock_irqsave(&max_trace_lock, flags);
/* check if we are still the max latency */
if (!report_latency(tr, delta))
goto out_unlock;
__trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
/* Skip 5 functions to get to the irq/preempt enable function */
__trace_stack(tr, flags, 5, pc);
if (data->critical_sequence != max_sequence)
goto out_unlock;
data->critical_end = parent_ip;
if (likely(!is_tracing_stopped())) {
tr->max_latency = delta;
update_max_tr_single(tr, current, cpu);
}
max_sequence++;
out_unlock:
raw_spin_unlock_irqrestore(&max_trace_lock, flags);
out:
data->critical_sequence = max_sequence;
data->preempt_timestamp = ftrace_now(cpu);
__trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
}
static inline void
start_critical_timing(unsigned long ip, unsigned long parent_ip)
{
int cpu;
struct trace_array *tr = irqsoff_trace;
struct trace_array_cpu *data;
unsigned long flags;
if (!tracer_enabled || !tracing_is_enabled())
return;
cpu = raw_smp_processor_id();
if (per_cpu(tracing_cpu, cpu))
return;
data = per_cpu_ptr(tr->trace_buffer.data, cpu);
if (unlikely(!data) || atomic_read(&data->disabled))
return;
atomic_inc(&data->disabled);
data->critical_sequence = max_sequence;
data->preempt_timestamp = ftrace_now(cpu);
data->critical_start = parent_ip ? : ip;
local_save_flags(flags);
__trace_function(tr, ip, parent_ip, flags, preempt_count());
per_cpu(tracing_cpu, cpu) = 1;
atomic_dec(&data->disabled);
}
static inline void
stop_critical_timing(unsigned long ip, unsigned long parent_ip)
{
int cpu;
struct trace_array *tr = irqsoff_trace;
struct trace_array_cpu *data;
unsigned long flags;
cpu = raw_smp_processor_id();
/* Always clear the tracing cpu on stopping the trace */
if (unlikely(per_cpu(tracing_cpu, cpu)))
per_cpu(tracing_cpu, cpu) = 0;
else
return;
if (!tracer_enabled || !tracing_is_enabled())
return;
data = per_cpu_ptr(tr->trace_buffer.data, cpu);
if (unlikely(!data) ||
!data->critical_start || atomic_read(&data->disabled))
return;
atomic_inc(&data->disabled);
local_save_flags(flags);
__trace_function(tr, ip, parent_ip, flags, preempt_count());
check_critical_timing(tr, data, parent_ip ? : ip, cpu);
data->critical_start = 0;
atomic_dec(&data->disabled);
}
/* start and stop critical timings used to for stoppage (in idle) */
void start_critical_timings(void)
{
if (preempt_trace() || irq_trace())
start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
}
EXPORT_SYMBOL_GPL(start_critical_timings);
void stop_critical_timings(void)
{
if (preempt_trace() || irq_trace())
stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
}
EXPORT_SYMBOL_GPL(stop_critical_timings);
#ifdef CONFIG_IRQSOFF_TRACER
#ifdef CONFIG_PROVE_LOCKING
void time_hardirqs_on(unsigned long a0, unsigned long a1)
{
if (!preempt_trace() && irq_trace())
stop_critical_timing(a0, a1);
}
void time_hardirqs_off(unsigned long a0, unsigned long a1)
{
if (!preempt_trace() && irq_trace())
start_critical_timing(a0, a1);
}
#else /* !CONFIG_PROVE_LOCKING */
/*
* Stubs:
*/
void trace_softirqs_on(unsigned long ip)
{
}
void trace_softirqs_off(unsigned long ip)
{
}
inline void print_irqtrace_events(struct task_struct *curr)
{
}
/*
* We are only interested in hardirq on/off events:
*/
void trace_hardirqs_on(void)
{
if (!preempt_trace() && irq_trace())
stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
}
EXPORT_SYMBOL(trace_hardirqs_on);
void trace_hardirqs_off(void)
{
if (!preempt_trace() && irq_trace())
start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
}
EXPORT_SYMBOL(trace_hardirqs_off);
__visible void trace_hardirqs_on_caller(unsigned long caller_addr)
{
if (!preempt_trace() && irq_trace())
stop_critical_timing(CALLER_ADDR0, caller_addr);
}
EXPORT_SYMBOL(trace_hardirqs_on_caller);
__visible void trace_hardirqs_off_caller(unsigned long caller_addr)
{
if (!preempt_trace() && irq_trace())
start_critical_timing(CALLER_ADDR0, caller_addr);
}
EXPORT_SYMBOL(trace_hardirqs_off_caller);
#endif /* CONFIG_PROVE_LOCKING */
#endif /* CONFIG_IRQSOFF_TRACER */
#ifdef CONFIG_PREEMPT_TRACER
void trace_preempt_on(unsigned long a0, unsigned long a1)
{
if (preempt_trace() && !irq_trace())
stop_critical_timing(a0, a1);
}
void trace_preempt_off(unsigned long a0, unsigned long a1)
{
if (preempt_trace() && !irq_trace())
start_critical_timing(a0, a1);
}
#endif /* CONFIG_PREEMPT_TRACER */
static int register_irqsoff_function(struct trace_array *tr, int graph, int set)
{
int ret;
/* 'set' is set if TRACE_ITER_FUNCTION is about to be set */
if (function_enabled || (!set && !(trace_flags & TRACE_ITER_FUNCTION)))
return 0;
if (graph)
ret = register_ftrace_graph(&irqsoff_graph_return,
&irqsoff_graph_entry);
else
ret = register_ftrace_function(tr->ops);
if (!ret)
function_enabled = true;
return ret;
}
static void unregister_irqsoff_function(struct trace_array *tr, int graph)
{
if (!function_enabled)
return;
if (graph)
unregister_ftrace_graph();
else
unregister_ftrace_function(tr->ops);
function_enabled = false;
}
static void irqsoff_function_set(struct trace_array *tr, int set)
{
if (set)
register_irqsoff_function(tr, is_graph(), 1);
else
unregister_irqsoff_function(tr, is_graph());
}
static int irqsoff_flag_changed(struct trace_array *tr, u32 mask, int set)
{
struct tracer *tracer = tr->current_trace;
if (mask & TRACE_ITER_FUNCTION)
irqsoff_function_set(tr, set);
return trace_keep_overwrite(tracer, mask, set);
}
static int start_irqsoff_tracer(struct trace_array *tr, int graph)
{
int ret;
ret = register_irqsoff_function(tr, graph, 0);
if (!ret && tracing_is_enabled())
tracer_enabled = 1;
else
tracer_enabled = 0;
return ret;
}
static void stop_irqsoff_tracer(struct trace_array *tr, int graph)
{
tracer_enabled = 0;
unregister_irqsoff_function(tr, graph);
}
static bool irqsoff_busy;
static int __irqsoff_tracer_init(struct trace_array *tr)
{
if (irqsoff_busy)
return -EBUSY;
save_flags = trace_flags;
/* non overwrite screws up the latency tracers */
set_tracer_flag(tr, TRACE_ITER_OVERWRITE, 1);
set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, 1);
tr->max_latency = 0;
irqsoff_trace = tr;
/* make sure that the tracer is visible */
smp_wmb();
tracing_reset_online_cpus(&tr->trace_buffer);
ftrace_init_array_ops(tr, irqsoff_tracer_call);
/* Only toplevel instance supports graph tracing */
if (start_irqsoff_tracer(tr, (tr->flags & TRACE_ARRAY_FL_GLOBAL &&
is_graph())))
printk(KERN_ERR "failed to start irqsoff tracer\n");
irqsoff_busy = true;
return 0;
}
static void irqsoff_tracer_reset(struct trace_array *tr)
{
int lat_flag = save_flags & TRACE_ITER_LATENCY_FMT;
int overwrite_flag = save_flags & TRACE_ITER_OVERWRITE;
stop_irqsoff_tracer(tr, is_graph());
set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, lat_flag);
set_tracer_flag(tr, TRACE_ITER_OVERWRITE, overwrite_flag);
ftrace_reset_array_ops(tr);
irqsoff_busy = false;
}
static void irqsoff_tracer_start(struct trace_array *tr)
{
tracer_enabled = 1;
}
static void irqsoff_tracer_stop(struct trace_array *tr)
{
tracer_enabled = 0;
}
#ifdef CONFIG_IRQSOFF_TRACER
static int irqsoff_tracer_init(struct trace_array *tr)
{
trace_type = TRACER_IRQS_OFF;
return __irqsoff_tracer_init(tr);
}
static struct tracer irqsoff_tracer __read_mostly =
{
.name = "irqsoff",
.init = irqsoff_tracer_init,
.reset = irqsoff_tracer_reset,
.start = irqsoff_tracer_start,
.stop = irqsoff_tracer_stop,
.print_max = true,
.print_header = irqsoff_print_header,
.print_line = irqsoff_print_line,
.flags = &tracer_flags,
.set_flag = irqsoff_set_flag,
.flag_changed = irqsoff_flag_changed,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_irqsoff,
#endif
.open = irqsoff_trace_open,
.close = irqsoff_trace_close,
.allow_instances = true,
.use_max_tr = true,
};
# define register_irqsoff(trace) register_tracer(&trace)
#else
# define register_irqsoff(trace) do { } while (0)
#endif
#ifdef CONFIG_PREEMPT_TRACER
static int preemptoff_tracer_init(struct trace_array *tr)
{
trace_type = TRACER_PREEMPT_OFF;
return __irqsoff_tracer_init(tr);
}
static struct tracer preemptoff_tracer __read_mostly =
{
.name = "preemptoff",
.init = preemptoff_tracer_init,
.reset = irqsoff_tracer_reset,
.start = irqsoff_tracer_start,
.stop = irqsoff_tracer_stop,
.print_max = true,
.print_header = irqsoff_print_header,
.print_line = irqsoff_print_line,
.flags = &tracer_flags,
.set_flag = irqsoff_set_flag,
.flag_changed = irqsoff_flag_changed,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_preemptoff,
#endif
.open = irqsoff_trace_open,
.close = irqsoff_trace_close,
.allow_instances = true,
.use_max_tr = true,
};
# define register_preemptoff(trace) register_tracer(&trace)
#else
# define register_preemptoff(trace) do { } while (0)
#endif
#if defined(CONFIG_IRQSOFF_TRACER) && \
defined(CONFIG_PREEMPT_TRACER)
static int preemptirqsoff_tracer_init(struct trace_array *tr)
{
trace_type = TRACER_IRQS_OFF | TRACER_PREEMPT_OFF;
return __irqsoff_tracer_init(tr);
}
static struct tracer preemptirqsoff_tracer __read_mostly =
{
.name = "preemptirqsoff",
.init = preemptirqsoff_tracer_init,
.reset = irqsoff_tracer_reset,
.start = irqsoff_tracer_start,
.stop = irqsoff_tracer_stop,
.print_max = true,
.print_header = irqsoff_print_header,
.print_line = irqsoff_print_line,
.flags = &tracer_flags,
.set_flag = irqsoff_set_flag,
.flag_changed = irqsoff_flag_changed,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_preemptirqsoff,
#endif
.open = irqsoff_trace_open,
.close = irqsoff_trace_close,
.allow_instances = true,
.use_max_tr = true,
};
# define register_preemptirqsoff(trace) register_tracer(&trace)
#else
# define register_preemptirqsoff(trace) do { } while (0)
#endif
__init static int init_irqsoff_tracer(void)
{
register_irqsoff(irqsoff_tracer);
register_preemptoff(preemptoff_tracer);
register_preemptirqsoff(preemptirqsoff_tracer);
return 0;
}
core_initcall(init_irqsoff_tracer);

135
kernel/trace/trace_kdb.c Normal file
View file

@ -0,0 +1,135 @@
/*
* kdb helper for dumping the ftrace buffer
*
* Copyright (C) 2010 Jason Wessel <jason.wessel@windriver.com>
*
* ftrace_dump_buf based on ftrace_dump:
* Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
* Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
*
*/
#include <linux/init.h>
#include <linux/kgdb.h>
#include <linux/kdb.h>
#include <linux/ftrace.h>
#include "trace.h"
#include "trace_output.h"
static void ftrace_dump_buf(int skip_lines, long cpu_file)
{
/* use static because iter can be a bit big for the stack */
static struct trace_iterator iter;
unsigned int old_userobj;
int cnt = 0, cpu;
trace_init_global_iter(&iter);
for_each_tracing_cpu(cpu) {
atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
}
old_userobj = trace_flags;
/* don't look at user memory in panic mode */
trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
kdb_printf("Dumping ftrace buffer:\n");
/* reset all but tr, trace, and overruns */
memset(&iter.seq, 0,
sizeof(struct trace_iterator) -
offsetof(struct trace_iterator, seq));
iter.iter_flags |= TRACE_FILE_LAT_FMT;
iter.pos = -1;
if (cpu_file == RING_BUFFER_ALL_CPUS) {
for_each_tracing_cpu(cpu) {
iter.buffer_iter[cpu] =
ring_buffer_read_prepare(iter.trace_buffer->buffer, cpu);
ring_buffer_read_start(iter.buffer_iter[cpu]);
tracing_iter_reset(&iter, cpu);
}
} else {
iter.cpu_file = cpu_file;
iter.buffer_iter[cpu_file] =
ring_buffer_read_prepare(iter.trace_buffer->buffer, cpu_file);
ring_buffer_read_start(iter.buffer_iter[cpu_file]);
tracing_iter_reset(&iter, cpu_file);
}
if (!trace_empty(&iter))
trace_find_next_entry_inc(&iter);
while (!trace_empty(&iter)) {
if (!cnt)
kdb_printf("---------------------------------\n");
cnt++;
if (trace_find_next_entry_inc(&iter) != NULL && !skip_lines)
print_trace_line(&iter);
if (!skip_lines)
trace_printk_seq(&iter.seq);
else
skip_lines--;
if (KDB_FLAG(CMD_INTERRUPT))
goto out;
}
if (!cnt)
kdb_printf(" (ftrace buffer empty)\n");
else
kdb_printf("---------------------------------\n");
out:
trace_flags = old_userobj;
for_each_tracing_cpu(cpu) {
atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
}
for_each_tracing_cpu(cpu)
if (iter.buffer_iter[cpu])
ring_buffer_read_finish(iter.buffer_iter[cpu]);
}
/*
* kdb_ftdump - Dump the ftrace log buffer
*/
static int kdb_ftdump(int argc, const char **argv)
{
int skip_lines = 0;
long cpu_file;
char *cp;
if (argc > 2)
return KDB_ARGCOUNT;
if (argc) {
skip_lines = simple_strtol(argv[1], &cp, 0);
if (*cp)
skip_lines = 0;
}
if (argc == 2) {
cpu_file = simple_strtol(argv[2], &cp, 0);
if (*cp || cpu_file >= NR_CPUS || cpu_file < 0 ||
!cpu_online(cpu_file))
return KDB_BADINT;
} else {
cpu_file = RING_BUFFER_ALL_CPUS;
}
kdb_trap_printk++;
ftrace_dump_buf(skip_lines, cpu_file);
kdb_trap_printk--;
return 0;
}
static __init int kdb_ftrace_register(void)
{
kdb_register_repeat("ftdump", kdb_ftdump, "[skip_#lines] [cpu]",
"Dump ftrace log", 0, KDB_REPEAT_NONE);
return 0;
}
late_initcall(kdb_ftrace_register);

1496
kernel/trace/trace_kprobe.c Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,374 @@
/*
* Memory mapped I/O tracing
*
* Copyright (C) 2008 Pekka Paalanen <pq@iki.fi>
*/
#define DEBUG 1
#include <linux/kernel.h>
#include <linux/mmiotrace.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/atomic.h>
#include "trace.h"
#include "trace_output.h"
struct header_iter {
struct pci_dev *dev;
};
static struct trace_array *mmio_trace_array;
static bool overrun_detected;
static unsigned long prev_overruns;
static atomic_t dropped_count;
static void mmio_reset_data(struct trace_array *tr)
{
overrun_detected = false;
prev_overruns = 0;
tracing_reset_online_cpus(&tr->trace_buffer);
}
static int mmio_trace_init(struct trace_array *tr)
{
pr_debug("in %s\n", __func__);
mmio_trace_array = tr;
mmio_reset_data(tr);
enable_mmiotrace();
return 0;
}
static void mmio_trace_reset(struct trace_array *tr)
{
pr_debug("in %s\n", __func__);
disable_mmiotrace();
mmio_reset_data(tr);
mmio_trace_array = NULL;
}
static void mmio_trace_start(struct trace_array *tr)
{
pr_debug("in %s\n", __func__);
mmio_reset_data(tr);
}
static int mmio_print_pcidev(struct trace_seq *s, const struct pci_dev *dev)
{
int ret = 0;
int i;
resource_size_t start, end;
const struct pci_driver *drv = pci_dev_driver(dev);
/* XXX: incomplete checks for trace_seq_printf() return value */
ret += trace_seq_printf(s, "PCIDEV %02x%02x %04x%04x %x",
dev->bus->number, dev->devfn,
dev->vendor, dev->device, dev->irq);
/*
* XXX: is pci_resource_to_user() appropriate, since we are
* supposed to interpret the __ioremap() phys_addr argument based on
* these printed values?
*/
for (i = 0; i < 7; i++) {
pci_resource_to_user(dev, i, &dev->resource[i], &start, &end);
ret += trace_seq_printf(s, " %llx",
(unsigned long long)(start |
(dev->resource[i].flags & PCI_REGION_FLAG_MASK)));
}
for (i = 0; i < 7; i++) {
pci_resource_to_user(dev, i, &dev->resource[i], &start, &end);
ret += trace_seq_printf(s, " %llx",
dev->resource[i].start < dev->resource[i].end ?
(unsigned long long)(end - start) + 1 : 0);
}
if (drv)
ret += trace_seq_printf(s, " %s\n", drv->name);
else
ret += trace_seq_puts(s, " \n");
return ret;
}
static void destroy_header_iter(struct header_iter *hiter)
{
if (!hiter)
return;
pci_dev_put(hiter->dev);
kfree(hiter);
}
static void mmio_pipe_open(struct trace_iterator *iter)
{
struct header_iter *hiter;
struct trace_seq *s = &iter->seq;
trace_seq_puts(s, "VERSION 20070824\n");
hiter = kzalloc(sizeof(*hiter), GFP_KERNEL);
if (!hiter)
return;
hiter->dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, NULL);
iter->private = hiter;
}
/* XXX: This is not called when the pipe is closed! */
static void mmio_close(struct trace_iterator *iter)
{
struct header_iter *hiter = iter->private;
destroy_header_iter(hiter);
iter->private = NULL;
}
static unsigned long count_overruns(struct trace_iterator *iter)
{
unsigned long cnt = atomic_xchg(&dropped_count, 0);
unsigned long over = ring_buffer_overruns(iter->trace_buffer->buffer);
if (over > prev_overruns)
cnt += over - prev_overruns;
prev_overruns = over;
return cnt;
}
static ssize_t mmio_read(struct trace_iterator *iter, struct file *filp,
char __user *ubuf, size_t cnt, loff_t *ppos)
{
ssize_t ret;
struct header_iter *hiter = iter->private;
struct trace_seq *s = &iter->seq;
unsigned long n;
n = count_overruns(iter);
if (n) {
/* XXX: This is later than where events were lost. */
trace_seq_printf(s, "MARK 0.000000 Lost %lu events.\n", n);
if (!overrun_detected)
pr_warning("mmiotrace has lost events.\n");
overrun_detected = true;
goto print_out;
}
if (!hiter)
return 0;
mmio_print_pcidev(s, hiter->dev);
hiter->dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, hiter->dev);
if (!hiter->dev) {
destroy_header_iter(hiter);
iter->private = NULL;
}
print_out:
ret = trace_seq_to_user(s, ubuf, cnt);
return (ret == -EBUSY) ? 0 : ret;
}
static enum print_line_t mmio_print_rw(struct trace_iterator *iter)
{
struct trace_entry *entry = iter->ent;
struct trace_mmiotrace_rw *field;
struct mmiotrace_rw *rw;
struct trace_seq *s = &iter->seq;
unsigned long long t = ns2usecs(iter->ts);
unsigned long usec_rem = do_div(t, USEC_PER_SEC);
unsigned secs = (unsigned long)t;
int ret = 1;
trace_assign_type(field, entry);
rw = &field->rw;
switch (rw->opcode) {
case MMIO_READ:
ret = trace_seq_printf(s,
"R %d %u.%06lu %d 0x%llx 0x%lx 0x%lx %d\n",
rw->width, secs, usec_rem, rw->map_id,
(unsigned long long)rw->phys,
rw->value, rw->pc, 0);
break;
case MMIO_WRITE:
ret = trace_seq_printf(s,
"W %d %u.%06lu %d 0x%llx 0x%lx 0x%lx %d\n",
rw->width, secs, usec_rem, rw->map_id,
(unsigned long long)rw->phys,
rw->value, rw->pc, 0);
break;
case MMIO_UNKNOWN_OP:
ret = trace_seq_printf(s,
"UNKNOWN %u.%06lu %d 0x%llx %02lx,%02lx,"
"%02lx 0x%lx %d\n",
secs, usec_rem, rw->map_id,
(unsigned long long)rw->phys,
(rw->value >> 16) & 0xff, (rw->value >> 8) & 0xff,
(rw->value >> 0) & 0xff, rw->pc, 0);
break;
default:
ret = trace_seq_puts(s, "rw what?\n");
break;
}
if (ret)
return TRACE_TYPE_HANDLED;
return TRACE_TYPE_PARTIAL_LINE;
}
static enum print_line_t mmio_print_map(struct trace_iterator *iter)
{
struct trace_entry *entry = iter->ent;
struct trace_mmiotrace_map *field;
struct mmiotrace_map *m;
struct trace_seq *s = &iter->seq;
unsigned long long t = ns2usecs(iter->ts);
unsigned long usec_rem = do_div(t, USEC_PER_SEC);
unsigned secs = (unsigned long)t;
int ret;
trace_assign_type(field, entry);
m = &field->map;
switch (m->opcode) {
case MMIO_PROBE:
ret = trace_seq_printf(s,
"MAP %u.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n",
secs, usec_rem, m->map_id,
(unsigned long long)m->phys, m->virt, m->len,
0UL, 0);
break;
case MMIO_UNPROBE:
ret = trace_seq_printf(s,
"UNMAP %u.%06lu %d 0x%lx %d\n",
secs, usec_rem, m->map_id, 0UL, 0);
break;
default:
ret = trace_seq_puts(s, "map what?\n");
break;
}
if (ret)
return TRACE_TYPE_HANDLED;
return TRACE_TYPE_PARTIAL_LINE;
}
static enum print_line_t mmio_print_mark(struct trace_iterator *iter)
{
struct trace_entry *entry = iter->ent;
struct print_entry *print = (struct print_entry *)entry;
const char *msg = print->buf;
struct trace_seq *s = &iter->seq;
unsigned long long t = ns2usecs(iter->ts);
unsigned long usec_rem = do_div(t, USEC_PER_SEC);
unsigned secs = (unsigned long)t;
int ret;
/* The trailing newline must be in the message. */
ret = trace_seq_printf(s, "MARK %u.%06lu %s", secs, usec_rem, msg);
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
return TRACE_TYPE_HANDLED;
}
static enum print_line_t mmio_print_line(struct trace_iterator *iter)
{
switch (iter->ent->type) {
case TRACE_MMIO_RW:
return mmio_print_rw(iter);
case TRACE_MMIO_MAP:
return mmio_print_map(iter);
case TRACE_PRINT:
return mmio_print_mark(iter);
default:
return TRACE_TYPE_HANDLED; /* ignore unknown entries */
}
}
static struct tracer mmio_tracer __read_mostly =
{
.name = "mmiotrace",
.init = mmio_trace_init,
.reset = mmio_trace_reset,
.start = mmio_trace_start,
.pipe_open = mmio_pipe_open,
.close = mmio_close,
.read = mmio_read,
.print_line = mmio_print_line,
};
__init static int init_mmio_trace(void)
{
return register_tracer(&mmio_tracer);
}
device_initcall(init_mmio_trace);
static void __trace_mmiotrace_rw(struct trace_array *tr,
struct trace_array_cpu *data,
struct mmiotrace_rw *rw)
{
struct ftrace_event_call *call = &event_mmiotrace_rw;
struct ring_buffer *buffer = tr->trace_buffer.buffer;
struct ring_buffer_event *event;
struct trace_mmiotrace_rw *entry;
int pc = preempt_count();
event = trace_buffer_lock_reserve(buffer, TRACE_MMIO_RW,
sizeof(*entry), 0, pc);
if (!event) {
atomic_inc(&dropped_count);
return;
}
entry = ring_buffer_event_data(event);
entry->rw = *rw;
if (!call_filter_check_discard(call, entry, buffer, event))
trace_buffer_unlock_commit(buffer, event, 0, pc);
}
void mmio_trace_rw(struct mmiotrace_rw *rw)
{
struct trace_array *tr = mmio_trace_array;
struct trace_array_cpu *data = per_cpu_ptr(tr->trace_buffer.data, smp_processor_id());
__trace_mmiotrace_rw(tr, data, rw);
}
static void __trace_mmiotrace_map(struct trace_array *tr,
struct trace_array_cpu *data,
struct mmiotrace_map *map)
{
struct ftrace_event_call *call = &event_mmiotrace_map;
struct ring_buffer *buffer = tr->trace_buffer.buffer;
struct ring_buffer_event *event;
struct trace_mmiotrace_map *entry;
int pc = preempt_count();
event = trace_buffer_lock_reserve(buffer, TRACE_MMIO_MAP,
sizeof(*entry), 0, pc);
if (!event) {
atomic_inc(&dropped_count);
return;
}
entry = ring_buffer_event_data(event);
entry->map = *map;
if (!call_filter_check_discard(call, entry, buffer, event))
trace_buffer_unlock_commit(buffer, event, 0, pc);
}
void mmio_trace_mapping(struct mmiotrace_map *map)
{
struct trace_array *tr = mmio_trace_array;
struct trace_array_cpu *data;
preempt_disable();
data = per_cpu_ptr(tr->trace_buffer.data, smp_processor_id());
__trace_mmiotrace_map(tr, data, map);
preempt_enable();
}
int mmio_trace_printk(const char *fmt, va_list args)
{
return trace_vprintk(0, fmt, args);
}

101
kernel/trace/trace_nop.c Normal file
View file

@ -0,0 +1,101 @@
/*
* nop tracer
*
* Copyright (C) 2008 Steven Noonan <steven@uplinklabs.net>
*
*/
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/debugfs.h>
#include <linux/ftrace.h>
#include "trace.h"
/* Our two options */
enum {
TRACE_NOP_OPT_ACCEPT = 0x1,
TRACE_NOP_OPT_REFUSE = 0x2
};
/* Options for the tracer (see trace_options file) */
static struct tracer_opt nop_opts[] = {
/* Option that will be accepted by set_flag callback */
{ TRACER_OPT(test_nop_accept, TRACE_NOP_OPT_ACCEPT) },
/* Option that will be refused by set_flag callback */
{ TRACER_OPT(test_nop_refuse, TRACE_NOP_OPT_REFUSE) },
{ } /* Always set a last empty entry */
};
static struct tracer_flags nop_flags = {
/* You can check your flags value here when you want. */
.val = 0, /* By default: all flags disabled */
.opts = nop_opts
};
static struct trace_array *ctx_trace;
static void start_nop_trace(struct trace_array *tr)
{
/* Nothing to do! */
}
static void stop_nop_trace(struct trace_array *tr)
{
/* Nothing to do! */
}
static int nop_trace_init(struct trace_array *tr)
{
ctx_trace = tr;
start_nop_trace(tr);
return 0;
}
static void nop_trace_reset(struct trace_array *tr)
{
stop_nop_trace(tr);
}
/* It only serves as a signal handler and a callback to
* accept or refuse tthe setting of a flag.
* If you don't implement it, then the flag setting will be
* automatically accepted.
*/
static int nop_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
{
/*
* Note that you don't need to update nop_flags.val yourself.
* The tracing Api will do it automatically if you return 0
*/
if (bit == TRACE_NOP_OPT_ACCEPT) {
printk(KERN_DEBUG "nop_test_accept flag set to %d: we accept."
" Now cat trace_options to see the result\n",
set);
return 0;
}
if (bit == TRACE_NOP_OPT_REFUSE) {
printk(KERN_DEBUG "nop_test_refuse flag set to %d: we refuse."
"Now cat trace_options to see the result\n",
set);
return -EINVAL;
}
return 0;
}
struct tracer nop_trace __read_mostly =
{
.name = "nop",
.init = nop_trace_init,
.reset = nop_trace_reset,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_nop,
#endif
.flags = &nop_flags,
.set_flag = nop_set_flag,
.allow_instances = true,
};

1453
kernel/trace/trace_output.c Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,51 @@
#ifndef __TRACE_EVENTS_H
#define __TRACE_EVENTS_H
#include <linux/trace_seq.h>
#include "trace.h"
extern enum print_line_t
trace_print_bputs_msg_only(struct trace_iterator *iter);
extern enum print_line_t
trace_print_bprintk_msg_only(struct trace_iterator *iter);
extern enum print_line_t
trace_print_printk_msg_only(struct trace_iterator *iter);
extern int
seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
unsigned long sym_flags);
extern int seq_print_userip_objs(const struct userstack_entry *entry,
struct trace_seq *s, unsigned long sym_flags);
extern int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
unsigned long ip, unsigned long sym_flags);
extern int trace_print_context(struct trace_iterator *iter);
extern int trace_print_lat_context(struct trace_iterator *iter);
extern void trace_event_read_lock(void);
extern void trace_event_read_unlock(void);
extern struct trace_event *ftrace_find_event(int type);
extern enum print_line_t trace_nop_print(struct trace_iterator *iter,
int flags, struct trace_event *event);
extern int
trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry);
/* used by module unregistering */
extern int __unregister_ftrace_event(struct trace_event *event);
extern struct rw_semaphore trace_event_sem;
#define SEQ_PUT_FIELD_RET(s, x) \
do { \
if (!trace_seq_putmem(s, &(x), sizeof(x))) \
return TRACE_TYPE_PARTIAL_LINE; \
} while (0)
#define SEQ_PUT_HEX_FIELD_RET(s, x) \
do { \
if (!trace_seq_putmem_hex(s, &(x), sizeof(x))) \
return TRACE_TYPE_PARTIAL_LINE; \
} while (0)
#endif

368
kernel/trace/trace_printk.c Normal file
View file

@ -0,0 +1,368 @@
/*
* trace binary printk
*
* Copyright (C) 2008 Lai Jiangshan <laijs@cn.fujitsu.com>
*
*/
#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/uaccess.h>
#include <linux/kernel.h>
#include <linux/ftrace.h>
#include <linux/string.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/ctype.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include "trace.h"
#ifdef CONFIG_MODULES
/*
* modules trace_printk()'s formats are autosaved in struct trace_bprintk_fmt
* which are queued on trace_bprintk_fmt_list.
*/
static LIST_HEAD(trace_bprintk_fmt_list);
/* serialize accesses to trace_bprintk_fmt_list */
static DEFINE_MUTEX(btrace_mutex);
struct trace_bprintk_fmt {
struct list_head list;
const char *fmt;
};
static inline struct trace_bprintk_fmt *lookup_format(const char *fmt)
{
struct trace_bprintk_fmt *pos;
list_for_each_entry(pos, &trace_bprintk_fmt_list, list) {
if (!strcmp(pos->fmt, fmt))
return pos;
}
return NULL;
}
static
void hold_module_trace_bprintk_format(const char **start, const char **end)
{
const char **iter;
char *fmt;
/* allocate the trace_printk per cpu buffers */
if (start != end)
trace_printk_init_buffers();
mutex_lock(&btrace_mutex);
for (iter = start; iter < end; iter++) {
struct trace_bprintk_fmt *tb_fmt = lookup_format(*iter);
if (tb_fmt) {
*iter = tb_fmt->fmt;
continue;
}
fmt = NULL;
tb_fmt = kmalloc(sizeof(*tb_fmt), GFP_KERNEL);
if (tb_fmt) {
fmt = kmalloc(strlen(*iter) + 1, GFP_KERNEL);
if (fmt) {
list_add_tail(&tb_fmt->list, &trace_bprintk_fmt_list);
strcpy(fmt, *iter);
tb_fmt->fmt = fmt;
} else
kfree(tb_fmt);
}
*iter = fmt;
}
mutex_unlock(&btrace_mutex);
}
static int module_trace_bprintk_format_notify(struct notifier_block *self,
unsigned long val, void *data)
{
struct module *mod = data;
if (mod->num_trace_bprintk_fmt) {
const char **start = mod->trace_bprintk_fmt_start;
const char **end = start + mod->num_trace_bprintk_fmt;
if (val == MODULE_STATE_COMING)
hold_module_trace_bprintk_format(start, end);
}
return 0;
}
/*
* The debugfs/tracing/printk_formats file maps the addresses with
* the ASCII formats that are used in the bprintk events in the
* buffer. For userspace tools to be able to decode the events from
* the buffer, they need to be able to map the address with the format.
*
* The addresses of the bprintk formats are in their own section
* __trace_printk_fmt. But for modules we copy them into a link list.
* The code to print the formats and their addresses passes around the
* address of the fmt string. If the fmt address passed into the seq
* functions is within the kernel core __trace_printk_fmt section, then
* it simply uses the next pointer in the list.
*
* When the fmt pointer is outside the kernel core __trace_printk_fmt
* section, then we need to read the link list pointers. The trick is
* we pass the address of the string to the seq function just like
* we do for the kernel core formats. To get back the structure that
* holds the format, we simply use containerof() and then go to the
* next format in the list.
*/
static const char **
find_next_mod_format(int start_index, void *v, const char **fmt, loff_t *pos)
{
struct trace_bprintk_fmt *mod_fmt;
if (list_empty(&trace_bprintk_fmt_list))
return NULL;
/*
* v will point to the address of the fmt record from t_next
* v will be NULL from t_start.
* If this is the first pointer or called from start
* then we need to walk the list.
*/
if (!v || start_index == *pos) {
struct trace_bprintk_fmt *p;
/* search the module list */
list_for_each_entry(p, &trace_bprintk_fmt_list, list) {
if (start_index == *pos)
return &p->fmt;
start_index++;
}
/* pos > index */
return NULL;
}
/*
* v points to the address of the fmt field in the mod list
* structure that holds the module print format.
*/
mod_fmt = container_of(v, typeof(*mod_fmt), fmt);
if (mod_fmt->list.next == &trace_bprintk_fmt_list)
return NULL;
mod_fmt = container_of(mod_fmt->list.next, typeof(*mod_fmt), list);
return &mod_fmt->fmt;
}
static void format_mod_start(void)
{
mutex_lock(&btrace_mutex);
}
static void format_mod_stop(void)
{
mutex_unlock(&btrace_mutex);
}
#else /* !CONFIG_MODULES */
__init static int
module_trace_bprintk_format_notify(struct notifier_block *self,
unsigned long val, void *data)
{
return 0;
}
static inline const char **
find_next_mod_format(int start_index, void *v, const char **fmt, loff_t *pos)
{
return NULL;
}
static inline void format_mod_start(void) { }
static inline void format_mod_stop(void) { }
#endif /* CONFIG_MODULES */
__initdata_or_module static
struct notifier_block module_trace_bprintk_format_nb = {
.notifier_call = module_trace_bprintk_format_notify,
};
int __trace_bprintk(unsigned long ip, const char *fmt, ...)
{
int ret;
va_list ap;
if (unlikely(!fmt))
return 0;
if (!(trace_flags & TRACE_ITER_PRINTK))
return 0;
va_start(ap, fmt);
ret = trace_vbprintk(ip, fmt, ap);
va_end(ap);
return ret;
}
EXPORT_SYMBOL_GPL(__trace_bprintk);
int __ftrace_vbprintk(unsigned long ip, const char *fmt, va_list ap)
{
if (unlikely(!fmt))
return 0;
if (!(trace_flags & TRACE_ITER_PRINTK))
return 0;
return trace_vbprintk(ip, fmt, ap);
}
EXPORT_SYMBOL_GPL(__ftrace_vbprintk);
int __trace_printk(unsigned long ip, const char *fmt, ...)
{
int ret;
va_list ap;
if (!(trace_flags & TRACE_ITER_PRINTK))
return 0;
va_start(ap, fmt);
ret = trace_vprintk(ip, fmt, ap);
va_end(ap);
return ret;
}
EXPORT_SYMBOL_GPL(__trace_printk);
int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap)
{
if (!(trace_flags & TRACE_ITER_PRINTK))
return 0;
return trace_vprintk(ip, fmt, ap);
}
EXPORT_SYMBOL_GPL(__ftrace_vprintk);
static const char **find_next(void *v, loff_t *pos)
{
const char **fmt = v;
int start_index;
int last_index;
start_index = __stop___trace_bprintk_fmt - __start___trace_bprintk_fmt;
if (*pos < start_index)
return __start___trace_bprintk_fmt + *pos;
/*
* The __tracepoint_str section is treated the same as the
* __trace_printk_fmt section. The difference is that the
* __trace_printk_fmt section should only be used by trace_printk()
* in a debugging environment, as if anything exists in that section
* the trace_prink() helper buffers are allocated, which would just
* waste space in a production environment.
*
* The __tracepoint_str sections on the other hand are used by
* tracepoints which need to map pointers to their strings to
* the ASCII text for userspace.
*/
last_index = start_index;
start_index = __stop___tracepoint_str - __start___tracepoint_str;
if (*pos < last_index + start_index)
return __start___tracepoint_str + (*pos - last_index);
return find_next_mod_format(start_index, v, fmt, pos);
}
static void *
t_start(struct seq_file *m, loff_t *pos)
{
format_mod_start();
return find_next(NULL, pos);
}
static void *t_next(struct seq_file *m, void * v, loff_t *pos)
{
(*pos)++;
return find_next(v, pos);
}
static int t_show(struct seq_file *m, void *v)
{
const char **fmt = v;
const char *str = *fmt;
int i;
seq_printf(m, "0x%lx : \"", *(unsigned long *)fmt);
/*
* Tabs and new lines need to be converted.
*/
for (i = 0; str[i]; i++) {
switch (str[i]) {
case '\n':
seq_puts(m, "\\n");
break;
case '\t':
seq_puts(m, "\\t");
break;
case '\\':
seq_puts(m, "\\");
break;
case '"':
seq_puts(m, "\\\"");
break;
default:
seq_putc(m, str[i]);
}
}
seq_puts(m, "\"\n");
return 0;
}
static void t_stop(struct seq_file *m, void *p)
{
format_mod_stop();
}
static const struct seq_operations show_format_seq_ops = {
.start = t_start,
.next = t_next,
.show = t_show,
.stop = t_stop,
};
static int
ftrace_formats_open(struct inode *inode, struct file *file)
{
return seq_open(file, &show_format_seq_ops);
}
static const struct file_operations ftrace_formats_fops = {
.open = ftrace_formats_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release,
};
static __init int init_trace_printk_function_export(void)
{
struct dentry *d_tracer;
d_tracer = tracing_init_dentry();
if (!d_tracer)
return 0;
trace_create_file("printk_formats", 0444, d_tracer,
NULL, &ftrace_formats_fops);
return 0;
}
fs_initcall(init_trace_printk_function_export);
static __init int init_trace_printk(void)
{
return register_module_notifier(&module_trace_bprintk_format_nb);
}
early_initcall(init_trace_printk);

726
kernel/trace/trace_probe.c Normal file
View file

@ -0,0 +1,726 @@
/*
* Common code for probe-based Dynamic events.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* This code was copied from kernel/trace/trace_kprobe.c written by
* Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
*
* Updates to make this generic:
* Copyright (C) IBM Corporation, 2010-2011
* Author: Srikar Dronamraju
*/
#include "trace_probe.h"
const char *reserved_field_names[] = {
"common_type",
"common_flags",
"common_preempt_count",
"common_pid",
"common_tgid",
FIELD_STRING_IP,
FIELD_STRING_RETIP,
FIELD_STRING_FUNC,
};
/* Printing in basic type function template */
#define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt) \
int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, const char *name, \
void *data, void *ent) \
{ \
return trace_seq_printf(s, " %s=" fmt, name, *(type *)data); \
} \
const char PRINT_TYPE_FMT_NAME(type)[] = fmt; \
NOKPROBE_SYMBOL(PRINT_TYPE_FUNC_NAME(type));
DEFINE_BASIC_PRINT_TYPE_FUNC(u8 , "0x%x")
DEFINE_BASIC_PRINT_TYPE_FUNC(u16, "0x%x")
DEFINE_BASIC_PRINT_TYPE_FUNC(u32, "0x%x")
DEFINE_BASIC_PRINT_TYPE_FUNC(u64, "0x%Lx")
DEFINE_BASIC_PRINT_TYPE_FUNC(s8, "%d")
DEFINE_BASIC_PRINT_TYPE_FUNC(s16, "%d")
DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%d")
DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%Ld")
/* Print type function for string type */
int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, const char *name,
void *data, void *ent)
{
int len = *(u32 *)data >> 16;
if (!len)
return trace_seq_printf(s, " %s=(fault)", name);
else
return trace_seq_printf(s, " %s=\"%s\"", name,
(const char *)get_loc_data(data, ent));
}
NOKPROBE_SYMBOL(PRINT_TYPE_FUNC_NAME(string));
const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\"";
#define CHECK_FETCH_FUNCS(method, fn) \
(((FETCH_FUNC_NAME(method, u8) == fn) || \
(FETCH_FUNC_NAME(method, u16) == fn) || \
(FETCH_FUNC_NAME(method, u32) == fn) || \
(FETCH_FUNC_NAME(method, u64) == fn) || \
(FETCH_FUNC_NAME(method, string) == fn) || \
(FETCH_FUNC_NAME(method, string_size) == fn)) \
&& (fn != NULL))
/* Data fetch function templates */
#define DEFINE_FETCH_reg(type) \
void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, void *offset, void *dest) \
{ \
*(type *)dest = (type)regs_get_register(regs, \
(unsigned int)((unsigned long)offset)); \
} \
NOKPROBE_SYMBOL(FETCH_FUNC_NAME(reg, type));
DEFINE_BASIC_FETCH_FUNCS(reg)
/* No string on the register */
#define fetch_reg_string NULL
#define fetch_reg_string_size NULL
#define DEFINE_FETCH_retval(type) \
void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs, \
void *dummy, void *dest) \
{ \
*(type *)dest = (type)regs_return_value(regs); \
} \
NOKPROBE_SYMBOL(FETCH_FUNC_NAME(retval, type));
DEFINE_BASIC_FETCH_FUNCS(retval)
/* No string on the retval */
#define fetch_retval_string NULL
#define fetch_retval_string_size NULL
/* Dereference memory access function */
struct deref_fetch_param {
struct fetch_param orig;
long offset;
fetch_func_t fetch;
fetch_func_t fetch_size;
};
#define DEFINE_FETCH_deref(type) \
void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs, \
void *data, void *dest) \
{ \
struct deref_fetch_param *dprm = data; \
unsigned long addr; \
call_fetch(&dprm->orig, regs, &addr); \
if (addr) { \
addr += dprm->offset; \
dprm->fetch(regs, (void *)addr, dest); \
} else \
*(type *)dest = 0; \
} \
NOKPROBE_SYMBOL(FETCH_FUNC_NAME(deref, type));
DEFINE_BASIC_FETCH_FUNCS(deref)
DEFINE_FETCH_deref(string)
void FETCH_FUNC_NAME(deref, string_size)(struct pt_regs *regs,
void *data, void *dest)
{
struct deref_fetch_param *dprm = data;
unsigned long addr;
call_fetch(&dprm->orig, regs, &addr);
if (addr && dprm->fetch_size) {
addr += dprm->offset;
dprm->fetch_size(regs, (void *)addr, dest);
} else
*(string_size *)dest = 0;
}
NOKPROBE_SYMBOL(FETCH_FUNC_NAME(deref, string_size));
static void update_deref_fetch_param(struct deref_fetch_param *data)
{
if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
update_deref_fetch_param(data->orig.data);
else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
update_symbol_cache(data->orig.data);
}
NOKPROBE_SYMBOL(update_deref_fetch_param);
static void free_deref_fetch_param(struct deref_fetch_param *data)
{
if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
free_deref_fetch_param(data->orig.data);
else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
free_symbol_cache(data->orig.data);
kfree(data);
}
NOKPROBE_SYMBOL(free_deref_fetch_param);
/* Bitfield fetch function */
struct bitfield_fetch_param {
struct fetch_param orig;
unsigned char hi_shift;
unsigned char low_shift;
};
#define DEFINE_FETCH_bitfield(type) \
void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs, \
void *data, void *dest) \
{ \
struct bitfield_fetch_param *bprm = data; \
type buf = 0; \
call_fetch(&bprm->orig, regs, &buf); \
if (buf) { \
buf <<= bprm->hi_shift; \
buf >>= bprm->low_shift; \
} \
*(type *)dest = buf; \
} \
NOKPROBE_SYMBOL(FETCH_FUNC_NAME(bitfield, type));
DEFINE_BASIC_FETCH_FUNCS(bitfield)
#define fetch_bitfield_string NULL
#define fetch_bitfield_string_size NULL
static void
update_bitfield_fetch_param(struct bitfield_fetch_param *data)
{
/*
* Don't check the bitfield itself, because this must be the
* last fetch function.
*/
if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
update_deref_fetch_param(data->orig.data);
else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
update_symbol_cache(data->orig.data);
}
static void
free_bitfield_fetch_param(struct bitfield_fetch_param *data)
{
/*
* Don't check the bitfield itself, because this must be the
* last fetch function.
*/
if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
free_deref_fetch_param(data->orig.data);
else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
free_symbol_cache(data->orig.data);
kfree(data);
}
static const struct fetch_type *find_fetch_type(const char *type,
const struct fetch_type *ftbl)
{
int i;
if (!type)
type = DEFAULT_FETCH_TYPE_STR;
/* Special case: bitfield */
if (*type == 'b') {
unsigned long bs;
type = strchr(type, '/');
if (!type)
goto fail;
type++;
if (kstrtoul(type, 0, &bs))
goto fail;
switch (bs) {
case 8:
return find_fetch_type("u8", ftbl);
case 16:
return find_fetch_type("u16", ftbl);
case 32:
return find_fetch_type("u32", ftbl);
case 64:
return find_fetch_type("u64", ftbl);
default:
goto fail;
}
}
for (i = 0; ftbl[i].name; i++) {
if (strcmp(type, ftbl[i].name) == 0)
return &ftbl[i];
}
fail:
return NULL;
}
/* Special function : only accept unsigned long */
static void fetch_kernel_stack_address(struct pt_regs *regs, void *dummy, void *dest)
{
*(unsigned long *)dest = kernel_stack_pointer(regs);
}
NOKPROBE_SYMBOL(fetch_kernel_stack_address);
static void fetch_user_stack_address(struct pt_regs *regs, void *dummy, void *dest)
{
*(unsigned long *)dest = user_stack_pointer(regs);
}
NOKPROBE_SYMBOL(fetch_user_stack_address);
static fetch_func_t get_fetch_size_function(const struct fetch_type *type,
fetch_func_t orig_fn,
const struct fetch_type *ftbl)
{
int i;
if (type != &ftbl[FETCH_TYPE_STRING])
return NULL; /* Only string type needs size function */
for (i = 0; i < FETCH_MTD_END; i++)
if (type->fetch[i] == orig_fn)
return ftbl[FETCH_TYPE_STRSIZE].fetch[i];
WARN_ON(1); /* This should not happen */
return NULL;
}
/* Split symbol and offset. */
int traceprobe_split_symbol_offset(char *symbol, unsigned long *offset)
{
char *tmp;
int ret;
if (!offset)
return -EINVAL;
tmp = strchr(symbol, '+');
if (tmp) {
/* skip sign because kstrtoul doesn't accept '+' */
ret = kstrtoul(tmp + 1, 0, offset);
if (ret)
return ret;
*tmp = '\0';
} else
*offset = 0;
return 0;
}
#define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long))
static int parse_probe_vars(char *arg, const struct fetch_type *t,
struct fetch_param *f, bool is_return,
bool is_kprobe)
{
int ret = 0;
unsigned long param;
if (strcmp(arg, "retval") == 0) {
if (is_return)
f->fn = t->fetch[FETCH_MTD_retval];
else
ret = -EINVAL;
} else if (strncmp(arg, "stack", 5) == 0) {
if (arg[5] == '\0') {
if (strcmp(t->name, DEFAULT_FETCH_TYPE_STR))
return -EINVAL;
if (is_kprobe)
f->fn = fetch_kernel_stack_address;
else
f->fn = fetch_user_stack_address;
} else if (isdigit(arg[5])) {
ret = kstrtoul(arg + 5, 10, &param);
if (ret || (is_kprobe && param > PARAM_MAX_STACK))
ret = -EINVAL;
else {
f->fn = t->fetch[FETCH_MTD_stack];
f->data = (void *)param;
}
} else
ret = -EINVAL;
} else
ret = -EINVAL;
return ret;
}
/* Recursive argument parser */
static int parse_probe_arg(char *arg, const struct fetch_type *t,
struct fetch_param *f, bool is_return, bool is_kprobe)
{
const struct fetch_type *ftbl;
unsigned long param;
long offset;
char *tmp;
int ret = 0;
ftbl = is_kprobe ? kprobes_fetch_type_table : uprobes_fetch_type_table;
BUG_ON(ftbl == NULL);
switch (arg[0]) {
case '$':
ret = parse_probe_vars(arg + 1, t, f, is_return, is_kprobe);
break;
case '%': /* named register */
ret = regs_query_register_offset(arg + 1);
if (ret >= 0) {
f->fn = t->fetch[FETCH_MTD_reg];
f->data = (void *)(unsigned long)ret;
ret = 0;
}
break;
case '@': /* memory, file-offset or symbol */
if (isdigit(arg[1])) {
ret = kstrtoul(arg + 1, 0, &param);
if (ret)
break;
f->fn = t->fetch[FETCH_MTD_memory];
f->data = (void *)param;
} else if (arg[1] == '+') {
/* kprobes don't support file offsets */
if (is_kprobe)
return -EINVAL;
ret = kstrtol(arg + 2, 0, &offset);
if (ret)
break;
f->fn = t->fetch[FETCH_MTD_file_offset];
f->data = (void *)offset;
} else {
/* uprobes don't support symbols */
if (!is_kprobe)
return -EINVAL;
ret = traceprobe_split_symbol_offset(arg + 1, &offset);
if (ret)
break;
f->data = alloc_symbol_cache(arg + 1, offset);
if (f->data)
f->fn = t->fetch[FETCH_MTD_symbol];
}
break;
case '+': /* deref memory */
arg++; /* Skip '+', because kstrtol() rejects it. */
case '-':
tmp = strchr(arg, '(');
if (!tmp)
break;
*tmp = '\0';
ret = kstrtol(arg, 0, &offset);
if (ret)
break;
arg = tmp + 1;
tmp = strrchr(arg, ')');
if (tmp) {
struct deref_fetch_param *dprm;
const struct fetch_type *t2;
t2 = find_fetch_type(NULL, ftbl);
*tmp = '\0';
dprm = kzalloc(sizeof(struct deref_fetch_param), GFP_KERNEL);
if (!dprm)
return -ENOMEM;
dprm->offset = offset;
dprm->fetch = t->fetch[FETCH_MTD_memory];
dprm->fetch_size = get_fetch_size_function(t,
dprm->fetch, ftbl);
ret = parse_probe_arg(arg, t2, &dprm->orig, is_return,
is_kprobe);
if (ret)
kfree(dprm);
else {
f->fn = t->fetch[FETCH_MTD_deref];
f->data = (void *)dprm;
}
}
break;
}
if (!ret && !f->fn) { /* Parsed, but do not find fetch method */
pr_info("%s type has no corresponding fetch method.\n", t->name);
ret = -EINVAL;
}
return ret;
}
#define BYTES_TO_BITS(nb) ((BITS_PER_LONG * (nb)) / sizeof(long))
/* Bitfield type needs to be parsed into a fetch function */
static int __parse_bitfield_probe_arg(const char *bf,
const struct fetch_type *t,
struct fetch_param *f)
{
struct bitfield_fetch_param *bprm;
unsigned long bw, bo;
char *tail;
if (*bf != 'b')
return 0;
bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
if (!bprm)
return -ENOMEM;
bprm->orig = *f;
f->fn = t->fetch[FETCH_MTD_bitfield];
f->data = (void *)bprm;
bw = simple_strtoul(bf + 1, &tail, 0); /* Use simple one */
if (bw == 0 || *tail != '@')
return -EINVAL;
bf = tail + 1;
bo = simple_strtoul(bf, &tail, 0);
if (tail == bf || *tail != '/')
return -EINVAL;
bprm->hi_shift = BYTES_TO_BITS(t->size) - (bw + bo);
bprm->low_shift = bprm->hi_shift + bo;
return (BYTES_TO_BITS(t->size) < (bw + bo)) ? -EINVAL : 0;
}
/* String length checking wrapper */
int traceprobe_parse_probe_arg(char *arg, ssize_t *size,
struct probe_arg *parg, bool is_return, bool is_kprobe)
{
const struct fetch_type *ftbl;
const char *t;
int ret;
ftbl = is_kprobe ? kprobes_fetch_type_table : uprobes_fetch_type_table;
BUG_ON(ftbl == NULL);
if (strlen(arg) > MAX_ARGSTR_LEN) {
pr_info("Argument is too long.: %s\n", arg);
return -ENOSPC;
}
parg->comm = kstrdup(arg, GFP_KERNEL);
if (!parg->comm) {
pr_info("Failed to allocate memory for command '%s'.\n", arg);
return -ENOMEM;
}
t = strchr(parg->comm, ':');
if (t) {
arg[t - parg->comm] = '\0';
t++;
}
parg->type = find_fetch_type(t, ftbl);
if (!parg->type) {
pr_info("Unsupported type: %s\n", t);
return -EINVAL;
}
parg->offset = *size;
*size += parg->type->size;
ret = parse_probe_arg(arg, parg->type, &parg->fetch, is_return, is_kprobe);
if (ret >= 0 && t != NULL)
ret = __parse_bitfield_probe_arg(t, parg->type, &parg->fetch);
if (ret >= 0) {
parg->fetch_size.fn = get_fetch_size_function(parg->type,
parg->fetch.fn,
ftbl);
parg->fetch_size.data = parg->fetch.data;
}
return ret;
}
/* Return 1 if name is reserved or already used by another argument */
int traceprobe_conflict_field_name(const char *name,
struct probe_arg *args, int narg)
{
int i;
for (i = 0; i < ARRAY_SIZE(reserved_field_names); i++)
if (strcmp(reserved_field_names[i], name) == 0)
return 1;
for (i = 0; i < narg; i++)
if (strcmp(args[i].name, name) == 0)
return 1;
return 0;
}
void traceprobe_update_arg(struct probe_arg *arg)
{
if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn))
update_bitfield_fetch_param(arg->fetch.data);
else if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn))
update_deref_fetch_param(arg->fetch.data);
else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn))
update_symbol_cache(arg->fetch.data);
}
void traceprobe_free_probe_arg(struct probe_arg *arg)
{
if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn))
free_bitfield_fetch_param(arg->fetch.data);
else if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn))
free_deref_fetch_param(arg->fetch.data);
else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn))
free_symbol_cache(arg->fetch.data);
kfree(arg->name);
kfree(arg->comm);
}
int traceprobe_command(const char *buf, int (*createfn)(int, char **))
{
char **argv;
int argc, ret;
argc = 0;
ret = 0;
argv = argv_split(GFP_KERNEL, buf, &argc);
if (!argv)
return -ENOMEM;
if (argc)
ret = createfn(argc, argv);
argv_free(argv);
return ret;
}
#define WRITE_BUFSIZE 4096
ssize_t traceprobe_probes_write(struct file *file, const char __user *buffer,
size_t count, loff_t *ppos,
int (*createfn)(int, char **))
{
char *kbuf, *tmp;
int ret = 0;
size_t done = 0;
size_t size;
kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
if (!kbuf)
return -ENOMEM;
while (done < count) {
size = count - done;
if (size >= WRITE_BUFSIZE)
size = WRITE_BUFSIZE - 1;
if (copy_from_user(kbuf, buffer + done, size)) {
ret = -EFAULT;
goto out;
}
kbuf[size] = '\0';
tmp = strchr(kbuf, '\n');
if (tmp) {
*tmp = '\0';
size = tmp - kbuf + 1;
} else if (done + size < count) {
pr_warning("Line length is too long: "
"Should be less than %d.", WRITE_BUFSIZE);
ret = -EINVAL;
goto out;
}
done += size;
/* Remove comments */
tmp = strchr(kbuf, '#');
if (tmp)
*tmp = '\0';
ret = traceprobe_command(kbuf, createfn);
if (ret)
goto out;
}
ret = done;
out:
kfree(kbuf);
return ret;
}
static int __set_print_fmt(struct trace_probe *tp, char *buf, int len,
bool is_return)
{
int i;
int pos = 0;
const char *fmt, *arg;
if (!is_return) {
fmt = "(%lx)";
arg = "REC->" FIELD_STRING_IP;
} else {
fmt = "(%lx <- %lx)";
arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
}
/* When len=0, we just calculate the needed length */
#define LEN_OR_ZERO (len ? len - pos : 0)
pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt);
for (i = 0; i < tp->nr_args; i++) {
pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s",
tp->args[i].name, tp->args[i].type->fmt);
}
pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);
for (i = 0; i < tp->nr_args; i++) {
if (strcmp(tp->args[i].type->name, "string") == 0)
pos += snprintf(buf + pos, LEN_OR_ZERO,
", __get_str(%s)",
tp->args[i].name);
else
pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s",
tp->args[i].name);
}
#undef LEN_OR_ZERO
/* return the length of print_fmt */
return pos;
}
int set_print_fmt(struct trace_probe *tp, bool is_return)
{
int len;
char *print_fmt;
/* First: called with 0 length to calculate the needed length */
len = __set_print_fmt(tp, NULL, 0, is_return);
print_fmt = kmalloc(len + 1, GFP_KERNEL);
if (!print_fmt)
return -ENOMEM;
/* Second: actually write the @print_fmt */
__set_print_fmt(tp, print_fmt, len + 1, is_return);
tp->call.print_fmt = print_fmt;
return 0;
}

400
kernel/trace/trace_probe.h Normal file
View file

@ -0,0 +1,400 @@
/*
* Common header file for probe-based Dynamic events.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* This code was copied from kernel/trace/trace_kprobe.h written by
* Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
*
* Updates to make this generic:
* Copyright (C) IBM Corporation, 2010-2011
* Author: Srikar Dronamraju
*/
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/debugfs.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/ctype.h>
#include <linux/ptrace.h>
#include <linux/perf_event.h>
#include <linux/kprobes.h>
#include <linux/stringify.h>
#include <linux/limits.h>
#include <linux/uaccess.h>
#include <asm/bitsperlong.h>
#include "trace.h"
#include "trace_output.h"
#define MAX_TRACE_ARGS 128
#define MAX_ARGSTR_LEN 63
#define MAX_EVENT_NAME_LEN 64
#define MAX_STRING_SIZE PATH_MAX
/* Reserved field names */
#define FIELD_STRING_IP "__probe_ip"
#define FIELD_STRING_RETIP "__probe_ret_ip"
#define FIELD_STRING_FUNC "__probe_func"
#undef DEFINE_FIELD
#define DEFINE_FIELD(type, item, name, is_signed) \
do { \
ret = trace_define_field(event_call, #type, name, \
offsetof(typeof(field), item), \
sizeof(field.item), is_signed, \
FILTER_OTHER); \
if (ret) \
return ret; \
} while (0)
/* Flags for trace_probe */
#define TP_FLAG_TRACE 1
#define TP_FLAG_PROFILE 2
#define TP_FLAG_REGISTERED 4
/* data_rloc: data relative location, compatible with u32 */
#define make_data_rloc(len, roffs) \
(((u32)(len) << 16) | ((u32)(roffs) & 0xffff))
#define get_rloc_len(dl) ((u32)(dl) >> 16)
#define get_rloc_offs(dl) ((u32)(dl) & 0xffff)
/*
* Convert data_rloc to data_loc:
* data_rloc stores the offset from data_rloc itself, but data_loc
* stores the offset from event entry.
*/
#define convert_rloc_to_loc(dl, offs) ((u32)(dl) + (offs))
static nokprobe_inline void *get_rloc_data(u32 *dl)
{
return (u8 *)dl + get_rloc_offs(*dl);
}
/* For data_loc conversion */
static nokprobe_inline void *get_loc_data(u32 *dl, void *ent)
{
return (u8 *)ent + get_rloc_offs(*dl);
}
/* Data fetch function type */
typedef void (*fetch_func_t)(struct pt_regs *, void *, void *);
/* Printing function type */
typedef int (*print_type_func_t)(struct trace_seq *, const char *, void *, void *);
/* Fetch types */
enum {
FETCH_MTD_reg = 0,
FETCH_MTD_stack,
FETCH_MTD_retval,
FETCH_MTD_memory,
FETCH_MTD_symbol,
FETCH_MTD_deref,
FETCH_MTD_bitfield,
FETCH_MTD_file_offset,
FETCH_MTD_END,
};
/* Fetch type information table */
struct fetch_type {
const char *name; /* Name of type */
size_t size; /* Byte size of type */
int is_signed; /* Signed flag */
print_type_func_t print; /* Print functions */
const char *fmt; /* Fromat string */
const char *fmttype; /* Name in format file */
/* Fetch functions */
fetch_func_t fetch[FETCH_MTD_END];
};
struct fetch_param {
fetch_func_t fn;
void *data;
};
/* For defining macros, define string/string_size types */
typedef u32 string;
typedef u32 string_size;
#define PRINT_TYPE_FUNC_NAME(type) print_type_##type
#define PRINT_TYPE_FMT_NAME(type) print_type_format_##type
/* Printing in basic type function template */
#define DECLARE_BASIC_PRINT_TYPE_FUNC(type) \
int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, const char *name, \
void *data, void *ent); \
extern const char PRINT_TYPE_FMT_NAME(type)[]
DECLARE_BASIC_PRINT_TYPE_FUNC(u8);
DECLARE_BASIC_PRINT_TYPE_FUNC(u16);
DECLARE_BASIC_PRINT_TYPE_FUNC(u32);
DECLARE_BASIC_PRINT_TYPE_FUNC(u64);
DECLARE_BASIC_PRINT_TYPE_FUNC(s8);
DECLARE_BASIC_PRINT_TYPE_FUNC(s16);
DECLARE_BASIC_PRINT_TYPE_FUNC(s32);
DECLARE_BASIC_PRINT_TYPE_FUNC(s64);
DECLARE_BASIC_PRINT_TYPE_FUNC(string);
#define FETCH_FUNC_NAME(method, type) fetch_##method##_##type
/* Declare macro for basic types */
#define DECLARE_FETCH_FUNC(method, type) \
extern void FETCH_FUNC_NAME(method, type)(struct pt_regs *regs, \
void *data, void *dest)
#define DECLARE_BASIC_FETCH_FUNCS(method) \
DECLARE_FETCH_FUNC(method, u8); \
DECLARE_FETCH_FUNC(method, u16); \
DECLARE_FETCH_FUNC(method, u32); \
DECLARE_FETCH_FUNC(method, u64)
DECLARE_BASIC_FETCH_FUNCS(reg);
#define fetch_reg_string NULL
#define fetch_reg_string_size NULL
DECLARE_BASIC_FETCH_FUNCS(retval);
#define fetch_retval_string NULL
#define fetch_retval_string_size NULL
DECLARE_BASIC_FETCH_FUNCS(symbol);
DECLARE_FETCH_FUNC(symbol, string);
DECLARE_FETCH_FUNC(symbol, string_size);
DECLARE_BASIC_FETCH_FUNCS(deref);
DECLARE_FETCH_FUNC(deref, string);
DECLARE_FETCH_FUNC(deref, string_size);
DECLARE_BASIC_FETCH_FUNCS(bitfield);
#define fetch_bitfield_string NULL
#define fetch_bitfield_string_size NULL
/*
* Define macro for basic types - we don't need to define s* types, because
* we have to care only about bitwidth at recording time.
*/
#define DEFINE_BASIC_FETCH_FUNCS(method) \
DEFINE_FETCH_##method(u8) \
DEFINE_FETCH_##method(u16) \
DEFINE_FETCH_##method(u32) \
DEFINE_FETCH_##method(u64)
/* Default (unsigned long) fetch type */
#define __DEFAULT_FETCH_TYPE(t) u##t
#define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t)
#define DEFAULT_FETCH_TYPE _DEFAULT_FETCH_TYPE(BITS_PER_LONG)
#define DEFAULT_FETCH_TYPE_STR __stringify(DEFAULT_FETCH_TYPE)
#define ASSIGN_FETCH_FUNC(method, type) \
[FETCH_MTD_##method] = FETCH_FUNC_NAME(method, type)
#define __ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, _fmttype) \
{.name = _name, \
.size = _size, \
.is_signed = sign, \
.print = PRINT_TYPE_FUNC_NAME(ptype), \
.fmt = PRINT_TYPE_FMT_NAME(ptype), \
.fmttype = _fmttype, \
.fetch = { \
ASSIGN_FETCH_FUNC(reg, ftype), \
ASSIGN_FETCH_FUNC(stack, ftype), \
ASSIGN_FETCH_FUNC(retval, ftype), \
ASSIGN_FETCH_FUNC(memory, ftype), \
ASSIGN_FETCH_FUNC(symbol, ftype), \
ASSIGN_FETCH_FUNC(deref, ftype), \
ASSIGN_FETCH_FUNC(bitfield, ftype), \
ASSIGN_FETCH_FUNC(file_offset, ftype), \
} \
}
#define ASSIGN_FETCH_TYPE(ptype, ftype, sign) \
__ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, #ptype)
#define ASSIGN_FETCH_TYPE_END {}
#define FETCH_TYPE_STRING 0
#define FETCH_TYPE_STRSIZE 1
/*
* Fetch type information table.
* It's declared as a weak symbol due to conditional compilation.
*/
extern __weak const struct fetch_type kprobes_fetch_type_table[];
extern __weak const struct fetch_type uprobes_fetch_type_table[];
#ifdef CONFIG_KPROBE_EVENT
struct symbol_cache;
unsigned long update_symbol_cache(struct symbol_cache *sc);
void free_symbol_cache(struct symbol_cache *sc);
struct symbol_cache *alloc_symbol_cache(const char *sym, long offset);
#else
/* uprobes do not support symbol fetch methods */
#define fetch_symbol_u8 NULL
#define fetch_symbol_u16 NULL
#define fetch_symbol_u32 NULL
#define fetch_symbol_u64 NULL
#define fetch_symbol_string NULL
#define fetch_symbol_string_size NULL
struct symbol_cache {
};
static inline unsigned long __used update_symbol_cache(struct symbol_cache *sc)
{
return 0;
}
static inline void __used free_symbol_cache(struct symbol_cache *sc)
{
}
static inline struct symbol_cache * __used
alloc_symbol_cache(const char *sym, long offset)
{
return NULL;
}
#endif /* CONFIG_KPROBE_EVENT */
struct probe_arg {
struct fetch_param fetch;
struct fetch_param fetch_size;
unsigned int offset; /* Offset from argument entry */
const char *name; /* Name of this argument */
const char *comm; /* Command of this argument */
const struct fetch_type *type; /* Type of this argument */
};
struct trace_probe {
unsigned int flags; /* For TP_FLAG_* */
struct ftrace_event_class class;
struct ftrace_event_call call;
struct list_head files;
ssize_t size; /* trace entry size */
unsigned int nr_args;
struct probe_arg args[];
};
struct event_file_link {
struct ftrace_event_file *file;
struct list_head list;
};
static inline bool trace_probe_is_enabled(struct trace_probe *tp)
{
return !!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE));
}
static inline bool trace_probe_is_registered(struct trace_probe *tp)
{
return !!(tp->flags & TP_FLAG_REGISTERED);
}
static nokprobe_inline void call_fetch(struct fetch_param *fprm,
struct pt_regs *regs, void *dest)
{
return fprm->fn(regs, fprm->data, dest);
}
/* Check the name is good for event/group/fields */
static inline int is_good_name(const char *name)
{
if (!isalpha(*name) && *name != '_')
return 0;
while (*++name != '\0') {
if (!isalpha(*name) && !isdigit(*name) && *name != '_')
return 0;
}
return 1;
}
static inline struct event_file_link *
find_event_file_link(struct trace_probe *tp, struct ftrace_event_file *file)
{
struct event_file_link *link;
list_for_each_entry(link, &tp->files, list)
if (link->file == file)
return link;
return NULL;
}
extern int traceprobe_parse_probe_arg(char *arg, ssize_t *size,
struct probe_arg *parg, bool is_return, bool is_kprobe);
extern int traceprobe_conflict_field_name(const char *name,
struct probe_arg *args, int narg);
extern void traceprobe_update_arg(struct probe_arg *arg);
extern void traceprobe_free_probe_arg(struct probe_arg *arg);
extern int traceprobe_split_symbol_offset(char *symbol, unsigned long *offset);
extern ssize_t traceprobe_probes_write(struct file *file,
const char __user *buffer, size_t count, loff_t *ppos,
int (*createfn)(int, char**));
extern int traceprobe_command(const char *buf, int (*createfn)(int, char**));
/* Sum up total data length for dynamic arraies (strings) */
static nokprobe_inline int
__get_data_size(struct trace_probe *tp, struct pt_regs *regs)
{
int i, ret = 0;
u32 len;
for (i = 0; i < tp->nr_args; i++)
if (unlikely(tp->args[i].fetch_size.fn)) {
call_fetch(&tp->args[i].fetch_size, regs, &len);
ret += len;
}
return ret;
}
/* Store the value of each argument */
static nokprobe_inline void
store_trace_args(int ent_size, struct trace_probe *tp, struct pt_regs *regs,
u8 *data, int maxlen)
{
int i;
u32 end = tp->size;
u32 *dl; /* Data (relative) location */
for (i = 0; i < tp->nr_args; i++) {
if (unlikely(tp->args[i].fetch_size.fn)) {
/*
* First, we set the relative location and
* maximum data length to *dl
*/
dl = (u32 *)(data + tp->args[i].offset);
*dl = make_data_rloc(maxlen, end - tp->args[i].offset);
/* Then try to fetch string or dynamic array data */
call_fetch(&tp->args[i].fetch, regs, dl);
/* Reduce maximum length */
end += get_rloc_len(*dl);
maxlen -= get_rloc_len(*dl);
/* Trick here, convert data_rloc to data_loc */
*dl = convert_rloc_to_loc(*dl,
ent_size + tp->args[i].offset);
} else
/* Just fetching data normally */
call_fetch(&tp->args[i].fetch, regs,
data + tp->args[i].offset);
}
}
extern int set_print_fmt(struct trace_probe *tp, bool is_return);

View file

@ -0,0 +1,247 @@
/*
* trace context switch
*
* Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com>
*
*/
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/debugfs.h>
#include <linux/kallsyms.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <trace/events/sched.h>
#include "trace.h"
static struct trace_array *ctx_trace;
static int __read_mostly tracer_enabled;
static int sched_ref;
static DEFINE_MUTEX(sched_register_mutex);
static int sched_stopped;
void
tracing_sched_switch_trace(struct trace_array *tr,
struct task_struct *prev,
struct task_struct *next,
unsigned long flags, int pc)
{
struct ftrace_event_call *call = &event_context_switch;
struct ring_buffer *buffer = tr->trace_buffer.buffer;
struct ring_buffer_event *event;
struct ctx_switch_entry *entry;
event = trace_buffer_lock_reserve(buffer, TRACE_CTX,
sizeof(*entry), flags, pc);
if (!event)
return;
entry = ring_buffer_event_data(event);
entry->prev_pid = prev->pid;
entry->prev_prio = prev->prio;
entry->prev_state = prev->state;
entry->next_pid = next->pid;
entry->next_prio = next->prio;
entry->next_state = next->state;
entry->next_cpu = task_cpu(next);
if (!call_filter_check_discard(call, entry, buffer, event))
trace_buffer_unlock_commit(buffer, event, flags, pc);
}
static void
probe_sched_switch(void *ignore, struct task_struct *prev, struct task_struct *next)
{
struct trace_array_cpu *data;
unsigned long flags;
int cpu;
int pc;
if (unlikely(!sched_ref))
return;
tracing_record_cmdline(prev);
tracing_record_cmdline(next);
if (!tracer_enabled || sched_stopped)
return;
pc = preempt_count();
local_irq_save(flags);
cpu = raw_smp_processor_id();
data = per_cpu_ptr(ctx_trace->trace_buffer.data, cpu);
if (likely(!atomic_read(&data->disabled)))
tracing_sched_switch_trace(ctx_trace, prev, next, flags, pc);
local_irq_restore(flags);
}
void
tracing_sched_wakeup_trace(struct trace_array *tr,
struct task_struct *wakee,
struct task_struct *curr,
unsigned long flags, int pc)
{
struct ftrace_event_call *call = &event_wakeup;
struct ring_buffer_event *event;
struct ctx_switch_entry *entry;
struct ring_buffer *buffer = tr->trace_buffer.buffer;
event = trace_buffer_lock_reserve(buffer, TRACE_WAKE,
sizeof(*entry), flags, pc);
if (!event)
return;
entry = ring_buffer_event_data(event);
entry->prev_pid = curr->pid;
entry->prev_prio = curr->prio;
entry->prev_state = curr->state;
entry->next_pid = wakee->pid;
entry->next_prio = wakee->prio;
entry->next_state = wakee->state;
entry->next_cpu = task_cpu(wakee);
if (!call_filter_check_discard(call, entry, buffer, event))
trace_buffer_unlock_commit(buffer, event, flags, pc);
}
static void
probe_sched_wakeup(void *ignore, struct task_struct *wakee, int success)
{
struct trace_array_cpu *data;
unsigned long flags;
int cpu, pc;
if (unlikely(!sched_ref))
return;
tracing_record_cmdline(current);
if (!tracer_enabled || sched_stopped)
return;
pc = preempt_count();
local_irq_save(flags);
cpu = raw_smp_processor_id();
data = per_cpu_ptr(ctx_trace->trace_buffer.data, cpu);
if (likely(!atomic_read(&data->disabled)))
tracing_sched_wakeup_trace(ctx_trace, wakee, current,
flags, pc);
local_irq_restore(flags);
}
static int tracing_sched_register(void)
{
int ret;
ret = register_trace_sched_wakeup(probe_sched_wakeup, NULL);
if (ret) {
pr_info("wakeup trace: Couldn't activate tracepoint"
" probe to kernel_sched_wakeup\n");
return ret;
}
ret = register_trace_sched_wakeup_new(probe_sched_wakeup, NULL);
if (ret) {
pr_info("wakeup trace: Couldn't activate tracepoint"
" probe to kernel_sched_wakeup_new\n");
goto fail_deprobe;
}
ret = register_trace_sched_switch(probe_sched_switch, NULL);
if (ret) {
pr_info("sched trace: Couldn't activate tracepoint"
" probe to kernel_sched_switch\n");
goto fail_deprobe_wake_new;
}
return ret;
fail_deprobe_wake_new:
unregister_trace_sched_wakeup_new(probe_sched_wakeup, NULL);
fail_deprobe:
unregister_trace_sched_wakeup(probe_sched_wakeup, NULL);
return ret;
}
static void tracing_sched_unregister(void)
{
unregister_trace_sched_switch(probe_sched_switch, NULL);
unregister_trace_sched_wakeup_new(probe_sched_wakeup, NULL);
unregister_trace_sched_wakeup(probe_sched_wakeup, NULL);
}
static void tracing_start_sched_switch(void)
{
mutex_lock(&sched_register_mutex);
if (!(sched_ref++))
tracing_sched_register();
mutex_unlock(&sched_register_mutex);
}
static void tracing_stop_sched_switch(void)
{
mutex_lock(&sched_register_mutex);
if (!(--sched_ref))
tracing_sched_unregister();
mutex_unlock(&sched_register_mutex);
}
void tracing_start_cmdline_record(void)
{
tracing_start_sched_switch();
}
void tracing_stop_cmdline_record(void)
{
tracing_stop_sched_switch();
}
/**
* tracing_start_sched_switch_record - start tracing context switches
*
* Turns on context switch tracing for a tracer.
*/
void tracing_start_sched_switch_record(void)
{
if (unlikely(!ctx_trace)) {
WARN_ON(1);
return;
}
tracing_start_sched_switch();
mutex_lock(&sched_register_mutex);
tracer_enabled++;
mutex_unlock(&sched_register_mutex);
}
/**
* tracing_stop_sched_switch_record - start tracing context switches
*
* Turns off context switch tracing for a tracer.
*/
void tracing_stop_sched_switch_record(void)
{
mutex_lock(&sched_register_mutex);
tracer_enabled--;
WARN_ON(tracer_enabled < 0);
mutex_unlock(&sched_register_mutex);
tracing_stop_sched_switch();
}
/**
* tracing_sched_switch_assign_trace - assign a trace array for ctx switch
* @tr: trace array pointer to assign
*
* Some tracers might want to record the context switches in their
* trace. This function lets those tracers assign the trace array
* to use.
*/
void tracing_sched_switch_assign_trace(struct trace_array *tr)
{
ctx_trace = tr;
}

View file

@ -0,0 +1,762 @@
/*
* trace task wakeup timings
*
* Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
* Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
*
* Based on code from the latency_tracer, that is:
*
* Copyright (C) 2004-2006 Ingo Molnar
* Copyright (C) 2004 Nadia Yvette Chambers
*/
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/debugfs.h>
#include <linux/kallsyms.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/sched/rt.h>
#include <linux/sched/deadline.h>
#include <trace/events/sched.h>
#include "trace.h"
static struct trace_array *wakeup_trace;
static int __read_mostly tracer_enabled;
static struct task_struct *wakeup_task;
static int wakeup_cpu;
static int wakeup_current_cpu;
static unsigned wakeup_prio = -1;
static int wakeup_rt;
static int wakeup_dl;
static int tracing_dl = 0;
static arch_spinlock_t wakeup_lock =
(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
static void wakeup_reset(struct trace_array *tr);
static void __wakeup_reset(struct trace_array *tr);
static int wakeup_graph_entry(struct ftrace_graph_ent *trace);
static void wakeup_graph_return(struct ftrace_graph_ret *trace);
static int save_flags;
static bool function_enabled;
#define TRACE_DISPLAY_GRAPH 1
static struct tracer_opt trace_opts[] = {
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/* display latency trace as call graph */
{ TRACER_OPT(display-graph, TRACE_DISPLAY_GRAPH) },
#endif
{ } /* Empty entry */
};
static struct tracer_flags tracer_flags = {
.val = 0,
.opts = trace_opts,
};
#define is_graph() (tracer_flags.val & TRACE_DISPLAY_GRAPH)
#ifdef CONFIG_FUNCTION_TRACER
/*
* Prologue for the wakeup function tracers.
*
* Returns 1 if it is OK to continue, and preemption
* is disabled and data->disabled is incremented.
* 0 if the trace is to be ignored, and preemption
* is not disabled and data->disabled is
* kept the same.
*
* Note, this function is also used outside this ifdef but
* inside the #ifdef of the function graph tracer below.
* This is OK, since the function graph tracer is
* dependent on the function tracer.
*/
static int
func_prolog_preempt_disable(struct trace_array *tr,
struct trace_array_cpu **data,
int *pc)
{
long disabled;
int cpu;
if (likely(!wakeup_task))
return 0;
*pc = preempt_count();
preempt_disable_notrace();
cpu = raw_smp_processor_id();
if (cpu != wakeup_current_cpu)
goto out_enable;
*data = per_cpu_ptr(tr->trace_buffer.data, cpu);
disabled = atomic_inc_return(&(*data)->disabled);
if (unlikely(disabled != 1))
goto out;
return 1;
out:
atomic_dec(&(*data)->disabled);
out_enable:
preempt_enable_notrace();
return 0;
}
/*
* wakeup uses its own tracer function to keep the overhead down:
*/
static void
wakeup_tracer_call(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct pt_regs *pt_regs)
{
struct trace_array *tr = wakeup_trace;
struct trace_array_cpu *data;
unsigned long flags;
int pc;
if (!func_prolog_preempt_disable(tr, &data, &pc))
return;
local_irq_save(flags);
trace_function(tr, ip, parent_ip, flags, pc);
local_irq_restore(flags);
atomic_dec(&data->disabled);
preempt_enable_notrace();
}
#endif /* CONFIG_FUNCTION_TRACER */
static int register_wakeup_function(struct trace_array *tr, int graph, int set)
{
int ret;
/* 'set' is set if TRACE_ITER_FUNCTION is about to be set */
if (function_enabled || (!set && !(trace_flags & TRACE_ITER_FUNCTION)))
return 0;
if (graph)
ret = register_ftrace_graph(&wakeup_graph_return,
&wakeup_graph_entry);
else
ret = register_ftrace_function(tr->ops);
if (!ret)
function_enabled = true;
return ret;
}
static void unregister_wakeup_function(struct trace_array *tr, int graph)
{
if (!function_enabled)
return;
if (graph)
unregister_ftrace_graph();
else
unregister_ftrace_function(tr->ops);
function_enabled = false;
}
static void wakeup_function_set(struct trace_array *tr, int set)
{
if (set)
register_wakeup_function(tr, is_graph(), 1);
else
unregister_wakeup_function(tr, is_graph());
}
static int wakeup_flag_changed(struct trace_array *tr, u32 mask, int set)
{
struct tracer *tracer = tr->current_trace;
if (mask & TRACE_ITER_FUNCTION)
wakeup_function_set(tr, set);
return trace_keep_overwrite(tracer, mask, set);
}
static int start_func_tracer(struct trace_array *tr, int graph)
{
int ret;
ret = register_wakeup_function(tr, graph, 0);
if (!ret && tracing_is_enabled())
tracer_enabled = 1;
else
tracer_enabled = 0;
return ret;
}
static void stop_func_tracer(struct trace_array *tr, int graph)
{
tracer_enabled = 0;
unregister_wakeup_function(tr, graph);
}
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static int
wakeup_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
{
if (!(bit & TRACE_DISPLAY_GRAPH))
return -EINVAL;
if (!(is_graph() ^ set))
return 0;
stop_func_tracer(tr, !set);
wakeup_reset(wakeup_trace);
tr->max_latency = 0;
return start_func_tracer(tr, set);
}
static int wakeup_graph_entry(struct ftrace_graph_ent *trace)
{
struct trace_array *tr = wakeup_trace;
struct trace_array_cpu *data;
unsigned long flags;
int pc, ret = 0;
if (!func_prolog_preempt_disable(tr, &data, &pc))
return 0;
local_save_flags(flags);
ret = __trace_graph_entry(tr, trace, flags, pc);
atomic_dec(&data->disabled);
preempt_enable_notrace();
return ret;
}
static void wakeup_graph_return(struct ftrace_graph_ret *trace)
{
struct trace_array *tr = wakeup_trace;
struct trace_array_cpu *data;
unsigned long flags;
int pc;
if (!func_prolog_preempt_disable(tr, &data, &pc))
return;
local_save_flags(flags);
__trace_graph_return(tr, trace, flags, pc);
atomic_dec(&data->disabled);
preempt_enable_notrace();
return;
}
static void wakeup_trace_open(struct trace_iterator *iter)
{
if (is_graph())
graph_trace_open(iter);
}
static void wakeup_trace_close(struct trace_iterator *iter)
{
if (iter->private)
graph_trace_close(iter);
}
#define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_PROC | \
TRACE_GRAPH_PRINT_ABS_TIME | \
TRACE_GRAPH_PRINT_DURATION)
static enum print_line_t wakeup_print_line(struct trace_iterator *iter)
{
/*
* In graph mode call the graph tracer output function,
* otherwise go with the TRACE_FN event handler
*/
if (is_graph())
return print_graph_function_flags(iter, GRAPH_TRACER_FLAGS);
return TRACE_TYPE_UNHANDLED;
}
static void wakeup_print_header(struct seq_file *s)
{
if (is_graph())
print_graph_headers_flags(s, GRAPH_TRACER_FLAGS);
else
trace_default_header(s);
}
static void
__trace_function(struct trace_array *tr,
unsigned long ip, unsigned long parent_ip,
unsigned long flags, int pc)
{
if (is_graph())
trace_graph_function(tr, ip, parent_ip, flags, pc);
else
trace_function(tr, ip, parent_ip, flags, pc);
}
#else
#define __trace_function trace_function
static int
wakeup_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
{
return -EINVAL;
}
static int wakeup_graph_entry(struct ftrace_graph_ent *trace)
{
return -1;
}
static enum print_line_t wakeup_print_line(struct trace_iterator *iter)
{
return TRACE_TYPE_UNHANDLED;
}
static void wakeup_graph_return(struct ftrace_graph_ret *trace) { }
static void wakeup_trace_open(struct trace_iterator *iter) { }
static void wakeup_trace_close(struct trace_iterator *iter) { }
#ifdef CONFIG_FUNCTION_TRACER
static void wakeup_print_header(struct seq_file *s)
{
trace_default_header(s);
}
#else
static void wakeup_print_header(struct seq_file *s)
{
trace_latency_header(s);
}
#endif /* CONFIG_FUNCTION_TRACER */
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
/*
* Should this new latency be reported/recorded?
*/
static int report_latency(struct trace_array *tr, cycle_t delta)
{
if (tracing_thresh) {
if (delta < tracing_thresh)
return 0;
} else {
if (delta <= tr->max_latency)
return 0;
}
return 1;
}
static void
probe_wakeup_migrate_task(void *ignore, struct task_struct *task, int cpu)
{
if (task != wakeup_task)
return;
wakeup_current_cpu = cpu;
}
static void notrace
probe_wakeup_sched_switch(void *ignore,
struct task_struct *prev, struct task_struct *next)
{
struct trace_array_cpu *data;
cycle_t T0, T1, delta;
unsigned long flags;
long disabled;
int cpu;
int pc;
tracing_record_cmdline(prev);
if (unlikely(!tracer_enabled))
return;
/*
* When we start a new trace, we set wakeup_task to NULL
* and then set tracer_enabled = 1. We want to make sure
* that another CPU does not see the tracer_enabled = 1
* and the wakeup_task with an older task, that might
* actually be the same as next.
*/
smp_rmb();
if (next != wakeup_task)
return;
pc = preempt_count();
/* disable local data, not wakeup_cpu data */
cpu = raw_smp_processor_id();
disabled = atomic_inc_return(&per_cpu_ptr(wakeup_trace->trace_buffer.data, cpu)->disabled);
if (likely(disabled != 1))
goto out;
local_irq_save(flags);
arch_spin_lock(&wakeup_lock);
/* We could race with grabbing wakeup_lock */
if (unlikely(!tracer_enabled || next != wakeup_task))
goto out_unlock;
/* The task we are waiting for is waking up */
data = per_cpu_ptr(wakeup_trace->trace_buffer.data, wakeup_cpu);
__trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc);
tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc);
T0 = data->preempt_timestamp;
T1 = ftrace_now(cpu);
delta = T1-T0;
if (!report_latency(wakeup_trace, delta))
goto out_unlock;
if (likely(!is_tracing_stopped())) {
wakeup_trace->max_latency = delta;
update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu);
}
out_unlock:
__wakeup_reset(wakeup_trace);
arch_spin_unlock(&wakeup_lock);
local_irq_restore(flags);
out:
atomic_dec(&per_cpu_ptr(wakeup_trace->trace_buffer.data, cpu)->disabled);
}
static void __wakeup_reset(struct trace_array *tr)
{
wakeup_cpu = -1;
wakeup_prio = -1;
tracing_dl = 0;
if (wakeup_task)
put_task_struct(wakeup_task);
wakeup_task = NULL;
}
static void wakeup_reset(struct trace_array *tr)
{
unsigned long flags;
tracing_reset_online_cpus(&tr->trace_buffer);
local_irq_save(flags);
arch_spin_lock(&wakeup_lock);
__wakeup_reset(tr);
arch_spin_unlock(&wakeup_lock);
local_irq_restore(flags);
}
static void
probe_wakeup(void *ignore, struct task_struct *p, int success)
{
struct trace_array_cpu *data;
int cpu = smp_processor_id();
unsigned long flags;
long disabled;
int pc;
if (likely(!tracer_enabled))
return;
tracing_record_cmdline(p);
tracing_record_cmdline(current);
/*
* Semantic is like this:
* - wakeup tracer handles all tasks in the system, independently
* from their scheduling class;
* - wakeup_rt tracer handles tasks belonging to sched_dl and
* sched_rt class;
* - wakeup_dl handles tasks belonging to sched_dl class only.
*/
if (tracing_dl || (wakeup_dl && !dl_task(p)) ||
(wakeup_rt && !dl_task(p) && !rt_task(p)) ||
(!dl_task(p) && (p->prio >= wakeup_prio || p->prio >= current->prio)))
return;
pc = preempt_count();
disabled = atomic_inc_return(&per_cpu_ptr(wakeup_trace->trace_buffer.data, cpu)->disabled);
if (unlikely(disabled != 1))
goto out;
/* interrupts should be off from try_to_wake_up */
arch_spin_lock(&wakeup_lock);
/* check for races. */
if (!tracer_enabled || tracing_dl ||
(!dl_task(p) && p->prio >= wakeup_prio))
goto out_locked;
/* reset the trace */
__wakeup_reset(wakeup_trace);
wakeup_cpu = task_cpu(p);
wakeup_current_cpu = wakeup_cpu;
wakeup_prio = p->prio;
/*
* Once you start tracing a -deadline task, don't bother tracing
* another task until the first one wakes up.
*/
if (dl_task(p))
tracing_dl = 1;
else
tracing_dl = 0;
wakeup_task = p;
get_task_struct(wakeup_task);
local_save_flags(flags);
data = per_cpu_ptr(wakeup_trace->trace_buffer.data, wakeup_cpu);
data->preempt_timestamp = ftrace_now(cpu);
tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc);
/*
* We must be careful in using CALLER_ADDR2. But since wake_up
* is not called by an assembly function (where as schedule is)
* it should be safe to use it here.
*/
__trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
out_locked:
arch_spin_unlock(&wakeup_lock);
out:
atomic_dec(&per_cpu_ptr(wakeup_trace->trace_buffer.data, cpu)->disabled);
}
static void start_wakeup_tracer(struct trace_array *tr)
{
int ret;
ret = register_trace_sched_wakeup(probe_wakeup, NULL);
if (ret) {
pr_info("wakeup trace: Couldn't activate tracepoint"
" probe to kernel_sched_wakeup\n");
return;
}
ret = register_trace_sched_wakeup_new(probe_wakeup, NULL);
if (ret) {
pr_info("wakeup trace: Couldn't activate tracepoint"
" probe to kernel_sched_wakeup_new\n");
goto fail_deprobe;
}
ret = register_trace_sched_switch(probe_wakeup_sched_switch, NULL);
if (ret) {
pr_info("sched trace: Couldn't activate tracepoint"
" probe to kernel_sched_switch\n");
goto fail_deprobe_wake_new;
}
ret = register_trace_sched_migrate_task(probe_wakeup_migrate_task, NULL);
if (ret) {
pr_info("wakeup trace: Couldn't activate tracepoint"
" probe to kernel_sched_migrate_task\n");
return;
}
wakeup_reset(tr);
/*
* Don't let the tracer_enabled = 1 show up before
* the wakeup_task is reset. This may be overkill since
* wakeup_reset does a spin_unlock after setting the
* wakeup_task to NULL, but I want to be safe.
* This is a slow path anyway.
*/
smp_wmb();
if (start_func_tracer(tr, is_graph()))
printk(KERN_ERR "failed to start wakeup tracer\n");
return;
fail_deprobe_wake_new:
unregister_trace_sched_wakeup_new(probe_wakeup, NULL);
fail_deprobe:
unregister_trace_sched_wakeup(probe_wakeup, NULL);
}
static void stop_wakeup_tracer(struct trace_array *tr)
{
tracer_enabled = 0;
stop_func_tracer(tr, is_graph());
unregister_trace_sched_switch(probe_wakeup_sched_switch, NULL);
unregister_trace_sched_wakeup_new(probe_wakeup, NULL);
unregister_trace_sched_wakeup(probe_wakeup, NULL);
unregister_trace_sched_migrate_task(probe_wakeup_migrate_task, NULL);
}
static bool wakeup_busy;
static int __wakeup_tracer_init(struct trace_array *tr)
{
save_flags = trace_flags;
/* non overwrite screws up the latency tracers */
set_tracer_flag(tr, TRACE_ITER_OVERWRITE, 1);
set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, 1);
tr->max_latency = 0;
wakeup_trace = tr;
ftrace_init_array_ops(tr, wakeup_tracer_call);
start_wakeup_tracer(tr);
wakeup_busy = true;
return 0;
}
static int wakeup_tracer_init(struct trace_array *tr)
{
if (wakeup_busy)
return -EBUSY;
wakeup_dl = 0;
wakeup_rt = 0;
return __wakeup_tracer_init(tr);
}
static int wakeup_rt_tracer_init(struct trace_array *tr)
{
if (wakeup_busy)
return -EBUSY;
wakeup_dl = 0;
wakeup_rt = 1;
return __wakeup_tracer_init(tr);
}
static int wakeup_dl_tracer_init(struct trace_array *tr)
{
if (wakeup_busy)
return -EBUSY;
wakeup_dl = 1;
wakeup_rt = 0;
return __wakeup_tracer_init(tr);
}
static void wakeup_tracer_reset(struct trace_array *tr)
{
int lat_flag = save_flags & TRACE_ITER_LATENCY_FMT;
int overwrite_flag = save_flags & TRACE_ITER_OVERWRITE;
stop_wakeup_tracer(tr);
/* make sure we put back any tasks we are tracing */
wakeup_reset(tr);
set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, lat_flag);
set_tracer_flag(tr, TRACE_ITER_OVERWRITE, overwrite_flag);
ftrace_reset_array_ops(tr);
wakeup_busy = false;
}
static void wakeup_tracer_start(struct trace_array *tr)
{
wakeup_reset(tr);
tracer_enabled = 1;
}
static void wakeup_tracer_stop(struct trace_array *tr)
{
tracer_enabled = 0;
}
static struct tracer wakeup_tracer __read_mostly =
{
.name = "wakeup",
.init = wakeup_tracer_init,
.reset = wakeup_tracer_reset,
.start = wakeup_tracer_start,
.stop = wakeup_tracer_stop,
.print_max = true,
.print_header = wakeup_print_header,
.print_line = wakeup_print_line,
.flags = &tracer_flags,
.set_flag = wakeup_set_flag,
.flag_changed = wakeup_flag_changed,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_wakeup,
#endif
.open = wakeup_trace_open,
.close = wakeup_trace_close,
.allow_instances = true,
.use_max_tr = true,
};
static struct tracer wakeup_rt_tracer __read_mostly =
{
.name = "wakeup_rt",
.init = wakeup_rt_tracer_init,
.reset = wakeup_tracer_reset,
.start = wakeup_tracer_start,
.stop = wakeup_tracer_stop,
.print_max = true,
.print_header = wakeup_print_header,
.print_line = wakeup_print_line,
.flags = &tracer_flags,
.set_flag = wakeup_set_flag,
.flag_changed = wakeup_flag_changed,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_wakeup,
#endif
.open = wakeup_trace_open,
.close = wakeup_trace_close,
.allow_instances = true,
.use_max_tr = true,
};
static struct tracer wakeup_dl_tracer __read_mostly =
{
.name = "wakeup_dl",
.init = wakeup_dl_tracer_init,
.reset = wakeup_tracer_reset,
.start = wakeup_tracer_start,
.stop = wakeup_tracer_stop,
.print_max = true,
.print_header = wakeup_print_header,
.print_line = wakeup_print_line,
.flags = &tracer_flags,
.set_flag = wakeup_set_flag,
.flag_changed = wakeup_flag_changed,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_wakeup,
#endif
.open = wakeup_trace_open,
.close = wakeup_trace_close,
.use_max_tr = true,
};
__init static int init_wakeup_tracer(void)
{
int ret;
ret = register_tracer(&wakeup_tracer);
if (ret)
return ret;
ret = register_tracer(&wakeup_rt_tracer);
if (ret)
return ret;
ret = register_tracer(&wakeup_dl_tracer);
if (ret)
return ret;
return 0;
}
core_initcall(init_wakeup_tracer);

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,13 @@
#include "trace.h"
int DYN_FTRACE_TEST_NAME(void)
{
/* used to call mcount */
return 0;
}
int DYN_FTRACE_TEST_NAME2(void)
{
/* used to call mcount */
return 0;
}

428
kernel/trace/trace_seq.c Normal file
View file

@ -0,0 +1,428 @@
/*
* trace_seq.c
*
* Copyright (C) 2008-2014 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
*
* The trace_seq is a handy tool that allows you to pass a descriptor around
* to a buffer that other functions can write to. It is similar to the
* seq_file functionality but has some differences.
*
* To use it, the trace_seq must be initialized with trace_seq_init().
* This will set up the counters within the descriptor. You can call
* trace_seq_init() more than once to reset the trace_seq to start
* from scratch.
*
* The buffer size is currently PAGE_SIZE, although it may become dynamic
* in the future.
*
* A write to the buffer will either succed or fail. That is, unlike
* sprintf() there will not be a partial write (well it may write into
* the buffer but it wont update the pointers). This allows users to
* try to write something into the trace_seq buffer and if it fails
* they can flush it and try again.
*
*/
#include <linux/uaccess.h>
#include <linux/seq_file.h>
#include <linux/trace_seq.h>
/* How much buffer is left on the trace_seq? */
#define TRACE_SEQ_BUF_LEFT(s) ((PAGE_SIZE - 1) - (s)->len)
/* How much buffer is written? */
#define TRACE_SEQ_BUF_USED(s) min((s)->len, (unsigned int)(PAGE_SIZE - 1))
/**
* trace_print_seq - move the contents of trace_seq into a seq_file
* @m: the seq_file descriptor that is the destination
* @s: the trace_seq descriptor that is the source.
*
* Returns 0 on success and non zero on error. If it succeeds to
* write to the seq_file it will reset the trace_seq, otherwise
* it does not modify the trace_seq to let the caller try again.
*/
int trace_print_seq(struct seq_file *m, struct trace_seq *s)
{
unsigned int len = TRACE_SEQ_BUF_USED(s);
int ret;
ret = seq_write(m, s->buffer, len);
/*
* Only reset this buffer if we successfully wrote to the
* seq_file buffer. This lets the caller try again or
* do something else with the contents.
*/
if (!ret)
trace_seq_init(s);
return ret;
}
/**
* trace_seq_printf - sequence printing of trace information
* @s: trace sequence descriptor
* @fmt: printf format string
*
* The tracer may use either sequence operations or its own
* copy to user routines. To simplify formating of a trace
* trace_seq_printf() is used to store strings into a special
* buffer (@s). Then the output may be either used by
* the sequencer or pulled into another buffer.
*
* Returns 1 if we successfully written all the contents to
* the buffer.
* Returns 0 if we the length to write is bigger than the
* reserved buffer space. In this case, nothing gets written.
*/
int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
{
unsigned int len = TRACE_SEQ_BUF_LEFT(s);
va_list ap;
int ret;
if (s->full || !len)
return 0;
va_start(ap, fmt);
ret = vsnprintf(s->buffer + s->len, len, fmt, ap);
va_end(ap);
/* If we can't write it all, don't bother writing anything */
if (ret >= len) {
s->full = 1;
return 0;
}
s->len += ret;
return 1;
}
EXPORT_SYMBOL_GPL(trace_seq_printf);
/**
* trace_seq_bitmask - write a bitmask array in its ASCII representation
* @s: trace sequence descriptor
* @maskp: points to an array of unsigned longs that represent a bitmask
* @nmaskbits: The number of bits that are valid in @maskp
*
* Writes a ASCII representation of a bitmask string into @s.
*
* Returns 1 if we successfully written all the contents to
* the buffer.
* Returns 0 if we the length to write is bigger than the
* reserved buffer space. In this case, nothing gets written.
*/
int trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp,
int nmaskbits)
{
unsigned int len = TRACE_SEQ_BUF_LEFT(s);
int ret;
if (s->full || !len)
return 0;
ret = bitmap_scnprintf(s->buffer, len, maskp, nmaskbits);
s->len += ret;
return 1;
}
EXPORT_SYMBOL_GPL(trace_seq_bitmask);
/**
* trace_seq_vprintf - sequence printing of trace information
* @s: trace sequence descriptor
* @fmt: printf format string
*
* The tracer may use either sequence operations or its own
* copy to user routines. To simplify formating of a trace
* trace_seq_printf is used to store strings into a special
* buffer (@s). Then the output may be either used by
* the sequencer or pulled into another buffer.
*
* Returns how much it wrote to the buffer.
*/
int trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
{
unsigned int len = TRACE_SEQ_BUF_LEFT(s);
int ret;
if (s->full || !len)
return 0;
ret = vsnprintf(s->buffer + s->len, len, fmt, args);
/* If we can't write it all, don't bother writing anything */
if (ret >= len) {
s->full = 1;
return 0;
}
s->len += ret;
return len;
}
EXPORT_SYMBOL_GPL(trace_seq_vprintf);
/**
* trace_seq_bprintf - Write the printf string from binary arguments
* @s: trace sequence descriptor
* @fmt: The format string for the @binary arguments
* @binary: The binary arguments for @fmt.
*
* When recording in a fast path, a printf may be recorded with just
* saving the format and the arguments as they were passed to the
* function, instead of wasting cycles converting the arguments into
* ASCII characters. Instead, the arguments are saved in a 32 bit
* word array that is defined by the format string constraints.
*
* This function will take the format and the binary array and finish
* the conversion into the ASCII string within the buffer.
*
* Returns how much it wrote to the buffer.
*/
int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
{
unsigned int len = TRACE_SEQ_BUF_LEFT(s);
int ret;
if (s->full || !len)
return 0;
ret = bstr_printf(s->buffer + s->len, len, fmt, binary);
/* If we can't write it all, don't bother writing anything */
if (ret >= len) {
s->full = 1;
return 0;
}
s->len += ret;
return len;
}
EXPORT_SYMBOL_GPL(trace_seq_bprintf);
/**
* trace_seq_puts - trace sequence printing of simple string
* @s: trace sequence descriptor
* @str: simple string to record
*
* The tracer may use either the sequence operations or its own
* copy to user routines. This function records a simple string
* into a special buffer (@s) for later retrieval by a sequencer
* or other mechanism.
*
* Returns how much it wrote to the buffer.
*/
int trace_seq_puts(struct trace_seq *s, const char *str)
{
unsigned int len = strlen(str);
if (s->full)
return 0;
if (len > TRACE_SEQ_BUF_LEFT(s)) {
s->full = 1;
return 0;
}
memcpy(s->buffer + s->len, str, len);
s->len += len;
return len;
}
EXPORT_SYMBOL_GPL(trace_seq_puts);
/**
* trace_seq_putc - trace sequence printing of simple character
* @s: trace sequence descriptor
* @c: simple character to record
*
* The tracer may use either the sequence operations or its own
* copy to user routines. This function records a simple charater
* into a special buffer (@s) for later retrieval by a sequencer
* or other mechanism.
*
* Returns how much it wrote to the buffer.
*/
int trace_seq_putc(struct trace_seq *s, unsigned char c)
{
if (s->full)
return 0;
if (TRACE_SEQ_BUF_LEFT(s) < 1) {
s->full = 1;
return 0;
}
s->buffer[s->len++] = c;
return 1;
}
EXPORT_SYMBOL_GPL(trace_seq_putc);
/**
* trace_seq_putmem - write raw data into the trace_seq buffer
* @s: trace sequence descriptor
* @mem: The raw memory to copy into the buffer
* @len: The length of the raw memory to copy (in bytes)
*
* There may be cases where raw memory needs to be written into the
* buffer and a strcpy() would not work. Using this function allows
* for such cases.
*
* Returns how much it wrote to the buffer.
*/
int trace_seq_putmem(struct trace_seq *s, const void *mem, unsigned int len)
{
if (s->full)
return 0;
if (len > TRACE_SEQ_BUF_LEFT(s)) {
s->full = 1;
return 0;
}
memcpy(s->buffer + s->len, mem, len);
s->len += len;
return len;
}
EXPORT_SYMBOL_GPL(trace_seq_putmem);
#define MAX_MEMHEX_BYTES 8U
#define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1)
/**
* trace_seq_putmem_hex - write raw memory into the buffer in ASCII hex
* @s: trace sequence descriptor
* @mem: The raw memory to write its hex ASCII representation of
* @len: The length of the raw memory to copy (in bytes)
*
* This is similar to trace_seq_putmem() except instead of just copying the
* raw memory into the buffer it writes its ASCII representation of it
* in hex characters.
*
* Returns how much it wrote to the buffer.
*/
int trace_seq_putmem_hex(struct trace_seq *s, const void *mem,
unsigned int len)
{
unsigned char hex[HEX_CHARS];
const unsigned char *data = mem;
unsigned int start_len;
int i, j;
int cnt = 0;
if (s->full)
return 0;
while (len) {
start_len = min(len, HEX_CHARS - 1);
#ifdef __BIG_ENDIAN
for (i = 0, j = 0; i < start_len; i++) {
#else
for (i = start_len-1, j = 0; i >= 0; i--) {
#endif
hex[j++] = hex_asc_hi(data[i]);
hex[j++] = hex_asc_lo(data[i]);
}
if (WARN_ON_ONCE(j == 0 || j/2 > len))
break;
/* j increments twice per loop */
len -= j / 2;
hex[j++] = ' ';
cnt += trace_seq_putmem(s, hex, j);
}
return cnt;
}
EXPORT_SYMBOL_GPL(trace_seq_putmem_hex);
/**
* trace_seq_path - copy a path into the sequence buffer
* @s: trace sequence descriptor
* @path: path to write into the sequence buffer.
*
* Write a path name into the sequence buffer.
*
* Returns 1 if we successfully written all the contents to
* the buffer.
* Returns 0 if we the length to write is bigger than the
* reserved buffer space. In this case, nothing gets written.
*/
int trace_seq_path(struct trace_seq *s, const struct path *path)
{
unsigned char *p;
if (s->full)
return 0;
if (TRACE_SEQ_BUF_LEFT(s) < 1) {
s->full = 1;
return 0;
}
p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len);
if (!IS_ERR(p)) {
p = mangle_path(s->buffer + s->len, p, "\n");
if (p) {
s->len = p - s->buffer;
return 1;
}
} else {
s->buffer[s->len++] = '?';
return 1;
}
s->full = 1;
return 0;
}
EXPORT_SYMBOL_GPL(trace_seq_path);
/**
* trace_seq_to_user - copy the squence buffer to user space
* @s: trace sequence descriptor
* @ubuf: The userspace memory location to copy to
* @cnt: The amount to copy
*
* Copies the sequence buffer into the userspace memory pointed to
* by @ubuf. It starts from the last read position (@s->readpos)
* and writes up to @cnt characters or till it reaches the end of
* the content in the buffer (@s->len), which ever comes first.
*
* On success, it returns a positive number of the number of bytes
* it copied.
*
* On failure it returns -EBUSY if all of the content in the
* sequence has been already read, which includes nothing in the
* sequenc (@s->len == @s->readpos).
*
* Returns -EFAULT if the copy to userspace fails.
*/
int trace_seq_to_user(struct trace_seq *s, char __user *ubuf, int cnt)
{
int len;
int ret;
if (!cnt)
return 0;
if (s->len <= s->readpos)
return -EBUSY;
len = s->len - s->readpos;
if (cnt > len)
cnt = len;
ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt);
if (ret == cnt)
return -EFAULT;
cnt -= ret;
s->readpos += cnt;
return cnt;
}
EXPORT_SYMBOL_GPL(trace_seq_to_user);

486
kernel/trace/trace_stack.c Normal file
View file

@ -0,0 +1,486 @@
/*
* Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
*
*/
#include <linux/stacktrace.h>
#include <linux/kallsyms.h>
#include <linux/seq_file.h>
#include <linux/spinlock.h>
#include <linux/uaccess.h>
#include <linux/debugfs.h>
#include <linux/ftrace.h>
#include <linux/module.h>
#include <linux/sysctl.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <asm/setup.h>
#include "trace.h"
#define STACK_TRACE_ENTRIES 500
#ifdef CC_USING_FENTRY
# define fentry 1
#else
# define fentry 0
#endif
static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES+1] =
{ [0 ... (STACK_TRACE_ENTRIES)] = ULONG_MAX };
static unsigned stack_dump_index[STACK_TRACE_ENTRIES];
/*
* Reserve one entry for the passed in ip. This will allow
* us to remove most or all of the stack size overhead
* added by the stack tracer itself.
*/
static struct stack_trace max_stack_trace = {
.max_entries = STACK_TRACE_ENTRIES - 1,
.entries = &stack_dump_trace[1],
};
static unsigned long max_stack_size;
static arch_spinlock_t max_stack_lock =
(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
static DEFINE_PER_CPU(int, trace_active);
static DEFINE_MUTEX(stack_sysctl_mutex);
int stack_tracer_enabled;
static int last_stack_tracer_enabled;
static inline void print_max_stack(void)
{
long i;
int size;
pr_emerg(" Depth Size Location (%d entries)\n"
" ----- ---- --------\n",
max_stack_trace.nr_entries - 1);
for (i = 0; i < max_stack_trace.nr_entries; i++) {
if (stack_dump_trace[i] == ULONG_MAX)
break;
if (i+1 == max_stack_trace.nr_entries ||
stack_dump_trace[i+1] == ULONG_MAX)
size = stack_dump_index[i];
else
size = stack_dump_index[i] - stack_dump_index[i+1];
pr_emerg("%3ld) %8d %5d %pS\n", i, stack_dump_index[i],
size, (void *)stack_dump_trace[i]);
}
}
static inline void
check_stack(unsigned long ip, unsigned long *stack)
{
unsigned long this_size, flags; unsigned long *p, *top, *start;
static int tracer_frame;
int frame_size = ACCESS_ONCE(tracer_frame);
int i;
this_size = ((unsigned long)stack) & (THREAD_SIZE-1);
this_size = THREAD_SIZE - this_size;
/* Remove the frame of the tracer */
this_size -= frame_size;
if (this_size <= max_stack_size)
return;
/* we do not handle interrupt stacks yet */
if (!object_is_on_stack(stack))
return;
local_irq_save(flags);
arch_spin_lock(&max_stack_lock);
/* In case another CPU set the tracer_frame on us */
if (unlikely(!frame_size))
this_size -= tracer_frame;
/* a race could have already updated it */
if (this_size <= max_stack_size)
goto out;
max_stack_size = this_size;
max_stack_trace.nr_entries = 0;
if (using_ftrace_ops_list_func())
max_stack_trace.skip = 4;
else
max_stack_trace.skip = 3;
save_stack_trace(&max_stack_trace);
/*
* Add the passed in ip from the function tracer.
* Searching for this on the stack will skip over
* most of the overhead from the stack tracer itself.
*/
stack_dump_trace[0] = ip;
max_stack_trace.nr_entries++;
/*
* Now find where in the stack these are.
*/
i = 0;
start = stack;
top = (unsigned long *)
(((unsigned long)start & ~(THREAD_SIZE-1)) + THREAD_SIZE);
/*
* Loop through all the entries. One of the entries may
* for some reason be missed on the stack, so we may
* have to account for them. If they are all there, this
* loop will only happen once. This code only takes place
* on a new max, so it is far from a fast path.
*/
while (i < max_stack_trace.nr_entries) {
int found = 0;
stack_dump_index[i] = this_size;
p = start;
for (; p < top && i < max_stack_trace.nr_entries; p++) {
if (*p == stack_dump_trace[i]) {
this_size = stack_dump_index[i++] =
(top - p) * sizeof(unsigned long);
found = 1;
/* Start the search from here */
start = p + 1;
/*
* We do not want to show the overhead
* of the stack tracer stack in the
* max stack. If we haven't figured
* out what that is, then figure it out
* now.
*/
if (unlikely(!tracer_frame) && i == 1) {
tracer_frame = (p - stack) *
sizeof(unsigned long);
max_stack_size -= tracer_frame;
}
}
}
if (!found)
i++;
}
if (task_stack_end_corrupted(current)) {
print_max_stack();
BUG();
}
out:
arch_spin_unlock(&max_stack_lock);
local_irq_restore(flags);
}
static void
stack_trace_call(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct pt_regs *pt_regs)
{
unsigned long stack;
int cpu;
preempt_disable_notrace();
cpu = raw_smp_processor_id();
/* no atomic needed, we only modify this variable by this cpu */
if (per_cpu(trace_active, cpu)++ != 0)
goto out;
/*
* When fentry is used, the traced function does not get
* its stack frame set up, and we lose the parent.
* The ip is pretty useless because the function tracer
* was called before that function set up its stack frame.
* In this case, we use the parent ip.
*
* By adding the return address of either the parent ip
* or the current ip we can disregard most of the stack usage
* caused by the stack tracer itself.
*
* The function tracer always reports the address of where the
* mcount call was, but the stack will hold the return address.
*/
if (fentry)
ip = parent_ip;
else
ip += MCOUNT_INSN_SIZE;
check_stack(ip, &stack);
out:
per_cpu(trace_active, cpu)--;
/* prevent recursion in schedule */
preempt_enable_notrace();
}
static struct ftrace_ops trace_ops __read_mostly =
{
.func = stack_trace_call,
.flags = FTRACE_OPS_FL_RECURSION_SAFE,
};
static ssize_t
stack_max_size_read(struct file *filp, char __user *ubuf,
size_t count, loff_t *ppos)
{
unsigned long *ptr = filp->private_data;
char buf[64];
int r;
r = snprintf(buf, sizeof(buf), "%ld\n", *ptr);
if (r > sizeof(buf))
r = sizeof(buf);
return simple_read_from_buffer(ubuf, count, ppos, buf, r);
}
static ssize_t
stack_max_size_write(struct file *filp, const char __user *ubuf,
size_t count, loff_t *ppos)
{
long *ptr = filp->private_data;
unsigned long val, flags;
int ret;
int cpu;
ret = kstrtoul_from_user(ubuf, count, 10, &val);
if (ret)
return ret;
local_irq_save(flags);
/*
* In case we trace inside arch_spin_lock() or after (NMI),
* we will cause circular lock, so we also need to increase
* the percpu trace_active here.
*/
cpu = smp_processor_id();
per_cpu(trace_active, cpu)++;
arch_spin_lock(&max_stack_lock);
*ptr = val;
arch_spin_unlock(&max_stack_lock);
per_cpu(trace_active, cpu)--;
local_irq_restore(flags);
return count;
}
static const struct file_operations stack_max_size_fops = {
.open = tracing_open_generic,
.read = stack_max_size_read,
.write = stack_max_size_write,
.llseek = default_llseek,
};
static void *
__next(struct seq_file *m, loff_t *pos)
{
long n = *pos - 1;
if (n >= max_stack_trace.nr_entries || stack_dump_trace[n] == ULONG_MAX)
return NULL;
m->private = (void *)n;
return &m->private;
}
static void *
t_next(struct seq_file *m, void *v, loff_t *pos)
{
(*pos)++;
return __next(m, pos);
}
static void *t_start(struct seq_file *m, loff_t *pos)
{
int cpu;
local_irq_disable();
cpu = smp_processor_id();
per_cpu(trace_active, cpu)++;
arch_spin_lock(&max_stack_lock);
if (*pos == 0)
return SEQ_START_TOKEN;
return __next(m, pos);
}
static void t_stop(struct seq_file *m, void *p)
{
int cpu;
arch_spin_unlock(&max_stack_lock);
cpu = smp_processor_id();
per_cpu(trace_active, cpu)--;
local_irq_enable();
}
static int trace_lookup_stack(struct seq_file *m, long i)
{
unsigned long addr = stack_dump_trace[i];
return seq_printf(m, "%pS\n", (void *)addr);
}
static void print_disabled(struct seq_file *m)
{
seq_puts(m, "#\n"
"# Stack tracer disabled\n"
"#\n"
"# To enable the stack tracer, either add 'stacktrace' to the\n"
"# kernel command line\n"
"# or 'echo 1 > /proc/sys/kernel/stack_tracer_enabled'\n"
"#\n");
}
static int t_show(struct seq_file *m, void *v)
{
long i;
int size;
if (v == SEQ_START_TOKEN) {
seq_printf(m, " Depth Size Location"
" (%d entries)\n"
" ----- ---- --------\n",
max_stack_trace.nr_entries - 1);
if (!stack_tracer_enabled && !max_stack_size)
print_disabled(m);
return 0;
}
i = *(long *)v;
if (i >= max_stack_trace.nr_entries ||
stack_dump_trace[i] == ULONG_MAX)
return 0;
if (i+1 == max_stack_trace.nr_entries ||
stack_dump_trace[i+1] == ULONG_MAX)
size = stack_dump_index[i];
else
size = stack_dump_index[i] - stack_dump_index[i+1];
seq_printf(m, "%3ld) %8d %5d ", i, stack_dump_index[i], size);
trace_lookup_stack(m, i);
return 0;
}
static const struct seq_operations stack_trace_seq_ops = {
.start = t_start,
.next = t_next,
.stop = t_stop,
.show = t_show,
};
static int stack_trace_open(struct inode *inode, struct file *file)
{
return seq_open(file, &stack_trace_seq_ops);
}
static const struct file_operations stack_trace_fops = {
.open = stack_trace_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release,
};
static int
stack_trace_filter_open(struct inode *inode, struct file *file)
{
return ftrace_regex_open(&trace_ops, FTRACE_ITER_FILTER,
inode, file);
}
static const struct file_operations stack_trace_filter_fops = {
.open = stack_trace_filter_open,
.read = seq_read,
.write = ftrace_filter_write,
.llseek = tracing_lseek,
.release = ftrace_regex_release,
};
int
stack_trace_sysctl(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
int ret;
mutex_lock(&stack_sysctl_mutex);
ret = proc_dointvec(table, write, buffer, lenp, ppos);
if (ret || !write ||
(last_stack_tracer_enabled == !!stack_tracer_enabled))
goto out;
last_stack_tracer_enabled = !!stack_tracer_enabled;
if (stack_tracer_enabled)
register_ftrace_function(&trace_ops);
else
unregister_ftrace_function(&trace_ops);
out:
mutex_unlock(&stack_sysctl_mutex);
return ret;
}
static char stack_trace_filter_buf[COMMAND_LINE_SIZE+1] __initdata;
static __init int enable_stacktrace(char *str)
{
if (strncmp(str, "_filter=", 8) == 0)
strncpy(stack_trace_filter_buf, str+8, COMMAND_LINE_SIZE);
stack_tracer_enabled = 1;
last_stack_tracer_enabled = 1;
return 1;
}
__setup("stacktrace", enable_stacktrace);
static __init int stack_trace_init(void)
{
struct dentry *d_tracer;
d_tracer = tracing_init_dentry();
if (!d_tracer)
return 0;
trace_create_file("stack_max_size", 0644, d_tracer,
&max_stack_size, &stack_max_size_fops);
trace_create_file("stack_trace", 0444, d_tracer,
NULL, &stack_trace_fops);
trace_create_file("stack_trace_filter", 0444, d_tracer,
NULL, &stack_trace_filter_fops);
if (stack_trace_filter_buf[0])
ftrace_set_early_filter(&trace_ops, stack_trace_filter_buf, 1);
if (stack_tracer_enabled)
register_ftrace_function(&trace_ops);
return 0;
}
device_initcall(stack_trace_init);

359
kernel/trace/trace_stat.c Normal file
View file

@ -0,0 +1,359 @@
/*
* Infrastructure for statistic tracing (histogram output).
*
* Copyright (C) 2008-2009 Frederic Weisbecker <fweisbec@gmail.com>
*
* Based on the code from trace_branch.c which is
* Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
*
*/
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/rbtree.h>
#include <linux/debugfs.h>
#include "trace_stat.h"
#include "trace.h"
/*
* List of stat red-black nodes from a tracer
* We use a such tree to sort quickly the stat
* entries from the tracer.
*/
struct stat_node {
struct rb_node node;
void *stat;
};
/* A stat session is the stats output in one file */
struct stat_session {
struct list_head session_list;
struct tracer_stat *ts;
struct rb_root stat_root;
struct mutex stat_mutex;
struct dentry *file;
};
/* All of the sessions currently in use. Each stat file embed one session */
static LIST_HEAD(all_stat_sessions);
static DEFINE_MUTEX(all_stat_sessions_mutex);
/* The root directory for all stat files */
static struct dentry *stat_dir;
static void __reset_stat_session(struct stat_session *session)
{
struct stat_node *snode, *n;
rbtree_postorder_for_each_entry_safe(snode, n, &session->stat_root, node) {
if (session->ts->stat_release)
session->ts->stat_release(snode->stat);
kfree(snode);
}
session->stat_root = RB_ROOT;
}
static void reset_stat_session(struct stat_session *session)
{
mutex_lock(&session->stat_mutex);
__reset_stat_session(session);
mutex_unlock(&session->stat_mutex);
}
static void destroy_session(struct stat_session *session)
{
debugfs_remove(session->file);
__reset_stat_session(session);
mutex_destroy(&session->stat_mutex);
kfree(session);
}
typedef int (*cmp_stat_t)(void *, void *);
static int insert_stat(struct rb_root *root, void *stat, cmp_stat_t cmp)
{
struct rb_node **new = &(root->rb_node), *parent = NULL;
struct stat_node *data;
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
return -ENOMEM;
data->stat = stat;
/*
* Figure out where to put new node
* This is a descendent sorting
*/
while (*new) {
struct stat_node *this;
int result;
this = container_of(*new, struct stat_node, node);
result = cmp(data->stat, this->stat);
parent = *new;
if (result >= 0)
new = &((*new)->rb_left);
else
new = &((*new)->rb_right);
}
rb_link_node(&data->node, parent, new);
rb_insert_color(&data->node, root);
return 0;
}
/*
* For tracers that don't provide a stat_cmp callback.
* This one will force an insertion as right-most node
* in the rbtree.
*/
static int dummy_cmp(void *p1, void *p2)
{
return -1;
}
/*
* Initialize the stat rbtree at each trace_stat file opening.
* All of these copies and sorting are required on all opening
* since the stats could have changed between two file sessions.
*/
static int stat_seq_init(struct stat_session *session)
{
struct tracer_stat *ts = session->ts;
struct rb_root *root = &session->stat_root;
void *stat;
int ret = 0;
int i;
mutex_lock(&session->stat_mutex);
__reset_stat_session(session);
if (!ts->stat_cmp)
ts->stat_cmp = dummy_cmp;
stat = ts->stat_start(ts);
if (!stat)
goto exit;
ret = insert_stat(root, stat, ts->stat_cmp);
if (ret)
goto exit;
/*
* Iterate over the tracer stat entries and store them in an rbtree.
*/
for (i = 1; ; i++) {
stat = ts->stat_next(stat, i);
/* End of insertion */
if (!stat)
break;
ret = insert_stat(root, stat, ts->stat_cmp);
if (ret)
goto exit_free_rbtree;
}
exit:
mutex_unlock(&session->stat_mutex);
return ret;
exit_free_rbtree:
__reset_stat_session(session);
mutex_unlock(&session->stat_mutex);
return ret;
}
static void *stat_seq_start(struct seq_file *s, loff_t *pos)
{
struct stat_session *session = s->private;
struct rb_node *node;
int n = *pos;
int i;
/* Prevent from tracer switch or rbtree modification */
mutex_lock(&session->stat_mutex);
/* If we are in the beginning of the file, print the headers */
if (session->ts->stat_headers) {
if (n == 0)
return SEQ_START_TOKEN;
n--;
}
node = rb_first(&session->stat_root);
for (i = 0; node && i < n; i++)
node = rb_next(node);
return node;
}
static void *stat_seq_next(struct seq_file *s, void *p, loff_t *pos)
{
struct stat_session *session = s->private;
struct rb_node *node = p;
(*pos)++;
if (p == SEQ_START_TOKEN)
return rb_first(&session->stat_root);
return rb_next(node);
}
static void stat_seq_stop(struct seq_file *s, void *p)
{
struct stat_session *session = s->private;
mutex_unlock(&session->stat_mutex);
}
static int stat_seq_show(struct seq_file *s, void *v)
{
struct stat_session *session = s->private;
struct stat_node *l = container_of(v, struct stat_node, node);
if (v == SEQ_START_TOKEN)
return session->ts->stat_headers(s);
return session->ts->stat_show(s, l->stat);
}
static const struct seq_operations trace_stat_seq_ops = {
.start = stat_seq_start,
.next = stat_seq_next,
.stop = stat_seq_stop,
.show = stat_seq_show
};
/* The session stat is refilled and resorted at each stat file opening */
static int tracing_stat_open(struct inode *inode, struct file *file)
{
int ret;
struct seq_file *m;
struct stat_session *session = inode->i_private;
ret = stat_seq_init(session);
if (ret)
return ret;
ret = seq_open(file, &trace_stat_seq_ops);
if (ret) {
reset_stat_session(session);
return ret;
}
m = file->private_data;
m->private = session;
return ret;
}
/*
* Avoid consuming memory with our now useless rbtree.
*/
static int tracing_stat_release(struct inode *i, struct file *f)
{
struct stat_session *session = i->i_private;
reset_stat_session(session);
return seq_release(i, f);
}
static const struct file_operations tracing_stat_fops = {
.open = tracing_stat_open,
.read = seq_read,
.llseek = seq_lseek,
.release = tracing_stat_release
};
static int tracing_stat_init(void)
{
struct dentry *d_tracing;
d_tracing = tracing_init_dentry();
if (!d_tracing)
return 0;
stat_dir = debugfs_create_dir("trace_stat", d_tracing);
if (!stat_dir)
pr_warning("Could not create debugfs "
"'trace_stat' entry\n");
return 0;
}
static int init_stat_file(struct stat_session *session)
{
if (!stat_dir && tracing_stat_init())
return -ENODEV;
session->file = debugfs_create_file(session->ts->name, 0644,
stat_dir,
session, &tracing_stat_fops);
if (!session->file)
return -ENOMEM;
return 0;
}
int register_stat_tracer(struct tracer_stat *trace)
{
struct stat_session *session, *node;
int ret;
if (!trace)
return -EINVAL;
if (!trace->stat_start || !trace->stat_next || !trace->stat_show)
return -EINVAL;
/* Already registered? */
mutex_lock(&all_stat_sessions_mutex);
list_for_each_entry(node, &all_stat_sessions, session_list) {
if (node->ts == trace) {
mutex_unlock(&all_stat_sessions_mutex);
return -EINVAL;
}
}
mutex_unlock(&all_stat_sessions_mutex);
/* Init the session */
session = kzalloc(sizeof(*session), GFP_KERNEL);
if (!session)
return -ENOMEM;
session->ts = trace;
INIT_LIST_HEAD(&session->session_list);
mutex_init(&session->stat_mutex);
ret = init_stat_file(session);
if (ret) {
destroy_session(session);
return ret;
}
/* Register */
mutex_lock(&all_stat_sessions_mutex);
list_add_tail(&session->session_list, &all_stat_sessions);
mutex_unlock(&all_stat_sessions_mutex);
return 0;
}
void unregister_stat_tracer(struct tracer_stat *trace)
{
struct stat_session *node, *tmp;
mutex_lock(&all_stat_sessions_mutex);
list_for_each_entry_safe(node, tmp, &all_stat_sessions, session_list) {
if (node->ts == trace) {
list_del(&node->session_list);
destroy_session(node);
break;
}
}
mutex_unlock(&all_stat_sessions_mutex);
}

33
kernel/trace/trace_stat.h Normal file
View file

@ -0,0 +1,33 @@
#ifndef __TRACE_STAT_H
#define __TRACE_STAT_H
#include <linux/seq_file.h>
/*
* If you want to provide a stat file (one-shot statistics), fill
* an iterator with stat_start/stat_next and a stat_show callbacks.
* The others callbacks are optional.
*/
struct tracer_stat {
/* The name of your stat file */
const char *name;
/* Iteration over statistic entries */
void *(*stat_start)(struct tracer_stat *trace);
void *(*stat_next)(void *prev, int idx);
/* Compare two entries for stats sorting */
int (*stat_cmp)(void *p1, void *p2);
/* Print a stat entry */
int (*stat_show)(struct seq_file *s, void *p);
/* Release an entry */
void (*stat_release)(void *stat);
/* Print the headers of your stat entries */
int (*stat_headers)(struct seq_file *s);
};
/*
* Destroy or create a stat file
*/
extern int register_stat_tracer(struct tracer_stat *trace);
extern void unregister_stat_tracer(struct tracer_stat *trace);
#endif /* __TRACE_STAT_H */

View file

@ -0,0 +1,762 @@
#include <trace/syscall.h>
#include <trace/events/syscalls.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/module.h> /* for MODULE_NAME_LEN via KSYM_SYMBOL_LEN */
#include <linux/ftrace.h>
#include <linux/perf_event.h>
#include <asm/syscall.h>
#include "trace_output.h"
#include "trace.h"
static DEFINE_MUTEX(syscall_trace_lock);
static int syscall_enter_register(struct ftrace_event_call *event,
enum trace_reg type, void *data);
static int syscall_exit_register(struct ftrace_event_call *event,
enum trace_reg type, void *data);
static struct list_head *
syscall_get_enter_fields(struct ftrace_event_call *call)
{
struct syscall_metadata *entry = call->data;
return &entry->enter_fields;
}
extern struct syscall_metadata *__start_syscalls_metadata[];
extern struct syscall_metadata *__stop_syscalls_metadata[];
static struct syscall_metadata **syscalls_metadata;
#ifndef ARCH_HAS_SYSCALL_MATCH_SYM_NAME
static inline bool arch_syscall_match_sym_name(const char *sym, const char *name)
{
/*
* Only compare after the "sys" prefix. Archs that use
* syscall wrappers may have syscalls symbols aliases prefixed
* with ".SyS" or ".sys" instead of "sys", leading to an unwanted
* mismatch.
*/
return !strcmp(sym + 3, name + 3);
}
#endif
#ifdef ARCH_TRACE_IGNORE_COMPAT_SYSCALLS
/*
* Some architectures that allow for 32bit applications
* to run on a 64bit kernel, do not map the syscalls for
* the 32bit tasks the same as they do for 64bit tasks.
*
* *cough*x86*cough*
*
* In such a case, instead of reporting the wrong syscalls,
* simply ignore them.
*
* For an arch to ignore the compat syscalls it needs to
* define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS as well as
* define the function arch_trace_is_compat_syscall() to let
* the tracing system know that it should ignore it.
*/
static int
trace_get_syscall_nr(struct task_struct *task, struct pt_regs *regs)
{
if (unlikely(arch_trace_is_compat_syscall(regs)))
return -1;
return syscall_get_nr(task, regs);
}
#else
static inline int
trace_get_syscall_nr(struct task_struct *task, struct pt_regs *regs)
{
return syscall_get_nr(task, regs);
}
#endif /* ARCH_TRACE_IGNORE_COMPAT_SYSCALLS */
static __init struct syscall_metadata *
find_syscall_meta(unsigned long syscall)
{
struct syscall_metadata **start;
struct syscall_metadata **stop;
char str[KSYM_SYMBOL_LEN];
start = __start_syscalls_metadata;
stop = __stop_syscalls_metadata;
kallsyms_lookup(syscall, NULL, NULL, NULL, str);
if (arch_syscall_match_sym_name(str, "sys_ni_syscall"))
return NULL;
for ( ; start < stop; start++) {
if ((*start)->name && arch_syscall_match_sym_name(str, (*start)->name))
return *start;
}
return NULL;
}
static struct syscall_metadata *syscall_nr_to_meta(int nr)
{
if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
return NULL;
return syscalls_metadata[nr];
}
static enum print_line_t
print_syscall_enter(struct trace_iterator *iter, int flags,
struct trace_event *event)
{
struct trace_seq *s = &iter->seq;
struct trace_entry *ent = iter->ent;
struct syscall_trace_enter *trace;
struct syscall_metadata *entry;
int i, ret, syscall;
trace = (typeof(trace))ent;
syscall = trace->nr;
entry = syscall_nr_to_meta(syscall);
if (!entry)
goto end;
if (entry->enter_event->event.type != ent->type) {
WARN_ON_ONCE(1);
goto end;
}
ret = trace_seq_printf(s, "%s(", entry->name);
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
for (i = 0; i < entry->nb_args; i++) {
/* parameter types */
if (trace_flags & TRACE_ITER_VERBOSE) {
ret = trace_seq_printf(s, "%s ", entry->types[i]);
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
}
/* parameter values */
ret = trace_seq_printf(s, "%s: %lx%s", entry->args[i],
trace->args[i],
i == entry->nb_args - 1 ? "" : ", ");
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
}
ret = trace_seq_putc(s, ')');
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
end:
ret = trace_seq_putc(s, '\n');
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
return TRACE_TYPE_HANDLED;
}
static enum print_line_t
print_syscall_exit(struct trace_iterator *iter, int flags,
struct trace_event *event)
{
struct trace_seq *s = &iter->seq;
struct trace_entry *ent = iter->ent;
struct syscall_trace_exit *trace;
int syscall;
struct syscall_metadata *entry;
int ret;
trace = (typeof(trace))ent;
syscall = trace->nr;
entry = syscall_nr_to_meta(syscall);
if (!entry) {
trace_seq_putc(s, '\n');
return TRACE_TYPE_HANDLED;
}
if (entry->exit_event->event.type != ent->type) {
WARN_ON_ONCE(1);
return TRACE_TYPE_UNHANDLED;
}
ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
trace->ret);
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
return TRACE_TYPE_HANDLED;
}
extern char *__bad_type_size(void);
#define SYSCALL_FIELD(type, name) \
sizeof(type) != sizeof(trace.name) ? \
__bad_type_size() : \
#type, #name, offsetof(typeof(trace), name), \
sizeof(trace.name), is_signed_type(type)
static int __init
__set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
{
int i;
int pos = 0;
/* When len=0, we just calculate the needed length */
#define LEN_OR_ZERO (len ? len - pos : 0)
pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
for (i = 0; i < entry->nb_args; i++) {
pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s",
entry->args[i], sizeof(unsigned long),
i == entry->nb_args - 1 ? "" : ", ");
}
pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
for (i = 0; i < entry->nb_args; i++) {
pos += snprintf(buf + pos, LEN_OR_ZERO,
", ((unsigned long)(REC->%s))", entry->args[i]);
}
#undef LEN_OR_ZERO
/* return the length of print_fmt */
return pos;
}
static int __init set_syscall_print_fmt(struct ftrace_event_call *call)
{
char *print_fmt;
int len;
struct syscall_metadata *entry = call->data;
if (entry->enter_event != call) {
call->print_fmt = "\"0x%lx\", REC->ret";
return 0;
}
/* First: called with 0 length to calculate the needed length */
len = __set_enter_print_fmt(entry, NULL, 0);
print_fmt = kmalloc(len + 1, GFP_KERNEL);
if (!print_fmt)
return -ENOMEM;
/* Second: actually write the @print_fmt */
__set_enter_print_fmt(entry, print_fmt, len + 1);
call->print_fmt = print_fmt;
return 0;
}
static void __init free_syscall_print_fmt(struct ftrace_event_call *call)
{
struct syscall_metadata *entry = call->data;
if (entry->enter_event == call)
kfree(call->print_fmt);
}
static int __init syscall_enter_define_fields(struct ftrace_event_call *call)
{
struct syscall_trace_enter trace;
struct syscall_metadata *meta = call->data;
int ret;
int i;
int offset = offsetof(typeof(trace), args);
ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
if (ret)
return ret;
for (i = 0; i < meta->nb_args; i++) {
ret = trace_define_field(call, meta->types[i],
meta->args[i], offset,
sizeof(unsigned long), 0,
FILTER_OTHER);
offset += sizeof(unsigned long);
}
return ret;
}
static int __init syscall_exit_define_fields(struct ftrace_event_call *call)
{
struct syscall_trace_exit trace;
int ret;
ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
if (ret)
return ret;
ret = trace_define_field(call, SYSCALL_FIELD(long, ret),
FILTER_OTHER);
return ret;
}
static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
{
struct trace_array *tr = data;
struct ftrace_event_file *ftrace_file;
struct syscall_trace_enter *entry;
struct syscall_metadata *sys_data;
struct ring_buffer_event *event;
struct ring_buffer *buffer;
unsigned long irq_flags;
int pc;
int syscall_nr;
int size;
syscall_nr = trace_get_syscall_nr(current, regs);
if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
return;
/* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE) */
ftrace_file = rcu_dereference_sched(tr->enter_syscall_files[syscall_nr]);
if (!ftrace_file)
return;
if (ftrace_trigger_soft_disabled(ftrace_file))
return;
sys_data = syscall_nr_to_meta(syscall_nr);
if (!sys_data)
return;
size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
local_save_flags(irq_flags);
pc = preempt_count();
buffer = tr->trace_buffer.buffer;
event = trace_buffer_lock_reserve(buffer,
sys_data->enter_event->event.type, size, irq_flags, pc);
if (!event)
return;
entry = ring_buffer_event_data(event);
entry->nr = syscall_nr;
syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);
event_trigger_unlock_commit(ftrace_file, buffer, event, entry,
irq_flags, pc);
}
static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
{
struct trace_array *tr = data;
struct ftrace_event_file *ftrace_file;
struct syscall_trace_exit *entry;
struct syscall_metadata *sys_data;
struct ring_buffer_event *event;
struct ring_buffer *buffer;
unsigned long irq_flags;
int pc;
int syscall_nr;
syscall_nr = trace_get_syscall_nr(current, regs);
if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
return;
/* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE()) */
ftrace_file = rcu_dereference_sched(tr->exit_syscall_files[syscall_nr]);
if (!ftrace_file)
return;
if (ftrace_trigger_soft_disabled(ftrace_file))
return;
sys_data = syscall_nr_to_meta(syscall_nr);
if (!sys_data)
return;
local_save_flags(irq_flags);
pc = preempt_count();
buffer = tr->trace_buffer.buffer;
event = trace_buffer_lock_reserve(buffer,
sys_data->exit_event->event.type, sizeof(*entry),
irq_flags, pc);
if (!event)
return;
entry = ring_buffer_event_data(event);
entry->nr = syscall_nr;
entry->ret = syscall_get_return_value(current, regs);
event_trigger_unlock_commit(ftrace_file, buffer, event, entry,
irq_flags, pc);
}
static int reg_event_syscall_enter(struct ftrace_event_file *file,
struct ftrace_event_call *call)
{
struct trace_array *tr = file->tr;
int ret = 0;
int num;
num = ((struct syscall_metadata *)call->data)->syscall_nr;
if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
return -ENOSYS;
mutex_lock(&syscall_trace_lock);
if (!tr->sys_refcount_enter)
ret = register_trace_sys_enter(ftrace_syscall_enter, tr);
if (!ret) {
rcu_assign_pointer(tr->enter_syscall_files[num], file);
tr->sys_refcount_enter++;
}
mutex_unlock(&syscall_trace_lock);
return ret;
}
static void unreg_event_syscall_enter(struct ftrace_event_file *file,
struct ftrace_event_call *call)
{
struct trace_array *tr = file->tr;
int num;
num = ((struct syscall_metadata *)call->data)->syscall_nr;
if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
return;
mutex_lock(&syscall_trace_lock);
tr->sys_refcount_enter--;
RCU_INIT_POINTER(tr->enter_syscall_files[num], NULL);
if (!tr->sys_refcount_enter)
unregister_trace_sys_enter(ftrace_syscall_enter, tr);
mutex_unlock(&syscall_trace_lock);
}
static int reg_event_syscall_exit(struct ftrace_event_file *file,
struct ftrace_event_call *call)
{
struct trace_array *tr = file->tr;
int ret = 0;
int num;
num = ((struct syscall_metadata *)call->data)->syscall_nr;
if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
return -ENOSYS;
mutex_lock(&syscall_trace_lock);
if (!tr->sys_refcount_exit)
ret = register_trace_sys_exit(ftrace_syscall_exit, tr);
if (!ret) {
rcu_assign_pointer(tr->exit_syscall_files[num], file);
tr->sys_refcount_exit++;
}
mutex_unlock(&syscall_trace_lock);
return ret;
}
static void unreg_event_syscall_exit(struct ftrace_event_file *file,
struct ftrace_event_call *call)
{
struct trace_array *tr = file->tr;
int num;
num = ((struct syscall_metadata *)call->data)->syscall_nr;
if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
return;
mutex_lock(&syscall_trace_lock);
tr->sys_refcount_exit--;
RCU_INIT_POINTER(tr->exit_syscall_files[num], NULL);
if (!tr->sys_refcount_exit)
unregister_trace_sys_exit(ftrace_syscall_exit, tr);
mutex_unlock(&syscall_trace_lock);
}
static int __init init_syscall_trace(struct ftrace_event_call *call)
{
int id;
int num;
num = ((struct syscall_metadata *)call->data)->syscall_nr;
if (num < 0 || num >= NR_syscalls) {
pr_debug("syscall %s metadata not mapped, disabling ftrace event\n",
((struct syscall_metadata *)call->data)->name);
return -ENOSYS;
}
if (set_syscall_print_fmt(call) < 0)
return -ENOMEM;
id = trace_event_raw_init(call);
if (id < 0) {
free_syscall_print_fmt(call);
return id;
}
return id;
}
struct trace_event_functions enter_syscall_print_funcs = {
.trace = print_syscall_enter,
};
struct trace_event_functions exit_syscall_print_funcs = {
.trace = print_syscall_exit,
};
struct ftrace_event_class __refdata event_class_syscall_enter = {
.system = "syscalls",
.reg = syscall_enter_register,
.define_fields = syscall_enter_define_fields,
.get_fields = syscall_get_enter_fields,
.raw_init = init_syscall_trace,
};
struct ftrace_event_class __refdata event_class_syscall_exit = {
.system = "syscalls",
.reg = syscall_exit_register,
.define_fields = syscall_exit_define_fields,
.fields = LIST_HEAD_INIT(event_class_syscall_exit.fields),
.raw_init = init_syscall_trace,
};
unsigned long __init __weak arch_syscall_addr(int nr)
{
return (unsigned long)sys_call_table[nr];
}
static int __init init_ftrace_syscalls(void)
{
struct syscall_metadata *meta;
unsigned long addr;
int i;
syscalls_metadata = kcalloc(NR_syscalls, sizeof(*syscalls_metadata),
GFP_KERNEL);
if (!syscalls_metadata) {
WARN_ON(1);
return -ENOMEM;
}
for (i = 0; i < NR_syscalls; i++) {
addr = arch_syscall_addr(i);
meta = find_syscall_meta(addr);
if (!meta)
continue;
meta->syscall_nr = i;
syscalls_metadata[i] = meta;
}
return 0;
}
early_initcall(init_ftrace_syscalls);
#ifdef CONFIG_PERF_EVENTS
static DECLARE_BITMAP(enabled_perf_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls);
static int sys_perf_refcount_enter;
static int sys_perf_refcount_exit;
static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
{
struct syscall_metadata *sys_data;
struct syscall_trace_enter *rec;
struct hlist_head *head;
int syscall_nr;
int rctx;
int size;
syscall_nr = trace_get_syscall_nr(current, regs);
if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
return;
if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
return;
sys_data = syscall_nr_to_meta(syscall_nr);
if (!sys_data)
return;
head = this_cpu_ptr(sys_data->enter_event->perf_events);
if (hlist_empty(head))
return;
/* get the size after alignment with the u32 buffer size field */
size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
size = ALIGN(size + sizeof(u32), sizeof(u64));
size -= sizeof(u32);
rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size,
sys_data->enter_event->event.type, regs, &rctx);
if (!rec)
return;
rec->nr = syscall_nr;
syscall_get_arguments(current, regs, 0, sys_data->nb_args,
(unsigned long *)&rec->args);
perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL);
}
static int perf_sysenter_enable(struct ftrace_event_call *call)
{
int ret = 0;
int num;
num = ((struct syscall_metadata *)call->data)->syscall_nr;
mutex_lock(&syscall_trace_lock);
if (!sys_perf_refcount_enter)
ret = register_trace_sys_enter(perf_syscall_enter, NULL);
if (ret) {
pr_info("event trace: Could not activate"
"syscall entry trace point");
} else {
set_bit(num, enabled_perf_enter_syscalls);
sys_perf_refcount_enter++;
}
mutex_unlock(&syscall_trace_lock);
return ret;
}
static void perf_sysenter_disable(struct ftrace_event_call *call)
{
int num;
num = ((struct syscall_metadata *)call->data)->syscall_nr;
mutex_lock(&syscall_trace_lock);
sys_perf_refcount_enter--;
clear_bit(num, enabled_perf_enter_syscalls);
if (!sys_perf_refcount_enter)
unregister_trace_sys_enter(perf_syscall_enter, NULL);
mutex_unlock(&syscall_trace_lock);
}
static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
{
struct syscall_metadata *sys_data;
struct syscall_trace_exit *rec;
struct hlist_head *head;
int syscall_nr;
int rctx;
int size;
syscall_nr = trace_get_syscall_nr(current, regs);
if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
return;
if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
return;
sys_data = syscall_nr_to_meta(syscall_nr);
if (!sys_data)
return;
head = this_cpu_ptr(sys_data->exit_event->perf_events);
if (hlist_empty(head))
return;
/* We can probably do that at build time */
size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
size -= sizeof(u32);
rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size,
sys_data->exit_event->event.type, regs, &rctx);
if (!rec)
return;
rec->nr = syscall_nr;
rec->ret = syscall_get_return_value(current, regs);
perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL);
}
static int perf_sysexit_enable(struct ftrace_event_call *call)
{
int ret = 0;
int num;
num = ((struct syscall_metadata *)call->data)->syscall_nr;
mutex_lock(&syscall_trace_lock);
if (!sys_perf_refcount_exit)
ret = register_trace_sys_exit(perf_syscall_exit, NULL);
if (ret) {
pr_info("event trace: Could not activate"
"syscall exit trace point");
} else {
set_bit(num, enabled_perf_exit_syscalls);
sys_perf_refcount_exit++;
}
mutex_unlock(&syscall_trace_lock);
return ret;
}
static void perf_sysexit_disable(struct ftrace_event_call *call)
{
int num;
num = ((struct syscall_metadata *)call->data)->syscall_nr;
mutex_lock(&syscall_trace_lock);
sys_perf_refcount_exit--;
clear_bit(num, enabled_perf_exit_syscalls);
if (!sys_perf_refcount_exit)
unregister_trace_sys_exit(perf_syscall_exit, NULL);
mutex_unlock(&syscall_trace_lock);
}
#endif /* CONFIG_PERF_EVENTS */
static int syscall_enter_register(struct ftrace_event_call *event,
enum trace_reg type, void *data)
{
struct ftrace_event_file *file = data;
switch (type) {
case TRACE_REG_REGISTER:
return reg_event_syscall_enter(file, event);
case TRACE_REG_UNREGISTER:
unreg_event_syscall_enter(file, event);
return 0;
#ifdef CONFIG_PERF_EVENTS
case TRACE_REG_PERF_REGISTER:
return perf_sysenter_enable(event);
case TRACE_REG_PERF_UNREGISTER:
perf_sysenter_disable(event);
return 0;
case TRACE_REG_PERF_OPEN:
case TRACE_REG_PERF_CLOSE:
case TRACE_REG_PERF_ADD:
case TRACE_REG_PERF_DEL:
return 0;
#endif
}
return 0;
}
static int syscall_exit_register(struct ftrace_event_call *event,
enum trace_reg type, void *data)
{
struct ftrace_event_file *file = data;
switch (type) {
case TRACE_REG_REGISTER:
return reg_event_syscall_exit(file, event);
case TRACE_REG_UNREGISTER:
unreg_event_syscall_exit(file, event);
return 0;
#ifdef CONFIG_PERF_EVENTS
case TRACE_REG_PERF_REGISTER:
return perf_sysexit_enable(event);
case TRACE_REG_PERF_UNREGISTER:
perf_sysexit_disable(event);
return 0;
case TRACE_REG_PERF_OPEN:
case TRACE_REG_PERF_CLOSE:
case TRACE_REG_PERF_ADD:
case TRACE_REG_PERF_DEL:
return 0;
#endif
}
return 0;
}

1339
kernel/trace/trace_uprobe.c Normal file

File diff suppressed because it is too large Load diff