perf: Extend the mmap control page with time (TSC) fields
authorPeter Zijlstra <a.p.zijlstra@chello.nl>
Mon, 21 Nov 2011 10:43:53 +0000 (11:43 +0100)
committerIngo Molnar <mingo@elte.hu>
Wed, 21 Dec 2011 10:01:13 +0000 (11:01 +0100)
Extend the mmap control page with fields so that userspace can compute
time deltas relative to the provided time fields.

Currently only implemented for x86 with constant and nonstop TSC.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Cc: Arun Sharma <asharma@fb.com>
Link: http://lkml.kernel.org/n/tip-3u1jucza77j3wuvs0x2bic0f@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
arch/x86/kernel/cpu/perf_event.c
include/linux/perf_event.h
kernel/events/core.c

index 116b040a73a8e2aea3f0eedfe6ace24c46d2ba0b..f8bddb5b06006d4c837ed8de239fcdab168f5997 100644 (file)
@@ -32,6 +32,7 @@
 #include <asm/compat.h>
 #include <asm/smp.h>
 #include <asm/alternative.h>
 #include <asm/compat.h>
 #include <asm/smp.h>
 #include <asm/alternative.h>
+#include <asm/timer.h>
 
 #include "perf_event.h"
 
 
 #include "perf_event.h"
 
@@ -1627,6 +1628,19 @@ static struct pmu pmu = {
        .event_idx      = x86_pmu_event_idx,
 };
 
        .event_idx      = x86_pmu_event_idx,
 };
 
+void perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now)
+{
+       if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+               return;
+
+       if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
+               return;
+
+       userpg->time_mult = this_cpu_read(cyc2ns);
+       userpg->time_shift = CYC2NS_SCALE_FACTOR;
+       userpg->time_offset = this_cpu_read(cyc2ns_offset) - now;
+}
+
 /*
  * callchain support
  */
 /*
  * callchain support
  */
index 5311b79fe62cc2d684425e27447f7722592bdb38..0b91db2522cc28f0f6d21f7932437b6f324d2a1d 100644 (file)
@@ -291,12 +291,14 @@ struct perf_event_mmap_page {
        __s64   offset;                 /* add to hardware event value */
        __u64   time_enabled;           /* time event active */
        __u64   time_running;           /* time event on cpu */
        __s64   offset;                 /* add to hardware event value */
        __u64   time_enabled;           /* time event active */
        __u64   time_running;           /* time event on cpu */
+       __u32   time_mult, time_shift;
+       __u64   time_offset;
 
                /*
                 * Hole for extension of the self monitor capabilities
                 */
 
 
                /*
                 * Hole for extension of the self monitor capabilities
                 */
 
-       __u64   __reserved[123];        /* align to 1k */
+       __u64   __reserved[121];        /* align to 1k */
 
        /*
         * Control data for the mmap() data buffer.
 
        /*
         * Control data for the mmap() data buffer.
index dcd4049e92fcf1832aecb50ff5dfa94245108293..3a9c7d81afbfdbb1a678436b32acb8279d9ad8d5 100644 (file)
@@ -3220,17 +3220,22 @@ static int perf_event_index(struct perf_event *event)
 }
 
 static void calc_timer_values(struct perf_event *event,
 }
 
 static void calc_timer_values(struct perf_event *event,
+                               u64 *now,
                                u64 *enabled,
                                u64 *running)
 {
                                u64 *enabled,
                                u64 *running)
 {
-       u64 now, ctx_time;
+       u64 ctx_time;
 
 
-       now = perf_clock();
-       ctx_time = event->shadow_ctx_time + now;
+       *now = perf_clock();
+       ctx_time = event->shadow_ctx_time + *now;
        *enabled = ctx_time - event->tstamp_enabled;
        *running = ctx_time - event->tstamp_running;
 }
 
        *enabled = ctx_time - event->tstamp_enabled;
        *running = ctx_time - event->tstamp_running;
 }
 
+void __weak perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now)
+{
+}
+
 /*
  * Callers need to ensure there can be no nesting of this function, otherwise
  * the seqlock logic goes bad. We can not serialize this because the arch
 /*
  * Callers need to ensure there can be no nesting of this function, otherwise
  * the seqlock logic goes bad. We can not serialize this because the arch
@@ -3240,7 +3245,7 @@ void perf_event_update_userpage(struct perf_event *event)
 {
        struct perf_event_mmap_page *userpg;
        struct ring_buffer *rb;
 {
        struct perf_event_mmap_page *userpg;
        struct ring_buffer *rb;
-       u64 enabled, running;
+       u64 enabled, running, now;
 
        rcu_read_lock();
        /*
 
        rcu_read_lock();
        /*
@@ -3252,7 +3257,7 @@ void perf_event_update_userpage(struct perf_event *event)
         * because of locking issue as we can be called in
         * NMI context
         */
         * because of locking issue as we can be called in
         * NMI context
         */
-       calc_timer_values(event, &enabled, &running);
+       calc_timer_values(event, &now, &enabled, &running);
        rb = rcu_dereference(event->rb);
        if (!rb)
                goto unlock;
        rb = rcu_dereference(event->rb);
        if (!rb)
                goto unlock;
@@ -3277,6 +3282,8 @@ void perf_event_update_userpage(struct perf_event *event)
        userpg->time_running = running +
                        atomic64_read(&event->child_total_time_running);
 
        userpg->time_running = running +
                        atomic64_read(&event->child_total_time_running);
 
+       perf_update_user_clock(userpg, now);
+
        barrier();
        ++userpg->lock;
        preempt_enable();
        barrier();
        ++userpg->lock;
        preempt_enable();
@@ -3763,7 +3770,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
 static void perf_output_read(struct perf_output_handle *handle,
                             struct perf_event *event)
 {
 static void perf_output_read(struct perf_output_handle *handle,
                             struct perf_event *event)
 {
-       u64 enabled = 0, running = 0;
+       u64 enabled = 0, running = 0, now;
        u64 read_format = event->attr.read_format;
 
        /*
        u64 read_format = event->attr.read_format;
 
        /*
@@ -3776,7 +3783,7 @@ static void perf_output_read(struct perf_output_handle *handle,
         * NMI context
         */
        if (read_format & PERF_FORMAT_TOTAL_TIMES)
         * NMI context
         */
        if (read_format & PERF_FORMAT_TOTAL_TIMES)
-               calc_timer_values(event, &enabled, &running);
+               calc_timer_values(event, &now, &enabled, &running);
 
        if (event->attr.read_format & PERF_FORMAT_GROUP)
                perf_output_read_group(handle, event, enabled, running);
 
        if (event->attr.read_format & PERF_FORMAT_GROUP)
                perf_output_read_group(handle, event, enabled, running);