tools/perf/util/evsel.c
/*
 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Parts came from builtin-{top,stat,record}.c, see those files for further
 * copyright notes.
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */

#include <byteswap.h>
#include <linux/bitops.h>
#include "asm/bug.h"
#include "debugfs.h"
#include "event-parse.h"
#include "evsel.h"
#include "evlist.h"
#include "util.h"
#include "cpumap.h"
#include "thread_map.h"
#include "target.h"
#include <linux/hw_breakpoint.h>
#include <linux/perf_event.h>
#include "perf_regs.h"

#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))

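/*
 * Compute the size of the fixed-length part of a sample: one u64 per
 * bit set in sample_type, counting only the bits covered by
 * PERF_SAMPLE_MASK (variable-length fields such as RAW or CALLCHAIN
 * are excluded).
 */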
static int __perf_evsel__sample_size(u64 sample_type)
{
        u64 mask = sample_type & PERF_SAMPLE_MASK;
        int size = 0;
        int i;

        for (i = 0; i < 64; i++) {
                if (mask & (1ULL << i))
                        size++;
        }

        size *= sizeof(u64);

        return size;
}

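/*
 * Hist entries are double buffered: new entries go into the rb_root
 * that *entries_in points at, leaving the other entries_in_array root
 * free to be collapsed/resorted in the meantime.
 */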
void hists__init(struct hists *hists)
{
        memset(hists, 0, sizeof(*hists));
        hists->entries_in_array[0] = hists->entries_in_array[1] = RB_ROOT;
        hists->entries_in = &hists->entries_in_array[0];
        hists->entries_collapsed = RB_ROOT;
        hists->entries = RB_ROOT;
        pthread_mutex_init(&hists->lock, NULL);
}

void perf_evsel__init(struct perf_evsel *evsel,
                      struct perf_event_attr *attr, int idx)
{
        evsel->idx         = idx;
        evsel->attr        = *attr;
        INIT_LIST_HEAD(&evsel->node);
        hists__init(&evsel->hists);
        evsel->sample_size = __perf_evsel__sample_size(attr->sample_type);
}

struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx)
{
        struct perf_evsel *evsel = zalloc(sizeof(*evsel));

        if (evsel != NULL)
                perf_evsel__init(evsel, attr, idx);

        return evsel;
}

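/*
 * Slurp <tracing_events_path>/<sys>/<name>/format into a buffer grown
 * in BUFSIZ increments, then hand it to libtraceevent to build the
 * event_format.  Returns NULL on any error.
 */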
struct event_format *event_format__new(const char *sys, const char *name)
{
        int fd, n;
        char *filename;
        void *bf = NULL, *nbf;
        size_t size = 0, alloc_size = 0;
        struct event_format *format = NULL;

        if (asprintf(&filename, "%s/%s/%s/format", tracing_events_path, sys, name) < 0)
                goto out;

        fd = open(filename, O_RDONLY);
        if (fd < 0)
                goto out_free_filename;

        do {
                if (size == alloc_size) {
                        alloc_size += BUFSIZ;
                        nbf = realloc(bf, alloc_size);
                        if (nbf == NULL)
                                goto out_free_bf;
                        bf = nbf;
                }

                n = read(fd, bf + size, alloc_size - size);
                if (n < 0)
                        goto out_free_bf;
                size += n;
        } while (n > 0);

        pevent_parse_format(&format, bf, size, sys);

out_free_bf:
        free(bf);
        close(fd);
out_free_filename:
        free(filename);
out:
        return format;
}

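/*
 * Create an evsel for the sys:name tracepoint: the config is taken
 * from the id in the parsed format file, and sample_period is set to
 * 1 so every tracepoint hit produces a sample.
 */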
struct perf_evsel *perf_evsel__newtp(const char *sys, const char *name, int idx)
{
        struct perf_evsel *evsel = zalloc(sizeof(*evsel));

        if (evsel != NULL) {
                struct perf_event_attr attr = {
                        .type          = PERF_TYPE_TRACEPOINT,
                        .sample_type   = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
                                          PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD),
                };

                if (asprintf(&evsel->name, "%s:%s", sys, name) < 0)
                        goto out_free;

                evsel->tp_format = event_format__new(sys, name);
                if (evsel->tp_format == NULL)
                        goto out_free;

                event_attr_init(&attr);
                attr.config = evsel->tp_format->id;
                attr.sample_period = 1;
                perf_evsel__init(evsel, &attr, idx);
        }

        return evsel;

out_free:
        free(evsel->name);
        free(evsel);
        return NULL;
}

const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX] = {
        "cycles",
        "instructions",
        "cache-references",
        "cache-misses",
        "branches",
        "branch-misses",
        "bus-cycles",
        "stalled-cycles-frontend",
        "stalled-cycles-backend",
        "ref-cycles",
};

static const char *__perf_evsel__hw_name(u64 config)
{
        if (config < PERF_COUNT_HW_MAX && perf_evsel__hw_names[config])
                return perf_evsel__hw_names[config];

        return "unknown-hardware";
}

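/*
 * Append the ":ukhpGH"-style modifier suffix matching the exclude_*
 * and precise_ip settings in the evsel's attr.
 */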
static int perf_evsel__add_modifiers(struct perf_evsel *evsel, char *bf, size_t size)
{
        int colon = 0, r = 0;
        struct perf_event_attr *attr = &evsel->attr;
        bool exclude_guest_default = false;

#define MOD_PRINT(context, mod) do {                                    \
                if (!attr->exclude_##context) {                         \
                        if (!colon) colon = ++r;                        \
                        r += scnprintf(bf + r, size - r, "%c", mod);    \
                } } while(0)

        if (attr->exclude_kernel || attr->exclude_user || attr->exclude_hv) {
                MOD_PRINT(kernel, 'k');
                MOD_PRINT(user, 'u');
                MOD_PRINT(hv, 'h');
                exclude_guest_default = true;
        }

        if (attr->precise_ip) {
                if (!colon)
                        colon = ++r;
                r += scnprintf(bf + r, size - r, "%.*s", attr->precise_ip, "ppp");
                exclude_guest_default = true;
        }

        if (attr->exclude_host || attr->exclude_guest == exclude_guest_default) {
                MOD_PRINT(host, 'H');
                MOD_PRINT(guest, 'G');
        }
#undef MOD_PRINT
        if (colon)
                bf[colon - 1] = ':';
        return r;
}

static int perf_evsel__hw_name(struct perf_evsel *evsel, char *bf, size_t size)
{
        int r = scnprintf(bf, size, "%s", __perf_evsel__hw_name(evsel->attr.config));
        return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
}

const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX] = {
        "cpu-clock",
        "task-clock",
        "page-faults",
        "context-switches",
        "cpu-migrations",
        "minor-faults",
        "major-faults",
        "alignment-faults",
        "emulation-faults",
};

static const char *__perf_evsel__sw_name(u64 config)
{
        if (config < PERF_COUNT_SW_MAX && perf_evsel__sw_names[config])
                return perf_evsel__sw_names[config];
        return "unknown-software";
}

static int perf_evsel__sw_name(struct perf_evsel *evsel, char *bf, size_t size)
{
        int r = scnprintf(bf, size, "%s", __perf_evsel__sw_name(evsel->attr.config));
        return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
}

static int __perf_evsel__bp_name(char *bf, size_t size, u64 addr, u64 type)
{
        int r;

        r = scnprintf(bf, size, "mem:0x%" PRIx64 ":", addr);

        if (type & HW_BREAKPOINT_R)
                r += scnprintf(bf + r, size - r, "r");

        if (type & HW_BREAKPOINT_W)
                r += scnprintf(bf + r, size - r, "w");

        if (type & HW_BREAKPOINT_X)
                r += scnprintf(bf + r, size - r, "x");

        return r;
}

static int perf_evsel__bp_name(struct perf_evsel *evsel, char *bf, size_t size)
{
        struct perf_event_attr *attr = &evsel->attr;
        int r = __perf_evsel__bp_name(bf, size, attr->bp_addr, attr->bp_type);
        return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
}

const char *perf_evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX]
                                [PERF_EVSEL__MAX_ALIASES] = {
 { "L1-dcache", "l1-d",         "l1d",          "L1-data",              },
 { "L1-icache", "l1-i",         "l1i",          "L1-instruction",       },
 { "LLC",       "L2",                                                   },
 { "dTLB",      "d-tlb",        "Data-TLB",                             },
 { "iTLB",      "i-tlb",        "Instruction-TLB",                      },
 { "branch",    "branches",     "bpu",          "btb",          "bpc",  },
 { "node",                                                              },
};

const char *perf_evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX]
                                   [PERF_EVSEL__MAX_ALIASES] = {
 { "load",      "loads",        "read",                                 },
 { "store",     "stores",       "write",                                },
 { "prefetch",  "prefetches",   "speculative-read", "speculative-load", },
};

const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX]
                                       [PERF_EVSEL__MAX_ALIASES] = {
 { "refs",      "Reference",    "ops",          "access",               },
 { "misses",    "miss",                                                 },
};

#define C(x)            PERF_COUNT_HW_CACHE_##x
#define CACHE_READ      (1 << C(OP_READ))
#define CACHE_WRITE     (1 << C(OP_WRITE))
#define CACHE_PREFETCH  (1 << C(OP_PREFETCH))
#define COP(x)          (1 << x)

/*
 * cache operation stat
 * L1I : Read and prefetch only
 * ITLB and BPU : Read-only
 */
static unsigned long perf_evsel__hw_cache_stat[C(MAX)] = {
 [C(L1D)]       = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
 [C(L1I)]       = (CACHE_READ | CACHE_PREFETCH),
 [C(LL)]        = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
 [C(DTLB)]      = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
 [C(ITLB)]      = (CACHE_READ),
 [C(BPU)]       = (CACHE_READ),
 [C(NODE)]      = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
};

bool perf_evsel__is_cache_op_valid(u8 type, u8 op)
{
        if (perf_evsel__hw_cache_stat[type] & COP(op))
                return true;    /* valid */
        else
                return false;   /* invalid */
}

int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result,
                                            char *bf, size_t size)
{
        if (result) {
                return scnprintf(bf, size, "%s-%s-%s", perf_evsel__hw_cache[type][0],
                                 perf_evsel__hw_cache_op[op][0],
                                 perf_evsel__hw_cache_result[result][0]);
        }

        return scnprintf(bf, size, "%s-%s", perf_evsel__hw_cache[type][0],
                         perf_evsel__hw_cache_op[op][1]);
}

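/*
 * Decode a PERF_TYPE_HW_CACHE config: byte 0 is the cache type,
 * byte 1 the operation and byte 2 the result, each validated against
 * its table before the name is assembled.
 */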
static int __perf_evsel__hw_cache_name(u64 config, char *bf, size_t size)
{
        u8 op, result, type = (config >>  0) & 0xff;
        const char *err = "unknown-ext-hardware-cache-type";

        if (type >= PERF_COUNT_HW_CACHE_MAX)
                goto out_err;

        op = (config >>  8) & 0xff;
        err = "unknown-ext-hardware-cache-op";
        if (op >= PERF_COUNT_HW_CACHE_OP_MAX)
                goto out_err;

        result = (config >> 16) & 0xff;
        err = "unknown-ext-hardware-cache-result";
        if (result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
                goto out_err;

        err = "invalid-cache";
        if (!perf_evsel__is_cache_op_valid(type, op))
                goto out_err;

        return __perf_evsel__hw_cache_type_op_res_name(type, op, result, bf, size);
out_err:
        return scnprintf(bf, size, "%s", err);
}

static int perf_evsel__hw_cache_name(struct perf_evsel *evsel, char *bf, size_t size)
{
        int ret = __perf_evsel__hw_cache_name(evsel->attr.config, bf, size);
        return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret);
}

static int perf_evsel__raw_name(struct perf_evsel *evsel, char *bf, size_t size)
{
        int ret = scnprintf(bf, size, "raw 0x%" PRIx64, evsel->attr.config);
        return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret);
}

const char *perf_evsel__name(struct perf_evsel *evsel)
{
        char bf[128];

        if (evsel->name)
                return evsel->name;

        switch (evsel->attr.type) {
        case PERF_TYPE_RAW:
                perf_evsel__raw_name(evsel, bf, sizeof(bf));
                break;

        case PERF_TYPE_HARDWARE:
                perf_evsel__hw_name(evsel, bf, sizeof(bf));
                break;

        case PERF_TYPE_HW_CACHE:
                perf_evsel__hw_cache_name(evsel, bf, sizeof(bf));
                break;

        case PERF_TYPE_SOFTWARE:
                perf_evsel__sw_name(evsel, bf, sizeof(bf));
                break;

        case PERF_TYPE_TRACEPOINT:
                scnprintf(bf, sizeof(bf), "%s", "unknown tracepoint");
                break;

        case PERF_TYPE_BREAKPOINT:
                perf_evsel__bp_name(evsel, bf, sizeof(bf));
                break;

        default:
                scnprintf(bf, sizeof(bf), "unknown attr type: %d",
                          evsel->attr.type);
                break;
        }

        evsel->name = strdup(bf);

        return evsel->name ?: "unknown";
}

/*
 * The enable_on_exec/disabled value strategy:
 *
 *  1) For any type of traced program:
 *    - all independent events and group leaders are disabled
 *    - all group members are enabled
 *
 *     Group members are ruled by group leaders. They need to
 *     be enabled, because the group scheduling relies on that.
 *
 *  2) For traced programs executed by perf:
 *     - all independent events and group leaders have
 *       enable_on_exec set
 *     - we don't specifically enable or disable any event during
 *       the record command
 *
 *     Independent events and group leaders are initially disabled
 *     and get enabled by exec. Group members are ruled by group
 *     leaders as stated in 1).
 *
 *  3) For traced programs attached by perf (pid/tid):
 *     - we specifically enable or disable all events during
 *       the record command
 *
 *     When attaching events to already running traced programs, we
 *     enable/disable events specifically, as there's no
 *     initial traced exec call.
 */
void perf_evsel__config(struct perf_evsel *evsel,
                        struct perf_record_opts *opts)
{
        struct perf_event_attr *attr = &evsel->attr;
        int track = !evsel->idx; /* only the first counter needs these */

        attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
        attr->inherit       = !opts->no_inherit;
        attr->read_format   = PERF_FORMAT_TOTAL_TIME_ENABLED |
                              PERF_FORMAT_TOTAL_TIME_RUNNING |
                              PERF_FORMAT_ID;

        attr->sample_type  |= PERF_SAMPLE_IP | PERF_SAMPLE_TID;

        /*
         * We default some events to a sample period of 1, but keep
         * it a weak assumption that the user can override.
         */
        if (!attr->sample_period || (opts->user_freq != UINT_MAX &&
                                     opts->user_interval != ULLONG_MAX)) {
                if (opts->freq) {
                        attr->sample_type       |= PERF_SAMPLE_PERIOD;
                        attr->freq              = 1;
                        attr->sample_freq       = opts->freq;
                } else {
                        attr->sample_period = opts->default_interval;
                }
        }

        if (opts->no_samples)
                attr->sample_freq = 0;

        if (opts->inherit_stat)
                attr->inherit_stat = 1;

        if (opts->sample_address) {
                attr->sample_type       |= PERF_SAMPLE_ADDR;
                attr->mmap_data = track;
        }

        if (opts->call_graph) {
                attr->sample_type       |= PERF_SAMPLE_CALLCHAIN;

                if (opts->call_graph == CALLCHAIN_DWARF) {
                        attr->sample_type |= PERF_SAMPLE_REGS_USER |
                                             PERF_SAMPLE_STACK_USER;
                        attr->sample_regs_user = PERF_REGS_MASK;
                        attr->sample_stack_user = opts->stack_dump_size;
                        attr->exclude_callchain_user = 1;
                }
        }

        if (perf_target__has_cpu(&opts->target))
                attr->sample_type       |= PERF_SAMPLE_CPU;

        if (opts->period)
                attr->sample_type       |= PERF_SAMPLE_PERIOD;

        if (!opts->sample_id_all_missing &&
            (opts->sample_time || !opts->no_inherit ||
             perf_target__has_cpu(&opts->target)))
                attr->sample_type       |= PERF_SAMPLE_TIME;

        if (opts->raw_samples) {
                attr->sample_type       |= PERF_SAMPLE_TIME;
                attr->sample_type       |= PERF_SAMPLE_RAW;
                attr->sample_type       |= PERF_SAMPLE_CPU;
        }

        if (opts->no_delay) {
                attr->watermark = 0;
                attr->wakeup_events = 1;
        }
        if (opts->branch_stack) {
                attr->sample_type       |= PERF_SAMPLE_BRANCH_STACK;
                attr->branch_sample_type = opts->branch_stack;
        }

        attr->mmap = track;
        attr->comm = track;

        /*
         * XXX see the function comment above
         *
         * Disabling only independent events or group leaders,
         * keeping group members enabled.
         */
        if (!perf_evsel__is_group_member(evsel))
                attr->disabled = 1;

        /*
         * Setting enable_on_exec for independent events and
         * group leaders for traced programs executed by perf.
         */
        if (perf_target__none(&opts->target) && !perf_evsel__is_group_member(evsel))
                attr->enable_on_exec = 1;
}

int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
{
        int cpu, thread;
        evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int));

        if (evsel->fd) {
                for (cpu = 0; cpu < ncpus; cpu++) {
                        for (thread = 0; thread < nthreads; thread++) {
                                FD(evsel, cpu, thread) = -1;
                        }
                }
        }

        return evsel->fd != NULL ? 0 : -ENOMEM;
}

int perf_evsel__set_filter(struct perf_evsel *evsel, int ncpus, int nthreads,
                           const char *filter)
{
        int cpu, thread;

        for (cpu = 0; cpu < ncpus; cpu++) {
                for (thread = 0; thread < nthreads; thread++) {
                        int fd = FD(evsel, cpu, thread),
                            err = ioctl(fd, PERF_EVENT_IOC_SET_FILTER, filter);

                        if (err)
                                return err;
                }
        }

        return 0;
}

int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
{
        evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id));
        if (evsel->sample_id == NULL)
                return -ENOMEM;

        evsel->id = zalloc(ncpus * nthreads * sizeof(u64));
        if (evsel->id == NULL) {
                xyarray__delete(evsel->sample_id);
                evsel->sample_id = NULL;
                return -ENOMEM;
        }

        return 0;
}

int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus)
{
        evsel->counts = zalloc((sizeof(*evsel->counts) +
                                (ncpus * sizeof(struct perf_counts_values))));
        return evsel->counts != NULL ? 0 : -ENOMEM;
}

void perf_evsel__free_fd(struct perf_evsel *evsel)
{
        xyarray__delete(evsel->fd);
        evsel->fd = NULL;
}

void perf_evsel__free_id(struct perf_evsel *evsel)
{
        xyarray__delete(evsel->sample_id);
        evsel->sample_id = NULL;
        free(evsel->id);
        evsel->id = NULL;
}

void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
{
        int cpu, thread;

        for (cpu = 0; cpu < ncpus; cpu++)
                for (thread = 0; thread < nthreads; ++thread) {
                        close(FD(evsel, cpu, thread));
                        FD(evsel, cpu, thread) = -1;
                }
}

void perf_evsel__exit(struct perf_evsel *evsel)
{
        assert(list_empty(&evsel->node));
        xyarray__delete(evsel->fd);
        xyarray__delete(evsel->sample_id);
        free(evsel->id);
}

void perf_evsel__delete(struct perf_evsel *evsel)
{
        perf_evsel__exit(evsel);
        close_cgroup(evsel->cgrp);
        free(evsel->group_name);
        if (evsel->tp_format)
                pevent_free_format(evsel->tp_format);
        free(evsel->name);
        free(evsel);
}

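/*
 * Read one counter value (plus time_enabled/time_running when
 * scaling) from a single cpu/thread fd and store it, scaled for
 * multiplexing if requested, in evsel->counts->cpu[cpu].
 */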
int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
                              int cpu, int thread, bool scale)
{
        struct perf_counts_values count;
        size_t nv = scale ? 3 : 1;

        if (FD(evsel, cpu, thread) < 0)
                return -EINVAL;

        if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1) < 0)
                return -ENOMEM;

        if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0)
                return -errno;

        if (scale) {
                if (count.run == 0)
                        count.val = 0;
                else if (count.run < count.ena)
                        count.val = (u64)((double)count.val * count.ena / count.run + 0.5);
        } else
                count.ena = count.run = 0;

        evsel->counts->cpu[cpu] = count;
        return 0;
}

int __perf_evsel__read(struct perf_evsel *evsel,
                       int ncpus, int nthreads, bool scale)
{
        size_t nv = scale ? 3 : 1;
        int cpu, thread;
        struct perf_counts_values *aggr = &evsel->counts->aggr, count;

        aggr->val = aggr->ena = aggr->run = 0;

        for (cpu = 0; cpu < ncpus; cpu++) {
                for (thread = 0; thread < nthreads; thread++) {
                        if (FD(evsel, cpu, thread) < 0)
                                continue;

                        if (readn(FD(evsel, cpu, thread),
                                  &count, nv * sizeof(u64)) < 0)
                                return -errno;

                        aggr->val += count.val;
                        if (scale) {
                                aggr->ena += count.ena;
                                aggr->run += count.run;
                        }
                }
        }

        evsel->counts->scaled = 0;
        if (scale) {
                if (aggr->run == 0) {
                        evsel->counts->scaled = -1;
                        aggr->val = 0;
                        return 0;
                }

                if (aggr->run < aggr->ena) {
                        evsel->counts->scaled = 1;
                        aggr->val = (u64)((double)aggr->val * aggr->ena / aggr->run + 0.5);
                }
        } else
                aggr->ena = aggr->run = 0;

        return 0;
}

static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread)
{
        struct perf_evsel *leader = evsel->leader;
        int fd;

        if (!perf_evsel__is_group_member(evsel))
                return -1;

        /*
         * Leader must be already processed/open,
         * if not it's a bug.
         */
        BUG_ON(!leader->fd);

        fd = FD(leader, cpu, thread);
        BUG_ON(fd == -1);

        return fd;
}

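/*
 * Open one fd per (cpu, thread) pair.  When the evsel is bound to a
 * cgroup, the cgroup fd is passed as the pid together with
 * PERF_FLAG_PID_CGROUP; group members are tied to their leader's fd
 * via group_fd.
 */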
static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
                              struct thread_map *threads)
{
        int cpu, thread;
        unsigned long flags = 0;
        int pid = -1, err;

        if (evsel->fd == NULL &&
            perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0)
                return -ENOMEM;

        if (evsel->cgrp) {
                flags = PERF_FLAG_PID_CGROUP;
                pid = evsel->cgrp->fd;
        }

        for (cpu = 0; cpu < cpus->nr; cpu++) {

                for (thread = 0; thread < threads->nr; thread++) {
                        int group_fd;

                        if (!evsel->cgrp)
                                pid = threads->map[thread];

                        group_fd = get_group_fd(evsel, cpu, thread);

                        FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr,
                                                                     pid,
                                                                     cpus->map[cpu],
                                                                     group_fd, flags);
                        if (FD(evsel, cpu, thread) < 0) {
                                err = -errno;
                                goto out_close;
                        }
                }
        }

        return 0;

out_close:
        do {
                while (--thread >= 0) {
                        close(FD(evsel, cpu, thread));
                        FD(evsel, cpu, thread) = -1;
                }
                thread = threads->nr;
        } while (--cpu >= 0);
        return err;
}

void perf_evsel__close(struct perf_evsel *evsel, int ncpus, int nthreads)
{
        if (evsel->fd == NULL)
                return;

        perf_evsel__close_fd(evsel, ncpus, nthreads);
        perf_evsel__free_fd(evsel);
        evsel->fd = NULL;
}

static struct {
        struct cpu_map map;
        int cpus[1];
} empty_cpu_map = {
        .map.nr = 1,
        .cpus   = { -1, },
};

static struct {
        struct thread_map map;
        int threads[1];
} empty_thread_map = {
        .map.nr  = 1,
        .threads = { -1, },
};

int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
                     struct thread_map *threads)
{
        if (cpus == NULL) {
                /* Work around old compiler warnings about strict aliasing */
                cpus = &empty_cpu_map.map;
        }

        if (threads == NULL)
                threads = &empty_thread_map.map;

        return __perf_evsel__open(evsel, cpus, threads);
}

int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
                             struct cpu_map *cpus)
{
        return __perf_evsel__open(evsel, cpus, &empty_thread_map.map);
}

int perf_evsel__open_per_thread(struct perf_evsel *evsel,
                                struct thread_map *threads)
{
        return __perf_evsel__open(evsel, &empty_cpu_map.map, threads);
}

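/*
 * For non-sample events carrying sample_id_all data, the id fields
 * are appended at the END of the record, so parse them backwards
 * starting from the last u64.
 */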
static int perf_evsel__parse_id_sample(const struct perf_evsel *evsel,
                                       const union perf_event *event,
                                       struct perf_sample *sample)
{
        u64 type = evsel->attr.sample_type;
        const u64 *array = event->sample.array;
        bool swapped = evsel->needs_swap;
        union u64_swap u;

        array += ((event->header.size -
                   sizeof(event->header)) / sizeof(u64)) - 1;

        if (type & PERF_SAMPLE_CPU) {
                u.val64 = *array;
                if (swapped) {
                        /* undo swap of u64, then swap on individual u32s */
                        u.val64 = bswap_64(u.val64);
                        u.val32[0] = bswap_32(u.val32[0]);
                }

                sample->cpu = u.val32[0];
                array--;
        }

        if (type & PERF_SAMPLE_STREAM_ID) {
                sample->stream_id = *array;
                array--;
        }

        if (type & PERF_SAMPLE_ID) {
                sample->id = *array;
                array--;
        }

        if (type & PERF_SAMPLE_TIME) {
                sample->time = *array;
                array--;
        }

        if (type & PERF_SAMPLE_TID) {
                u.val64 = *array;
                if (swapped) {
                        /* undo swap of u64, then swap on individual u32s */
                        u.val64 = bswap_64(u.val64);
                        u.val32[0] = bswap_32(u.val32[0]);
                        u.val32[1] = bswap_32(u.val32[1]);
                }

                sample->pid = u.val32[0];
                sample->tid = u.val32[1];
        }

        return 0;
}

static bool sample_overlap(const union perf_event *event,
                           const void *offset, u64 size)
{
        const void *base = event;

        if (offset + size > base + event->header.size)
                return true;

        return false;
}

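/*
 * Walk the sample payload front to back in the order the kernel lays
 * the fields out, bounds-checking the variable-length parts
 * (callchain, raw data) against the record size.
 */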
int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
                             struct perf_sample *data)
{
        u64 type = evsel->attr.sample_type;
        u64 regs_user = evsel->attr.sample_regs_user;
        bool swapped = evsel->needs_swap;
        const u64 *array;

        /*
         * used for cross-endian analysis. See git commit 65014ab3
         * for why this goofiness is needed.
         */
        union u64_swap u;

        memset(data, 0, sizeof(*data));
        data->cpu = data->pid = data->tid = -1;
        data->stream_id = data->id = data->time = -1ULL;
        data->period = 1;

        if (event->header.type != PERF_RECORD_SAMPLE) {
                if (!evsel->attr.sample_id_all)
                        return 0;
                return perf_evsel__parse_id_sample(evsel, event, data);
        }

        array = event->sample.array;

        if (evsel->sample_size + sizeof(event->header) > event->header.size)
                return -EFAULT;

        if (type & PERF_SAMPLE_IP) {
                data->ip = event->ip.ip;
                array++;
        }

        if (type & PERF_SAMPLE_TID) {
                u.val64 = *array;
                if (swapped) {
                        /* undo swap of u64, then swap on individual u32s */
                        u.val64 = bswap_64(u.val64);
                        u.val32[0] = bswap_32(u.val32[0]);
                        u.val32[1] = bswap_32(u.val32[1]);
                }

                data->pid = u.val32[0];
                data->tid = u.val32[1];
                array++;
        }

        if (type & PERF_SAMPLE_TIME) {
                data->time = *array;
                array++;
        }

        data->addr = 0;
        if (type & PERF_SAMPLE_ADDR) {
                data->addr = *array;
                array++;
        }

        data->id = -1ULL;
        if (type & PERF_SAMPLE_ID) {
                data->id = *array;
                array++;
        }

        if (type & PERF_SAMPLE_STREAM_ID) {
                data->stream_id = *array;
                array++;
        }

        if (type & PERF_SAMPLE_CPU) {

                u.val64 = *array;
                if (swapped) {
                        /* undo swap of u64, then swap on individual u32s */
                        u.val64 = bswap_64(u.val64);
                        u.val32[0] = bswap_32(u.val32[0]);
                }

                data->cpu = u.val32[0];
                array++;
        }

        if (type & PERF_SAMPLE_PERIOD) {
                data->period = *array;
                array++;
        }

        if (type & PERF_SAMPLE_READ) {
                fprintf(stderr, "PERF_SAMPLE_READ is unsupported for now\n");
                return -1;
        }

        if (type & PERF_SAMPLE_CALLCHAIN) {
                if (sample_overlap(event, array, sizeof(data->callchain->nr)))
                        return -EFAULT;

                data->callchain = (struct ip_callchain *)array;

                if (sample_overlap(event, array, data->callchain->nr))
                        return -EFAULT;

                array += 1 + data->callchain->nr;
        }

        if (type & PERF_SAMPLE_RAW) {
                const u64 *pdata;

                u.val64 = *array;
                if (WARN_ONCE(swapped,
                              "Endianness of raw data not corrected!\n")) {
                        /* undo swap of u64, then swap on individual u32s */
                        u.val64 = bswap_64(u.val64);
                        u.val32[0] = bswap_32(u.val32[0]);
                        u.val32[1] = bswap_32(u.val32[1]);
                }

                if (sample_overlap(event, array, sizeof(u32)))
                        return -EFAULT;

                data->raw_size = u.val32[0];
                pdata = (void *) array + sizeof(u32);

                if (sample_overlap(event, pdata, data->raw_size))
                        return -EFAULT;

                data->raw_data = (void *) pdata;

                array = (void *)array + data->raw_size + sizeof(u32);
        }

        if (type & PERF_SAMPLE_BRANCH_STACK) {
                u64 sz;

                data->branch_stack = (struct branch_stack *)array;
                array++; /* nr */

                sz = data->branch_stack->nr * sizeof(struct branch_entry);
                sz /= sizeof(u64);
                array += sz;
        }

        if (type & PERF_SAMPLE_REGS_USER) {
                /* First u64 tells us if we have any regs in sample. */
                u64 avail = *array++;

                if (avail) {
                        data->user_regs.regs = (u64 *)array;
                        array += hweight_long(regs_user);
                }
        }

        if (type & PERF_SAMPLE_STACK_USER) {
                u64 size = *array++;

                data->user_stack.offset = ((char *)(array - 1)
                                          - (char *) event);

                if (!size) {
                        data->user_stack.size = 0;
                } else {
                        data->user_stack.data = (char *)array;
                        array += size / sizeof(*array);
                        data->user_stack.size = *array;
                }
        }

        return 0;
}

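/*
 * The inverse of perf_evsel__parse_sample for the fixed-size fields:
 * serialize a perf_sample back into an on-disk sample record.
 */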
int perf_event__synthesize_sample(union perf_event *event, u64 type,
                                  const struct perf_sample *sample,
                                  bool swapped)
{
        u64 *array;

        /*
         * used for cross-endian analysis. See git commit 65014ab3
         * for why this goofiness is needed.
         */
        union u64_swap u;

        array = event->sample.array;

        if (type & PERF_SAMPLE_IP) {
                event->ip.ip = sample->ip;
                array++;
        }

        if (type & PERF_SAMPLE_TID) {
                u.val32[0] = sample->pid;
                u.val32[1] = sample->tid;
                if (swapped) {
                        /*
                         * Inverse of what is done in perf_evsel__parse_sample
                         */
                        u.val32[0] = bswap_32(u.val32[0]);
                        u.val32[1] = bswap_32(u.val32[1]);
                        u.val64 = bswap_64(u.val64);
                }

                *array = u.val64;
                array++;
        }

        if (type & PERF_SAMPLE_TIME) {
                *array = sample->time;
                array++;
        }

        if (type & PERF_SAMPLE_ADDR) {
                *array = sample->addr;
                array++;
        }

        if (type & PERF_SAMPLE_ID) {
                *array = sample->id;
                array++;
        }

        if (type & PERF_SAMPLE_STREAM_ID) {
                *array = sample->stream_id;
                array++;
        }

        if (type & PERF_SAMPLE_CPU) {
                u.val32[0] = sample->cpu;
                if (swapped) {
                        /*
                         * Inverse of what is done in perf_evsel__parse_sample
                         */
                        u.val32[0] = bswap_32(u.val32[0]);
                        u.val64 = bswap_64(u.val64);
                }
                *array = u.val64;
                array++;
        }

        if (type & PERF_SAMPLE_PERIOD) {
                *array = sample->period;
                array++;
        }

        return 0;
}

struct format_field *perf_evsel__field(struct perf_evsel *evsel, const char *name)
{
        return pevent_find_field(evsel->tp_format, name);
}

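/*
 * Return a pointer to the named field inside a tracepoint's raw
 * payload.  For dynamic (__data_loc) fields the 32-bit value stored
 * at the field's offset packs the real payload offset in its low 16
 * bits (the high bits carry the length, unused here).
 */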
void *perf_evsel__rawptr(struct perf_evsel *evsel, struct perf_sample *sample,
                         const char *name)
{
        struct format_field *field = perf_evsel__field(evsel, name);
        int offset;

        if (!field)
                return NULL;

        offset = field->offset;

        if (field->flags & FIELD_IS_DYNAMIC) {
                offset = *(int *)(sample->raw_data + field->offset);
                offset &= 0xffff;
        }

        return sample->raw_data + offset;
}

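/*
 * Fetch the named tracepoint field as an integer, byte-swapping when
 * the recorded data has the opposite endianness of the host.
 */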
u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample,
                       const char *name)
{
        struct format_field *field = perf_evsel__field(evsel, name);
        void *ptr;
        u64 value;

        if (!field)
                return 0;

        ptr = sample->raw_data + field->offset;

        switch (field->size) {
        case 1:
                return *(u8 *)ptr;
        case 2:
                value = *(u16 *)ptr;
                break;
        case 4:
                value = *(u32 *)ptr;
                break;
        case 8:
                value = *(u64 *)ptr;
                break;
        default:
                return 0;
        }

        if (!evsel->needs_swap)
                return value;

        switch (field->size) {
        case 2:
                return bswap_16(value);
        case 4:
                return bswap_32(value);
        case 8:
                return bswap_64(value);
        default:
                return 0;
        }

        return 0;
}