tools/perf/util/evsel.c
/*
 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Parts came from builtin-{top,stat,record}.c, see those files for further
 * copyright notes.
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */

#include <byteswap.h>
#include <linux/bitops.h>
#include "asm/bug.h"
#include "debugfs.h"
#include "event-parse.h"
#include "evsel.h"
#include "evlist.h"
#include "util.h"
#include "cpumap.h"
#include "thread_map.h"
#include "target.h"
#include <linux/hw_breakpoint.h>
#include <linux/perf_event.h>
#include "perf_regs.h"

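/*
 * Shorthand to index the per-cpu/per-thread file descriptor table
 * allocated by perf_evsel__alloc_fd().
 */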
#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))

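/*
 * Each bit set in sample_type (restricted to PERF_SAMPLE_MASK) adds one
 * fixed-size u64 to every sample record, so the static part of a sample
 * is simply 8 bytes per bit, e.g. PERF_SAMPLE_IP | PERF_SAMPLE_TID
 * yields 16 bytes.
 */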
static int __perf_evsel__sample_size(u64 sample_type)
{
        u64 mask = sample_type & PERF_SAMPLE_MASK;
        int size = 0;
        int i;

        for (i = 0; i < 64; i++) {
                if (mask & (1ULL << i))
                        size++;
        }

        size *= sizeof(u64);

        return size;
}

void hists__init(struct hists *hists)
{
        memset(hists, 0, sizeof(*hists));
        hists->entries_in_array[0] = hists->entries_in_array[1] = RB_ROOT;
        hists->entries_in = &hists->entries_in_array[0];
        hists->entries_collapsed = RB_ROOT;
        hists->entries = RB_ROOT;
        pthread_mutex_init(&hists->lock, NULL);
}

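/*
 * The set/reset helpers below keep evsel->sample_size in sync with
 * attr.sample_type so that perf_evsel__parse_sample() can cheaply
 * validate record sizes.  They are normally reached through the
 * perf_evsel__{set,reset}_sample_bit() macros in evsel.h, which paste
 * the PERF_SAMPLE_ prefix onto the short bit name (e.g. ID, TID).
 */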
void __perf_evsel__set_sample_bit(struct perf_evsel *evsel,
                                  enum perf_event_sample_format bit)
{
        if (!(evsel->attr.sample_type & bit)) {
                evsel->attr.sample_type |= bit;
                evsel->sample_size += sizeof(u64);
        }
}

void __perf_evsel__reset_sample_bit(struct perf_evsel *evsel,
                                    enum perf_event_sample_format bit)
{
        if (evsel->attr.sample_type & bit) {
                evsel->attr.sample_type &= ~bit;
                evsel->sample_size -= sizeof(u64);
        }
}

void perf_evsel__set_sample_id(struct perf_evsel *evsel)
{
        perf_evsel__set_sample_bit(evsel, ID);
        evsel->attr.read_format |= PERF_FORMAT_ID;
}

void perf_evsel__init(struct perf_evsel *evsel,
                      struct perf_event_attr *attr, int idx)
{
        evsel->idx         = idx;
        evsel->attr        = *attr;
        evsel->leader      = evsel;
        INIT_LIST_HEAD(&evsel->node);
        hists__init(&evsel->hists);
        evsel->sample_size = __perf_evsel__sample_size(attr->sample_type);
}

struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx)
{
        struct perf_evsel *evsel = zalloc(sizeof(*evsel));

        if (evsel != NULL)
                perf_evsel__init(evsel, attr, idx);

        return evsel;
}

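/*
 * Slurp the "format" file for a tracepoint
 * (<tracing_events_path>/<sys>/<name>/format) into memory, growing the
 * buffer BUFSIZ bytes at a time, then hand it to libtraceevent's
 * pevent_parse_format() to build the struct event_format.
 */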
struct event_format *event_format__new(const char *sys, const char *name)
{
        int fd, n;
        char *filename;
        void *bf = NULL, *nbf;
        size_t size = 0, alloc_size = 0;
        struct event_format *format = NULL;

        if (asprintf(&filename, "%s/%s/%s/format", tracing_events_path, sys, name) < 0)
                goto out;

        fd = open(filename, O_RDONLY);
        if (fd < 0)
                goto out_free_filename;

        do {
                if (size == alloc_size) {
                        alloc_size += BUFSIZ;
                        nbf = realloc(bf, alloc_size);
                        if (nbf == NULL)
                                goto out_free_bf;
                        bf = nbf;
                }

                n = read(fd, bf + size, BUFSIZ);
                if (n < 0)
                        goto out_free_bf;
                size += n;
        } while (n > 0);

        pevent_parse_format(&format, bf, size, sys);

out_free_bf:
        free(bf);
        close(fd);
out_free_filename:
        free(filename);
out:
        return format;
}

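/*
 * Constructor for tracepoint evsels.  A caller might do, for instance:
 *
 *        struct perf_evsel *evsel = perf_evsel__newtp("sched", "sched_switch", 0);
 *
 * which resolves the tracepoint id from its format file and selects the
 * RAW/TIME/CPU/PERIOD sample bits that tracepoint processing relies on.
 */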
struct perf_evsel *perf_evsel__newtp(const char *sys, const char *name, int idx)
{
        struct perf_evsel *evsel = zalloc(sizeof(*evsel));

        if (evsel != NULL) {
                struct perf_event_attr attr = {
                        .type          = PERF_TYPE_TRACEPOINT,
                        .sample_type   = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
                                          PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD),
                };

                if (asprintf(&evsel->name, "%s:%s", sys, name) < 0)
                        goto out_free;

                evsel->tp_format = event_format__new(sys, name);
                if (evsel->tp_format == NULL)
                        goto out_free;

                event_attr_init(&attr);
                attr.config = evsel->tp_format->id;
                attr.sample_period = 1;
                perf_evsel__init(evsel, &attr, idx);
        }

        return evsel;

out_free:
        free(evsel->name);
        free(evsel);
        return NULL;
}

const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX] = {
        "cycles",
        "instructions",
        "cache-references",
        "cache-misses",
        "branches",
        "branch-misses",
        "bus-cycles",
        "stalled-cycles-frontend",
        "stalled-cycles-backend",
        "ref-cycles",
};

static const char *__perf_evsel__hw_name(u64 config)
{
        if (config < PERF_COUNT_HW_MAX && perf_evsel__hw_names[config])
                return perf_evsel__hw_names[config];

        return "unknown-hardware";
}

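/*
 * Append the ":ukhGHp"-style modifier suffix to an event name, mirroring
 * the modifiers accepted by the event parser: privilege levels that were
 * *not* excluded (u/k/h), host/guest (H/G), and one 'p' per precise_ip
 * level.
 */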
static int perf_evsel__add_modifiers(struct perf_evsel *evsel, char *bf, size_t size)
{
        int colon = 0, r = 0;
        struct perf_event_attr *attr = &evsel->attr;
        bool exclude_guest_default = false;

#define MOD_PRINT(context, mod) do {                                    \
                if (!attr->exclude_##context) {                         \
                        if (!colon) colon = ++r;                        \
                        r += scnprintf(bf + r, size - r, "%c", mod);    \
                } } while(0)

        if (attr->exclude_kernel || attr->exclude_user || attr->exclude_hv) {
                MOD_PRINT(kernel, 'k');
                MOD_PRINT(user, 'u');
                MOD_PRINT(hv, 'h');
                exclude_guest_default = true;
        }

        if (attr->precise_ip) {
                if (!colon)
                        colon = ++r;
                r += scnprintf(bf + r, size - r, "%.*s", attr->precise_ip, "ppp");
                exclude_guest_default = true;
        }

        if (attr->exclude_host || attr->exclude_guest == exclude_guest_default) {
                MOD_PRINT(host, 'H');
                MOD_PRINT(guest, 'G');
        }
#undef MOD_PRINT
        if (colon)
                bf[colon - 1] = ':';
        return r;
}

static int perf_evsel__hw_name(struct perf_evsel *evsel, char *bf, size_t size)
{
        int r = scnprintf(bf, size, "%s", __perf_evsel__hw_name(evsel->attr.config));
        return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
}

const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX] = {
        "cpu-clock",
        "task-clock",
        "page-faults",
        "context-switches",
        "cpu-migrations",
        "minor-faults",
        "major-faults",
        "alignment-faults",
        "emulation-faults",
};

static const char *__perf_evsel__sw_name(u64 config)
{
        if (config < PERF_COUNT_SW_MAX && perf_evsel__sw_names[config])
                return perf_evsel__sw_names[config];
        return "unknown-software";
}

static int perf_evsel__sw_name(struct perf_evsel *evsel, char *bf, size_t size)
{
        int r = scnprintf(bf, size, "%s", __perf_evsel__sw_name(evsel->attr.config));
        return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
}

static int __perf_evsel__bp_name(char *bf, size_t size, u64 addr, u64 type)
{
        int r;

        r = scnprintf(bf, size, "mem:0x%" PRIx64 ":", addr);

        if (type & HW_BREAKPOINT_R)
                r += scnprintf(bf + r, size - r, "r");

        if (type & HW_BREAKPOINT_W)
                r += scnprintf(bf + r, size - r, "w");

        if (type & HW_BREAKPOINT_X)
                r += scnprintf(bf + r, size - r, "x");

        return r;
}

static int perf_evsel__bp_name(struct perf_evsel *evsel, char *bf, size_t size)
{
        struct perf_event_attr *attr = &evsel->attr;
        int r = __perf_evsel__bp_name(bf, size, attr->bp_addr, attr->bp_type);
        return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
}

const char *perf_evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX]
                                [PERF_EVSEL__MAX_ALIASES] = {
 { "L1-dcache", "l1-d",         "l1d",          "L1-data",              },
 { "L1-icache", "l1-i",         "l1i",          "L1-instruction",       },
 { "LLC",       "L2",                                                   },
 { "dTLB",      "d-tlb",        "Data-TLB",                             },
 { "iTLB",      "i-tlb",        "Instruction-TLB",                      },
 { "branch",    "branches",     "bpu",          "btb",          "bpc",  },
 { "node",                                                              },
};

const char *perf_evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX]
                                   [PERF_EVSEL__MAX_ALIASES] = {
 { "load",      "loads",        "read",                                 },
 { "store",     "stores",       "write",                                },
 { "prefetch",  "prefetches",   "speculative-read", "speculative-load", },
};

const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX]
                                       [PERF_EVSEL__MAX_ALIASES] = {
 { "refs",      "Reference",    "ops",          "access",               },
 { "misses",    "miss",                                                 },
};

#define C(x)            PERF_COUNT_HW_CACHE_##x
#define CACHE_READ      (1 << C(OP_READ))
#define CACHE_WRITE     (1 << C(OP_WRITE))
#define CACHE_PREFETCH  (1 << C(OP_PREFETCH))
#define COP(x)          (1 << x)

/*
 * cache operation stat
 * L1I : Read and prefetch only
 * ITLB and BPU : Read-only
 */
static unsigned long perf_evsel__hw_cache_stat[C(MAX)] = {
 [C(L1D)]       = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
 [C(L1I)]       = (CACHE_READ | CACHE_PREFETCH),
 [C(LL)]        = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
 [C(DTLB)]      = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
 [C(ITLB)]      = (CACHE_READ),
 [C(BPU)]       = (CACHE_READ),
 [C(NODE)]      = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
};

bool perf_evsel__is_cache_op_valid(u8 type, u8 op)
{
        if (perf_evsel__hw_cache_stat[type] & COP(op))
                return true;    /* valid */
        else
                return false;   /* invalid */
}

int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result,
                                            char *bf, size_t size)
{
        if (result) {
                return scnprintf(bf, size, "%s-%s-%s", perf_evsel__hw_cache[type][0],
                                 perf_evsel__hw_cache_op[op][0],
                                 perf_evsel__hw_cache_result[result][0]);
        }

        return scnprintf(bf, size, "%s-%s", perf_evsel__hw_cache[type][0],
                         perf_evsel__hw_cache_op[op][1]);
}

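/*
 * Decode a PERF_TYPE_HW_CACHE config, which packs three selectors into
 * one u64 (see the perf_event_open(2) ABI):
 *
 *        config = (type) | (op << 8) | (result << 16)
 *
 * e.g. L1-dcache-load-misses is type 0, op 0, result 1.
 */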
static int __perf_evsel__hw_cache_name(u64 config, char *bf, size_t size)
{
        u8 op, result, type = (config >>  0) & 0xff;
        const char *err = "unknown-ext-hardware-cache-type";

        if (type >= PERF_COUNT_HW_CACHE_MAX)
                goto out_err;

        op = (config >>  8) & 0xff;
        err = "unknown-ext-hardware-cache-op";
        if (op >= PERF_COUNT_HW_CACHE_OP_MAX)
                goto out_err;

        result = (config >> 16) & 0xff;
        err = "unknown-ext-hardware-cache-result";
        if (result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
                goto out_err;

        err = "invalid-cache";
        if (!perf_evsel__is_cache_op_valid(type, op))
                goto out_err;

        return __perf_evsel__hw_cache_type_op_res_name(type, op, result, bf, size);
out_err:
        return scnprintf(bf, size, "%s", err);
}

static int perf_evsel__hw_cache_name(struct perf_evsel *evsel, char *bf, size_t size)
{
        int ret = __perf_evsel__hw_cache_name(evsel->attr.config, bf, size);
        return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret);
}

static int perf_evsel__raw_name(struct perf_evsel *evsel, char *bf, size_t size)
{
        int ret = scnprintf(bf, size, "raw 0x%" PRIx64, evsel->attr.config);
        return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret);
}

const char *perf_evsel__name(struct perf_evsel *evsel)
{
        char bf[128];

        if (evsel->name)
                return evsel->name;

        switch (evsel->attr.type) {
        case PERF_TYPE_RAW:
                perf_evsel__raw_name(evsel, bf, sizeof(bf));
                break;

        case PERF_TYPE_HARDWARE:
                perf_evsel__hw_name(evsel, bf, sizeof(bf));
                break;

        case PERF_TYPE_HW_CACHE:
                perf_evsel__hw_cache_name(evsel, bf, sizeof(bf));
                break;

        case PERF_TYPE_SOFTWARE:
                perf_evsel__sw_name(evsel, bf, sizeof(bf));
                break;

        case PERF_TYPE_TRACEPOINT:
                scnprintf(bf, sizeof(bf), "%s", "unknown tracepoint");
                break;

        case PERF_TYPE_BREAKPOINT:
                perf_evsel__bp_name(evsel, bf, sizeof(bf));
                break;

        default:
                scnprintf(bf, sizeof(bf), "unknown attr type: %d",
                          evsel->attr.type);
                break;
        }

        evsel->name = strdup(bf);

        return evsel->name ?: "unknown";
}

/*
 * The enable_on_exec/disabled value strategy:
 *
 *  1) For any type of traced program:
 *    - all independent events and group leaders are disabled
 *    - all group members are enabled
 *
 *     Group members are ruled by group leaders. They need to
 *     be enabled, because the group scheduling relies on that.
 *
 *  2) For traced programs executed by perf:
 *     - all independent events and group leaders have
 *       enable_on_exec set
 *     - we don't specifically enable or disable any event during
 *       the record command
 *
 *     Independent events and group leaders are initially disabled
 *     and get enabled by exec. Group members are ruled by group
 *     leaders as stated in 1).
 *
 *  3) For traced programs attached by perf (pid/tid):
 *     - we specifically enable or disable all events during
 *       the record command
 *
 *     When attaching events to an already running traced program,
 *     we enable/disable events specifically, as there's no
 *     initial traced exec call.
 */
void perf_evsel__config(struct perf_evsel *evsel,
                        struct perf_record_opts *opts)
{
        struct perf_event_attr *attr = &evsel->attr;
        int track = !evsel->idx; /* only the first counter needs these */

        attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
        attr->inherit       = !opts->no_inherit;

        perf_evsel__set_sample_bit(evsel, IP);
        perf_evsel__set_sample_bit(evsel, TID);

        /*
         * We default some events to a period of 1. But keep
         * it a weak assumption overridable by the user.
         */
        if (!attr->sample_period || (opts->user_freq != UINT_MAX &&
                                     opts->user_interval != ULLONG_MAX)) {
                if (opts->freq) {
                        perf_evsel__set_sample_bit(evsel, PERIOD);
                        attr->freq              = 1;
                        attr->sample_freq       = opts->freq;
                } else {
                        attr->sample_period = opts->default_interval;
                }
        }

        if (opts->no_samples)
                attr->sample_freq = 0;

        if (opts->inherit_stat)
                attr->inherit_stat = 1;

        if (opts->sample_address) {
                perf_evsel__set_sample_bit(evsel, ADDR);
                attr->mmap_data = track;
        }

        if (opts->call_graph) {
                perf_evsel__set_sample_bit(evsel, CALLCHAIN);

                if (opts->call_graph == CALLCHAIN_DWARF) {
                        perf_evsel__set_sample_bit(evsel, REGS_USER);
                        perf_evsel__set_sample_bit(evsel, STACK_USER);
                        attr->sample_regs_user = PERF_REGS_MASK;
                        attr->sample_stack_user = opts->stack_dump_size;
                        attr->exclude_callchain_user = 1;
                }
        }

        if (perf_target__has_cpu(&opts->target))
                perf_evsel__set_sample_bit(evsel, CPU);

        if (opts->period)
                perf_evsel__set_sample_bit(evsel, PERIOD);

        if (!opts->sample_id_all_missing &&
            (opts->sample_time || !opts->no_inherit ||
             perf_target__has_cpu(&opts->target)))
                perf_evsel__set_sample_bit(evsel, TIME);

        if (opts->raw_samples) {
                perf_evsel__set_sample_bit(evsel, TIME);
                perf_evsel__set_sample_bit(evsel, RAW);
                perf_evsel__set_sample_bit(evsel, CPU);
        }

        if (opts->no_delay) {
                attr->watermark = 0;
                attr->wakeup_events = 1;
        }
        if (opts->branch_stack) {
                perf_evsel__set_sample_bit(evsel, BRANCH_STACK);
                attr->branch_sample_type = opts->branch_stack;
        }

        attr->mmap = track;
        attr->comm = track;

        /*
         * XXX see the function comment above
         *
         * Disabling only independent events or group leaders,
         * keeping group members enabled.
         */
        if (perf_evsel__is_group_leader(evsel))
                attr->disabled = 1;

        /*
         * Setting enable_on_exec for independent events and
         * group leaders for traced programs executed by perf.
         */
        if (perf_target__none(&opts->target) && perf_evsel__is_group_leader(evsel))
                attr->enable_on_exec = 1;
}

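/*
 * Allocate the cpu x thread file descriptor table and mark every slot
 * closed (-1) so error paths can unconditionally walk and close it.
 */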
int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
{
        int cpu, thread;
        evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int));

        if (evsel->fd) {
                for (cpu = 0; cpu < ncpus; cpu++) {
                        for (thread = 0; thread < nthreads; thread++) {
                                FD(evsel, cpu, thread) = -1;
                        }
                }
        }

        return evsel->fd != NULL ? 0 : -ENOMEM;
}

int perf_evsel__set_filter(struct perf_evsel *evsel, int ncpus, int nthreads,
                           const char *filter)
{
        int cpu, thread;

        for (cpu = 0; cpu < ncpus; cpu++) {
                for (thread = 0; thread < nthreads; thread++) {
                        int fd = FD(evsel, cpu, thread),
                            err = ioctl(fd, PERF_EVENT_IOC_SET_FILTER, filter);

                        if (err)
                                return err;
                }
        }

        return 0;
}

int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
{
        evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id));
        if (evsel->sample_id == NULL)
                return -ENOMEM;

        evsel->id = zalloc(ncpus * nthreads * sizeof(u64));
        if (evsel->id == NULL) {
                xyarray__delete(evsel->sample_id);
                evsel->sample_id = NULL;
                return -ENOMEM;
        }

        return 0;
}

int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus)
{
        evsel->counts = zalloc((sizeof(*evsel->counts) +
                                (ncpus * sizeof(struct perf_counts_values))));
        return evsel->counts != NULL ? 0 : -ENOMEM;
}

void perf_evsel__free_fd(struct perf_evsel *evsel)
{
        xyarray__delete(evsel->fd);
        evsel->fd = NULL;
}

void perf_evsel__free_id(struct perf_evsel *evsel)
{
        xyarray__delete(evsel->sample_id);
        evsel->sample_id = NULL;
        free(evsel->id);
        evsel->id = NULL;
}

void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
{
        int cpu, thread;

        for (cpu = 0; cpu < ncpus; cpu++)
                for (thread = 0; thread < nthreads; ++thread) {
                        close(FD(evsel, cpu, thread));
                        FD(evsel, cpu, thread) = -1;
                }
}

void perf_evsel__exit(struct perf_evsel *evsel)
{
        assert(list_empty(&evsel->node));
        xyarray__delete(evsel->fd);
        xyarray__delete(evsel->sample_id);
        free(evsel->id);
}

void perf_evsel__delete(struct perf_evsel *evsel)
{
        perf_evsel__exit(evsel);
        close_cgroup(evsel->cgrp);
        free(evsel->group_name);
        if (evsel->tp_format)
                pevent_free_format(evsel->tp_format);
        free(evsel->name);
        free(evsel);
}

int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
                              int cpu, int thread, bool scale)
{
        struct perf_counts_values count;
        size_t nv = scale ? 3 : 1;

        if (FD(evsel, cpu, thread) < 0)
                return -EINVAL;

        if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1) < 0)
                return -ENOMEM;

        if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0)
                return -errno;

        if (scale) {
                if (count.run == 0)
                        count.val = 0;
                else if (count.run < count.ena)
                        count.val = (u64)((double)count.val * count.ena / count.run + 0.5);
        } else
                count.ena = count.run = 0;

        evsel->counts->cpu[cpu] = count;
        return 0;
}

int __perf_evsel__read(struct perf_evsel *evsel,
                       int ncpus, int nthreads, bool scale)
{
        size_t nv = scale ? 3 : 1;
        int cpu, thread;
        struct perf_counts_values *aggr = &evsel->counts->aggr, count;

        aggr->val = aggr->ena = aggr->run = 0;

        for (cpu = 0; cpu < ncpus; cpu++) {
                for (thread = 0; thread < nthreads; thread++) {
                        if (FD(evsel, cpu, thread) < 0)
                                continue;

                        if (readn(FD(evsel, cpu, thread),
                                  &count, nv * sizeof(u64)) < 0)
                                return -errno;

                        aggr->val += count.val;
                        if (scale) {
                                aggr->ena += count.ena;
                                aggr->run += count.run;
                        }
                }
        }

        evsel->counts->scaled = 0;
        if (scale) {
                if (aggr->run == 0) {
                        evsel->counts->scaled = -1;
                        aggr->val = 0;
                        return 0;
                }

                if (aggr->run < aggr->ena) {
                        evsel->counts->scaled = 1;
                        aggr->val = (u64)((double)aggr->val * aggr->ena / aggr->run + 0.5);
                }
        } else
                aggr->ena = aggr->run = 0;

        return 0;
}

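/*
 * Per the perf_event_open(2) group semantics: a group leader (or an
 * ungrouped event) is opened with group_fd = -1, while members pass the
 * fd of their leader on the same cpu/thread, so the leader must have
 * been opened first.
 */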
static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread)
{
        struct perf_evsel *leader = evsel->leader;
        int fd;

        if (perf_evsel__is_group_leader(evsel))
                return -1;

        /*
         * Leader must be already processed/open,
         * if not it's a bug.
         */
        BUG_ON(!leader->fd);

        fd = FD(leader, cpu, thread);
        BUG_ON(fd == -1);

        return fd;
}

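/*
 * Open one counter per (cpu, thread) pair.  When the evsel is bound to
 * a cgroup, the cgroup directory fd is passed in the pid argument
 * together with PERF_FLAG_PID_CGROUP, as the syscall ABI requires.
 */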
static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
                              struct thread_map *threads)
{
        int cpu, thread;
        unsigned long flags = 0;
        int pid = -1, err;

        if (evsel->fd == NULL &&
            perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0)
                return -ENOMEM;

        if (evsel->cgrp) {
                flags = PERF_FLAG_PID_CGROUP;
                pid = evsel->cgrp->fd;
        }

        for (cpu = 0; cpu < cpus->nr; cpu++) {

                for (thread = 0; thread < threads->nr; thread++) {
                        int group_fd;

                        if (!evsel->cgrp)
                                pid = threads->map[thread];

                        group_fd = get_group_fd(evsel, cpu, thread);

                        FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr,
                                                                     pid,
                                                                     cpus->map[cpu],
                                                                     group_fd, flags);
                        if (FD(evsel, cpu, thread) < 0) {
                                err = -errno;
                                goto out_close;
                        }
                }
        }

        return 0;

out_close:
        do {
                while (--thread >= 0) {
                        close(FD(evsel, cpu, thread));
                        FD(evsel, cpu, thread) = -1;
                }
                thread = threads->nr;
        } while (--cpu >= 0);
        return err;
}

void perf_evsel__close(struct perf_evsel *evsel, int ncpus, int nthreads)
{
        if (evsel->fd == NULL)
                return;

        perf_evsel__close_fd(evsel, ncpus, nthreads);
        perf_evsel__free_fd(evsel);
        evsel->fd = NULL;
}

static struct {
        struct cpu_map map;
        int cpus[1];
} empty_cpu_map = {
        .map.nr = 1,
        .cpus   = { -1, },
};

static struct {
        struct thread_map map;
        int threads[1];
} empty_thread_map = {
        .map.nr  = 1,
        .threads = { -1, },
};

int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
                     struct thread_map *threads)
{
        if (cpus == NULL) {
                /* Work around old compiler warnings about strict aliasing */
                cpus = &empty_cpu_map.map;
        }

        if (threads == NULL)
                threads = &empty_thread_map.map;

        return __perf_evsel__open(evsel, cpus, threads);
}

int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
                             struct cpu_map *cpus)
{
        return __perf_evsel__open(evsel, cpus, &empty_thread_map.map);
}

int perf_evsel__open_per_thread(struct perf_evsel *evsel,
                                struct thread_map *threads)
{
        return __perf_evsel__open(evsel, &empty_cpu_map.map, threads);
}

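/*
 * For non-sample records carrying sample_id_all data, the id fields are
 * laid out *at the end* of the record, in the reverse of the sample
 * order, hence parsing starts from the last u64 and walks backwards.
 */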
static int perf_evsel__parse_id_sample(const struct perf_evsel *evsel,
                                       const union perf_event *event,
                                       struct perf_sample *sample)
{
        u64 type = evsel->attr.sample_type;
        const u64 *array = event->sample.array;
        bool swapped = evsel->needs_swap;
        union u64_swap u;

        array += ((event->header.size -
                   sizeof(event->header)) / sizeof(u64)) - 1;

        if (type & PERF_SAMPLE_CPU) {
                u.val64 = *array;
                if (swapped) {
                        /* undo swap of u64, then swap on individual u32s */
                        u.val64 = bswap_64(u.val64);
                        u.val32[0] = bswap_32(u.val32[0]);
                }

                sample->cpu = u.val32[0];
                array--;
        }

        if (type & PERF_SAMPLE_STREAM_ID) {
                sample->stream_id = *array;
                array--;
        }

        if (type & PERF_SAMPLE_ID) {
                sample->id = *array;
                array--;
        }

        if (type & PERF_SAMPLE_TIME) {
                sample->time = *array;
                array--;
        }

        if (type & PERF_SAMPLE_TID) {
                u.val64 = *array;
                if (swapped) {
                        /* undo swap of u64, then swap on individual u32s */
                        u.val64 = bswap_64(u.val64);
                        u.val32[0] = bswap_32(u.val32[0]);
                        u.val32[1] = bswap_32(u.val32[1]);
                }

                sample->pid = u.val32[0];
                sample->tid = u.val32[1];
        }

        return 0;
}

static bool sample_overlap(const union perf_event *event,
                           const void *offset, u64 size)
{
        const void *base = event;

        if (offset + size > base + event->header.size)
                return true;

        return false;
}

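/*
 * Walk a PERF_RECORD_SAMPLE in the field order defined by the
 * perf_event_open(2) ABI, filling struct perf_sample.  Fields that pack
 * two u32s into a u64 (TID, CPU) need the double-swap dance when the
 * perf.data file comes from a machine of the opposite endianness.
 */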
int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
                             struct perf_sample *data)
{
        u64 type = evsel->attr.sample_type;
        u64 regs_user = evsel->attr.sample_regs_user;
        bool swapped = evsel->needs_swap;
        const u64 *array;

        /*
         * used for cross-endian analysis. See git commit 65014ab3
         * for why this goofiness is needed.
         */
        union u64_swap u;

        memset(data, 0, sizeof(*data));
        data->cpu = data->pid = data->tid = -1;
        data->stream_id = data->id = data->time = -1ULL;
        data->period = 1;

        if (event->header.type != PERF_RECORD_SAMPLE) {
                if (!evsel->attr.sample_id_all)
                        return 0;
                return perf_evsel__parse_id_sample(evsel, event, data);
        }

        array = event->sample.array;

        if (evsel->sample_size + sizeof(event->header) > event->header.size)
                return -EFAULT;

        if (type & PERF_SAMPLE_IP) {
                data->ip = event->ip.ip;
                array++;
        }

        if (type & PERF_SAMPLE_TID) {
                u.val64 = *array;
                if (swapped) {
                        /* undo swap of u64, then swap on individual u32s */
                        u.val64 = bswap_64(u.val64);
                        u.val32[0] = bswap_32(u.val32[0]);
                        u.val32[1] = bswap_32(u.val32[1]);
                }

                data->pid = u.val32[0];
                data->tid = u.val32[1];
                array++;
        }

        if (type & PERF_SAMPLE_TIME) {
                data->time = *array;
                array++;
        }

        data->addr = 0;
        if (type & PERF_SAMPLE_ADDR) {
                data->addr = *array;
                array++;
        }

        data->id = -1ULL;
        if (type & PERF_SAMPLE_ID) {
                data->id = *array;
                array++;
        }

        if (type & PERF_SAMPLE_STREAM_ID) {
                data->stream_id = *array;
                array++;
        }

        if (type & PERF_SAMPLE_CPU) {

                u.val64 = *array;
                if (swapped) {
                        /* undo swap of u64, then swap on individual u32s */
                        u.val64 = bswap_64(u.val64);
                        u.val32[0] = bswap_32(u.val32[0]);
                }

                data->cpu = u.val32[0];
                array++;
        }

        if (type & PERF_SAMPLE_PERIOD) {
                data->period = *array;
                array++;
        }

        if (type & PERF_SAMPLE_READ) {
                fprintf(stderr, "PERF_SAMPLE_READ is unsupported for now\n");
                return -1;
        }

        if (type & PERF_SAMPLE_CALLCHAIN) {
                if (sample_overlap(event, array, sizeof(data->callchain->nr)))
                        return -EFAULT;

                data->callchain = (struct ip_callchain *)array;

                if (sample_overlap(event, array, data->callchain->nr))
                        return -EFAULT;

                array += 1 + data->callchain->nr;
        }

        if (type & PERF_SAMPLE_RAW) {
                const u64 *pdata;

                u.val64 = *array;
                if (WARN_ONCE(swapped,
                              "Endianness of raw data not corrected!\n")) {
                        /* undo swap of u64, then swap on individual u32s */
                        u.val64 = bswap_64(u.val64);
                        u.val32[0] = bswap_32(u.val32[0]);
                        u.val32[1] = bswap_32(u.val32[1]);
                }

                if (sample_overlap(event, array, sizeof(u32)))
                        return -EFAULT;

                data->raw_size = u.val32[0];
                pdata = (void *) array + sizeof(u32);

                if (sample_overlap(event, pdata, data->raw_size))
                        return -EFAULT;

                data->raw_data = (void *) pdata;

                array = (void *)array + data->raw_size + sizeof(u32);
        }

        if (type & PERF_SAMPLE_BRANCH_STACK) {
                u64 sz;

                data->branch_stack = (struct branch_stack *)array;
                array++; /* nr */

                sz = data->branch_stack->nr * sizeof(struct branch_entry);
                sz /= sizeof(u64);
                array += sz;
        }

        if (type & PERF_SAMPLE_REGS_USER) {
                /* First u64 tells us if we have any regs in sample. */
                u64 avail = *array++;

                if (avail) {
                        data->user_regs.regs = (u64 *)array;
                        array += hweight_long(regs_user);
                }
        }

        if (type & PERF_SAMPLE_STACK_USER) {
                u64 size = *array++;

                data->user_stack.offset = ((char *)(array - 1)
                                          - (char *) event);

                if (!size) {
                        data->user_stack.size = 0;
                } else {
                        data->user_stack.data = (char *)array;
                        array += size / sizeof(*array);
                        data->user_stack.size = *array;
                }
        }

        return 0;
}

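/*
 * Inverse of perf_evsel__parse_sample() for the fixed-size leading
 * fields (through PERF_SAMPLE_PERIOD); used when perf needs to emit
 * synthesized sample records itself.
 */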
int perf_event__synthesize_sample(union perf_event *event, u64 type,
                                  const struct perf_sample *sample,
                                  bool swapped)
{
        u64 *array;

        /*
         * used for cross-endian analysis. See git commit 65014ab3
         * for why this goofiness is needed.
         */
        union u64_swap u;

        array = event->sample.array;

        if (type & PERF_SAMPLE_IP) {
                event->ip.ip = sample->ip;
                array++;
        }

        if (type & PERF_SAMPLE_TID) {
                u.val32[0] = sample->pid;
                u.val32[1] = sample->tid;
                if (swapped) {
                        /*
                         * Inverse of what is done in perf_evsel__parse_sample
                         */
                        u.val32[0] = bswap_32(u.val32[0]);
                        u.val32[1] = bswap_32(u.val32[1]);
                        u.val64 = bswap_64(u.val64);
                }

                *array = u.val64;
                array++;
        }

        if (type & PERF_SAMPLE_TIME) {
                *array = sample->time;
                array++;
        }

        if (type & PERF_SAMPLE_ADDR) {
                *array = sample->addr;
                array++;
        }

        if (type & PERF_SAMPLE_ID) {
                *array = sample->id;
                array++;
        }

        if (type & PERF_SAMPLE_STREAM_ID) {
                *array = sample->stream_id;
                array++;
        }

        if (type & PERF_SAMPLE_CPU) {
                u.val32[0] = sample->cpu;
                if (swapped) {
                        /*
                         * Inverse of what is done in perf_evsel__parse_sample
                         */
                        u.val32[0] = bswap_32(u.val32[0]);
                        u.val64 = bswap_64(u.val64);
                }
                *array = u.val64;
                array++;
        }

        if (type & PERF_SAMPLE_PERIOD) {
                *array = sample->period;
                array++;
        }

        return 0;
}

struct format_field *perf_evsel__field(struct perf_evsel *evsel, const char *name)
{
        return pevent_find_field(evsel->tp_format, name);
}

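/*
 * Return a pointer to a named tracepoint field inside the raw data.
 * Dynamic arrays (__data_loc fields) store a 32-bit descriptor at the
 * field offset whose low 16 bits are the real payload offset, which is
 * what the FIELD_IS_DYNAMIC branch below unpacks.
 */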
void *perf_evsel__rawptr(struct perf_evsel *evsel, struct perf_sample *sample,
                         const char *name)
{
        struct format_field *field = perf_evsel__field(evsel, name);
        int offset;

        if (!field)
                return NULL;

        offset = field->offset;

        if (field->flags & FIELD_IS_DYNAMIC) {
                offset = *(int *)(sample->raw_data + field->offset);
                offset &= 0xffff;
        }

        return sample->raw_data + offset;
}

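/*
 * Fetch a tracepoint field as an integer, byte-swapping if the
 * perf.data file was recorded on a machine of different endianness.
 * E.g. a sched_switch handler might do:
 *
 *        u64 next_pid = perf_evsel__intval(evsel, sample, "next_pid");
 */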
u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample,
                       const char *name)
{
        struct format_field *field = perf_evsel__field(evsel, name);
        void *ptr;
        u64 value;

        if (!field)
                return 0;

        ptr = sample->raw_data + field->offset;

        switch (field->size) {
        case 1:
                return *(u8 *)ptr;
        case 2:
                value = *(u16 *)ptr;
                break;
        case 4:
                value = *(u32 *)ptr;
                break;
        case 8:
                value = *(u64 *)ptr;
                break;
        default:
                return 0;
        }

        if (!evsel->needs_swap)
                return value;

        switch (field->size) {
        case 2:
                return bswap_16(value);
        case 4:
                return bswap_32(value);
        case 8:
                return bswap_64(value);
        default:
                return 0;
        }

        return 0;
}

static int comma_fprintf(FILE *fp, bool *first, const char *fmt, ...)
{
        va_list args;
        int ret = 0;

        if (!*first) {
                ret += fprintf(fp, ",");
        } else {
                ret += fprintf(fp, ":");
                *first = false;
        }

        va_start(args, fmt);
        ret += vfprintf(fp, fmt, args);
        va_end(args);
        return ret;
}

static int __if_fprintf(FILE *fp, bool *first, const char *field, u64 value)
{
        if (value == 0)
                return 0;

        return comma_fprintf(fp, first, " %s: %" PRIu64, field, value);
}

#define if_print(field) printed += __if_fprintf(fp, &first, #field, evsel->attr.field)

struct bit_names {
        int bit;
        const char *name;
};

static int bits__fprintf(FILE *fp, const char *field, u64 value,
                         struct bit_names *bits, bool *first)
{
        int i = 0, printed = comma_fprintf(fp, first, " %s: ", field);
        bool first_bit = true;

        do {
                if (value & bits[i].bit) {
                        printed += fprintf(fp, "%s%s", first_bit ? "" : "|", bits[i].name);
                        first_bit = false;
                }
        } while (bits[++i].name != NULL);

        return printed;
}

static int sample_type__fprintf(FILE *fp, bool *first, u64 value)
{
#define bit_name(n) { PERF_SAMPLE_##n, #n }
        struct bit_names bits[] = {
                bit_name(IP), bit_name(TID), bit_name(TIME), bit_name(ADDR),
                bit_name(READ), bit_name(CALLCHAIN), bit_name(ID), bit_name(CPU),
                bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW),
                bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER),
                { .name = NULL, }
        };
#undef bit_name
        return bits__fprintf(fp, "sample_type", value, bits, first);
}

static int read_format__fprintf(FILE *fp, bool *first, u64 value)
{
#define bit_name(n) { PERF_FORMAT_##n, #n }
        struct bit_names bits[] = {
                bit_name(TOTAL_TIME_ENABLED), bit_name(TOTAL_TIME_RUNNING),
                bit_name(ID), bit_name(GROUP),
                { .name = NULL, }
        };
#undef bit_name
        return bits__fprintf(fp, "read_format", value, bits, first);
}

int perf_evsel__fprintf(struct perf_evsel *evsel,
                        struct perf_attr_details *details, FILE *fp)
{
        bool first = true;
        int printed = fprintf(fp, "%s", perf_evsel__name(evsel));

        if (details->verbose || details->freq) {
                printed += comma_fprintf(fp, &first, " sample_freq=%" PRIu64,
                                         (u64)evsel->attr.sample_freq);
        }

        if (details->verbose) {
                if_print(type);
                if_print(config);
                if_print(config1);
                if_print(config2);
                if_print(size);
                printed += sample_type__fprintf(fp, &first, evsel->attr.sample_type);
                if (evsel->attr.read_format)
                        printed += read_format__fprintf(fp, &first, evsel->attr.read_format);
                if_print(disabled);
                if_print(inherit);
                if_print(pinned);
                if_print(exclusive);
                if_print(exclude_user);
                if_print(exclude_kernel);
                if_print(exclude_hv);
                if_print(exclude_idle);
                if_print(mmap);
                if_print(comm);
                if_print(freq);
                if_print(inherit_stat);
                if_print(enable_on_exec);
                if_print(task);
                if_print(watermark);
                if_print(precise_ip);
                if_print(mmap_data);
                if_print(sample_id_all);
                if_print(exclude_host);
                if_print(exclude_guest);
                if_print(__reserved_1);
                if_print(wakeup_events);
                if_print(bp_type);
                if_print(branch_sample_type);
        }

        fputc('\n', fp);
        return ++printed;
}