Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
[~shefty/rdma-dev.git] / tools / perf / util / evsel.c
1 /*
2  * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
3  *
4  * Parts came from builtin-{top,stat,record}.c, see those files for further
5  * copyright notes.
6  *
7  * Released under the GPL v2. (and only v2, not any later version)
8  */
9
10 #include <byteswap.h>
11 #include "asm/bug.h"
12 #include "evsel.h"
13 #include "evlist.h"
14 #include "util.h"
15 #include "cpumap.h"
16 #include "thread_map.h"
17
/*
 * Access, as an int lvalue, the slot at (x, y) of evsel e's fd xyarray.
 * The evsel argument is parenthesized so that any pointer-valued
 * expression can be passed, not just a plain identifier.
 */
#define FD(e, x, y) (*(int *)xyarray__entry((e)->fd, x, y))
19
20 int __perf_evsel__sample_size(u64 sample_type)
21 {
22         u64 mask = sample_type & PERF_SAMPLE_MASK;
23         int size = 0;
24         int i;
25
26         for (i = 0; i < 64; i++) {
27                 if (mask & (1ULL << i))
28                         size++;
29         }
30
31         size *= sizeof(u64);
32
33         return size;
34 }
35
36 void perf_evsel__init(struct perf_evsel *evsel,
37                       struct perf_event_attr *attr, int idx)
38 {
39         evsel->idx         = idx;
40         evsel->attr        = *attr;
41         INIT_LIST_HEAD(&evsel->node);
42         hists__init(&evsel->hists);
43 }
44
45 struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx)
46 {
47         struct perf_evsel *evsel = zalloc(sizeof(*evsel));
48
49         if (evsel != NULL)
50                 perf_evsel__init(evsel, attr, idx);
51
52         return evsel;
53 }
54
55 int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
56 {
57         int cpu, thread;
58         evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int));
59
60         if (evsel->fd) {
61                 for (cpu = 0; cpu < ncpus; cpu++) {
62                         for (thread = 0; thread < nthreads; thread++) {
63                                 FD(evsel, cpu, thread) = -1;
64                         }
65                 }
66         }
67
68         return evsel->fd != NULL ? 0 : -ENOMEM;
69 }
70
71 int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
72 {
73         evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id));
74         if (evsel->sample_id == NULL)
75                 return -ENOMEM;
76
77         evsel->id = zalloc(ncpus * nthreads * sizeof(u64));
78         if (evsel->id == NULL) {
79                 xyarray__delete(evsel->sample_id);
80                 evsel->sample_id = NULL;
81                 return -ENOMEM;
82         }
83
84         return 0;
85 }
86
87 int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus)
88 {
89         evsel->counts = zalloc((sizeof(*evsel->counts) +
90                                 (ncpus * sizeof(struct perf_counts_values))));
91         return evsel->counts != NULL ? 0 : -ENOMEM;
92 }
93
94 void perf_evsel__free_fd(struct perf_evsel *evsel)
95 {
96         xyarray__delete(evsel->fd);
97         evsel->fd = NULL;
98 }
99
100 void perf_evsel__free_id(struct perf_evsel *evsel)
101 {
102         xyarray__delete(evsel->sample_id);
103         evsel->sample_id = NULL;
104         free(evsel->id);
105         evsel->id = NULL;
106 }
107
108 void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
109 {
110         int cpu, thread;
111
112         for (cpu = 0; cpu < ncpus; cpu++)
113                 for (thread = 0; thread < nthreads; ++thread) {
114                         close(FD(evsel, cpu, thread));
115                         FD(evsel, cpu, thread) = -1;
116                 }
117 }
118
119 void perf_evsel__exit(struct perf_evsel *evsel)
120 {
121         assert(list_empty(&evsel->node));
122         xyarray__delete(evsel->fd);
123         xyarray__delete(evsel->sample_id);
124         free(evsel->id);
125 }
126
127 void perf_evsel__delete(struct perf_evsel *evsel)
128 {
129         perf_evsel__exit(evsel);
130         close_cgroup(evsel->cgrp);
131         free(evsel->name);
132         free(evsel);
133 }
134
135 int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
136                               int cpu, int thread, bool scale)
137 {
138         struct perf_counts_values count;
139         size_t nv = scale ? 3 : 1;
140
141         if (FD(evsel, cpu, thread) < 0)
142                 return -EINVAL;
143
144         if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1) < 0)
145                 return -ENOMEM;
146
147         if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0)
148                 return -errno;
149
150         if (scale) {
151                 if (count.run == 0)
152                         count.val = 0;
153                 else if (count.run < count.ena)
154                         count.val = (u64)((double)count.val * count.ena / count.run + 0.5);
155         } else
156                 count.ena = count.run = 0;
157
158         evsel->counts->cpu[cpu] = count;
159         return 0;
160 }
161
162 int __perf_evsel__read(struct perf_evsel *evsel,
163                        int ncpus, int nthreads, bool scale)
164 {
165         size_t nv = scale ? 3 : 1;
166         int cpu, thread;
167         struct perf_counts_values *aggr = &evsel->counts->aggr, count;
168
169         aggr->val = aggr->ena = aggr->run = 0;
170
171         for (cpu = 0; cpu < ncpus; cpu++) {
172                 for (thread = 0; thread < nthreads; thread++) {
173                         if (FD(evsel, cpu, thread) < 0)
174                                 continue;
175
176                         if (readn(FD(evsel, cpu, thread),
177                                   &count, nv * sizeof(u64)) < 0)
178                                 return -errno;
179
180                         aggr->val += count.val;
181                         if (scale) {
182                                 aggr->ena += count.ena;
183                                 aggr->run += count.run;
184                         }
185                 }
186         }
187
188         evsel->counts->scaled = 0;
189         if (scale) {
190                 if (aggr->run == 0) {
191                         evsel->counts->scaled = -1;
192                         aggr->val = 0;
193                         return 0;
194                 }
195
196                 if (aggr->run < aggr->ena) {
197                         evsel->counts->scaled = 1;
198                         aggr->val = (u64)((double)aggr->val * aggr->ena / aggr->run + 0.5);
199                 }
200         } else
201                 aggr->ena = aggr->run = 0;
202
203         return 0;
204 }
205
206 static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
207                               struct thread_map *threads, bool group)
208 {
209         int cpu, thread;
210         unsigned long flags = 0;
211         int pid = -1;
212
213         if (evsel->fd == NULL &&
214             perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0)
215                 return -1;
216
217         if (evsel->cgrp) {
218                 flags = PERF_FLAG_PID_CGROUP;
219                 pid = evsel->cgrp->fd;
220         }
221
222         for (cpu = 0; cpu < cpus->nr; cpu++) {
223                 int group_fd = -1;
224
225                 for (thread = 0; thread < threads->nr; thread++) {
226
227                         if (!evsel->cgrp)
228                                 pid = threads->map[thread];
229
230                         FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr,
231                                                                      pid,
232                                                                      cpus->map[cpu],
233                                                                      group_fd, flags);
234                         if (FD(evsel, cpu, thread) < 0)
235                                 goto out_close;
236
237                         if (group && group_fd == -1)
238                                 group_fd = FD(evsel, cpu, thread);
239                 }
240         }
241
242         return 0;
243
244 out_close:
245         do {
246                 while (--thread >= 0) {
247                         close(FD(evsel, cpu, thread));
248                         FD(evsel, cpu, thread) = -1;
249                 }
250                 thread = threads->nr;
251         } while (--cpu >= 0);
252         return -1;
253 }
254
255 static struct {
256         struct cpu_map map;
257         int cpus[1];
258 } empty_cpu_map = {
259         .map.nr = 1,
260         .cpus   = { -1, },
261 };
262
263 static struct {
264         struct thread_map map;
265         int threads[1];
266 } empty_thread_map = {
267         .map.nr  = 1,
268         .threads = { -1, },
269 };
270
271 int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
272                      struct thread_map *threads, bool group)
273 {
274         if (cpus == NULL) {
275                 /* Work around old compiler warnings about strict aliasing */
276                 cpus = &empty_cpu_map.map;
277         }
278
279         if (threads == NULL)
280                 threads = &empty_thread_map.map;
281
282         return __perf_evsel__open(evsel, cpus, threads, group);
283 }
284
285 int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
286                              struct cpu_map *cpus, bool group)
287 {
288         return __perf_evsel__open(evsel, cpus, &empty_thread_map.map, group);
289 }
290
291 int perf_evsel__open_per_thread(struct perf_evsel *evsel,
292                                 struct thread_map *threads, bool group)
293 {
294         return __perf_evsel__open(evsel, &empty_cpu_map.map, threads, group);
295 }
296
297 static int perf_event__parse_id_sample(const union perf_event *event, u64 type,
298                                        struct perf_sample *sample)
299 {
300         const u64 *array = event->sample.array;
301
302         array += ((event->header.size -
303                    sizeof(event->header)) / sizeof(u64)) - 1;
304
305         if (type & PERF_SAMPLE_CPU) {
306                 u32 *p = (u32 *)array;
307                 sample->cpu = *p;
308                 array--;
309         }
310
311         if (type & PERF_SAMPLE_STREAM_ID) {
312                 sample->stream_id = *array;
313                 array--;
314         }
315
316         if (type & PERF_SAMPLE_ID) {
317                 sample->id = *array;
318                 array--;
319         }
320
321         if (type & PERF_SAMPLE_TIME) {
322                 sample->time = *array;
323                 array--;
324         }
325
326         if (type & PERF_SAMPLE_TID) {
327                 u32 *p = (u32 *)array;
328                 sample->pid = p[0];
329                 sample->tid = p[1];
330         }
331
332         return 0;
333 }
334
335 static bool sample_overlap(const union perf_event *event,
336                            const void *offset, u64 size)
337 {
338         const void *base = event;
339
340         if (offset + size > base + event->header.size)
341                 return true;
342
343         return false;
344 }
345
346 int perf_event__parse_sample(const union perf_event *event, u64 type,
347                              int sample_size, bool sample_id_all,
348                              struct perf_sample *data, bool swapped)
349 {
350         const u64 *array;
351
352         /*
353          * used for cross-endian analysis. See git commit 65014ab3
354          * for why this goofiness is needed.
355          */
356         union {
357                 u64 val64;
358                 u32 val32[2];
359         } u;
360
361
362         data->cpu = data->pid = data->tid = -1;
363         data->stream_id = data->id = data->time = -1ULL;
364
365         if (event->header.type != PERF_RECORD_SAMPLE) {
366                 if (!sample_id_all)
367                         return 0;
368                 return perf_event__parse_id_sample(event, type, data);
369         }
370
371         array = event->sample.array;
372
373         if (sample_size + sizeof(event->header) > event->header.size)
374                 return -EFAULT;
375
376         if (type & PERF_SAMPLE_IP) {
377                 data->ip = event->ip.ip;
378                 array++;
379         }
380
381         if (type & PERF_SAMPLE_TID) {
382                 u.val64 = *array;
383                 if (swapped) {
384                         /* undo swap of u64, then swap on individual u32s */
385                         u.val64 = bswap_64(u.val64);
386                         u.val32[0] = bswap_32(u.val32[0]);
387                         u.val32[1] = bswap_32(u.val32[1]);
388                 }
389
390                 data->pid = u.val32[0];
391                 data->tid = u.val32[1];
392                 array++;
393         }
394
395         if (type & PERF_SAMPLE_TIME) {
396                 data->time = *array;
397                 array++;
398         }
399
400         data->addr = 0;
401         if (type & PERF_SAMPLE_ADDR) {
402                 data->addr = *array;
403                 array++;
404         }
405
406         data->id = -1ULL;
407         if (type & PERF_SAMPLE_ID) {
408                 data->id = *array;
409                 array++;
410         }
411
412         if (type & PERF_SAMPLE_STREAM_ID) {
413                 data->stream_id = *array;
414                 array++;
415         }
416
417         if (type & PERF_SAMPLE_CPU) {
418
419                 u.val64 = *array;
420                 if (swapped) {
421                         /* undo swap of u64, then swap on individual u32s */
422                         u.val64 = bswap_64(u.val64);
423                         u.val32[0] = bswap_32(u.val32[0]);
424                 }
425
426                 data->cpu = u.val32[0];
427                 array++;
428         }
429
430         if (type & PERF_SAMPLE_PERIOD) {
431                 data->period = *array;
432                 array++;
433         }
434
435         if (type & PERF_SAMPLE_READ) {
436                 fprintf(stderr, "PERF_SAMPLE_READ is unsuported for now\n");
437                 return -1;
438         }
439
440         if (type & PERF_SAMPLE_CALLCHAIN) {
441                 if (sample_overlap(event, array, sizeof(data->callchain->nr)))
442                         return -EFAULT;
443
444                 data->callchain = (struct ip_callchain *)array;
445
446                 if (sample_overlap(event, array, data->callchain->nr))
447                         return -EFAULT;
448
449                 array += 1 + data->callchain->nr;
450         }
451
452         if (type & PERF_SAMPLE_RAW) {
453                 const u64 *pdata;
454
455                 u.val64 = *array;
456                 if (WARN_ONCE(swapped,
457                               "Endianness of raw data not corrected!\n")) {
458                         /* undo swap of u64, then swap on individual u32s */
459                         u.val64 = bswap_64(u.val64);
460                         u.val32[0] = bswap_32(u.val32[0]);
461                         u.val32[1] = bswap_32(u.val32[1]);
462                 }
463
464                 if (sample_overlap(event, array, sizeof(u32)))
465                         return -EFAULT;
466
467                 data->raw_size = u.val32[0];
468                 pdata = (void *) array + sizeof(u32);
469
470                 if (sample_overlap(event, pdata, data->raw_size))
471                         return -EFAULT;
472
473                 data->raw_data = (void *) pdata;
474         }
475
476         return 0;
477 }