/*
 * tools/perf/util/evsel.c
 */
1 /*
2  * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
3  *
4  * Parts came from builtin-{top,stat,record}.c, see those files for further
5  * copyright notes.
6  *
7  * Released under the GPL v2. (and only v2, not any later version)
8  */
9
10 #include <byteswap.h>
11 #include "asm/bug.h"
12 #include "evsel.h"
13 #include "evlist.h"
14 #include "util.h"
15 #include "cpumap.h"
16 #include "thread_map.h"
17
/*
 * Accessor for the per-(cpu, thread) file descriptor table kept in
 * evsel->fd (an xyarray of ints).
 */
#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
/* Group leader fd for @cpu: the leader is stored at thread index 0. */
#define GROUP_FD(group_fd, cpu) (*(int *)xyarray__entry(group_fd, cpu, 0))
20
21 int __perf_evsel__sample_size(u64 sample_type)
22 {
23         u64 mask = sample_type & PERF_SAMPLE_MASK;
24         int size = 0;
25         int i;
26
27         for (i = 0; i < 64; i++) {
28                 if (mask & (1ULL << i))
29                         size++;
30         }
31
32         size *= sizeof(u64);
33
34         return size;
35 }
36
37 void perf_evsel__init(struct perf_evsel *evsel,
38                       struct perf_event_attr *attr, int idx)
39 {
40         evsel->idx         = idx;
41         evsel->attr        = *attr;
42         INIT_LIST_HEAD(&evsel->node);
43         hists__init(&evsel->hists);
44 }
45
46 struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx)
47 {
48         struct perf_evsel *evsel = zalloc(sizeof(*evsel));
49
50         if (evsel != NULL)
51                 perf_evsel__init(evsel, attr, idx);
52
53         return evsel;
54 }
55
56 int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
57 {
58         int cpu, thread;
59         evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int));
60
61         if (evsel->fd) {
62                 for (cpu = 0; cpu < ncpus; cpu++) {
63                         for (thread = 0; thread < nthreads; thread++) {
64                                 FD(evsel, cpu, thread) = -1;
65                         }
66                 }
67         }
68
69         return evsel->fd != NULL ? 0 : -ENOMEM;
70 }
71
72 int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
73 {
74         evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id));
75         if (evsel->sample_id == NULL)
76                 return -ENOMEM;
77
78         evsel->id = zalloc(ncpus * nthreads * sizeof(u64));
79         if (evsel->id == NULL) {
80                 xyarray__delete(evsel->sample_id);
81                 evsel->sample_id = NULL;
82                 return -ENOMEM;
83         }
84
85         return 0;
86 }
87
88 int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus)
89 {
90         evsel->counts = zalloc((sizeof(*evsel->counts) +
91                                 (ncpus * sizeof(struct perf_counts_values))));
92         return evsel->counts != NULL ? 0 : -ENOMEM;
93 }
94
/* Free the fd table and NULL the pointer so a double free is harmless. */
void perf_evsel__free_fd(struct perf_evsel *evsel)
{
	xyarray__delete(evsel->fd);
	evsel->fd = NULL;
}
100
/* Free the sample_id table and the id array; pointers are NULLed. */
void perf_evsel__free_id(struct perf_evsel *evsel)
{
	xyarray__delete(evsel->sample_id);
	evsel->sample_id = NULL;
	free(evsel->id);
	evsel->id = NULL;
}
108
109 void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
110 {
111         int cpu, thread;
112
113         for (cpu = 0; cpu < ncpus; cpu++)
114                 for (thread = 0; thread < nthreads; ++thread) {
115                         close(FD(evsel, cpu, thread));
116                         FD(evsel, cpu, thread) = -1;
117                 }
118 }
119
120 void perf_evsel__exit(struct perf_evsel *evsel)
121 {
122         assert(list_empty(&evsel->node));
123         xyarray__delete(evsel->fd);
124         xyarray__delete(evsel->sample_id);
125         free(evsel->id);
126 }
127
/*
 * Tear down and free @evsel: release its tables, drop the cgroup
 * reference, free the owned name string, then the evsel itself.
 */
void perf_evsel__delete(struct perf_evsel *evsel)
{
	perf_evsel__exit(evsel);
	close_cgroup(evsel->cgrp);
	free(evsel->name);
	free(evsel);
}
135
/*
 * Read the counter for one (cpu, thread) fd into
 * evsel->counts->cpu[cpu].
 *
 * With @scale three u64s are read (value, time enabled, time running)
 * and the value is extrapolated when the event was multiplexed
 * (run < ena); a counter that never ran reads as 0. Without @scale
 * only the value is read and ena/run are zeroed.
 *
 * Returns 0, -EINVAL if this fd was never opened, -ENOMEM if the
 * counts array cannot be allocated, or -errno from the read.
 */
int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
			      int cpu, int thread, bool scale)
{
	struct perf_counts_values count;
	size_t nv = scale ? 3 : 1;

	if (FD(evsel, cpu, thread) < 0)
		return -EINVAL;

	/*
	 * Lazy allocation sized for cpu + 1 entries only.
	 * NOTE(review): a later call with a higher cpu index would write
	 * past this allocation -- presumably callers iterate cpu 0..max
	 * starting from the top or preallocate; confirm against callers.
	 */
	if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1) < 0)
		return -ENOMEM;

	if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0)
		return -errno;

	if (scale) {
		if (count.run == 0)
			count.val = 0;	/* never scheduled: no valid value */
		else if (count.run < count.ena)
			/* multiplexed: extrapolate, rounding to nearest */
			count.val = (u64)((double)count.val * count.ena / count.run + 0.5);
	} else
		count.ena = count.run = 0;

	evsel->counts->cpu[cpu] = count;
	return 0;
}
162
/*
 * Read and sum counter values across all open (cpu, thread) fds into
 * evsel->counts->aggr; fds that were never opened (-1) are skipped.
 *
 * counts->scaled reports the outcome when @scale is set:
 *   -1  event never ran anywhere, aggregate value forced to 0
 *    1  event was multiplexed, value extrapolated from ena/run
 *    0  exact count
 *
 * Returns 0 on success or -errno if any read fails.
 * NOTE(review): assumes evsel->counts is already allocated -- confirm
 * callers guarantee this.
 */
int __perf_evsel__read(struct perf_evsel *evsel,
		       int ncpus, int nthreads, bool scale)
{
	size_t nv = scale ? 3 : 1;
	int cpu, thread;
	struct perf_counts_values *aggr = &evsel->counts->aggr, count;

	aggr->val = aggr->ena = aggr->run = 0;

	for (cpu = 0; cpu < ncpus; cpu++) {
		for (thread = 0; thread < nthreads; thread++) {
			if (FD(evsel, cpu, thread) < 0)
				continue;

			if (readn(FD(evsel, cpu, thread),
				  &count, nv * sizeof(u64)) < 0)
				return -errno;

			aggr->val += count.val;
			if (scale) {
				aggr->ena += count.ena;
				aggr->run += count.run;
			}
		}
	}

	evsel->counts->scaled = 0;
	if (scale) {
		if (aggr->run == 0) {
			evsel->counts->scaled = -1;
			aggr->val = 0;
			return 0;
		}

		if (aggr->run < aggr->ena) {
			evsel->counts->scaled = 1;
			/* extrapolate for multiplexing, round to nearest */
			aggr->val = (u64)((double)aggr->val * aggr->ena / aggr->run + 0.5);
		}
	} else
		aggr->ena = aggr->run = 0;

	return 0;
}
206
/*
 * Open one perf event fd per (cpu, thread) pair via
 * sys_perf_event_open(), recording them in evsel->fd.
 *
 * When the evsel is bound to a cgroup, the cgroup fd is passed as the
 * "pid" argument together with PERF_FLAG_PID_CGROUP; otherwise the
 * per-thread pid from @threads is used.
 *
 * @group_fds, when non-NULL, supplies a per-cpu group leader fd.
 * When it is NULL and @group is set, the first fd opened on each cpu
 * becomes the leader for the remaining fds on that cpu.
 *
 * On failure every fd opened so far is closed (the do/while below
 * first unwinds the partially filled current cpu row, then each
 * complete prior row) and the negated errno of the failing open is
 * returned.
 */
static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
			      struct thread_map *threads, bool group,
			      struct xyarray *group_fds)
{
	int cpu, thread;
	unsigned long flags = 0;
	int pid = -1, err;

	/* Lazily allocate the fd table on first open. */
	if (evsel->fd == NULL &&
	    perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0)
		return -ENOMEM;

	if (evsel->cgrp) {
		flags = PERF_FLAG_PID_CGROUP;
		pid = evsel->cgrp->fd;
	}

	for (cpu = 0; cpu < cpus->nr; cpu++) {
		int group_fd = group_fds ? GROUP_FD(group_fds, cpu) : -1;

		for (thread = 0; thread < threads->nr; thread++) {

			if (!evsel->cgrp)
				pid = threads->map[thread];

			FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr,
								     pid,
								     cpus->map[cpu],
								     group_fd, flags);
			if (FD(evsel, cpu, thread) < 0) {
				err = -errno;
				goto out_close;
			}

			/* first fd opened on this cpu leads the group */
			if (group && group_fd == -1)
				group_fd = FD(evsel, cpu, thread);
		}
	}

	return 0;

out_close:
	do {
		while (--thread >= 0) {
			close(FD(evsel, cpu, thread));
			FD(evsel, cpu, thread) = -1;
		}
		thread = threads->nr;
	} while (--cpu >= 0);
	return err;
}
258
259 void perf_evsel__close(struct perf_evsel *evsel, int ncpus, int nthreads)
260 {
261         if (evsel->fd == NULL)
262                 return;
263
264         perf_evsel__close_fd(evsel, ncpus, nthreads);
265         perf_evsel__free_fd(evsel);
266         evsel->fd = NULL;
267 }
268
/*
 * Dummy maps handed to __perf_evsel__open() when the caller supplies
 * no cpu/thread map: a single entry of -1, which sys_perf_event_open()
 * interprets as "any cpu" / "calling thread". The anonymous wrapper
 * structs give the maps' trailing arrays a real backing slot.
 */
static struct {
	struct cpu_map map;
	int cpus[1];
} empty_cpu_map = {
	.map.nr = 1,
	.cpus   = { -1, },
};

static struct {
	struct thread_map map;
	int threads[1];
} empty_thread_map = {
	.map.nr  = 1,
	.threads = { -1, },
};
284
285 int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
286                      struct thread_map *threads, bool group,
287                      struct xyarray *group_fd)
288 {
289         if (cpus == NULL) {
290                 /* Work around old compiler warnings about strict aliasing */
291                 cpus = &empty_cpu_map.map;
292         }
293
294         if (threads == NULL)
295                 threads = &empty_thread_map.map;
296
297         return __perf_evsel__open(evsel, cpus, threads, group, group_fd);
298 }
299
/* Open @evsel on every cpu in @cpus for the calling thread only. */
int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
			     struct cpu_map *cpus, bool group,
			     struct xyarray *group_fd)
{
	return __perf_evsel__open(evsel, cpus, &empty_thread_map.map, group,
				  group_fd);
}
307
/* Open @evsel for every thread in @threads, on any cpu (-1). */
int perf_evsel__open_per_thread(struct perf_evsel *evsel,
				struct thread_map *threads, bool group,
				struct xyarray *group_fd)
{
	return __perf_evsel__open(evsel, &empty_cpu_map.map, threads, group,
				  group_fd);
}
315
/*
 * For non-sample records the kernel appends the sample_id fields
 * enabled in @type (TID, TIME, ID, STREAM_ID, CPU) at the very end of
 * the record, so walk them backwards starting from the last u64.
 *
 * NOTE(review): unlike perf_event__parse_sample() this path performs
 * no byteswapping -- presumably only reached for native-endian data;
 * confirm against callers.
 */
static int perf_event__parse_id_sample(const union perf_event *event, u64 type,
				       struct perf_sample *sample)
{
	const u64 *array = event->sample.array;

	/* point at the last u64 of the record */
	array += ((event->header.size -
		   sizeof(event->header)) / sizeof(u64)) - 1;

	if (type & PERF_SAMPLE_CPU) {
		/* cpu occupies the low u32 of its u64 slot */
		u32 *p = (u32 *)array;
		sample->cpu = *p;
		array--;
	}

	if (type & PERF_SAMPLE_STREAM_ID) {
		sample->stream_id = *array;
		array--;
	}

	if (type & PERF_SAMPLE_ID) {
		sample->id = *array;
		array--;
	}

	if (type & PERF_SAMPLE_TIME) {
		sample->time = *array;
		array--;
	}

	if (type & PERF_SAMPLE_TID) {
		/* pid and tid share one u64 slot as two u32s */
		u32 *p = (u32 *)array;
		sample->pid = p[0];
		sample->tid = p[1];
	}

	return 0;
}
353
354 static bool sample_overlap(const union perf_event *event,
355                            const void *offset, u64 size)
356 {
357         const void *base = event;
358
359         if (offset + size > base + event->header.size)
360                 return true;
361
362         return false;
363 }
364
365 int perf_event__parse_sample(const union perf_event *event, u64 type,
366                              int sample_size, bool sample_id_all,
367                              struct perf_sample *data, bool swapped)
368 {
369         const u64 *array;
370
371         /*
372          * used for cross-endian analysis. See git commit 65014ab3
373          * for why this goofiness is needed.
374          */
375         union {
376                 u64 val64;
377                 u32 val32[2];
378         } u;
379
380
381         data->cpu = data->pid = data->tid = -1;
382         data->stream_id = data->id = data->time = -1ULL;
383
384         if (event->header.type != PERF_RECORD_SAMPLE) {
385                 if (!sample_id_all)
386                         return 0;
387                 return perf_event__parse_id_sample(event, type, data);
388         }
389
390         array = event->sample.array;
391
392         if (sample_size + sizeof(event->header) > event->header.size)
393                 return -EFAULT;
394
395         if (type & PERF_SAMPLE_IP) {
396                 data->ip = event->ip.ip;
397                 array++;
398         }
399
400         if (type & PERF_SAMPLE_TID) {
401                 u.val64 = *array;
402                 if (swapped) {
403                         /* undo swap of u64, then swap on individual u32s */
404                         u.val64 = bswap_64(u.val64);
405                         u.val32[0] = bswap_32(u.val32[0]);
406                         u.val32[1] = bswap_32(u.val32[1]);
407                 }
408
409                 data->pid = u.val32[0];
410                 data->tid = u.val32[1];
411                 array++;
412         }
413
414         if (type & PERF_SAMPLE_TIME) {
415                 data->time = *array;
416                 array++;
417         }
418
419         data->addr = 0;
420         if (type & PERF_SAMPLE_ADDR) {
421                 data->addr = *array;
422                 array++;
423         }
424
425         data->id = -1ULL;
426         if (type & PERF_SAMPLE_ID) {
427                 data->id = *array;
428                 array++;
429         }
430
431         if (type & PERF_SAMPLE_STREAM_ID) {
432                 data->stream_id = *array;
433                 array++;
434         }
435
436         if (type & PERF_SAMPLE_CPU) {
437
438                 u.val64 = *array;
439                 if (swapped) {
440                         /* undo swap of u64, then swap on individual u32s */
441                         u.val64 = bswap_64(u.val64);
442                         u.val32[0] = bswap_32(u.val32[0]);
443                 }
444
445                 data->cpu = u.val32[0];
446                 array++;
447         }
448
449         if (type & PERF_SAMPLE_PERIOD) {
450                 data->period = *array;
451                 array++;
452         }
453
454         if (type & PERF_SAMPLE_READ) {
455                 fprintf(stderr, "PERF_SAMPLE_READ is unsuported for now\n");
456                 return -1;
457         }
458
459         if (type & PERF_SAMPLE_CALLCHAIN) {
460                 if (sample_overlap(event, array, sizeof(data->callchain->nr)))
461                         return -EFAULT;
462
463                 data->callchain = (struct ip_callchain *)array;
464
465                 if (sample_overlap(event, array, data->callchain->nr))
466                         return -EFAULT;
467
468                 array += 1 + data->callchain->nr;
469         }
470
471         if (type & PERF_SAMPLE_RAW) {
472                 const u64 *pdata;
473
474                 u.val64 = *array;
475                 if (WARN_ONCE(swapped,
476                               "Endianness of raw data not corrected!\n")) {
477                         /* undo swap of u64, then swap on individual u32s */
478                         u.val64 = bswap_64(u.val64);
479                         u.val32[0] = bswap_32(u.val32[0]);
480                         u.val32[1] = bswap_32(u.val32[1]);
481                 }
482
483                 if (sample_overlap(event, array, sizeof(u32)))
484                         return -EFAULT;
485
486                 data->raw_size = u.val32[0];
487                 pdata = (void *) array + sizeof(u32);
488
489                 if (sample_overlap(event, pdata, data->raw_size))
490                         return -EFAULT;
491
492                 data->raw_data = (void *) pdata;
493         }
494
495         return 0;
496 }