/*
 * Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
 * [~shefty/rdma-dev.git] / tools / perf / builtin-record.c
 */
1 /*
2  * builtin-record.c
3  *
4  * Builtin record command: Record the profile of a workload
5  * (or a CPU, or a PID) into the perf.data output file - for
6  * later analysis via perf report.
7  */
8 #define _FILE_OFFSET_BITS 64
9
10 #include "builtin.h"
11
12 #include "perf.h"
13
14 #include "util/build-id.h"
15 #include "util/util.h"
16 #include "util/parse-options.h"
17 #include "util/parse-events.h"
18
19 #include "util/header.h"
20 #include "util/event.h"
21 #include "util/evlist.h"
22 #include "util/evsel.h"
23 #include "util/debug.h"
24 #include "util/session.h"
25 #include "util/symbol.h"
26 #include "util/cpumap.h"
27 #include "util/thread_map.h"
28
29 #include <unistd.h>
30 #include <sched.h>
31 #include <sys/mman.h>
32
/* How to treat an already-existing perf.data file. */
enum write_mode_t {
        WRITE_FORCE,
        WRITE_APPEND
};

/* Sampling period (-c) and frequency (-F) as given on the command line;
 * the sentinel values mean "not specified by the user". */
static u64                      user_interval                   = ULLONG_MAX;
static u64                      default_interval                =      0;

static unsigned int             page_size;
static unsigned int             mmap_pages                      = UINT_MAX; /* -m; defaulted in __cmd_record() */
static unsigned int             user_freq                       = UINT_MAX;
static int                      freq                            =   1000;
static int                      output;                         /* fd of the perf.data file (or stdout) */
static int                      pipe_output                     =      0;
static const char               *output_name                    = NULL;
static bool                     group                           =  false;
static int                      realtime_prio                   =      0;
static bool                     nodelay                         =  false;
static bool                     raw_samples                     =  false;
/* Cleared at open time when the running kernel lacks attr->sample_id_all. */
static bool                     sample_id_all_avail             =   true;
static bool                     system_wide                     =  false;
static pid_t                    target_pid                      =     -1;
static pid_t                    target_tid                      =     -1;
static pid_t                    child_pid                       =     -1; /* forked workload, if any */
static bool                     no_inherit                      =  false;
static enum write_mode_t        write_mode                      = WRITE_FORCE;
static bool                     call_graph                      =  false;
static bool                     inherit_stat                    =  false;
static bool                     no_samples                      =  false;
static bool                     sample_address                  =  false;
static bool                     sample_time                     =  false;
static bool                     no_buildid                      =  false;
static bool                     no_buildid_cache                =  false;
static struct perf_evlist       *evsel_list;

/* Running totals for the end-of-run summary. */
static long                     samples                         =      0;
static u64                      bytes_written                   =      0;

static int                      file_new                        =      1; /* 0 when appending */
static off_t                    post_processing_offset;         /* where event data starts in the file */

static struct perf_session      *session;
static const char               *cpu_list;
static const char               *progname;
77
/*
 * Account for @size bytes written to the output fd by an external path
 * (e.g. perf_event__synthesize_tracing_data() in __cmd_record() writes
 * straight to the fd), keeping the header's data size accurate.
 */
static void advance_output(size_t size)
{
        bytes_written += size;
}
82
83 static void write_output(void *buf, size_t size)
84 {
85         while (size) {
86                 int ret = write(output, buf, size);
87
88                 if (ret < 0)
89                         die("failed to write");
90
91                 size -= ret;
92                 buf += ret;
93
94                 bytes_written += ret;
95         }
96 }
97
/*
 * perf_event callback used by the various synthesize helpers: dump the
 * synthesized event straight into the output file.  Always returns 0.
 */
static int process_synthesized_event(union perf_event *event,
                                     struct perf_sample *sample __used,
                                     struct perf_session *self __used)
{
        write_output(event, event->header.size);
        return 0;
}
105
/*
 * Drain one mmap'ed ring buffer: copy everything between our last read
 * position (md->prev) and the kernel's current head out to the output
 * file, then publish the new tail so the kernel can reuse the space.
 */
static void mmap_read(struct perf_mmap *md)
{
        unsigned int head = perf_mmap__read_head(md);
        unsigned int old = md->prev;
        unsigned char *data = md->base + page_size; /* data area starts one page in */
        unsigned long size;
        void *buf;

        if (old == head)
                return; /* nothing new since last time */

        samples++;

        size = head - old;

        /*
         * If the unread region wraps past the end of the buffer, write
         * the chunk up to the end first ...
         */
        if ((old & md->mask) + size != (head & md->mask)) {
                buf = &data[old & md->mask];
                size = md->mask + 1 - (old & md->mask);
                old += size;

                write_output(buf, size);
        }

        /* ... then the remainder from the start of the buffer. */
        buf = &data[old & md->mask];
        size = head - old;
        old += size;

        write_output(buf, size);

        md->prev = old;
        perf_mmap__write_tail(md, old);
}
138
/* Set from signal context; polled by the main loop in __cmd_record(). */
static volatile int done = 0;
static volatile int signr = -1;
static volatile int child_finished = 0;

/*
 * Common termination handler: remember which signal fired so that
 * sig_atexit() can re-raise it, and ask the main loop to stop.
 */
static void sig_handler(int sig)
{
        if (sig == SIGCHLD)
                child_finished = 1;

        done = 1;
        signr = sig;
}
151
152 static void sig_atexit(void)
153 {
154         int status;
155
156         if (child_pid > 0) {
157                 if (!child_finished)
158                         kill(child_pid, SIGTERM);
159
160                 wait(&status);
161                 if (WIFSIGNALED(status))
162                         psignal(WTERMSIG(status), progname);
163         }
164
165         if (signr == -1 || signr == SIGUSR1)
166                 return;
167
168         signal(signr, SIG_DFL);
169         kill(getpid(), signr);
170 }
171
/*
 * Fill in the perf_event_attr for @evsel from the global command-line
 * settings.  Only the first counter in the list (evsel->idx == 0) gets
 * the mmap/comm tracking records.
 */
static void config_attr(struct perf_evsel *evsel, struct perf_evlist *evlist)
{
        struct perf_event_attr *attr = &evsel->attr;
        int track = !evsel->idx; /* only the first counter needs these */

        attr->disabled          = 1;
        attr->inherit           = !no_inherit;
        attr->read_format       = PERF_FORMAT_TOTAL_TIME_ENABLED |
                                  PERF_FORMAT_TOTAL_TIME_RUNNING |
                                  PERF_FORMAT_ID;

        attr->sample_type       |= PERF_SAMPLE_IP | PERF_SAMPLE_TID;

        /* With more than one event we need the ID to demultiplex samples. */
        if (evlist->nr_entries > 1)
                attr->sample_type |= PERF_SAMPLE_ID;

        /*
         * We default some events to a 1 default interval. But keep
         * it a weak assumption overridable by the user.
         */
        if (!attr->sample_period || (user_freq != UINT_MAX &&
                                     user_interval != ULLONG_MAX)) {
                if (freq) {
                        attr->sample_type       |= PERF_SAMPLE_PERIOD;
                        attr->freq              = 1;
                        attr->sample_freq       = freq;
                } else {
                        attr->sample_period = default_interval;
                }
        }

        if (no_samples)
                attr->sample_freq = 0;

        if (inherit_stat)
                attr->inherit_stat = 1;

        if (sample_address) {
                attr->sample_type       |= PERF_SAMPLE_ADDR;
                attr->mmap_data = track;
        }

        if (call_graph)
                attr->sample_type       |= PERF_SAMPLE_CALLCHAIN;

        if (system_wide)
                attr->sample_type       |= PERF_SAMPLE_CPU;

        /* Timestamps, when the kernel can attach them to every record. */
        if (sample_id_all_avail &&
            (sample_time || system_wide || !no_inherit || cpu_list))
                attr->sample_type       |= PERF_SAMPLE_TIME;

        if (raw_samples) {
                attr->sample_type       |= PERF_SAMPLE_TIME;
                attr->sample_type       |= PERF_SAMPLE_RAW;
                attr->sample_type       |= PERF_SAMPLE_CPU;
        }

        /* -D/--no-delay: wake the tool on every single event. */
        if (nodelay) {
                attr->watermark = 0;
                attr->wakeup_events = 1;
        }

        attr->mmap              = track;
        attr->comm              = track;

        /*
         * Forked-workload case: keep the events disabled until the
         * workload's exec() so setup work isn't measured.
         */
        if (target_pid == -1 && target_tid == -1 && !system_wide) {
                attr->disabled = 1;
                attr->enable_on_exec = 1;
        }
}
243
244 static bool perf_evlist__equal(struct perf_evlist *evlist,
245                                struct perf_evlist *other)
246 {
247         struct perf_evsel *pos, *pair;
248
249         if (evlist->nr_entries != other->nr_entries)
250                 return false;
251
252         pair = list_entry(other->entries.next, struct perf_evsel, node);
253
254         list_for_each_entry(pos, &evlist->entries, node) {
255                 if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr) != 0))
256                         return false;
257                 pair = list_entry(pair->node.next, struct perf_evsel, node);
258         }
259
260         return true;
261 }
262
/*
 * Configure, open and mmap every event in @evlist, falling back
 * gracefully for older kernels (no attr->sample_id_all) and for missing
 * hardware cycle counters (retry as the cpu-clock software event).
 * Exits/dies on unrecoverable errors.
 */
static void open_counters(struct perf_evlist *evlist)
{
        struct perf_evsel *pos, *first;

        /* NOTE(review): a leading -1 cpu appears to mean per-thread
         * monitoring, where inheritance is forced off — confirm. */
        if (evlist->cpus->map[0] < 0)
                no_inherit = true;

        first = list_entry(evlist->entries.next, struct perf_evsel, node);

        list_for_each_entry(pos, &evlist->entries, node) {
                struct perf_event_attr *attr = &pos->attr;
                struct xyarray *group_fd = NULL;
                /*
                 * Check if parse_single_tracepoint_event has already asked for
                 * PERF_SAMPLE_TIME.
                 *
                 * XXX this is kludgy but short term fix for problems introduced by
                 * eac23d1c that broke 'perf script' by having different sample_types
                 * when using multiple tracepoint events when we use a perf binary
                 * that tries to use sample_id_all on an older kernel.
                 *
                 * We need to move counter creation to perf_session, support
                 * different sample_types, etc.
                 */
                bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;

                /* Group members are opened against the leader's fds. */
                if (group && pos != first)
                        group_fd = first->fd;

                config_attr(pos, evlist);
retry_sample_id:
                attr->sample_id_all = sample_id_all_avail ? 1 : 0;
try_again:
                if (perf_evsel__open(pos, evlist->cpus, evlist->threads, group,
                                     group_fd) < 0) {
                        int err = errno;

                        if (err == EPERM || err == EACCES) {
                                ui__error_paranoid();
                                exit(EXIT_FAILURE);
                        } else if (err ==  ENODEV && cpu_list) {
                                die("No such device - did you specify"
                                        " an out-of-range profile CPU?\n");
                        } else if (err == EINVAL && sample_id_all_avail) {
                                /*
                                 * Old kernel, no attr->sample_id_type_all field
                                 */
                                sample_id_all_avail = false;
                                if (!sample_time && !raw_samples && !time_needed)
                                        attr->sample_type &= ~PERF_SAMPLE_TIME;

                                goto retry_sample_id;
                        }

                        /*
                         * If it's cycles then fall back to hrtimer
                         * based cpu-clock-tick sw counter, which
                         * is always available even if no PMU support:
                         */
                        if (attr->type == PERF_TYPE_HARDWARE
                                        && attr->config == PERF_COUNT_HW_CPU_CYCLES) {

                                if (verbose)
                                        ui__warning("The cycles event is not supported, "
                                                    "trying to fall back to cpu-clock-ticks\n");
                                attr->type = PERF_TYPE_SOFTWARE;
                                attr->config = PERF_COUNT_SW_CPU_CLOCK;
                                goto try_again;
                        }

                        if (err == ENOENT) {
                                ui__warning("The %s event is not supported.\n",
                                            event_name(pos));
                                exit(EXIT_FAILURE);
                        }

                        printf("\n");
                        error("sys_perf_event_open() syscall returned with %d (%s).  /bin/dmesg may provide additional information.\n",
                              err, strerror(err));

#if defined(__i386__) || defined(__x86_64__)
                        if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
                                die("No hardware sampling interrupt available."
                                    " No APIC? If so then you can boot the kernel"
                                    " with the \"lapic\" boot parameter to"
                                    " force-enable it.\n");
#endif

                        die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
                }
        }

        if (perf_evlist__set_filters(evlist)) {
                error("failed to set filter with %d (%s)\n", errno,
                        strerror(errno));
                exit(-1);
        }

        if (perf_evlist__mmap(evlist, mmap_pages, false) < 0)
                die("failed to mmap with %d (%s)\n", errno, strerror(errno));

        if (file_new)
                session->evlist = evlist;
        else {
                /* Appending: the on-disk event list must match ours. */
                if (!perf_evlist__equal(session->evlist, evlist)) {
                        fprintf(stderr, "incompatible append\n");
                        exit(-1);
                }
        }

        perf_session__update_sample_type(session);
}
375
/*
 * Re-scan everything written after post_processing_offset with the
 * build_id__mark_dso_hit_ops pass, flagging the DSOs that actually got
 * samples.  Returns 0 when nothing was written, otherwise the result of
 * __perf_session__process_events().
 */
static int process_buildids(void)
{
        u64 size = lseek(output, 0, SEEK_CUR);

        if (size == 0)
                return 0;

        session->fd = output;
        return __perf_session__process_events(session, post_processing_offset,
                                              size - post_processing_offset,
                                              size, &build_id__mark_dso_hit_ops);
}
388
/*
 * atexit hook: finalize the perf.data file.  Folds the streamed byte
 * count into the header, optionally runs the build-id pass, rewrites
 * the header in place and tears the session down.  Skipped entirely
 * when writing to a pipe, which has no seekable header to patch.
 */
static void atexit_header(void)
{
        if (!pipe_output) {
                session->header.data_size += bytes_written;

                if (!no_buildid)
                        process_buildids();
                perf_session__write_header(session, evsel_list, output, true);
                perf_session__delete(session);
                perf_evlist__delete(evsel_list);
                symbol__exit();
        }
}
402
/*
 * Callback for perf_session__process_machines(): synthesize module and
 * kernel mmap events for each guest machine (the host is skipped).
 */
static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
        int err;
        struct perf_session *psession = data;

        if (machine__is_host(machine))
                return;

        /*
         * As for guest kernel when processing subcommand record&report,
         * we arrange module mmap prior to guest kernel mmap and trigger
         * a preload dso because default guest module symbols are loaded
         * from guest kallsyms instead of /lib/modules/XXX/XXX.  This
         * method is used to avoid symbol missing when the first addr is
         * in module instead of in guest kernel.
         */
        err = perf_event__synthesize_modules(process_synthesized_event,
                                             psession, machine);
        if (err < 0)
                pr_err("Couldn't record guest kernel [%d]'s reference"
                       " relocation symbol.\n", machine->pid);

        /*
         * We use _stext for guest kernel because guest kernel's /proc/kallsyms
         * have no _text sometimes.
         */
        err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
                                                 psession, machine, "_text");
        if (err < 0)
                err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
                                                         psession, machine,
                                                         "_stext");
        if (err < 0)
                pr_err("Couldn't record guest kernel [%d]'s reference"
                       " relocation symbol.\n", machine->pid);
}
439
/*
 * PERF_RECORD_FINISHED_ROUND marker, written after each full pass over
 * the mmaps (see mmap_read_all()).
 */
static struct perf_event_header finished_round_event = {
        .size = sizeof(struct perf_event_header),
        .type = PERF_RECORD_FINISHED_ROUND,
};
444
445 static void mmap_read_all(void)
446 {
447         int i;
448
449         for (i = 0; i < evsel_list->nr_mmaps; i++) {
450                 if (evsel_list->mmap[i].base)
451                         mmap_read(&evsel_list->mmap[i]);
452         }
453
454         if (perf_header__has_feat(&session->header, HEADER_TRACE_INFO))
455                 write_output(&finished_round_event, sizeof(finished_round_event));
456 }
457
/*
 * Main body of 'perf record': set up the output file and session, fork
 * the workload (if any), open and mmap the counters, synthesize the
 * pre-existing system state, then loop copying ring-buffer data to the
 * output until the workload exits or a termination signal arrives.
 * Returns 0 on success, a negative error otherwise.
 */
static int __cmd_record(int argc, const char **argv)
{
        struct stat st;
        int flags;
        int err;
        unsigned long waking = 0;
        int child_ready_pipe[2], go_pipe[2];
        const bool forks = argc > 0;
        char buf;
        struct machine *machine;

        progname = argv[0];

        page_size = sysconf(_SC_PAGE_SIZE);

        atexit(sig_atexit);
        signal(SIGCHLD, sig_handler);
        signal(SIGINT, sig_handler);
        signal(SIGUSR1, sig_handler);

        if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
                perror("failed to create pipes");
                exit(-1);
        }

        /*
         * Pick the destination: stdout when it is a pipe or "-" was
         * given, otherwise perf.data (rotating an existing file to
         * perf.data.old when forcing).
         */
        if (!output_name) {
                if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
                        pipe_output = 1;
                else
                        output_name = "perf.data";
        }
        if (output_name) {
                if (!strcmp(output_name, "-"))
                        pipe_output = 1;
                else if (!stat(output_name, &st) && st.st_size) {
                        if (write_mode == WRITE_FORCE) {
                                char oldname[PATH_MAX];
                                snprintf(oldname, sizeof(oldname), "%s.old",
                                         output_name);
                                unlink(oldname);
                                rename(output_name, oldname);
                        }
                } else if (write_mode == WRITE_APPEND) {
                        /* nothing to append to: fall back to a fresh file */
                        write_mode = WRITE_FORCE;
                }
        }

        flags = O_CREAT|O_RDWR;
        if (write_mode == WRITE_APPEND)
                file_new = 0;
        else
                flags |= O_TRUNC;

        if (pipe_output)
                output = STDOUT_FILENO;
        else
                output = open(output_name, flags, S_IRUSR | S_IWUSR);
        if (output < 0) {
                perror("failed to create output file");
                exit(-1);
        }

        session = perf_session__new(output_name, O_WRONLY,
                                    write_mode == WRITE_FORCE, false, NULL);
        if (session == NULL) {
                pr_err("Not enough memory for reading perf file header\n");
                return -1;
        }

        if (!no_buildid)
                perf_header__set_feat(&session->header, HEADER_BUILD_ID);

        if (!file_new) {
                err = perf_session__read_header(session, output);
                if (err < 0)
                        goto out_delete_session;
        }

        if (have_tracepoints(&evsel_list->entries))
                perf_header__set_feat(&session->header, HEADER_TRACE_INFO);

        /* Describe the environment the data was collected in. */
        perf_header__set_feat(&session->header, HEADER_HOSTNAME);
        perf_header__set_feat(&session->header, HEADER_OSRELEASE);
        perf_header__set_feat(&session->header, HEADER_ARCH);
        perf_header__set_feat(&session->header, HEADER_CPUDESC);
        perf_header__set_feat(&session->header, HEADER_NRCPUS);
        perf_header__set_feat(&session->header, HEADER_EVENT_DESC);
        perf_header__set_feat(&session->header, HEADER_CMDLINE);
        perf_header__set_feat(&session->header, HEADER_VERSION);
        perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY);
        perf_header__set_feat(&session->header, HEADER_TOTAL_MEM);
        perf_header__set_feat(&session->header, HEADER_NUMA_TOPOLOGY);
        perf_header__set_feat(&session->header, HEADER_CPUID);

        /* 512 kiB: default amount of unprivileged mlocked memory */
        if (mmap_pages == UINT_MAX)
                mmap_pages = (512 * 1024) / page_size;

        if (forks) {
                child_pid = fork();
                if (child_pid < 0) {
                        perror("failed to fork");
                        exit(-1);
                }

                if (!child_pid) {
                        /* Child: set up fds, then block until told to go. */
                        if (pipe_output)
                                dup2(2, 1);
                        close(child_ready_pipe[0]);
                        close(go_pipe[1]);
                        fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

                        /*
                         * Do a dummy execvp to get the PLT entry resolved,
                         * so we avoid the resolver overhead on the real
                         * execvp call.
                         */
                        execvp("", (char **)argv);

                        /*
                         * Tell the parent we're ready to go
                         */
                        close(child_ready_pipe[1]);

                        /*
                         * Wait until the parent tells us to go.
                         */
                        if (read(go_pipe[0], &buf, 1) == -1)
                                perror("unable to read pipe");

                        execvp(argv[0], (char **)argv);

                        /* exec failed: report and wake the parent via SIGUSR1 */
                        perror(argv[0]);
                        kill(getppid(), SIGUSR1);
                        exit(-1);
                }

                /* Profile the child unless a target/system-wide was given. */
                if (!system_wide && target_tid == -1 && target_pid == -1)
                        evsel_list->threads->map[0] = child_pid;

                close(child_ready_pipe[1]);
                close(go_pipe[0]);
                /*
                 * wait for child to settle
                 */
                if (read(child_ready_pipe[0], &buf, 1) == -1) {
                        perror("unable to read pipe");
                        exit(-1);
                }
                close(child_ready_pipe[0]);
        }

        open_counters(evsel_list);

        /*
         * perf_session__delete(session) will be called at atexit_header()
         */
        atexit(atexit_header);

        if (pipe_output) {
                err = perf_header__write_pipe(output);
                if (err < 0)
                        return err;
        } else if (file_new) {
                err = perf_session__write_header(session, evsel_list,
                                                 output, false);
                if (err < 0)
                        return err;
        }

        post_processing_offset = lseek(output, 0, SEEK_CUR);

        if (pipe_output) {
                /*
                 * A pipe reader cannot seek back for metadata, so attrs,
                 * event types and tracing data go inline into the stream.
                 */
                err = perf_session__synthesize_attrs(session,
                                                     process_synthesized_event);
                if (err < 0) {
                        pr_err("Couldn't synthesize attrs.\n");
                        return err;
                }

                err = perf_event__synthesize_event_types(process_synthesized_event,
                                                         session);
                if (err < 0) {
                        pr_err("Couldn't synthesize event_types.\n");
                        return err;
                }

                if (have_tracepoints(&evsel_list->entries)) {
                        /*
                         * FIXME err <= 0 here actually means that
                         * there were no tracepoints so its not really
                         * an error, just that we don't need to
                         * synthesize anything.  We really have to
                         * return this more properly and also
                         * propagate errors that now are calling die()
                         */
                        err = perf_event__synthesize_tracing_data(output, evsel_list,
                                                                  process_synthesized_event,
                                                                  session);
                        if (err <= 0) {
                                pr_err("Couldn't record tracing data.\n");
                                return err;
                        }
                        advance_output(err);
                }
        }

        machine = perf_session__find_host_machine(session);
        if (!machine) {
                pr_err("Couldn't find native kernel information.\n");
                return -1;
        }

        /* _text may be missing from /proc/kallsyms; fall back to _stext. */
        err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
                                                 session, machine, "_text");
        if (err < 0)
                err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
                                                         session, machine, "_stext");
        if (err < 0)
                pr_err("Couldn't record kernel reference relocation symbol\n"
                       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                       "Check /proc/kallsyms permission or run as root.\n");

        err = perf_event__synthesize_modules(process_synthesized_event,
                                             session, machine);
        if (err < 0)
                pr_err("Couldn't record kernel module information.\n"
                       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                       "Check /proc/modules permission or run as root.\n");

        if (perf_guest)
                perf_session__process_machines(session,
                                               perf_event__synthesize_guest_os);

        if (!system_wide)
                perf_event__synthesize_thread_map(evsel_list->threads,
                                                  process_synthesized_event,
                                                  session);
        else
                perf_event__synthesize_threads(process_synthesized_event,
                                               session);

        if (realtime_prio) {
                struct sched_param param;

                param.sched_priority = realtime_prio;
                if (sched_setscheduler(0, SCHED_FIFO, &param)) {
                        pr_err("Could not set realtime priority.\n");
                        exit(-1);
                }
        }

        perf_evlist__enable(evsel_list);

        /*
         * Let the child rip
         */
        if (forks)
                close(go_pipe[1]);

        for (;;) {
                int hits = samples;

                mmap_read_all();

                if (hits == samples) {
                        /* No new data this pass: stop if asked, else sleep. */
                        if (done)
                                break;
                        /* NOTE(review): poll()'s return value is ignored here. */
                        err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
                        waking++;
                }

                /*
                 * Once told to stop, disable the events; the loop keeps
                 * iterating until a pass produces no new samples.
                 */
                if (done)
                        perf_evlist__disable(evsel_list);
        }

        if (quiet || signr == SIGUSR1)
                return 0;

        fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

        /*
         * Approximate RIP event size: 24 bytes.
         */
        fprintf(stderr,
                "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
                (double)bytes_written / 1024.0 / 1024.0,
                output_name,
                bytes_written / 24);

        return 0;

out_delete_session:
        perf_session__delete(session);
        return err;
}
754
/* Usage strings shown by parse_options() on error or --help. */
static const char * const record_usage[] = {
        "perf record [<options>] [<command>]",
        "perf record [<options>] -- <command> [<options>]",
        NULL
};
760
/* Filled by -f/--force and -A/--append below. */
static bool force, append_file;

/* Command-line option table for 'perf record'. */
const struct option record_options[] = {
        OPT_CALLBACK('e', "event", &evsel_list, "event",
                     "event selector. use 'perf list' to list available events",
                     parse_events_option),
        OPT_CALLBACK(0, "filter", &evsel_list, "filter",
                     "event filter", parse_filter),
        OPT_INTEGER('p', "pid", &target_pid,
                    "record events on existing process id"),
        OPT_INTEGER('t', "tid", &target_tid,
                    "record events on existing thread id"),
        OPT_INTEGER('r', "realtime", &realtime_prio,
                    "collect data with this RT SCHED_FIFO priority"),
        OPT_BOOLEAN('D', "no-delay", &nodelay,
                    "collect data without buffering"),
        OPT_BOOLEAN('R', "raw-samples", &raw_samples,
                    "collect raw sample records from all opened counters"),
        OPT_BOOLEAN('a', "all-cpus", &system_wide,
                            "system-wide collection from all CPUs"),
        OPT_BOOLEAN('A', "append", &append_file,
                            "append to the output file to do incremental profiling"),
        OPT_STRING('C', "cpu", &cpu_list, "cpu",
                    "list of cpus to monitor"),
        OPT_BOOLEAN('f', "force", &force,
                        "overwrite existing data file (deprecated)"),
        OPT_U64('c', "count", &user_interval, "event period to sample"),
        OPT_STRING('o', "output", &output_name, "file",
                    "output file name"),
        OPT_BOOLEAN('i', "no-inherit", &no_inherit,
                    "child tasks do not inherit counters"),
        OPT_UINTEGER('F', "freq", &user_freq, "profile at this frequency"),
        OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"),
        OPT_BOOLEAN(0, "group", &group,
                    "put the counters into a counter group"),
        OPT_BOOLEAN('g', "call-graph", &call_graph,
                    "do call-graph (stack chain/backtrace) recording"),
        OPT_INCR('v', "verbose", &verbose,
                    "be more verbose (show counter open errors, etc)"),
        OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
        OPT_BOOLEAN('s', "stat", &inherit_stat,
                    "per thread counts"),
        OPT_BOOLEAN('d', "data", &sample_address,
                    "Sample addresses"),
        OPT_BOOLEAN('T', "timestamp", &sample_time, "Sample timestamps"),
        OPT_BOOLEAN('n', "no-samples", &no_samples,
                    "don't sample"),
        OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid_cache,
                    "do not update the buildid cache"),
        OPT_BOOLEAN('B', "no-buildid", &no_buildid,
                    "do not collect buildids in perf.data"),
        OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
                     "monitor event in cgroup name only",
                     parse_cgroups),
        OPT_END()
};
817
818 int cmd_record(int argc, const char **argv, const char *prefix __used)
819 {
820         int err = -ENOMEM;
821         struct perf_evsel *pos;
822
823         perf_header__set_cmdline(argc, argv);
824
825         evsel_list = perf_evlist__new(NULL, NULL);
826         if (evsel_list == NULL)
827                 return -ENOMEM;
828
829         argc = parse_options(argc, argv, record_options, record_usage,
830                             PARSE_OPT_STOP_AT_NON_OPTION);
831         if (!argc && target_pid == -1 && target_tid == -1 &&
832                 !system_wide && !cpu_list)
833                 usage_with_options(record_usage, record_options);
834
835         if (force && append_file) {
836                 fprintf(stderr, "Can't overwrite and append at the same time."
837                                 " You need to choose between -f and -A");
838                 usage_with_options(record_usage, record_options);
839         } else if (append_file) {
840                 write_mode = WRITE_APPEND;
841         } else {
842                 write_mode = WRITE_FORCE;
843         }
844
845         if (nr_cgroups && !system_wide) {
846                 fprintf(stderr, "cgroup monitoring only available in"
847                         " system-wide mode\n");
848                 usage_with_options(record_usage, record_options);
849         }
850
851         symbol__init();
852
853         if (symbol_conf.kptr_restrict)
854                 pr_warning(
855 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
856 "check /proc/sys/kernel/kptr_restrict.\n\n"
857 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
858 "file is not found in the buildid cache or in the vmlinux path.\n\n"
859 "Samples in kernel modules won't be resolved at all.\n\n"
860 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
861 "even with a suitable vmlinux or kallsyms file.\n\n");
862
863         if (no_buildid_cache || no_buildid)
864                 disable_buildid_cache();
865
866         if (evsel_list->nr_entries == 0 &&
867             perf_evlist__add_default(evsel_list) < 0) {
868                 pr_err("Not enough memory for event selector list\n");
869                 goto out_symbol_exit;
870         }
871
872         if (target_pid != -1)
873                 target_tid = target_pid;
874
875         if (perf_evlist__create_maps(evsel_list, target_pid,
876                                      target_tid, cpu_list) < 0)
877                 usage_with_options(record_usage, record_options);
878
879         list_for_each_entry(pos, &evsel_list->entries, node) {
880                 if (perf_evsel__alloc_fd(pos, evsel_list->cpus->nr,
881                                          evsel_list->threads->nr) < 0)
882                         goto out_free_fd;
883                 if (perf_header__push_event(pos->attr.config, event_name(pos)))
884                         goto out_free_fd;
885         }
886
887         if (perf_evlist__alloc_pollfd(evsel_list) < 0)
888                 goto out_free_fd;
889
890         if (user_interval != ULLONG_MAX)
891                 default_interval = user_interval;
892         if (user_freq != UINT_MAX)
893                 freq = user_freq;
894
895         /*
896          * User specified count overrides default frequency.
897          */
898         if (default_interval)
899                 freq = 0;
900         else if (freq) {
901                 default_interval = freq;
902         } else {
903                 fprintf(stderr, "frequency and count are zero, aborting\n");
904                 err = -EINVAL;
905                 goto out_free_fd;
906         }
907
908         err = __cmd_record(argc, argv);
909 out_free_fd:
910         perf_evlist__delete_maps(evsel_list);
911 out_symbol_exit:
912         symbol__exit();
913         return err;
914 }