perf tools: Add x86 RDPMC, RDTSC test
Source: tools/perf/builtin-record.c (from the ~shefty/rdma-dev.git tree)
1 /*
2  * builtin-record.c
3  *
4  * Builtin record command: Record the profile of a workload
5  * (or a CPU, or a PID) into the perf.data output file - for
6  * later analysis via perf report.
7  */
8 #define _FILE_OFFSET_BITS 64
9
10 #include "builtin.h"
11
12 #include "perf.h"
13
14 #include "util/build-id.h"
15 #include "util/util.h"
16 #include "util/parse-options.h"
17 #include "util/parse-events.h"
18
19 #include "util/header.h"
20 #include "util/event.h"
21 #include "util/evlist.h"
22 #include "util/evsel.h"
23 #include "util/debug.h"
24 #include "util/session.h"
25 #include "util/tool.h"
26 #include "util/symbol.h"
27 #include "util/cpumap.h"
28 #include "util/thread_map.h"
29
30 #include <unistd.h>
31 #include <sched.h>
32 #include <sys/mman.h>
33
/*
 * How to treat an existing perf.data output file:
 * overwrite it (WRITE_FORCE) or append new data to it (WRITE_APPEND).
 */
enum write_mode_t {
        WRITE_FORCE,
        WRITE_APPEND
};
38
/*
 * All the state for one 'perf record' run: the tool callbacks, the parsed
 * options, the output file and the session/evlist being recorded.
 */
struct perf_record {
        struct perf_tool        tool;           /* event-processing callbacks */
        struct perf_record_opts opts;           /* parsed command-line options */
        u64                     bytes_written;  /* event payload written so far */
        const char              *output_name;   /* output path, NULL = default */
        struct perf_evlist      *evlist;        /* events being recorded */
        struct perf_session     *session;
        const char              *progname;      /* argv[0], for psignal() */
        int                     output;         /* output file descriptor */
        unsigned int            page_size;      /* sysconf(_SC_PAGE_SIZE) */
        int                     realtime_prio;  /* non-zero: SCHED_FIFO prio */
        enum write_mode_t       write_mode;     /* overwrite vs. append */
        bool                    no_buildid;     /* -B: skip build-id pass */
        bool                    no_buildid_cache; /* -N: don't update cache */
        bool                    force;          /* -f: overwrite data file */
        bool                    file_new;       /* writing a fresh perf.data */
        bool                    append_file;    /* -A: incremental profiling */
        long                    samples;        /* mmap reads that found data */
        off_t                   post_processing_offset; /* where event data starts */
};
59
/*
 * Account for 'size' bytes written to the output fd by somebody else
 * (e.g. tracing data streamed directly into it), so the final byte
 * statistics and header data_size stay accurate.
 */
static void advance_output(struct perf_record *rec, size_t size)
{
        rec->bytes_written += size;
}
64
65 static void write_output(struct perf_record *rec, void *buf, size_t size)
66 {
67         while (size) {
68                 int ret = write(rec->output, buf, size);
69
70                 if (ret < 0)
71                         die("failed to write");
72
73                 size -= ret;
74                 buf += ret;
75
76                 rec->bytes_written += ret;
77         }
78 }
79
/*
 * perf_tool callback: append a synthesized event verbatim to the output
 * file.  The containing struct perf_record is recovered from the tool
 * pointer embedded in it.
 */
static int process_synthesized_event(struct perf_tool *tool,
                                     union perf_event *event,
                                     struct perf_sample *sample __used,
                                     struct machine *machine __used)
{
        struct perf_record *rec = container_of(tool, struct perf_record, tool);
        write_output(rec, event, event->header.size);
        return 0;
}
89
/*
 * Copy all new events from one mmap'ed ring buffer into the output file.
 * The kernel advances 'head'; md->prev remembers how far we consumed
 * last time.  Data that wraps past the end of the buffer is written in
 * two chunks.  Finally the tail is published back to the kernel so the
 * space can be reused.
 */
static void perf_record__mmap_read(struct perf_record *rec,
                                   struct perf_mmap *md)
{
        unsigned int head = perf_mmap__read_head(md);
        unsigned int old = md->prev;
        unsigned char *data = md->base + rec->page_size; /* skip control page */
        unsigned long size;
        void *buf;

        if (old == head)
                return;         /* nothing new arrived */

        rec->samples++;

        size = head - old;

        if ((old & md->mask) + size != (head & md->mask)) {
                /* wrapped: first write from 'old' to the end of the buffer */
                buf = &data[old & md->mask];
                size = md->mask + 1 - (old & md->mask);
                old += size;

                write_output(rec, buf, size);
        }

        /* write the (remaining) contiguous chunk up to 'head' */
        buf = &data[old & md->mask];
        size = head - old;
        old += size;

        write_output(rec, buf, size);

        md->prev = old;
        perf_mmap__write_tail(md, old);
}
123
/* Set by the signal handler, polled by the main record loop. */
static volatile int done = 0;           /* request shutdown of the loop */
static volatile int signr = -1;         /* signal that stopped us, -1 = none */
static volatile int child_finished = 0; /* workload child already exited */

/* Shared handler for SIGCHLD/SIGINT/SIGUSR1: just record what happened. */
static void sig_handler(int sig)
{
        if (sig == SIGCHLD)
                child_finished = 1;

        done = 1;
        signr = sig;
}
136
/*
 * on_exit() handler: reap the forked workload (terminating it first if
 * it is still running) and, when we are exiting because of a fatal
 * signal, re-raise that signal with the default disposition so our
 * parent sees the true exit reason.
 */
static void perf_record__sig_exit(int exit_status __used, void *arg)
{
        struct perf_record *rec = arg;
        int status;

        if (rec->evlist->workload.pid > 0) {
                if (!child_finished)
                        kill(rec->evlist->workload.pid, SIGTERM);

                wait(&status);
                if (WIFSIGNALED(status))
                        psignal(WTERMSIG(status), rec->progname);
        }

        /* SIGUSR1 means "stop recording but exit normally"; -1 = no signal */
        if (signr == -1 || signr == SIGUSR1)
                return;

        signal(signr, SIG_DFL);
        kill(getpid(), signr);
}
157
158 static bool perf_evlist__equal(struct perf_evlist *evlist,
159                                struct perf_evlist *other)
160 {
161         struct perf_evsel *pos, *pair;
162
163         if (evlist->nr_entries != other->nr_entries)
164                 return false;
165
166         pair = list_entry(other->entries.next, struct perf_evsel, node);
167
168         list_for_each_entry(pos, &evlist->entries, node) {
169                 if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr) != 0))
170                         return false;
171                 pair = list_entry(pair->node.next, struct perf_evsel, node);
172         }
173
174         return true;
175 }
176
/*
 * Open a counter for every event on the configured cpus/threads, with
 * fallbacks for older kernels (no sample_id_all support) and missing
 * hardware cycle counters (fall back to the cpu-clock software event),
 * then mmap the ring buffers.  Unrecoverable errors exit the process.
 */
static void perf_record__open(struct perf_record *rec)
{
        struct perf_evsel *pos, *first;
        struct perf_evlist *evlist = rec->evlist;
        struct perf_session *session = rec->session;
        struct perf_record_opts *opts = &rec->opts;

        first = list_entry(evlist->entries.next, struct perf_evsel, node);

        perf_evlist__config_attrs(evlist, opts);

        list_for_each_entry(pos, &evlist->entries, node) {
                struct perf_event_attr *attr = &pos->attr;
                struct xyarray *group_fd = NULL;
                /*
                 * Check if parse_single_tracepoint_event has already asked for
                 * PERF_SAMPLE_TIME.
                 *
                 * XXX this is kludgy but short term fix for problems introduced by
                 * eac23d1c that broke 'perf script' by having different sample_types
                 * when using multiple tracepoint events when we use a perf binary
                 * that tries to use sample_id_all on an older kernel.
                 *
                 * We need to move counter creation to perf_session, support
                 * different sample_types, etc.
                 */
                bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;

                /* non-leader group members attach to the leader's fds */
                if (opts->group && pos != first)
                        group_fd = first->fd;
retry_sample_id:
                attr->sample_id_all = opts->sample_id_all_avail ? 1 : 0;
try_again:
                if (perf_evsel__open(pos, evlist->cpus, evlist->threads,
                                     opts->group, group_fd) < 0) {
                        int err = errno;

                        if (err == EPERM || err == EACCES) {
                                ui__error_paranoid();
                                exit(EXIT_FAILURE);
                        } else if (err ==  ENODEV && opts->cpu_list) {
                                die("No such device - did you specify"
                                        " an out-of-range profile CPU?\n");
                        } else if (err == EINVAL && opts->sample_id_all_avail) {
                                /*
                                 * Old kernel, no attr->sample_id_type_all field
                                 */
                                opts->sample_id_all_avail = false;
                                if (!opts->sample_time && !opts->raw_samples && !time_needed)
                                        attr->sample_type &= ~PERF_SAMPLE_TIME;

                                goto retry_sample_id;
                        }

                        /*
                         * If it's cycles then fall back to hrtimer
                         * based cpu-clock-tick sw counter, which
                         * is always available even if no PMU support:
                         */
                        if (attr->type == PERF_TYPE_HARDWARE
                                        && attr->config == PERF_COUNT_HW_CPU_CYCLES) {

                                if (verbose)
                                        ui__warning("The cycles event is not supported, "
                                                    "trying to fall back to cpu-clock-ticks\n");
                                attr->type = PERF_TYPE_SOFTWARE;
                                attr->config = PERF_COUNT_SW_CPU_CLOCK;
                                goto try_again;
                        }

                        if (err == ENOENT) {
                                ui__warning("The %s event is not supported.\n",
                                            event_name(pos));
                                exit(EXIT_FAILURE);
                        }

                        printf("\n");
                        error("sys_perf_event_open() syscall returned with %d (%s).  /bin/dmesg may provide additional information.\n",
                              err, strerror(err));

#if defined(__i386__) || defined(__x86_64__)
                        if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
                                die("No hardware sampling interrupt available."
                                    " No APIC? If so then you can boot the kernel"
                                    " with the \"lapic\" boot parameter to"
                                    " force-enable it.\n");
#endif

                        die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
                }
        }

        if (perf_evlist__set_filters(evlist)) {
                error("failed to set filter with %d (%s)\n", errno,
                        strerror(errno));
                exit(-1);
        }

        if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0)
                die("failed to mmap with %d (%s)\n", errno, strerror(errno));

        if (rec->file_new)
                session->evlist = evlist;
        else {
                /* appending: new events must match those already on file */
                if (!perf_evlist__equal(session->evlist, evlist)) {
                        fprintf(stderr, "incompatible append\n");
                        exit(-1);
                }
        }

        perf_session__update_sample_type(session);
}
289
290 static int process_buildids(struct perf_record *rec)
291 {
292         u64 size = lseek(rec->output, 0, SEEK_CUR);
293
294         if (size == 0)
295                 return 0;
296
297         rec->session->fd = rec->output;
298         return __perf_session__process_events(rec->session, rec->post_processing_offset,
299                                               size - rec->post_processing_offset,
300                                               size, &build_id__mark_dso_hit_ops);
301 }
302
/*
 * on_exit() handler: finalize the perf.data file - account the written
 * data size, run the build-id pass and rewrite the header - then tear
 * down the session state.  In pipe mode the header was streamed up
 * front, so there is nothing to finalize (note the session/evlist are
 * also not deleted in that case).
 */
static void perf_record__exit(int status __used, void *arg)
{
        struct perf_record *rec = arg;

        if (!rec->opts.pipe_output) {
                rec->session->header.data_size += rec->bytes_written;

                if (!rec->no_buildid)
                        process_buildids(rec);
                perf_session__write_header(rec->session, rec->evlist,
                                           rec->output, true);
                perf_session__delete(rec->session);
                perf_evlist__delete(rec->evlist);
                symbol__exit();
        }
}
319
/*
 * perf_session__process_machines() callback: synthesize kernel mmap and
 * module events for each guest machine so guest-side samples can be
 * resolved later.  Host machines are skipped.
 */
static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
        int err;
        struct perf_tool *tool = data;

        if (machine__is_host(machine))
                return;

        /*
         * As for the guest kernel, when processing the record & report
         * subcommands we arrange the module mmaps prior to the guest kernel
         * mmap and trigger a preload dso, because by default guest module
         * symbols are loaded from guest kallsyms instead of
         * /lib/modules/XXX/XXX.  This avoids missing symbols when the first
         * address is in a module instead of in the guest kernel.
         */
        err = perf_event__synthesize_modules(tool, process_synthesized_event,
                                             machine);
        if (err < 0)
                pr_err("Couldn't record guest kernel [%d]'s reference"
                       " relocation symbol.\n", machine->pid);

        /*
         * We use _stext for guest kernel because guest kernel's /proc/kallsyms
         * have no _text sometimes.
         */
        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                 machine, "_text");
        if (err < 0)
                err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                         machine, "_stext");
        if (err < 0)
                pr_err("Couldn't record guest kernel [%d]'s reference"
                       " relocation symbol.\n", machine->pid);
}
355
/*
 * Marker event flushed after each pass over all the ring buffers; lets
 * the report side process events in bounded "rounds".
 */
static struct perf_event_header finished_round_event = {
        .size = sizeof(struct perf_event_header),
        .type = PERF_RECORD_FINISHED_ROUND,
};

/* Drain every mmap'ed buffer once, then emit a round marker if needed. */
static void perf_record__mmap_read_all(struct perf_record *rec)
{
        int i;

        for (i = 0; i < rec->evlist->nr_mmaps; i++) {
                if (rec->evlist->mmap[i].base)
                        perf_record__mmap_read(rec, &rec->evlist->mmap[i]);
        }

        if (perf_header__has_feat(&rec->session->header, HEADER_TRACE_INFO))
                write_output(rec, &finished_round_event, sizeof(finished_round_event));
}
373
/*
 * The actual record run: set up the output file and session, synthesize
 * the bootstrap events (kernel/module/thread maps), start the workload
 * and drain the ring buffers until a signal or workload exit stops us.
 *
 * Returns 0 on success, a negative error otherwise.  Note that much of
 * the teardown happens in the on_exit() handlers registered here.
 */
static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
{
        struct stat st;
        int flags;
        int err, output;
        unsigned long waking = 0;
        const bool forks = argc > 0;    /* a workload command was given */
        struct machine *machine;
        struct perf_tool *tool = &rec->tool;
        struct perf_record_opts *opts = &rec->opts;
        struct perf_evlist *evsel_list = rec->evlist;
        const char *output_name = rec->output_name;
        struct perf_session *session;

        rec->progname = argv[0];

        rec->page_size = sysconf(_SC_PAGE_SIZE);

        /* install cleanup/signal plumbing before anything can fail */
        on_exit(perf_record__sig_exit, rec);
        signal(SIGCHLD, sig_handler);
        signal(SIGINT, sig_handler);
        signal(SIGUSR1, sig_handler);

        /*
         * Decide where the data goes: an explicit -o file, "-" or a FIFO
         * stdout for pipe mode, or the default "perf.data".
         */
        if (!output_name) {
                if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
                        opts->pipe_output = true;
                else
                        rec->output_name = output_name = "perf.data";
        }
        if (output_name) {
                if (!strcmp(output_name, "-"))
                        opts->pipe_output = true;
                else if (!stat(output_name, &st) && st.st_size) {
                        if (rec->write_mode == WRITE_FORCE) {
                                /* keep the previous run around as <name>.old */
                                char oldname[PATH_MAX];
                                snprintf(oldname, sizeof(oldname), "%s.old",
                                         output_name);
                                unlink(oldname);
                                rename(output_name, oldname);
                        }
                } else if (rec->write_mode == WRITE_APPEND) {
                        /* nothing to append to - fall back to a fresh file */
                        rec->write_mode = WRITE_FORCE;
                }
        }

        flags = O_CREAT|O_RDWR;
        if (rec->write_mode == WRITE_APPEND)
                rec->file_new = 0;
        else
                flags |= O_TRUNC;

        if (opts->pipe_output)
                output = STDOUT_FILENO;
        else
                output = open(output_name, flags, S_IRUSR | S_IWUSR);
        if (output < 0) {
                perror("failed to create output file");
                exit(-1);
        }

        rec->output = output;

        session = perf_session__new(output_name, O_WRONLY,
                                    rec->write_mode == WRITE_FORCE, false, NULL);
        if (session == NULL) {
                pr_err("Not enough memory for reading perf file header\n");
                return -1;
        }

        rec->session = session;

        if (!rec->no_buildid)
                perf_header__set_feat(&session->header, HEADER_BUILD_ID);

        /* appending: pick up the existing header/evlist first */
        if (!rec->file_new) {
                err = perf_session__read_header(session, output);
                if (err < 0)
                        goto out_delete_session;
        }

        if (have_tracepoints(&evsel_list->entries))
                perf_header__set_feat(&session->header, HEADER_TRACE_INFO);

        /* record the environment this data was collected in */
        perf_header__set_feat(&session->header, HEADER_HOSTNAME);
        perf_header__set_feat(&session->header, HEADER_OSRELEASE);
        perf_header__set_feat(&session->header, HEADER_ARCH);
        perf_header__set_feat(&session->header, HEADER_CPUDESC);
        perf_header__set_feat(&session->header, HEADER_NRCPUS);
        perf_header__set_feat(&session->header, HEADER_EVENT_DESC);
        perf_header__set_feat(&session->header, HEADER_CMDLINE);
        perf_header__set_feat(&session->header, HEADER_VERSION);
        perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY);
        perf_header__set_feat(&session->header, HEADER_TOTAL_MEM);
        perf_header__set_feat(&session->header, HEADER_NUMA_TOPOLOGY);
        perf_header__set_feat(&session->header, HEADER_CPUID);

        if (forks) {
                err = perf_evlist__prepare_workload(evsel_list, opts, argv);
                if (err < 0) {
                        pr_err("Couldn't run the workload!\n");
                        goto out_delete_session;
                }
        }

        perf_record__open(rec);

        /*
         * perf_session__delete(session) will be called at perf_record__exit()
         */
        on_exit(perf_record__exit, rec);

        if (opts->pipe_output) {
                err = perf_header__write_pipe(output);
                if (err < 0)
                        return err;
        } else if (rec->file_new) {
                err = perf_session__write_header(session, evsel_list,
                                                 output, false);
                if (err < 0)
                        return err;
        }

        rec->post_processing_offset = lseek(output, 0, SEEK_CUR);

        machine = perf_session__find_host_machine(session);
        if (!machine) {
                pr_err("Couldn't find native kernel information.\n");
                return -1;
        }

        if (opts->pipe_output) {
                /*
                 * A pipe consumer cannot seek back for feature sections,
                 * so synthesize attrs/event types/tracing data inline.
                 */
                err = perf_event__synthesize_attrs(tool, session,
                                                   process_synthesized_event);
                if (err < 0) {
                        pr_err("Couldn't synthesize attrs.\n");
                        return err;
                }

                err = perf_event__synthesize_event_types(tool, process_synthesized_event,
                                                         machine);
                if (err < 0) {
                        pr_err("Couldn't synthesize event_types.\n");
                        return err;
                }

                if (have_tracepoints(&evsel_list->entries)) {
                        /*
                         * FIXME err <= 0 here actually means that
                         * there were no tracepoints so its not really
                         * an error, just that we don't need to
                         * synthesize anything.  We really have to
                         * return this more properly and also
                         * propagate errors that now are calling die()
                         */
                        err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
                                                                  process_synthesized_event);
                        if (err <= 0) {
                                pr_err("Couldn't record tracing data.\n");
                                return err;
                        }
                        /* tracing data went straight to the fd; account it */
                        advance_output(rec, err);
                }
        }

        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                 machine, "_text");
        if (err < 0)
                err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                         machine, "_stext");
        if (err < 0)
                pr_err("Couldn't record kernel reference relocation symbol\n"
                       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                       "Check /proc/kallsyms permission or run as root.\n");

        err = perf_event__synthesize_modules(tool, process_synthesized_event,
                                             machine);
        if (err < 0)
                pr_err("Couldn't record kernel module information.\n"
                       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                       "Check /proc/modules permission or run as root.\n");

        if (perf_guest)
                perf_session__process_machines(session, tool,
                                               perf_event__synthesize_guest_os);

        if (!opts->system_wide)
                perf_event__synthesize_thread_map(tool, evsel_list->threads,
                                                  process_synthesized_event,
                                                  machine);
        else
                perf_event__synthesize_threads(tool, process_synthesized_event,
                                               machine);

        if (rec->realtime_prio) {
                struct sched_param param;

                param.sched_priority = rec->realtime_prio;
                if (sched_setscheduler(0, SCHED_FIFO, &param)) {
                        pr_err("Could not set realtime priority.\n");
                        exit(-1);
                }
        }

        perf_evlist__enable(evsel_list);

        /*
         * Let the child rip
         */
        if (forks)
                perf_evlist__start_workload(evsel_list);

        /* main loop: drain the buffers until a signal asks us to stop */
        for (;;) {
                int hits = rec->samples;

                perf_record__mmap_read_all(rec);

                if (hits == rec->samples) {
                        /* nothing new: quit if signalled, else sleep */
                        if (done)
                                break;
                        err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
                        waking++;
                }

                /* stop the counters, then loop once more to drain them */
                if (done)
                        perf_evlist__disable(evsel_list);
        }

        if (quiet || signr == SIGUSR1)
                return 0;

        fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

        /*
         * Approximate RIP event size: 24 bytes.
         */
        fprintf(stderr,
                "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
                (double)rec->bytes_written / 1024.0 / 1024.0,
                output_name,
                rec->bytes_written / 24);

        return 0;

out_delete_session:
        perf_session__delete(session);
        return err;
}
621
/* Usage strings displayed by usage_with_options(). */
static const char * const record_usage[] = {
        "perf record [<options>] [<command>]",
        "perf record [<options>] -- <command> [<options>]",
        NULL
};
627
628 /*
629  * XXX Ideally would be local to cmd_record() and passed to a perf_record__new
630  * because we need to have access to it in perf_record__exit, that is called
631  * after cmd_record() exits, but since record_options need to be accessible to
632  * builtin-script, leave it here.
633  *
634  * At least we don't ouch it in all the other functions here directly.
635  *
636  * Just say no to tons of global variables, sigh.
637  */
static struct perf_record record = {
        .opts = {
                .target_pid          = -1,              /* -1: no pid target */
                .target_tid          = -1,              /* -1: no tid target */
                .mmap_pages          = UINT_MAX,        /* MAX = not set by user */
                .user_freq           = UINT_MAX,        /* MAX = not set by user */
                .user_interval       = ULLONG_MAX,      /* MAX = not set by user */
                .freq                = 1000,            /* default samples/sec */
                .sample_id_all_avail = true,            /* assume modern kernel */
        },
        .write_mode = WRITE_FORCE,
        .file_new   = true,
};
651
652 /*
653  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
654  * with it and switch to use the library functions in perf_evlist that came
655  * from builtin-record.c, i.e. use perf_record_opts,
656  * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
657  * using pipes, etc.
658  */
const struct option record_options[] = {
        /* event selection */
        OPT_CALLBACK('e', "event", &record.evlist, "event",
                     "event selector. use 'perf list' to list available events",
                     parse_events_option),
        OPT_CALLBACK(0, "filter", &record.evlist, "filter",
                     "event filter", parse_filter),
        /* target selection: pid/tid/cpu/system-wide */
        OPT_INTEGER('p', "pid", &record.opts.target_pid,
                    "record events on existing process id"),
        OPT_INTEGER('t', "tid", &record.opts.target_tid,
                    "record events on existing thread id"),
        OPT_INTEGER('r', "realtime", &record.realtime_prio,
                    "collect data with this RT SCHED_FIFO priority"),
        OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
                    "collect data without buffering"),
        OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
                    "collect raw sample records from all opened counters"),
        OPT_BOOLEAN('a', "all-cpus", &record.opts.system_wide,
                            "system-wide collection from all CPUs"),
        OPT_BOOLEAN('A', "append", &record.append_file,
                            "append to the output file to do incremental profiling"),
        OPT_STRING('C', "cpu", &record.opts.cpu_list, "cpu",
                    "list of cpus to monitor"),
        OPT_BOOLEAN('f', "force", &record.force,
                        "overwrite existing data file (deprecated)"),
        /* sampling configuration */
        OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
        OPT_STRING('o', "output", &record.output_name, "file",
                    "output file name"),
        OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
                    "child tasks do not inherit counters"),
        OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
        OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
                     "number of mmap data pages"),
        OPT_BOOLEAN(0, "group", &record.opts.group,
                    "put the counters into a counter group"),
        OPT_BOOLEAN('g', "call-graph", &record.opts.call_graph,
                    "do call-graph (stack chain/backtrace) recording"),
        OPT_INCR('v', "verbose", &verbose,
                    "be more verbose (show counter open errors, etc)"),
        OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
        OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
                    "per thread counts"),
        OPT_BOOLEAN('d', "data", &record.opts.sample_address,
                    "Sample addresses"),
        OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
        OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
        OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
                    "don't sample"),
        /* build-id handling */
        OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
                    "do not update the buildid cache"),
        OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
                    "do not collect buildids in perf.data"),
        OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
                     "monitor event in cgroup name only",
                     parse_cgroups),
        OPT_END()
};
715
/*
 * Entry point for 'perf record': parse and validate the options, set up
 * the symbol machinery and the evlist/maps, then hand off to
 * __cmd_record().  Returns 0 on success or a negative errno-style code.
 */
int cmd_record(int argc, const char **argv, const char *prefix __used)
{
        int err = -ENOMEM;
        struct perf_evsel *pos;
        struct perf_evlist *evsel_list;
        struct perf_record *rec = &record;

        perf_header__set_cmdline(argc, argv);

        evsel_list = perf_evlist__new(NULL, NULL);
        if (evsel_list == NULL)
                return -ENOMEM;

        rec->evlist = evsel_list;

        argc = parse_options(argc, argv, record_options, record_usage,
                            PARSE_OPT_STOP_AT_NON_OPTION);
        /* need something to record: a command, a pid/tid, or a cpu scope */
        if (!argc && rec->opts.target_pid == -1 && rec->opts.target_tid == -1 &&
                !rec->opts.system_wide && !rec->opts.cpu_list)
                usage_with_options(record_usage, record_options);

        if (rec->force && rec->append_file) {
                fprintf(stderr, "Can't overwrite and append at the same time."
                                " You need to choose between -f and -A");
                usage_with_options(record_usage, record_options);
        } else if (rec->append_file) {
                rec->write_mode = WRITE_APPEND;
        } else {
                rec->write_mode = WRITE_FORCE;
        }

        if (nr_cgroups && !rec->opts.system_wide) {
                fprintf(stderr, "cgroup monitoring only available in"
                        " system-wide mode\n");
                usage_with_options(record_usage, record_options);
        }

        symbol__init();

        if (symbol_conf.kptr_restrict)
                pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

        if (rec->no_buildid_cache || rec->no_buildid)
                disable_buildid_cache();

        /* no -e given: fall back to the default event */
        if (evsel_list->nr_entries == 0 &&
            perf_evlist__add_default(evsel_list) < 0) {
                pr_err("Not enough memory for event selector list\n");
                goto out_symbol_exit;
        }

        if (rec->opts.target_pid != -1)
                rec->opts.target_tid = rec->opts.target_pid;

        if (perf_evlist__create_maps(evsel_list, rec->opts.target_pid,
                                     rec->opts.target_tid, rec->opts.cpu_list) < 0)
                usage_with_options(record_usage, record_options);

        list_for_each_entry(pos, &evsel_list->entries, node) {
                if (perf_header__push_event(pos->attr.config, event_name(pos)))
                        goto out_free_fd;
        }

        if (rec->opts.user_interval != ULLONG_MAX)
                rec->opts.default_interval = rec->opts.user_interval;
        if (rec->opts.user_freq != UINT_MAX)
                rec->opts.freq = rec->opts.user_freq;

        /*
         * User specified count overrides default frequency.
         */
        if (rec->opts.default_interval)
                rec->opts.freq = 0;
        else if (rec->opts.freq) {
                rec->opts.default_interval = rec->opts.freq;
        } else {
                fprintf(stderr, "frequency and count are zero, aborting\n");
                err = -EINVAL;
                goto out_free_fd;
        }

        err = __cmd_record(&record, argc, argv);
out_free_fd:
        perf_evlist__delete_maps(evsel_list);
out_symbol_exit:
        symbol__exit();
        return err;
}