Skip to content
Snippets Groups Projects
builtin-stat.c 39.3 KiB
Newer Older
  • Learn to ignore specific revisions
  • 	if (scaled) {
    		double avg_enabled, avg_running;
    
    
    		avg_enabled = avg_stats(&ps->res_stats[1]);
    		avg_running = avg_stats(&ps->res_stats[2]);
    
    		fprintf(output, " [%5.2f%%]", 100 * avg_running / avg_enabled);
    
    	fprintf(output, "\n");
    
    /*
     * Print out the results of a single counter, one output line per CPU.
     * Uses the per-CPU values in counter->counts->cpu[] and does not use the
     * aggregated count in system-wide.
     *
     * @counter: event whose per-CPU val/ena/run values are printed
     * @prefix:  optional string written at the start of each line; nothing
     *           is written for it when NULL
     *
     * NOTE(review): the opening brace, the local declarations (cpu, val, ena,
     * run), some branches and the closing braces are not visible in this
     * listing -- confirm against the complete file.
     */
    
    static void print_counter(struct perf_evsel *counter, char *prefix)
    
    	/* Walk every CPU index reported by perf_evsel__nr_cpus(). */
    	for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
    
    		/* Per-CPU value plus its ena/run fields. */
    		val = counter->counts->cpu[cpu].val;
    		ena = counter->counts->cpu[cpu].ena;
    		run = counter->counts->cpu[cpu].run;
    
    
    		if (prefix)
    			fprintf(output, "%s", prefix);
    
    
    		/*
    		 * ena or run is zero: print a placeholder line instead of a
    		 * value.  The placeholder text is CNTR_NOT_COUNTED when
    		 * counter->supported is set, CNTR_NOT_SUPPORTED otherwise.
    		 * All field widths collapse to 0 for CSV output.
    		 *
    		 * NOTE(review): the final argument for the last "%*s" and the
    		 * closing ");" of this fprintf() are not visible here.
    		 */
    		if (run == 0 || ena == 0) {
    
    			fprintf(output, "CPU%*d%s%*s%s%*s",
    
    				csv_output ? 0 : -4,
    
    				perf_evsel__cpus(counter)->map[cpu], csv_sep,
    
    				csv_output ? 0 : 18,
    
    				counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
    				csv_sep,
    
    				csv_output ? 0 : -24,
    
    			/* Append the cgroup name when the counter has one. */
    			if (counter->cgrp)
    
    				fprintf(output, "%s%s",
    					csv_sep, counter->cgrp->name);
    
    			fputc('\n', output);
    
    			continue;
    		}
    
    		/*
    		 * NOTE(review): abs_printout() looks like the alternative to
    		 * nsec_printout(), but no else is visible in this listing --
    		 * confirm against the complete file.
    		 */
    		if (nsec_counter(counter))
    
    			nsec_printout(cpu, 0, counter, val);
    
    			abs_printout(cpu, 0, counter, val);
    
    		/* Extra annotations are only emitted for non-CSV output. */
    		if (!csv_output) {
    			print_noise(counter, 1.0);
    
    				/* run expressed as a percentage of ena */
    				fprintf(output, "  (%.2f%%)",
    					100.0 * run / ena);
    
    		fputc('\n', output);
    
    /*
     * Print the report to 'output'.
     *
     * For non-CSV output a header line is written first (naming the argv
     * words, or the target pid/tid), followed by the per-counter results and
     * finally the average of walltime_nsecs_stats printed in seconds.
     *
     * @argc: number of entries in argv
     * @argv: words echoed in the header when no existing task is targeted
     *
     * NOTE(review): several closing braces and what look like else branches
     * are not visible in this listing -- confirm against the complete file.
     */
    static void print_stat(int argc, const char **argv)
    {
    
    	struct perf_evsel *counter;
    	int i;
    
    	/* Human-readable header; skipped entirely for CSV output. */
    	if (!csv_output) {
    
    		fprintf(output, "\n");
    		fprintf(output, " Performance counter stats for ");
    
    		/* No existing task targeted: echo argv[0..argc-1]. */
    		if (!perf_target__has_task(&target)) {
    
    			fprintf(output, "\'%s", argv[0]);
    
    			for (i = 1; i < argc; i++)
    
    				fprintf(output, " %s", argv[i]);
    
    		/*
    		 * NOTE(review): the "thread id" line presumably belongs to an
    		 * else branch that is not visible here -- confirm.
    		 */
    		} else if (target.pid)
    			fprintf(output, "process id \'%s", target.pid);
    
    			fprintf(output, "thread id \'%s", target.tid);
    
    		/* Closes the quote opened by the header text above. */
    		fprintf(output, "\'");
    
    		if (run_count > 1)
    
    			fprintf(output, " (%d runs)", run_count);
    		fprintf(output, ":\n\n");
    
    	/*
    	 * Counter results: per-socket, per-CPU (print_counter) or
    	 * print_counter_aggr for each entry of evsel_list.
    	 * NOTE(review): the two list walks below presumably sit in separate
    	 * branches; the separating "} else {" is not visible here.
    	 */
    	if (aggr_socket)
    		print_aggr_socket(NULL);
    	else if (no_aggr) {
    
    		list_for_each_entry(counter, &evsel_list->entries, node)
    
    			print_counter(counter, NULL);
    
    		list_for_each_entry(counter, &evsel_list->entries, node)
    
    			print_counter_aggr(counter, NULL);
    
    	/* Footer with the elapsed time; non-CSV output only. */
    	if (!csv_output) {
    
    			fprintf(output, "\n");
    		/* The average is divided by 1e9 and labelled as seconds. */
    		fprintf(output, " %17.9f seconds time elapsed",
    
    				avg_stats(&walltime_nsecs_stats)/1e9);
    		/* With more than one run, also print the noise (stddev vs. avg). */
    		if (run_count > 1) {
    
    			fprintf(output, "                                        ");
    
    			print_noise_pct(stddev_stats(&walltime_nsecs_stats),
    					avg_stats(&walltime_nsecs_stats));
    
    		fprintf(output, "\n\n");
    
    /*
     * Signal number recorded by skip_signal(); -1 means no signal has been
     * recorded.  Read by sig_atexit().
     */
    static volatile int signr = -1;
    
    
    /*
     * Signal handler: stores the delivered signal number in signr.
     *
     * @signo: number of the signal being handled
     *
     * NOTE(review): the opening brace and the statement controlled by the
     * if below are not visible in this listing; as shown, the if would guard
     * the assignment to signr -- confirm against the complete file.
     */
    static void skip_signal(int signo)
    
    	if ((child_pid == -1) || interval)
    
    	signr = signo;
    }
    
    /*
     * Exit-time cleanup for the signal handling done by skip_signal().
     *
     * If a child is still being tracked (child_pid != -1) it is sent SIGTERM.
     * If a signal number was recorded in signr, that signal's default
     * disposition is restored and the signal is re-sent to this process, so
     * the default action for it takes effect.
     */
    static void sig_atexit(void)
    {
    	if (child_pid != -1)
    		kill(child_pid, SIGTERM);

    	/* No signal was recorded: nothing to re-deliver. */
    	if (signr == -1)
    		return;

    	signal(signr, SIG_DFL);
    	kill(getpid(), signr);
    }
    
    /*
     * Option callback for -B/--big-num.
     *
     * Stores 0 in big_num_opt when the option is being unset and 1 otherwise.
     * The option descriptor and its string argument are not used.
     *
     * Always returns 0.
     */
    static int stat__set_big_num(const struct option *opt __maybe_unused,
    			     const char *s __maybe_unused, int unset)
    {
    	if (unset)
    		big_num_opt = 0;
    	else
    		big_num_opt = 1;

    	return 0;
    }
    
    
    /*
     * Add default attributes, if there were no attributes specified or
     * if -d/--detailed, -d -d or -d -d -d is used.
     *
     * Nothing is added for a null run.  When evsel_list has no entries the
     * default software/hardware events are added first.  Each level of
     * detailed_run (1, 2, 3) then appends one more table of cache events.
     *
     * Returns 0 on success and a negative value when a table could not be
     * added to evsel_list.
     */
    static int add_default_attributes(void)
    {
    	struct perf_event_attr default_attrs[] = {

      { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK		},
      { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES	},
      { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS		},
      { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS		},

      { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES		},
      { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND	},
      { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND	},
      { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS		},
      { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS	},
      { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES		},

    };

    	/*
    	 * Detailed stats (-d), covering the L1 and last level data caches:
    	 */
    	struct perf_event_attr detailed_attrs[] = {

      { .type = PERF_TYPE_HW_CACHE,
        .config =
    	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
    	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
    	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

      { .type = PERF_TYPE_HW_CACHE,
        .config =
    	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
    	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
    	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

      { .type = PERF_TYPE_HW_CACHE,
        .config =
    	 PERF_COUNT_HW_CACHE_LL			<<  0  |
    	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
    	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

      { .type = PERF_TYPE_HW_CACHE,
        .config =
    	 PERF_COUNT_HW_CACHE_LL			<<  0  |
    	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
    	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
    };

    	/*
    	 * Very detailed stats (-d -d), covering the instruction cache and
    	 * the TLB caches:
    	 */
    	struct perf_event_attr very_detailed_attrs[] = {

      { .type = PERF_TYPE_HW_CACHE,
        .config =
    	 PERF_COUNT_HW_CACHE_L1I		<<  0  |
    	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
    	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

      { .type = PERF_TYPE_HW_CACHE,
        .config =
    	 PERF_COUNT_HW_CACHE_L1I		<<  0  |
    	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
    	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

      { .type = PERF_TYPE_HW_CACHE,
        .config =
    	 PERF_COUNT_HW_CACHE_DTLB		<<  0  |
    	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
    	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

      { .type = PERF_TYPE_HW_CACHE,
        .config =
    	 PERF_COUNT_HW_CACHE_DTLB		<<  0  |
    	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
    	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

      { .type = PERF_TYPE_HW_CACHE,
        .config =
    	 PERF_COUNT_HW_CACHE_ITLB		<<  0  |
    	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
    	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

      { .type = PERF_TYPE_HW_CACHE,
        .config =
    	 PERF_COUNT_HW_CACHE_ITLB		<<  0  |
    	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
    	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

    };

    	/*
    	 * Very, very detailed stats (-d -d -d), adding prefetch events:
    	 */
    	struct perf_event_attr very_very_detailed_attrs[] = {

      { .type = PERF_TYPE_HW_CACHE,
        .config =
    	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
    	(PERF_COUNT_HW_CACHE_OP_PREFETCH	<<  8) |
    	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

      { .type = PERF_TYPE_HW_CACHE,
        .config =
    	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
    	(PERF_COUNT_HW_CACHE_OP_PREFETCH	<<  8) |
    	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
    };

    	/* Set attrs if no event is selected and !null_run: */
    	if (null_run)
    		return 0;

    	if (!evsel_list->nr_entries) {
    		/* A failed add must stop here rather than fall through. */
    		if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0)
    			return -1;
    	}

    	/* Detailed events get appended to the event list: */

    	if (detailed_run <  1)
    		return 0;

    	/* Append detailed run extra attributes: */
    	if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
    		return -1;

    	if (detailed_run < 2)
    		return 0;

    	/* Append very detailed run extra attributes: */
    	if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
    		return -1;

    	if (detailed_run < 3)
    		return 0;

    	/* Append very, very detailed run extra attributes: */
    	return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
    }
    
    int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
    
    	bool append_file = false;
    
    	int output_fd = 0;
    	const char *output_name	= NULL;
    	const struct option options[] = {
    	OPT_CALLBACK('e', "event", &evsel_list, "event",
    		     "event selector. use 'perf list' to list available events",
    		     parse_events_option),
    	OPT_CALLBACK(0, "filter", &evsel_list, "filter",
    		     "event filter", parse_filter),
    	OPT_BOOLEAN('i', "no-inherit", &no_inherit,
    		    "child tasks do not inherit counters"),
    	OPT_STRING('p', "pid", &target.pid, "pid",
    		   "stat events on existing process id"),
    	OPT_STRING('t', "tid", &target.tid, "tid",
    		   "stat events on existing thread id"),
    	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
    		    "system-wide collection from all CPUs"),
    	OPT_BOOLEAN('g', "group", &group,
    		    "put the counters into a counter group"),
    	OPT_BOOLEAN('c', "scale", &scale, "scale/normalize counters"),
    	OPT_INCR('v', "verbose", &verbose,
    		    "be more verbose (show counter open errors, etc)"),
    	OPT_INTEGER('r', "repeat", &run_count,
    
    		    "repeat command and print average + stddev (max: 100, forever: 0)"),
    
    	OPT_BOOLEAN('n', "null", &null_run,
    		    "null run - dont start any counters"),
    	OPT_INCR('d', "detailed", &detailed_run,
    		    "detailed run - start a lot of events"),
    	OPT_BOOLEAN('S', "sync", &sync_run,
    		    "call sync() before starting a run"),
    	OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, 
    			   "print large numbers with thousands\' separators",
    			   stat__set_big_num),
    	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
    		    "list of cpus to monitor in system-wide"),
    	OPT_BOOLEAN('A', "no-aggr", &no_aggr, "disable CPU count aggregation"),
    	OPT_STRING('x', "field-separator", &csv_sep, "separator",
    		   "print counts with custom separator"),
    	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
    		     "monitor event in cgroup name only", parse_cgroups),
    	OPT_STRING('o', "output", &output_name, "file", "output file name"),
    	OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
    	OPT_INTEGER(0, "log-fd", &output_fd,
    		    "log output to fd, instead of stderr"),
    
    	OPT_STRING(0, "pre", &pre_cmd, "command",
    			"command to run prior to the measured command"),
    	OPT_STRING(0, "post", &post_cmd, "command",
    			"command to run after to the measured command"),
    
    	OPT_UINTEGER('I', "interval-print", &interval,
    		    "print counts at regular interval in ms (>= 100)"),
    
    	OPT_BOOLEAN(0, "aggr-socket", &aggr_socket, "aggregate counts per processor socket"),
    
    	OPT_END()
    	};
    	const char * const stat_usage[] = {
    		"perf stat [<options>] [<command>]",
    		NULL
    	};
    	int status = -ENOMEM, run_idx;
    
    	const char *mode;
    
    	if (evsel_list == NULL)
    		return -ENOMEM;
    
    
    	argc = parse_options(argc, argv, options, stat_usage,
    		PARSE_OPT_STOP_AT_NON_OPTION);
    
    	output = stderr;
    	if (output_name && strcmp(output_name, "-"))
    		output = NULL;
    
    
    	if (output_name && output_fd) {
    		fprintf(stderr, "cannot use both --output and --log-fd\n");
    		usage_with_options(stat_usage, options);
    	}
    
    
    	if (output_fd < 0) {
    		fprintf(stderr, "argument to --log-fd must be a > 0\n");
    		usage_with_options(stat_usage, options);
    	}
    
    
    	if (!output) {
    		struct timespec tm;
    		mode = append_file ? "a" : "w";
    
    		output = fopen(output_name, mode);
    		if (!output) {
    			perror("failed to create output file");
    
    		}
    		clock_gettime(CLOCK_REALTIME, &tm);
    		fprintf(output, "# started on %s\n", ctime(&tm.tv_sec));
    
    	} else if (output_fd > 0) {
    
    		mode = append_file ? "a" : "w";
    		output = fdopen(output_fd, mode);
    		if (!output) {
    			perror("Failed opening logfd");
    			return -errno;
    		}
    
    	if (csv_sep) {
    
    		csv_output = true;
    
    		if (!strcmp(csv_sep, "\\t"))
    			csv_sep = "\t";
    	} else
    
    		csv_sep = DEFAULT_SEPARATOR;
    
    	/*
    	 * let the spreadsheet do the pretty-printing
    	 */
    	if (csv_output) {
    
    		/* User explicitly passed -B? */
    
    		if (big_num_opt == 1) {
    			fprintf(stderr, "-B option not supported with -x\n");
    			usage_with_options(stat_usage, options);
    		} else /* Nope, so disable big number formatting */
    			big_num = false;
    	} else if (big_num_opt == 0) /* User passed --no-big-num */
    		big_num = false;
    
    
    	if (!argc && !perf_target__has_task(&target))
    
    		usage_with_options(stat_usage, options);
    
    	if (run_count < 0) {
    
    		usage_with_options(stat_usage, options);
    
    	} else if (run_count == 0) {
    		forever = true;
    		run_count = 1;
    	}
    
    	/* no_aggr, cgroup are for system-wide only */
    
    	if ((no_aggr || nr_cgroups) && !perf_target__has_cpu(&target)) {
    
    		fprintf(stderr, "both cgroup and no-aggregation "
    			"modes only available in system-wide mode\n");
    
    
    		usage_with_options(stat_usage, options);
    
    	if (aggr_socket) {
    		if (!perf_target__has_cpu(&target)) {
    			fprintf(stderr, "--aggr-socket only available in system-wide mode (-a)\n");
    			usage_with_options(stat_usage, options);
    		}
    		no_aggr = true;
    	}
    
    
    	if (add_default_attributes())
    		goto out;
    
    	perf_target__validate(&target);
    
    	if (perf_evlist__create_maps(evsel_list, &target) < 0) {
    
    		if (perf_target__has_task(&target))
    
    			pr_err("Problems finding threads of monitor\n");
    
    		if (perf_target__has_cpu(&target))
    
    			perror("failed to parse CPUs map");
    
    		usage_with_options(stat_usage, options);
    
    	if (interval && interval < 100) {
    		pr_err("print interval must be >= 100ms\n");
    		usage_with_options(stat_usage, options);
    		return -1;
    	}
    
    	if (perf_evlist__alloc_stats(evsel_list, interval))
    		goto out_free_maps;
    
    Ingo Molnar's avatar
    Ingo Molnar committed
    	/*
    	 * We don't want to block the signals - that would cause
    	 * child tasks to inherit that and Ctrl-C would not work.
    	 * What we want is for Ctrl-C to work in the exec()-ed
    	 * task, but be ignored by perf stat itself:
    	 */
    
    	atexit(sig_atexit);
    
    	if (!forever)
    		signal(SIGINT,  skip_signal);
    
    	signal(SIGCHLD, skip_signal);
    
    Ingo Molnar's avatar
    Ingo Molnar committed
    	signal(SIGALRM, skip_signal);
    	signal(SIGABRT, skip_signal);
    
    
    	for (run_idx = 0; forever || run_idx < run_count; run_idx++) {
    
    		if (run_count != 1 && verbose)
    
    			fprintf(output, "[ perf stat: executing run #%d ... ]\n",
    				run_idx + 1);
    
    		status = run_perf_stat(argc, argv);
    
    		if (forever && status != -1) {
    			print_stat(argc, argv);
    
    	if (!forever && status != -1 && !interval)
    
    
    	perf_evlist__free_stats(evsel_list);
    out_free_maps:
    
    	perf_evlist__delete_maps(evsel_list);
    
    out:
    	perf_evlist__delete(evsel_list);