/*
 * builtin-stat.c
*
* Builtin stat command: Give a precise performance counters summary
* overview about any workload, CPU or specific PID.
*
 * Sample output:

   $ perf stat ./hackbench 10

  Time: 0.118

  Performance counter stats for './hackbench 10':

       1708.761321 task-clock                #   11.037 CPUs utilized
            41,190 context-switches          #    0.024 M/sec
             6,735 CPU-migrations            #    0.004 M/sec
            17,318 page-faults               #    0.010 M/sec
     5,205,202,243 cycles                    #    3.046 GHz
     3,856,436,920 stalled-cycles-frontend   #   74.09% frontend cycles idle
     1,600,790,871 stalled-cycles-backend    #   30.75% backend  cycles idle
     2,603,501,247 instructions              #    0.50  insns per cycle
                                             #    1.48  stalled cycles per insn
       484,357,498 branches                  #  283.455 M/sec
         6,388,934 branch-misses             #    1.32% of all branches

        0.154822978  seconds time elapsed

 *
* Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
*
* Improvements and fixes by:
*
* Arjan van de Ven <arjan@linux.intel.com>
* Yanmin Zhang <yanmin.zhang@intel.com>
* Wu Fengguang <fengguang.wu@intel.com>
* Mike Galbraith <efault@gmx.de>
* Paul Mackerras <paulus@samba.org>
* Jaswinder Singh Rajput <jaswinder@kernel.org>
*
* Released under the GPL v2. (and only v2, not any later version)
*/
#include "builtin.h"
#include "util/parse-options.h"
#include "util/parse-events.h"
#include "util/cpumap.h"

#include "perf.h"
#include "util/util.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/color.h"
#include "util/thread.h"
#include "util/thread_map.h"
#include <sys/prctl.h>
#include <math.h>
#include <locale.h>

#define CNTR_NOT_SUPPORTED "<not supported>"
#define CNTR_NOT_COUNTED "<not counted>"
static struct perf_event_attr default_attrs[] = {
{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES },
{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS },
{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS },
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES },
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES },
};

/*
 * Detailed stats (-d), covering the L1 and last level data caches:
 */
static struct perf_event_attr detailed_attrs[] = {
{ .type = PERF_TYPE_HW_CACHE,
.config =
PERF_COUNT_HW_CACHE_L1D << 0 |
(PERF_COUNT_HW_CACHE_OP_READ << 8) |
(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
{ .type = PERF_TYPE_HW_CACHE,
.config =
PERF_COUNT_HW_CACHE_L1D << 0 |
(PERF_COUNT_HW_CACHE_OP_READ << 8) |
(PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
{ .type = PERF_TYPE_HW_CACHE,
.config =
PERF_COUNT_HW_CACHE_LL << 0 |
(PERF_COUNT_HW_CACHE_OP_READ << 8) |
(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
{ .type = PERF_TYPE_HW_CACHE,
.config =
PERF_COUNT_HW_CACHE_LL << 0 |
(PERF_COUNT_HW_CACHE_OP_READ << 8) |
(PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
};
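
/*
 * Each PERF_TYPE_HW_CACHE .config above packs three enums into one
 * value, following the layout documented in the perf_event_open(2)
 * man page and include/linux/perf_event.h:
 *
 *	config = (perf_hw_cache_id)            <<  0 |
 *		 (perf_hw_cache_op_id)         <<  8 |
 *		 (perf_hw_cache_op_result_id)  << 16
 *
 * so, for example, "L1D read misses" is PERF_COUNT_HW_CACHE_L1D |
 * (PERF_COUNT_HW_CACHE_OP_READ << 8) |
 * (PERF_COUNT_HW_CACHE_RESULT_MISS << 16).
 */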
/*
* Very detailed stats (-d -d), covering the instruction cache and the TLB caches:
*/
static struct perf_event_attr very_detailed_attrs[] = {
{ .type = PERF_TYPE_HW_CACHE,
.config =
PERF_COUNT_HW_CACHE_L1I << 0 |
(PERF_COUNT_HW_CACHE_OP_READ << 8) |
(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
{ .type = PERF_TYPE_HW_CACHE,
.config =
PERF_COUNT_HW_CACHE_L1I << 0 |
(PERF_COUNT_HW_CACHE_OP_READ << 8) |
(PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
{ .type = PERF_TYPE_HW_CACHE,
.config =
PERF_COUNT_HW_CACHE_DTLB << 0 |
(PERF_COUNT_HW_CACHE_OP_READ << 8) |
(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
{ .type = PERF_TYPE_HW_CACHE,
.config =
PERF_COUNT_HW_CACHE_DTLB << 0 |
(PERF_COUNT_HW_CACHE_OP_READ << 8) |
(PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
{ .type = PERF_TYPE_HW_CACHE,
.config =
PERF_COUNT_HW_CACHE_ITLB << 0 |
(PERF_COUNT_HW_CACHE_OP_READ << 8) |
(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
{ .type = PERF_TYPE_HW_CACHE,
.config =
PERF_COUNT_HW_CACHE_ITLB << 0 |
(PERF_COUNT_HW_CACHE_OP_READ << 8) |
(PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
};
/*
* Very, very detailed stats (-d -d -d), adding prefetch events:
*/
static struct perf_event_attr very_very_detailed_attrs[] = {
{ .type = PERF_TYPE_HW_CACHE,
.config =
PERF_COUNT_HW_CACHE_L1D << 0 |
(PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) |
(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
{ .type = PERF_TYPE_HW_CACHE,
.config =
PERF_COUNT_HW_CACHE_L1D << 0 |
(PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) |
(PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
};
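
/*
 * -d is an OPT_INCR flag: each extra -d on the command line bumps
 * detailed_run (declared below) one level, and the tables above are
 * appended to the default event set level by level (see
 * add_default_attributes() further down in this file).
 */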
static struct perf_evlist *evsel_list;
static struct perf_target target;
static bool no_inherit = false;

static bool scale = true;
static bool no_aggr = false;

static bool null_run = false;
static int detailed_run = 0;
static bool big_num = true;
static int big_num_opt = -1;
static const char *csv_sep = NULL;
static bool csv_output = false;
static bool group = false;
static const char *output_name = NULL;
static FILE *output = NULL;
static int output_fd;
struct perf_stat {
struct stats res_stats[3];
};
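
/*
 * res_stats[0] accumulates the raw event count across runs;
 * res_stats[1] and res_stats[2] accumulate the time-enabled and
 * time-running values, which print_counter_aggr() below turns into
 * the "[xx.xx%]" multiplexing-scaling column.
 */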
static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
{
	evsel->priv = zalloc(sizeof(struct perf_stat));
return evsel->priv == NULL ? -ENOMEM : 0;
}
static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
{
free(evsel->priv);
evsel->priv = NULL;
}
static void update_stats(struct stats *stats, u64 val)
{
	double delta;

	stats->n++;
	delta = val - stats->mean;
	stats->mean += delta / stats->n;
	stats->M2 += delta*(val - stats->mean);
}
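
/*
 * Worked example of the online (Welford) update above: feeding the
 * values 1, 2, 3 leaves n = 3, mean = 2, M2 = 2, so stddev_stats()
 * below computes a sample variance of M2/(n-1) = 1.0 and reports
 * sqrt(1.0/3) ~= 0.577 as the std dev of the mean.
 */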
static double avg_stats(struct stats *stats)
{
	return stats->mean;
}

/*
 * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
 *
 *       (\Sum n_i^2) - ((\Sum n_i)^2)/n
 * s^2 = -------------------------------
 *                  n - 1
 *
 * http://en.wikipedia.org/wiki/Stddev
 *
 * The std dev of the mean is related to the std dev by:
 *
 *             s
 * s_mean = -------
 *          sqrt(n)
 *
 */
static double stddev_stats(struct stats *stats)
{
double variance, variance_mean;
if (!stats->n)
return 0.0;
variance = stats->M2 / (stats->n - 1);
variance_mean = variance / stats->n;
	return sqrt(variance_mean);
}

static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
static struct stats runtime_cycles_stats[MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_front_stats[MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_back_stats[MAX_NR_CPUS];
static struct stats runtime_branches_stats[MAX_NR_CPUS];
static struct stats runtime_cacherefs_stats[MAX_NR_CPUS];
static struct stats runtime_l1_dcache_stats[MAX_NR_CPUS];
static struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
static struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
static struct stats walltime_nsecs_stats;
static int create_perf_stat_counter(struct perf_evsel *evsel,
				    struct perf_evsel *first)
{
	struct perf_event_attr *attr = &evsel->attr;
struct xyarray *group_fd = NULL;
if (group && evsel != first)
group_fd = first->fd;

if (scale)
attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
PERF_FORMAT_TOTAL_TIME_RUNNING;
attr->inherit = !no_inherit;
if (!perf_target__no_cpu(&target))
return perf_evsel__open_per_cpu(evsel, evsel_list->cpus,
group, group_fd);
	if (perf_target__no_task(&target) && (!group || evsel == first)) {
		attr->disabled = 1;
		attr->enable_on_exec = 1;
	}

	return perf_evsel__open_per_thread(evsel, evsel_list->threads,
					   group, group_fd);
}
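
/*
 * Note on the group_fd plumbing above: with --group, every event after
 * the first is passed the first event's fd array, so perf_event_open()
 * makes it a sibling of that group leader and the kernel schedules the
 * whole group onto the PMU as one unit.
 */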
/*
* Does the counter have nsecs as a unit?
*/
static inline int nsec_counter(struct perf_evsel *evsel)
{
if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
return 1;
return 0;
}
/*
* Update various tracking values we maintain to print
* more semantic information such as miss/hit ratios,
* instruction rates, etc:
*/
static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
{
if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
update_stats(&runtime_nsecs_stats[0], count[0]);
else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
update_stats(&runtime_cycles_stats[0], count[0]);
else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
update_stats(&runtime_stalled_cycles_front_stats[0], count[0]);
else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
update_stats(&runtime_stalled_cycles_back_stats[0], count[0]);
else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
update_stats(&runtime_branches_stats[0], count[0]);
else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
update_stats(&runtime_cacherefs_stats[0], count[0]);
else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
update_stats(&runtime_l1_dcache_stats[0], count[0]);
else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
update_stats(&runtime_l1_icache_stats[0], count[0]);
else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
update_stats(&runtime_ll_cache_stats[0], count[0]);
else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
update_stats(&runtime_dtlb_cache_stats[0], count[0]);
else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
update_stats(&runtime_itlb_cache_stats[0], count[0]);
}

/*
 * Read out the results of a single counter:
 * aggregate counts across CPUs in system-wide mode
 */
static int read_counter_aggr(struct perf_evsel *counter)
{
	struct perf_stat *ps = counter->priv;
u64 *count = counter->counts->aggr.values;
int i;
if (__perf_evsel__read(counter, evsel_list->cpus->nr,
evsel_list->threads->nr, scale) < 0)
return -1;
	for (i = 0; i < 3; i++)
		update_stats(&ps->res_stats[i], count[i]);
fprintf(output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
event_name(counter), count[0], count[1], count[2]);
	/*
	 * Save the full runtime - to allow normalization during printout:
	 */
	update_shadow_stats(counter, count);

	return 0;
}
/*
* Read out the results of a single counter:
* do not aggregate counts across CPUs in system-wide mode
*/
static int read_counter(struct perf_evsel *counter)
{
	u64 *count;
	int cpu;

	for (cpu = 0; cpu < evsel_list->cpus->nr; cpu++) {
		if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0)
			return -1;

		count = counter->counts->cpu[cpu].values;

		update_shadow_stats(counter, count);
	}

	return 0;
}
static int run_perf_stat(int argc __used, const char **argv)
{
unsigned long long t0, t1;
struct perf_evsel *counter, *first;
int status = 0;
int child_ready_pipe[2], go_pipe[2];
const bool forks = (argc > 0);
char buf;
if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
perror("failed to create pipes");
exit(1);
}
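
	/*
	 * The two pipes implement the startup handshake: the child signals
	 * readiness by closing child_ready_pipe[1] (EOF on the parent's
	 * read below), then blocks reading go_pipe[0] until the parent
	 * closes go_pipe[1], right after taking the t0 timestamp.
	 */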
	if (forks) {
		if ((child_pid = fork()) < 0)
			perror("failed to fork");

		if (!child_pid) {
close(child_ready_pipe[0]);
close(go_pipe[1]);
fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
/*
* Do a dummy execvp to get the PLT entry resolved,
* so we avoid the resolver overhead on the real
* execvp call.
*/
execvp("", (char **)argv);
/*
* Tell the parent we're ready to go
*/
close(child_ready_pipe[1]);
/*
* Wait until the parent tells us to go.
*/
if (read(go_pipe[0], &buf, 1) == -1)
perror("unable to read pipe");
execvp(argv[0], (char **)argv);
perror(argv[0]);
exit(-1);
}
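
	/*
	 * Note: the dummy execvp("", ...) above fails immediately (empty
	 * path), but it still forces the dynamic linker to resolve the
	 * execvp PLT entry, so the timed execvp(argv[0], ...) does not pay
	 * that one-time resolver cost inside the measured window.
	 */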
if (perf_target__none(&target))
evsel_list->threads->map[0] = child_pid;

	/*
	 * Wait for the child to be ready to exec.
	 */
close(child_ready_pipe[1]);
close(go_pipe[0]);
if (read(child_ready_pipe[0], &buf, 1) == -1)
perror("unable to read pipe");
		close(child_ready_pipe[0]);
	}

	first = list_entry(evsel_list->entries.next, struct perf_evsel, node);
list_for_each_entry(counter, &evsel_list->entries, node) {
if (create_perf_stat_counter(counter, first) < 0) {
if (errno == EINVAL || errno == ENOSYS ||
errno == ENOENT || errno == EOPNOTSUPP) {
if (verbose)
ui__warning("%s event is not supported by the kernel.\n",
event_name(counter));
				counter->supported = false;
				continue;
			}

			if (errno == EPERM || errno == EACCES) {
error("You may not have permission to collect %sstats.\n"
"\t Consider tweaking"
" /proc/sys/kernel/perf_event_paranoid or running as root.",
target.system_wide ? "system-wide " : "");
} else {
error("open_counter returned with %d (%s). "
"/bin/dmesg may provide additional information.\n",
errno, strerror(errno));
}
if (child_pid != -1)
kill(child_pid, SIGTERM);
die("Not all events could be opened.\n");
return -1;
}
counter->supported = true;
}
if (perf_evlist__set_filters(evsel_list)) {
error("failed to set filter with %d (%s)\n", errno,
strerror(errno));
return -1;
}
/*
* Enable counters and exec the command:
*/
t0 = rdclock();
if (forks) {
close(go_pipe[1]);
wait(&status);
if (WIFSIGNALED(status))
psignal(WTERMSIG(status), argv[0]);
	} else {
		while (!done)
			sleep(1);
	}
t1 = rdclock();
update_stats(&walltime_nsecs_stats, t1 - t0);
	if (no_aggr) {
		list_for_each_entry(counter, &evsel_list->entries, node) {
			read_counter(counter);
			perf_evsel__close_fd(counter, evsel_list->cpus->nr, 1);
		}
	} else {
		list_for_each_entry(counter, &evsel_list->entries, node) {
			read_counter_aggr(counter);
			perf_evsel__close_fd(counter, evsel_list->cpus->nr,
					     evsel_list->threads->nr);
		}
	}

	return WEXITSTATUS(status);
}
static void print_noise_pct(double total, double avg)
{
double pct = 0.0;
if (avg)
pct = 100.0*total/avg;
fprintf(output, "%s%.2f%%", csv_sep, pct);
fprintf(output, " ( +-%6.2f%% )", pct);
static void print_noise(struct perf_evsel *evsel, double avg)
{
	struct perf_stat *ps;

	if (run_count == 1)
		return;

	ps = evsel->priv;
	print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
}
static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
{
	double msecs = avg / 1e6;
	char cpustr[16] = { '\0', };
	const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-25s";

	if (no_aggr)
		sprintf(cpustr, "CPU%*d%s",
			csv_output ? 0 : -4,
			evsel_list->cpus->map[cpu], csv_sep);

	fprintf(output, fmt, cpustr, msecs, csv_sep, event_name(evsel));

	if (evsel->cgrp)
		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);

	if (csv_output)
		return;

	if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
		fprintf(output, " # %8.3f CPUs utilized          ",
			avg / avg_stats(&walltime_nsecs_stats));
	else
		fprintf(output, "                                   ");
}
/* used for get_ratio_color() */
enum grc_type {
GRC_STALLED_CYCLES_FE,
GRC_STALLED_CYCLES_BE,
GRC_CACHE_MISSES,
GRC_MAX_NR
};
static const char *get_ratio_color(enum grc_type type, double ratio)
{
static const double grc_table[GRC_MAX_NR][3] = {
[GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
[GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
[GRC_CACHE_MISSES] = { 20.0, 10.0, 5.0 },
};
const char *color = PERF_COLOR_NORMAL;
if (ratio > grc_table[type][0])
color = PERF_COLOR_RED;
else if (ratio > grc_table[type][1])
color = PERF_COLOR_MAGENTA;
else if (ratio > grc_table[type][2])
color = PERF_COLOR_YELLOW;
return color;
}
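
/*
 * For example, the 74.09% frontend-idle figure in the sample output at
 * the top of this file is above the 50.0 entry in grc_table for
 * GRC_STALLED_CYCLES_FE, so print_stalled_cycles_frontend() would
 * render it in red.
 */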
static void print_stalled_cycles_frontend(int cpu, struct perf_evsel *evsel __used, double avg)
{
double total, ratio = 0.0;
const char *color;
total = avg_stats(&runtime_cycles_stats[cpu]);
if (total)
ratio = avg / total * 100.0;
color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
fprintf(output, " # ");
color_fprintf(output, color, "%6.2f%%", ratio);
fprintf(output, " frontend cycles idle ");
}
static void print_stalled_cycles_backend(int cpu, struct perf_evsel *evsel __used, double avg)
{
double total, ratio = 0.0;
const char *color;
total = avg_stats(&runtime_cycles_stats[cpu]);
if (total)
ratio = avg / total * 100.0;
color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
fprintf(output, " # ");
color_fprintf(output, color, "%6.2f%%", ratio);
fprintf(output, " backend cycles idle ");
static void print_branch_misses(int cpu, struct perf_evsel *evsel __used, double avg)
{
double total, ratio = 0.0;
const char *color;
total = avg_stats(&runtime_branches_stats[cpu]);
if (total)
ratio = avg / total * 100.0;
color = get_ratio_color(GRC_CACHE_MISSES, ratio);
fprintf(output, " # ");
color_fprintf(output, color, "%6.2f%%", ratio);
fprintf(output, " of all branches ");
static void print_l1_dcache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
{
double total, ratio = 0.0;
const char *color;
total = avg_stats(&runtime_l1_dcache_stats[cpu]);
if (total)
ratio = avg / total * 100.0;
color = get_ratio_color(GRC_CACHE_MISSES, ratio);
fprintf(output, " # ");
color_fprintf(output, color, "%6.2f%%", ratio);
fprintf(output, " of all L1-dcache hits ");
static void print_l1_icache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
{
double total, ratio = 0.0;
const char *color;
total = avg_stats(&runtime_l1_icache_stats[cpu]);
if (total)
ratio = avg / total * 100.0;
color = get_ratio_color(GRC_CACHE_MISSES, ratio);
fprintf(output, " # ");
color_fprintf(output, color, "%6.2f%%", ratio);
fprintf(output, " of all L1-icache hits ");
}
static void print_dtlb_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
{
double total, ratio = 0.0;
const char *color;
total = avg_stats(&runtime_dtlb_cache_stats[cpu]);
if (total)
ratio = avg / total * 100.0;
color = get_ratio_color(GRC_CACHE_MISSES, ratio);
fprintf(output, " # ");
color_fprintf(output, color, "%6.2f%%", ratio);
fprintf(output, " of all dTLB cache hits ");
}
static void print_itlb_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
{
double total, ratio = 0.0;
const char *color;
total = avg_stats(&runtime_itlb_cache_stats[cpu]);
if (total)
ratio = avg / total * 100.0;
color = get_ratio_color(GRC_CACHE_MISSES, ratio);
fprintf(output, " # ");
color_fprintf(output, color, "%6.2f%%", ratio);
fprintf(output, " of all iTLB cache hits ");
}
static void print_ll_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
{
double total, ratio = 0.0;
const char *color;
total = avg_stats(&runtime_ll_cache_stats[cpu]);
if (total)
ratio = avg / total * 100.0;
color = get_ratio_color(GRC_CACHE_MISSES, ratio);
fprintf(output, " # ");
color_fprintf(output, color, "%6.2f%%", ratio);
fprintf(output, " of all LL-cache hits ");
static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
{
	double total, ratio = 0.0;
	char cpustr[16] = { '\0', };
	const char *fmt;

	if (csv_output)
		fmt = "%s%.0f%s%s";
	else if (big_num)
		fmt = "%s%'18.0f%s%-25s";
	else
		fmt = "%s%18.0f%s%-25s";

	if (no_aggr)
		sprintf(cpustr, "CPU%*d%s",
			csv_output ? 0 : -4,
			evsel_list->cpus->map[cpu], csv_sep);

	fprintf(output, fmt, cpustr, avg, csv_sep, event_name(evsel));

	if (evsel->cgrp)
		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);

	if (csv_output)
		return;
if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
total = avg_stats(&runtime_cycles_stats[cpu]);
if (total)
ratio = avg / total;
fprintf(output, " # %5.2f insns per cycle ", ratio);
total = avg_stats(&runtime_stalled_cycles_front_stats[cpu]);
total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[cpu]));
if (total && avg) {
ratio = total / avg;
fprintf(output, "\n # %5.2f stalled cycles per insn", ratio);
}
} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
runtime_branches_stats[cpu].n != 0) {
print_branch_misses(cpu, evsel, avg);
} else if (
evsel->attr.type == PERF_TYPE_HW_CACHE &&
evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D |
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
runtime_l1_dcache_stats[cpu].n != 0) {
print_l1_dcache_misses(cpu, evsel, avg);
} else if (
evsel->attr.type == PERF_TYPE_HW_CACHE &&
evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I |
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
runtime_l1_icache_stats[cpu].n != 0) {
print_l1_icache_misses(cpu, evsel, avg);
} else if (
evsel->attr.type == PERF_TYPE_HW_CACHE &&
evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB |
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
runtime_dtlb_cache_stats[cpu].n != 0) {
print_dtlb_cache_misses(cpu, evsel, avg);
} else if (
evsel->attr.type == PERF_TYPE_HW_CACHE &&
evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB |
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
runtime_itlb_cache_stats[cpu].n != 0) {
print_itlb_cache_misses(cpu, evsel, avg);
} else if (
evsel->attr.type == PERF_TYPE_HW_CACHE &&
evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL |
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
runtime_ll_cache_stats[cpu].n != 0) {
print_ll_cache_misses(cpu, evsel, avg);
} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
runtime_cacherefs_stats[cpu].n != 0) {
total = avg_stats(&runtime_cacherefs_stats[cpu]);
if (total)
ratio = avg * 100 / total;
fprintf(output, " # %8.3f %% of all cache refs ", ratio);
} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
print_stalled_cycles_frontend(cpu, evsel, avg);
} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
print_stalled_cycles_backend(cpu, evsel, avg);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
		total = avg_stats(&runtime_nsecs_stats[cpu]);

		if (total)
			ratio = 1.0 * avg / total;

		fprintf(output, " # %8.3f GHz                    ", ratio);
	} else if (runtime_nsecs_stats[cpu].n != 0) {
		char unit = 'M';

		total = avg_stats(&runtime_nsecs_stats[cpu]);

		if (total)
			ratio = 1000.0 * avg / total;
		if (ratio < 0.001) {
			ratio *= 1000;
			unit = 'K';
		}

		fprintf(output, " # %8.3f %c/sec                  ", ratio, unit);
	} else {
		fprintf(output, "                                   ");
	}
}
/*
 * Print out the results of a single counter:
 * aggregated counts in system-wide mode
 */
static void print_counter_aggr(struct perf_evsel *counter)
{
	struct perf_stat *ps = counter->priv;
double avg = avg_stats(&ps->res_stats[0]);
int scaled = counter->counts->scaled;
	if (scaled == -1) {
		fprintf(output, "%*s%s%*s",
			csv_output ? 0 : 18,
			counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
			csv_sep,
			csv_output ? 0 : -24,
			event_name(counter));
		if (counter->cgrp)
			fprintf(output, "%s%s", csv_sep, counter->cgrp->name);

		fputc('\n', output);
		return;
	}

	if (nsec_counter(counter))
		nsec_printout(-1, counter, avg);
	else
		abs_printout(-1, counter, avg);

	print_noise(counter, avg);
	if (scaled) {
		double avg_enabled, avg_running;

		avg_enabled = avg_stats(&ps->res_stats[1]);
		avg_running = avg_stats(&ps->res_stats[2]);

		fprintf(output, " [%5.2f%%]", 100 * avg_running / avg_enabled);
	}

	fprintf(output, "\n");
}
/*
* Print out the results of a single counter:
 * does not use aggregated count in system-wide mode
*/
static void print_counter(struct perf_evsel *counter)
{
u64 ena, run, val;
int cpu;
for (cpu = 0; cpu < evsel_list->cpus->nr; cpu++) {
val = counter->counts->cpu[cpu].val;
ena = counter->counts->cpu[cpu].ena;
run = counter->counts->cpu[cpu].run;
fprintf(output, "CPU%*d%s%*s%s%*s",
evsel_list->cpus->map[cpu], csv_sep,
counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
csv_sep,
fprintf(output, "%s%s",
csv_sep, counter->cgrp->name);
continue;
}
if (nsec_counter(counter))
nsec_printout(cpu, counter, val);
else
abs_printout(cpu, counter, val);
		if (!csv_output) {
			print_noise(counter, 1.0);

			if (run != ena)
				fprintf(output, " (%.2f%%)",
					100.0 * run / ena);
		}
		fputc('\n', output);
	}
}
static void print_stat(int argc, const char **argv)
{
struct perf_evsel *counter;
int i;
fflush(stdout);
	if (!csv_output) {
		fprintf(output, "\n");
		fprintf(output, " Performance counter stats for ");
		if (perf_target__no_task(&target)) {
			fprintf(output, "\'%s", argv[0]);
			for (i = 1; i < argc; i++)
				fprintf(output, " %s", argv[i]);
		} else if (target.pid)
			fprintf(output, "process id \'%s", target.pid);
		else
			fprintf(output, "thread id \'%s", target.tid);

		fprintf(output, "\'");
		if (run_count > 1)
			fprintf(output, " (%d runs)", run_count);
		fprintf(output, ":\n\n");
	}
	if (no_aggr) {
		list_for_each_entry(counter, &evsel_list->entries, node)
print_counter(counter);
} else {
list_for_each_entry(counter, &evsel_list->entries, node)
print_counter_aggr(counter);
}
fprintf(output, "\n");
fprintf(output, " %17.9f seconds time elapsed",
avg_stats(&walltime_nsecs_stats)/1e9);
if (run_count > 1) {
print_noise_pct(stddev_stats(&walltime_nsecs_stats),
avg_stats(&walltime_nsecs_stats));