Newer
Older
* builtin-stat.c
*
* Builtin stat command: Give a precise performance counters summary
* overview about any workload, CPU or specific PID.
*
* Sample output:
$ perf stat ./hackbench 10
Time: 0.118
Performance counter stats for './hackbench 10':
1708.761321 task-clock # 11.037 CPUs utilized
41,190 context-switches # 0.024 M/sec
6,735 CPU-migrations # 0.004 M/sec
17,318 page-faults # 0.010 M/sec
5,205,202,243 cycles # 3.046 GHz
3,856,436,920 stalled-cycles-frontend # 74.09% frontend cycles idle
1,600,790,871 stalled-cycles-backend # 30.75% backend cycles idle
2,603,501,247 instructions # 0.50 insns per cycle
# 1.48 stalled cycles per insn
484,357,498 branches # 283.455 M/sec
6,388,934 branch-misses # 1.32% of all branches
0.154822978 seconds time elapsed
* Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
*
* Improvements and fixes by:
*
* Arjan van de Ven <arjan@linux.intel.com>
* Yanmin Zhang <yanmin.zhang@intel.com>
* Wu Fengguang <fengguang.wu@intel.com>
* Mike Galbraith <efault@gmx.de>
* Paul Mackerras <paulus@samba.org>
* Jaswinder Singh Rajput <jaswinder@kernel.org>
*
* Released under the GPL v2. (and only v2, not any later version)
*/
#include "builtin.h"
#include "util/parse-options.h"
#include "util/parse-events.h"
#include "util/stat.h"
#include "util/cpumap.h"

Zhang, Yanmin
committed
#include "util/thread.h"
#include "util/thread_map.h"
#include <sys/prctl.h>
#include <locale.h>

Peter Zijlstra
committed
#define CNTR_NOT_SUPPORTED "<not supported>"
#define CNTR_NOT_COUNTED "<not counted>"
static void print_stat(int argc, const char **argv);
static void print_counter_aggr(struct perf_evsel *counter, char *prefix);
static void print_counter(struct perf_evsel *counter, char *prefix);
static void print_aggr_socket(char *prefix);
static struct perf_evlist *evsel_list;
static struct perf_target target = {
.uid = UINT_MAX,
};
static bool no_inherit = false;

Ian Munsie
committed
static bool scale = true;
static bool aggr_socket = false;

Ian Munsie
committed
static bool null_run = false;
static int detailed_run = 0;
static bool big_num = true;
static int big_num_opt = -1;
static const char *csv_sep = NULL;
static bool csv_output = false;
static bool group = false;
static const char *pre_cmd = NULL;
static const char *post_cmd = NULL;
static bool sync_run = false;
static unsigned int interval = 0;
static struct timespec ref_time;
static struct cpu_map *sock_map;
struct perf_stat {
struct stats res_stats[3];
};
static inline void diff_timespec(struct timespec *r, struct timespec *a,
struct timespec *b)
{
r->tv_sec = a->tv_sec - b->tv_sec;
if (a->tv_nsec < b->tv_nsec) {
r->tv_nsec = a->tv_nsec + 1000000000L - b->tv_nsec;
r->tv_sec--;
} else {
r->tv_nsec = a->tv_nsec - b->tv_nsec ;
}
}
static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel)
{
return (evsel->cpus && !target.cpu_list) ? evsel->cpus : evsel_list->cpus;
}
static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel)
{
return perf_evsel__cpus(evsel)->nr;
}
static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
evsel->priv = zalloc(sizeof(struct perf_stat));
return evsel->priv == NULL ? -ENOMEM : 0;
}
static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
{
free(evsel->priv);
evsel->priv = NULL;
}
static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel)
void *addr;
size_t sz;
sz = sizeof(*evsel->counts) +
(perf_evsel__nr_cpus(evsel) * sizeof(struct perf_counts_values));
addr = zalloc(sz);
if (!addr)
return -ENOMEM;
evsel->prev_raw_counts = addr;
return 0;
static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel)
free(evsel->prev_raw_counts);
evsel->prev_raw_counts = NULL;
static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
static struct stats runtime_cycles_stats[MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_front_stats[MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_back_stats[MAX_NR_CPUS];
static struct stats runtime_branches_stats[MAX_NR_CPUS];
static struct stats runtime_cacherefs_stats[MAX_NR_CPUS];
static struct stats runtime_l1_dcache_stats[MAX_NR_CPUS];
static struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
static struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
static struct stats walltime_nsecs_stats;
static int create_perf_stat_counter(struct perf_evsel *evsel)
struct perf_event_attr *attr = &evsel->attr;
if (scale)
attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
PERF_FORMAT_TOTAL_TIME_RUNNING;
attr->inherit = !no_inherit;
if (perf_target__has_cpu(&target))
return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));

Stephane Eranian
committed
if (!perf_target__has_task(&target) &&
perf_evsel__is_group_leader(evsel)) {
attr->disabled = 1;
attr->enable_on_exec = 1;
return perf_evsel__open_per_thread(evsel, evsel_list->threads);
}
/*
* Does the counter have nsecs as a unit?
*/
static inline int nsec_counter(struct perf_evsel *evsel)
if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
return 1;
return 0;
}
/*
* Update various tracking values we maintain to print
* more semantic information such as miss/hit ratios,
* instruction rates, etc:
*/
static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
{
if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
update_stats(&runtime_nsecs_stats[0], count[0]);
else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
update_stats(&runtime_cycles_stats[0], count[0]);
else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
update_stats(&runtime_stalled_cycles_front_stats[0], count[0]);
else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
update_stats(&runtime_stalled_cycles_back_stats[0], count[0]);
else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
update_stats(&runtime_branches_stats[0], count[0]);
else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
update_stats(&runtime_cacherefs_stats[0], count[0]);
else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
update_stats(&runtime_l1_dcache_stats[0], count[0]);
else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
update_stats(&runtime_l1_icache_stats[0], count[0]);
else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
update_stats(&runtime_ll_cache_stats[0], count[0]);
else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
update_stats(&runtime_dtlb_cache_stats[0], count[0]);
else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
update_stats(&runtime_itlb_cache_stats[0], count[0]);
* Read out the results of a single counter:
* aggregate counts across CPUs in system-wide mode
static int read_counter_aggr(struct perf_evsel *counter)
struct perf_stat *ps = counter->priv;
u64 *count = counter->counts->aggr.values;
int i;
if (__perf_evsel__read(counter, perf_evsel__nr_cpus(counter),
thread_map__nr(evsel_list->threads), scale) < 0)
return -1;
update_stats(&ps->res_stats[i], count[i]);
fprintf(output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
perf_evsel__name(counter), count[0], count[1], count[2]);
/*
* Save the full runtime - to allow normalization during printout:
*/
return 0;
}
/*
* Read out the results of a single counter:
* do not aggregate counts across CPUs in system-wide mode
*/
static int read_counter(struct perf_evsel *counter)
u64 *count;
for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0)
return -1;
count = counter->counts->cpu[cpu].values;
return 0;
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
static void print_interval(void)
{
static int num_print_interval;
struct perf_evsel *counter;
struct perf_stat *ps;
struct timespec ts, rs;
char prefix[64];
if (no_aggr) {
list_for_each_entry(counter, &evsel_list->entries, node) {
ps = counter->priv;
memset(ps->res_stats, 0, sizeof(ps->res_stats));
read_counter(counter);
}
} else {
list_for_each_entry(counter, &evsel_list->entries, node) {
ps = counter->priv;
memset(ps->res_stats, 0, sizeof(ps->res_stats));
read_counter_aggr(counter);
}
}
clock_gettime(CLOCK_MONOTONIC, &ts);
diff_timespec(&rs, &ts, &ref_time);
sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep);
if (num_print_interval == 0 && !csv_output) {
if (aggr_socket)
fprintf(output, "# time socket cpus counts events\n");
else if (no_aggr)
fprintf(output, "# time CPU counts events\n");
else
fprintf(output, "# time counts events\n");
}
if (++num_print_interval == 25)
num_print_interval = 0;
if (aggr_socket)
print_aggr_socket(prefix);
else if (no_aggr) {
list_for_each_entry(counter, &evsel_list->entries, node)
print_counter(counter, prefix);
} else {
list_for_each_entry(counter, &evsel_list->entries, node)
print_counter_aggr(counter, prefix);
}
}
static int __run_perf_stat(int argc __maybe_unused, const char **argv)
char msg[512];
unsigned long long t0, t1;
struct perf_evsel *counter;
int status = 0;
int child_ready_pipe[2], go_pipe[2];
const bool forks = (argc > 0);
char buf;
if (interval) {
ts.tv_sec = interval / 1000;
ts.tv_nsec = (interval % 1000) * 1000000;
} else {
ts.tv_sec = 1;
ts.tv_nsec = 0;
}
if (aggr_socket
&& cpu_map__build_socket_map(evsel_list->cpus, &sock_map)) {
perror("cannot build socket map");
return -1;
}
if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
perror("failed to create pipes");
if ((child_pid = fork()) < 0)
if (!child_pid) {
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
close(child_ready_pipe[0]);
close(go_pipe[1]);
fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
/*
* Do a dummy execvp to get the PLT entry resolved,
* so we avoid the resolver overhead on the real
* execvp call.
*/
execvp("", (char **)argv);
/*
* Tell the parent we're ready to go
*/
close(child_ready_pipe[1]);
/*
* Wait until the parent tells us to go.
*/
if (read(go_pipe[0], &buf, 1) == -1)
perror("unable to read pipe");
execvp(argv[0], (char **)argv);
perror(argv[0]);
exit(-1);
}
if (perf_target__none(&target))
evsel_list->threads->map[0] = child_pid;

Zhang, Yanmin
committed
* Wait for the child to be ready to exec.
*/
close(child_ready_pipe[1]);
close(go_pipe[0]);
if (read(child_ready_pipe[0], &buf, 1) == -1)
perror("unable to read pipe");
perf_evlist__set_leader(evsel_list);
list_for_each_entry(counter, &evsel_list->entries, node) {
if (create_perf_stat_counter(counter) < 0) {
/*
* PPC returns ENXIO for HW counters until 2.6.37
* (behavior changed with commit b0a873e).
*/
if (errno == EINVAL || errno == ENOSYS ||
errno == ENOENT || errno == EOPNOTSUPP ||
errno == ENXIO) {
if (verbose)
ui__warning("%s event is not supported by the kernel.\n",
perf_evsel__name(counter));
counter->supported = false;
perf_evsel__open_strerror(counter, &target,
errno, msg, sizeof(msg));
ui__error("%s\n", msg);
if (child_pid != -1)
kill(child_pid, SIGTERM);
return -1;
}
counter->supported = true;
}
if (perf_evlist__apply_filters(evsel_list)) {
error("failed to set filter with %d (%s)\n", errno,
strerror(errno));
return -1;
}
/*
* Enable counters and exec the command:
*/
t0 = rdclock();
clock_gettime(CLOCK_MONOTONIC, &ref_time);
if (interval) {
while (!waitpid(child_pid, &status, WNOHANG)) {
nanosleep(&ts, NULL);
print_interval();
}
}
if (WIFSIGNALED(status))
psignal(WTERMSIG(status), argv[0]);
while (!done) {
nanosleep(&ts, NULL);
if (interval)
print_interval();
}
t1 = rdclock();
update_stats(&walltime_nsecs_stats, t1 - t0);
list_for_each_entry(counter, &evsel_list->entries, node) {
perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 1);
list_for_each_entry(counter, &evsel_list->entries, node) {
perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter),
thread_map__nr(evsel_list->threads));
return WEXITSTATUS(status);
}
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
static int run_perf_stat(int argc __maybe_unused, const char **argv)
{
int ret;
if (pre_cmd) {
ret = system(pre_cmd);
if (ret)
return ret;
}
if (sync_run)
sync();
ret = __run_perf_stat(argc, argv);
if (ret)
return ret;
if (post_cmd) {
ret = system(post_cmd);
if (ret)
return ret;
}
return ret;
}
static void print_noise_pct(double total, double avg)
{
double pct = rel_stddev_stats(total, avg);
fprintf(output, "%s%.2f%%", csv_sep, pct);
fprintf(output, " ( +-%6.2f%% )", pct);
static void print_noise(struct perf_evsel *evsel, double avg)
struct perf_stat *ps;
if (run_count == 1)
return;
print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
static void nsec_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
double msecs = avg / 1e6;
const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-25s";
if (aggr_socket)
sprintf(cpustr, "S%*d%s%*d%s",
csv_output ? 0 : -5,
cpu,
csv_sep,
csv_output ? 0 : 4,
nr,
csv_sep);
else if (no_aggr)
sprintf(cpustr, "CPU%*d%s",
csv_output ? 0 : -4,
perf_evsel__cpus(evsel)->map[cpu], csv_sep);
fprintf(output, fmt, cpustr, msecs, csv_sep, perf_evsel__name(evsel));
fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
fprintf(output, " # %8.3f CPUs utilized ",
avg / avg_stats(&walltime_nsecs_stats));
else
fprintf(output, " ");
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
/* used for get_ratio_color() */
enum grc_type {
GRC_STALLED_CYCLES_FE,
GRC_STALLED_CYCLES_BE,
GRC_CACHE_MISSES,
GRC_MAX_NR
};
static const char *get_ratio_color(enum grc_type type, double ratio)
{
static const double grc_table[GRC_MAX_NR][3] = {
[GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
[GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
[GRC_CACHE_MISSES] = { 20.0, 10.0, 5.0 },
};
const char *color = PERF_COLOR_NORMAL;
if (ratio > grc_table[type][0])
color = PERF_COLOR_RED;
else if (ratio > grc_table[type][1])
color = PERF_COLOR_MAGENTA;
else if (ratio > grc_table[type][2])
color = PERF_COLOR_YELLOW;
return color;
}
static void print_stalled_cycles_frontend(int cpu,
struct perf_evsel *evsel
__maybe_unused, double avg)
{
double total, ratio = 0.0;
const char *color;
total = avg_stats(&runtime_cycles_stats[cpu]);
if (total)
ratio = avg / total * 100.0;
color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
fprintf(output, " # ");
color_fprintf(output, color, "%6.2f%%", ratio);
fprintf(output, " frontend cycles idle ");
static void print_stalled_cycles_backend(int cpu,
struct perf_evsel *evsel
__maybe_unused, double avg)
{
double total, ratio = 0.0;
const char *color;
total = avg_stats(&runtime_cycles_stats[cpu]);
if (total)
ratio = avg / total * 100.0;
color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
fprintf(output, " # ");
color_fprintf(output, color, "%6.2f%%", ratio);
fprintf(output, " backend cycles idle ");
static void print_branch_misses(int cpu,
struct perf_evsel *evsel __maybe_unused,
double avg)
{
double total, ratio = 0.0;
const char *color;
total = avg_stats(&runtime_branches_stats[cpu]);
if (total)
ratio = avg / total * 100.0;
color = get_ratio_color(GRC_CACHE_MISSES, ratio);
fprintf(output, " # ");
color_fprintf(output, color, "%6.2f%%", ratio);
fprintf(output, " of all branches ");
static void print_l1_dcache_misses(int cpu,
struct perf_evsel *evsel __maybe_unused,
double avg)
{
double total, ratio = 0.0;
const char *color;
total = avg_stats(&runtime_l1_dcache_stats[cpu]);
if (total)
ratio = avg / total * 100.0;
color = get_ratio_color(GRC_CACHE_MISSES, ratio);
fprintf(output, " # ");
color_fprintf(output, color, "%6.2f%%", ratio);
fprintf(output, " of all L1-dcache hits ");
static void print_l1_icache_misses(int cpu,
struct perf_evsel *evsel __maybe_unused,
double avg)
{
double total, ratio = 0.0;
const char *color;
total = avg_stats(&runtime_l1_icache_stats[cpu]);
if (total)
ratio = avg / total * 100.0;
color = get_ratio_color(GRC_CACHE_MISSES, ratio);
fprintf(output, " # ");
color_fprintf(output, color, "%6.2f%%", ratio);
fprintf(output, " of all L1-icache hits ");
static void print_dtlb_cache_misses(int cpu,
struct perf_evsel *evsel __maybe_unused,
double avg)
{
double total, ratio = 0.0;
const char *color;
total = avg_stats(&runtime_dtlb_cache_stats[cpu]);
if (total)
ratio = avg / total * 100.0;
color = get_ratio_color(GRC_CACHE_MISSES, ratio);
fprintf(output, " # ");
color_fprintf(output, color, "%6.2f%%", ratio);
fprintf(output, " of all dTLB cache hits ");
static void print_itlb_cache_misses(int cpu,
struct perf_evsel *evsel __maybe_unused,
double avg)
{
double total, ratio = 0.0;
const char *color;
total = avg_stats(&runtime_itlb_cache_stats[cpu]);
if (total)
ratio = avg / total * 100.0;
color = get_ratio_color(GRC_CACHE_MISSES, ratio);
fprintf(output, " # ");
color_fprintf(output, color, "%6.2f%%", ratio);
fprintf(output, " of all iTLB cache hits ");
static void print_ll_cache_misses(int cpu,
struct perf_evsel *evsel __maybe_unused,
double avg)
{
double total, ratio = 0.0;
const char *color;
total = avg_stats(&runtime_ll_cache_stats[cpu]);
if (total)
ratio = avg / total * 100.0;
color = get_ratio_color(GRC_CACHE_MISSES, ratio);
fprintf(output, " # ");
color_fprintf(output, color, "%6.2f%%", ratio);
fprintf(output, " of all LL-cache hits ");
static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
char cpustr[16] = { '\0', };
const char *fmt;
if (csv_output)
fmt = "%s%.0f%s%s";
else if (big_num)
fmt = "%s%'18.0f%s%-25s";
fmt = "%s%18.0f%s%-25s";
if (aggr_socket)
sprintf(cpustr, "S%*d%s%*d%s",
csv_output ? 0 : -5,
cpu,
csv_sep,
csv_output ? 0 : 4,
nr,
csv_sep);
else if (no_aggr)
sprintf(cpustr, "CPU%*d%s",
csv_output ? 0 : -4,
perf_evsel__cpus(evsel)->map[cpu], csv_sep);
fprintf(output, fmt, cpustr, avg, csv_sep, perf_evsel__name(evsel));
fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
total = avg_stats(&runtime_cycles_stats[cpu]);
if (total)
ratio = avg / total;
fprintf(output, " # %5.2f insns per cycle ", ratio);
total = avg_stats(&runtime_stalled_cycles_front_stats[cpu]);
total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[cpu]));
if (total && avg) {
ratio = total / avg;
fprintf(output, "\n # %5.2f stalled cycles per insn", ratio);
}
} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
runtime_branches_stats[cpu].n != 0) {
print_branch_misses(cpu, evsel, avg);
} else if (
evsel->attr.type == PERF_TYPE_HW_CACHE &&
evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D |
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
runtime_l1_dcache_stats[cpu].n != 0) {
print_l1_dcache_misses(cpu, evsel, avg);
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
} else if (
evsel->attr.type == PERF_TYPE_HW_CACHE &&
evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I |
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
runtime_l1_icache_stats[cpu].n != 0) {
print_l1_icache_misses(cpu, evsel, avg);
} else if (
evsel->attr.type == PERF_TYPE_HW_CACHE &&
evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB |
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
runtime_dtlb_cache_stats[cpu].n != 0) {
print_dtlb_cache_misses(cpu, evsel, avg);
} else if (
evsel->attr.type == PERF_TYPE_HW_CACHE &&
evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB |
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
runtime_itlb_cache_stats[cpu].n != 0) {
print_itlb_cache_misses(cpu, evsel, avg);
} else if (
evsel->attr.type == PERF_TYPE_HW_CACHE &&
evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL |
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
runtime_ll_cache_stats[cpu].n != 0) {
print_ll_cache_misses(cpu, evsel, avg);
} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
runtime_cacherefs_stats[cpu].n != 0) {
total = avg_stats(&runtime_cacherefs_stats[cpu]);
if (total)
ratio = avg * 100 / total;
fprintf(output, " # %8.3f %% of all cache refs ", ratio);
} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
print_stalled_cycles_frontend(cpu, evsel, avg);
} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
print_stalled_cycles_backend(cpu, evsel, avg);
} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
total = avg_stats(&runtime_nsecs_stats[cpu]);
ratio = 1.0 * avg / total;
fprintf(output, " # %8.3f GHz ", ratio);
} else if (runtime_nsecs_stats[cpu].n != 0) {
total = avg_stats(&runtime_nsecs_stats[cpu]);
ratio = 1000.0 * avg / total;
if (ratio < 0.001) {
ratio *= 1000;
unit = 'K';
}
fprintf(output, " # %8.3f %c/sec ", ratio, unit);
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
static void print_aggr_socket(char *prefix)
{
struct perf_evsel *counter;
u64 ena, run, val;
int cpu, s, s2, sock, nr;
if (!sock_map)
return;
for (s = 0; s < sock_map->nr; s++) {
sock = cpu_map__socket(sock_map, s);
list_for_each_entry(counter, &evsel_list->entries, node) {
val = ena = run = 0;
nr = 0;
for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
s2 = cpu_map__get_socket(evsel_list->cpus, cpu);
if (s2 != sock)
continue;
val += counter->counts->cpu[cpu].val;
ena += counter->counts->cpu[cpu].ena;
run += counter->counts->cpu[cpu].run;
nr++;
}
if (prefix)
fprintf(output, "%s", prefix);
if (run == 0 || ena == 0) {
fprintf(output, "S%*d%s%*d%s%*s%s%*s",
csv_output ? 0 : -5,
s,
csv_sep,
csv_output ? 0 : 4,
nr,
csv_sep,
csv_output ? 0 : 18,
counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
csv_sep,
csv_output ? 0 : -24,
perf_evsel__name(counter));
if (counter->cgrp)
fprintf(output, "%s%s",
csv_sep, counter->cgrp->name);
fputc('\n', output);
continue;
}
if (nsec_counter(counter))
nsec_printout(sock, nr, counter, val);
else
abs_printout(sock, nr, counter, val);
if (!csv_output) {
print_noise(counter, 1.0);
if (run != ena)
fprintf(output, " (%.2f%%)",
100.0 * run / ena);
}
fputc('\n', output);
}
}
}
/*
* Print out the results of a single counter:
* aggregated counts in system-wide mode
static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
struct perf_stat *ps = counter->priv;
double avg = avg_stats(&ps->res_stats[0]);
int scaled = counter->counts->scaled;
if (prefix)
fprintf(output, "%s", prefix);
if (scaled == -1) {
counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
perf_evsel__name(counter));
fprintf(output, "%s%s", csv_sep, counter->cgrp->name);
nsec_printout(-1, 0, counter, avg);
abs_printout(-1, 0, counter, avg);
if (scaled) {
double avg_enabled, avg_running;
avg_enabled = avg_stats(&ps->res_stats[1]);
avg_running = avg_stats(&ps->res_stats[2]);
fprintf(output, " [%5.2f%%]", 100 * avg_running / avg_enabled);