/*
 * builtin-stat.c
*
* Builtin stat command: Give a precise performance counters summary
* overview about any workload, CPU or specific PID.
*
* Sample output:
$ perf stat ./hackbench 10
Time: 0.118
Performance counter stats for './hackbench 10':
1708.761321 task-clock # 11.037 CPUs utilized
41,190 context-switches # 0.024 M/sec
6,735 CPU-migrations # 0.004 M/sec
17,318 page-faults # 0.010 M/sec
5,205,202,243 cycles # 3.046 GHz
3,856,436,920 stalled-cycles-frontend # 74.09% frontend cycles idle
1,600,790,871 stalled-cycles-backend # 30.75% backend cycles idle
2,603,501,247 instructions # 0.50 insns per cycle
# 1.48 stalled cycles per insn
484,357,498 branches # 283.455 M/sec
6,388,934 branch-misses # 1.32% of all branches
0.154822978 seconds time elapsed
* Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
*
* Improvements and fixes by:
*
* Arjan van de Ven <arjan@linux.intel.com>
* Yanmin Zhang <yanmin.zhang@intel.com>
* Wu Fengguang <fengguang.wu@intel.com>
* Mike Galbraith <efault@gmx.de>
* Paul Mackerras <paulus@samba.org>
* Jaswinder Singh Rajput <jaswinder@kernel.org>
*
* Released under the GPL v2. (and only v2, not any later version)
*/
#include "builtin.h"
#include "util/parse-options.h"
#include "util/parse-events.h"
#include "util/stat.h"
#include "util/cpumap.h"

Zhang, Yanmin
committed
#include "util/thread.h"
#include "util/thread_map.h"
#include <sys/prctl.h>
#include <locale.h>

Peter Zijlstra
committed
/*
 * Placeholders printed instead of a value when a counter produced no
 * usable sample; which one is chosen depends on counter->supported.
 */
#define CNTR_NOT_SUPPORTED "<not supported>"
#define CNTR_NOT_COUNTED "<not counted>"
/* Forward declarations for the printing routines used before their definition. */
static void print_stat(int argc, const char **argv);
static void print_counter_aggr(struct perf_evsel *counter, char *prefix);
static void print_counter(struct perf_evsel *counter, char *prefix);
static void print_aggr(char *prefix);
/* The events being counted; walked through ->entries all over this file. */
static struct perf_evlist *evsel_list;
/* What is being measured. NOTE(review): UINT_MAX presumably means "no uid filter" - confirm. */
static struct perf_target target = {
.uid = UINT_MAX,
};
/*
 * How counts are grouped when they are printed.  The printing code
 * switches on this value to pick the row prefix and the print routine.
 */
enum aggr_mode {
	AGGR_NONE,	/* one row per CPU ("CPU<n>" prefix) */
	AGGR_GLOBAL,	/* one row per event, no prefix */
	AGGR_SOCKET,	/* one row per socket ("S<n>" prefix) */
	AGGR_CORE,	/* one row per core ("S<n>-C<n>" prefix) */
};
/* When set, newly opened counters get attr->inherit cleared. */
static bool no_inherit = false;

Ian Munsie
committed
/* When set, counters are opened with the enabled/running time read format. */
static bool scale = true;
/* Selects the row prefix and print routine; see the switch statements below. */
static enum aggr_mode aggr_mode = AGGR_GLOBAL;
/* Pid of the forked workload, or -1 when there is none to signal. */
static volatile pid_t child_pid = -1;

Ian Munsie
committed
/* NOTE(review): the next four flags are not used in the visible part of the file. */
static bool null_run = false;
static int detailed_run = 0;
static bool big_num = true;
static int big_num_opt = -1;
/* Field separator written between columns by the fprintf() calls below. */
static const char *csv_sep = NULL;
/* Selects the unpadded CSV format strings over the column-aligned ones. */
static bool csv_output = false;
static bool group = false;
/* Shell commands handed to system() before and after the measured run. */
static const char *pre_cmd = NULL;
static const char *post_cmd = NULL;
/* When set, sync() is called right before the measured run. */
static bool sync_run = false;
/* Milliseconds to wait before enabling the counters; 0 disables the wait. */
static unsigned int initial_delay = 0;
/* Ids iterated by print_aggr(), one output group per entry. */
static struct cpu_map *aggr_map;
/* Maps a CPU number to the aggregation id it belongs to. */
static int (*aggr_get_id)(struct cpu_map *m, int cpu);
/*
 * Per-event private data, hung off evsel->priv.
 * res_stats[] accumulates one running statistic per aggregated value
 * read from the counter (three values per read).
 */
struct perf_stat {
struct stats res_stats[3];
};
/*
 * Compute *r = *a - *b, normalising so that r->tv_nsec stays within one
 * second when the nanosecond part of @a is smaller than that of @b.
 */
static inline void diff_timespec(struct timespec *r, struct timespec *a,
				 struct timespec *b)
{
	const int borrow = a->tv_nsec < b->tv_nsec;
	const time_t sec = a->tv_sec - b->tv_sec;
	long nsec;

	if (borrow)
		nsec = a->tv_nsec + 1000000000L - b->tv_nsec;
	else
		nsec = a->tv_nsec - b->tv_nsec;

	/* Borrow one second to cover the nanosecond deficit. */
	r->tv_sec = borrow ? sec - 1 : sec;
	r->tv_nsec = nsec;
}
/*
 * Pick the CPU map to use for @evsel: its own map when it has one and
 * no explicit CPU list was given for the target, otherwise the map of
 * the whole event list.
 */
static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel)
{
	if (target.cpu_list || !evsel->cpus)
		return evsel_list->cpus;

	return evsel->cpus;
}
static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel)
{
return perf_evsel__cpus(evsel)->nr;
}
/* Zero the struct perf_stat hanging off evsel->priv. */
static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel)
{
	struct perf_stat *ps = evsel->priv;

	memset(ps, 0, sizeof(*ps));
}
static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
evsel->priv = zalloc(sizeof(struct perf_stat));
return evsel->priv == NULL ? -ENOMEM : 0;
}
/* Release evsel->priv and leave the pointer cleared. */
static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
{
	void *priv = evsel->priv;

	evsel->priv = NULL;
	free(priv);
}
static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel)
void *addr;
size_t sz;
sz = sizeof(*evsel->counts) +
(perf_evsel__nr_cpus(evsel) * sizeof(struct perf_counts_values));
addr = zalloc(sz);
if (!addr)
return -ENOMEM;
evsel->prev_raw_counts = addr;
return 0;
static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel)
free(evsel->prev_raw_counts);
evsel->prev_raw_counts = NULL;
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
/*
 * Release, for every event in @evlist, the private stat data, the
 * counts and the previous raw counts.
 */
static void perf_evlist__free_stats(struct perf_evlist *evlist)
{
	struct perf_evsel *pos;

	list_for_each_entry(pos, &evlist->entries, node) {
		perf_evsel__free_stat_priv(pos);
		perf_evsel__free_counts(pos);
		perf_evsel__free_prev_raw_counts(pos);
	}
}
/*
 * Allocate the per-event bookkeeping for every event in @evlist:
 * private stat data, counts sized for the event's CPUs and, when
 * @alloc_raw is set, the previous-raw-counts buffer.
 *
 * On the first failure everything allocated so far is released again.
 * Returns 0 on success, -1 on failure.
 */
static int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw)
{
	struct perf_evsel *pos;

	list_for_each_entry(pos, &evlist->entries, node) {
		if (perf_evsel__alloc_stat_priv(pos) < 0)
			goto out_free;

		if (perf_evsel__alloc_counts(pos, perf_evsel__nr_cpus(pos)) < 0)
			goto out_free;

		if (alloc_raw && perf_evsel__alloc_prev_raw_counts(pos) < 0)
			goto out_free;
	}

	return 0;

out_free:
	perf_evlist__free_stats(evlist);
	return -1;
}
/*
 * Shadow statistics: running totals of selected events that other
 * events are normalised against at print time (e.g. misses relative to
 * accesses, cycles relative to elapsed nanoseconds).
 *
 * The print helpers index these arrays by cpu; update_shadow_stats()
 * feeds slot 0.
 */
static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
static struct stats runtime_cycles_stats[MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_front_stats[MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_back_stats[MAX_NR_CPUS];
static struct stats runtime_branches_stats[MAX_NR_CPUS];
static struct stats runtime_cacherefs_stats[MAX_NR_CPUS];
static struct stats runtime_l1_dcache_stats[MAX_NR_CPUS];
static struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
static struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
/* Wall-clock duration of the measured run(s), in the units returned by rdclock(). */
static struct stats walltime_nsecs_stats;
static void perf_stat__reset_stats(struct perf_evlist *evlist)
struct perf_evsel *evsel;
list_for_each_entry(evsel, &evlist->entries, node) {
perf_evsel__reset_stat_priv(evsel);
perf_evsel__reset_counts(evsel, perf_evsel__nr_cpus(evsel));
}
memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats));
memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats));
memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats));
memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats));
memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats));
memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats));
memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats));
memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats));
memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
}
static int create_perf_stat_counter(struct perf_evsel *evsel)
struct perf_event_attr *attr = &evsel->attr;
if (scale)
attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
PERF_FORMAT_TOTAL_TIME_RUNNING;
attr->inherit = !no_inherit;
if (perf_target__has_cpu(&target))
return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));

Stephane Eranian
committed
if (!perf_target__has_task(&target) &&
perf_evsel__is_group_leader(evsel)) {
attr->disabled = 1;
if (!initial_delay)
attr->enable_on_exec = 1;
return perf_evsel__open_per_thread(evsel, evsel_list->threads);
}
/*
* Does the counter have nsecs as a unit?
*/
static inline int nsec_counter(struct perf_evsel *evsel)
if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
return 1;
return 0;
}
/*
 * Update various tracking values we maintain to print
 * more semantic information such as miss/hit ratios,
 * instruction rates, etc:
 *
 * @count[0] of a recognised event is added to slot 0 of the matching
 * shadow statistic; events that match none of the cases are ignored.
 */
static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
{
	if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
		update_stats(&runtime_nsecs_stats[0], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
		update_stats(&runtime_cycles_stats[0], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
		update_stats(&runtime_stalled_cycles_front_stats[0], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
		update_stats(&runtime_stalled_cycles_back_stats[0], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
		update_stats(&runtime_branches_stats[0], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
		update_stats(&runtime_cacherefs_stats[0], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
		update_stats(&runtime_l1_dcache_stats[0], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
		update_stats(&runtime_l1_icache_stats[0], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
		update_stats(&runtime_ll_cache_stats[0], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
		update_stats(&runtime_dtlb_cache_stats[0], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
		update_stats(&runtime_itlb_cache_stats[0], count[0]);
}
* Read out the results of a single counter:
* aggregate counts across CPUs in system-wide mode
static int read_counter_aggr(struct perf_evsel *counter)
struct perf_stat *ps = counter->priv;
u64 *count = counter->counts->aggr.values;
int i;
if (__perf_evsel__read(counter, perf_evsel__nr_cpus(counter),
thread_map__nr(evsel_list->threads), scale) < 0)
return -1;
update_stats(&ps->res_stats[i], count[i]);
fprintf(output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
perf_evsel__name(counter), count[0], count[1], count[2]);
/*
* Save the full runtime - to allow normalization during printout:
*/
return 0;
}
/*
* Read out the results of a single counter:
* do not aggregate counts across CPUs in system-wide mode
*/
static int read_counter(struct perf_evsel *counter)
u64 *count;
for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0)
return -1;
count = counter->counts->cpu[cpu].values;
return 0;
/*
 * Print one snapshot of all counters in interval mode.
 *
 * Each row is prefixed with the time elapsed since ref_time followed by
 * csv_sep.  In non-CSV mode a column header is printed first and again
 * every 25 snapshots; its layout and the print routine used both depend
 * on aggr_mode.
 *
 * NOTE(review): this block looks incomplete as it stands - braces do
 * not balance, the per-event reset loop appears twice with no read call
 * in either copy, the second group of case labels has no enclosing
 * switch, and the AGGR_SOCKET header case has no break.  Compare with
 * the original file before relying on it.
 */
static void print_interval(void)
{
static int num_print_interval;
struct perf_evsel *counter;
struct perf_stat *ps;
struct timespec ts, rs;
char prefix[64];
/* Start every snapshot from cleared per-event statistics. */
list_for_each_entry(counter, &evsel_list->entries, node) {
ps = counter->priv;
memset(ps->res_stats, 0, sizeof(ps->res_stats));
list_for_each_entry(counter, &evsel_list->entries, node) {
ps = counter->priv;
memset(ps->res_stats, 0, sizeof(ps->res_stats));
/* Timestamp of this snapshot, relative to ref_time. */
clock_gettime(CLOCK_MONOTONIC, &ts);
diff_timespec(&rs, &ts, &ref_time);
sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep);
/* Column header, human-readable output only. */
if (num_print_interval == 0 && !csv_output) {
switch (aggr_mode) {
case AGGR_SOCKET:
fprintf(output, "# time socket cpus counts events\n");
case AGGR_CORE:
fprintf(output, "# time core cpus counts events\n");
break;
fprintf(output, "# time CPU counts events\n");
break;
case AGGR_GLOBAL:
default:
fprintf(output, "# time counts events\n");
}
/* Wrap so that the header is repeated every 25 snapshots. */
if (++num_print_interval == 25)
num_print_interval = 0;
case AGGR_SOCKET:
print_aggr(prefix);
break;
case AGGR_NONE:
list_for_each_entry(counter, &evsel_list->entries, node)
print_counter(counter, prefix);
break;
case AGGR_GLOBAL:
default:
list_for_each_entry(counter, &evsel_list->entries, node)
print_counter_aggr(counter, prefix);
}
fflush(output);
/*
 * When an initial delay was requested, wait for initial_delay
 * milliseconds and then enable every counter of the event list.
 * Does nothing when initial_delay is 0.
 */
static void handle_initial_delay(void)
{
	struct perf_evsel *counter;

	if (initial_delay) {
		const int ncpus = cpu_map__nr(evsel_list->cpus),
			nthreads = thread_map__nr(evsel_list->threads);
		/*
		 * Split the delay into seconds and nanoseconds instead of
		 * passing initial_delay * 1000 to usleep(): that product
		 * wraps for large delays, and POSIX allows usleep() to
		 * reject values of one second or more.
		 */
		struct timespec delay = {
			.tv_sec  = initial_delay / 1000,
			.tv_nsec = (initial_delay % 1000) * 1000000L,
		};

		/* Like usleep(), a single call: a signal cuts the wait short. */
		nanosleep(&delay, NULL);

		list_for_each_entry(counter, &evsel_list->entries, node)
			perf_evsel__enable(counter, ncpus, nthreads);
	}
}
/*
 * Perform one measured run: prepare the workload, open a counter for
 * every event, start the workload, wait for it (printing interval
 * snapshots when `interval` is set), record the elapsed wall-clock time
 * and close the counters.
 *
 * Returns -1 on setup failure, otherwise WEXITSTATUS() of the workload
 * status.
 *
 * NOTE(review): this block looks incomplete as it stands - the opening
 * brace and the declaration of `ts` are missing, several if/else arms
 * have lost their braces or conditions (e.g. the `} else {` before the
 * final loop has no matching `if`), and `forks` is never used.  Compare
 * with the original file before relying on it.
 */
static int __run_perf_stat(int argc, const char **argv)
char msg[512];
unsigned long long t0, t1;
struct perf_evsel *counter;
int status = 0;
const bool forks = (argc > 0);
/* Sleep period between polls: `interval` milliseconds, else one second. */
if (interval) {
ts.tv_sec = interval / 1000;
ts.tv_nsec = (interval % 1000) * 1000000;
} else {
ts.tv_sec = 1;
ts.tv_nsec = 0;
}
if (perf_evlist__prepare_workload(evsel_list, &target, argv,
false, false) < 0) {
perror("failed to prepare workload");
return -1;
child_pid = evsel_list->workload.pid;
perf_evlist__set_leader(evsel_list);
list_for_each_entry(counter, &evsel_list->entries, node) {
if (create_perf_stat_counter(counter) < 0) {
/*
* PPC returns ENXIO for HW counters until 2.6.37
* (behavior changed with commit b0a873e).
*/
if (errno == EINVAL || errno == ENOSYS ||
errno == ENOENT || errno == EOPNOTSUPP ||
errno == ENXIO) {
if (verbose)
ui__warning("%s event is not supported by the kernel.\n",
perf_evsel__name(counter));
counter->supported = false;
perf_evsel__open_strerror(counter, &target,
errno, msg, sizeof(msg));
ui__error("%s\n", msg);
/* Do not leave the prepared workload behind on a fatal error. */
if (child_pid != -1)
kill(child_pid, SIGTERM);
return -1;
}
counter->supported = true;
}
if (perf_evlist__apply_filters(evsel_list)) {
error("failed to set filter with %d (%s)\n", errno,
strerror(errno));
return -1;
}
/*
* Enable counters and exec the command:
*/
t0 = rdclock();
clock_gettime(CLOCK_MONOTONIC, &ref_time);
perf_evlist__start_workload(evsel_list);
if (interval) {
/* Poll the child without blocking so snapshots keep being printed. */
while (!waitpid(child_pid, &status, WNOHANG)) {
nanosleep(&ts, NULL);
print_interval();
}
}
if (WIFSIGNALED(status))
psignal(WTERMSIG(status), argv[0]);
while (!done) {
nanosleep(&ts, NULL);
if (interval)
print_interval();
}
t1 = rdclock();
update_stats(&walltime_nsecs_stats, t1 - t0);
list_for_each_entry(counter, &evsel_list->entries, node) {
perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter),
thread_map__nr(evsel_list->threads));
} else {
list_for_each_entry(counter, &evsel_list->entries, node) {
read_counter(counter);
perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 1);
}
return WEXITSTATUS(status);
}
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
/*
 * Wrap one measured run with the optional pre/post shell commands and
 * the optional sync().
 *
 * Returns the first non-zero result among the pre command, the run
 * itself and the post command, or 0 when all of them succeeded.
 */
static int run_perf_stat(int argc __maybe_unused, const char **argv)
{
	int err;

	if (pre_cmd) {
		err = system(pre_cmd);
		if (err != 0)
			return err;
	}

	if (sync_run)
		sync();

	err = __run_perf_stat(argc, argv);
	if (err != 0 || !post_cmd)
		return err;

	/* Nothing failed so far: the post command decides the result. */
	return system(post_cmd);
}
static void print_noise_pct(double total, double avg)
{
double pct = rel_stddev_stats(total, avg);
fprintf(output, "%s%.2f%%", csv_sep, pct);
fprintf(output, " ( +-%6.2f%% )", pct);
static void print_noise(struct perf_evsel *evsel, double avg)
struct perf_stat *ps;
if (run_count == 1)
return;
print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
case AGGR_CORE:
fprintf(output, "S%d-C%*d%s%*d%s",
cpu_map__id_to_socket(id),
csv_output ? 0 : -8,
cpu_map__id_to_cpu(id),
csv_sep,
csv_output ? 0 : 4,
nr,
csv_sep);
break;
case AGGR_SOCKET:
fprintf(output, "S%*d%s%*d%s",
csv_output ? 0 : -5,
csv_sep,
csv_output ? 0 : 4,
nr,
csv_sep);
break;
case AGGR_NONE:
fprintf(output, "CPU%*d%s",
perf_evsel__cpus(evsel)->map[id], csv_sep);
break;
case AGGR_GLOBAL:
default:
break;
}
}
/*
 * Print one row for a counter measured in nanoseconds.
 *
 * @avg is converted to milliseconds for display.  For the task-clock
 * event the row is annotated with the number of CPUs utilized, i.e.
 * @avg divided by the average wall-clock time.
 */
static void nsec_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
{
	double msecs = avg / 1e6;
	const char *fmt = csv_output ? "%.6f%s%s" : "%18.6f%s%-25s";

	aggr_printout(evsel, cpu, nr);

	fprintf(output, fmt, msecs, csv_sep, perf_evsel__name(evsel));

	/* Not every event belongs to a cgroup. */
	if (evsel->cgrp)
		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);

	if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
		fprintf(output, " # %8.3f CPUs utilized ",
			avg / avg_stats(&walltime_nsecs_stats));
	else
		fprintf(output, " ");
}
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
/*
 * used for get_ratio_color(): selects which row of colour thresholds a
 * ratio is compared against.  GRC_MAX_NR is the number of rows, not a
 * valid type.
 */
enum grc_type {
GRC_STALLED_CYCLES_FE,
GRC_STALLED_CYCLES_BE,
GRC_CACHE_MISSES,
GRC_MAX_NR
};
/*
 * Map a percentage @ratio to a display colour.
 *
 * Each @type has three descending thresholds; the colour belongs to the
 * highest threshold that @ratio strictly exceeds (red, then magenta,
 * then yellow).  Ratios at or below the lowest threshold get the normal
 * colour.
 */
static const char *get_ratio_color(enum grc_type type, double ratio)
{
	static const double grc_table[GRC_MAX_NR][3] = {
		[GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
		[GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
		[GRC_CACHE_MISSES] = { 20.0, 10.0, 5.0 },
	};
	const char *const severity[3] = {
		PERF_COLOR_RED,
		PERF_COLOR_MAGENTA,
		PERF_COLOR_YELLOW,
	};
	const double *limits = grc_table[type];
	int level;

	for (level = 0; level < 3; level++) {
		if (ratio > limits[level])
			return severity[level];
	}

	return PERF_COLOR_NORMAL;
}
/*
 * Annotate a stalled-cycles-frontend row with @avg as a percentage of
 * the average cycle count recorded for @cpu, coloured by severity.
 * The percentage is 0 when no cycles were recorded.
 */
static void print_stalled_cycles_frontend(int cpu,
					  struct perf_evsel *evsel
					  __maybe_unused, double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_cycles_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);

	fprintf(output, " # ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " frontend cycles idle ");
}
/*
 * Annotate a stalled-cycles-backend row with @avg as a percentage of
 * the average cycle count recorded for @cpu, coloured by severity.
 * The percentage is 0 when no cycles were recorded.
 */
static void print_stalled_cycles_backend(int cpu,
					 struct perf_evsel *evsel
					 __maybe_unused, double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_cycles_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);

	fprintf(output, " # ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " backend cycles idle ");
}
/*
 * Annotate a branch-misses row with @avg as a percentage of the average
 * branch count recorded for @cpu, coloured with the cache-miss
 * thresholds.  The percentage is 0 when no branches were recorded.
 */
static void print_branch_misses(int cpu,
				struct perf_evsel *evsel __maybe_unused,
				double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_branches_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	fprintf(output, " # ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all branches ");
}
/*
 * Annotate an L1-dcache miss row with @avg as a percentage of the
 * average L1-dcache count recorded for @cpu, coloured by severity.
 * The percentage is 0 when nothing was recorded.
 */
static void print_l1_dcache_misses(int cpu,
				   struct perf_evsel *evsel __maybe_unused,
				   double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_l1_dcache_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	fprintf(output, " # ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all L1-dcache hits ");
}
/*
 * Annotate an L1-icache miss row with @avg as a percentage of the
 * average L1-icache count recorded for @cpu, coloured by severity.
 * The percentage is 0 when nothing was recorded.
 */
static void print_l1_icache_misses(int cpu,
				   struct perf_evsel *evsel __maybe_unused,
				   double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_l1_icache_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	fprintf(output, " # ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all L1-icache hits ");
}
/*
 * Annotate a dTLB miss row with @avg as a percentage of the average
 * dTLB count recorded for @cpu, coloured by severity.
 * The percentage is 0 when nothing was recorded.
 */
static void print_dtlb_cache_misses(int cpu,
				    struct perf_evsel *evsel __maybe_unused,
				    double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_dtlb_cache_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	fprintf(output, " # ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all dTLB cache hits ");
}
/*
 * Annotate an iTLB miss row with @avg as a percentage of the average
 * iTLB count recorded for @cpu, coloured by severity.
 * The percentage is 0 when nothing was recorded.
 */
static void print_itlb_cache_misses(int cpu,
				    struct perf_evsel *evsel __maybe_unused,
				    double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_itlb_cache_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	fprintf(output, " # ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all iTLB cache hits ");
}
/*
 * Annotate a last-level-cache miss row with @avg as a percentage of the
 * average LL-cache count recorded for @cpu, coloured by severity.
 * The percentage is 0 when nothing was recorded.
 */
static void print_ll_cache_misses(int cpu,
				  struct perf_evsel *evsel __maybe_unused,
				  double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_ll_cache_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	fprintf(output, " # ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all LL-cache hits ");
}
/*
 * Print one row for a plain (non-nanosecond) counter: the value, the
 * event name, and a derived metric chosen by event type - instructions
 * per cycle, a miss percentage, idle-cycle percentage, clock frequency,
 * or a generic per-second rate.
 *
 * NOTE(review): this block looks incomplete as it stands - the opening
 * and closing braces, the declarations of `total`, `ratio` and `unit`,
 * the assignment of `fmt`, and the statements guarded by the first two
 * `if`s are missing, and a run of bare numbers sits in the middle of
 * the else-if chain.  Compare with the original file before relying on
 * it.
 */
static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
const char *fmt;
if (csv_output)
aggr_printout(evsel, cpu, nr);
if (aggr_mode == AGGR_GLOBAL)
fprintf(output, fmt, avg, csv_sep, perf_evsel__name(evsel));
fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
/* Instructions divided by cycles. */
total = avg_stats(&runtime_cycles_stats[cpu]);
if (total)
ratio = avg / total;
fprintf(output, " # %5.2f insns per cycle ", ratio);
/* Use the larger of the frontend and backend stall counts. */
total = avg_stats(&runtime_stalled_cycles_front_stats[cpu]);
total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[cpu]));
if (total && avg) {
ratio = total / avg;
fprintf(output, "\n # %5.2f stalled cycles per insn", ratio);
}
} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
runtime_branches_stats[cpu].n != 0) {
print_branch_misses(cpu, evsel, avg);
} else if (
evsel->attr.type == PERF_TYPE_HW_CACHE &&
evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D |
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
runtime_l1_dcache_stats[cpu].n != 0) {
print_l1_dcache_misses(cpu, evsel, avg);
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
} else if (
evsel->attr.type == PERF_TYPE_HW_CACHE &&
evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I |
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
runtime_l1_icache_stats[cpu].n != 0) {
print_l1_icache_misses(cpu, evsel, avg);
} else if (
evsel->attr.type == PERF_TYPE_HW_CACHE &&
evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB |
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
runtime_dtlb_cache_stats[cpu].n != 0) {
print_dtlb_cache_misses(cpu, evsel, avg);
} else if (
evsel->attr.type == PERF_TYPE_HW_CACHE &&
evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB |
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
runtime_itlb_cache_stats[cpu].n != 0) {
print_itlb_cache_misses(cpu, evsel, avg);
} else if (
evsel->attr.type == PERF_TYPE_HW_CACHE &&
evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL |
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
runtime_ll_cache_stats[cpu].n != 0) {
print_ll_cache_misses(cpu, evsel, avg);
} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
runtime_cacherefs_stats[cpu].n != 0) {
total = avg_stats(&runtime_cacherefs_stats[cpu]);
if (total)
ratio = avg * 100 / total;
fprintf(output, " # %8.3f %% of all cache refs ", ratio);
} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
print_stalled_cycles_frontend(cpu, evsel, avg);
} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
print_stalled_cycles_backend(cpu, evsel, avg);
} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
/* Cycles per nanosecond of task clock, i.e. GHz. */
total = avg_stats(&runtime_nsecs_stats[cpu]);
ratio = 1.0 * avg / total;
fprintf(output, " # %8.3f GHz ", ratio);
} else if (runtime_nsecs_stats[cpu].n != 0) {
/* Generic rate: events per microsecond of task clock. */
total = avg_stats(&runtime_nsecs_stats[cpu]);
ratio = 1000.0 * avg / total;
/* Very small rates are rescaled by 1000 and shown with unit 'K'. */
if (ratio < 0.001) {
ratio *= 1000;
unit = 'K';
}
fprintf(output, " # %8.3f %c/sec ", ratio, unit);
static void print_aggr(char *prefix)
{
struct perf_evsel *counter;
int cpu, cpu2, s, s2, id, nr;
u64 ena, run, val;
for (s = 0; s < aggr_map->nr; s++) {
id = aggr_map->map[s];
list_for_each_entry(counter, &evsel_list->entries, node) {
val = ena = run = 0;
nr = 0;
for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
cpu2 = perf_evsel__cpus(counter)->map[cpu];
s2 = aggr_get_id(evsel_list->cpus, cpu2);
continue;
val += counter->counts->cpu[cpu].val;
ena += counter->counts->cpu[cpu].ena;
run += counter->counts->cpu[cpu].run;
nr++;
}
if (prefix)
fprintf(output, "%s", prefix);
if (run == 0 || ena == 0) {
aggr_printout(counter, id, nr);
csv_output ? 0 : 18,
counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
csv_sep,
csv_output ? 0 : -24,
perf_evsel__name(counter));
if (counter->cgrp)
fprintf(output, "%s%s",
csv_sep, counter->cgrp->name);
fputc('\n', output);
continue;
}
if (nsec_counter(counter))
nsec_printout(id, nr, counter, val);
abs_printout(id, nr, counter, val);
if (!csv_output) {
print_noise(counter, 1.0);
if (run != ena)
fprintf(output, " (%.2f%%)",
100.0 * run / ena);