diff --git a/arch/powerpc/include/asm/uprobes.h b/arch/powerpc/include/asm/uprobes.h
index 23016020915e765edd720d315a0480d139e25ffd..75c6ecdb8f3728a175bd61808cad34a9689fd47e 100644
--- a/arch/powerpc/include/asm/uprobes.h
+++ b/arch/powerpc/include/asm/uprobes.h
@@ -37,6 +37,7 @@ typedef ppc_opcode_t uprobe_opcode_t;
 struct arch_uprobe {
 	union {
 		u8	insn[MAX_UINSN_BYTES];
+		u8	ixol[MAX_UINSN_BYTES];
 		u32	ainsn;
 	};
 };
@@ -45,11 +46,4 @@ struct arch_uprobe_task {
 	unsigned long	saved_trap_nr;
 };
 
-extern int  arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr);
-extern int  arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs);
-extern int  arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs);
-extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk);
-extern int  arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data);
-extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs);
-extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs);
 #endif	/* _ASM_UPROBES_H */
diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
index 6e5197910fd87b178921e3dd2ecf883ae3fb0894..3087ea9c5f2e86dfcf9c596258a3d1df6f22bc90 100644
--- a/arch/x86/include/asm/uprobes.h
+++ b/arch/x86/include/asm/uprobes.h
@@ -35,7 +35,10 @@ typedef u8 uprobe_opcode_t;
 
 struct arch_uprobe {
 	u16				fixups;
-	u8				insn[MAX_UINSN_BYTES];
+	union {
+		u8			insn[MAX_UINSN_BYTES];
+		u8			ixol[MAX_UINSN_BYTES];
+	};
 #ifdef CONFIG_X86_64
 	unsigned long			rip_rela_target_address;
 #endif
@@ -49,11 +52,4 @@ struct arch_uprobe_task {
 	unsigned int			saved_tf;
 };
 
-extern int  arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr);
-extern int  arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs);
-extern int  arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs);
-extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk);
-extern int  arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data);
-extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs);
-extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs);
 #endif	/* _ASM_UPROBES_H */
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 8a87a3224121fdeac2d2fd59fb2142f6de8d8a24..8e132931614d618b03b9f7d69a492b793a27bc30 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1989,7 +1989,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
 		frame.return_address = 0;
 
 		bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
-		if (bytes != sizeof(frame))
+		if (bytes != 0)
 			break;
 
 		if (!valid_user_frame(fp, sizeof(frame)))
@@ -2041,7 +2041,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 		frame.return_address = 0;
 
 		bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
-		if (bytes != sizeof(frame))
+		if (bytes != 0)
 			break;
 
 		if (!valid_user_frame(fp, sizeof(frame)))
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index cc16faae0538431073b01dfa0bd5c96ab4383ae1..fd00bb29425d4da50194241c6ae3835775e12e6e 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -163,6 +163,11 @@ struct cpu_hw_events {
 	u64				intel_ctrl_host_mask;
 	struct perf_guest_switch_msr	guest_switch_msrs[X86_PMC_IDX_MAX];
 
+	/*
+	 * Intel checkpoint mask
+	 */
+	u64				intel_cp_status;
+
 	/*
 	 * manage shared (per-core, per-cpu) registers
 	 * used on Intel NHM/WSM/SNB
@@ -440,6 +445,7 @@ struct x86_pmu {
 	int		lbr_nr;			   /* hardware stack size */
 	u64		lbr_sel_mask;		   /* LBR_SELECT valid bits */
 	const int	*lbr_sel_map;		   /* lbr_select mappings */
+	bool		lbr_double_abort;	   /* duplicated lbr aborts */
 
 	/*
 	 * Extra registers for events
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index f31a1655d1ff5bd602239e211b14bdd28d95a79a..0fa4f242f0504ad53297360966e957b84a0edab8 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -190,9 +190,9 @@ static struct extra_reg intel_snbep_extra_regs[] __read_mostly = {
 	EVENT_EXTRA_END
 };
 
-EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
-EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
-EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");
+EVENT_ATTR_STR(mem-loads,	mem_ld_nhm,	"event=0x0b,umask=0x10,ldlat=3");
+EVENT_ATTR_STR(mem-loads,	mem_ld_snb,	"event=0xcd,umask=0x1,ldlat=3");
+EVENT_ATTR_STR(mem-stores,	mem_st_snb,	"event=0xcd,umask=0x2");
 
 struct attribute *nhm_events_attrs[] = {
 	EVENT_PTR(mem_ld_nhm),
@@ -1184,6 +1184,11 @@ static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
 	wrmsrl(hwc->config_base, ctrl_val);
 }
 
+static inline bool event_is_checkpointed(struct perf_event *event)
+{
+	return (event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0;
+}
+
 static void intel_pmu_disable_event(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
@@ -1197,6 +1202,7 @@ static void intel_pmu_disable_event(struct perf_event *event)
 
 	cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
 	cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
+	cpuc->intel_cp_status &= ~(1ull << hwc->idx);
 
 	/*
 	 * must disable before any actual event
@@ -1271,6 +1277,9 @@ static void intel_pmu_enable_event(struct perf_event *event)
 	if (event->attr.exclude_guest)
 		cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx);
 
+	if (unlikely(event_is_checkpointed(event)))
+		cpuc->intel_cp_status |= (1ull << hwc->idx);
+
 	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
 		intel_pmu_enable_fixed(hwc);
 		return;
@@ -1289,6 +1298,17 @@ static void intel_pmu_enable_event(struct perf_event *event)
 int intel_pmu_save_and_restart(struct perf_event *event)
 {
 	x86_perf_event_update(event);
+	/*
+	 * For a checkpointed counter always reset back to 0.  This
+	 * avoids a situation where the counter overflows, aborts the
+	 * transaction and is then set back to shortly before the
+	 * overflow, and overflows and aborts again.
+	 */
+	if (unlikely(event_is_checkpointed(event))) {
+		/* No race with NMIs because the counter should not be armed */
+		wrmsrl(event->hw.event_base, 0);
+		local64_set(&event->hw.prev_count, 0);
+	}
 	return x86_perf_event_set_period(event);
 }
 
@@ -1372,6 +1392,13 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 		x86_pmu.drain_pebs(regs);
 	}
 
+	/*
+	 * Checkpointed counters can lead to 'spurious' PMIs because the
+	 * rollback caused by the PMI will have cleared the overflow status
+	 * bit. Therefore always force probe these counters.
+	 */
+	status |= cpuc->intel_cp_status;
+
 	for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
 		struct perf_event *event = cpuc->events[bit];
 
@@ -1837,6 +1864,20 @@ static int hsw_hw_config(struct perf_event *event)
 	      event->attr.precise_ip > 0))
 		return -EOPNOTSUPP;
 
+	if (event_is_checkpointed(event)) {
+		/*
+		 * Sampling of checkpointed events can cause situations where
+		 * the CPU constantly aborts because of a overflow, which is
+		 * then checkpointed back and ignored. Forbid checkpointing
+		 * for sampling.
+		 *
+		 * But still allow a long sampling period, so that perf stat
+		 * from KVM works.
+		 */
+		if (event->attr.sample_period > 0 &&
+		    event->attr.sample_period < 0x7fffffff)
+			return -EOPNOTSUPP;
+	}
 	return 0;
 }
 
@@ -2182,10 +2223,36 @@ static __init void intel_nehalem_quirk(void)
 	}
 }
 
-EVENT_ATTR_STR(mem-loads,      mem_ld_hsw,     "event=0xcd,umask=0x1,ldlat=3");
-EVENT_ATTR_STR(mem-stores,     mem_st_hsw,     "event=0xd0,umask=0x82")
+EVENT_ATTR_STR(mem-loads,	mem_ld_hsw,	"event=0xcd,umask=0x1,ldlat=3");
+EVENT_ATTR_STR(mem-stores,	mem_st_hsw,	"event=0xd0,umask=0x82")
+
+/* Haswell special events */
+EVENT_ATTR_STR(tx-start,	tx_start,	"event=0xc9,umask=0x1");
+EVENT_ATTR_STR(tx-commit,	tx_commit,	"event=0xc9,umask=0x2");
+EVENT_ATTR_STR(tx-abort,	tx_abort,	"event=0xc9,umask=0x4");
+EVENT_ATTR_STR(tx-capacity,	tx_capacity,	"event=0x54,umask=0x2");
+EVENT_ATTR_STR(tx-conflict,	tx_conflict,	"event=0x54,umask=0x1");
+EVENT_ATTR_STR(el-start,	el_start,	"event=0xc8,umask=0x1");
+EVENT_ATTR_STR(el-commit,	el_commit,	"event=0xc8,umask=0x2");
+EVENT_ATTR_STR(el-abort,	el_abort,	"event=0xc8,umask=0x4");
+EVENT_ATTR_STR(el-capacity,	el_capacity,	"event=0x54,umask=0x2");
+EVENT_ATTR_STR(el-conflict,	el_conflict,	"event=0x54,umask=0x1");
+EVENT_ATTR_STR(cycles-t,	cycles_t,	"event=0x3c,in_tx=1");
+EVENT_ATTR_STR(cycles-ct,	cycles_ct,	"event=0x3c,in_tx=1,in_tx_cp=1");
 
 static struct attribute *hsw_events_attrs[] = {
+	EVENT_PTR(tx_start),
+	EVENT_PTR(tx_commit),
+	EVENT_PTR(tx_abort),
+	EVENT_PTR(tx_capacity),
+	EVENT_PTR(tx_conflict),
+	EVENT_PTR(el_start),
+	EVENT_PTR(el_commit),
+	EVENT_PTR(el_abort),
+	EVENT_PTR(el_capacity),
+	EVENT_PTR(el_conflict),
+	EVENT_PTR(cycles_t),
+	EVENT_PTR(cycles_ct),
 	EVENT_PTR(mem_ld_hsw),
 	EVENT_PTR(mem_st_hsw),
 	NULL
@@ -2452,6 +2519,7 @@ __init int intel_pmu_init(void)
 		x86_pmu.hw_config = hsw_hw_config;
 		x86_pmu.get_event_constraints = hsw_get_event_constraints;
 		x86_pmu.cpu_events = hsw_events_attrs;
+		x86_pmu.lbr_double_abort = true;
 		pr_cont("Haswell events, ");
 		break;
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index ab3ba1c1b7dd2c425dd5edf4c2b5091d76355b34..ae96cfa5eddd74a30780e4062fae9af8f34ecd82 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -12,6 +12,7 @@
 
 #define BTS_BUFFER_SIZE		(PAGE_SIZE << 4)
 #define PEBS_BUFFER_SIZE	PAGE_SIZE
+#define PEBS_FIXUP_SIZE		PAGE_SIZE
 
 /*
  * pebs_record_32 for p4 and core not supported
@@ -182,18 +183,32 @@ struct pebs_record_nhm {
  * Same as pebs_record_nhm, with two additional fields.
  */
 struct pebs_record_hsw {
-	struct pebs_record_nhm nhm;
-	/*
-	 * Real IP of the event. In the Intel documentation this
-	 * is called eventingrip.
-	 */
-	u64 real_ip;
-	/*
-	 * TSX tuning information field: abort cycles and abort flags.
-	 */
-	u64 tsx_tuning;
+	u64 flags, ip;
+	u64 ax, bx, cx, dx;
+	u64 si, di, bp, sp;
+	u64 r8,  r9,  r10, r11;
+	u64 r12, r13, r14, r15;
+	u64 status, dla, dse, lat;
+	u64 real_ip, tsx_tuning;
+};
+
+union hsw_tsx_tuning {
+	struct {
+		u32 cycles_last_block     : 32,
+		    hle_abort		  : 1,
+		    rtm_abort		  : 1,
+		    instruction_abort     : 1,
+		    non_instruction_abort : 1,
+		    retry		  : 1,
+		    data_conflict	  : 1,
+		    capacity_writes	  : 1,
+		    capacity_reads	  : 1;
+	};
+	u64	    value;
 };
 
+#define PEBS_HSW_TSX_FLAGS	0xff00000000ULL
+
 void init_debug_store_on_cpu(int cpu)
 {
 	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
@@ -214,12 +229,14 @@ void fini_debug_store_on_cpu(int cpu)
 	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
 }
 
+static DEFINE_PER_CPU(void *, insn_buffer);
+
 static int alloc_pebs_buffer(int cpu)
 {
 	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
 	int node = cpu_to_node(cpu);
 	int max, thresh = 1; /* always use a single PEBS record */
-	void *buffer;
+	void *buffer, *ibuffer;
 
 	if (!x86_pmu.pebs)
 		return 0;
@@ -228,6 +245,19 @@ static int alloc_pebs_buffer(int cpu)
 	if (unlikely(!buffer))
 		return -ENOMEM;
 
+	/*
+	 * HSW+ already provides us the eventing ip; no need to allocate this
+	 * buffer then.
+	 */
+	if (x86_pmu.intel_cap.pebs_format < 2) {
+		ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
+		if (!ibuffer) {
+			kfree(buffer);
+			return -ENOMEM;
+		}
+		per_cpu(insn_buffer, cpu) = ibuffer;
+	}
+
 	max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
 
 	ds->pebs_buffer_base = (u64)(unsigned long)buffer;
@@ -248,6 +278,9 @@ static void release_pebs_buffer(int cpu)
 	if (!ds || !x86_pmu.pebs)
 		return;
 
+	kfree(per_cpu(insn_buffer, cpu));
+	per_cpu(insn_buffer, cpu) = NULL;
+
 	kfree((void *)(unsigned long)ds->pebs_buffer_base);
 	ds->pebs_buffer_base = 0;
 }
@@ -715,6 +748,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
 	unsigned long old_to, to = cpuc->lbr_entries[0].to;
 	unsigned long ip = regs->ip;
 	int is_64bit = 0;
+	void *kaddr;
 
 	/*
 	 * We don't need to fixup if the PEBS assist is fault like
@@ -738,7 +772,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
 	 * unsigned math, either ip is before the start (impossible) or
 	 * the basic block is larger than 1 page (sanity)
 	 */
-	if ((ip - to) > PAGE_SIZE)
+	if ((ip - to) > PEBS_FIXUP_SIZE)
 		return 0;
 
 	/*
@@ -749,29 +783,33 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
 		return 1;
 	}
 
+	if (!kernel_ip(ip)) {
+		int size, bytes;
+		u8 *buf = this_cpu_read(insn_buffer);
+
+		size = ip - to; /* Must fit our buffer, see above */
+		bytes = copy_from_user_nmi(buf, (void __user *)to, size);
+		if (bytes != 0)
+			return 0;
+
+		kaddr = buf;
+	} else {
+		kaddr = (void *)to;
+	}
+
 	do {
 		struct insn insn;
-		u8 buf[MAX_INSN_SIZE];
-		void *kaddr;
 
 		old_to = to;
-		if (!kernel_ip(ip)) {
-			int bytes, size = MAX_INSN_SIZE;
-
-			bytes = copy_from_user_nmi(buf, (void __user *)to, size);
-			if (bytes != size)
-				return 0;
-
-			kaddr = buf;
-		} else
-			kaddr = (void *)to;
 
 #ifdef CONFIG_X86_64
 		is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32);
 #endif
 		insn_init(&insn, kaddr, is_64bit);
 		insn_get_length(&insn);
+
 		to += insn.length;
+		kaddr += insn.length;
 	} while (to < ip);
 
 	if (to == ip) {
@@ -786,16 +824,34 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
 	return 0;
 }
 
+static inline u64 intel_hsw_weight(struct pebs_record_hsw *pebs)
+{
+	if (pebs->tsx_tuning) {
+		union hsw_tsx_tuning tsx = { .value = pebs->tsx_tuning };
+		return tsx.cycles_last_block;
+	}
+	return 0;
+}
+
+static inline u64 intel_hsw_transaction(struct pebs_record_hsw *pebs)
+{
+	u64 txn = (pebs->tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;
+
+	/* For RTM XABORTs also log the abort code from AX */
+	if ((txn & PERF_TXN_TRANSACTION) && (pebs->ax & 1))
+		txn |= ((pebs->ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
+	return txn;
+}
+
 static void __intel_pmu_pebs_event(struct perf_event *event,
 				   struct pt_regs *iregs, void *__pebs)
 {
 	/*
-	 * We cast to pebs_record_nhm to get the load latency data
-	 * if extra_reg MSR_PEBS_LD_LAT_THRESHOLD used
+	 * We cast to the biggest pebs_record but are careful not to
+	 * unconditionally access the 'extra' entries.
 	 */
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	struct pebs_record_nhm *pebs = __pebs;
-	struct pebs_record_hsw *pebs_hsw = __pebs;
+	struct pebs_record_hsw *pebs = __pebs;
 	struct perf_sample_data data;
 	struct pt_regs regs;
 	u64 sample_type;
@@ -854,7 +910,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 	regs.sp = pebs->sp;
 
 	if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
-		regs.ip = pebs_hsw->real_ip;
+		regs.ip = pebs->real_ip;
 		regs.flags |= PERF_EFLAGS_EXACT;
 	} else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
 		regs.flags |= PERF_EFLAGS_EXACT;
@@ -862,9 +918,18 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 		regs.flags &= ~PERF_EFLAGS_EXACT;
 
 	if ((event->attr.sample_type & PERF_SAMPLE_ADDR) &&
-		x86_pmu.intel_cap.pebs_format >= 1)
+	    x86_pmu.intel_cap.pebs_format >= 1)
 		data.addr = pebs->dla;
 
+	if (x86_pmu.intel_cap.pebs_format >= 2) {
+		/* Only set the TSX weight when no memory weight. */
+		if ((event->attr.sample_type & PERF_SAMPLE_WEIGHT) && !fll)
+			data.weight = intel_hsw_weight(pebs);
+
+		if (event->attr.sample_type & PERF_SAMPLE_TRANSACTION)
+			data.txn = intel_hsw_transaction(pebs);
+	}
+
 	if (has_branch_stack(event))
 		data.br_stack = &cpuc->lbr_stack;
 
@@ -913,17 +978,34 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
 	__intel_pmu_pebs_event(event, iregs, at);
 }
 
-static void __intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, void *at,
-					void *top)
+static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct debug_store *ds = cpuc->ds;
 	struct perf_event *event = NULL;
+	void *at, *top;
 	u64 status = 0;
 	int bit;
 
+	if (!x86_pmu.pebs_active)
+		return;
+
+	at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
+	top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
+
 	ds->pebs_index = ds->pebs_buffer_base;
 
+	if (unlikely(at > top))
+		return;
+
+	/*
+	 * Should not happen, we program the threshold at 1 and do not
+	 * set a reset value.
+	 */
+	WARN_ONCE(top - at > x86_pmu.max_pebs_events * x86_pmu.pebs_record_size,
+		  "Unexpected number of pebs records %ld\n",
+		  (long)(top - at) / x86_pmu.pebs_record_size);
+
 	for (; at < top; at += x86_pmu.pebs_record_size) {
 		struct pebs_record_nhm *p = at;
 
@@ -951,61 +1033,6 @@ static void __intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, void *at,
 	}
 }
 
-static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
-{
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	struct debug_store *ds = cpuc->ds;
-	struct pebs_record_nhm *at, *top;
-	int n;
-
-	if (!x86_pmu.pebs_active)
-		return;
-
-	at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
-	top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
-
-	ds->pebs_index = ds->pebs_buffer_base;
-
-	n = top - at;
-	if (n <= 0)
-		return;
-
-	/*
-	 * Should not happen, we program the threshold at 1 and do not
-	 * set a reset value.
-	 */
-	WARN_ONCE(n > x86_pmu.max_pebs_events,
-		  "Unexpected number of pebs records %d\n", n);
-
-	return __intel_pmu_drain_pebs_nhm(iregs, at, top);
-}
-
-static void intel_pmu_drain_pebs_hsw(struct pt_regs *iregs)
-{
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	struct debug_store *ds = cpuc->ds;
-	struct pebs_record_hsw *at, *top;
-	int n;
-
-	if (!x86_pmu.pebs_active)
-		return;
-
-	at  = (struct pebs_record_hsw *)(unsigned long)ds->pebs_buffer_base;
-	top = (struct pebs_record_hsw *)(unsigned long)ds->pebs_index;
-
-	n = top - at;
-	if (n <= 0)
-		return;
-	/*
-	 * Should not happen, we program the threshold at 1 and do not
-	 * set a reset value.
-	 */
-	WARN_ONCE(n > x86_pmu.max_pebs_events,
-		  "Unexpected number of pebs records %d\n", n);
-
-	return __intel_pmu_drain_pebs_nhm(iregs, at, top);
-}
-
 /*
  * BTS, PEBS probe and setup
  */
@@ -1040,7 +1067,7 @@ void intel_ds_init(void)
 		case 2:
 			pr_cont("PEBS fmt2%c, ", pebs_type);
 			x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
-			x86_pmu.drain_pebs = intel_pmu_drain_pebs_hsw;
+			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
 			break;
 
 		default:
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index d5be06a5005e99eb9ac8dbe808f565013bddc929..d82d155aca8c7c6c44c9ba7150dae39e88e16fad 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -284,6 +284,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 	int lbr_format = x86_pmu.intel_cap.lbr_format;
 	u64 tos = intel_pmu_lbr_tos();
 	int i;
+	int out = 0;
 
 	for (i = 0; i < x86_pmu.lbr_nr; i++) {
 		unsigned long lbr_idx = (tos - i) & mask;
@@ -306,15 +307,27 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 		}
 		from = (u64)((((s64)from) << skip) >> skip);
 
-		cpuc->lbr_entries[i].from	= from;
-		cpuc->lbr_entries[i].to		= to;
-		cpuc->lbr_entries[i].mispred	= mis;
-		cpuc->lbr_entries[i].predicted	= pred;
-		cpuc->lbr_entries[i].in_tx	= in_tx;
-		cpuc->lbr_entries[i].abort	= abort;
-		cpuc->lbr_entries[i].reserved	= 0;
+		/*
+		 * Some CPUs report duplicated abort records,
+		 * with the second entry not having an abort bit set.
+		 * Skip them here. This loop runs backwards,
+		 * so we need to undo the previous record.
+		 * If the abort just happened outside the window
+		 * the extra entry cannot be removed.
+		 */
+		if (abort && x86_pmu.lbr_double_abort && out > 0)
+			out--;
+
+		cpuc->lbr_entries[out].from	 = from;
+		cpuc->lbr_entries[out].to	 = to;
+		cpuc->lbr_entries[out].mispred	 = mis;
+		cpuc->lbr_entries[out].predicted = pred;
+		cpuc->lbr_entries[out].in_tx	 = in_tx;
+		cpuc->lbr_entries[out].abort	 = abort;
+		cpuc->lbr_entries[out].reserved	 = 0;
+		out++;
 	}
-	cpuc->lbr_stack.nr = i;
+	cpuc->lbr_stack.nr = out;
 }
 
 void intel_pmu_lbr_read(void)
@@ -478,7 +491,7 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
 
 		/* may fail if text not present */
 		bytes = copy_from_user_nmi(buf, (void __user *)from, size);
-		if (bytes != size)
+		if (bytes != 0)
 			return X86_BR_NONE;
 
 		addr = buf;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 4118f9f683151e99402740f1882b205a261a1465..29c248799cede4432ac2d57ca8689a215c35d0cc 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -997,6 +997,20 @@ static int snbep_pci2phy_map_init(int devid)
 		}
 	}
 
+	if (!err) {
+		/*
+		 * For PCI bus with no UBOX device, find the next bus
+		 * that has UBOX device and use its mapping.
+		 */
+		i = -1;
+		for (bus = 255; bus >= 0; bus--) {
+			if (pcibus_to_physid[bus] >= 0)
+				i = pcibus_to_physid[bus];
+			else
+				pcibus_to_physid[bus] = i;
+		}
+	}
+
 	if (ubox_dev)
 		pci_dev_put(ubox_dev);
 
@@ -1099,6 +1113,24 @@ static struct attribute *ivt_uncore_qpi_formats_attr[] = {
 	&format_attr_umask.attr,
 	&format_attr_edge.attr,
 	&format_attr_thresh8.attr,
+	&format_attr_match_rds.attr,
+	&format_attr_match_rnid30.attr,
+	&format_attr_match_rnid4.attr,
+	&format_attr_match_dnid.attr,
+	&format_attr_match_mc.attr,
+	&format_attr_match_opc.attr,
+	&format_attr_match_vnw.attr,
+	&format_attr_match0.attr,
+	&format_attr_match1.attr,
+	&format_attr_mask_rds.attr,
+	&format_attr_mask_rnid30.attr,
+	&format_attr_mask_rnid4.attr,
+	&format_attr_mask_dnid.attr,
+	&format_attr_mask_mc.attr,
+	&format_attr_mask_opc.attr,
+	&format_attr_mask_vnw.attr,
+	&format_attr_mask0.attr,
+	&format_attr_mask1.attr,
 	NULL,
 };
 
@@ -1312,17 +1344,83 @@ static struct intel_uncore_type ivt_uncore_imc = {
 	IVT_UNCORE_PCI_COMMON_INIT(),
 };
 
+/* registers in IRP boxes are not properly aligned */
+static unsigned ivt_uncore_irp_ctls[] = {0xd8, 0xdc, 0xe0, 0xe4};
+static unsigned ivt_uncore_irp_ctrs[] = {0xa0, 0xb0, 0xb8, 0xc0};
+
+static void ivt_uncore_irp_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+	struct pci_dev *pdev = box->pci_dev;
+	struct hw_perf_event *hwc = &event->hw;
+
+	pci_write_config_dword(pdev, ivt_uncore_irp_ctls[hwc->idx],
+			       hwc->config | SNBEP_PMON_CTL_EN);
+}
+
+static void ivt_uncore_irp_disable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+	struct pci_dev *pdev = box->pci_dev;
+	struct hw_perf_event *hwc = &event->hw;
+
+	pci_write_config_dword(pdev, ivt_uncore_irp_ctls[hwc->idx], hwc->config);
+}
+
+static u64 ivt_uncore_irp_read_counter(struct intel_uncore_box *box, struct perf_event *event)
+{
+	struct pci_dev *pdev = box->pci_dev;
+	struct hw_perf_event *hwc = &event->hw;
+	u64 count = 0;
+
+	pci_read_config_dword(pdev, ivt_uncore_irp_ctrs[hwc->idx], (u32 *)&count);
+	pci_read_config_dword(pdev, ivt_uncore_irp_ctrs[hwc->idx] + 4, (u32 *)&count + 1);
+
+	return count;
+}
+
+static struct intel_uncore_ops ivt_uncore_irp_ops = {
+	.init_box	= ivt_uncore_pci_init_box,
+	.disable_box	= snbep_uncore_pci_disable_box,
+	.enable_box	= snbep_uncore_pci_enable_box,
+	.disable_event	= ivt_uncore_irp_disable_event,
+	.enable_event	= ivt_uncore_irp_enable_event,
+	.read_counter	= ivt_uncore_irp_read_counter,
+};
+
+static struct intel_uncore_type ivt_uncore_irp = {
+	.name			= "irp",
+	.num_counters		= 4,
+	.num_boxes		= 1,
+	.perf_ctr_bits		= 48,
+	.event_mask		= IVT_PMON_RAW_EVENT_MASK,
+	.box_ctl		= SNBEP_PCI_PMON_BOX_CTL,
+	.ops			= &ivt_uncore_irp_ops,
+	.format_group		= &ivt_uncore_format_group,
+};
+
+static struct intel_uncore_ops ivt_uncore_qpi_ops = {
+	.init_box	= ivt_uncore_pci_init_box,
+	.disable_box	= snbep_uncore_pci_disable_box,
+	.enable_box	= snbep_uncore_pci_enable_box,
+	.disable_event	= snbep_uncore_pci_disable_event,
+	.enable_event	= snbep_qpi_enable_event,
+	.read_counter	= snbep_uncore_pci_read_counter,
+	.hw_config	= snbep_qpi_hw_config,
+	.get_constraint	= uncore_get_constraint,
+	.put_constraint	= uncore_put_constraint,
+};
+
 static struct intel_uncore_type ivt_uncore_qpi = {
-	.name		= "qpi",
-	.num_counters   = 4,
-	.num_boxes	= 3,
-	.perf_ctr_bits	= 48,
-	.perf_ctr	= SNBEP_PCI_PMON_CTR0,
-	.event_ctl	= SNBEP_PCI_PMON_CTL0,
-	.event_mask	= IVT_QPI_PCI_PMON_RAW_EVENT_MASK,
-	.box_ctl	= SNBEP_PCI_PMON_BOX_CTL,
-	.ops		= &ivt_uncore_pci_ops,
-	.format_group	= &ivt_uncore_qpi_format_group,
+	.name			= "qpi",
+	.num_counters		= 4,
+	.num_boxes		= 3,
+	.perf_ctr_bits		= 48,
+	.perf_ctr		= SNBEP_PCI_PMON_CTR0,
+	.event_ctl		= SNBEP_PCI_PMON_CTL0,
+	.event_mask		= IVT_QPI_PCI_PMON_RAW_EVENT_MASK,
+	.box_ctl		= SNBEP_PCI_PMON_BOX_CTL,
+	.num_shared_regs	= 1,
+	.ops			= &ivt_uncore_qpi_ops,
+	.format_group		= &ivt_uncore_qpi_format_group,
 };
 
 static struct intel_uncore_type ivt_uncore_r2pcie = {
@@ -1346,6 +1444,7 @@ static struct intel_uncore_type ivt_uncore_r3qpi = {
 enum {
 	IVT_PCI_UNCORE_HA,
 	IVT_PCI_UNCORE_IMC,
+	IVT_PCI_UNCORE_IRP,
 	IVT_PCI_UNCORE_QPI,
 	IVT_PCI_UNCORE_R2PCIE,
 	IVT_PCI_UNCORE_R3QPI,
@@ -1354,6 +1453,7 @@ enum {
 static struct intel_uncore_type *ivt_pci_uncores[] = {
 	[IVT_PCI_UNCORE_HA]	= &ivt_uncore_ha,
 	[IVT_PCI_UNCORE_IMC]	= &ivt_uncore_imc,
+	[IVT_PCI_UNCORE_IRP]	= &ivt_uncore_irp,
 	[IVT_PCI_UNCORE_QPI]	= &ivt_uncore_qpi,
 	[IVT_PCI_UNCORE_R2PCIE]	= &ivt_uncore_r2pcie,
 	[IVT_PCI_UNCORE_R3QPI]	= &ivt_uncore_r3qpi,
@@ -1401,6 +1501,10 @@ static DEFINE_PCI_DEVICE_TABLE(ivt_uncore_pci_ids) = {
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef1),
 		.driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 7),
 	},
+	{ /* IRP */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe39),
+		.driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IRP, 0),
+	},
 	{ /* QPI0 Port 0 */
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe32),
 		.driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_QPI, 0),
@@ -1429,6 +1533,16 @@ static DEFINE_PCI_DEVICE_TABLE(ivt_uncore_pci_ids) = {
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3e),
 		.driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_R3QPI, 2),
 	},
+	{ /* QPI Port 0 filter  */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe86),
+		.driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+						   SNBEP_PCI_QPI_PORT0_FILTER),
+	},
+	{ /* QPI Port 0 filter  */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe96),
+		.driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+						   SNBEP_PCI_QPI_PORT1_FILTER),
+	},
 	{ /* end: all zeroes */ }
 };
 
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
index 4f74d94c8d9727f013b3ebed6fe5da6e3975ac93..ddf9ecb53cc3e23171ea889dc47339e67ba5d2c8 100644
--- a/arch/x86/lib/usercopy.c
+++ b/arch/x86/lib/usercopy.c
@@ -11,39 +11,26 @@
 #include <linux/sched.h>
 
 /*
- * best effort, GUP based copy_from_user() that is NMI-safe
+ * We rely on the nested NMI work to allow atomic faults from the NMI path; the
+ * nested NMI paths are careful to preserve CR2.
  */
 unsigned long
 copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
 {
-	unsigned long offset, addr = (unsigned long)from;
-	unsigned long size, len = 0;
-	struct page *page;
-	void *map;
-	int ret;
+	unsigned long ret;
 
 	if (__range_not_ok(from, n, TASK_SIZE))
-		return len;
-
-	do {
-		ret = __get_user_pages_fast(addr, 1, 0, &page);
-		if (!ret)
-			break;
-
-		offset = addr & (PAGE_SIZE - 1);
-		size = min(PAGE_SIZE - offset, n - len);
-
-		map = kmap_atomic(page);
-		memcpy(to, map+offset, size);
-		kunmap_atomic(map);
-		put_page(page);
-
-		len  += size;
-		to   += size;
-		addr += size;
-
-	} while (len < n);
-
-	return len;
+		return 0;
+
+	/*
+	 * Even though this function is typically called from NMI/IRQ context
+	 * disable pagefaults so that its behaviour is consistent even when
+	 * called form other contexts.
+	 */
+	pagefault_disable();
+	ret = __copy_from_user_inatomic(to, from, n);
+	pagefault_enable();
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(copy_from_user_nmi);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 3aaeffcfd67a6d9ae3b3e89514fed55c31e2ea4d..7a517bb410600ffc6d741973ff865c7c1d8382e1 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -51,7 +51,7 @@ kmmio_fault(struct pt_regs *regs, unsigned long addr)
 	return 0;
 }
 
-static inline int __kprobes notify_page_fault(struct pt_regs *regs)
+static inline int __kprobes kprobes_fault(struct pt_regs *regs)
 {
 	int ret = 0;
 
@@ -1048,7 +1048,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
 			return;
 
 		/* kprobes don't want to hook the spurious faults: */
-		if (notify_page_fault(regs))
+		if (kprobes_fault(regs))
 			return;
 		/*
 		 * Don't take the mm semaphore here. If we fixup a prefetch
@@ -1060,23 +1060,8 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	}
 
 	/* kprobes don't want to hook the spurious faults: */
-	if (unlikely(notify_page_fault(regs)))
+	if (unlikely(kprobes_fault(regs)))
 		return;
-	/*
-	 * It's safe to allow irq's after cr2 has been saved and the
-	 * vmalloc fault has been handled.
-	 *
-	 * User-mode registers count as a user access even for any
-	 * potential system fault or CPU buglet:
-	 */
-	if (user_mode_vm(regs)) {
-		local_irq_enable();
-		error_code |= PF_USER;
-		flags |= FAULT_FLAG_USER;
-	} else {
-		if (regs->flags & X86_EFLAGS_IF)
-			local_irq_enable();
-	}
 
 	if (unlikely(error_code & PF_RSVD))
 		pgtable_bad(regs, error_code, address);
@@ -1088,8 +1073,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
 		}
 	}
 
-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
-
 	/*
 	 * If we're in an interrupt, have no user context or are running
 	 * in an atomic region then we must not take the fault:
@@ -1099,6 +1082,24 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
 		return;
 	}
 
+	/*
+	 * It's safe to allow irq's after cr2 has been saved and the
+	 * vmalloc fault has been handled.
+	 *
+	 * User-mode registers count as a user access even for any
+	 * potential system fault or CPU buglet:
+	 */
+	if (user_mode_vm(regs)) {
+		local_irq_enable();
+		error_code |= PF_USER;
+		flags |= FAULT_FLAG_USER;
+	} else {
+		if (regs->flags & X86_EFLAGS_IF)
+			local_irq_enable();
+	}
+
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+
 	if (error_code & PF_WRITE)
 		flags |= FAULT_FLAG_WRITE;
 
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c
index d6aa6e8315d13ac718615947077f2b6fd113cb35..5d04be5efb6401b1b7512f4ca519a24a016b20d2 100644
--- a/arch/x86/oprofile/backtrace.c
+++ b/arch/x86/oprofile/backtrace.c
@@ -47,7 +47,7 @@ dump_user_backtrace_32(struct stack_frame_ia32 *head)
 	unsigned long bytes;
 
 	bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead));
-	if (bytes != sizeof(bufhead))
+	if (bytes != 0)
 		return NULL;
 
 	fp = (struct stack_frame_ia32 *) compat_ptr(bufhead[0].next_frame);
@@ -93,7 +93,7 @@ static struct stack_frame *dump_user_backtrace(struct stack_frame *head)
 	unsigned long bytes;
 
 	bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead));
-	if (bytes != sizeof(bufhead))
+	if (bytes != 0)
 		return NULL;
 
 	oprofile_add_trace(bufhead[0].return_address);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index c8ba627c1d608733b8480bb929d9e81d97e57fa0..2e069d1288df5f04e6982243d7cac0f7455cd31d 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -584,6 +584,10 @@ struct perf_sample_data {
 	struct perf_regs_user		regs_user;
 	u64				stack_user_size;
 	u64				weight;
+	/*
+	 * Transaction flags for abort events:
+	 */
+	u64				txn;
 };
 
 static inline void perf_sample_data_init(struct perf_sample_data *data,
@@ -599,6 +603,7 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
 	data->stack_user_size = 0;
 	data->weight = 0;
 	data->data_src.val = 0;
+	data->txn = 0;
 }
 
 extern void perf_output_sample(struct perf_output_handle *handle,
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 06f28beed7c2c405a0a7760ca231c188257926b2..319eae70fe8415f774e57c4861b98688fb81bb4c 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -30,6 +30,7 @@
 struct vm_area_struct;
 struct mm_struct;
 struct inode;
+struct notifier_block;
 
 #ifdef CONFIG_ARCH_SUPPORTS_UPROBES
 # include <asm/uprobes.h>
@@ -108,6 +109,7 @@ extern int __weak set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsign
 extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
 extern bool __weak is_swbp_insn(uprobe_opcode_t *insn);
 extern bool __weak is_trap_insn(uprobe_opcode_t *insn);
+extern int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t);
 extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
 extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool);
 extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
@@ -117,14 +119,21 @@ extern void uprobe_start_dup_mmap(void);
 extern void uprobe_end_dup_mmap(void);
 extern void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm);
 extern void uprobe_free_utask(struct task_struct *t);
-extern void uprobe_copy_process(struct task_struct *t);
+extern void uprobe_copy_process(struct task_struct *t, unsigned long flags);
 extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs);
 extern int uprobe_post_sstep_notifier(struct pt_regs *regs);
 extern int uprobe_pre_sstep_notifier(struct pt_regs *regs);
 extern void uprobe_notify_resume(struct pt_regs *regs);
 extern bool uprobe_deny_signal(void);
-extern bool __weak arch_uprobe_skip_sstep(struct arch_uprobe *aup, struct pt_regs *regs);
+extern bool arch_uprobe_skip_sstep(struct arch_uprobe *aup, struct pt_regs *regs);
 extern void uprobe_clear_state(struct mm_struct *mm);
+extern int  arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr);
+extern int  arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs);
+extern int  arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs);
+extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk);
+extern int  arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data);
+extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs);
+extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs);
 #else /* !CONFIG_UPROBES */
 struct uprobes_state {
 };
@@ -174,7 +183,7 @@ static inline unsigned long uprobe_get_swbp_addr(struct pt_regs *regs)
 static inline void uprobe_free_utask(struct task_struct *t)
 {
 }
-static inline void uprobe_copy_process(struct task_struct *t)
+static inline void uprobe_copy_process(struct task_struct *t, unsigned long flags)
 {
 }
 static inline void uprobe_clear_state(struct mm_struct *mm)
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 2fc1602e23bb041b9087597370096fcc90e84527..e1802d6153aec4d0c5fd81300dcde057d9bf8a59 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -136,8 +136,9 @@ enum perf_event_sample_format {
 	PERF_SAMPLE_WEIGHT			= 1U << 14,
 	PERF_SAMPLE_DATA_SRC			= 1U << 15,
 	PERF_SAMPLE_IDENTIFIER			= 1U << 16,
+	PERF_SAMPLE_TRANSACTION			= 1U << 17,
 
-	PERF_SAMPLE_MAX = 1U << 17,		/* non-ABI */
+	PERF_SAMPLE_MAX = 1U << 18,		/* non-ABI */
 };
 
 /*
@@ -180,6 +181,28 @@ enum perf_sample_regs_abi {
 	PERF_SAMPLE_REGS_ABI_64		= 2,
 };
 
+/*
+ * Values for the memory transaction event qualifier, mostly for
+ * abort events. Multiple bits can be set.
+ */
+enum {
+	PERF_TXN_ELISION        = (1 << 0), /* From elision */
+	PERF_TXN_TRANSACTION    = (1 << 1), /* From transaction */
+	PERF_TXN_SYNC           = (1 << 2), /* Instruction is related */
+	PERF_TXN_ASYNC          = (1 << 3), /* Instruction not related */
+	PERF_TXN_RETRY          = (1 << 4), /* Retry possible */
+	PERF_TXN_CONFLICT       = (1 << 5), /* Conflict abort */
+	PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */
+	PERF_TXN_CAPACITY_READ  = (1 << 7), /* Capacity read abort */
+
+	PERF_TXN_MAX	        = (1 << 8), /* non-ABI */
+
+	/* bits 32..63 are reserved for the abort code */
+
+	PERF_TXN_ABORT_MASK  = (0xffffffffULL << 32),
+	PERF_TXN_ABORT_SHIFT = 32,
+};
+
 /*
  * The format of the data returned by read() on a perf event fd,
  * as specified by attr.read_format:
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 663f43a20f73c02c73fbeba9bcb28deb82be15a2..8c875ef6e120920c5fad9c1c70caf20d418bb7aa 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -175,8 +175,8 @@ int sysctl_perf_event_sample_rate __read_mostly	= DEFAULT_MAX_SAMPLE_RATE;
 static int max_samples_per_tick __read_mostly	= DIV_ROUND_UP(DEFAULT_MAX_SAMPLE_RATE, HZ);
 static int perf_sample_period_ns __read_mostly	= DEFAULT_SAMPLE_PERIOD_NS;
 
-static atomic_t perf_sample_allowed_ns __read_mostly =
-	ATOMIC_INIT( DEFAULT_SAMPLE_PERIOD_NS * DEFAULT_CPU_TIME_MAX_PERCENT / 100);
+static int perf_sample_allowed_ns __read_mostly =
+	DEFAULT_SAMPLE_PERIOD_NS * DEFAULT_CPU_TIME_MAX_PERCENT / 100;
 
 void update_perf_cpu_limits(void)
 {
@@ -184,7 +184,7 @@ void update_perf_cpu_limits(void)
 
 	tmp *= sysctl_perf_cpu_time_max_percent;
 	do_div(tmp, 100);
-	atomic_set(&perf_sample_allowed_ns, tmp);
+	ACCESS_ONCE(perf_sample_allowed_ns) = tmp;
 }
 
 static int perf_rotate_context(struct perf_cpu_context *cpuctx);
@@ -193,7 +193,7 @@ int perf_proc_update_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp,
 		loff_t *ppos)
 {
-	int ret = proc_dointvec(table, write, buffer, lenp, ppos);
+	int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 
 	if (ret || !write)
 		return ret;
@@ -228,14 +228,15 @@ int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
  * we detect that events are taking too long.
  */
 #define NR_ACCUMULATED_SAMPLES 128
-DEFINE_PER_CPU(u64, running_sample_length);
+static DEFINE_PER_CPU(u64, running_sample_length);
 
 void perf_sample_event_took(u64 sample_len_ns)
 {
 	u64 avg_local_sample_len;
 	u64 local_samples_len;
+	u64 allowed_ns = ACCESS_ONCE(perf_sample_allowed_ns);
 
-	if (atomic_read(&perf_sample_allowed_ns) == 0)
+	if (allowed_ns == 0)
 		return;
 
 	/* decay the counter by 1 average sample */
@@ -251,7 +252,7 @@ void perf_sample_event_took(u64 sample_len_ns)
 	 */
 	avg_local_sample_len = local_samples_len/NR_ACCUMULATED_SAMPLES;
 
-	if (avg_local_sample_len <= atomic_read(&perf_sample_allowed_ns))
+	if (avg_local_sample_len <= allowed_ns)
 		return;
 
 	if (max_samples_per_tick <= 1)
@@ -262,10 +263,9 @@ void perf_sample_event_took(u64 sample_len_ns)
 	perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate;
 
 	printk_ratelimited(KERN_WARNING
-			"perf samples too long (%lld > %d), lowering "
+			"perf samples too long (%lld > %lld), lowering "
 			"kernel.perf_event_max_sample_rate to %d\n",
-			avg_local_sample_len,
-			atomic_read(&perf_sample_allowed_ns),
+			avg_local_sample_len, allowed_ns,
 			sysctl_perf_event_sample_rate);
 
 	update_perf_cpu_limits();
@@ -899,6 +899,7 @@ static void unclone_ctx(struct perf_event_context *ctx)
 		put_ctx(ctx->parent_ctx);
 		ctx->parent_ctx = NULL;
 	}
+	ctx->generation++;
 }
 
 static u32 perf_event_pid(struct perf_event *event, struct task_struct *p)
@@ -1136,6 +1137,8 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 	ctx->nr_events++;
 	if (event->attr.inherit_stat)
 		ctx->nr_stat++;
+
+	ctx->generation++;
 }
 
 /*
@@ -1201,6 +1204,9 @@ static void perf_event__header_size(struct perf_event *event)
 	if (sample_type & PERF_SAMPLE_DATA_SRC)
 		size += sizeof(data->data_src.val);
 
+	if (sample_type & PERF_SAMPLE_TRANSACTION)
+		size += sizeof(data->txn);
+
 	event->header_size = size;
 }
 
@@ -1310,6 +1316,8 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
 	 */
 	if (event->state > PERF_EVENT_STATE_OFF)
 		event->state = PERF_EVENT_STATE_OFF;
+
+	ctx->generation++;
 }
 
 static void perf_group_detach(struct perf_event *event)
@@ -2146,22 +2154,38 @@ static void ctx_sched_out(struct perf_event_context *ctx,
 }
 
 /*
- * Test whether two contexts are equivalent, i.e. whether they
- * have both been cloned from the same version of the same context
- * and they both have the same number of enabled events.
- * If the number of enabled events is the same, then the set
- * of enabled events should be the same, because these are both
- * inherited contexts, therefore we can't access individual events
- * in them directly with an fd; we can only enable/disable all
- * events via prctl, or enable/disable all events in a family
- * via ioctl, which will have the same effect on both contexts.
+ * Test whether two contexts are equivalent, i.e. whether they have both been
+ * cloned from the same version of the same context.
+ *
+ * Equivalence is measured using a generation number in the context that is
+ * incremented on each modification to it; see unclone_ctx(), list_add_event()
+ * and list_del_event().
  */
 static int context_equiv(struct perf_event_context *ctx1,
 			 struct perf_event_context *ctx2)
 {
-	return ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx
-		&& ctx1->parent_gen == ctx2->parent_gen
-		&& !ctx1->pin_count && !ctx2->pin_count;
+	/* Pinning disables the swap optimization */
+	if (ctx1->pin_count || ctx2->pin_count)
+		return 0;
+
+	/* If ctx1 is the parent of ctx2 */
+	if (ctx1 == ctx2->parent_ctx && ctx1->generation == ctx2->parent_gen)
+		return 1;
+
+	/* If ctx2 is the parent of ctx1 */
+	if (ctx1->parent_ctx == ctx2 && ctx1->parent_gen == ctx2->generation)
+		return 1;
+
+	/*
+	 * If ctx1 and ctx2 have the same parent; we flatten the parent
+	 * hierarchy, see perf_event_init_context().
+	 */
+	if (ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx &&
+			ctx1->parent_gen == ctx2->parent_gen)
+		return 1;
+
+	/* Unmatched */
+	return 0;
 }
 
 static void __perf_event_sync_stat(struct perf_event *event,
@@ -2244,7 +2268,7 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
 {
 	struct perf_event_context *ctx = task->perf_event_ctxp[ctxn];
 	struct perf_event_context *next_ctx;
-	struct perf_event_context *parent;
+	struct perf_event_context *parent, *next_parent;
 	struct perf_cpu_context *cpuctx;
 	int do_switch = 1;
 
@@ -2256,10 +2280,18 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
 		return;
 
 	rcu_read_lock();
-	parent = rcu_dereference(ctx->parent_ctx);
 	next_ctx = next->perf_event_ctxp[ctxn];
-	if (parent && next_ctx &&
-	    rcu_dereference(next_ctx->parent_ctx) == parent) {
+	if (!next_ctx)
+		goto unlock;
+
+	parent = rcu_dereference(ctx->parent_ctx);
+	next_parent = rcu_dereference(next_ctx->parent_ctx);
+
+	/* If neither context have a parent context; they cannot be clones. */
+	if (!parent && !next_parent)
+		goto unlock;
+
+	if (next_parent == ctx || next_ctx == parent || next_parent == parent) {
 		/*
 		 * Looks like the two contexts are clones, so we might be
 		 * able to optimize the context switch.  We lock both
@@ -2287,6 +2319,7 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
 		raw_spin_unlock(&next_ctx->lock);
 		raw_spin_unlock(&ctx->lock);
 	}
+unlock:
 	rcu_read_unlock();
 
 	if (do_switch) {
@@ -4572,6 +4605,9 @@ void perf_output_sample(struct perf_output_handle *handle,
 	if (sample_type & PERF_SAMPLE_DATA_SRC)
 		perf_output_put(handle, data->data_src.val);
 
+	if (sample_type & PERF_SAMPLE_TRANSACTION)
+		perf_output_put(handle, data->txn);
+
 	if (!event->attr.watermark) {
 		int wakeup_events = event->attr.wakeup_events;
 
@@ -5100,27 +5136,26 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
 	unsigned int size;
 	char tmp[16];
 	char *buf = NULL;
-	const char *name;
-
-	memset(tmp, 0, sizeof(tmp));
+	char *name;
 
 	if (file) {
 		struct inode *inode;
 		dev_t dev;
+
+		buf = kmalloc(PATH_MAX, GFP_KERNEL);
+		if (!buf) {
+			name = "//enomem";
+			goto cpy_name;
+		}
 		/*
-		 * d_path works from the end of the rb backwards, so we
+		 * d_path() works from the end of the rb backwards, so we
 		 * need to add enough zero bytes after the string to handle
 		 * the 64bit alignment we do later.
 		 */
-		buf = kzalloc(PATH_MAX + sizeof(u64), GFP_KERNEL);
-		if (!buf) {
-			name = strncpy(tmp, "//enomem", sizeof(tmp));
-			goto got_name;
-		}
-		name = d_path(&file->f_path, buf, PATH_MAX);
+		name = d_path(&file->f_path, buf, PATH_MAX - sizeof(u64));
 		if (IS_ERR(name)) {
-			name = strncpy(tmp, "//toolong", sizeof(tmp));
-			goto got_name;
+			name = "//toolong";
+			goto cpy_name;
 		}
 		inode = file_inode(vma->vm_file);
 		dev = inode->i_sb->s_dev;
@@ -5128,34 +5163,39 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
 		gen = inode->i_generation;
 		maj = MAJOR(dev);
 		min = MINOR(dev);
-
+		goto got_name;
 	} else {
-		if (arch_vma_name(mmap_event->vma)) {
-			name = strncpy(tmp, arch_vma_name(mmap_event->vma),
-				       sizeof(tmp) - 1);
-			tmp[sizeof(tmp) - 1] = '\0';
-			goto got_name;
-		}
+		name = (char *)arch_vma_name(vma);
+		if (name)
+			goto cpy_name;
 
-		if (!vma->vm_mm) {
-			name = strncpy(tmp, "[vdso]", sizeof(tmp));
-			goto got_name;
-		} else if (vma->vm_start <= vma->vm_mm->start_brk &&
+		if (vma->vm_start <= vma->vm_mm->start_brk &&
 				vma->vm_end >= vma->vm_mm->brk) {
-			name = strncpy(tmp, "[heap]", sizeof(tmp));
-			goto got_name;
-		} else if (vma->vm_start <= vma->vm_mm->start_stack &&
+			name = "[heap]";
+			goto cpy_name;
+		}
+		if (vma->vm_start <= vma->vm_mm->start_stack &&
 				vma->vm_end >= vma->vm_mm->start_stack) {
-			name = strncpy(tmp, "[stack]", sizeof(tmp));
-			goto got_name;
+			name = "[stack]";
+			goto cpy_name;
 		}
 
-		name = strncpy(tmp, "//anon", sizeof(tmp));
-		goto got_name;
+		name = "//anon";
+		goto cpy_name;
 	}
 
+cpy_name:
+	strlcpy(tmp, name, sizeof(tmp));
+	name = tmp;
 got_name:
-	size = ALIGN(strlen(name)+1, sizeof(u64));
+	/*
+	 * Since our buffer works in 8 byte units we need to align our string
+	 * size to a multiple of 8. However, we must guarantee the tail end is
+	 * zero'd out to avoid leaking random bits to userspace.
+	 */
+	size = strlen(name)+1;
+	while (!IS_ALIGNED(size, sizeof(u64)))
+		name[size++] = '\0';
 
 	mmap_event->file_name = name;
 	mmap_event->file_size = size;
@@ -7129,7 +7169,6 @@ SYSCALL_DEFINE5(perf_event_open,
 	}
 
 	perf_install_in_context(ctx, event, event->cpu);
-	++ctx->generation;
 	perf_unpin_context(ctx);
 	mutex_unlock(&ctx->mutex);
 
@@ -7212,7 +7251,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 	WARN_ON_ONCE(ctx->parent_ctx);
 	mutex_lock(&ctx->mutex);
 	perf_install_in_context(ctx, event, cpu);
-	++ctx->generation;
 	perf_unpin_context(ctx);
 	mutex_unlock(&ctx->mutex);
 
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index ca6599723be5624cdeb4a3cad5ef5dd10a4e1c7b..569b218782ad6f52053a21495c893935dcde5b10 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -82,16 +82,16 @@ static inline unsigned long perf_data_size(struct ring_buffer *rb)
 }
 
 #define DEFINE_OUTPUT_COPY(func_name, memcpy_func)			\
-static inline unsigned int						\
+static inline unsigned long						\
 func_name(struct perf_output_handle *handle,				\
-	  const void *buf, unsigned int len)				\
+	  const void *buf, unsigned long len)				\
 {									\
 	unsigned long size, written;					\
 									\
 	do {								\
-		size = min_t(unsigned long, handle->size, len);		\
-									\
+		size    = min(handle->size, len);			\
 		written = memcpy_func(handle->addr, buf, size);		\
+		written = size - written;				\
 									\
 		len -= written;						\
 		handle->addr += written;				\
@@ -110,20 +110,37 @@ func_name(struct perf_output_handle *handle,				\
 	return len;							\
 }
 
-static inline int memcpy_common(void *dst, const void *src, size_t n)
+static inline unsigned long
+memcpy_common(void *dst, const void *src, unsigned long n)
 {
 	memcpy(dst, src, n);
-	return n;
+	return 0;
 }
 
 DEFINE_OUTPUT_COPY(__output_copy, memcpy_common)
 
-#define MEMCPY_SKIP(dst, src, n) (n)
+static inline unsigned long
+memcpy_skip(void *dst, const void *src, unsigned long n)
+{
+	return 0;
+}
 
-DEFINE_OUTPUT_COPY(__output_skip, MEMCPY_SKIP)
+DEFINE_OUTPUT_COPY(__output_skip, memcpy_skip)
 
 #ifndef arch_perf_out_copy_user
-#define arch_perf_out_copy_user __copy_from_user_inatomic
+#define arch_perf_out_copy_user arch_perf_out_copy_user
+
+static inline unsigned long
+arch_perf_out_copy_user(void *dst, const void *src, unsigned long n)
+{
+	unsigned long ret;
+
+	pagefault_disable();
+	ret = __copy_from_user_inatomic(dst, src, n);
+	pagefault_enable();
+
+	return ret;
+}
 #endif
 
 DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user)
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 9c2ddfbf452554902d9ad1c23deba5794d0ed664..e8b168af135ba3ec7d72c780746ce02f051be2e3 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -12,40 +12,10 @@
 #include <linux/perf_event.h>
 #include <linux/vmalloc.h>
 #include <linux/slab.h>
+#include <linux/circ_buf.h>
 
 #include "internal.h"
 
-static bool perf_output_space(struct ring_buffer *rb, unsigned long tail,
-			      unsigned long offset, unsigned long head)
-{
-	unsigned long sz = perf_data_size(rb);
-	unsigned long mask = sz - 1;
-
-	/*
-	 * check if user-writable
-	 * overwrite : over-write its own tail
-	 * !overwrite: buffer possibly drops events.
-	 */
-	if (rb->overwrite)
-		return true;
-
-	/*
-	 * verify that payload is not bigger than buffer
-	 * otherwise masking logic may fail to detect
-	 * the "not enough space" condition
-	 */
-	if ((head - offset) > sz)
-		return false;
-
-	offset = (offset - tail) & mask;
-	head   = (head   - tail) & mask;
-
-	if ((int)(head - offset) < 0)
-		return false;
-
-	return true;
-}
-
 static void perf_output_wakeup(struct perf_output_handle *handle)
 {
 	atomic_set(&handle->rb->poll, POLL_IN);
@@ -115,8 +85,8 @@ static void perf_output_put_handle(struct perf_output_handle *handle)
 	rb->user_page->data_head = head;
 
 	/*
-	 * Now check if we missed an update, rely on the (compiler)
-	 * barrier in atomic_dec_and_test() to re-read rb->head.
+	 * Now check if we missed an update -- rely on previous implied
+	 * compiler barriers to force a re-read.
 	 */
 	if (unlikely(head != local_read(&rb->head))) {
 		local_inc(&rb->nest);
@@ -135,8 +105,7 @@ int perf_output_begin(struct perf_output_handle *handle,
 {
 	struct ring_buffer *rb;
 	unsigned long tail, offset, head;
-	int have_lost;
-	struct perf_sample_data sample_data;
+	int have_lost, page_shift;
 	struct {
 		struct perf_event_header header;
 		u64			 id;
@@ -151,57 +120,63 @@ int perf_output_begin(struct perf_output_handle *handle,
 		event = event->parent;
 
 	rb = rcu_dereference(event->rb);
-	if (!rb)
+	if (unlikely(!rb))
 		goto out;
 
-	handle->rb	= rb;
-	handle->event	= event;
-
-	if (!rb->nr_pages)
+	if (unlikely(!rb->nr_pages))
 		goto out;
 
+	handle->rb    = rb;
+	handle->event = event;
+
 	have_lost = local_read(&rb->lost);
-	if (have_lost) {
-		lost_event.header.size = sizeof(lost_event);
-		perf_event_header__init_id(&lost_event.header, &sample_data,
-					   event);
-		size += lost_event.header.size;
+	if (unlikely(have_lost)) {
+		size += sizeof(lost_event);
+		if (event->attr.sample_id_all)
+			size += event->id_header_size;
 	}
 
 	perf_output_get_handle(handle);
 
 	do {
-		/*
-		 * Userspace could choose to issue a mb() before updating the
-		 * tail pointer. So that all reads will be completed before the
-		 * write is issued.
-		 *
-		 * See perf_output_put_handle().
-		 */
 		tail = ACCESS_ONCE(rb->user_page->data_tail);
-		smp_mb();
 		offset = head = local_read(&rb->head);
-		head += size;
-		if (unlikely(!perf_output_space(rb, tail, offset, head)))
+		if (!rb->overwrite &&
+		    unlikely(CIRC_SPACE(head, tail, perf_data_size(rb)) < size))
 			goto fail;
+		head += size;
 	} while (local_cmpxchg(&rb->head, offset, head) != offset);
 
-	if (head - local_read(&rb->wakeup) > rb->watermark)
+	/*
+	 * Separate the userpage->tail read from the data stores below.
+	 * Matches the MB userspace SHOULD issue after reading the data
+	 * and before storing the new tail position.
+	 *
+	 * See perf_output_put_handle().
+	 */
+	smp_mb();
+
+	if (unlikely(head - local_read(&rb->wakeup) > rb->watermark))
 		local_add(rb->watermark, &rb->wakeup);
 
-	handle->page = offset >> (PAGE_SHIFT + page_order(rb));
-	handle->page &= rb->nr_pages - 1;
-	handle->size = offset & ((PAGE_SIZE << page_order(rb)) - 1);
-	handle->addr = rb->data_pages[handle->page];
-	handle->addr += handle->size;
-	handle->size = (PAGE_SIZE << page_order(rb)) - handle->size;
+	page_shift = PAGE_SHIFT + page_order(rb);
 
-	if (have_lost) {
+	handle->page = (offset >> page_shift) & (rb->nr_pages - 1);
+	offset &= (1UL << page_shift) - 1;
+	handle->addr = rb->data_pages[handle->page] + offset;
+	handle->size = (1UL << page_shift) - offset;
+
+	if (unlikely(have_lost)) {
+		struct perf_sample_data sample_data;
+
+		lost_event.header.size = sizeof(lost_event);
 		lost_event.header.type = PERF_RECORD_LOST;
 		lost_event.header.misc = 0;
 		lost_event.id          = event->id;
 		lost_event.lost        = local_xchg(&rb->lost, 0);
 
+		perf_event_header__init_id(&lost_event.header,
+					   &sample_data, event);
 		perf_output_put(handle, lost_event);
 		perf_event__output_id_sample(event, handle, &sample_data);
 	}
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index ad8e1bdca70e4c702ff5c6e86255dc74a8b98259..24b7d6ca871b632f2e35912e720bfc0162c939c5 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -35,6 +35,7 @@
 #include <linux/kdebug.h>	/* notifier mechanism */
 #include "../../mm/internal.h"	/* munlock_vma_page */
 #include <linux/percpu-rwsem.h>
+#include <linux/task_work.h>
 
 #include <linux/uprobes.h>
 
@@ -244,12 +245,12 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t
  * the architecture. If an arch has variable length instruction and the
  * breakpoint instruction is not of the smallest length instruction
  * supported by that architecture then we need to modify is_trap_at_addr and
- * write_opcode accordingly. This would never be a problem for archs that
- * have fixed length instructions.
+ * uprobe_write_opcode accordingly. This would never be a problem for archs
+ * that have fixed length instructions.
  */
 
 /*
- * write_opcode - write the opcode at a given virtual address.
+ * uprobe_write_opcode - write the opcode at a given virtual address.
  * @mm: the probed process address space.
  * @vaddr: the virtual address to store the opcode.
  * @opcode: opcode to be written at @vaddr.
@@ -260,7 +261,7 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t
  * For mm @mm, write the opcode at @vaddr.
  * Return 0 (success) or a negative errno.
  */
-static int write_opcode(struct mm_struct *mm, unsigned long vaddr,
+int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr,
 			uprobe_opcode_t opcode)
 {
 	struct page *old_page, *new_page;
@@ -314,7 +315,7 @@ static int write_opcode(struct mm_struct *mm, unsigned long vaddr,
  */
 int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr)
 {
-	return write_opcode(mm, vaddr, UPROBE_SWBP_INSN);
+	return uprobe_write_opcode(mm, vaddr, UPROBE_SWBP_INSN);
 }
 
 /**
@@ -329,7 +330,7 @@ int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned
 int __weak
 set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr)
 {
-	return write_opcode(mm, vaddr, *(uprobe_opcode_t *)auprobe->insn);
+	return uprobe_write_opcode(mm, vaddr, *(uprobe_opcode_t *)auprobe->insn);
 }
 
 static int match_uprobe(struct uprobe *l, struct uprobe *r)
@@ -503,9 +504,8 @@ static bool consumer_del(struct uprobe *uprobe, struct uprobe_consumer *uc)
 	return ret;
 }
 
-static int
-__copy_insn(struct address_space *mapping, struct file *filp, char *insn,
-			unsigned long nbytes, loff_t offset)
+static int __copy_insn(struct address_space *mapping, struct file *filp,
+			void *insn, int nbytes, loff_t offset)
 {
 	struct page *page;
 
@@ -527,28 +527,28 @@ __copy_insn(struct address_space *mapping, struct file *filp, char *insn,
 
 static int copy_insn(struct uprobe *uprobe, struct file *filp)
 {
-	struct address_space *mapping;
-	unsigned long nbytes;
-	int bytes;
-
-	nbytes = PAGE_SIZE - (uprobe->offset & ~PAGE_MASK);
-	mapping = uprobe->inode->i_mapping;
+	struct address_space *mapping = uprobe->inode->i_mapping;
+	loff_t offs = uprobe->offset;
+	void *insn = uprobe->arch.insn;
+	int size = MAX_UINSN_BYTES;
+	int len, err = -EIO;
 
-	/* Instruction at end of binary; copy only available bytes */
-	if (uprobe->offset + MAX_UINSN_BYTES > uprobe->inode->i_size)
-		bytes = uprobe->inode->i_size - uprobe->offset;
-	else
-		bytes = MAX_UINSN_BYTES;
+	/* Copy only available bytes, -EIO if nothing was read */
+	do {
+		if (offs >= i_size_read(uprobe->inode))
+			break;
 
-	/* Instruction at the page-boundary; copy bytes in second page */
-	if (nbytes < bytes) {
-		int err = __copy_insn(mapping, filp, uprobe->arch.insn + nbytes,
-				bytes - nbytes, uprobe->offset + nbytes);
+		len = min_t(int, size, PAGE_SIZE - (offs & ~PAGE_MASK));
+		err = __copy_insn(mapping, filp, insn, len, offs);
 		if (err)
-			return err;
-		bytes = nbytes;
-	}
-	return __copy_insn(mapping, filp, uprobe->arch.insn, bytes, uprobe->offset);
+			break;
+
+		insn += len;
+		offs += len;
+		size -= len;
+	} while (size);
+
+	return err;
 }
 
 static int prepare_uprobe(struct uprobe *uprobe, struct file *file,
@@ -576,7 +576,7 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file,
 	if (ret)
 		goto out;
 
-	/* write_opcode() assumes we don't cross page boundary */
+	/* uprobe_write_opcode() assumes we don't cross page boundary */
 	BUG_ON((uprobe->offset & ~PAGE_MASK) +
 			UPROBE_SWBP_INSN_SIZE > PAGE_SIZE);
 
@@ -1096,21 +1096,22 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon
 }
 
 /* Slot allocation for XOL */
-static int xol_add_vma(struct xol_area *area)
+static int xol_add_vma(struct mm_struct *mm, struct xol_area *area)
 {
-	struct mm_struct *mm = current->mm;
 	int ret = -EALREADY;
 
 	down_write(&mm->mmap_sem);
 	if (mm->uprobes_state.xol_area)
 		goto fail;
 
-	ret = -ENOMEM;
-	/* Try to map as high as possible, this is only a hint. */
-	area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, PAGE_SIZE, 0, 0);
-	if (area->vaddr & ~PAGE_MASK) {
-		ret = area->vaddr;
-		goto fail;
+	if (!area->vaddr) {
+		/* Try to map as high as possible, this is only a hint. */
+		area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE,
+						PAGE_SIZE, 0, 0);
+		if (area->vaddr & ~PAGE_MASK) {
+			ret = area->vaddr;
+			goto fail;
+		}
 	}
 
 	ret = install_special_mapping(mm, area->vaddr, PAGE_SIZE,
@@ -1120,30 +1121,19 @@ static int xol_add_vma(struct xol_area *area)
 
 	smp_wmb();	/* pairs with get_xol_area() */
 	mm->uprobes_state.xol_area = area;
-	ret = 0;
  fail:
 	up_write(&mm->mmap_sem);
 
 	return ret;
 }
 
-/*
- * get_xol_area - Allocate process's xol_area if necessary.
- * This area will be used for storing instructions for execution out of line.
- *
- * Returns the allocated area or NULL.
- */
-static struct xol_area *get_xol_area(void)
+static struct xol_area *__create_xol_area(unsigned long vaddr)
 {
 	struct mm_struct *mm = current->mm;
-	struct xol_area *area;
 	uprobe_opcode_t insn = UPROBE_SWBP_INSN;
+	struct xol_area *area;
 
-	area = mm->uprobes_state.xol_area;
-	if (area)
-		goto ret;
-
-	area = kzalloc(sizeof(*area), GFP_KERNEL);
+	area = kmalloc(sizeof(*area), GFP_KERNEL);
 	if (unlikely(!area))
 		goto out;
 
@@ -1155,13 +1145,14 @@ static struct xol_area *get_xol_area(void)
 	if (!area->page)
 		goto free_bitmap;
 
-	/* allocate first slot of task's xol_area for the return probes */
+	area->vaddr = vaddr;
+	init_waitqueue_head(&area->wq);
+	/* Reserve the 1st slot for get_trampoline_vaddr() */
 	set_bit(0, area->bitmap);
-	copy_to_page(area->page, 0, &insn, UPROBE_SWBP_INSN_SIZE);
 	atomic_set(&area->slot_count, 1);
-	init_waitqueue_head(&area->wq);
+	copy_to_page(area->page, 0, &insn, UPROBE_SWBP_INSN_SIZE);
 
-	if (!xol_add_vma(area))
+	if (!xol_add_vma(mm, area))
 		return area;
 
 	__free_page(area->page);
@@ -1170,9 +1161,25 @@ static struct xol_area *get_xol_area(void)
  free_area:
 	kfree(area);
  out:
+	return NULL;
+}
+
+/*
+ * get_xol_area - Allocate process's xol_area if necessary.
+ * This area will be used for storing instructions for execution out of line.
+ *
+ * Returns the allocated area or NULL.
+ */
+static struct xol_area *get_xol_area(void)
+{
+	struct mm_struct *mm = current->mm;
+	struct xol_area *area;
+
+	if (!mm->uprobes_state.xol_area)
+		__create_xol_area(0);
+
 	area = mm->uprobes_state.xol_area;
- ret:
-	smp_read_barrier_depends();     /* pairs with wmb in xol_add_vma() */
+	smp_read_barrier_depends();	/* pairs with wmb in xol_add_vma() */
 	return area;
 }
 
@@ -1256,7 +1263,8 @@ static unsigned long xol_get_insn_slot(struct uprobe *uprobe)
 		return 0;
 
 	/* Initialize the slot */
-	copy_to_page(area->page, xol_vaddr, uprobe->arch.insn, MAX_UINSN_BYTES);
+	copy_to_page(area->page, xol_vaddr,
+			uprobe->arch.ixol, sizeof(uprobe->arch.ixol));
 	/*
 	 * We probably need flush_icache_user_range() but it needs vma.
 	 * This should work on supported architectures too.
@@ -1344,14 +1352,6 @@ void uprobe_free_utask(struct task_struct *t)
 	t->utask = NULL;
 }
 
-/*
- * Called in context of a new clone/fork from copy_process.
- */
-void uprobe_copy_process(struct task_struct *t)
-{
-	t->utask = NULL;
-}
-
 /*
  * Allocate a uprobe_task object for the task if if necessary.
  * Called when the thread hits a breakpoint.
@@ -1367,6 +1367,90 @@ static struct uprobe_task *get_utask(void)
 	return current->utask;
 }
 
+static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask)
+{
+	struct uprobe_task *n_utask;
+	struct return_instance **p, *o, *n;
+
+	n_utask = kzalloc(sizeof(struct uprobe_task), GFP_KERNEL);
+	if (!n_utask)
+		return -ENOMEM;
+	t->utask = n_utask;
+
+	p = &n_utask->return_instances;
+	for (o = o_utask->return_instances; o; o = o->next) {
+		n = kmalloc(sizeof(struct return_instance), GFP_KERNEL);
+		if (!n)
+			return -ENOMEM;
+
+		*n = *o;
+		atomic_inc(&n->uprobe->ref);
+		n->next = NULL;
+
+		*p = n;
+		p = &n->next;
+		n_utask->depth++;
+	}
+
+	return 0;
+}
+
+static void uprobe_warn(struct task_struct *t, const char *msg)
+{
+	pr_warn("uprobe: %s:%d failed to %s\n",
+			current->comm, current->pid, msg);
+}
+
+static void dup_xol_work(struct callback_head *work)
+{
+	kfree(work);
+
+	if (current->flags & PF_EXITING)
+		return;
+
+	if (!__create_xol_area(current->utask->vaddr))
+		uprobe_warn(current, "dup xol area");
+}
+
+/*
+ * Called in context of a new clone/fork from copy_process.
+ */
+void uprobe_copy_process(struct task_struct *t, unsigned long flags)
+{
+	struct uprobe_task *utask = current->utask;
+	struct mm_struct *mm = current->mm;
+	struct callback_head *work;
+	struct xol_area *area;
+
+	t->utask = NULL;
+
+	if (!utask || !utask->return_instances)
+		return;
+
+	if (mm == t->mm && !(flags & CLONE_VFORK))
+		return;
+
+	if (dup_utask(t, utask))
+		return uprobe_warn(t, "dup ret instances");
+
+	/* The task can fork() after dup_xol_work() fails */
+	area = mm->uprobes_state.xol_area;
+	if (!area)
+		return uprobe_warn(t, "dup xol area");
+
+	if (mm == t->mm)
+		return;
+
+	/* TODO: move it into the union in uprobe_task */
+	work = kmalloc(sizeof(*work), GFP_KERNEL);
+	if (!work)
+		return uprobe_warn(t, "dup xol area");
+
+	t->utask->vaddr = area->vaddr;
+	init_task_work(work, dup_xol_work);
+	task_work_add(t, work, true);
+}
+
 /*
  * Current area->vaddr notion assume the trampoline address is always
  * equal area->vaddr.
@@ -1857,9 +1941,4 @@ static int __init init_uprobes(void)
 
 	return register_die_notifier(&uprobe_exception_nb);
 }
-module_init(init_uprobes);
-
-static void __exit exit_uprobes(void)
-{
-}
-module_exit(exit_uprobes);
+__initcall(init_uprobes);
diff --git a/kernel/fork.c b/kernel/fork.c
index 086fe73ad6bde5b1ac1703f29f3d9f154878968b..8531609b6a824a28f5e738ba0d3eddb653733e28 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1373,7 +1373,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	INIT_LIST_HEAD(&p->pi_state_list);
 	p->pi_state_cache = NULL;
 #endif
-	uprobe_copy_process(p);
 	/*
 	 * sigaltstack should be cleared when sharing the same VM
 	 */
@@ -1490,6 +1489,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	perf_event_fork(p);
 
 	trace_task_newtask(p, clone_flags);
+	uprobe_copy_process(p, clone_flags);
 
 	return p;
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8b80f1bae21a56de33b18ce198d1df5272fbce0f..5fee859888a46b516b7a70d85552f9bb18cf43d7 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1049,6 +1049,7 @@ static struct ctl_table kern_table[] = {
 		.maxlen		= sizeof(sysctl_perf_event_sample_rate),
 		.mode		= 0644,
 		.proc_handler	= perf_proc_update_handler,
+		.extra1		= &one,
 	},
 	{
 		.procname	= "perf_cpu_time_max_percent",
diff --git a/scripts/package/Makefile b/scripts/package/Makefile
index a4f31c900fa6a44ab75d1a4f3916d2bb1075c448..c5d473393816f971ec43135bd7dd3a27cc660beb 100644
--- a/scripts/package/Makefile
+++ b/scripts/package/Makefile
@@ -115,7 +115,9 @@ git --git-dir=$(srctree)/.git archive --prefix=$(perf-tar)/         \
 	-o $(perf-tar).tar;                                         \
 mkdir -p $(perf-tar);                                               \
 git --git-dir=$(srctree)/.git rev-parse HEAD > $(perf-tar)/HEAD;    \
-tar rf $(perf-tar).tar $(perf-tar)/HEAD;                            \
+(cd $(srctree)/tools/perf;                                          \
+util/PERF-VERSION-GEN ../../$(perf-tar)/ 2>/dev/null);              \
+tar rf $(perf-tar).tar $(perf-tar)/HEAD $(perf-tar)/PERF-VERSION-FILE; \
 rm -r $(perf-tar);                                                  \
 $(if $(findstring tar-src,$@),,                                     \
 $(if $(findstring bz2,$@),bzip2,                                    \
diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile
index ca6cb779876a02de1bdd89f5ab0686e887bd2da0..fc1502098595b8b960f262c4f789c15c247d0d61 100644
--- a/tools/lib/traceevent/Makefile
+++ b/tools/lib/traceevent/Makefile
@@ -134,14 +134,14 @@ ifeq ($(VERBOSE),1)
   print_install =
 else
   Q = @
-  print_compile =		echo '  CC                 '$(OBJ);
-  print_app_build =		echo '  BUILD              '$(OBJ);
-  print_fpic_compile =		echo '  CC FPIC            '$(OBJ);
-  print_shared_lib_compile =	echo '  BUILD SHARED LIB   '$(OBJ);
-  print_plugin_obj_compile =	echo '  CC PLUGIN OBJ      '$(OBJ);
-  print_plugin_build =		echo '  CC PLUGI           '$(OBJ);
-  print_static_lib_build =	echo '  BUILD STATIC LIB   '$(OBJ);
-  print_install =		echo '  INSTALL     '$1'	to	$(DESTDIR_SQ)$2';
+  print_compile =		echo '  CC       '$(OBJ);
+  print_app_build =		echo '  BUILD    '$(OBJ);
+  print_fpic_compile =		echo '  CC FPIC  '$(OBJ);
+  print_shared_lib_compile =	echo '  BUILD    SHARED LIB '$(OBJ);
+  print_plugin_obj_compile =	echo '  BUILD    PLUGIN OBJ '$(OBJ);
+  print_plugin_build =		echo '  BUILD    PLUGIN     '$(OBJ);
+  print_static_lib_build =	echo '  BUILD    STATIC LIB '$(OBJ);
+  print_install =		echo '  INSTALL  '$1'	to	$(DESTDIR_SQ)$2';
 endif
 
 do_fpic_compile =					\
@@ -268,7 +268,7 @@ TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):$(ARCH):$(CROSS_COMPILE)
 TRACEEVENT-CFLAGS: force
 	@FLAGS='$(TRACK_CFLAGS)'; \
 	    if test x"$$FLAGS" != x"`cat TRACEEVENT-CFLAGS 2>/dev/null`" ; then \
-		echo 1>&2 "    * new build flags or cross compiler"; \
+		echo 1>&2 "  FLAGS:   * new build flags or cross compiler"; \
 		echo "$$FLAGS" >TRACEEVENT-CFLAGS; \
             fi
 
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index d1c2a6a4cd32b125a981dca6779abfce1b50967d..8f450adaa9c27107d6ecc42bed6b5702833babe9 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -305,6 +305,11 @@ int pevent_register_comm(struct pevent *pevent, const char *comm, int pid)
 	return 0;
 }
 
+void pevent_register_trace_clock(struct pevent *pevent, char *trace_clock)
+{
+	pevent->trace_clock = trace_clock;
+}
+
 struct func_map {
 	unsigned long long		addr;
 	char				*func;
@@ -599,10 +604,11 @@ find_printk(struct pevent *pevent, unsigned long long addr)
  * This registers a string by the address it was stored in the kernel.
  * The @fmt passed in is duplicated.
  */
-int pevent_register_print_string(struct pevent *pevent, char *fmt,
+int pevent_register_print_string(struct pevent *pevent, const char *fmt,
 				 unsigned long long addr)
 {
 	struct printk_list *item = malloc(sizeof(*item));
+	char *p;
 
 	if (!item)
 		return -1;
@@ -610,10 +616,21 @@ int pevent_register_print_string(struct pevent *pevent, char *fmt,
 	item->next = pevent->printklist;
 	item->addr = addr;
 
+	/* Strip off quotes and '\n' from the end */
+	if (fmt[0] == '"')
+		fmt++;
 	item->printk = strdup(fmt);
 	if (!item->printk)
 		goto out_free;
 
+	p = item->printk + strlen(item->printk) - 1;
+	if (*p == '"')
+		*p = 0;
+
+	p -= 2;
+	if (strcmp(p, "\\n") == 0)
+		*p = 0;
+
 	pevent->printklist = item;
 	pevent->printk_count++;
 
@@ -3488,6 +3505,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
 	struct pevent *pevent = event->pevent;
 	struct print_flag_sym *flag;
 	struct format_field *field;
+	struct printk_map *printk;
 	unsigned long long val, fval;
 	unsigned long addr;
 	char *str;
@@ -3523,7 +3541,12 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
 		if (!(field->flags & FIELD_IS_ARRAY) &&
 		    field->size == pevent->long_size) {
 			addr = *(unsigned long *)(data + field->offset);
-			trace_seq_printf(s, "%lx", addr);
+			/* Check if it matches a print format */
+			printk = find_printk(pevent, addr);
+			if (printk)
+				trace_seq_puts(s, printk->printk);
+			else
+				trace_seq_printf(s, "%lx", addr);
 			break;
 		}
 		str = malloc(len + 1);
@@ -3565,15 +3588,23 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
 		}
 		break;
 	case PRINT_HEX:
-		field = arg->hex.field->field.field;
-		if (!field) {
-			str = arg->hex.field->field.name;
-			field = pevent_find_any_field(event, str);
-			if (!field)
-				goto out_warning_field;
-			arg->hex.field->field.field = field;
+		if (arg->hex.field->type == PRINT_DYNAMIC_ARRAY) {
+			unsigned long offset;
+			offset = pevent_read_number(pevent,
+				data + arg->hex.field->dynarray.field->offset,
+				arg->hex.field->dynarray.field->size);
+			hex = data + (offset & 0xffff);
+		} else {
+			field = arg->hex.field->field.field;
+			if (!field) {
+				str = arg->hex.field->field.name;
+				field = pevent_find_any_field(event, str);
+				if (!field)
+					goto out_warning_field;
+				arg->hex.field->field.field = field;
+			}
+			hex = data + field->offset;
 		}
-		hex = data + field->offset;
 		len = eval_num_arg(data, size, event, arg->hex.size);
 		for (i = 0; i < len; i++) {
 			if (i)
@@ -3771,8 +3802,8 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc
 	if (asprintf(&arg->atom.atom, "%lld", ip) < 0)
 		goto out_free;
 
-	/* skip the first "%pf : " */
-	for (ptr = fmt + 6, bptr = data + field->offset;
+	/* skip the first "%pf: " */
+	for (ptr = fmt + 5, bptr = data + field->offset;
 	     bptr < data + size && *ptr; ptr++) {
 		int ls = 0;
 
@@ -3882,7 +3913,6 @@ get_bprint_format(void *data, int size __maybe_unused,
 	struct format_field *field;
 	struct printk_map *printk;
 	char *format;
-	char *p;
 
 	field = pevent->bprint_fmt_field;
 
@@ -3899,25 +3929,13 @@ get_bprint_format(void *data, int size __maybe_unused,
 
 	printk = find_printk(pevent, addr);
 	if (!printk) {
-		if (asprintf(&format, "%%pf : (NO FORMAT FOUND at %llx)\n", addr) < 0)
+		if (asprintf(&format, "%%pf: (NO FORMAT FOUND at %llx)\n", addr) < 0)
 			return NULL;
 		return format;
 	}
 
-	p = printk->printk;
-	/* Remove any quotes. */
-	if (*p == '"')
-		p++;
-	if (asprintf(&format, "%s : %s", "%pf", p) < 0)
+	if (asprintf(&format, "%s: %s", "%pf", printk->printk) < 0)
 		return NULL;
-	/* remove ending quotes and new line since we will add one too */
-	p = format + strlen(format) - 1;
-	if (*p == '"')
-		*p = 0;
-
-	p -= 2;
-	if (strcmp(p, "\\n") == 0)
-		*p = 0;
 
 	return format;
 }
@@ -3963,7 +3981,7 @@ static int is_printable_array(char *p, unsigned int len)
 	unsigned int i;
 
 	for (i = 0; i < len && p[i]; i++)
-		if (!isprint(p[i]))
+		if (!isprint(p[i]) && !isspace(p[i]))
 		    return 0;
 	return 1;
 }
@@ -4428,11 +4446,11 @@ void pevent_event_info(struct trace_seq *s, struct event_format *event,
 {
 	int print_pretty = 1;
 
-	if (event->pevent->print_raw)
+	if (event->pevent->print_raw || (event->flags & EVENT_FL_PRINTRAW))
 		print_event_fields(s, record->data, record->size, event);
 	else {
 
-		if (event->handler)
+		if (event->handler && !(event->flags & EVENT_FL_NOHANDLE))
 			print_pretty = event->handler(s, record, event,
 						      event->context);
 
@@ -4443,8 +4461,21 @@ void pevent_event_info(struct trace_seq *s, struct event_format *event,
 	trace_seq_terminate(s);
 }
 
+static bool is_timestamp_in_us(char *trace_clock, bool use_trace_clock)
+{
+	if (!use_trace_clock)
+		return true;
+
+	if (!strcmp(trace_clock, "local") || !strcmp(trace_clock, "global")
+	    || !strcmp(trace_clock, "uptime") || !strcmp(trace_clock, "perf"))
+		return true;
+
+	/* trace_clock is setting in tsc or counter mode */
+	return false;
+}
+
 void pevent_print_event(struct pevent *pevent, struct trace_seq *s,
-			struct pevent_record *record)
+			struct pevent_record *record, bool use_trace_clock)
 {
 	static const char *spaces = "                    "; /* 20 spaces */
 	struct event_format *event;
@@ -4457,9 +4488,14 @@ void pevent_print_event(struct pevent *pevent, struct trace_seq *s,
 	int pid;
 	int len;
 	int p;
+	bool use_usec_format;
 
-	secs = record->ts / NSECS_PER_SEC;
-	nsecs = record->ts - secs * NSECS_PER_SEC;
+	use_usec_format = is_timestamp_in_us(pevent->trace_clock,
+							use_trace_clock);
+	if (use_usec_format) {
+		secs = record->ts / NSECS_PER_SEC;
+		nsecs = record->ts - secs * NSECS_PER_SEC;
+	}
 
 	if (record->size < 0) {
 		do_warning("ug! negative record size %d", record->size);
@@ -4484,15 +4520,20 @@ void pevent_print_event(struct pevent *pevent, struct trace_seq *s,
 	} else
 		trace_seq_printf(s, "%16s-%-5d [%03d]", comm, pid, record->cpu);
 
-	if (pevent->flags & PEVENT_NSEC_OUTPUT) {
-		usecs = nsecs;
-		p = 9;
-	} else {
-		usecs = (nsecs + 500) / NSECS_PER_USEC;
-		p = 6;
-	}
+	if (use_usec_format) {
+		if (pevent->flags & PEVENT_NSEC_OUTPUT) {
+			usecs = nsecs;
+			p = 9;
+		} else {
+			usecs = (nsecs + 500) / NSECS_PER_USEC;
+			p = 6;
+		}
 
-	trace_seq_printf(s, " %5lu.%0*lu: %s: ", secs, p, usecs, event->name);
+		trace_seq_printf(s, " %5lu.%0*lu: %s: ",
+					secs, p, usecs, event->name);
+	} else
+		trace_seq_printf(s, " %12llu: %s: ",
+					record->ts, event->name);
 
 	/* Space out the event names evenly. */
 	len = strlen(event->name);
@@ -5326,6 +5367,48 @@ int pevent_print_num_field(struct trace_seq *s, const char *fmt,
 	return -1;
 }
 
+/**
+ * pevent_print_func_field - print a field and a format for function pointers
+ * @s: The seq to print to
+ * @fmt: The printf format to print the field with.
+ * @event: the event that the field is for
+ * @name: The name of the field
+ * @record: The record with the field name.
+ * @err: print default error if failed.
+ *
+ * Returns: 0 on success, -1 field not found, or 1 if buffer is full.
+ */
+int pevent_print_func_field(struct trace_seq *s, const char *fmt,
+			    struct event_format *event, const char *name,
+			    struct pevent_record *record, int err)
+{
+	struct format_field *field = pevent_find_field(event, name);
+	struct pevent *pevent = event->pevent;
+	unsigned long long val;
+	struct func_map *func;
+	char tmp[128];
+
+	if (!field)
+		goto failed;
+
+	if (pevent_read_number_field(field, record->data, &val))
+		goto failed;
+
+	func = find_func(pevent, val);
+
+	if (func)
+		snprintf(tmp, 128, "%s/0x%llx", func->func, func->addr - val);
+	else
+		sprintf(tmp, "0x%08llx", val);
+
+	return trace_seq_printf(s, fmt, tmp);
+
+ failed:
+	if (err)
+		trace_seq_printf(s, "CAN'T FIND FIELD \"%s\"", name);
+	return -1;
+}
+
 static void free_func_handle(struct pevent_function_handler *func)
 {
 	struct pevent_func_params *params;
diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h
index c37b2026d04a991b5d70db230db6a56d67320b99..8d73d2594f65de39c4a0c9d35c976847e30ab0fe 100644
--- a/tools/lib/traceevent/event-parse.h
+++ b/tools/lib/traceevent/event-parse.h
@@ -20,6 +20,7 @@
 #ifndef _PARSE_EVENTS_H
 #define _PARSE_EVENTS_H
 
+#include <stdbool.h>
 #include <stdarg.h>
 #include <regex.h>
 
@@ -307,6 +308,8 @@ enum {
 	EVENT_FL_ISBPRINT	= 0x04,
 	EVENT_FL_ISFUNCENT	= 0x10,
 	EVENT_FL_ISFUNCRET	= 0x20,
+	EVENT_FL_NOHANDLE	= 0x40,
+	EVENT_FL_PRINTRAW	= 0x80,
 
 	EVENT_FL_FAILED		= 0x80000000
 };
@@ -450,6 +453,8 @@ struct pevent {
 
 	/* cache */
 	struct event_format *last_event;
+
+	char *trace_clock;
 };
 
 static inline void pevent_set_flag(struct pevent *pevent, int flag)
@@ -527,14 +532,15 @@ enum trace_flag_type {
 };
 
 int pevent_register_comm(struct pevent *pevent, const char *comm, int pid);
+void pevent_register_trace_clock(struct pevent *pevent, char *trace_clock);
 int pevent_register_function(struct pevent *pevent, char *name,
 			     unsigned long long addr, char *mod);
-int pevent_register_print_string(struct pevent *pevent, char *fmt,
+int pevent_register_print_string(struct pevent *pevent, const char *fmt,
 				 unsigned long long addr);
 int pevent_pid_is_registered(struct pevent *pevent, int pid);
 
 void pevent_print_event(struct pevent *pevent, struct trace_seq *s,
-			struct pevent_record *record);
+			struct pevent_record *record, bool use_trace_clock);
 
 int pevent_parse_header_page(struct pevent *pevent, char *buf, unsigned long size,
 			     int long_size);
@@ -563,6 +569,10 @@ int pevent_print_num_field(struct trace_seq *s, const char *fmt,
 			   struct event_format *event, const char *name,
 			   struct pevent_record *record, int err);
 
+int pevent_print_func_field(struct trace_seq *s, const char *fmt,
+			   struct event_format *event, const char *name,
+			   struct pevent_record *record, int err);
+
 int pevent_register_event_handler(struct pevent *pevent, int id,
 				  const char *sys_name, const char *event_name,
 				  pevent_event_handler_func func, void *context);
diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore
index 8f8fbc227a468477147629548ea983ee238e6ea6..782d86e961b9fb79195407e7634bceae9d55f279 100644
--- a/tools/perf/.gitignore
+++ b/tools/perf/.gitignore
@@ -13,6 +13,7 @@ perf*.html
 common-cmds.h
 perf.data
 perf.data.old
+output.svg
 perf-archive
 tags
 TAGS
diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile
index 5a37a7c84e69891fc753f205c50b81d22238499f..3ba1c0b09908b0e66a8a87f5307d7e9066a003ce 100644
--- a/tools/perf/Documentation/Makefile
+++ b/tools/perf/Documentation/Makefile
@@ -145,16 +145,17 @@ endif
 
 ifneq ($(findstring $(MAKEFLAGS),s),s)
 ifneq ($(V),1)
-	QUIET_ASCIIDOC	= @echo '   ' ASCIIDOC $@;
-	QUIET_XMLTO	= @echo '   ' XMLTO $@;
-	QUIET_DB2TEXI	= @echo '   ' DB2TEXI $@;
-	QUIET_MAKEINFO	= @echo '   ' MAKEINFO $@;
-	QUIET_DBLATEX	= @echo '   ' DBLATEX $@;
-	QUIET_XSLTPROC	= @echo '   ' XSLTPROC $@;
-	QUIET_GEN	= @echo '   ' GEN $@;
+	QUIET_ASCIIDOC	= @echo '  ASCIIDOC '$@;
+	QUIET_XMLTO	= @echo '  XMLTO    '$@;
+	QUIET_DB2TEXI	= @echo '  DB2TEXI  '$@;
+	QUIET_MAKEINFO	= @echo '  MAKEINFO '$@;
+	QUIET_DBLATEX	= @echo '  DBLATEX  '$@;
+	QUIET_XSLTPROC	= @echo '  XSLTPROC '$@;
+	QUIET_GEN	= @echo '  GEN      '$@;
 	QUIET_STDERR	= 2> /dev/null
 	QUIET_SUBDIR0	= +@subdir=
-	QUIET_SUBDIR1	= ;$(NO_SUBDIR) echo '   ' SUBDIR $$subdir; \
+	QUIET_SUBDIR1	= ;$(NO_SUBDIR) \
+			   echo '  SUBDIR   ' $$subdir; \
 			  $(MAKE) $(PRINT_DIR) -C $$subdir
 	export V
 endif
@@ -183,47 +184,43 @@ ifdef missing_tools
 endif
 
 do-install-man: man
-	$(INSTALL) -d -m 755 $(DESTDIR)$(man1dir)
-#	$(INSTALL) -d -m 755 $(DESTDIR)$(man5dir)
-#	$(INSTALL) -d -m 755 $(DESTDIR)$(man7dir)
-	$(INSTALL) -m 644 $(DOC_MAN1) $(DESTDIR)$(man1dir)
-#	$(INSTALL) -m 644 $(DOC_MAN5) $(DESTDIR)$(man5dir)
-#	$(INSTALL) -m 644 $(DOC_MAN7) $(DESTDIR)$(man7dir)
+	$(call QUIET_INSTALL, Documentation-man) \
+		$(INSTALL) -d -m 755 $(DESTDIR)$(man1dir); \
+#		$(INSTALL) -d -m 755 $(DESTDIR)$(man5dir); \
+#		$(INSTALL) -d -m 755 $(DESTDIR)$(man7dir); \
+		$(INSTALL) -m 644 $(DOC_MAN1) $(DESTDIR)$(man1dir); \
+#		$(INSTALL) -m 644 $(DOC_MAN5) $(DESTDIR)$(man5dir); \
+#		$(INSTALL) -m 644 $(DOC_MAN7) $(DESTDIR)$(man7dir)
 
 install-man: check-man-tools man
 
-try-install-man:
 ifdef missing_tools
-	$(warning Please install $(missing_tools) to have the man pages installed)
+  DO_INSTALL_MAN = $(warning Please install $(missing_tools) to have the man pages installed)
 else
-	$(MAKE) do-install-man
+  DO_INSTALL_MAN = do-install-man
 endif
 
+try-install-man: $(DO_INSTALL_MAN)
+
 install-info: info
-	$(INSTALL) -d -m 755 $(DESTDIR)$(infodir)
-	$(INSTALL) -m 644 $(OUTPUT)perf.info $(OUTPUT)perfman.info $(DESTDIR)$(infodir)
+	$(call QUIET_INSTALL, Documentation-info) \
+		$(INSTALL) -d -m 755 $(DESTDIR)$(infodir); \
+		$(INSTALL) -m 644 $(OUTPUT)perf.info $(OUTPUT)perfman.info $(DESTDIR)$(infodir); \
 	if test -r $(DESTDIR)$(infodir)/dir; then \
-	  $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perf.info ;\
-	  $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perfman.info ;\
+		$(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perf.info ;\
+		$(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perfman.info ;\
 	else \
 	  echo "No directory found in $(DESTDIR)$(infodir)" >&2 ; \
 	fi
 
 install-pdf: pdf
-	$(INSTALL) -d -m 755 $(DESTDIR)$(pdfdir)
-	$(INSTALL) -m 644 $(OUTPUT)user-manual.pdf $(DESTDIR)$(pdfdir)
+	$(call QUIET_INSTALL, Documentation-pdf) \
+		$(INSTALL) -d -m 755 $(DESTDIR)$(pdfdir); \
+		$(INSTALL) -m 644 $(OUTPUT)user-manual.pdf $(DESTDIR)$(pdfdir)
 
 #install-html: html
 #	'$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir)
 
-ifneq ($(MAKECMDGOALS),clean)
-ifneq ($(MAKECMDGOALS),tags)
-$(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
-	$(QUIET_SUBDIR0)../ $(QUIET_SUBDIR1) $(OUTPUT)PERF-VERSION-FILE
-
--include $(OUTPUT)PERF-VERSION-FILE
-endif
-endif
 
 #
 # Determine "include::" file references in asciidoc files.
@@ -253,15 +250,17 @@ $(OUTPUT)cmd-list.made: cmd-list.perl ../command-list.txt $(MAN1_TXT)
 	$(PERL_PATH) ./cmd-list.perl ../command-list.txt $(QUIET_STDERR) && \
 	date >$@
 
+CLEAN_FILES =									\
+	$(MAN_XML) $(addsuffix +,$(MAN_XML))					\
+	$(MAN_HTML) $(addsuffix +,$(MAN_HTML))					\
+	$(DOC_HTML) $(DOC_MAN1) $(DOC_MAN5) $(DOC_MAN7)				\
+	$(OUTPUT)*.texi $(OUTPUT)*.texi+ $(OUTPUT)*.texi++			\
+	$(OUTPUT)perf.info $(OUTPUT)perfman.info				\
+	$(OUTPUT)howto-index.txt $(OUTPUT)howto/*.html $(OUTPUT)doc.dep		\
+	$(OUTPUT)technical/api-*.html $(OUTPUT)technical/api-index.txt		\
+	$(cmds_txt) $(OUTPUT)*.made
 clean:
-	$(RM) $(MAN_XML) $(addsuffix +,$(MAN_XML))
-	$(RM) $(MAN_HTML) $(addsuffix +,$(MAN_HTML))
-	$(RM) $(DOC_HTML) $(DOC_MAN1) $(DOC_MAN5) $(DOC_MAN7)
-	$(RM) $(OUTPUT)*.texi $(OUTPUT)*.texi+ $(OUTPUT)*.texi++
-	$(RM) $(OUTPUT)perf.info $(OUTPUT)perfman.info
-	$(RM) $(OUTPUT)howto-index.txt $(OUTPUT)howto/*.html $(OUTPUT)doc.dep
-	$(RM) $(OUTPUT)technical/api-*.html $(OUTPUT)technical/api-index.txt
-	$(RM) $(cmds_txt) $(OUTPUT)*.made
+	$(call QUIET_CLEAN, Documentation) $(RM) $(CLEAN_FILES)
 
 $(MAN_HTML): $(OUTPUT)%.html : %.txt
 	$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
@@ -342,5 +341,3 @@ $(patsubst %.txt,%.html,$(wildcard howto/*.txt)): %.html : %.txt
 
 #quick-install-html:
 #	'$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(HTML_REF) $(DESTDIR)$(htmldir)
-
-.PHONY: .FORCE-PERF-VERSION-FILE
diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt
index e9a8349a7172f7533fe970062c5936b4fd7df6fd..fd77d81ea748663284bc55db4db8fa5c20fc2a28 100644
--- a/tools/perf/Documentation/perf-buildid-cache.txt
+++ b/tools/perf/Documentation/perf-buildid-cache.txt
@@ -21,6 +21,19 @@ OPTIONS
 -a::
 --add=::
         Add specified file to the cache.
+-k::
+--kcore::
+        Add specified kcore file to the cache. For the current host that is
+        /proc/kcore which requires root permissions to read. Be aware that
+        running 'perf buildid-cache' as root may update root's build-id cache
+        not the user's. Use the -v option to see where the file is created.
+        Note that the copied file contains only code sections not the whole core
+        image. Note also that files "kallsyms" and "modules" must also be in the
+        same directory and are also copied.  All 3 files are created with read
+        permissions for root only. kcore will not be added if there is already a
+        kcore in the cache (with the same build-id) that has the same modules at
+        the same addresses. Use the -v option to see if a copy of kcore is
+        actually made.
 -r::
 --remove=::
         Remove specified file from the cache.
diff --git a/tools/perf/Documentation/perf-kvm.txt b/tools/perf/Documentation/perf-kvm.txt
index ac84db2d23342aaa269255e34c9ab3f5d70a3e4f..6a06cefe96427453ad3b21e1caf64889f5f9ea55 100644
--- a/tools/perf/Documentation/perf-kvm.txt
+++ b/tools/perf/Documentation/perf-kvm.txt
@@ -109,7 +109,9 @@ STAT LIVE OPTIONS
 
 -m::
 --mmap-pages=::
-    Number of mmap data pages. Must be a power of two.
+    Number of mmap data pages (must be a power of two) or size
+    specification with appended unit character - B/K/M/G. The
+    size is rounded up to have nearest pages power of two value.
 
 -a::
 --all-cpus::
diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt
index c7f5f55634ac3fab974216404fdcb6348aec94d0..ab25be28c9dcaab31adb1822195afab571308910 100644
--- a/tools/perf/Documentation/perf-lock.txt
+++ b/tools/perf/Documentation/perf-lock.txt
@@ -48,7 +48,7 @@ REPORT OPTIONS
 -k::
 --key=<value>::
         Sorting key. Possible values: acquired (default), contended,
-        wait_total, wait_max, wait_min.
+	avg_wait, wait_total, wait_max, wait_min.
 
 INFO OPTIONS
 ------------
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index ca0d3d9f4bac831d377fd90f9a38be1021c486ec..052f7c4dc00c25a47dbe6355f0f762744ad5a79a 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -87,7 +87,9 @@ OPTIONS
 
 -m::
 --mmap-pages=::
-	Number of mmap data pages. Must be a power of two.
+	Number of mmap data pages (must be a power of two) or size
+	specification with appended unit character - B/K/M/G. The
+	size is rounded up to have nearest pages power of two value.
 
 -g::
 	Enables call-graph (stack chain/backtrace) recording.
@@ -178,6 +180,9 @@ following filters are defined:
         - u:  only when the branch target is at the user level
         - k: only when the branch target is in the kernel
         - hv: only when the target is at the hypervisor level
+	- in_tx: only when the target is in a hardware transaction
+	- no_tx: only when the target is not in a hardware transaction
+	- abort_tx: only when the target is a hardware transaction abort
 
 +
 The option requires at least one branch type among any, any_call, any_ret, ind_call.
@@ -188,12 +193,14 @@ is enabled for all the sampling events. The sampled branch type is the same for
 The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k
 Note that this feature may not be available on all processors.
 
--W::
 --weight::
 Enable weightened sampling. An additional weight is recorded per sample and can be
 displayed with the weight and local_weight sort keys.  This currently works for TSX
 abort events and some memory events in precise mode on modern Intel CPUs.
 
+--transaction::
+Record transaction flags for transaction related events.
+
 SEE ALSO
 --------
 linkperf:perf-stat[1], linkperf:perf-list[1]
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 2b8097ee39d83c194fd8393a0a1a3e2b67e40b65..10a2798712511a26616ea618a91130a6ed467345 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -71,7 +71,11 @@ OPTIONS
 	entries are displayed as "[other]".
 	- cpu: cpu number the task ran at the time of sample
 	- srcline: filename and line number executed at the time of sample.  The
-	DWARF debuggin info must be provided.
+	DWARF debugging info must be provided.
+	- weight: Event specific weight, e.g. memory latency or transaction
+	abort cost. This is the global weight.
+	- local_weight: Local weight version of the weight above.
+	- transaction: Transaction abort flags.
 
 	By default, comm, dso and symbol keys are used.
 	(i.e. --sort comm,dso,symbol)
@@ -85,6 +89,8 @@ OPTIONS
 	- symbol_from: name of function branched from
 	- symbol_to: name of function branched to
 	- mispredict: "N" for predicted branch, "Y" for mispredicted branch
+	- in_tx: branch in TSX transaction
+	- abort: TSX transaction abort.
 
 	And default sort keys are changed to comm, dso_from, symbol_from, dso_to
 	and symbol_to, see '--branch-stack'.
@@ -135,6 +141,14 @@ OPTIONS
 
 	Default: fractal,0.5,callee,function.
 
+--max-stack::
+	Set the stack depth limit when parsing the callchain, anything
+	beyond the specified depth will be ignored. This is a trade-off
+	between information loss and faster processing especially for
+	workloads that can have a very long callchain stack.
+
+	Default: 127
+
 -G::
 --inverted::
         alias for inverted caller based call graph.
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 73c9759005a354eb8e052e7425f981bd16fb8c2a..80c7da6732f294782d86c51df8e8ba9225bf1d04 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -137,6 +137,11 @@ core number and the number of online logical processors on that physical process
 After starting the program, wait msecs before measuring. This is useful to
 filter out the startup phase of the program, which is often very different.
 
+-T::
+--transaction::
+
+Print statistics of transactional execution if supported.
+
 EXAMPLES
 --------
 
diff --git a/tools/perf/Documentation/perf-timechart.txt b/tools/perf/Documentation/perf-timechart.txt
index 1632b0efc7577a730388154164be888c619ad934..3ff8bd4f0b4d94163d77231881b462e50bbd4e17 100644
--- a/tools/perf/Documentation/perf-timechart.txt
+++ b/tools/perf/Documentation/perf-timechart.txt
@@ -8,7 +8,8 @@ perf-timechart - Tool to visualize total system behavior during a workload
 SYNOPSIS
 --------
 [verse]
-'perf timechart' {record}
+'perf timechart' record <command>
+'perf timechart' [<options>]
 
 DESCRIPTION
 -----------
@@ -41,6 +42,18 @@ OPTIONS
 --symfs=<directory>::
         Look for files with symbols relative to this directory.
 
+EXAMPLES
+--------
+
+$ perf timechart record git pull
+
+  [ perf record: Woken up 13 times to write data ]
+  [ perf record: Captured and wrote 4.253 MB perf.data (~185801 samples) ]
+
+$ perf timechart
+
+  Written 10.2 seconds of trace to output.svg.
+
 SEE ALSO
 --------
 linkperf:perf-record[1]
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 6a118e71d0032e15d3ce8d55b91c24a1be83e31f..7de01dd7968898c18e34f40914017ae549a3f618 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -68,7 +68,9 @@ Default is to monitor all CPUS.
 
 -m <pages>::
 --mmap-pages=<pages>::
-	Number of mmapped data pages.
+	Number of mmap data pages (must be a power of two) or size
+	specification with appended unit character - B/K/M/G. The
+	size is rounded up to have nearest pages power of two value.
 
 -p <pid>::
 --pid=<pid>::
@@ -112,7 +114,8 @@ Default is to monitor all CPUS.
 
 -s::
 --sort::
-	Sort by key(s): pid, comm, dso, symbol, parent, srcline, weight, local_weight.
+	Sort by key(s): pid, comm, dso, symbol, parent, srcline, weight,
+	local_weight, abort, in_tx, transaction
 
 -n::
 --show-nr-samples::
@@ -147,6 +150,14 @@ Default is to monitor all CPUS.
 	Setup and enable call-graph (stack chain/backtrace) recording,
 	implies -G.
 
+--max-stack::
+	Set the stack depth limit when parsing the callchain, anything
+	beyond the specified depth will be ignored. This is a trade-off
+	between information loss and faster processing especially for
+	workloads that can have a very long callchain stack.
+
+	Default: 127
+
 --ignore-callees=<regex>::
         Ignore callees of the function(s) matching the given regex.
         This has the effect of collecting the callers of each such
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index daccd2c0a48fe9c933cf50c870f68aa9bfe253a6..7b0497f95a75f0dafdf079742cec136dcff3ffdc 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -9,6 +9,7 @@ SYNOPSIS
 --------
 [verse]
 'perf trace'
+'perf trace record'
 
 DESCRIPTION
 -----------
@@ -16,9 +17,14 @@ This command will show the events associated with the target, initially
 syscalls, but other system events like pagefaults, task lifetime events,
 scheduling events, etc.
 
-Initially this is a live mode only tool, but eventually will work with
-perf.data files like the other tools, allowing a detached 'record' from
-analysis phases.
+This is a live mode tool in addition to working with perf.data files like
+the other perf tools. Files can be generated using the 'perf record' command
+but the session needs to include the raw_syscalls events (-e 'raw_syscalls:*').
+Alernatively, the 'perf trace record' can be used as a shortcut to
+automatically include the raw_syscalls events when writing events to a file.
+
+The following options apply to perf trace; options to perf trace record are
+found in the perf record man page.
 
 OPTIONS
 -------
@@ -59,7 +65,9 @@ OPTIONS
 
 -m::
 --mmap-pages=::
-	Number of mmap data pages. Must be a power of two.
+	Number of mmap data pages (must be a power of two) or size
+	specification with appended unit character - B/K/M/G. The
+	size is rounded up to have nearest pages power of two value.
 
 -C::
 --cpu::
@@ -78,6 +86,21 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
 --input
 	Process events from a given perf data file.
 
+-T
+--time
+	Print full timestamp rather time relative to first sample.
+
+--comm::
+        Show process COMM right beside its ID, on by default, disable with --no-comm.
+
+--summary::
+	Show a summary of syscalls by thread with min, max, and average times (in
+    msec) and relative stddev.
+
+--tool_stats::
+	Show tool stats such as number of times fd->pathname was discovered thru
+	hooking the open syscall return + vfs_getname or via reading /proc/pid/fd, etc.
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-script[1]
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 64c043b7a43848f17a023dcf9479dbd77f96d7be..4835618a5608892054de87d497e45e644fdeb0da 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -1,819 +1,79 @@
-include ../scripts/Makefile.include
-
-# The default target of this Makefile is...
-all:
-
-include config/utilities.mak
-
-# Define V to have a more verbose compile.
-#
-# Define O to save output files in a separate directory.
-#
-# Define ARCH as name of target architecture if you want cross-builds.
-#
-# Define CROSS_COMPILE as prefix name of compiler if you want cross-builds.
-#
-# Define NO_LIBPERL to disable perl script extension.
-#
-# Define NO_LIBPYTHON to disable python script extension.
-#
-# Define PYTHON to point to the python binary if the default
-# `python' is not correct; for example: PYTHON=python2
-#
-# Define PYTHON_CONFIG to point to the python-config binary if
-# the default `$(PYTHON)-config' is not correct.
 #
-# Define ASCIIDOC8 if you want to format documentation with AsciiDoc 8
+# This is a simple wrapper Makefile that calls the main Makefile.perf
+# with a -j option to do parallel builds
 #
-# Define DOCBOOK_XSL_172 if you want to format man pages with DocBook XSL v1.72.
+# If you want to invoke the perf build in some non-standard way then
+# you can use the 'make -f Makefile.perf' method to invoke it.
 #
-# Define LDFLAGS=-static to build a static binary.
-#
-# Define EXTRA_CFLAGS=-m64 or EXTRA_CFLAGS=-m32 as appropriate for cross-builds.
-#
-# Define NO_DWARF if you do not want debug-info analysis feature at all.
-#
-# Define WERROR=0 to disable treating any warnings as errors.
-#
-# Define NO_NEWT if you do not want TUI support. (deprecated)
-#
-# Define NO_SLANG if you do not want TUI support.
-#
-# Define NO_GTK2 if you do not want GTK+ GUI support.
+
 #
-# Define NO_DEMANGLE if you do not want C++ symbol demangling.
+# Clear out the built-in rules GNU make defines by default (such as .o targets),
+# so that we pass through all targets to Makefile.perf:
 #
-# Define NO_LIBELF if you do not want libelf dependency (e.g. cross-builds)
+.SUFFIXES:
+
 #
-# Define NO_LIBUNWIND if you do not want libunwind dependency for dwarf
-# backtrace post unwind.
+# We don't want to pass along options like -j:
 #
-# Define NO_BACKTRACE if you do not want stack backtrace debug feature
+unexport MAKEFLAGS
+
 #
-# Define NO_LIBNUMA if you do not want numa perf benchmark
+# Do a parallel build with multiple jobs, based on the number of CPUs online
+# in this system: 'make -j8' on a 8-CPU system, etc.
 #
-# Define NO_LIBAUDIT if you do not want libaudit support
+# (To override it, run 'make JOBS=1' and similar.)
 #
-# Define NO_LIBBIONIC if you do not want bionic support
-
-ifeq ($(srctree),)
-srctree := $(patsubst %/,%,$(dir $(shell pwd)))
-srctree := $(patsubst %/,%,$(dir $(srctree)))
-#$(info Determined 'srctree' to be $(srctree))
-endif
-
-ifneq ($(objtree),)
-#$(info Determined 'objtree' to be $(objtree))
-endif
-
-ifneq ($(OUTPUT),)
-#$(info Determined 'OUTPUT' to be $(OUTPUT))
-endif
-
-$(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
-	@$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT)
-
-CC = $(CROSS_COMPILE)gcc
-AR = $(CROSS_COMPILE)ar
-
-RM      = rm -f
-MKDIR   = mkdir
-FIND    = find
-INSTALL = install
-FLEX    = flex
-BISON   = bison
-STRIP   = strip
-
-LK_DIR          = $(srctree)/tools/lib/lk/
-TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/
-
-# include config/Makefile by default and rule out
-# non-config cases
-config := 1
-
-NON_CONFIG_TARGETS := clean TAGS tags cscope help
-
-ifdef MAKECMDGOALS
-ifeq ($(filter-out $(NON_CONFIG_TARGETS),$(MAKECMDGOALS)),)
-  config := 0
-endif
+ifeq ($(JOBS),)
+  JOBS := $(shell grep -c ^processor /proc/cpuinfo 2>/dev/null)
+  ifeq ($(JOBS),)
+    JOBS := 1
+  endif
 endif
 
-ifeq ($(config),1)
-include config/Makefile
+#
+# Only pass canonical directory names as the output directory:
+#
+ifneq ($(O),)
+  FULL_O := $(shell readlink -f $(O) || echo $(O))
 endif
 
-export prefix bindir sharedir sysconfdir
-
-# sparse is architecture-neutral, which means that we need to tell it
-# explicitly what architecture to check for. Fix this up for yours..
-SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__
-
-# Guard against environment variables
-BUILTIN_OBJS =
-LIB_H =
-LIB_OBJS =
-PYRF_OBJS =
-SCRIPT_SH =
-
-SCRIPT_SH += perf-archive.sh
-
-grep-libs = $(filter -l%,$(1))
-strip-libs = $(filter-out -l%,$(1))
-
-ifneq ($(OUTPUT),)
-  TE_PATH=$(OUTPUT)
-ifneq ($(subdir),)
-  LK_PATH=$(OUTPUT)/../lib/lk/
-else
-  LK_PATH=$(OUTPUT)
-endif
+#
+# Only accept the 'DEBUG' variable from the command line:
+#
+ifeq ("$(origin DEBUG)", "command line")
+  ifeq ($(DEBUG),)
+    override DEBUG = 0
+  else
+    SET_DEBUG = "DEBUG=$(DEBUG)"
+  endif
 else
-  TE_PATH=$(TRACE_EVENT_DIR)
-  LK_PATH=$(LK_DIR)
+  override DEBUG = 0
 endif
 
-LIBTRACEEVENT = $(TE_PATH)libtraceevent.a
-export LIBTRACEEVENT
-
-LIBLK = $(LK_PATH)liblk.a
-export LIBLK
-
-# python extension build directories
-PYTHON_EXTBUILD     := $(OUTPUT)python_ext_build/
-PYTHON_EXTBUILD_LIB := $(PYTHON_EXTBUILD)lib/
-PYTHON_EXTBUILD_TMP := $(PYTHON_EXTBUILD)tmp/
-export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP
+define print_msg
+  @printf '  BUILD:   Doing '\''make \033[33m-j'$(JOBS)'\033[m'\'' parallel build\n'
+endef
 
-python-clean := rm -rf $(PYTHON_EXTBUILD) $(OUTPUT)python/perf.so
+define make
+  @$(MAKE) -f Makefile.perf --no-print-directory -j$(JOBS) O=$(FULL_O) $(SET_DEBUG) $@
+endef
 
-PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources)
-PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT) $(LIBLK)
-
-$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS)
-	$(QUIET_GEN)CFLAGS='$(CFLAGS)' $(PYTHON_WORD) util/setup.py \
-	  --quiet build_ext; \
-	mkdir -p $(OUTPUT)python && \
-	cp $(PYTHON_EXTBUILD_LIB)perf.so $(OUTPUT)python/
 #
-# No Perl scripts right now:
+# Needed if no target specified:
 #
-
-SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH))
+all:
+	$(print_msg)
+	$(make)
 
 #
-# Single 'perf' binary right now:
+# The clean target is not really parallel, don't print the jobs info:
 #
-PROGRAMS += $(OUTPUT)perf
-
-# what 'all' will build and 'install' will install, in perfexecdir
-ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS)
-
-# what 'all' will build but not install in perfexecdir
-OTHER_PROGRAMS = $(OUTPUT)perf
-
-# Set paths to tools early so that they can be used for version tests.
-ifndef SHELL_PATH
-  SHELL_PATH = /bin/sh
-endif
-ifndef PERL_PATH
-  PERL_PATH = /usr/bin/perl
-endif
-
-export PERL_PATH
-
-$(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c
-	$(QUIET_FLEX)$(FLEX) --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) -t util/parse-events.l > $(OUTPUT)util/parse-events-flex.c
-
-$(OUTPUT)util/parse-events-bison.c: util/parse-events.y
-	$(QUIET_BISON)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $(OUTPUT)util/parse-events-bison.c -p parse_events_
-
-$(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c
-	$(QUIET_FLEX)$(FLEX) --header-file=$(OUTPUT)util/pmu-flex.h -t util/pmu.l > $(OUTPUT)util/pmu-flex.c
-
-$(OUTPUT)util/pmu-bison.c: util/pmu.y
-	$(QUIET_BISON)$(BISON) -v util/pmu.y -d -o $(OUTPUT)util/pmu-bison.c -p perf_pmu_
-
-$(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c
-$(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c
-
-LIB_FILE=$(OUTPUT)libperf.a
-
-LIB_H += ../../include/uapi/linux/perf_event.h
-LIB_H += ../../include/linux/rbtree.h
-LIB_H += ../../include/linux/list.h
-LIB_H += ../../include/uapi/linux/const.h
-LIB_H += ../../include/linux/hash.h
-LIB_H += ../../include/linux/stringify.h
-LIB_H += util/include/linux/bitmap.h
-LIB_H += util/include/linux/bitops.h
-LIB_H += util/include/linux/compiler.h
-LIB_H += util/include/linux/const.h
-LIB_H += util/include/linux/ctype.h
-LIB_H += util/include/linux/kernel.h
-LIB_H += util/include/linux/list.h
-LIB_H += util/include/linux/export.h
-LIB_H += util/include/linux/magic.h
-LIB_H += util/include/linux/poison.h
-LIB_H += util/include/linux/prefetch.h
-LIB_H += util/include/linux/rbtree.h
-LIB_H += util/include/linux/rbtree_augmented.h
-LIB_H += util/include/linux/string.h
-LIB_H += util/include/linux/types.h
-LIB_H += util/include/linux/linkage.h
-LIB_H += util/include/asm/asm-offsets.h
-LIB_H += util/include/asm/bug.h
-LIB_H += util/include/asm/byteorder.h
-LIB_H += util/include/asm/hweight.h
-LIB_H += util/include/asm/swab.h
-LIB_H += util/include/asm/system.h
-LIB_H += util/include/asm/uaccess.h
-LIB_H += util/include/dwarf-regs.h
-LIB_H += util/include/asm/dwarf2.h
-LIB_H += util/include/asm/cpufeature.h
-LIB_H += util/include/asm/unistd_32.h
-LIB_H += util/include/asm/unistd_64.h
-LIB_H += perf.h
-LIB_H += util/annotate.h
-LIB_H += util/cache.h
-LIB_H += util/callchain.h
-LIB_H += util/build-id.h
-LIB_H += util/debug.h
-LIB_H += util/sysfs.h
-LIB_H += util/pmu.h
-LIB_H += util/event.h
-LIB_H += util/evsel.h
-LIB_H += util/evlist.h
-LIB_H += util/exec_cmd.h
-LIB_H += util/types.h
-LIB_H += util/levenshtein.h
-LIB_H += util/machine.h
-LIB_H += util/map.h
-LIB_H += util/parse-options.h
-LIB_H += util/parse-events.h
-LIB_H += util/quote.h
-LIB_H += util/util.h
-LIB_H += util/xyarray.h
-LIB_H += util/header.h
-LIB_H += util/help.h
-LIB_H += util/session.h
-LIB_H += util/strbuf.h
-LIB_H += util/strlist.h
-LIB_H += util/strfilter.h
-LIB_H += util/svghelper.h
-LIB_H += util/tool.h
-LIB_H += util/run-command.h
-LIB_H += util/sigchain.h
-LIB_H += util/dso.h
-LIB_H += util/symbol.h
-LIB_H += util/color.h
-LIB_H += util/values.h
-LIB_H += util/sort.h
-LIB_H += util/hist.h
-LIB_H += util/thread.h
-LIB_H += util/thread_map.h
-LIB_H += util/trace-event.h
-LIB_H += util/probe-finder.h
-LIB_H += util/dwarf-aux.h
-LIB_H += util/probe-event.h
-LIB_H += util/pstack.h
-LIB_H += util/cpumap.h
-LIB_H += util/top.h
-LIB_H += $(ARCH_INCLUDE)
-LIB_H += util/cgroup.h
-LIB_H += $(LIB_INCLUDE)traceevent/event-parse.h
-LIB_H += util/target.h
-LIB_H += util/rblist.h
-LIB_H += util/intlist.h
-LIB_H += util/perf_regs.h
-LIB_H += util/unwind.h
-LIB_H += util/vdso.h
-LIB_H += ui/helpline.h
-LIB_H += ui/progress.h
-LIB_H += ui/util.h
-LIB_H += ui/ui.h
-
-LIB_OBJS += $(OUTPUT)util/abspath.o
-LIB_OBJS += $(OUTPUT)util/alias.o
-LIB_OBJS += $(OUTPUT)util/annotate.o
-LIB_OBJS += $(OUTPUT)util/build-id.o
-LIB_OBJS += $(OUTPUT)util/config.o
-LIB_OBJS += $(OUTPUT)util/ctype.o
-LIB_OBJS += $(OUTPUT)util/sysfs.o
-LIB_OBJS += $(OUTPUT)util/pmu.o
-LIB_OBJS += $(OUTPUT)util/environment.o
-LIB_OBJS += $(OUTPUT)util/event.o
-LIB_OBJS += $(OUTPUT)util/evlist.o
-LIB_OBJS += $(OUTPUT)util/evsel.o
-LIB_OBJS += $(OUTPUT)util/exec_cmd.o
-LIB_OBJS += $(OUTPUT)util/help.o
-LIB_OBJS += $(OUTPUT)util/levenshtein.o
-LIB_OBJS += $(OUTPUT)util/parse-options.o
-LIB_OBJS += $(OUTPUT)util/parse-events.o
-LIB_OBJS += $(OUTPUT)util/path.o
-LIB_OBJS += $(OUTPUT)util/rbtree.o
-LIB_OBJS += $(OUTPUT)util/bitmap.o
-LIB_OBJS += $(OUTPUT)util/hweight.o
-LIB_OBJS += $(OUTPUT)util/run-command.o
-LIB_OBJS += $(OUTPUT)util/quote.o
-LIB_OBJS += $(OUTPUT)util/strbuf.o
-LIB_OBJS += $(OUTPUT)util/string.o
-LIB_OBJS += $(OUTPUT)util/strlist.o
-LIB_OBJS += $(OUTPUT)util/strfilter.o
-LIB_OBJS += $(OUTPUT)util/top.o
-LIB_OBJS += $(OUTPUT)util/usage.o
-LIB_OBJS += $(OUTPUT)util/wrapper.o
-LIB_OBJS += $(OUTPUT)util/sigchain.o
-LIB_OBJS += $(OUTPUT)util/dso.o
-LIB_OBJS += $(OUTPUT)util/symbol.o
-LIB_OBJS += $(OUTPUT)util/symbol-elf.o
-LIB_OBJS += $(OUTPUT)util/color.o
-LIB_OBJS += $(OUTPUT)util/pager.o
-LIB_OBJS += $(OUTPUT)util/header.o
-LIB_OBJS += $(OUTPUT)util/callchain.o
-LIB_OBJS += $(OUTPUT)util/values.o
-LIB_OBJS += $(OUTPUT)util/debug.o
-LIB_OBJS += $(OUTPUT)util/machine.o
-LIB_OBJS += $(OUTPUT)util/map.o
-LIB_OBJS += $(OUTPUT)util/pstack.o
-LIB_OBJS += $(OUTPUT)util/session.o
-LIB_OBJS += $(OUTPUT)util/thread.o
-LIB_OBJS += $(OUTPUT)util/thread_map.o
-LIB_OBJS += $(OUTPUT)util/trace-event-parse.o
-LIB_OBJS += $(OUTPUT)util/parse-events-flex.o
-LIB_OBJS += $(OUTPUT)util/parse-events-bison.o
-LIB_OBJS += $(OUTPUT)util/pmu-flex.o
-LIB_OBJS += $(OUTPUT)util/pmu-bison.o
-LIB_OBJS += $(OUTPUT)util/trace-event-read.o
-LIB_OBJS += $(OUTPUT)util/trace-event-info.o
-LIB_OBJS += $(OUTPUT)util/trace-event-scripting.o
-LIB_OBJS += $(OUTPUT)util/svghelper.o
-LIB_OBJS += $(OUTPUT)util/sort.o
-LIB_OBJS += $(OUTPUT)util/hist.o
-LIB_OBJS += $(OUTPUT)util/probe-event.o
-LIB_OBJS += $(OUTPUT)util/util.o
-LIB_OBJS += $(OUTPUT)util/xyarray.o
-LIB_OBJS += $(OUTPUT)util/cpumap.o
-LIB_OBJS += $(OUTPUT)util/cgroup.o
-LIB_OBJS += $(OUTPUT)util/target.o
-LIB_OBJS += $(OUTPUT)util/rblist.o
-LIB_OBJS += $(OUTPUT)util/intlist.o
-LIB_OBJS += $(OUTPUT)util/vdso.o
-LIB_OBJS += $(OUTPUT)util/stat.o
-LIB_OBJS += $(OUTPUT)util/record.o
-
-LIB_OBJS += $(OUTPUT)ui/setup.o
-LIB_OBJS += $(OUTPUT)ui/helpline.o
-LIB_OBJS += $(OUTPUT)ui/progress.o
-LIB_OBJS += $(OUTPUT)ui/util.o
-LIB_OBJS += $(OUTPUT)ui/hist.o
-LIB_OBJS += $(OUTPUT)ui/stdio/hist.o
-
-LIB_OBJS += $(OUTPUT)arch/common.o
-
-LIB_OBJS += $(OUTPUT)tests/parse-events.o
-LIB_OBJS += $(OUTPUT)tests/dso-data.o
-LIB_OBJS += $(OUTPUT)tests/attr.o
-LIB_OBJS += $(OUTPUT)tests/vmlinux-kallsyms.o
-LIB_OBJS += $(OUTPUT)tests/open-syscall.o
-LIB_OBJS += $(OUTPUT)tests/open-syscall-all-cpus.o
-LIB_OBJS += $(OUTPUT)tests/open-syscall-tp-fields.o
-LIB_OBJS += $(OUTPUT)tests/mmap-basic.o
-LIB_OBJS += $(OUTPUT)tests/perf-record.o
-LIB_OBJS += $(OUTPUT)tests/rdpmc.o
-LIB_OBJS += $(OUTPUT)tests/evsel-roundtrip-name.o
-LIB_OBJS += $(OUTPUT)tests/evsel-tp-sched.o
-LIB_OBJS += $(OUTPUT)tests/pmu.o
-LIB_OBJS += $(OUTPUT)tests/hists_link.o
-LIB_OBJS += $(OUTPUT)tests/python-use.o
-LIB_OBJS += $(OUTPUT)tests/bp_signal.o
-LIB_OBJS += $(OUTPUT)tests/bp_signal_overflow.o
-LIB_OBJS += $(OUTPUT)tests/task-exit.o
-LIB_OBJS += $(OUTPUT)tests/sw-clock.o
-ifeq ($(ARCH),x86)
-LIB_OBJS += $(OUTPUT)tests/perf-time-to-tsc.o
-endif
-LIB_OBJS += $(OUTPUT)tests/code-reading.o
-LIB_OBJS += $(OUTPUT)tests/sample-parsing.o
-LIB_OBJS += $(OUTPUT)tests/parse-no-sample-id-all.o
-
-BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o
-BUILTIN_OBJS += $(OUTPUT)builtin-bench.o
-# Benchmark modules
-BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o
-BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o
-ifeq ($(RAW_ARCH),x86_64)
-BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o
-BUILTIN_OBJS += $(OUTPUT)bench/mem-memset-x86-64-asm.o
-endif
-BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o
-BUILTIN_OBJS += $(OUTPUT)bench/mem-memset.o
-
-BUILTIN_OBJS += $(OUTPUT)builtin-diff.o
-BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o
-BUILTIN_OBJS += $(OUTPUT)builtin-help.o
-BUILTIN_OBJS += $(OUTPUT)builtin-sched.o
-BUILTIN_OBJS += $(OUTPUT)builtin-buildid-list.o
-BUILTIN_OBJS += $(OUTPUT)builtin-buildid-cache.o
-BUILTIN_OBJS += $(OUTPUT)builtin-list.o
-BUILTIN_OBJS += $(OUTPUT)builtin-record.o
-BUILTIN_OBJS += $(OUTPUT)builtin-report.o
-BUILTIN_OBJS += $(OUTPUT)builtin-stat.o
-BUILTIN_OBJS += $(OUTPUT)builtin-timechart.o
-BUILTIN_OBJS += $(OUTPUT)builtin-top.o
-BUILTIN_OBJS += $(OUTPUT)builtin-script.o
-BUILTIN_OBJS += $(OUTPUT)builtin-probe.o
-BUILTIN_OBJS += $(OUTPUT)builtin-kmem.o
-BUILTIN_OBJS += $(OUTPUT)builtin-lock.o
-BUILTIN_OBJS += $(OUTPUT)builtin-kvm.o
-BUILTIN_OBJS += $(OUTPUT)builtin-inject.o
-BUILTIN_OBJS += $(OUTPUT)tests/builtin-test.o
-BUILTIN_OBJS += $(OUTPUT)builtin-mem.o
-
-PERFLIBS = $(LIB_FILE) $(LIBLK) $(LIBTRACEEVENT)
-
-# We choose to avoid "if .. else if .. else .. endif endif"
-# because maintaining the nesting to match is a pain.  If
-# we had "elif" things would have been much nicer...
-
--include arch/$(ARCH)/Makefile
-
-ifneq ($(OUTPUT),)
-  CFLAGS += -I$(OUTPUT)
-endif
-
-ifdef NO_LIBELF
-EXTLIBS := $(filter-out -lelf,$(EXTLIBS))
-
-# Remove ELF/DWARF dependent codes
-LIB_OBJS := $(filter-out $(OUTPUT)util/symbol-elf.o,$(LIB_OBJS))
-LIB_OBJS := $(filter-out $(OUTPUT)util/dwarf-aux.o,$(LIB_OBJS))
-LIB_OBJS := $(filter-out $(OUTPUT)util/probe-event.o,$(LIB_OBJS))
-LIB_OBJS := $(filter-out $(OUTPUT)util/probe-finder.o,$(LIB_OBJS))
-
-BUILTIN_OBJS := $(filter-out $(OUTPUT)builtin-probe.o,$(BUILTIN_OBJS))
-
-# Use minimal symbol handling
-LIB_OBJS += $(OUTPUT)util/symbol-minimal.o
-
-else # NO_LIBELF
-ifndef NO_DWARF
-  LIB_OBJS += $(OUTPUT)util/probe-finder.o
-  LIB_OBJS += $(OUTPUT)util/dwarf-aux.o
-endif # NO_DWARF
-endif # NO_LIBELF
-
-ifndef NO_LIBUNWIND
-  LIB_OBJS += $(OUTPUT)util/unwind.o
-endif
-LIB_OBJS += $(OUTPUT)tests/keep-tracking.o
-
-ifndef NO_LIBAUDIT
-  BUILTIN_OBJS += $(OUTPUT)builtin-trace.o
-endif
-
-ifndef NO_SLANG
-  LIB_OBJS += $(OUTPUT)ui/browser.o
-  LIB_OBJS += $(OUTPUT)ui/browsers/annotate.o
-  LIB_OBJS += $(OUTPUT)ui/browsers/hists.o
-  LIB_OBJS += $(OUTPUT)ui/browsers/map.o
-  LIB_OBJS += $(OUTPUT)ui/browsers/scripts.o
-  LIB_OBJS += $(OUTPUT)ui/tui/setup.o
-  LIB_OBJS += $(OUTPUT)ui/tui/util.o
-  LIB_OBJS += $(OUTPUT)ui/tui/helpline.o
-  LIB_OBJS += $(OUTPUT)ui/tui/progress.o
-  LIB_H += ui/browser.h
-  LIB_H += ui/browsers/map.h
-  LIB_H += ui/keysyms.h
-  LIB_H += ui/libslang.h
-endif
-
-ifndef NO_GTK2
-  LIB_OBJS += $(OUTPUT)ui/gtk/browser.o
-  LIB_OBJS += $(OUTPUT)ui/gtk/hists.o
-  LIB_OBJS += $(OUTPUT)ui/gtk/setup.o
-  LIB_OBJS += $(OUTPUT)ui/gtk/util.o
-  LIB_OBJS += $(OUTPUT)ui/gtk/helpline.o
-  LIB_OBJS += $(OUTPUT)ui/gtk/progress.o
-  LIB_OBJS += $(OUTPUT)ui/gtk/annotate.o
-endif
-
-ifndef NO_LIBPERL
-  LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-perl.o
-  LIB_OBJS += $(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o
-endif
-
-ifndef NO_LIBPYTHON
-  LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-python.o
-  LIB_OBJS += $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o
-endif
-
-ifeq ($(NO_PERF_REGS),0)
-  ifeq ($(ARCH),x86)
-    LIB_H += arch/x86/include/perf_regs.h
-  endif
-endif
-
-ifndef NO_LIBNUMA
-  BUILTIN_OBJS += $(OUTPUT)bench/numa.o
-endif
-
-ifdef ASCIIDOC8
-  export ASCIIDOC8
-endif
-
-LIBS = -Wl,--whole-archive $(PERFLIBS) -Wl,--no-whole-archive -Wl,--start-group $(EXTLIBS) -Wl,--end-group
-
-export INSTALL SHELL_PATH
-
-### Build rules
-
-SHELL = $(SHELL_PATH)
-
-all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS)
-
-please_set_SHELL_PATH_to_a_more_modern_shell:
-	@$$(:)
-
-shell_compatibility_test: please_set_SHELL_PATH_to_a_more_modern_shell
-
-strip: $(PROGRAMS) $(OUTPUT)perf
-	$(STRIP) $(STRIP_OPTS) $(PROGRAMS) $(OUTPUT)perf
-
-$(OUTPUT)perf.o: perf.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -include $(OUTPUT)PERF-VERSION-FILE \
-		'-DPERF_HTML_PATH="$(htmldir_SQ)"' \
-		$(CFLAGS) -c $(filter %.c,$^) -o $@
-
-$(OUTPUT)perf: $(OUTPUT)perf.o $(BUILTIN_OBJS) $(PERFLIBS)
-	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(OUTPUT)perf.o \
-               $(BUILTIN_OBJS) $(LIBS) -o $@
-
-$(OUTPUT)builtin-help.o: builtin-help.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \
-		'-DPERF_HTML_PATH="$(htmldir_SQ)"' \
-		'-DPERF_MAN_PATH="$(mandir_SQ)"' \
-		'-DPERF_INFO_PATH="$(infodir_SQ)"' $<
-
-$(OUTPUT)builtin-timechart.o: builtin-timechart.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \
-		'-DPERF_HTML_PATH="$(htmldir_SQ)"' \
-		'-DPERF_MAN_PATH="$(mandir_SQ)"' \
-		'-DPERF_INFO_PATH="$(infodir_SQ)"' $<
-
-$(OUTPUT)common-cmds.h: util/generate-cmdlist.sh command-list.txt
-
-$(OUTPUT)common-cmds.h: $(wildcard Documentation/perf-*.txt)
-	$(QUIET_GEN). util/generate-cmdlist.sh > $@+ && mv $@+ $@
-
-$(SCRIPTS) : % : %.sh
-	$(QUIET_GEN)$(INSTALL) '$@.sh' '$(OUTPUT)$@'
-
-# These can record PERF_VERSION
-$(OUTPUT)perf.o perf.spec \
-	$(SCRIPTS) \
-	: $(OUTPUT)PERF-VERSION-FILE
-
-.SUFFIXES:
-.SUFFIXES: .o .c .S .s
-
-# These two need to be here so that when O= is not used they take precedence
-# over the general rule for .o
-
-$(OUTPUT)util/%-flex.o: $(OUTPUT)util/%-flex.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c -Iutil/ $(CFLAGS) -w $<
-
-$(OUTPUT)util/%-bison.o: $(OUTPUT)util/%-bison.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c -Iutil/ $(CFLAGS) -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w $<
-
-$(OUTPUT)%.o: %.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $<
-$(OUTPUT)%.i: %.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -E $(CFLAGS) $<
-$(OUTPUT)%.s: %.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -S $(CFLAGS) $<
-$(OUTPUT)%.o: %.S
-	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $<
-$(OUTPUT)%.s: %.S
-	$(QUIET_CC)$(CC) -o $@ -E $(CFLAGS) $<
-
-$(OUTPUT)util/exec_cmd.o: util/exec_cmd.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \
-		'-DPERF_EXEC_PATH="$(perfexecdir_SQ)"' \
-		'-DPREFIX="$(prefix_SQ)"' \
-		$<
-
-$(OUTPUT)tests/attr.o: tests/attr.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \
-		'-DBINDIR="$(bindir_SQ)"' -DPYTHON='"$(PYTHON_WORD)"' \
-		$<
-
-$(OUTPUT)tests/python-use.o: tests/python-use.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \
-		-DPYTHONPATH='"$(OUTPUT)python"' \
-		-DPYTHON='"$(PYTHON_WORD)"' \
-		$<
-
-$(OUTPUT)util/config.o: util/config.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
-
-$(OUTPUT)ui/browser.o: ui/browser.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $<
-
-$(OUTPUT)ui/browsers/annotate.o: ui/browsers/annotate.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $<
+clean:
+	$(make)
 
-$(OUTPUT)ui/browsers/hists.o: ui/browsers/hists.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $<
-
-$(OUTPUT)ui/browsers/map.o: ui/browsers/map.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $<
-
-$(OUTPUT)ui/browsers/scripts.o: ui/browsers/scripts.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $<
-
-$(OUTPUT)util/rbtree.o: ../../lib/rbtree.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
-
-$(OUTPUT)util/parse-events.o: util/parse-events.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-redundant-decls $<
-
-$(OUTPUT)util/scripting-engines/trace-event-perl.o: util/scripting-engines/trace-event-perl.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-undef -Wno-switch-default $<
-
-$(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o: scripts/perl/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs -Wno-undef -Wno-switch-default $<
-
-$(OUTPUT)util/scripting-engines/trace-event-python.o: util/scripting-engines/trace-event-python.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $<
-
-$(OUTPUT)scripts/python/Perf-Trace-Util/Context.o: scripts/python/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $<
-
-$(OUTPUT)perf-%: %.o $(PERFLIBS)
-	$(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $(LDFLAGS) $(filter %.o,$^) $(LIBS)
-
-$(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H)
-$(patsubst perf-%,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h)
-
-# we compile into subdirectories. if the target directory is not the source directory, they might not exists. So
-# we depend the various files onto their directories.
-DIRECTORY_DEPS = $(LIB_OBJS) $(BUILTIN_OBJS) $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h
-$(DIRECTORY_DEPS): | $(sort $(dir $(DIRECTORY_DEPS)))
-# In the second step, we make a rule to actually create these directories
-$(sort $(dir $(DIRECTORY_DEPS))):
-	$(QUIET_MKDIR)$(MKDIR) -p $@ 2>/dev/null
-
-$(LIB_FILE): $(LIB_OBJS)
-	$(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS)
-
-# libtraceevent.a
-$(LIBTRACEEVENT):
-	$(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) libtraceevent.a
-
-$(LIBTRACEEVENT)-clean:
-	$(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) clean
-
-# if subdir is set, we've been called from above so target has been built
-# already
-$(LIBLK):
-ifeq ($(subdir),)
-	$(QUIET_SUBDIR0)$(LK_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) liblk.a
-endif
-
-$(LIBLK)-clean:
-ifeq ($(subdir),)
-	$(QUIET_SUBDIR0)$(LK_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) clean
-endif
-
-help:
-	@echo 'Perf make targets:'
-	@echo '  doc		- make *all* documentation (see below)'
-	@echo '  man		- make manpage documentation (access with man <foo>)'
-	@echo '  html		- make html documentation'
-	@echo '  info		- make GNU info documentation (access with info <foo>)'
-	@echo '  pdf		- make pdf documentation'
-	@echo '  TAGS		- use etags to make tag information for source browsing'
-	@echo '  tags		- use ctags to make tag information for source browsing'
-	@echo '  cscope	- use cscope to make interactive browsing database'
-	@echo ''
-	@echo 'Perf install targets:'
-	@echo '  NOTE: documentation build requires asciidoc, xmlto packages to be installed'
-	@echo '  HINT: use "make prefix=<path> <install target>" to install to a particular'
-	@echo '        path like make prefix=/usr/local install install-doc'
-	@echo '  install	- install compiled binaries'
-	@echo '  install-doc	- install *all* documentation'
-	@echo '  install-man	- install manpage documentation'
-	@echo '  install-html	- install html documentation'
-	@echo '  install-info	- install GNU info documentation'
-	@echo '  install-pdf	- install pdf documentation'
-	@echo ''
-	@echo '  quick-install-doc	- alias for quick-install-man'
-	@echo '  quick-install-man	- install the documentation quickly'
-	@echo '  quick-install-html	- install the html documentation quickly'
-	@echo ''
-	@echo 'Perf maintainer targets:'
-	@echo '  clean			- clean all binary objects and build output'
-
-
-DOC_TARGETS := doc man html info pdf
-
-INSTALL_DOC_TARGETS := $(patsubst %,install-%,$(DOC_TARGETS)) try-install-man
-INSTALL_DOC_TARGETS += quick-install-doc quick-install-man quick-install-html
-
-# 'make doc' should call 'make -C Documentation all'
-$(DOC_TARGETS):
-	$(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:doc=all)
-
-TAGS:
-	$(RM) TAGS
-	$(FIND) . -name '*.[hcS]' -print | xargs etags -a
-
-tags:
-	$(RM) tags
-	$(FIND) . -name '*.[hcS]' -print | xargs ctags -a
-
-cscope:
-	$(RM) cscope*
-	$(FIND) . -name '*.[hcS]' -print | xargs cscope -b
-
-### Detect prefix changes
-TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):\
-             $(bindir_SQ):$(perfexecdir_SQ):$(template_dir_SQ):$(prefix_SQ)
-
-$(OUTPUT)PERF-CFLAGS: .FORCE-PERF-CFLAGS
-	@FLAGS='$(TRACK_CFLAGS)'; \
-	    if test x"$$FLAGS" != x"`cat $(OUTPUT)PERF-CFLAGS 2>/dev/null`" ; then \
-		echo 1>&2 "    * new build flags or prefix"; \
-		echo "$$FLAGS" >$(OUTPUT)PERF-CFLAGS; \
-            fi
-
-### Testing rules
-
-# GNU make supports exporting all variables by "export" without parameters.
-# However, the environment gets quite big, and some programs have problems
-# with that.
-
-check: $(OUTPUT)common-cmds.h
-	if sparse; \
-	then \
-		for i in *.c */*.c; \
-		do \
-			sparse $(CFLAGS) $(SPARSE_FLAGS) $$i || exit; \
-		done; \
-	else \
-		exit 1; \
-	fi
-
-### Installation rules
-
-install-bin: all
-	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'
-	$(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)'
-	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
-	$(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
-ifndef NO_LIBPERL
-	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'
-	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'
-	$(INSTALL) scripts/perl/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'
-	$(INSTALL) scripts/perl/*.pl -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl'
-	$(INSTALL) scripts/perl/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'
-endif
-ifndef NO_LIBPYTHON
-	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'
-	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'
-	$(INSTALL) scripts/python/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'
-	$(INSTALL) scripts/python/*.py -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python'
-	$(INSTALL) scripts/python/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'
-endif
-	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d'
-	$(INSTALL) bash_completion '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d/perf'
-	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'
-	$(INSTALL) tests/attr.py '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'
-	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'
-	$(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'
-
-install: install-bin try-install-man
-
-install-python_ext:
-	$(PYTHON_WORD) util/setup.py --quiet install --root='/$(DESTDIR_SQ)'
-
-# 'make install-doc' should call 'make -C Documentation install'
-$(INSTALL_DOC_TARGETS):
-	$(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:-doc=)
-
-### Cleaning rules
-
-clean: $(LIBTRACEEVENT)-clean $(LIBLK)-clean
-	$(RM) $(LIB_OBJS) $(BUILTIN_OBJS) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf.o $(LANG_BINDINGS)
-	$(RM) $(ALL_PROGRAMS) perf
-	$(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope*
-	$(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean
-	$(RM) $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-CFLAGS
-	$(RM) $(OUTPUT)util/*-bison*
-	$(RM) $(OUTPUT)util/*-flex*
-	$(python-clean)
-
-.PHONY: all install clean strip $(LIBTRACEEVENT) $(LIBLK)
-.PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
-.PHONY: .FORCE-PERF-VERSION-FILE TAGS tags cscope .FORCE-PERF-CFLAGS
+#
+# All other targets get passed through:
+#
+%:
+	$(print_msg)
+	$(make)
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
new file mode 100644
index 0000000000000000000000000000000000000000..7fc8f179cae74d08e2cc507031c773009723bb00
--- /dev/null
+++ b/tools/perf/Makefile.perf
@@ -0,0 +1,892 @@
+include ../scripts/Makefile.include
+
+# The default target of this Makefile is...
+all:
+
+include config/utilities.mak
+
+# Define V to have a more verbose compile.
+#
+# Define O to save output files in a separate directory.
+#
+# Define ARCH as name of target architecture if you want cross-builds.
+#
+# Define CROSS_COMPILE as prefix name of compiler if you want cross-builds.
+#
+# Define NO_LIBPERL to disable perl script extension.
+#
+# Define NO_LIBPYTHON to disable python script extension.
+#
+# Define PYTHON to point to the python binary if the default
+# `python' is not correct; for example: PYTHON=python2
+#
+# Define PYTHON_CONFIG to point to the python-config binary if
+# the default `$(PYTHON)-config' is not correct.
+#
+# Define ASCIIDOC8 if you want to format documentation with AsciiDoc 8
+#
+# Define DOCBOOK_XSL_172 if you want to format man pages with DocBook XSL v1.72.
+#
+# Define LDFLAGS=-static to build a static binary.
+#
+# Define EXTRA_CFLAGS=-m64 or EXTRA_CFLAGS=-m32 as appropriate for cross-builds.
+#
+# Define NO_DWARF if you do not want debug-info analysis feature at all.
+#
+# Define WERROR=0 to disable treating any warnings as errors.
+#
+# Define NO_NEWT if you do not want TUI support. (deprecated)
+#
+# Define NO_SLANG if you do not want TUI support.
+#
+# Define NO_GTK2 if you do not want GTK+ GUI support.
+#
+# Define NO_DEMANGLE if you do not want C++ symbol demangling.
+#
+# Define NO_LIBELF if you do not want libelf dependency (e.g. cross-builds)
+#
+# Define NO_LIBUNWIND if you do not want libunwind dependency for dwarf
+# backtrace post unwind.
+#
+# Define NO_BACKTRACE if you do not want stack backtrace debug feature
+#
+# Define NO_LIBNUMA if you do not want numa perf benchmark
+#
+# Define NO_LIBAUDIT if you do not want libaudit support
+#
+# Define NO_LIBBIONIC if you do not want bionic support
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(shell pwd)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+#$(info Determined 'srctree' to be $(srctree))
+endif
+
+ifneq ($(objtree),)
+#$(info Determined 'objtree' to be $(objtree))
+endif
+
+ifneq ($(OUTPUT),)
+#$(info Determined 'OUTPUT' to be $(OUTPUT))
+endif
+
+$(OUTPUT)PERF-VERSION-FILE: ../../.git/HEAD
+	@$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT)
+	@touch $(OUTPUT)PERF-VERSION-FILE
+
+CC = $(CROSS_COMPILE)gcc
+AR = $(CROSS_COMPILE)ar
+
+RM      = rm -f
+LN      = ln -f
+MKDIR   = mkdir
+FIND    = find
+INSTALL = install
+FLEX    = flex
+BISON   = bison
+STRIP   = strip
+
+LK_DIR          = $(srctree)/tools/lib/lk/
+TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/
+
+# include config/Makefile by default and rule out
+# non-config cases
+config := 1
+
+NON_CONFIG_TARGETS := clean TAGS tags cscope help
+
+ifdef MAKECMDGOALS
+ifeq ($(filter-out $(NON_CONFIG_TARGETS),$(MAKECMDGOALS)),)
+  config := 0
+endif
+endif
+
+ifeq ($(config),1)
+include config/Makefile
+endif
+
+export prefix bindir sharedir sysconfdir
+
+# sparse is architecture-neutral, which means that we need to tell it
+# explicitly what architecture to check for. Fix this up for yours..
+SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__
+
+# Guard against environment variables
+BUILTIN_OBJS =
+LIB_H =
+LIB_OBJS =
+GTK_OBJS =
+PYRF_OBJS =
+SCRIPT_SH =
+
+SCRIPT_SH += perf-archive.sh
+
+grep-libs = $(filter -l%,$(1))
+strip-libs = $(filter-out -l%,$(1))
+
+ifneq ($(OUTPUT),)
+  TE_PATH=$(OUTPUT)
+ifneq ($(subdir),)
+  LK_PATH=$(OUTPUT)/../lib/lk/
+else
+  LK_PATH=$(OUTPUT)
+endif
+else
+  TE_PATH=$(TRACE_EVENT_DIR)
+  LK_PATH=$(LK_DIR)
+endif
+
+LIBTRACEEVENT = $(TE_PATH)libtraceevent.a
+export LIBTRACEEVENT
+
+LIBLK = $(LK_PATH)liblk.a
+export LIBLK
+
+# python extension build directories
+PYTHON_EXTBUILD     := $(OUTPUT)python_ext_build/
+PYTHON_EXTBUILD_LIB := $(PYTHON_EXTBUILD)lib/
+PYTHON_EXTBUILD_TMP := $(PYTHON_EXTBUILD)tmp/
+export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP
+
+python-clean := $(call QUIET_CLEAN, python) $(RM) -r $(PYTHON_EXTBUILD) $(OUTPUT)python/perf.so
+
+PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources)
+PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT) $(LIBLK)
+
+$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS)
+	$(QUIET_GEN)CFLAGS='$(CFLAGS)' $(PYTHON_WORD) util/setup.py \
+	  --quiet build_ext; \
+	mkdir -p $(OUTPUT)python && \
+	cp $(PYTHON_EXTBUILD_LIB)perf.so $(OUTPUT)python/
+#
+# No Perl scripts right now:
+#
+
+SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH))
+
+#
+# Single 'perf' binary right now:
+#
+PROGRAMS += $(OUTPUT)perf
+
+# what 'all' will build and 'install' will install, in perfexecdir
+ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS)
+
+# what 'all' will build but not install in perfexecdir
+OTHER_PROGRAMS = $(OUTPUT)perf
+
+# Set paths to tools early so that they can be used for version tests.
+ifndef SHELL_PATH
+  SHELL_PATH = /bin/sh
+endif
+ifndef PERL_PATH
+  PERL_PATH = /usr/bin/perl
+endif
+
+export PERL_PATH
+
+$(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c
+	$(QUIET_FLEX)$(FLEX) --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) -t util/parse-events.l > $(OUTPUT)util/parse-events-flex.c
+
+$(OUTPUT)util/parse-events-bison.c: util/parse-events.y
+	$(QUIET_BISON)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $(OUTPUT)util/parse-events-bison.c -p parse_events_
+
+$(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c
+	$(QUIET_FLEX)$(FLEX) --header-file=$(OUTPUT)util/pmu-flex.h -t util/pmu.l > $(OUTPUT)util/pmu-flex.c
+
+$(OUTPUT)util/pmu-bison.c: util/pmu.y
+	$(QUIET_BISON)$(BISON) -v util/pmu.y -d -o $(OUTPUT)util/pmu-bison.c -p perf_pmu_
+
+$(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c
+$(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c
+
+LIB_FILE=$(OUTPUT)libperf.a
+
+LIB_H += ../../include/uapi/linux/perf_event.h
+LIB_H += ../../include/linux/rbtree.h
+LIB_H += ../../include/linux/list.h
+LIB_H += ../../include/uapi/linux/const.h
+LIB_H += ../../include/linux/hash.h
+LIB_H += ../../include/linux/stringify.h
+LIB_H += util/include/linux/bitmap.h
+LIB_H += util/include/linux/bitops.h
+LIB_H += util/include/linux/compiler.h
+LIB_H += util/include/linux/const.h
+LIB_H += util/include/linux/ctype.h
+LIB_H += util/include/linux/kernel.h
+LIB_H += util/include/linux/list.h
+LIB_H += util/include/linux/export.h
+LIB_H += util/include/linux/magic.h
+LIB_H += util/include/linux/poison.h
+LIB_H += util/include/linux/prefetch.h
+LIB_H += util/include/linux/rbtree.h
+LIB_H += util/include/linux/rbtree_augmented.h
+LIB_H += util/include/linux/string.h
+LIB_H += util/include/linux/types.h
+LIB_H += util/include/linux/linkage.h
+LIB_H += util/include/asm/asm-offsets.h
+LIB_H += util/include/asm/bug.h
+LIB_H += util/include/asm/byteorder.h
+LIB_H += util/include/asm/hweight.h
+LIB_H += util/include/asm/swab.h
+LIB_H += util/include/asm/system.h
+LIB_H += util/include/asm/uaccess.h
+LIB_H += util/include/dwarf-regs.h
+LIB_H += util/include/asm/dwarf2.h
+LIB_H += util/include/asm/cpufeature.h
+LIB_H += util/include/asm/unistd_32.h
+LIB_H += util/include/asm/unistd_64.h
+LIB_H += perf.h
+LIB_H += util/annotate.h
+LIB_H += util/cache.h
+LIB_H += util/callchain.h
+LIB_H += util/build-id.h
+LIB_H += util/debug.h
+LIB_H += util/fs.h
+LIB_H += util/pmu.h
+LIB_H += util/event.h
+LIB_H += util/evsel.h
+LIB_H += util/evlist.h
+LIB_H += util/exec_cmd.h
+LIB_H += util/types.h
+LIB_H += util/levenshtein.h
+LIB_H += util/machine.h
+LIB_H += util/map.h
+LIB_H += util/parse-options.h
+LIB_H += util/parse-events.h
+LIB_H += util/quote.h
+LIB_H += util/util.h
+LIB_H += util/xyarray.h
+LIB_H += util/header.h
+LIB_H += util/help.h
+LIB_H += util/session.h
+LIB_H += util/strbuf.h
+LIB_H += util/strlist.h
+LIB_H += util/strfilter.h
+LIB_H += util/svghelper.h
+LIB_H += util/tool.h
+LIB_H += util/run-command.h
+LIB_H += util/sigchain.h
+LIB_H += util/dso.h
+LIB_H += util/symbol.h
+LIB_H += util/color.h
+LIB_H += util/values.h
+LIB_H += util/sort.h
+LIB_H += util/hist.h
+LIB_H += util/comm.h
+LIB_H += util/thread.h
+LIB_H += util/thread_map.h
+LIB_H += util/trace-event.h
+LIB_H += util/probe-finder.h
+LIB_H += util/dwarf-aux.h
+LIB_H += util/probe-event.h
+LIB_H += util/pstack.h
+LIB_H += util/cpumap.h
+LIB_H += util/top.h
+LIB_H += $(ARCH_INCLUDE)
+LIB_H += util/cgroup.h
+LIB_H += $(LIB_INCLUDE)traceevent/event-parse.h
+LIB_H += util/target.h
+LIB_H += util/rblist.h
+LIB_H += util/intlist.h
+LIB_H += util/perf_regs.h
+LIB_H += util/unwind.h
+LIB_H += util/vdso.h
+LIB_H += ui/helpline.h
+LIB_H += ui/progress.h
+LIB_H += ui/util.h
+LIB_H += ui/ui.h
+LIB_H += util/data.h
+
+LIB_OBJS += $(OUTPUT)util/abspath.o
+LIB_OBJS += $(OUTPUT)util/alias.o
+LIB_OBJS += $(OUTPUT)util/annotate.o
+LIB_OBJS += $(OUTPUT)util/build-id.o
+LIB_OBJS += $(OUTPUT)util/config.o
+LIB_OBJS += $(OUTPUT)util/ctype.o
+LIB_OBJS += $(OUTPUT)util/fs.o
+LIB_OBJS += $(OUTPUT)util/pmu.o
+LIB_OBJS += $(OUTPUT)util/environment.o
+LIB_OBJS += $(OUTPUT)util/event.o
+LIB_OBJS += $(OUTPUT)util/evlist.o
+LIB_OBJS += $(OUTPUT)util/evsel.o
+LIB_OBJS += $(OUTPUT)util/exec_cmd.o
+LIB_OBJS += $(OUTPUT)util/help.o
+LIB_OBJS += $(OUTPUT)util/levenshtein.o
+LIB_OBJS += $(OUTPUT)util/parse-options.o
+LIB_OBJS += $(OUTPUT)util/parse-events.o
+LIB_OBJS += $(OUTPUT)util/path.o
+LIB_OBJS += $(OUTPUT)util/rbtree.o
+LIB_OBJS += $(OUTPUT)util/bitmap.o
+LIB_OBJS += $(OUTPUT)util/hweight.o
+LIB_OBJS += $(OUTPUT)util/run-command.o
+LIB_OBJS += $(OUTPUT)util/quote.o
+LIB_OBJS += $(OUTPUT)util/strbuf.o
+LIB_OBJS += $(OUTPUT)util/string.o
+LIB_OBJS += $(OUTPUT)util/strlist.o
+LIB_OBJS += $(OUTPUT)util/strfilter.o
+LIB_OBJS += $(OUTPUT)util/top.o
+LIB_OBJS += $(OUTPUT)util/usage.o
+LIB_OBJS += $(OUTPUT)util/wrapper.o
+LIB_OBJS += $(OUTPUT)util/sigchain.o
+LIB_OBJS += $(OUTPUT)util/dso.o
+LIB_OBJS += $(OUTPUT)util/symbol.o
+LIB_OBJS += $(OUTPUT)util/symbol-elf.o
+LIB_OBJS += $(OUTPUT)util/color.o
+LIB_OBJS += $(OUTPUT)util/pager.o
+LIB_OBJS += $(OUTPUT)util/header.o
+LIB_OBJS += $(OUTPUT)util/callchain.o
+LIB_OBJS += $(OUTPUT)util/values.o
+LIB_OBJS += $(OUTPUT)util/debug.o
+LIB_OBJS += $(OUTPUT)util/machine.o
+LIB_OBJS += $(OUTPUT)util/map.o
+LIB_OBJS += $(OUTPUT)util/pstack.o
+LIB_OBJS += $(OUTPUT)util/session.o
+LIB_OBJS += $(OUTPUT)util/comm.o
+LIB_OBJS += $(OUTPUT)util/thread.o
+LIB_OBJS += $(OUTPUT)util/thread_map.o
+LIB_OBJS += $(OUTPUT)util/trace-event-parse.o
+LIB_OBJS += $(OUTPUT)util/parse-events-flex.o
+LIB_OBJS += $(OUTPUT)util/parse-events-bison.o
+LIB_OBJS += $(OUTPUT)util/pmu-flex.o
+LIB_OBJS += $(OUTPUT)util/pmu-bison.o
+LIB_OBJS += $(OUTPUT)util/trace-event-read.o
+LIB_OBJS += $(OUTPUT)util/trace-event-info.o
+LIB_OBJS += $(OUTPUT)util/trace-event-scripting.o
+LIB_OBJS += $(OUTPUT)util/svghelper.o
+LIB_OBJS += $(OUTPUT)util/sort.o
+LIB_OBJS += $(OUTPUT)util/hist.o
+LIB_OBJS += $(OUTPUT)util/probe-event.o
+LIB_OBJS += $(OUTPUT)util/util.o
+LIB_OBJS += $(OUTPUT)util/xyarray.o
+LIB_OBJS += $(OUTPUT)util/cpumap.o
+LIB_OBJS += $(OUTPUT)util/cgroup.o
+LIB_OBJS += $(OUTPUT)util/target.o
+LIB_OBJS += $(OUTPUT)util/rblist.o
+LIB_OBJS += $(OUTPUT)util/intlist.o
+LIB_OBJS += $(OUTPUT)util/vdso.o
+LIB_OBJS += $(OUTPUT)util/stat.o
+LIB_OBJS += $(OUTPUT)util/record.o
+LIB_OBJS += $(OUTPUT)util/srcline.o
+LIB_OBJS += $(OUTPUT)util/data.o
+
+LIB_OBJS += $(OUTPUT)ui/setup.o
+LIB_OBJS += $(OUTPUT)ui/helpline.o
+LIB_OBJS += $(OUTPUT)ui/progress.o
+LIB_OBJS += $(OUTPUT)ui/util.o
+LIB_OBJS += $(OUTPUT)ui/hist.o
+LIB_OBJS += $(OUTPUT)ui/stdio/hist.o
+
+LIB_OBJS += $(OUTPUT)arch/common.o
+
+LIB_OBJS += $(OUTPUT)tests/parse-events.o
+LIB_OBJS += $(OUTPUT)tests/dso-data.o
+LIB_OBJS += $(OUTPUT)tests/attr.o
+LIB_OBJS += $(OUTPUT)tests/vmlinux-kallsyms.o
+LIB_OBJS += $(OUTPUT)tests/open-syscall.o
+LIB_OBJS += $(OUTPUT)tests/open-syscall-all-cpus.o
+LIB_OBJS += $(OUTPUT)tests/open-syscall-tp-fields.o
+LIB_OBJS += $(OUTPUT)tests/mmap-basic.o
+LIB_OBJS += $(OUTPUT)tests/perf-record.o
+LIB_OBJS += $(OUTPUT)tests/rdpmc.o
+LIB_OBJS += $(OUTPUT)tests/evsel-roundtrip-name.o
+LIB_OBJS += $(OUTPUT)tests/evsel-tp-sched.o
+LIB_OBJS += $(OUTPUT)tests/pmu.o
+LIB_OBJS += $(OUTPUT)tests/hists_link.o
+LIB_OBJS += $(OUTPUT)tests/python-use.o
+LIB_OBJS += $(OUTPUT)tests/bp_signal.o
+LIB_OBJS += $(OUTPUT)tests/bp_signal_overflow.o
+LIB_OBJS += $(OUTPUT)tests/task-exit.o
+LIB_OBJS += $(OUTPUT)tests/sw-clock.o
+ifeq ($(ARCH),x86)
+LIB_OBJS += $(OUTPUT)tests/perf-time-to-tsc.o
+endif
+LIB_OBJS += $(OUTPUT)tests/code-reading.o
+LIB_OBJS += $(OUTPUT)tests/sample-parsing.o
+LIB_OBJS += $(OUTPUT)tests/parse-no-sample-id-all.o
+
+BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o
+BUILTIN_OBJS += $(OUTPUT)builtin-bench.o
+# Benchmark modules
+BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o
+BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o
+ifeq ($(RAW_ARCH),x86_64)
+BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o
+BUILTIN_OBJS += $(OUTPUT)bench/mem-memset-x86-64-asm.o
+endif
+BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o
+BUILTIN_OBJS += $(OUTPUT)bench/mem-memset.o
+
+BUILTIN_OBJS += $(OUTPUT)builtin-diff.o
+BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o
+BUILTIN_OBJS += $(OUTPUT)builtin-help.o
+BUILTIN_OBJS += $(OUTPUT)builtin-sched.o
+BUILTIN_OBJS += $(OUTPUT)builtin-buildid-list.o
+BUILTIN_OBJS += $(OUTPUT)builtin-buildid-cache.o
+BUILTIN_OBJS += $(OUTPUT)builtin-list.o
+BUILTIN_OBJS += $(OUTPUT)builtin-record.o
+BUILTIN_OBJS += $(OUTPUT)builtin-report.o
+BUILTIN_OBJS += $(OUTPUT)builtin-stat.o
+BUILTIN_OBJS += $(OUTPUT)builtin-timechart.o
+BUILTIN_OBJS += $(OUTPUT)builtin-top.o
+BUILTIN_OBJS += $(OUTPUT)builtin-script.o
+BUILTIN_OBJS += $(OUTPUT)builtin-probe.o
+BUILTIN_OBJS += $(OUTPUT)builtin-kmem.o
+BUILTIN_OBJS += $(OUTPUT)builtin-lock.o
+BUILTIN_OBJS += $(OUTPUT)builtin-kvm.o
+BUILTIN_OBJS += $(OUTPUT)builtin-inject.o
+BUILTIN_OBJS += $(OUTPUT)tests/builtin-test.o
+BUILTIN_OBJS += $(OUTPUT)builtin-mem.o
+
+PERFLIBS = $(LIB_FILE) $(LIBLK) $(LIBTRACEEVENT)
+
+# We choose to avoid "if .. else if .. else .. endif endif"
+# because maintaining the nesting to match is a pain.  If
+# we had "elif" things would have been much nicer...
+
+-include arch/$(ARCH)/Makefile
+
+ifneq ($(OUTPUT),)
+  CFLAGS += -I$(OUTPUT)
+endif
+
+ifdef NO_LIBELF
+EXTLIBS := $(filter-out -lelf,$(EXTLIBS))
+
+# Remove ELF/DWARF dependent codes
+LIB_OBJS := $(filter-out $(OUTPUT)util/symbol-elf.o,$(LIB_OBJS))
+LIB_OBJS := $(filter-out $(OUTPUT)util/dwarf-aux.o,$(LIB_OBJS))
+LIB_OBJS := $(filter-out $(OUTPUT)util/probe-event.o,$(LIB_OBJS))
+LIB_OBJS := $(filter-out $(OUTPUT)util/probe-finder.o,$(LIB_OBJS))
+
+BUILTIN_OBJS := $(filter-out $(OUTPUT)builtin-probe.o,$(BUILTIN_OBJS))
+
+# Use minimal symbol handling
+LIB_OBJS += $(OUTPUT)util/symbol-minimal.o
+
+else # NO_LIBELF
+ifndef NO_DWARF
+  LIB_OBJS += $(OUTPUT)util/probe-finder.o
+  LIB_OBJS += $(OUTPUT)util/dwarf-aux.o
+endif # NO_DWARF
+endif # NO_LIBELF
+
+ifndef NO_LIBUNWIND
+  LIB_OBJS += $(OUTPUT)util/unwind.o
+endif
+LIB_OBJS += $(OUTPUT)tests/keep-tracking.o
+
+ifndef NO_LIBAUDIT
+  BUILTIN_OBJS += $(OUTPUT)builtin-trace.o
+endif
+
+ifndef NO_SLANG
+  LIB_OBJS += $(OUTPUT)ui/browser.o
+  LIB_OBJS += $(OUTPUT)ui/browsers/annotate.o
+  LIB_OBJS += $(OUTPUT)ui/browsers/hists.o
+  LIB_OBJS += $(OUTPUT)ui/browsers/map.o
+  LIB_OBJS += $(OUTPUT)ui/browsers/scripts.o
+  LIB_OBJS += $(OUTPUT)ui/tui/setup.o
+  LIB_OBJS += $(OUTPUT)ui/tui/util.o
+  LIB_OBJS += $(OUTPUT)ui/tui/helpline.o
+  LIB_OBJS += $(OUTPUT)ui/tui/progress.o
+  LIB_H += ui/tui/tui.h
+  LIB_H += ui/browser.h
+  LIB_H += ui/browsers/map.h
+  LIB_H += ui/keysyms.h
+  LIB_H += ui/libslang.h
+endif
+
+ifndef NO_GTK2
+  ALL_PROGRAMS += $(OUTPUT)libperf-gtk.so
+
+  GTK_OBJS += $(OUTPUT)ui/gtk/browser.o
+  GTK_OBJS += $(OUTPUT)ui/gtk/hists.o
+  GTK_OBJS += $(OUTPUT)ui/gtk/setup.o
+  GTK_OBJS += $(OUTPUT)ui/gtk/util.o
+  GTK_OBJS += $(OUTPUT)ui/gtk/helpline.o
+  GTK_OBJS += $(OUTPUT)ui/gtk/progress.o
+  GTK_OBJS += $(OUTPUT)ui/gtk/annotate.o
+
+install-gtk: $(OUTPUT)libperf-gtk.so
+	$(call QUIET_INSTALL, 'GTK UI') \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(libdir_SQ)'; \
+		$(INSTALL) $(OUTPUT)libperf-gtk.so '$(DESTDIR_SQ)$(libdir_SQ)'
+endif
+
+ifndef NO_LIBPERL
+  LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-perl.o
+  LIB_OBJS += $(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o
+endif
+
+ifndef NO_LIBPYTHON
+  LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-python.o
+  LIB_OBJS += $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o
+endif
+
+ifeq ($(NO_PERF_REGS),0)
+  ifeq ($(ARCH),x86)
+    LIB_H += arch/x86/include/perf_regs.h
+  endif
+endif
+
+ifndef NO_LIBNUMA
+  BUILTIN_OBJS += $(OUTPUT)bench/numa.o
+endif
+
+ifdef ASCIIDOC8
+  export ASCIIDOC8
+endif
+
+LIBS = -Wl,--whole-archive $(PERFLIBS) -Wl,--no-whole-archive -Wl,--start-group $(EXTLIBS) -Wl,--end-group
+
+export INSTALL SHELL_PATH
+
+### Build rules
+
+SHELL = $(SHELL_PATH)
+
+all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS)
+
+please_set_SHELL_PATH_to_a_more_modern_shell:
+	@$$(:)
+
+shell_compatibility_test: please_set_SHELL_PATH_to_a_more_modern_shell
+
+strip: $(PROGRAMS) $(OUTPUT)perf
+	$(STRIP) $(STRIP_OPTS) $(PROGRAMS) $(OUTPUT)perf
+
+$(OUTPUT)perf.o: perf.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS
+	$(QUIET_CC)$(CC) -include $(OUTPUT)PERF-VERSION-FILE \
+		'-DPERF_HTML_PATH="$(htmldir_SQ)"' \
+		$(CFLAGS) -c $(filter %.c,$^) -o $@
+
+$(OUTPUT)perf: $(OUTPUT)perf.o $(BUILTIN_OBJS) $(PERFLIBS)
+	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(OUTPUT)perf.o \
+               $(BUILTIN_OBJS) $(LIBS) -o $@
+
+$(GTK_OBJS): $(OUTPUT)%.o: %.c $(LIB_H)
+	$(QUIET_CC)$(CC) -o $@ -c -fPIC $(CFLAGS) $(GTK_CFLAGS) $<
+
+$(OUTPUT)libperf-gtk.so: $(GTK_OBJS) $(PERFLIBS)
+	$(QUIET_LINK)$(CC) -o $@ -shared $(ALL_LDFLAGS) $(filter %.o,$^) $(GTK_LIBS)
+
+$(OUTPUT)builtin-help.o: builtin-help.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \
+		'-DPERF_HTML_PATH="$(htmldir_SQ)"' \
+		'-DPERF_MAN_PATH="$(mandir_SQ)"' \
+		'-DPERF_INFO_PATH="$(infodir_SQ)"' $<
+
+$(OUTPUT)builtin-timechart.o: builtin-timechart.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \
+		'-DPERF_HTML_PATH="$(htmldir_SQ)"' \
+		'-DPERF_MAN_PATH="$(mandir_SQ)"' \
+		'-DPERF_INFO_PATH="$(infodir_SQ)"' $<
+
+$(OUTPUT)common-cmds.h: util/generate-cmdlist.sh command-list.txt
+
+$(OUTPUT)common-cmds.h: $(wildcard Documentation/perf-*.txt)
+	$(QUIET_GEN). util/generate-cmdlist.sh > $@+ && mv $@+ $@
+
+$(SCRIPTS) : % : %.sh
+	$(QUIET_GEN)$(INSTALL) '$@.sh' '$(OUTPUT)$@'
+
+# These can record PERF_VERSION
+$(OUTPUT)perf.o perf.spec \
+	$(SCRIPTS) \
+	: $(OUTPUT)PERF-VERSION-FILE
+
+.SUFFIXES:
+
+#
+# If a target does not match any of the later rules then prefix it by $(OUTPUT)
+# This makes targets like 'make O=/tmp/perf perf.o' work in a natural way.
+#
+ifneq ($(OUTPUT),)
+%.o: $(OUTPUT)%.o
+	@echo "    # Redirected target $@ => $(OUTPUT)$@"
+util/%.o: $(OUTPUT)util/%.o
+	@echo "    # Redirected target $@ => $(OUTPUT)$@"
+bench/%.o: $(OUTPUT)bench/%.o
+	@echo "    # Redirected target $@ => $(OUTPUT)$@"
+tests/%.o: $(OUTPUT)tests/%.o
+	@echo "    # Redirected target $@ => $(OUTPUT)$@"
+endif
+
+# These two need to be here so that when O= is not used they take precedence
+# over the general rule for .o
+
+$(OUTPUT)util/%-flex.o: $(OUTPUT)util/%-flex.c $(OUTPUT)PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o $@ -c -Iutil/ $(CFLAGS) -w $<
+
+$(OUTPUT)util/%-bison.o: $(OUTPUT)util/%-bison.c $(OUTPUT)PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o $@ -c -Iutil/ $(CFLAGS) -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w $<
+
+$(OUTPUT)%.o: %.c $(OUTPUT)PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $<
+$(OUTPUT)%.i: %.c $(OUTPUT)PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o $@ -E $(CFLAGS) $<
+$(OUTPUT)%.s: %.c $(OUTPUT)PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o $@ -S $(CFLAGS) $<
+$(OUTPUT)%.o: %.S
+	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $<
+$(OUTPUT)%.s: %.S
+	$(QUIET_CC)$(CC) -o $@ -E $(CFLAGS) $<
+
+$(OUTPUT)util/exec_cmd.o: util/exec_cmd.c $(OUTPUT)PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \
+		'-DPERF_EXEC_PATH="$(perfexecdir_SQ)"' \
+		'-DPREFIX="$(prefix_SQ)"' \
+		$<
+
+$(OUTPUT)tests/attr.o: tests/attr.c $(OUTPUT)PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \
+		'-DBINDIR="$(bindir_SQ)"' -DPYTHON='"$(PYTHON_WORD)"' \
+		$<
+
+$(OUTPUT)tests/python-use.o: tests/python-use.c $(OUTPUT)PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \
+		-DPYTHONPATH='"$(OUTPUT)python"' \
+		-DPYTHON='"$(PYTHON_WORD)"' \
+		$<
+
+$(OUTPUT)util/config.o: util/config.c $(OUTPUT)PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
+
+$(OUTPUT)ui/setup.o: ui/setup.c $(OUTPUT)PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DLIBDIR='"$(libdir_SQ)"' $<
+
+$(OUTPUT)ui/browser.o: ui/browser.c $(OUTPUT)PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $<
+
+$(OUTPUT)ui/browsers/annotate.o: ui/browsers/annotate.c $(OUTPUT)PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $<
+
+$(OUTPUT)ui/browsers/hists.o: ui/browsers/hists.c $(OUTPUT)PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $<
+
+$(OUTPUT)ui/browsers/map.o: ui/browsers/map.c $(OUTPUT)PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $<
+
+$(OUTPUT)ui/browsers/scripts.o: ui/browsers/scripts.c $(OUTPUT)PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $<
+
+$(OUTPUT)util/rbtree.o: ../../lib/rbtree.c $(OUTPUT)PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
+
+$(OUTPUT)util/parse-events.o: util/parse-events.c $(OUTPUT)PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-redundant-decls $<
+
+$(OUTPUT)util/scripting-engines/trace-event-perl.o: util/scripting-engines/trace-event-perl.c $(OUTPUT)PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-undef -Wno-switch-default $<
+
+$(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o: scripts/perl/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs -Wno-undef -Wno-switch-default $<
+
+$(OUTPUT)util/scripting-engines/trace-event-python.o: util/scripting-engines/trace-event-python.c $(OUTPUT)PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $<
+
+$(OUTPUT)scripts/python/Perf-Trace-Util/Context.o: scripts/python/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $<
+
+$(OUTPUT)perf-%: %.o $(PERFLIBS)
+	$(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $(LDFLAGS) $(filter %.o,$^) $(LIBS)
+
+$(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H)
+$(patsubst perf-%,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h)
+
+# we compile into subdirectories. if the target directory is not the source directory, they might not exists. So
+# we depend the various files onto their directories.
+DIRECTORY_DEPS = $(LIB_OBJS) $(BUILTIN_OBJS) $(GTK_OBJS)
+DIRECTORY_DEPS += $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h
+$(DIRECTORY_DEPS): | $(sort $(dir $(DIRECTORY_DEPS)))
+# In the second step, we make a rule to actually create these directories
+$(sort $(dir $(DIRECTORY_DEPS))):
+	$(QUIET_MKDIR)$(MKDIR) -p $@ 2>/dev/null
+
+$(LIB_FILE): $(LIB_OBJS)
+	$(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS)
+
+# libtraceevent.a
+TE_SOURCES = $(wildcard $(TRACE_EVENT_DIR)*.[ch])
+
+$(LIBTRACEEVENT): $(TE_SOURCES)
+	$(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) CFLAGS="-g -Wall $(EXTRA_CFLAGS)" libtraceevent.a
+
+$(LIBTRACEEVENT)-clean:
+	$(call QUIET_CLEAN, libtraceevent)
+	@$(MAKE) -C $(TRACE_EVENT_DIR) O=$(OUTPUT) clean >/dev/null
+
+LIBLK_SOURCES = $(wildcard $(LK_PATH)*.[ch])
+
+# if subdir is set, we've been called from above so target has been built
+# already
+$(LIBLK): $(LIBLK_SOURCES)
+ifeq ($(subdir),)
+	$(QUIET_SUBDIR0)$(LK_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) liblk.a
+endif
+
+$(LIBLK)-clean:
+ifeq ($(subdir),)
+	$(call QUIET_CLEAN, liblk)
+	@$(MAKE) -C $(LK_DIR) O=$(OUTPUT) clean >/dev/null
+endif
+
+help:
+	@echo 'Perf make targets:'
+	@echo '  doc		- make *all* documentation (see below)'
+	@echo '  man		- make manpage documentation (access with man <foo>)'
+	@echo '  html		- make html documentation'
+	@echo '  info		- make GNU info documentation (access with info <foo>)'
+	@echo '  pdf		- make pdf documentation'
+	@echo '  TAGS		- use etags to make tag information for source browsing'
+	@echo '  tags		- use ctags to make tag information for source browsing'
+	@echo '  cscope	- use cscope to make interactive browsing database'
+	@echo ''
+	@echo 'Perf install targets:'
+	@echo '  NOTE: documentation build requires asciidoc, xmlto packages to be installed'
+	@echo '  HINT: use "make prefix=<path> <install target>" to install to a particular'
+	@echo '        path like make prefix=/usr/local install install-doc'
+	@echo '  install	- install compiled binaries'
+	@echo '  install-doc	- install *all* documentation'
+	@echo '  install-man	- install manpage documentation'
+	@echo '  install-html	- install html documentation'
+	@echo '  install-info	- install GNU info documentation'
+	@echo '  install-pdf	- install pdf documentation'
+	@echo ''
+	@echo '  quick-install-doc	- alias for quick-install-man'
+	@echo '  quick-install-man	- install the documentation quickly'
+	@echo '  quick-install-html	- install the html documentation quickly'
+	@echo ''
+	@echo 'Perf maintainer targets:'
+	@echo '  clean			- clean all binary objects and build output'
+
+
+DOC_TARGETS := doc man html info pdf
+
+INSTALL_DOC_TARGETS := $(patsubst %,install-%,$(DOC_TARGETS)) try-install-man
+INSTALL_DOC_TARGETS += quick-install-doc quick-install-man quick-install-html
+
+# 'make doc' should call 'make -C Documentation all'
+$(DOC_TARGETS):
+	$(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:doc=all)
+
+TAGS:
+	$(RM) TAGS
+	$(FIND) . -name '*.[hcS]' -print | xargs etags -a
+
+tags:
+	$(RM) tags
+	$(FIND) . -name '*.[hcS]' -print | xargs ctags -a
+
+cscope:
+	$(RM) cscope*
+	$(FIND) . -name '*.[hcS]' -print | xargs cscope -b
+
+### Detect prefix changes
+TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):\
+             $(bindir_SQ):$(perfexecdir_SQ):$(template_dir_SQ):$(prefix_SQ)
+
+$(OUTPUT)PERF-CFLAGS: .FORCE-PERF-CFLAGS
+	@FLAGS='$(TRACK_CFLAGS)'; \
+	    if test x"$$FLAGS" != x"`cat $(OUTPUT)PERF-CFLAGS 2>/dev/null`" ; then \
+		echo 1>&2 "  FLAGS:   * new build flags or prefix"; \
+		echo "$$FLAGS" >$(OUTPUT)PERF-CFLAGS; \
+            fi
+
+### Testing rules
+
+# GNU make supports exporting all variables by "export" without parameters.
+# However, the environment gets quite big, and some programs have problems
+# with that.
+
+check: $(OUTPUT)common-cmds.h
+	if sparse; \
+	then \
+		for i in *.c */*.c; \
+		do \
+			sparse $(CFLAGS) $(SPARSE_FLAGS) $$i || exit; \
+		done; \
+	else \
+		exit 1; \
+	fi
+
+### Installation rules
+
+install-gtk:
+
+install-bin: all install-gtk
+	$(call QUIET_INSTALL, binaries) \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'; \
+		$(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)'; \
+		$(LN) '$(DESTDIR_SQ)$(bindir_SQ)/perf' '$(DESTDIR_SQ)$(bindir_SQ)/trace'
+	$(call QUIET_INSTALL, libexec) \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
+	$(call QUIET_INSTALL, perf-archive) \
+		$(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
+ifndef NO_LIBPERL
+	$(call QUIET_INSTALL, perl-scripts) \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'; \
+		$(INSTALL) scripts/perl/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'; \
+		$(INSTALL) scripts/perl/*.pl -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl'; \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'; \
+		$(INSTALL) scripts/perl/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'
+endif
+ifndef NO_LIBPYTHON
+	$(call QUIET_INSTALL, python-scripts) \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'; \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'; \
+		$(INSTALL) scripts/python/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'; \
+		$(INSTALL) scripts/python/*.py -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python'; \
+		$(INSTALL) scripts/python/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'
+endif
+	$(call QUIET_INSTALL, bash_completion-script) \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d'; \
+		$(INSTALL) bash_completion '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d/perf'
+	$(call QUIET_INSTALL, tests) \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \
+		$(INSTALL) tests/attr.py '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \
+		$(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'
+
+install: install-bin try-install-man
+
+install-python_ext:
+	$(PYTHON_WORD) util/setup.py --quiet install --root='/$(DESTDIR_SQ)'
+
+# 'make install-doc' should call 'make -C Documentation install'
+$(INSTALL_DOC_TARGETS):
+	$(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:-doc=)
+
+### Cleaning rules
+
+#
+# This is here, not in config/Makefile, because config/Makefile does
+# not get included for the clean target:
+#
+config-clean:
+	$(call QUIET_CLEAN, config)
+	@$(MAKE) -C config/feature-checks clean >/dev/null
+
+clean: $(LIBTRACEEVENT)-clean $(LIBLK)-clean config-clean
+	$(call QUIET_CLEAN, core-objs)  $(RM) $(LIB_OBJS) $(BUILTIN_OBJS) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf.o $(LANG_BINDINGS) $(GTK_OBJS)
+	$(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf
+	$(call QUIET_CLEAN, core-gen)   $(RM)  *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-CFLAGS $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex*
+	$(call QUIET_CLEAN, Documentation)
+	@$(MAKE) -C Documentation O=$(OUTPUT) clean >/dev/null
+	$(python-clean)
+
+#
+# Trick: if ../../.git does not exist - we are building out of tree for example,
+# then force version regeneration:
+#
+ifeq ($(wildcard ../../.git/HEAD),)
+    GIT-HEAD-PHONY = ../../.git/HEAD
+else
+    GIT-HEAD-PHONY =
+endif
+
+.PHONY: all install clean config-clean strip install-gtk
+.PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
+.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope .FORCE-PERF-CFLAGS
+
diff --git a/tools/perf/arch/x86/include/perf_regs.h b/tools/perf/arch/x86/include/perf_regs.h
index 7fcdcdbee917a8473acf25fe7ac72c6c5a9732b0..e84ca76aae779ef484df1633b7cff3e35b327ba8 100644
--- a/tools/perf/arch/x86/include/perf_regs.h
+++ b/tools/perf/arch/x86/include/perf_regs.h
@@ -5,7 +5,7 @@
 #include "../../util/types.h"
 #include <asm/perf_regs.h>
 
-#ifndef ARCH_X86_64
+#ifndef HAVE_ARCH_X86_64_SUPPORT
 #define PERF_REGS_MASK ((1ULL << PERF_REG_X86_32_MAX) - 1)
 #else
 #define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \
@@ -52,7 +52,7 @@ static inline const char *perf_reg_name(int id)
 		return "FS";
 	case PERF_REG_X86_GS:
 		return "GS";
-#ifdef ARCH_X86_64
+#ifdef HAVE_ARCH_X86_64_SUPPORT
 	case PERF_REG_X86_R8:
 		return "R8";
 	case PERF_REG_X86_R9:
@@ -69,7 +69,7 @@ static inline const char *perf_reg_name(int id)
 		return "R14";
 	case PERF_REG_X86_R15:
 		return "R15";
-#endif /* ARCH_X86_64 */
+#endif /* HAVE_ARCH_X86_64_SUPPORT */
 	default:
 		return NULL;
 	}
diff --git a/tools/perf/arch/x86/util/unwind.c b/tools/perf/arch/x86/util/unwind.c
index 78d956eff96fcd8de79a5afd42041c6c72fb02ec..456a88cf5b374bf3319ea2310e88f2cd179c9b28 100644
--- a/tools/perf/arch/x86/util/unwind.c
+++ b/tools/perf/arch/x86/util/unwind.c
@@ -4,7 +4,7 @@
 #include "perf_regs.h"
 #include "../../util/unwind.h"
 
-#ifdef ARCH_X86_64
+#ifdef HAVE_ARCH_X86_64_SUPPORT
 int unwind__arch_reg_id(int regnum)
 {
 	int id;
@@ -108,4 +108,4 @@ int unwind__arch_reg_id(int regnum)
 
 	return id;
 }
-#endif /* ARCH_X86_64 */
+#endif /* HAVE_ARCH_X86_64_SUPPORT */
diff --git a/tools/perf/bash_completion b/tools/perf/bash_completion
index 56e6a12aab59b42d4cffbc67858824af818f5fff..62e157db2e2b01e9400f9348d671fd9ba7267104 100644
--- a/tools/perf/bash_completion
+++ b/tools/perf/bash_completion
@@ -1,17 +1,87 @@
 # perf completion
 
-function_exists()
+# Taken from git.git's completion script.
+__my_reassemble_comp_words_by_ref()
 {
-	declare -F $1 > /dev/null
-	return $?
+	local exclude i j first
+	# Which word separators to exclude?
+	exclude="${1//[^$COMP_WORDBREAKS]}"
+	cword_=$COMP_CWORD
+	if [ -z "$exclude" ]; then
+		words_=("${COMP_WORDS[@]}")
+		return
+	fi
+	# List of word completion separators has shrunk;
+	# re-assemble words to complete.
+	for ((i=0, j=0; i < ${#COMP_WORDS[@]}; i++, j++)); do
+		# Append each nonempty word consisting of just
+		# word separator characters to the current word.
+		first=t
+		while
+			[ $i -gt 0 ] &&
+			[ -n "${COMP_WORDS[$i]}" ] &&
+			# word consists of excluded word separators
+			[ "${COMP_WORDS[$i]//[^$exclude]}" = "${COMP_WORDS[$i]}" ]
+		do
+			# Attach to the previous token,
+			# unless the previous token is the command name.
+			if [ $j -ge 2 ] && [ -n "$first" ]; then
+				((j--))
+			fi
+			first=
+			words_[$j]=${words_[j]}${COMP_WORDS[i]}
+			if [ $i = $COMP_CWORD ]; then
+				cword_=$j
+			fi
+			if (($i < ${#COMP_WORDS[@]} - 1)); then
+				((i++))
+			else
+				# Done.
+				return
+			fi
+		done
+		words_[$j]=${words_[j]}${COMP_WORDS[i]}
+		if [ $i = $COMP_CWORD ]; then
+			cword_=$j
+		fi
+	done
 }
 
-function_exists __ltrim_colon_completions ||
+type _get_comp_words_by_ref &>/dev/null ||
+_get_comp_words_by_ref()
+{
+	local exclude cur_ words_ cword_
+	if [ "$1" = "-n" ]; then
+		exclude=$2
+		shift 2
+	fi
+	__my_reassemble_comp_words_by_ref "$exclude"
+	cur_=${words_[cword_]}
+	while [ $# -gt 0 ]; do
+		case "$1" in
+		cur)
+			cur=$cur_
+			;;
+		prev)
+			prev=${words_[$cword_-1]}
+			;;
+		words)
+			words=("${words_[@]}")
+			;;
+		cword)
+			cword=$cword_
+			;;
+		esac
+		shift
+	done
+}
+
+type __ltrim_colon_completions &>/dev/null ||
 __ltrim_colon_completions()
 {
 	if [[ "$1" == *:* && "$COMP_WORDBREAKS" == *:* ]]; then
 		# Remove colon-word prefix from COMPREPLY items
-		local colon_word=${1%${1##*:}}
+		local colon_word=${1%"${1##*:}"}
 		local i=${#COMPREPLY[*]}
 		while [[ $((--i)) -ge 0 ]]; do
 			COMPREPLY[$i]=${COMPREPLY[$i]#"$colon_word"}
@@ -19,23 +89,18 @@ __ltrim_colon_completions()
 	fi
 }
 
-have perf &&
+type perf &>/dev/null &&
 _perf()
 {
-	local cur prev cmd
+	local cur words cword prev cmd
 
 	COMPREPLY=()
-	if function_exists _get_comp_words_by_ref; then
-		_get_comp_words_by_ref -n : cur prev
-	else
-		cur=$(_get_cword :)
-		prev=${COMP_WORDS[COMP_CWORD-1]}
-	fi
+	_get_comp_words_by_ref -n =: cur words cword prev
 
-	cmd=${COMP_WORDS[0]}
+	cmd=${words[0]}
 
 	# List perf subcommands or long options
-	if [ $COMP_CWORD -eq 1 ]; then
+	if [ $cword -eq 1 ]; then
 		if [[ $cur == --* ]]; then
 			COMPREPLY=( $( compgen -W '--help --version \
 			--exec-path --html-path --paginate --no-pager \
@@ -45,18 +110,17 @@ _perf()
 			COMPREPLY=( $( compgen -W '$cmds' -- "$cur" ) )
 		fi
 	# List possible events for -e option
-	elif [[ $prev == "-e" && "${COMP_WORDS[1]}" == @(record|stat|top) ]]; then
+	elif [[ $prev == "-e" && "${words[1]}" == @(record|stat|top) ]]; then
 		evts=$($cmd list --raw-dump)
 		COMPREPLY=( $( compgen -W '$evts' -- "$cur" ) )
 		__ltrim_colon_completions $cur
 	# List long option names
 	elif [[ $cur == --* ]];  then
-		subcmd=${COMP_WORDS[1]}
+		subcmd=${words[1]}
 		opts=$($cmd $subcmd --list-opts)
 		COMPREPLY=( $( compgen -W '$opts' -- "$cur" ) )
-	# Fall down to list regular files
-	else
-		_filedir
 	fi
 } &&
-complete -F _perf perf
+
+complete -o bashdefault -o default -o nospace -F _perf perf 2>/dev/null \
+	|| complete -o default -o nospace -F _perf perf
diff --git a/tools/perf/bench/mem-memcpy-arch.h b/tools/perf/bench/mem-memcpy-arch.h
index a72e36cb53943c1a9689b8ce6ee35abc467ccb1d..57b4ed871459034b57159bb4eb21dc27e88e87ba 100644
--- a/tools/perf/bench/mem-memcpy-arch.h
+++ b/tools/perf/bench/mem-memcpy-arch.h
@@ -1,5 +1,5 @@
 
-#ifdef ARCH_X86_64
+#ifdef HAVE_ARCH_X86_64_SUPPORT
 
 #define MEMCPY_FN(fn, name, desc)		\
 	extern void *fn(void *, const void *, size_t);
diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c
index 8cdca43016b250109d8bdd3ea7568eb13229a125..5ce71d3b72cf9bc16251ba5e7ac81ad0c61560b6 100644
--- a/tools/perf/bench/mem-memcpy.c
+++ b/tools/perf/bench/mem-memcpy.c
@@ -58,7 +58,7 @@ struct routine routines[] = {
 	{ "default",
 	  "Default memcpy() provided by glibc",
 	  memcpy },
-#ifdef ARCH_X86_64
+#ifdef HAVE_ARCH_X86_64_SUPPORT
 
 #define MEMCPY_FN(fn, name, desc) { name, desc, fn },
 #include "mem-memcpy-x86-64-asm-def.h"
diff --git a/tools/perf/bench/mem-memset-arch.h b/tools/perf/bench/mem-memset-arch.h
index a040fa77665b0ef90eb66ba3901074f274536750..633800cb0dcb0ad60309c1a12003f42d38e64b08 100644
--- a/tools/perf/bench/mem-memset-arch.h
+++ b/tools/perf/bench/mem-memset-arch.h
@@ -1,5 +1,5 @@
 
-#ifdef ARCH_X86_64
+#ifdef HAVE_ARCH_X86_64_SUPPORT
 
 #define MEMSET_FN(fn, name, desc)		\
 	extern void *fn(void *, int, size_t);
diff --git a/tools/perf/bench/mem-memset.c b/tools/perf/bench/mem-memset.c
index 4a2f12081964163c3f93bede8934c723806d16f2..9af79d2b18e5a178c97adfe6ddcaecda236dcfc6 100644
--- a/tools/perf/bench/mem-memset.c
+++ b/tools/perf/bench/mem-memset.c
@@ -58,7 +58,7 @@ static const struct routine routines[] = {
 	{ "default",
 	  "Default memset() provided by glibc",
 	  memset },
-#ifdef ARCH_X86_64
+#ifdef HAVE_ARCH_X86_64_SUPPORT
 
 #define MEMSET_FN(fn, name, desc) { name, desc, fn },
 #include "mem-memset-x86-64-asm-def.h"
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index 30d1c3225b46222d52f3d73b0cdb1fa1995e8efa..d4c83c60b9b29fa7d6249de70e5f0673d2066287 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -429,14 +429,14 @@ static int parse_cpu_list(const char *arg)
 	return 0;
 }
 
-static void parse_setup_cpu_list(void)
+static int parse_setup_cpu_list(void)
 {
 	struct thread_data *td;
 	char *str0, *str;
 	int t;
 
 	if (!g->p.cpu_list_str)
-		return;
+		return 0;
 
 	dprintf("g->p.nr_tasks: %d\n", g->p.nr_tasks);
 
@@ -500,8 +500,12 @@ static void parse_setup_cpu_list(void)
 
 		dprintf("CPUs: %d_%d-%d#%dx%d\n", bind_cpu_0, bind_len, bind_cpu_1, step, mul);
 
-		BUG_ON(bind_cpu_0 < 0 || bind_cpu_0 >= g->p.nr_cpus);
-		BUG_ON(bind_cpu_1 < 0 || bind_cpu_1 >= g->p.nr_cpus);
+		if (bind_cpu_0 >= g->p.nr_cpus || bind_cpu_1 >= g->p.nr_cpus) {
+			printf("\nTest not applicable, system has only %d CPUs.\n", g->p.nr_cpus);
+			return -1;
+		}
+
+		BUG_ON(bind_cpu_0 < 0 || bind_cpu_1 < 0);
 		BUG_ON(bind_cpu_0 > bind_cpu_1);
 
 		for (bind_cpu = bind_cpu_0; bind_cpu <= bind_cpu_1; bind_cpu += step) {
@@ -541,6 +545,7 @@ static void parse_setup_cpu_list(void)
 		printf("# NOTE: %d tasks bound, %d tasks unbound\n", t, g->p.nr_tasks - t);
 
 	free(str0);
+	return 0;
 }
 
 static int parse_cpus_opt(const struct option *opt __maybe_unused,
@@ -561,14 +566,14 @@ static int parse_node_list(const char *arg)
 	return 0;
 }
 
-static void parse_setup_node_list(void)
+static int parse_setup_node_list(void)
 {
 	struct thread_data *td;
 	char *str0, *str;
 	int t;
 
 	if (!g->p.node_list_str)
-		return;
+		return 0;
 
 	dprintf("g->p.nr_tasks: %d\n", g->p.nr_tasks);
 
@@ -619,8 +624,12 @@ static void parse_setup_node_list(void)
 
 		dprintf("NODEs: %d-%d #%d\n", bind_node_0, bind_node_1, step);
 
-		BUG_ON(bind_node_0 < 0 || bind_node_0 >= g->p.nr_nodes);
-		BUG_ON(bind_node_1 < 0 || bind_node_1 >= g->p.nr_nodes);
+		if (bind_node_0 >= g->p.nr_nodes || bind_node_1 >= g->p.nr_nodes) {
+			printf("\nTest not applicable, system has only %d nodes.\n", g->p.nr_nodes);
+			return -1;
+		}
+
+		BUG_ON(bind_node_0 < 0 || bind_node_1 < 0);
 		BUG_ON(bind_node_0 > bind_node_1);
 
 		for (bind_node = bind_node_0; bind_node <= bind_node_1; bind_node += step) {
@@ -651,6 +660,7 @@ static void parse_setup_node_list(void)
 		printf("# NOTE: %d tasks mem-bound, %d tasks unbound\n", t, g->p.nr_tasks - t);
 
 	free(str0);
+	return 0;
 }
 
 static int parse_nodes_opt(const struct option *opt __maybe_unused,
@@ -1110,7 +1120,7 @@ static void *worker_thread(void *__tdata)
 		/* Check whether our max runtime timed out: */
 		if (g->p.nr_secs) {
 			timersub(&stop, &start0, &diff);
-			if (diff.tv_sec >= g->p.nr_secs) {
+			if ((u32)diff.tv_sec >= g->p.nr_secs) {
 				g->stop_work = true;
 				break;
 			}
@@ -1157,7 +1167,7 @@ static void *worker_thread(void *__tdata)
 			runtime_ns_max += diff.tv_usec * 1000;
 
 			if (details >= 0) {
-				printf(" #%2d / %2d: %14.2lf nsecs/op [val: %016lx]\n",
+				printf(" #%2d / %2d: %14.2lf nsecs/op [val: %016"PRIx64"]\n",
 					process_nr, thread_nr, runtime_ns_max / bytes_done, val);
 			}
 			fflush(stdout);
@@ -1356,8 +1366,8 @@ static int init(void)
 	init_thread_data();
 
 	tprintf("#\n");
-	parse_setup_cpu_list();
-	parse_setup_node_list();
+	if (parse_setup_cpu_list() || parse_setup_node_list())
+		return -1;
 	tprintf("#\n");
 
 	print_summary();
@@ -1600,7 +1610,6 @@ static int run_bench_numa(const char *name, const char **argv)
 	return 0;
 
 err:
-	usage_with_options(numa_usage, options);
 	return -1;
 }
 
@@ -1701,8 +1710,7 @@ static int bench_all(void)
 	BUG_ON(ret < 0);
 
 	for (i = 0; i < nr; i++) {
-		if (run_bench_numa(tests[i][0], tests[i] + 1))
-			return -1;
+		run_bench_numa(tests[i][0], tests[i] + 1);
 	}
 
 	printf("\n");
diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c
index 69cfba8d4c6cfb5a0bd402735d3eaa5bfdd7a467..07a8d7646a1549c61699f7311285238ef5ef93cf 100644
--- a/tools/perf/bench/sched-pipe.c
+++ b/tools/perf/bench/sched-pipe.c
@@ -7,9 +7,7 @@
  * Based on pipe-test-1m.c by Ingo Molnar <mingo@redhat.com>
  *  http://people.redhat.com/mingo/cfs-scheduler/tools/pipe-test-1m.c
  * Ported to perf by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
- *
  */
-
 #include "../perf.h"
 #include "../util/util.h"
 #include "../util/parse-options.h"
@@ -28,12 +26,24 @@
 #include <sys/time.h>
 #include <sys/types.h>
 
+#include <pthread.h>
+
+struct thread_data {
+	int			nr;
+	int			pipe_read;
+	int			pipe_write;
+	pthread_t		pthread;
+};
+
 #define LOOPS_DEFAULT 1000000
-static int loops = LOOPS_DEFAULT;
+static	int			loops = LOOPS_DEFAULT;
+
+/* Use processes by default: */
+static bool			threaded;
 
 static const struct option options[] = {
-	OPT_INTEGER('l', "loop", &loops,
-		    "Specify number of loops"),
+	OPT_INTEGER('l', "loop",	&loops,		"Specify number of loops"),
+	OPT_BOOLEAN('T', "threaded",	&threaded,	"Specify threads/process based task setup"),
 	OPT_END()
 };
 
@@ -42,13 +52,37 @@ static const char * const bench_sched_pipe_usage[] = {
 	NULL
 };
 
-int bench_sched_pipe(int argc, const char **argv,
-		     const char *prefix __maybe_unused)
+static void *worker_thread(void *__tdata)
 {
-	int pipe_1[2], pipe_2[2];
+	struct thread_data *td = __tdata;
 	int m = 0, i;
+	int ret;
+
+	for (i = 0; i < loops; i++) {
+		if (!td->nr) {
+			ret = read(td->pipe_read, &m, sizeof(int));
+			BUG_ON(ret != sizeof(int));
+			ret = write(td->pipe_write, &m, sizeof(int));
+			BUG_ON(ret != sizeof(int));
+		} else {
+			ret = write(td->pipe_write, &m, sizeof(int));
+			BUG_ON(ret != sizeof(int));
+			ret = read(td->pipe_read, &m, sizeof(int));
+			BUG_ON(ret != sizeof(int));
+		}
+	}
+
+	return NULL;
+}
+
+int bench_sched_pipe(int argc, const char **argv, const char *prefix __maybe_unused)
+{
+	struct thread_data threads[2], *td;
+	int pipe_1[2], pipe_2[2];
 	struct timeval start, stop, diff;
 	unsigned long long result_usec = 0;
+	int nr_threads = 2;
+	int t;
 
 	/*
 	 * why does "ret" exist?
@@ -58,43 +92,66 @@ int bench_sched_pipe(int argc, const char **argv,
 	int __maybe_unused ret, wait_stat;
 	pid_t pid, retpid __maybe_unused;
 
-	argc = parse_options(argc, argv, options,
-			     bench_sched_pipe_usage, 0);
+	argc = parse_options(argc, argv, options, bench_sched_pipe_usage, 0);
 
 	BUG_ON(pipe(pipe_1));
 	BUG_ON(pipe(pipe_2));
 
-	pid = fork();
-	assert(pid >= 0);
-
 	gettimeofday(&start, NULL);
 
-	if (!pid) {
-		for (i = 0; i < loops; i++) {
-			ret = read(pipe_1[0], &m, sizeof(int));
-			ret = write(pipe_2[1], &m, sizeof(int));
-		}
-	} else {
-		for (i = 0; i < loops; i++) {
-			ret = write(pipe_1[1], &m, sizeof(int));
-			ret = read(pipe_2[0], &m, sizeof(int));
+	for (t = 0; t < nr_threads; t++) {
+		td = threads + t;
+
+		td->nr = t;
+
+		if (t == 0) {
+			td->pipe_read = pipe_1[0];
+			td->pipe_write = pipe_2[1];
+		} else {
+			td->pipe_write = pipe_1[1];
+			td->pipe_read = pipe_2[0];
 		}
 	}
 
-	gettimeofday(&stop, NULL);
-	timersub(&stop, &start, &diff);
 
-	if (pid) {
+	if (threaded) {
+
+		for (t = 0; t < nr_threads; t++) {
+			td = threads + t;
+
+			ret = pthread_create(&td->pthread, NULL, worker_thread, td);
+			BUG_ON(ret);
+		}
+
+		for (t = 0; t < nr_threads; t++) {
+			td = threads + t;
+
+			ret = pthread_join(td->pthread, NULL);
+			BUG_ON(ret);
+		}
+
+	} else {
+		pid = fork();
+		assert(pid >= 0);
+
+		if (!pid) {
+			worker_thread(threads + 0);
+			exit(0);
+		} else {
+			worker_thread(threads + 1);
+		}
+
 		retpid = waitpid(pid, &wait_stat, 0);
 		assert((retpid == pid) && WIFEXITED(wait_stat));
-	} else {
-		exit(0);
 	}
 
+	gettimeofday(&stop, NULL);
+	timersub(&stop, &start, &diff);
+
 	switch (bench_format) {
 	case BENCH_FORMAT_DEFAULT:
-		printf("# Executed %d pipe operations between two tasks\n\n",
-			loops);
+		printf("# Executed %d pipe operations between two %s\n\n",
+			loops, threaded ? "threads" : "processes");
 
 		result_usec = diff.tv_sec * 1000000;
 		result_usec += diff.tv_usec;
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 5ebd0c3b71b6aa45d80aa63b47661996577cf6a4..4087ab19823c2f843c50e18d7b8e5042a079032f 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -28,8 +28,10 @@
 #include "util/hist.h"
 #include "util/session.h"
 #include "util/tool.h"
+#include "util/data.h"
 #include "arch/common.h"
 
+#include <dlfcn.h>
 #include <linux/bitmap.h>
 
 struct perf_annotate {
@@ -63,7 +65,7 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel,
 		return 0;
 	}
 
-	he = __hists__add_entry(&evsel->hists, al, NULL, 1, 1);
+	he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL, 1, 1, 0);
 	if (he == NULL)
 		return -ENOMEM;
 
@@ -116,11 +118,11 @@ static int hist_entry__tty_annotate(struct hist_entry *he,
 				    ann->print_line, ann->full_paths, 0, 0);
 }
 
-static void hists__find_annotations(struct hists *self,
+static void hists__find_annotations(struct hists *hists,
 				    struct perf_evsel *evsel,
 				    struct perf_annotate *ann)
 {
-	struct rb_node *nd = rb_first(&self->entries), *next;
+	struct rb_node *nd = rb_first(&hists->entries), *next;
 	int key = K_RIGHT;
 
 	while (nd) {
@@ -142,8 +144,18 @@ static void hists__find_annotations(struct hists *self,
 
 		if (use_browser == 2) {
 			int ret;
+			int (*annotate)(struct hist_entry *he,
+					struct perf_evsel *evsel,
+					struct hist_browser_timer *hbt);
+
+			annotate = dlsym(perf_gtk_handle,
+					 "hist_entry__gtk_annotate");
+			if (annotate == NULL) {
+				ui__error("GTK browser not found!\n");
+				return;
+			}
 
-			ret = hist_entry__gtk_annotate(he, evsel, NULL);
+			ret = annotate(he, evsel, NULL);
 			if (!ret || !ann->skip_missing)
 				return;
 
@@ -188,9 +200,13 @@ static int __cmd_annotate(struct perf_annotate *ann)
 	struct perf_session *session;
 	struct perf_evsel *pos;
 	u64 total_nr_samples;
+	struct perf_data_file file = {
+		.path  = input_name,
+		.mode  = PERF_DATA_MODE_READ,
+		.force = ann->force,
+	};
 
-	session = perf_session__new(input_name, O_RDONLY,
-				    ann->force, false, &ann->tool);
+	session = perf_session__new(&file, false, &ann->tool);
 	if (session == NULL)
 		return -ENOMEM;
 
@@ -231,7 +247,7 @@ static int __cmd_annotate(struct perf_annotate *ann)
 
 		if (nr_samples > 0) {
 			total_nr_samples += nr_samples;
-			hists__collapse_resort(hists);
+			hists__collapse_resort(hists, NULL);
 			hists__output_resort(hists);
 
 			if (symbol_conf.event_group &&
@@ -243,12 +259,21 @@ static int __cmd_annotate(struct perf_annotate *ann)
 	}
 
 	if (total_nr_samples == 0) {
-		ui__error("The %s file has no samples!\n", session->filename);
+		ui__error("The %s file has no samples!\n", file.path);
 		goto out_delete;
 	}
 
-	if (use_browser == 2)
-		perf_gtk__show_annotations();
+	if (use_browser == 2) {
+		void (*show_annotations)(void);
+
+		show_annotations = dlsym(perf_gtk_handle,
+					 "perf_gtk__show_annotations");
+		if (show_annotations == NULL) {
+			ui__error("GTK browser not found!\n");
+			goto out_delete;
+		}
+		show_annotations();
+	}
 
 out_delete:
 	/*
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index 77298bf892b85c68d3c662cfe62e050518630a49..e47f90cc7b98cdde57079975bbd5637f3249ce44 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -1,21 +1,18 @@
 /*
- *
  * builtin-bench.c
  *
- * General benchmarking subsystem provided by perf
+ * General benchmarking collections provided by perf
  *
  * Copyright (C) 2009, Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
- *
  */
 
 /*
+ * Available benchmark collection list:
  *
- * Available subsystem list:
- *  sched ... scheduler and IPC mechanism
+ *  sched ... scheduler and IPC performance
  *  mem   ... memory access performance
- *
+ *  numa  ... NUMA scheduling and MM performance
  */
-
 #include "perf.h"
 #include "util/util.h"
 #include "util/parse-options.h"
@@ -25,112 +22,92 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <sys/prctl.h>
 
-struct bench_suite {
-	const char *name;
-	const char *summary;
-	int (*fn)(int, const char **, const char *);
+typedef int (*bench_fn_t)(int argc, const char **argv, const char *prefix);
+
+struct bench {
+	const char	*name;
+	const char	*summary;
+	bench_fn_t	fn;
 };
-						\
-/* sentinel: easy for help */
-#define suite_all { "all", "Test all benchmark suites", NULL }
-
-#ifdef LIBNUMA_SUPPORT
-static struct bench_suite numa_suites[] = {
-	{ "mem",
-	  "Benchmark for NUMA workloads",
-	  bench_numa },
-	suite_all,
-	{ NULL,
-	  NULL,
-	  NULL                  }
+
+#ifdef HAVE_LIBNUMA_SUPPORT
+static struct bench numa_benchmarks[] = {
+	{ "mem",	"Benchmark for NUMA workloads",			bench_numa		},
+	{ "all",	"Test all NUMA benchmarks",			NULL			},
+	{ NULL,		NULL,						NULL			}
 };
 #endif
 
-static struct bench_suite sched_suites[] = {
-	{ "messaging",
-	  "Benchmark for scheduler and IPC mechanisms",
-	  bench_sched_messaging },
-	{ "pipe",
-	  "Flood of communication over pipe() between two processes",
-	  bench_sched_pipe      },
-	suite_all,
-	{ NULL,
-	  NULL,
-	  NULL                  }
+static struct bench sched_benchmarks[] = {
+	{ "messaging",	"Benchmark for scheduling and IPC",		bench_sched_messaging	},
+	{ "pipe",	"Benchmark for pipe() between two processes",	bench_sched_pipe	},
+	{ "all",	"Test all scheduler benchmarks",		NULL			},
+	{ NULL,		NULL,						NULL			}
 };
 
-static struct bench_suite mem_suites[] = {
-	{ "memcpy",
-	  "Simple memory copy in various ways",
-	  bench_mem_memcpy },
-	{ "memset",
-	  "Simple memory set in various ways",
-	  bench_mem_memset },
-	suite_all,
-	{ NULL,
-	  NULL,
-	  NULL             }
+static struct bench mem_benchmarks[] = {
+	{ "memcpy",	"Benchmark for memcpy()",			bench_mem_memcpy	},
+	{ "memset",	"Benchmark for memset() tests",			bench_mem_memset	},
+	{ "all",	"Test all memory benchmarks",			NULL			},
+	{ NULL,		NULL,						NULL			}
 };
 
-struct bench_subsys {
-	const char *name;
-	const char *summary;
-	struct bench_suite *suites;
+struct collection {
+	const char	*name;
+	const char	*summary;
+	struct bench	*benchmarks;
 };
 
-static struct bench_subsys subsystems[] = {
-#ifdef LIBNUMA_SUPPORT
-	{ "numa",
-	  "NUMA scheduling and MM behavior",
-	  numa_suites },
+static struct collection collections[] = {
+	{ "sched",	"Scheduler and IPC benchmarks",		sched_benchmarks	},
+	{ "mem",	"Memory access benchmarks",			mem_benchmarks		},
+#ifdef HAVE_LIBNUMA_SUPPORT
+	{ "numa",	"NUMA scheduling and MM benchmarks",		numa_benchmarks		},
 #endif
-	{ "sched",
-	  "scheduler and IPC mechanism",
-	  sched_suites },
-	{ "mem",
-	  "memory access performance",
-	  mem_suites },
-	{ "all",		/* sentinel: easy for help */
-	  "all benchmark subsystem",
-	  NULL },
-	{ NULL,
-	  NULL,
-	  NULL       }
+	{ "all",	"All benchmarks",				NULL			},
+	{ NULL,		NULL,						NULL			}
 };
 
-static void dump_suites(int subsys_index)
+/* Iterate over all benchmark collections: */
+#define for_each_collection(coll) \
+	for (coll = collections; coll->name; coll++)
+
+/* Iterate over all benchmarks within a collection: */
+#define for_each_bench(coll, bench) \
+	for (bench = coll->benchmarks; bench->name; bench++)
+
+static void dump_benchmarks(struct collection *coll)
 {
-	int i;
+	struct bench *bench;
 
-	printf("# List of available suites for %s...\n\n",
-	       subsystems[subsys_index].name);
+	printf("\n        # List of available benchmarks for collection '%s':\n\n", coll->name);
 
-	for (i = 0; subsystems[subsys_index].suites[i].name; i++)
-		printf("%14s: %s\n",
-		       subsystems[subsys_index].suites[i].name,
-		       subsystems[subsys_index].suites[i].summary);
+	for_each_bench(coll, bench)
+		printf("%14s: %s\n", bench->name, bench->summary);
 
 	printf("\n");
-	return;
 }
 
 static const char *bench_format_str;
+
+/* Output/formatting style, exported to benchmark modules: */
 int bench_format = BENCH_FORMAT_DEFAULT;
 
 static const struct option bench_options[] = {
-	OPT_STRING('f', "format", &bench_format_str, "default",
-		    "Specify format style"),
+	OPT_STRING('f', "format", &bench_format_str, "default", "Specify format style"),
 	OPT_END()
 };
 
 static const char * const bench_usage[] = {
-	"perf bench [<common options>] <subsystem> <suite> [<options>]",
+	"perf bench [<common options>] <collection> <benchmark> [<options>]",
 	NULL
 };
 
 static void print_usage(void)
 {
+	struct collection *coll;
 	int i;
 
 	printf("Usage: \n");
@@ -138,11 +115,10 @@ static void print_usage(void)
 		printf("\t%s\n", bench_usage[i]);
 	printf("\n");
 
-	printf("# List of available subsystems...\n\n");
+	printf("        # List of all available benchmark collections:\n\n");
 
-	for (i = 0; subsystems[i].name; i++)
-		printf("%14s: %s\n",
-		       subsystems[i].name, subsystems[i].summary);
+	for_each_collection(coll)
+		printf("%14s: %s\n", coll->name, coll->summary);
 	printf("\n");
 }
 
@@ -159,44 +135,74 @@ static int bench_str2int(const char *str)
 	return BENCH_FORMAT_UNKNOWN;
 }
 
-static void all_suite(struct bench_subsys *subsys)	  /* FROM HERE */
+/*
+ * Run a specific benchmark but first rename the running task's ->comm[]
+ * to something meaningful:
+ */
+static int run_bench(const char *coll_name, const char *bench_name, bench_fn_t fn,
+		     int argc, const char **argv, const char *prefix)
 {
-	int i;
+	int size;
+	char *name;
+	int ret;
+
+	size = strlen(coll_name) + 1 + strlen(bench_name) + 1;
+
+	name = zalloc(size);
+	BUG_ON(!name);
+
+	scnprintf(name, size, "%s-%s", coll_name, bench_name);
+
+	prctl(PR_SET_NAME, name);
+	argv[0] = name;
+
+	ret = fn(argc, argv, prefix);
+
+	free(name);
+
+	return ret;
+}
+
+static void run_collection(struct collection *coll)
+{
+	struct bench *bench;
 	const char *argv[2];
-	struct bench_suite *suites = subsys->suites;
 
 	argv[1] = NULL;
 	/*
 	 * TODO:
-	 * preparing preset parameters for
+	 *
+	 * Preparing preset parameters for
 	 * embedded, ordinary PC, HPC, etc...
-	 * will be helpful
+	 * would be helpful.
 	 */
-	for (i = 0; suites[i].fn; i++) {
-		printf("# Running %s/%s benchmark...\n",
-		       subsys->name,
-		       suites[i].name);
+	for_each_bench(coll, bench) {
+		if (!bench->fn)
+			break;
+		printf("# Running %s/%s benchmark...\n", coll->name, bench->name);
 		fflush(stdout);
 
-		argv[1] = suites[i].name;
-		suites[i].fn(1, argv, NULL);
+		argv[1] = bench->name;
+		run_bench(coll->name, bench->name, bench->fn, 1, argv, NULL);
 		printf("\n");
 	}
 }
 
-static void all_subsystem(void)
+static void run_all_collections(void)
 {
-	int i;
-	for (i = 0; subsystems[i].suites; i++)
-		all_suite(&subsystems[i]);
+	struct collection *coll;
+
+	for_each_collection(coll)
+		run_collection(coll);
 }
 
 int cmd_bench(int argc, const char **argv, const char *prefix __maybe_unused)
 {
-	int i, j, status = 0;
+	struct collection *coll;
+	int ret = 0;
 
 	if (argc < 2) {
-		/* No subsystem specified. */
+		/* No collection specified. */
 		print_usage();
 		goto end;
 	}
@@ -206,7 +212,7 @@ int cmd_bench(int argc, const char **argv, const char *prefix __maybe_unused)
 
 	bench_format = bench_str2int(bench_format_str);
 	if (bench_format == BENCH_FORMAT_UNKNOWN) {
-		printf("Unknown format descriptor:%s\n", bench_format_str);
+		printf("Unknown format descriptor: '%s'\n", bench_format_str);
 		goto end;
 	}
 
@@ -216,52 +222,51 @@ int cmd_bench(int argc, const char **argv, const char *prefix __maybe_unused)
 	}
 
 	if (!strcmp(argv[0], "all")) {
-		all_subsystem();
+		run_all_collections();
 		goto end;
 	}
 
-	for (i = 0; subsystems[i].name; i++) {
-		if (strcmp(subsystems[i].name, argv[0]))
+	for_each_collection(coll) {
+		struct bench *bench;
+
+		if (strcmp(coll->name, argv[0]))
 			continue;
 
 		if (argc < 2) {
-			/* No suite specified. */
-			dump_suites(i);
+			/* No bench specified. */
+			dump_benchmarks(coll);
 			goto end;
 		}
 
 		if (!strcmp(argv[1], "all")) {
-			all_suite(&subsystems[i]);
+			run_collection(coll);
 			goto end;
 		}
 
-		for (j = 0; subsystems[i].suites[j].name; j++) {
-			if (strcmp(subsystems[i].suites[j].name, argv[1]))
+		for_each_bench(coll, bench) {
+			if (strcmp(bench->name, argv[1]))
 				continue;
 
 			if (bench_format == BENCH_FORMAT_DEFAULT)
-				printf("# Running %s/%s benchmark...\n",
-				       subsystems[i].name,
-				       subsystems[i].suites[j].name);
+				printf("# Running '%s/%s' benchmark:\n", coll->name, bench->name);
 			fflush(stdout);
-			status = subsystems[i].suites[j].fn(argc - 1,
-							    argv + 1, prefix);
+			ret = run_bench(coll->name, bench->name, bench->fn, argc-1, argv+1, prefix);
 			goto end;
 		}
 
 		if (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
-			dump_suites(i);
+			dump_benchmarks(coll);
 			goto end;
 		}
 
-		printf("Unknown suite:%s for %s\n", argv[1], argv[0]);
-		status = 1;
+		printf("Unknown benchmark: '%s' for collection '%s'\n", argv[1], argv[0]);
+		ret = 1;
 		goto end;
 	}
 
-	printf("Unknown subsystem:%s\n", argv[0]);
-	status = 1;
+	printf("Unknown collection: '%s'\n", argv[0]);
+	ret = 1;
 
 end:
-	return status;
+	return ret;
 }
diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c
index c96c8fa3824353f1c5401cd2a6bacb9701a2762e..cfede86161d8ae9410c123745bb60def09785331 100644
--- a/tools/perf/builtin-buildid-cache.c
+++ b/tools/perf/builtin-buildid-cache.c
@@ -6,6 +6,11 @@
  * Copyright (C) 2010, Red Hat Inc.
  * Copyright (C) 2010, Arnaldo Carvalho de Melo <acme@redhat.com>
  */
+#include <sys/types.h>
+#include <sys/time.h>
+#include <time.h>
+#include <dirent.h>
+#include <unistd.h>
 #include "builtin.h"
 #include "perf.h"
 #include "util/cache.h"
@@ -17,6 +22,140 @@
 #include "util/session.h"
 #include "util/symbol.h"
 
+static int build_id_cache__kcore_buildid(const char *proc_dir, char *sbuildid)
+{
+	char root_dir[PATH_MAX];
+	char notes[PATH_MAX];
+	u8 build_id[BUILD_ID_SIZE];
+	char *p;
+
+	strlcpy(root_dir, proc_dir, sizeof(root_dir));
+
+	p = strrchr(root_dir, '/');
+	if (!p)
+		return -1;
+	*p = '\0';
+
+	scnprintf(notes, sizeof(notes), "%s/sys/kernel/notes", root_dir);
+
+	if (sysfs__read_build_id(notes, build_id, sizeof(build_id)))
+		return -1;
+
+	build_id__sprintf(build_id, sizeof(build_id), sbuildid);
+
+	return 0;
+}
+
+static int build_id_cache__kcore_dir(char *dir, size_t sz)
+{
+	struct timeval tv;
+	struct tm tm;
+	char dt[32];
+
+	if (gettimeofday(&tv, NULL) || !localtime_r(&tv.tv_sec, &tm))
+		return -1;
+
+	if (!strftime(dt, sizeof(dt), "%Y%m%d%H%M%S", &tm))
+		return -1;
+
+	scnprintf(dir, sz, "%s%02u", dt, (unsigned)tv.tv_usec / 10000);
+
+	return 0;
+}
+
+static int build_id_cache__kcore_existing(const char *from_dir, char *to_dir,
+					  size_t to_dir_sz)
+{
+	char from[PATH_MAX];
+	char to[PATH_MAX];
+	struct dirent *dent;
+	int ret = -1;
+	DIR *d;
+
+	d = opendir(to_dir);
+	if (!d)
+		return -1;
+
+	scnprintf(from, sizeof(from), "%s/modules", from_dir);
+
+	while (1) {
+		dent = readdir(d);
+		if (!dent)
+			break;
+		if (dent->d_type != DT_DIR)
+			continue;
+		scnprintf(to, sizeof(to), "%s/%s/modules", to_dir,
+			  dent->d_name);
+		if (!compare_proc_modules(from, to)) {
+			scnprintf(to, sizeof(to), "%s/%s", to_dir,
+				  dent->d_name);
+			strlcpy(to_dir, to, to_dir_sz);
+			ret = 0;
+			break;
+		}
+	}
+
+	closedir(d);
+
+	return ret;
+}
+
+static int build_id_cache__add_kcore(const char *filename, const char *debugdir)
+{
+	char dir[32], sbuildid[BUILD_ID_SIZE * 2 + 1];
+	char from_dir[PATH_MAX], to_dir[PATH_MAX];
+	char *p;
+
+	strlcpy(from_dir, filename, sizeof(from_dir));
+
+	p = strrchr(from_dir, '/');
+	if (!p || strcmp(p + 1, "kcore"))
+		return -1;
+	*p = '\0';
+
+	if (build_id_cache__kcore_buildid(from_dir, sbuildid))
+		return -1;
+
+	scnprintf(to_dir, sizeof(to_dir), "%s/[kernel.kcore]/%s",
+		  debugdir, sbuildid);
+
+	if (!build_id_cache__kcore_existing(from_dir, to_dir, sizeof(to_dir))) {
+		pr_debug("same kcore found in %s\n", to_dir);
+		return 0;
+	}
+
+	if (build_id_cache__kcore_dir(dir, sizeof(dir)))
+		return -1;
+
+	scnprintf(to_dir, sizeof(to_dir), "%s/[kernel.kcore]/%s/%s",
+		  debugdir, sbuildid, dir);
+
+	if (mkdir_p(to_dir, 0755))
+		return -1;
+
+	if (kcore_copy(from_dir, to_dir)) {
+		/* Remove YYYYmmddHHMMSShh directory */
+		if (!rmdir(to_dir)) {
+			p = strrchr(to_dir, '/');
+			if (p)
+				*p = '\0';
+			/* Try to remove buildid directory */
+			if (!rmdir(to_dir)) {
+				p = strrchr(to_dir, '/');
+				if (p)
+					*p = '\0';
+				/* Try to remove [kernel.kcore] directory */
+				rmdir(to_dir);
+			}
+		}
+		return -1;
+	}
+
+	pr_debug("kcore added to build-id cache directory %s\n", to_dir);
+
+	return 0;
+}
+
 static int build_id_cache__add_file(const char *filename, const char *debugdir)
 {
 	char sbuild_id[BUILD_ID_SIZE * 2 + 1];
@@ -82,8 +221,12 @@ static bool dso__missing_buildid_cache(struct dso *dso, int parm __maybe_unused)
 
 static int build_id_cache__fprintf_missing(const char *filename, bool force, FILE *fp)
 {
-	struct perf_session *session = perf_session__new(filename, O_RDONLY,
-							 force, false, NULL);
+	struct perf_data_file file = {
+		.path  = filename,
+		.mode  = PERF_DATA_MODE_READ,
+		.force = force,
+	};
+	struct perf_session *session = perf_session__new(&file, false, NULL);
 	if (session == NULL)
 		return -1;
 
@@ -130,11 +273,14 @@ int cmd_buildid_cache(int argc, const char **argv,
 	char const *add_name_list_str = NULL,
 		   *remove_name_list_str = NULL,
 		   *missing_filename = NULL,
-		   *update_name_list_str = NULL;
+		   *update_name_list_str = NULL,
+		   *kcore_filename;
 
 	const struct option buildid_cache_options[] = {
 	OPT_STRING('a', "add", &add_name_list_str,
 		   "file list", "file(s) to add"),
+	OPT_STRING('k', "kcore", &kcore_filename,
+		   "file", "kcore file to add"),
 	OPT_STRING('r', "remove", &remove_name_list_str, "file list",
 		    "file(s) to remove"),
 	OPT_STRING('M', "missing", &missing_filename, "file",
@@ -217,5 +363,9 @@ int cmd_buildid_cache(int argc, const char **argv,
 		}
 	}
 
+	if (kcore_filename &&
+	    build_id_cache__add_kcore(kcore_filename, debugdir))
+		pr_warning("Couldn't add %s\n", kcore_filename);
+
 	return ret;
 }
diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c
index e74366a13218dbc5da8ef576b298d8bcc21c4ed6..ed3873b3e23873ded61f12486d50bb118c9471b2 100644
--- a/tools/perf/builtin-buildid-list.c
+++ b/tools/perf/builtin-buildid-list.c
@@ -15,6 +15,7 @@
 #include "util/parse-options.h"
 #include "util/session.h"
 #include "util/symbol.h"
+#include "util/data.h"
 
 static int sysfs__fprintf_build_id(FILE *fp)
 {
@@ -52,6 +53,11 @@ static bool dso__skip_buildid(struct dso *dso, int with_hits)
 static int perf_session__list_build_ids(bool force, bool with_hits)
 {
 	struct perf_session *session;
+	struct perf_data_file file = {
+		.path  = input_name,
+		.mode  = PERF_DATA_MODE_READ,
+		.force = force,
+	};
 
 	symbol__elf_init();
 	/*
@@ -60,15 +66,14 @@ static int perf_session__list_build_ids(bool force, bool with_hits)
 	if (filename__fprintf_build_id(input_name, stdout))
 		goto out;
 
-	session = perf_session__new(input_name, O_RDONLY, force, false,
-				    &build_id__mark_dso_hit_ops);
+	session = perf_session__new(&file, false, &build_id__mark_dso_hit_ops);
 	if (session == NULL)
 		return -1;
 	/*
 	 * in pipe-mode, the only way to get the buildids is to parse
 	 * the record stream. Buildids are stored as RECORD_HEADER_BUILD_ID
 	 */
-	if (with_hits || session->fd_pipe)
+	if (with_hits || perf_data_file__is_pipe(&file))
 		perf_session__process_events(session, &build_id__mark_dso_hit_ops);
 
 	perf_session__fprintf_dsos_buildid(session, stdout, dso__skip_buildid, with_hits);
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index f28799e94f2a4bbbb1226da3cd78b908c7b1f610..3b67ea2444bd4abf8314ef5c40c7d4b3e731c455 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -16,6 +16,7 @@
 #include "util/sort.h"
 #include "util/symbol.h"
 #include "util/util.h"
+#include "util/data.h"
 
 #include <stdlib.h>
 #include <math.h>
@@ -42,7 +43,7 @@ struct diff_hpp_fmt {
 
 struct data__file {
 	struct perf_session	*session;
-	const char		*file;
+	struct perf_data_file	file;
 	int			 idx;
 	struct hists		*hists;
 	struct diff_hpp_fmt	 fmt[PERF_HPP_DIFF__MAX_INDEX];
@@ -302,11 +303,12 @@ static int formula_fprintf(struct hist_entry *he, struct hist_entry *pair,
 	return -1;
 }
 
-static int hists__add_entry(struct hists *self,
+static int hists__add_entry(struct hists *hists,
 			    struct addr_location *al, u64 period,
-			    u64 weight)
+			    u64 weight, u64 transaction)
 {
-	if (__hists__add_entry(self, al, NULL, period, weight) != NULL)
+	if (__hists__add_entry(hists, al, NULL, NULL, NULL, period, weight,
+			       transaction) != NULL)
 		return 0;
 	return -ENOMEM;
 }
@@ -328,7 +330,8 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused,
 	if (al.filtered)
 		return 0;
 
-	if (hists__add_entry(&evsel->hists, &al, sample->period, sample->weight)) {
+	if (hists__add_entry(&evsel->hists, &al, sample->period,
+			     sample->weight, sample->transaction)) {
 		pr_warning("problem incrementing symbol period, skipping event\n");
 		return -1;
 	}
@@ -367,7 +370,7 @@ static void perf_evlist__collapse_resort(struct perf_evlist *evlist)
 	list_for_each_entry(evsel, &evlist->entries, node) {
 		struct hists *hists = &evsel->hists;
 
-		hists__collapse_resort(hists);
+		hists__collapse_resort(hists, NULL);
 	}
 }
 
@@ -599,7 +602,7 @@ static void data__fprintf(void)
 
 	data__for_each_file(i, d)
 		fprintf(stdout, "#  [%d] %s %s\n",
-			d->idx, d->file,
+			d->idx, d->file.path,
 			!d->idx ? "(Baseline)" : "");
 
 	fprintf(stdout, "#\n");
@@ -661,17 +664,16 @@ static int __cmd_diff(void)
 	int ret = -EINVAL, i;
 
 	data__for_each_file(i, d) {
-		d->session = perf_session__new(d->file, O_RDONLY, force,
-					       false, &tool);
+		d->session = perf_session__new(&d->file, false, &tool);
 		if (!d->session) {
-			pr_err("Failed to open %s\n", d->file);
+			pr_err("Failed to open %s\n", d->file.path);
 			ret = -ENOMEM;
 			goto out_delete;
 		}
 
 		ret = perf_session__process_events(d->session, &tool);
 		if (ret) {
-			pr_err("Failed to process %s\n", d->file);
+			pr_err("Failed to process %s\n", d->file.path);
 			goto out_delete;
 		}
 
@@ -1014,7 +1016,12 @@ static int data_init(int argc, const char **argv)
 		return -ENOMEM;
 
 	data__for_each_file(i, d) {
-		d->file = use_default ? defaults[i] : argv[i];
+		struct perf_data_file *file = &d->file;
+
+		file->path  = use_default ? defaults[i] : argv[i];
+		file->mode  = PERF_DATA_MODE_READ,
+		file->force = force,
+
 		d->idx  = i;
 	}
 
diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c
index 05bd9dfe875cb95aeb6d2008b3319c2e8ca8f5eb..20b0f12763b091fd392366865ab113f9aa66a3c7 100644
--- a/tools/perf/builtin-evlist.c
+++ b/tools/perf/builtin-evlist.c
@@ -14,13 +14,18 @@
 #include "util/parse-events.h"
 #include "util/parse-options.h"
 #include "util/session.h"
+#include "util/data.h"
 
 static int __cmd_evlist(const char *file_name, struct perf_attr_details *details)
 {
 	struct perf_session *session;
 	struct perf_evsel *pos;
+	struct perf_data_file file = {
+		.path = file_name,
+		.mode = PERF_DATA_MODE_READ,
+	};
 
-	session = perf_session__new(file_name, O_RDONLY, 0, false, NULL);
+	session = perf_session__new(&file, 0, NULL);
 	if (session == NULL)
 		return -ENOMEM;
 
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index afe377b2884f740b0c469a367ab3938d99f7df0e..6a2508589460bfc1fec37c10163cf44b95a2ddfc 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -15,6 +15,7 @@
 #include "util/tool.h"
 #include "util/debug.h"
 #include "util/build-id.h"
+#include "util/data.h"
 
 #include "util/parse-options.h"
 
@@ -71,12 +72,17 @@ static int perf_event__repipe_attr(struct perf_tool *tool,
 				   union perf_event *event,
 				   struct perf_evlist **pevlist)
 {
+	struct perf_inject *inject = container_of(tool, struct perf_inject,
+						  tool);
 	int ret;
 
 	ret = perf_event__process_attr(tool, event, pevlist);
 	if (ret)
 		return ret;
 
+	if (!inject->pipe_output)
+		return 0;
+
 	return perf_event__repipe_synth(tool, event);
 }
 
@@ -100,8 +106,8 @@ static int perf_event__repipe_sample(struct perf_tool *tool,
 				     struct perf_evsel *evsel,
 				     struct machine *machine)
 {
-	if (evsel->handler.func) {
-		inject_handler f = evsel->handler.func;
+	if (evsel->handler) {
+		inject_handler f = evsel->handler;
 		return f(tool, event, sample, evsel, machine);
 	}
 
@@ -161,38 +167,38 @@ static int perf_event__repipe_tracing_data(struct perf_tool *tool,
 	return err;
 }
 
-static int dso__read_build_id(struct dso *self)
+static int dso__read_build_id(struct dso *dso)
 {
-	if (self->has_build_id)
+	if (dso->has_build_id)
 		return 0;
 
-	if (filename__read_build_id(self->long_name, self->build_id,
-				    sizeof(self->build_id)) > 0) {
-		self->has_build_id = true;
+	if (filename__read_build_id(dso->long_name, dso->build_id,
+				    sizeof(dso->build_id)) > 0) {
+		dso->has_build_id = true;
 		return 0;
 	}
 
 	return -1;
 }
 
-static int dso__inject_build_id(struct dso *self, struct perf_tool *tool,
+static int dso__inject_build_id(struct dso *dso, struct perf_tool *tool,
 				struct machine *machine)
 {
 	u16 misc = PERF_RECORD_MISC_USER;
 	int err;
 
-	if (dso__read_build_id(self) < 0) {
-		pr_debug("no build_id found for %s\n", self->long_name);
+	if (dso__read_build_id(dso) < 0) {
+		pr_debug("no build_id found for %s\n", dso->long_name);
 		return -1;
 	}
 
-	if (self->kernel)
+	if (dso->kernel)
 		misc = PERF_RECORD_MISC_KERNEL;
 
-	err = perf_event__synthesize_build_id(tool, self, misc, perf_event__repipe,
+	err = perf_event__synthesize_build_id(tool, dso, misc, perf_event__repipe,
 					      machine);
 	if (err) {
-		pr_err("Can't synthesize build_id event for %s\n", self->long_name);
+		pr_err("Can't synthesize build_id event for %s\n", dso->long_name);
 		return -1;
 	}
 
@@ -231,7 +237,7 @@ static int perf_event__inject_buildid(struct perf_tool *tool,
 				 * account this as unresolved.
 				 */
 			} else {
-#ifdef LIBELF_SUPPORT
+#ifdef HAVE_LIBELF_SUPPORT
 				pr_warning("no symbols found in %s, maybe "
 					   "install a debug package?\n",
 					   al.map->dso->long_name);
@@ -345,6 +351,10 @@ static int __cmd_inject(struct perf_inject *inject)
 {
 	struct perf_session *session;
 	int ret = -EINVAL;
+	struct perf_data_file file = {
+		.path = inject->input_name,
+		.mode = PERF_DATA_MODE_READ,
+	};
 
 	signal(SIGINT, sig_handler);
 
@@ -355,7 +365,7 @@ static int __cmd_inject(struct perf_inject *inject)
 		inject->tool.tracing_data = perf_event__repipe_tracing_data;
 	}
 
-	session = perf_session__new(inject->input_name, O_RDONLY, false, true, &inject->tool);
+	session = perf_session__new(&file, true, &inject->tool);
 	if (session == NULL)
 		return -ENOMEM;
 
@@ -373,11 +383,11 @@ static int __cmd_inject(struct perf_inject *inject)
 				if (perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID"))
 					return -EINVAL;
 
-				evsel->handler.func = perf_inject__sched_switch;
+				evsel->handler = perf_inject__sched_switch;
 			} else if (!strcmp(name, "sched:sched_process_exit"))
-				evsel->handler.func = perf_inject__sched_process_exit;
+				evsel->handler = perf_inject__sched_process_exit;
 			else if (!strncmp(name, "sched:sched_stat_", 17))
-				evsel->handler.func = perf_inject__sched_stat;
+				evsel->handler = perf_inject__sched_stat;
 		}
 	}
 
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 9b5f077fee5b1b65a4e51b48ef1af0727b8c134b..929462aa494351f5f237eaa03ce0042146853945 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -13,6 +13,7 @@
 
 #include "util/parse-options.h"
 #include "util/trace-event.h"
+#include "util/data.h"
 
 #include "util/debug.h"
 
@@ -314,10 +315,10 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
 		return -1;
 	}
 
-	dump_printf(" ... thread: %s:%d\n", thread->comm, thread->tid);
+	dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid);
 
-	if (evsel->handler.func != NULL) {
-		tracepoint_handler f = evsel->handler.func;
+	if (evsel->handler != NULL) {
+		tracepoint_handler f = evsel->handler;
 		return f(evsel, sample);
 	}
 
@@ -486,8 +487,12 @@ static int __cmd_kmem(void)
 		{ "kmem:kfree",			perf_evsel__process_free_event, },
     		{ "kmem:kmem_cache_free",	perf_evsel__process_free_event, },
 	};
+	struct perf_data_file file = {
+		.path = input_name,
+		.mode = PERF_DATA_MODE_READ,
+	};
 
-	session = perf_session__new(input_name, O_RDONLY, 0, false, &perf_kmem);
+	session = perf_session__new(&file, false, &perf_kmem);
 	if (session == NULL)
 		return -ENOMEM;
 
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index fbc2888d64950040d3f56444d11bc0e828396b31..cd9f92078aba2e42aaadb8b48f7dad04e23f3485 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -17,9 +17,12 @@
 #include "util/tool.h"
 #include "util/stat.h"
 #include "util/top.h"
+#include "util/data.h"
 
 #include <sys/prctl.h>
+#ifdef HAVE_TIMERFD_SUPPORT
 #include <sys/timerfd.h>
+#endif
 
 #include <termios.h>
 #include <semaphore.h>
@@ -336,6 +339,7 @@ static void init_kvm_event_record(struct perf_kvm_stat *kvm)
 		INIT_LIST_HEAD(&kvm->kvm_events_cache[i]);
 }
 
+#ifdef HAVE_TIMERFD_SUPPORT
 static void clear_events_cache_stats(struct list_head *kvm_events_cache)
 {
 	struct list_head *head;
@@ -357,6 +361,7 @@ static void clear_events_cache_stats(struct list_head *kvm_events_cache)
 		}
 	}
 }
+#endif
 
 static int kvm_events_hash_fn(u64 key)
 {
@@ -782,6 +787,7 @@ static void print_result(struct perf_kvm_stat *kvm)
 		pr_info("\nLost events: %" PRIu64 "\n\n", kvm->lost_events);
 }
 
+#ifdef HAVE_TIMERFD_SUPPORT
 static int process_lost_event(struct perf_tool *tool,
 			      union perf_event *event __maybe_unused,
 			      struct perf_sample *sample __maybe_unused,
@@ -792,6 +798,7 @@ static int process_lost_event(struct perf_tool *tool,
 	kvm->lost_events++;
 	return 0;
 }
+#endif
 
 static bool skip_sample(struct perf_kvm_stat *kvm,
 			struct perf_sample *sample)
@@ -871,6 +878,7 @@ static bool verify_vcpu(int vcpu)
 	return true;
 }
 
+#ifdef HAVE_TIMERFD_SUPPORT
 /* keeping the max events to a modest level to keep
  * the processing of samples per mmap smooth.
  */
@@ -1212,6 +1220,7 @@ static int kvm_live_open_events(struct perf_kvm_stat *kvm)
 out:
 	return rc;
 }
+#endif
 
 static int read_events(struct perf_kvm_stat *kvm)
 {
@@ -1222,10 +1231,13 @@ static int read_events(struct perf_kvm_stat *kvm)
 		.comm			= perf_event__process_comm,
 		.ordered_samples	= true,
 	};
+	struct perf_data_file file = {
+		.path = input_name,
+		.mode = PERF_DATA_MODE_READ,
+	};
 
 	kvm->tool = eops;
-	kvm->session = perf_session__new(kvm->file_name, O_RDONLY, 0, false,
-					 &kvm->tool);
+	kvm->session = perf_session__new(&file, false, &kvm->tool);
 	if (!kvm->session) {
 		pr_err("Initializing perf session failed\n");
 		return -EINVAL;
@@ -1375,6 +1387,7 @@ kvm_events_report(struct perf_kvm_stat *kvm, int argc, const char **argv)
 	return kvm_events_report_vcpu(kvm);
 }
 
+#ifdef HAVE_TIMERFD_SUPPORT
 static struct perf_evlist *kvm_live_event_list(void)
 {
 	struct perf_evlist *evlist;
@@ -1433,8 +1446,9 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
 	const struct option live_options[] = {
 		OPT_STRING('p', "pid", &kvm->opts.target.pid, "pid",
 			"record events on existing process id"),
-		OPT_UINTEGER('m', "mmap-pages", &kvm->opts.mmap_pages,
-			"number of mmap data pages"),
+		OPT_CALLBACK('m', "mmap-pages", &kvm->opts.mmap_pages, "pages",
+			"number of mmap data pages",
+			perf_evlist__parse_mmap_pages),
 		OPT_INCR('v', "verbose", &verbose,
 			"be more verbose (show counter open errors, etc)"),
 		OPT_BOOLEAN('a', "all-cpus", &kvm->opts.target.system_wide,
@@ -1456,6 +1470,9 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
 		"perf kvm stat live [<options>]",
 		NULL
 	};
+	struct perf_data_file file = {
+		.mode = PERF_DATA_MODE_WRITE,
+	};
 
 
 	/* event handling */
@@ -1520,7 +1537,7 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
 	/*
 	 * perf session
 	 */
-	kvm->session = perf_session__new(NULL, O_WRONLY, false, false, &kvm->tool);
+	kvm->session = perf_session__new(&file, false, &kvm->tool);
 	if (kvm->session == NULL) {
 		err = -ENOMEM;
 		goto out;
@@ -1558,6 +1575,7 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
 
 	return err;
 }
+#endif
 
 static void print_kvm_stat_usage(void)
 {
@@ -1596,8 +1614,10 @@ static int kvm_cmd_stat(const char *file_name, int argc, const char **argv)
 	if (!strncmp(argv[1], "rep", 3))
 		return kvm_events_report(&kvm, argc - 1 , argv + 1);
 
+#ifdef HAVE_TIMERFD_SUPPORT
 	if (!strncmp(argv[1], "live", 4))
 		return kvm_events_live(&kvm, argc - 1 , argv + 1);
+#endif
 
 perf_stat:
 	return cmd_stat(argc, argv, NULL);
diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
index e79f423cc3022fff357e132d11d573a66f5ad7a6..011195e38f2173947550100e62927e908b429d30 100644
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -14,51 +14,63 @@
 #include "util/parse-events.h"
 #include "util/cache.h"
 #include "util/pmu.h"
+#include "util/parse-options.h"
 
 int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused)
 {
+	int i;
+	const struct option list_options[] = {
+		OPT_END()
+	};
+	const char * const list_usage[] = {
+		"perf list [hw|sw|cache|tracepoint|pmu|event_glob]",
+		NULL
+	};
+
+	argc = parse_options(argc, argv, list_options, list_usage,
+			     PARSE_OPT_STOP_AT_NON_OPTION);
+
 	setup_pager();
 
-	if (argc == 1)
+	if (argc == 0) {
 		print_events(NULL, false);
-	else {
-		int i;
-
-		for (i = 1; i < argc; ++i) {
-			if (i > 2)
-				putchar('\n');
-			if (strncmp(argv[i], "tracepoint", 10) == 0)
-				print_tracepoint_events(NULL, NULL, false);
-			else if (strcmp(argv[i], "hw") == 0 ||
-				 strcmp(argv[i], "hardware") == 0)
-				print_events_type(PERF_TYPE_HARDWARE);
-			else if (strcmp(argv[i], "sw") == 0 ||
-				 strcmp(argv[i], "software") == 0)
-				print_events_type(PERF_TYPE_SOFTWARE);
-			else if (strcmp(argv[i], "cache") == 0 ||
-				 strcmp(argv[i], "hwcache") == 0)
-				print_hwcache_events(NULL, false);
-			else if (strcmp(argv[i], "pmu") == 0)
-				print_pmu_events(NULL, false);
-			else if (strcmp(argv[i], "--raw-dump") == 0)
-				print_events(NULL, true);
-			else {
-				char *sep = strchr(argv[i], ':'), *s;
-				int sep_idx;
+		return 0;
+	}
 
-				if (sep == NULL) {
-					print_events(argv[i], false);
-					continue;
-				}
-				sep_idx = sep - argv[i];
-				s = strdup(argv[i]);
-				if (s == NULL)
-					return -1;
+	for (i = 0; i < argc; ++i) {
+		if (i)
+			putchar('\n');
+		if (strncmp(argv[i], "tracepoint", 10) == 0)
+			print_tracepoint_events(NULL, NULL, false);
+		else if (strcmp(argv[i], "hw") == 0 ||
+			 strcmp(argv[i], "hardware") == 0)
+			print_events_type(PERF_TYPE_HARDWARE);
+		else if (strcmp(argv[i], "sw") == 0 ||
+			 strcmp(argv[i], "software") == 0)
+			print_events_type(PERF_TYPE_SOFTWARE);
+		else if (strcmp(argv[i], "cache") == 0 ||
+			 strcmp(argv[i], "hwcache") == 0)
+			print_hwcache_events(NULL, false);
+		else if (strcmp(argv[i], "pmu") == 0)
+			print_pmu_events(NULL, false);
+		else if (strcmp(argv[i], "--raw-dump") == 0)
+			print_events(NULL, true);
+		else {
+			char *sep = strchr(argv[i], ':'), *s;
+			int sep_idx;
 
-				s[sep_idx] = '\0';
-				print_tracepoint_events(s, s + sep_idx + 1, false);
-				free(s);
+			if (sep == NULL) {
+				print_events(argv[i], false);
+				continue;
 			}
+			sep_idx = sep - argv[i];
+			s = strdup(argv[i]);
+			if (s == NULL)
+				return -1;
+
+			s[sep_idx] = '\0';
+			print_tracepoint_events(s, s + sep_idx + 1, false);
+			free(s);
 		}
 	}
 	return 0;
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index ee33ba2f05dd8b2f1f9c90f1820e0eab93b86f8b..c852c7a85d3236e86781b15616858a76b56c6206 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -15,6 +15,7 @@
 #include "util/debug.h"
 #include "util/session.h"
 #include "util/tool.h"
+#include "util/data.h"
 
 #include <sys/types.h>
 #include <sys/prctl.h>
@@ -56,7 +57,9 @@ struct lock_stat {
 
 	unsigned int		nr_readlock;
 	unsigned int		nr_trylock;
+
 	/* these times are in nano sec. */
+	u64                     avg_wait_time;
 	u64			wait_time_total;
 	u64			wait_time_min;
 	u64			wait_time_max;
@@ -208,6 +211,7 @@ static struct thread_stat *thread_stat_findnew_first(u32 tid)
 
 SINGLE_KEY(nr_acquired)
 SINGLE_KEY(nr_contended)
+SINGLE_KEY(avg_wait_time)
 SINGLE_KEY(wait_time_total)
 SINGLE_KEY(wait_time_max)
 
@@ -244,6 +248,7 @@ static struct rb_root		result;	/* place to store sorted data */
 struct lock_key keys[] = {
 	DEF_KEY_LOCK(acquired, nr_acquired),
 	DEF_KEY_LOCK(contended, nr_contended),
+	DEF_KEY_LOCK(avg_wait, avg_wait_time),
 	DEF_KEY_LOCK(wait_total, wait_time_total),
 	DEF_KEY_LOCK(wait_min, wait_time_min),
 	DEF_KEY_LOCK(wait_max, wait_time_max),
@@ -321,10 +326,12 @@ static struct lock_stat *lock_stat_findnew(void *addr, const char *name)
 
 	new->addr = addr;
 	new->name = zalloc(sizeof(char) * strlen(name) + 1);
-	if (!new->name)
+	if (!new->name) {
+		free(new);
 		goto alloc_failed;
-	strcpy(new->name, name);
+	}
 
+	strcpy(new->name, name);
 	new->wait_time_min = ULLONG_MAX;
 
 	list_add(&new->hash_entry, entry);
@@ -400,17 +407,17 @@ static int report_lock_acquire_event(struct perf_evsel *evsel,
 
 	ls = lock_stat_findnew(addr, name);
 	if (!ls)
-		return -1;
+		return -ENOMEM;
 	if (ls->discard)
 		return 0;
 
 	ts = thread_stat_findnew(sample->tid);
 	if (!ts)
-		return -1;
+		return -ENOMEM;
 
 	seq = get_seq(ts, addr);
 	if (!seq)
-		return -1;
+		return -ENOMEM;
 
 	switch (seq->state) {
 	case SEQ_STATE_UNINITIALIZED:
@@ -446,7 +453,6 @@ static int report_lock_acquire_event(struct perf_evsel *evsel,
 		list_del(&seq->list);
 		free(seq);
 		goto end;
-		break;
 	default:
 		BUG_ON("Unknown state of lock sequence found!\n");
 		break;
@@ -473,17 +479,17 @@ static int report_lock_acquired_event(struct perf_evsel *evsel,
 
 	ls = lock_stat_findnew(addr, name);
 	if (!ls)
-		return -1;
+		return -ENOMEM;
 	if (ls->discard)
 		return 0;
 
 	ts = thread_stat_findnew(sample->tid);
 	if (!ts)
-		return -1;
+		return -ENOMEM;
 
 	seq = get_seq(ts, addr);
 	if (!seq)
-		return -1;
+		return -ENOMEM;
 
 	switch (seq->state) {
 	case SEQ_STATE_UNINITIALIZED:
@@ -508,8 +514,6 @@ static int report_lock_acquired_event(struct perf_evsel *evsel,
 		list_del(&seq->list);
 		free(seq);
 		goto end;
-		break;
-
 	default:
 		BUG_ON("Unknown state of lock sequence found!\n");
 		break;
@@ -517,6 +521,7 @@ static int report_lock_acquired_event(struct perf_evsel *evsel,
 
 	seq->state = SEQ_STATE_ACQUIRED;
 	ls->nr_acquired++;
+	ls->avg_wait_time = ls->nr_contended ? ls->wait_time_total/ls->nr_contended : 0;
 	seq->prev_event_time = sample->time;
 end:
 	return 0;
@@ -536,17 +541,17 @@ static int report_lock_contended_event(struct perf_evsel *evsel,
 
 	ls = lock_stat_findnew(addr, name);
 	if (!ls)
-		return -1;
+		return -ENOMEM;
 	if (ls->discard)
 		return 0;
 
 	ts = thread_stat_findnew(sample->tid);
 	if (!ts)
-		return -1;
+		return -ENOMEM;
 
 	seq = get_seq(ts, addr);
 	if (!seq)
-		return -1;
+		return -ENOMEM;
 
 	switch (seq->state) {
 	case SEQ_STATE_UNINITIALIZED:
@@ -564,7 +569,6 @@ static int report_lock_contended_event(struct perf_evsel *evsel,
 		list_del(&seq->list);
 		free(seq);
 		goto end;
-		break;
 	default:
 		BUG_ON("Unknown state of lock sequence found!\n");
 		break;
@@ -572,6 +576,7 @@ static int report_lock_contended_event(struct perf_evsel *evsel,
 
 	seq->state = SEQ_STATE_CONTENDED;
 	ls->nr_contended++;
+	ls->avg_wait_time = ls->wait_time_total/ls->nr_contended;
 	seq->prev_event_time = sample->time;
 end:
 	return 0;
@@ -591,22 +596,21 @@ static int report_lock_release_event(struct perf_evsel *evsel,
 
 	ls = lock_stat_findnew(addr, name);
 	if (!ls)
-		return -1;
+		return -ENOMEM;
 	if (ls->discard)
 		return 0;
 
 	ts = thread_stat_findnew(sample->tid);
 	if (!ts)
-		return -1;
+		return -ENOMEM;
 
 	seq = get_seq(ts, addr);
 	if (!seq)
-		return -1;
+		return -ENOMEM;
 
 	switch (seq->state) {
 	case SEQ_STATE_UNINITIALIZED:
 		goto end;
-		break;
 	case SEQ_STATE_ACQUIRED:
 		break;
 	case SEQ_STATE_READ_ACQUIRED:
@@ -624,7 +628,6 @@ static int report_lock_release_event(struct perf_evsel *evsel,
 		ls->discard = 1;
 		bad_hist[BROKEN_RELEASE]++;
 		goto free_seq;
-		break;
 	default:
 		BUG_ON("Unknown state of lock sequence found!\n");
 		break;
@@ -690,7 +693,7 @@ static void print_bad_events(int bad, int total)
 
 	pr_info("\n=== output for debug===\n\n");
 	pr_info("bad: %d, total: %d\n", bad, total);
-	pr_info("bad rate: %f %%\n", (double)bad / (double)total * 100);
+	pr_info("bad rate: %.2f %%\n", (double)bad / (double)total * 100);
 	pr_info("histogram of events caused bad sequence\n");
 	for (i = 0; i < BROKEN_MAX; i++)
 		pr_info(" %10s: %d\n", name[i], bad_hist[i]);
@@ -707,6 +710,7 @@ static void print_result(void)
 	pr_info("%10s ", "acquired");
 	pr_info("%10s ", "contended");
 
+	pr_info("%15s ", "avg wait (ns)");
 	pr_info("%15s ", "total wait (ns)");
 	pr_info("%15s ", "max wait (ns)");
 	pr_info("%15s ", "min wait (ns)");
@@ -738,6 +742,7 @@ static void print_result(void)
 		pr_info("%10u ", st->nr_acquired);
 		pr_info("%10u ", st->nr_contended);
 
+		pr_info("%15" PRIu64 " ", st->avg_wait_time);
 		pr_info("%15" PRIu64 " ", st->wait_time_total);
 		pr_info("%15" PRIu64 " ", st->wait_time_max);
 		pr_info("%15" PRIu64 " ", st->wait_time_min == ULLONG_MAX ?
@@ -762,7 +767,7 @@ static void dump_threads(void)
 	while (node) {
 		st = container_of(node, struct thread_stat, rb);
 		t = perf_session__findnew(session, st->tid);
-		pr_info("%10d: %s\n", st->tid, t->comm);
+		pr_info("%10d: %s\n", st->tid, thread__comm_str(t));
 		node = rb_next(node);
 	};
 }
@@ -814,14 +819,26 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
 		return -1;
 	}
 
-	if (evsel->handler.func != NULL) {
-		tracepoint_handler f = evsel->handler.func;
+	if (evsel->handler != NULL) {
+		tracepoint_handler f = evsel->handler;
 		return f(evsel, sample);
 	}
 
 	return 0;
 }
 
+static void sort_result(void)
+{
+	unsigned int i;
+	struct lock_stat *st;
+
+	for (i = 0; i < LOCKHASH_SIZE; i++) {
+		list_for_each_entry(st, &lockhash_table[i], hash_entry) {
+			insert_to_result(st, compare);
+		}
+	}
+}
+
 static const struct perf_evsel_str_handler lock_tracepoints[] = {
 	{ "lock:lock_acquire",	 perf_evsel__process_lock_acquire,   }, /* CONFIG_LOCKDEP */
 	{ "lock:lock_acquired",	 perf_evsel__process_lock_acquired,  }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
@@ -829,51 +846,51 @@ static const struct perf_evsel_str_handler lock_tracepoints[] = {
 	{ "lock:lock_release",	 perf_evsel__process_lock_release,   }, /* CONFIG_LOCKDEP */
 };
 
-static int read_events(void)
+static int __cmd_report(bool display_info)
 {
+	int err = -EINVAL;
 	struct perf_tool eops = {
 		.sample		 = process_sample_event,
 		.comm		 = perf_event__process_comm,
 		.ordered_samples = true,
 	};
-	session = perf_session__new(input_name, O_RDONLY, 0, false, &eops);
+	struct perf_data_file file = {
+		.path = input_name,
+		.mode = PERF_DATA_MODE_READ,
+	};
+
+	session = perf_session__new(&file, false, &eops);
 	if (!session) {
 		pr_err("Initializing perf session failed\n");
-		return -1;
+		return -ENOMEM;
 	}
 
+	if (!perf_session__has_traces(session, "lock record"))
+		goto out_delete;
+
 	if (perf_session__set_tracepoints_handlers(session, lock_tracepoints)) {
 		pr_err("Initializing perf session tracepoint handlers failed\n");
-		return -1;
+		goto out_delete;
 	}
 
-	return perf_session__process_events(session, &eops);
-}
-
-static void sort_result(void)
-{
-	unsigned int i;
-	struct lock_stat *st;
+	if (select_key())
+		goto out_delete;
 
-	for (i = 0; i < LOCKHASH_SIZE; i++) {
-		list_for_each_entry(st, &lockhash_table[i], hash_entry) {
-			insert_to_result(st, compare);
-		}
-	}
-}
+	err = perf_session__process_events(session, &eops);
+	if (err)
+		goto out_delete;
 
-static int __cmd_report(void)
-{
 	setup_pager();
+	if (display_info) /* used for info subcommand */
+		err = dump_info();
+	else {
+		sort_result();
+		print_result();
+	}
 
-	if ((select_key() != 0) ||
-	    (read_events() != 0))
-		return -1;
-
-	sort_result();
-	print_result();
-
-	return 0;
+out_delete:
+	perf_session__delete(session);
+	return err;
 }
 
 static int __cmd_record(int argc, const char **argv)
@@ -881,7 +898,7 @@ static int __cmd_record(int argc, const char **argv)
 	const char *record_args[] = {
 		"record", "-R", "-m", "1024", "-c", "1",
 	};
-	unsigned int rec_argc, i, j;
+	unsigned int rec_argc, i, j, ret;
 	const char **rec_argv;
 
 	for (i = 0; i < ARRAY_SIZE(lock_tracepoints); i++) {
@@ -898,7 +915,7 @@ static int __cmd_record(int argc, const char **argv)
 	rec_argc += 2 * ARRAY_SIZE(lock_tracepoints);
 
 	rec_argv = calloc(rec_argc + 1, sizeof(char *));
-	if (rec_argv == NULL)
+	if (!rec_argv)
 		return -ENOMEM;
 
 	for (i = 0; i < ARRAY_SIZE(record_args); i++)
@@ -914,7 +931,9 @@ static int __cmd_record(int argc, const char **argv)
 
 	BUG_ON(i != rec_argc);
 
-	return cmd_record(i, rec_argv, NULL);
+	ret = cmd_record(i, rec_argv, NULL);
+	free(rec_argv);
+	return ret;
 }
 
 int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused)
@@ -934,7 +953,7 @@ int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused)
 	};
 	const struct option report_options[] = {
 	OPT_STRING('k', "key", &sort_key, "acquired",
-		    "key for sorting (acquired / contended / wait_total / wait_max / wait_min)"),
+		    "key for sorting (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
 	/* TODO: type */
 	OPT_END()
 	};
@@ -972,7 +991,7 @@ int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused)
 			if (argc)
 				usage_with_options(report_usage, report_options);
 		}
-		__cmd_report();
+		rc = __cmd_report(false);
 	} else if (!strcmp(argv[0], "script")) {
 		/* Aliased to 'perf script' */
 		return cmd_script(argc, argv, prefix);
@@ -985,11 +1004,7 @@ int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused)
 		}
 		/* recycling report_lock_ops */
 		trace_handler = &report_lock_ops;
-		setup_pager();
-		if (read_events() != 0)
-			rc = -1;
-		else
-			rc = dump_info();
+		rc = __cmd_report(true);
 	} else {
 		usage_with_options(lock_usage, lock_options);
 	}
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index 253133a6251d3c3d657671b7a8a603ef4cb03643..31c00f186da127fa6fca4157d839426886008c07 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -5,6 +5,7 @@
 #include "util/trace-event.h"
 #include "util/tool.h"
 #include "util/session.h"
+#include "util/data.h"
 
 #define MEM_OPERATION_LOAD	"load"
 #define MEM_OPERATION_STORE	"store"
@@ -119,10 +120,14 @@ static int process_sample_event(struct perf_tool *tool,
 
 static int report_raw_events(struct perf_mem *mem)
 {
+	struct perf_data_file file = {
+		.path = input_name,
+		.mode = PERF_DATA_MODE_READ,
+	};
 	int err = -EINVAL;
 	int ret;
-	struct perf_session *session = perf_session__new(input_name, O_RDONLY,
-							 0, false, &mem->tool);
+	struct perf_session *session = perf_session__new(&file, false,
+							 &mem->tool);
 
 	if (session == NULL)
 		return -ENOMEM;
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index e8a66f9a67153c6819422839a55bd1ae5f7d4fce..89acc17cf2a02e4c7d8760a7b89e1300007cc5a3 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -173,7 +173,7 @@ static int opt_set_target(const struct option *opt, const char *str,
 	if  (str && !params.target) {
 		if (!strcmp(opt->long_name, "exec"))
 			params.uprobes = true;
-#ifdef DWARF_SUPPORT
+#ifdef HAVE_DWARF_SUPPORT
 		else if (!strcmp(opt->long_name, "module"))
 			params.uprobes = false;
 #endif
@@ -187,7 +187,7 @@ static int opt_set_target(const struct option *opt, const char *str,
 	return ret;
 }
 
-#ifdef DWARF_SUPPORT
+#ifdef HAVE_DWARF_SUPPORT
 static int opt_show_lines(const struct option *opt __maybe_unused,
 			  const char *str, int unset __maybe_unused)
 {
@@ -257,7 +257,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
 		"perf probe [<options>] --add 'PROBEDEF' [--add 'PROBEDEF' ...]",
 		"perf probe [<options>] --del '[GROUP:]EVENT' ...",
 		"perf probe --list",
-#ifdef DWARF_SUPPORT
+#ifdef HAVE_DWARF_SUPPORT
 		"perf probe [<options>] --line 'LINEDESC'",
 		"perf probe [<options>] --vars 'PROBEPOINT'",
 #endif
@@ -271,7 +271,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
 	OPT_CALLBACK('d', "del", NULL, "[GROUP:]EVENT", "delete a probe event.",
 		opt_del_probe_event),
 	OPT_CALLBACK('a', "add", NULL,
-#ifdef DWARF_SUPPORT
+#ifdef HAVE_DWARF_SUPPORT
 		"[EVENT=]FUNC[@SRC][+OFF|%return|:RL|;PT]|SRC:AL|SRC;PT"
 		" [[NAME=]ARG ...]",
 #else
@@ -283,7 +283,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
 		"\t\tFUNC:\tFunction name\n"
 		"\t\tOFF:\tOffset from function entry (in byte)\n"
 		"\t\t%return:\tPut the probe at function return\n"
-#ifdef DWARF_SUPPORT
+#ifdef HAVE_DWARF_SUPPORT
 		"\t\tSRC:\tSource code path\n"
 		"\t\tRL:\tRelative line number from function entry.\n"
 		"\t\tAL:\tAbsolute line number in file.\n"
@@ -296,7 +296,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
 		opt_add_probe_event),
 	OPT_BOOLEAN('f', "force", &params.force_add, "forcibly add events"
 		    " with existing name"),
-#ifdef DWARF_SUPPORT
+#ifdef HAVE_DWARF_SUPPORT
 	OPT_CALLBACK('L', "line", NULL,
 		     "FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]",
 		     "Show source code lines.", opt_show_lines),
@@ -408,7 +408,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
 		return ret;
 	}
 
-#ifdef DWARF_SUPPORT
+#ifdef HAVE_DWARF_SUPPORT
 	if (params.show_lines && !params.uprobes) {
 		if (params.mod_events) {
 			pr_err("  Error: Don't use --line with"
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index d0465148464022c82e5fcfb431513624edf34740..15280b5e5574669c78376a419a8512eb5e774258 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -24,12 +24,13 @@
 #include "util/symbol.h"
 #include "util/cpumap.h"
 #include "util/thread_map.h"
+#include "util/data.h"
 
 #include <unistd.h>
 #include <sched.h>
 #include <sys/mman.h>
 
-#ifndef HAVE_ON_EXIT
+#ifndef HAVE_ON_EXIT_SUPPORT
 #ifndef ATEXIT_MAX
 #define ATEXIT_MAX 32
 #endif
@@ -65,31 +66,25 @@ struct perf_record {
 	struct perf_tool	tool;
 	struct perf_record_opts	opts;
 	u64			bytes_written;
-	const char		*output_name;
+	struct perf_data_file	file;
 	struct perf_evlist	*evlist;
 	struct perf_session	*session;
 	const char		*progname;
-	int			output;
-	unsigned int		page_size;
 	int			realtime_prio;
 	bool			no_buildid;
 	bool			no_buildid_cache;
 	long			samples;
-	off_t			post_processing_offset;
 };
 
-static void advance_output(struct perf_record *rec, size_t size)
-{
-	rec->bytes_written += size;
-}
-
 static int write_output(struct perf_record *rec, void *buf, size_t size)
 {
+	struct perf_data_file *file = &rec->file;
+
 	while (size) {
-		int ret = write(rec->output, buf, size);
+		int ret = write(file->fd, buf, size);
 
 		if (ret < 0) {
-			pr_err("failed to write\n");
+			pr_err("failed to write perf data, error: %m\n");
 			return -1;
 		}
 
@@ -119,7 +114,7 @@ static int perf_record__mmap_read(struct perf_record *rec,
 {
 	unsigned int head = perf_mmap__read_head(md);
 	unsigned int old = md->prev;
-	unsigned char *data = md->base + rec->page_size;
+	unsigned char *data = md->base + page_size;
 	unsigned long size;
 	void *buf;
 	int rc = 0;
@@ -234,10 +229,6 @@ static int perf_record__open(struct perf_record *rec)
 			       "or try again with a smaller value of -m/--mmap_pages.\n"
 			       "(current value: %d)\n", opts->mmap_pages);
 			rc = -errno;
-		} else if (!is_power_of_2(opts->mmap_pages) &&
-			   (opts->mmap_pages != UINT_MAX)) {
-			pr_err("--mmap_pages/-m value must be a power of two.");
-			rc = -EINVAL;
 		} else {
 			pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno));
 			rc = -errno;
@@ -253,31 +244,34 @@ static int perf_record__open(struct perf_record *rec)
 
 static int process_buildids(struct perf_record *rec)
 {
-	u64 size = lseek(rec->output, 0, SEEK_CUR);
+	struct perf_data_file *file  = &rec->file;
+	struct perf_session *session = rec->session;
+	u64 start = session->header.data_offset;
 
+	u64 size = lseek(file->fd, 0, SEEK_CUR);
 	if (size == 0)
 		return 0;
 
-	rec->session->fd = rec->output;
-	return __perf_session__process_events(rec->session, rec->post_processing_offset,
-					      size - rec->post_processing_offset,
+	return __perf_session__process_events(session, start,
+					      size - start,
 					      size, &build_id__mark_dso_hit_ops);
 }
 
 static void perf_record__exit(int status, void *arg)
 {
 	struct perf_record *rec = arg;
+	struct perf_data_file *file = &rec->file;
 
 	if (status != 0)
 		return;
 
-	if (!rec->opts.pipe_output) {
+	if (!file->is_pipe) {
 		rec->session->header.data_size += rec->bytes_written;
 
 		if (!rec->no_buildid)
 			process_buildids(rec);
 		perf_session__write_header(rec->session, rec->evlist,
-					   rec->output, true);
+					   file->fd, true);
 		perf_session__delete(rec->session);
 		perf_evlist__delete(rec->evlist);
 		symbol__exit();
@@ -343,64 +337,47 @@ static int perf_record__mmap_read_all(struct perf_record *rec)
 	return rc;
 }
 
+static void perf_record__init_features(struct perf_record *rec)
+{
+	struct perf_evlist *evsel_list = rec->evlist;
+	struct perf_session *session = rec->session;
+	int feat;
+
+	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
+		perf_header__set_feat(&session->header, feat);
+
+	if (rec->no_buildid)
+		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
+
+	if (!have_tracepoints(&evsel_list->entries))
+		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
+
+	if (!rec->opts.branch_stack)
+		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
+}
+
 static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
 {
-	struct stat st;
-	int flags;
-	int err, output, feat;
+	int err;
 	unsigned long waking = 0;
 	const bool forks = argc > 0;
 	struct machine *machine;
 	struct perf_tool *tool = &rec->tool;
 	struct perf_record_opts *opts = &rec->opts;
 	struct perf_evlist *evsel_list = rec->evlist;
-	const char *output_name = rec->output_name;
+	struct perf_data_file *file = &rec->file;
 	struct perf_session *session;
 	bool disabled = false;
 
 	rec->progname = argv[0];
 
-	rec->page_size = sysconf(_SC_PAGE_SIZE);
-
 	on_exit(perf_record__sig_exit, rec);
 	signal(SIGCHLD, sig_handler);
 	signal(SIGINT, sig_handler);
 	signal(SIGUSR1, sig_handler);
 	signal(SIGTERM, sig_handler);
 
-	if (!output_name) {
-		if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
-			opts->pipe_output = true;
-		else
-			rec->output_name = output_name = "perf.data";
-	}
-	if (output_name) {
-		if (!strcmp(output_name, "-"))
-			opts->pipe_output = true;
-		else if (!stat(output_name, &st) && st.st_size) {
-			char oldname[PATH_MAX];
-			snprintf(oldname, sizeof(oldname), "%s.old",
-				 output_name);
-			unlink(oldname);
-			rename(output_name, oldname);
-		}
-	}
-
-	flags = O_CREAT|O_RDWR|O_TRUNC;
-
-	if (opts->pipe_output)
-		output = STDOUT_FILENO;
-	else
-		output = open(output_name, flags, S_IRUSR | S_IWUSR);
-	if (output < 0) {
-		perror("failed to create output file");
-		return -1;
-	}
-
-	rec->output = output;
-
-	session = perf_session__new(output_name, O_WRONLY,
-				    true, false, NULL);
+	session = perf_session__new(file, false, NULL);
 	if (session == NULL) {
 		pr_err("Not enough memory for reading perf file header\n");
 		return -1;
@@ -408,21 +385,11 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
 
 	rec->session = session;
 
-	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
-		perf_header__set_feat(&session->header, feat);
-
-	if (rec->no_buildid)
-		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
-
-	if (!have_tracepoints(&evsel_list->entries))
-		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
-
-	if (!rec->opts.branch_stack)
-		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
+	perf_record__init_features(rec);
 
 	if (forks) {
 		err = perf_evlist__prepare_workload(evsel_list, &opts->target,
-						    argv, opts->pipe_output,
+						    argv, file->is_pipe,
 						    true);
 		if (err < 0) {
 			pr_err("Couldn't run the workload!\n");
@@ -443,13 +410,13 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
 	 */
 	on_exit(perf_record__exit, rec);
 
-	if (opts->pipe_output) {
-		err = perf_header__write_pipe(output);
+	if (file->is_pipe) {
+		err = perf_header__write_pipe(file->fd);
 		if (err < 0)
 			goto out_delete_session;
 	} else {
 		err = perf_session__write_header(session, evsel_list,
-						 output, false);
+						 file->fd, false);
 		if (err < 0)
 			goto out_delete_session;
 	}
@@ -462,11 +429,9 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
 		goto out_delete_session;
 	}
 
-	rec->post_processing_offset = lseek(output, 0, SEEK_CUR);
-
 	machine = &session->machines.host;
 
-	if (opts->pipe_output) {
+	if (file->is_pipe) {
 		err = perf_event__synthesize_attrs(tool, session,
 						   process_synthesized_event);
 		if (err < 0) {
@@ -483,13 +448,13 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
 			 * return this more properly and also
 			 * propagate errors that now are calling die()
 			 */
-			err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
+			err = perf_event__synthesize_tracing_data(tool, file->fd, evsel_list,
 								  process_synthesized_event);
 			if (err <= 0) {
 				pr_err("Couldn't record tracing data.\n");
 				goto out_delete_session;
 			}
-			advance_output(rec, err);
+			rec->bytes_written += err;
 		}
 	}
 
@@ -590,7 +555,7 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
 	fprintf(stderr,
 		"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
 		(double)rec->bytes_written / 1024.0 / 1024.0,
-		output_name,
+		file->path,
 		rec->bytes_written / 24);
 
 	return 0;
@@ -618,6 +583,9 @@ static const struct branch_mode branch_modes[] = {
 	BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
 	BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
 	BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
+	BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX),
+	BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX),
+	BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX),
 	BRANCH_END
 };
 
@@ -684,7 +652,7 @@ parse_branch_stack(const struct option *opt, const char *str, int unset)
 	return ret;
 }
 
-#ifdef LIBUNWIND_SUPPORT
+#ifdef HAVE_LIBUNWIND_SUPPORT
 static int get_stack_size(char *str, unsigned long *_size)
 {
 	char *endptr;
@@ -710,7 +678,7 @@ static int get_stack_size(char *str, unsigned long *_size)
 	       max_size, str);
 	return -1;
 }
-#endif /* LIBUNWIND_SUPPORT */
+#endif /* HAVE_LIBUNWIND_SUPPORT */
 
 int record_parse_callchain(const char *arg, struct perf_record_opts *opts)
 {
@@ -739,7 +707,7 @@ int record_parse_callchain(const char *arg, struct perf_record_opts *opts)
 				       "needed for -g fp\n");
 			break;
 
-#ifdef LIBUNWIND_SUPPORT
+#ifdef HAVE_LIBUNWIND_SUPPORT
 		/* Dwarf style */
 		} else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
 			const unsigned long default_stack_dump_size = 8192;
@@ -755,7 +723,7 @@ int record_parse_callchain(const char *arg, struct perf_record_opts *opts)
 				ret = get_stack_size(tok, &size);
 				opts->stack_dump_size = size;
 			}
-#endif /* LIBUNWIND_SUPPORT */
+#endif /* HAVE_LIBUNWIND_SUPPORT */
 		} else {
 			pr_err("callchain: Unknown --call-graph option "
 			       "value: %s\n", arg);
@@ -841,7 +809,7 @@ static struct perf_record record = {
 
 #define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace) recording: "
 
-#ifdef LIBUNWIND_SUPPORT
+#ifdef HAVE_LIBUNWIND_SUPPORT
 const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf";
 #else
 const char record_callchain_help[] = CALLCHAIN_HELP "fp";
@@ -875,13 +843,14 @@ const struct option record_options[] = {
 	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
 		    "list of cpus to monitor"),
 	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
-	OPT_STRING('o', "output", &record.output_name, "file",
+	OPT_STRING('o', "output", &record.file.path, "file",
 		    "output file name"),
 	OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
 		    "child tasks do not inherit counters"),
 	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
-	OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
-		     "number of mmap data pages"),
+	OPT_CALLBACK('m', "mmap-pages", &record.opts.mmap_pages, "pages",
+		     "number of mmap data pages",
+		     perf_evlist__parse_mmap_pages),
 	OPT_BOOLEAN(0, "group", &record.opts.group,
 		    "put the counters into a counter group"),
 	OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
@@ -920,6 +889,8 @@ const struct option record_options[] = {
 		     parse_branch_stack),
 	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
 		    "sample by weight (on special events only)"),
+	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
+		    "sample transaction flags (special events only)"),
 	OPT_END()
 };
 
@@ -989,20 +960,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
 	if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0)
 		usage_with_options(record_usage, record_options);
 
-	if (rec->opts.user_interval != ULLONG_MAX)
-		rec->opts.default_interval = rec->opts.user_interval;
-	if (rec->opts.user_freq != UINT_MAX)
-		rec->opts.freq = rec->opts.user_freq;
-
-	/*
-	 * User specified count overrides default frequency.
-	 */
-	if (rec->opts.default_interval)
-		rec->opts.freq = 0;
-	else if (rec->opts.freq) {
-		rec->opts.default_interval = rec->opts.freq;
-	} else {
-		ui__error("frequency and count are zero, aborting\n");
+	if (perf_record_opts__config(&rec->opts)) {
 		err = -EINVAL;
 		goto out_free_fd;
 	}
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 72eae7498c09419c8f1f77a7a005c6dcbbb5eb4b..8cf8e66ba594156fc85a1ddca5b0b0305544ae23 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -33,8 +33,10 @@
 #include "util/thread.h"
 #include "util/sort.h"
 #include "util/hist.h"
+#include "util/data.h"
 #include "arch/common.h"
 
+#include <dlfcn.h>
 #include <linux/bitmap.h>
 
 struct perf_report {
@@ -47,6 +49,7 @@ struct perf_report {
 	bool			show_threads;
 	bool			inverted_callchain;
 	bool			mem_mode;
+	int			max_stack;
 	struct perf_read_values	show_threads_values;
 	const char		*pretty_printing_style;
 	const char		*cpu_list;
@@ -88,7 +91,8 @@ static int perf_report__add_mem_hist_entry(struct perf_tool *tool,
 	if ((sort__has_parent || symbol_conf.use_callchain) &&
 	    sample->callchain) {
 		err = machine__resolve_callchain(machine, evsel, al->thread,
-						 sample, &parent, al);
+						 sample, &parent, al,
+						 rep->max_stack);
 		if (err)
 			return err;
 	}
@@ -111,7 +115,8 @@ static int perf_report__add_mem_hist_entry(struct perf_tool *tool,
 	 * and this is indirectly achieved by passing period=weight here
 	 * and the he_stat__add_period() function.
 	 */
-	he = __hists__add_mem_entry(&evsel->hists, al, parent, mi, cost, cost);
+	he = __hists__add_entry(&evsel->hists, al, parent, NULL, mi,
+				cost, cost, 0);
 	if (!he)
 		return -ENOMEM;
 
@@ -179,7 +184,8 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool,
 	if ((sort__has_parent || symbol_conf.use_callchain)
 	    && sample->callchain) {
 		err = machine__resolve_callchain(machine, evsel, al->thread,
-						 sample, &parent, al);
+						 sample, &parent, al,
+						 rep->max_stack);
 		if (err)
 			return err;
 	}
@@ -195,12 +201,16 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool,
 
 		err = -ENOMEM;
 
+		/* overwrite the 'al' to branch-to info */
+		al->map = bi[i].to.map;
+		al->sym = bi[i].to.sym;
+		al->addr = bi[i].to.addr;
 		/*
 		 * The report shows the percentage of total branches captured
 		 * and not events sampled. Thus we use a pseudo period of 1.
 		 */
-		he = __hists__add_branch_entry(&evsel->hists, al, parent,
-				&bi[i], 1, 1);
+		he = __hists__add_entry(&evsel->hists, al, parent, &bi[i], NULL,
+					1, 1, 0);
 		if (he) {
 			struct annotation *notes;
 			bx = he->branch_info;
@@ -242,24 +252,28 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool,
 	return err;
 }
 
-static int perf_evsel__add_hist_entry(struct perf_evsel *evsel,
+static int perf_evsel__add_hist_entry(struct perf_tool *tool,
+				      struct perf_evsel *evsel,
 				      struct addr_location *al,
 				      struct perf_sample *sample,
 				      struct machine *machine)
 {
+	struct perf_report *rep = container_of(tool, struct perf_report, tool);
 	struct symbol *parent = NULL;
 	int err = 0;
 	struct hist_entry *he;
 
 	if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) {
 		err = machine__resolve_callchain(machine, evsel, al->thread,
-						 sample, &parent, al);
+						 sample, &parent, al,
+						 rep->max_stack);
 		if (err)
 			return err;
 	}
 
-	he = __hists__add_entry(&evsel->hists, al, parent, sample->period,
-					sample->weight);
+	he = __hists__add_entry(&evsel->hists, al, parent, NULL, NULL,
+				sample->period, sample->weight,
+				sample->transaction);
 	if (he == NULL)
 		return -ENOMEM;
 
@@ -330,7 +344,8 @@ static int process_sample_event(struct perf_tool *tool,
 		if (al.map != NULL)
 			al.map->dso->hit = 1;
 
-		ret = perf_evsel__add_hist_entry(evsel, &al, sample, machine);
+		ret = perf_evsel__add_hist_entry(tool, evsel, &al, sample,
+						 machine);
 		if (ret < 0)
 			pr_debug("problem incrementing symbol period, skipping event\n");
 	}
@@ -364,10 +379,11 @@ static int process_read_event(struct perf_tool *tool,
 /* For pipe mode, sample_type is not currently set */
 static int perf_report__setup_sample_type(struct perf_report *rep)
 {
-	struct perf_session *self = rep->session;
-	u64 sample_type = perf_evlist__combined_sample_type(self->evlist);
+	struct perf_session *session = rep->session;
+	u64 sample_type = perf_evlist__combined_sample_type(session->evlist);
+	bool is_pipe = perf_data_file__is_pipe(session->file);
 
-	if (!self->fd_pipe && !(sample_type & PERF_SAMPLE_CALLCHAIN)) {
+	if (!is_pipe && !(sample_type & PERF_SAMPLE_CALLCHAIN)) {
 		if (sort__has_parent) {
 			ui__error("Selected --sort parent, but no "
 				    "callchain data. Did you call "
@@ -390,7 +406,7 @@ static int perf_report__setup_sample_type(struct perf_report *rep)
 	}
 
 	if (sort__mode == SORT_MODE__BRANCH) {
-		if (!self->fd_pipe &&
+		if (!is_pipe &&
 		    !(sample_type & PERF_SAMPLE_BRANCH_STACK)) {
 			ui__error("Selected -b but no branch data. "
 				  "Did you call perf record without -b?\n");
@@ -407,14 +423,14 @@ static void sig_handler(int sig __maybe_unused)
 }
 
 static size_t hists__fprintf_nr_sample_events(struct perf_report *rep,
-					      struct hists *self,
+					      struct hists *hists,
 					      const char *evname, FILE *fp)
 {
 	size_t ret;
 	char unit;
-	unsigned long nr_samples = self->stats.nr_events[PERF_RECORD_SAMPLE];
-	u64 nr_events = self->stats.total_period;
-	struct perf_evsel *evsel = hists_to_evsel(self);
+	unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE];
+	u64 nr_events = hists->stats.total_period;
+	struct perf_evsel *evsel = hists_to_evsel(hists);
 	char buf[512];
 	size_t size = sizeof(buf);
 
@@ -486,6 +502,8 @@ static int __cmd_report(struct perf_report *rep)
 	struct map *kernel_map;
 	struct kmap *kernel_kmap;
 	const char *help = "For a higher level overview, try: perf report --sort comm,dso";
+	struct ui_progress prog;
+	struct perf_data_file *file = session->file;
 
 	signal(SIGINT, sig_handler);
 
@@ -546,6 +564,12 @@ static int __cmd_report(struct perf_report *rep)
 		return 0;
 	}
 
+	nr_samples = 0;
+	list_for_each_entry(pos, &session->evlist->entries, node)
+		nr_samples += pos->hists.nr_entries;
+
+	ui_progress__init(&prog, nr_samples, "Merging related events...");
+
 	nr_samples = 0;
 	list_for_each_entry(pos, &session->evlist->entries, node) {
 		struct hists *hists = &pos->hists;
@@ -553,7 +577,7 @@ static int __cmd_report(struct perf_report *rep)
 		if (pos->idx == 0)
 			hists->symbol_filter_str = rep->symbol_filter_str;
 
-		hists__collapse_resort(hists);
+		hists__collapse_resort(hists, &prog);
 		nr_samples += hists->stats.nr_events[PERF_RECORD_SAMPLE];
 
 		/* Non-group events are considered as leader */
@@ -565,12 +589,13 @@ static int __cmd_report(struct perf_report *rep)
 			hists__link(leader_hists, hists);
 		}
 	}
+	ui_progress__finish();
 
 	if (session_done())
 		return 0;
 
 	if (nr_samples == 0) {
-		ui__error("The %s file has no samples!\n", session->filename);
+		ui__error("The %s file has no samples!\n", file->path);
 		return 0;
 	}
 
@@ -591,8 +616,19 @@ static int __cmd_report(struct perf_report *rep)
 				ret = 0;
 
 		} else if (use_browser == 2) {
-			perf_evlist__gtk_browse_hists(session->evlist, help,
-						      NULL, rep->min_percent);
+			int (*hist_browser)(struct perf_evlist *,
+					    const char *,
+					    struct hist_browser_timer *,
+					    float min_pcnt);
+
+			hist_browser = dlsym(perf_gtk_handle,
+					     "perf_evlist__gtk_browse_hists");
+			if (hist_browser == NULL) {
+				ui__error("GTK browser not found!\n");
+				return ret;
+			}
+			hist_browser(session->evlist, help, NULL,
+				     rep->min_percent);
 		}
 	} else
 		perf_evlist__tty_browse_hists(session->evlist, rep, help);
@@ -757,6 +793,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 			.ordered_samples = true,
 			.ordering_requires_timestamps = true,
 		},
+		.max_stack		 = PERF_MAX_STACK_DEPTH,
 		.pretty_printing_style	 = "normal",
 	};
 	const struct option options[] = {
@@ -787,7 +824,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 		   "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline,"
 		   " dso_to, dso_from, symbol_to, symbol_from, mispredict,"
 		   " weight, local_weight, mem, symbol_daddr, dso_daddr, tlb, "
-		   "snoop, locked"),
+		   "snoop, locked, abort, in_tx, transaction"),
 	OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization,
 		    "Show sample percentage for different cpu modes"),
 	OPT_STRING('p', "parent", &parent_pattern, "regex",
@@ -797,6 +834,10 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 	OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order",
 		     "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). "
 		     "Default: fractal,0.5,callee,function", &parse_callchain_opt, callchain_default_opt),
+	OPT_INTEGER(0, "max-stack", &report.max_stack,
+		    "Set the maximum stack depth when parsing the callchain, "
+		    "anything beyond the specified depth will be ignored. "
+		    "Default: " __stringify(PERF_MAX_STACK_DEPTH)),
 	OPT_BOOLEAN('G', "inverted", &report.inverted_callchain,
 		    "alias for inverted call graph"),
 	OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
@@ -845,6 +886,9 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 		     "Don't show entries under that percent", parse_percent_limit),
 	OPT_END()
 	};
+	struct perf_data_file file = {
+		.mode  = PERF_DATA_MODE_READ,
+	};
 
 	perf_config(perf_report_config, &report);
 
@@ -867,16 +911,11 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 			input_name = "perf.data";
 	}
 
-	if (strcmp(input_name, "-") != 0)
-		setup_browser(true);
-	else {
-		use_browser = 0;
-		perf_hpp__init();
-	}
+	file.path  = input_name;
+	file.force = report.force;
 
 repeat:
-	session = perf_session__new(input_name, O_RDONLY,
-				    report.force, false, &report.tool);
+	session = perf_session__new(&file, false, &report.tool);
 	if (session == NULL)
 		return -ENOMEM;
 
@@ -914,8 +953,22 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 			sort_order = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked";
 	}
 
-	if (setup_sorting() < 0)
-		usage_with_options(report_usage, options);
+	if (setup_sorting() < 0) {
+		parse_options_usage(report_usage, options, "s", 1);
+		goto error;
+	}
+
+	if (parent_pattern != default_parent_pattern) {
+		if (sort_dimension__add("parent") < 0)
+			goto error;
+	}
+
+	if (strcmp(input_name, "-") != 0)
+		setup_browser(true);
+	else {
+		use_browser = 0;
+		perf_hpp__init();
+	}
 
 	/*
 	 * Only in the TUI browser we are doing integrated annotation,
@@ -946,11 +999,6 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 	if (symbol__init() < 0)
 		goto error;
 
-	if (parent_pattern != default_parent_pattern) {
-		if (sort_dimension__add("parent") < 0)
-			goto error;
-	}
-
 	if (argc) {
 		/*
 		 * Special case: if there's an argument left then assume that
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index d8c51b2f263f7475da99a28df87cad3d1f6bb4d2..0f3c65518a2c669a9b83ad255d1018368de6ec69 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -737,12 +737,12 @@ static int replay_fork_event(struct perf_sched *sched,
 
 	if (verbose) {
 		printf("fork event\n");
-		printf("... parent: %s/%d\n", parent->comm, parent->tid);
-		printf("...  child: %s/%d\n", child->comm, child->tid);
+		printf("... parent: %s/%d\n", thread__comm_str(parent), parent->tid);
+		printf("...  child: %s/%d\n", thread__comm_str(child), child->tid);
 	}
 
-	register_pid(sched, parent->tid, parent->comm);
-	register_pid(sched, child->tid, child->comm);
+	register_pid(sched, parent->tid, thread__comm_str(parent));
+	register_pid(sched, child->tid, thread__comm_str(child));
 	return 0;
 }
 
@@ -1077,7 +1077,7 @@ static int latency_migrate_task_event(struct perf_sched *sched,
 	if (!atoms) {
 		if (thread_atoms_insert(sched, migrant))
 			return -1;
-		register_pid(sched, migrant->tid, migrant->comm);
+		register_pid(sched, migrant->tid, thread__comm_str(migrant));
 		atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid);
 		if (!atoms) {
 			pr_err("migration-event: Internal tree error");
@@ -1111,13 +1111,13 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_
 	/*
 	 * Ignore idle threads:
 	 */
-	if (!strcmp(work_list->thread->comm, "swapper"))
+	if (!strcmp(thread__comm_str(work_list->thread), "swapper"))
 		return;
 
 	sched->all_runtime += work_list->total_runtime;
 	sched->all_count   += work_list->nb_atoms;
 
-	ret = printf("  %s:%d ", work_list->thread->comm, work_list->thread->tid);
+	ret = printf("  %s:%d ", thread__comm_str(work_list->thread), work_list->thread->tid);
 
 	for (i = 0; i < 24 - ret; i++)
 		printf(" ");
@@ -1334,7 +1334,7 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
 	printf("  %12.6f secs ", (double)timestamp/1e9);
 	if (new_shortname) {
 		printf("%s => %s:%d\n",
-			sched_in->shortname, sched_in->comm, sched_in->tid);
+		       sched_in->shortname, thread__comm_str(sched_in), sched_in->tid);
 	} else {
 		printf("\n");
 	}
@@ -1427,8 +1427,8 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __maybe_
 	evsel->hists.stats.total_period += sample->period;
 	hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
 
-	if (evsel->handler.func != NULL) {
-		tracepoint_handler f = evsel->handler.func;
+	if (evsel->handler != NULL) {
+		tracepoint_handler f = evsel->handler;
 		err = f(tool, evsel, sample, machine);
 	}
 
@@ -1446,8 +1446,12 @@ static int perf_sched__read_events(struct perf_sched *sched,
 		{ "sched:sched_migrate_task", process_sched_migrate_task_event, },
 	};
 	struct perf_session *session;
+	struct perf_data_file file = {
+		.path = input_name,
+		.mode = PERF_DATA_MODE_READ,
+	};
 
-	session = perf_session__new(input_name, O_RDONLY, 0, false, &sched->tool);
+	session = perf_session__new(&file, false, &sched->tool);
 	if (session == NULL) {
 		pr_debug("No Memory for session\n");
 		return -1;
@@ -1651,29 +1655,27 @@ static int __cmd_record(int argc, const char **argv)
 	return cmd_record(i, rec_argv, NULL);
 }
 
-static const char default_sort_order[] = "avg, max, switch, runtime";
-static struct perf_sched sched = {
-	.tool = {
-		.sample		 = perf_sched__process_tracepoint_sample,
-		.comm		 = perf_event__process_comm,
-		.lost		 = perf_event__process_lost,
-		.fork		 = perf_sched__process_fork_event,
-		.ordered_samples = true,
-	},
-	.cmp_pid	      = LIST_HEAD_INIT(sched.cmp_pid),
-	.sort_list	      = LIST_HEAD_INIT(sched.sort_list),
-	.start_work_mutex     = PTHREAD_MUTEX_INITIALIZER,
-	.work_done_wait_mutex = PTHREAD_MUTEX_INITIALIZER,
-	.curr_pid	      = { [0 ... MAX_CPUS - 1] = -1 },
-	.sort_order	      = default_sort_order,
-	.replay_repeat	      = 10,
-	.profile_cpu	      = -1,
-	.next_shortname1      = 'A',
-	.next_shortname2      = '0',
-};
-
 int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
 {
+	const char default_sort_order[] = "avg, max, switch, runtime";
+	struct perf_sched sched = {
+		.tool = {
+			.sample		 = perf_sched__process_tracepoint_sample,
+			.comm		 = perf_event__process_comm,
+			.lost		 = perf_event__process_lost,
+			.fork		 = perf_sched__process_fork_event,
+			.ordered_samples = true,
+		},
+		.cmp_pid	      = LIST_HEAD_INIT(sched.cmp_pid),
+		.sort_list	      = LIST_HEAD_INIT(sched.sort_list),
+		.start_work_mutex     = PTHREAD_MUTEX_INITIALIZER,
+		.work_done_wait_mutex = PTHREAD_MUTEX_INITIALIZER,
+		.sort_order	      = default_sort_order,
+		.replay_repeat	      = 10,
+		.profile_cpu	      = -1,
+		.next_shortname1      = 'A',
+		.next_shortname2      = '0',
+	};
 	const struct option latency_options[] = {
 	OPT_STRING('s', "sort", &sched.sort_order, "key[,key2...]",
 		   "sort by key(s): runtime, switch, avg, max"),
@@ -1729,6 +1731,10 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
 		.switch_event	    = replay_switch_event,
 		.fork_event	    = replay_fork_event,
 	};
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(sched.curr_pid); i++)
+		sched.curr_pid[i] = -1;
 
 	argc = parse_options(argc, argv, sched_options, sched_usage,
 			     PARSE_OPT_STOP_AT_NON_OPTION);
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 9c333ff3dfeb3de716eac13d48d7364e998bb50d..baf17989a216137064e089d4e2f65fef07a4357f 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -15,6 +15,7 @@
 #include "util/evlist.h"
 #include "util/evsel.h"
 #include "util/sort.h"
+#include "util/data.h"
 #include <linux/bitmap.h>
 
 static char const		*script_name;
@@ -228,6 +229,24 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
 	return 0;
 }
 
+static void set_print_ip_opts(struct perf_event_attr *attr)
+{
+	unsigned int type = attr->type;
+
+	output[type].print_ip_opts = 0;
+	if (PRINT_FIELD(IP))
+		output[type].print_ip_opts |= PRINT_IP_OPT_IP;
+
+	if (PRINT_FIELD(SYM))
+		output[type].print_ip_opts |= PRINT_IP_OPT_SYM;
+
+	if (PRINT_FIELD(DSO))
+		output[type].print_ip_opts |= PRINT_IP_OPT_DSO;
+
+	if (PRINT_FIELD(SYMOFFSET))
+		output[type].print_ip_opts |= PRINT_IP_OPT_SYMOFFSET;
+}
+
 /*
  * verify all user requested events exist and the samples
  * have the expected data
@@ -236,7 +255,6 @@ static int perf_session__check_output_opt(struct perf_session *session)
 {
 	int j;
 	struct perf_evsel *evsel;
-	struct perf_event_attr *attr;
 
 	for (j = 0; j < PERF_TYPE_MAX; ++j) {
 		evsel = perf_session__find_first_evtype(session, j);
@@ -259,20 +277,7 @@ static int perf_session__check_output_opt(struct perf_session *session)
 		if (evsel == NULL)
 			continue;
 
-		attr = &evsel->attr;
-
-		output[j].print_ip_opts = 0;
-		if (PRINT_FIELD(IP))
-			output[j].print_ip_opts |= PRINT_IP_OPT_IP;
-
-		if (PRINT_FIELD(SYM))
-			output[j].print_ip_opts |= PRINT_IP_OPT_SYM;
-
-		if (PRINT_FIELD(DSO))
-			output[j].print_ip_opts |= PRINT_IP_OPT_DSO;
-
-		if (PRINT_FIELD(SYMOFFSET))
-			output[j].print_ip_opts |= PRINT_IP_OPT_SYMOFFSET;
+		set_print_ip_opts(&evsel->attr);
 	}
 
 	return 0;
@@ -290,11 +295,11 @@ static void print_sample_start(struct perf_sample *sample,
 
 	if (PRINT_FIELD(COMM)) {
 		if (latency_format)
-			printf("%8.8s ", thread->comm);
+			printf("%8.8s ", thread__comm_str(thread));
 		else if (PRINT_FIELD(IP) && symbol_conf.use_callchain)
-			printf("%s ", thread->comm);
+			printf("%s ", thread__comm_str(thread));
 		else
-			printf("%16s ", thread->comm);
+			printf("%16s ", thread__comm_str(thread));
 	}
 
 	if (PRINT_FIELD(PID) && PRINT_FIELD(TID))
@@ -409,7 +414,9 @@ static void print_sample_bts(union perf_event *event,
 	printf(" => ");
 
 	/* print branch_to information */
-	if (PRINT_FIELD(ADDR))
+	if (PRINT_FIELD(ADDR) ||
+	    ((evsel->attr.sample_type & PERF_SAMPLE_ADDR) &&
+	     !output[attr->type].user_set))
 		print_sample_addr(event, sample, machine, thread, attr);
 
 	printf("\n");
@@ -539,32 +546,51 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
 	return 0;
 }
 
-static struct perf_tool perf_script = {
-	.sample		 = process_sample_event,
-	.mmap		 = perf_event__process_mmap,
-	.mmap2		 = perf_event__process_mmap2,
-	.comm		 = perf_event__process_comm,
-	.exit		 = perf_event__process_exit,
-	.fork		 = perf_event__process_fork,
-	.attr		 = perf_event__process_attr,
-	.tracing_data	 = perf_event__process_tracing_data,
-	.build_id	 = perf_event__process_build_id,
-	.ordered_samples = true,
-	.ordering_requires_timestamps = true,
+struct perf_script {
+	struct perf_tool	tool;
+	struct perf_session	*session;
 };
 
+static int process_attr(struct perf_tool *tool, union perf_event *event,
+			struct perf_evlist **pevlist)
+{
+	struct perf_script *scr = container_of(tool, struct perf_script, tool);
+	struct perf_evlist *evlist;
+	struct perf_evsel *evsel, *pos;
+	int err;
+
+	err = perf_event__process_attr(tool, event, pevlist);
+	if (err)
+		return err;
+
+	evlist = *pevlist;
+	evsel = perf_evlist__last(*pevlist);
+
+	if (evsel->attr.type >= PERF_TYPE_MAX)
+		return 0;
+
+	list_for_each_entry(pos, &evlist->entries, node) {
+		if (pos->attr.type == evsel->attr.type && pos != evsel)
+			return 0;
+	}
+
+	set_print_ip_opts(&evsel->attr);
+
+	return perf_evsel__check_attr(evsel, scr->session);
+}
+
 static void sig_handler(int sig __maybe_unused)
 {
 	session_done = 1;
 }
 
-static int __cmd_script(struct perf_session *session)
+static int __cmd_script(struct perf_script *script)
 {
 	int ret;
 
 	signal(SIGINT, sig_handler);
 
-	ret = perf_session__process_events(session, &perf_script);
+	ret = perf_session__process_events(script->session, &script->tool);
 
 	if (debug_mode)
 		pr_err("Misordered timestamps: %" PRIu64 "\n", nr_unordered);
@@ -1113,10 +1139,14 @@ int find_scripts(char **scripts_array, char **scripts_path_array)
 	char scripts_path[MAXPATHLEN], lang_path[MAXPATHLEN];
 	DIR *scripts_dir, *lang_dir;
 	struct perf_session *session;
+	struct perf_data_file file = {
+		.path = input_name,
+		.mode = PERF_DATA_MODE_READ,
+	};
 	char *temp;
 	int i = 0;
 
-	session = perf_session__new(input_name, O_RDONLY, 0, false, NULL);
+	session = perf_session__new(&file, false, NULL);
 	if (!session)
 		return -1;
 
@@ -1266,6 +1296,21 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
 	char *script_path = NULL;
 	const char **__argv;
 	int i, j, err;
+	struct perf_script script = {
+		.tool = {
+			.sample		 = process_sample_event,
+			.mmap		 = perf_event__process_mmap,
+			.mmap2		 = perf_event__process_mmap2,
+			.comm		 = perf_event__process_comm,
+			.exit		 = perf_event__process_exit,
+			.fork		 = perf_event__process_fork,
+			.attr		 = process_attr,
+			.tracing_data	 = perf_event__process_tracing_data,
+			.build_id	 = perf_event__process_build_id,
+			.ordered_samples = true,
+			.ordering_requires_timestamps = true,
+		},
+	};
 	const struct option options[] = {
 	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
 		    "dump raw trace in ASCII"),
@@ -1317,12 +1362,17 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
 		"perf script [<options>] <top-script> [script-args]",
 		NULL
 	};
+	struct perf_data_file file = {
+		.mode = PERF_DATA_MODE_READ,
+	};
 
 	setup_scripting();
 
 	argc = parse_options(argc, argv, options, script_usage,
 			     PARSE_OPT_STOP_AT_NON_OPTION);
 
+	file.path = input_name;
+
 	if (argc > 1 && !strncmp(argv[0], "rec", strlen("rec"))) {
 		rec_script_path = get_script_path(argv[1], RECORD_SUFFIX);
 		if (!rec_script_path)
@@ -1486,11 +1536,12 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
 	if (!script_name)
 		setup_pager();
 
-	session = perf_session__new(input_name, O_RDONLY, 0, false,
-				    &perf_script);
+	session = perf_session__new(&file, false, &script.tool);
 	if (session == NULL)
 		return -ENOMEM;
 
+	script.session = session;
+
 	if (cpu_list) {
 		if (perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap))
 			return -1;
@@ -1514,7 +1565,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
 			return -1;
 		}
 
-		input = open(session->filename, O_RDONLY);	/* input_name */
+		input = open(file.path, O_RDONLY);	/* input_name */
 		if (input < 0) {
 			perror("failed to open file");
 			return -1;
@@ -1554,7 +1605,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
 	if (err < 0)
 		goto out;
 
-	err = __cmd_script(session);
+	err = __cmd_script(&script);
 
 	perf_session__delete(session);
 	cleanup_scripting();
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 5098f144b92defd53e94f9f24184e3ade0672928..0fc1c941a73c3bd03cdfe183aad5397e53ed6496 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -46,6 +46,7 @@
 #include "util/util.h"
 #include "util/parse-options.h"
 #include "util/parse-events.h"
+#include "util/pmu.h"
 #include "util/event.h"
 #include "util/evlist.h"
 #include "util/evsel.h"
@@ -70,6 +71,41 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix);
 static void print_counter(struct perf_evsel *counter, char *prefix);
 static void print_aggr(char *prefix);
 
+/* Default events used for perf stat -T */
+static const char * const transaction_attrs[] = {
+	"task-clock",
+	"{"
+	"instructions,"
+	"cycles,"
+	"cpu/cycles-t/,"
+	"cpu/tx-start/,"
+	"cpu/el-start/,"
+	"cpu/cycles-ct/"
+	"}"
+};
+
+/* More limited version when the CPU does not have all events. */
+static const char * const transaction_limited_attrs[] = {
+	"task-clock",
+	"{"
+	"instructions,"
+	"cycles,"
+	"cpu/cycles-t/,"
+	"cpu/tx-start/"
+	"}"
+};
+
+/* must match transaction_attrs and the beginning limited_attrs */
+enum {
+	T_TASK_CLOCK,
+	T_INSTRUCTIONS,
+	T_CYCLES,
+	T_CYCLES_IN_TX,
+	T_TRANSACTION_START,
+	T_ELISION_START,
+	T_CYCLES_IN_TX_CP,
+};
+
 static struct perf_evlist	*evsel_list;
 
 static struct perf_target	target = {
@@ -90,6 +126,7 @@ static enum aggr_mode		aggr_mode			= AGGR_GLOBAL;
 static volatile pid_t		child_pid			= -1;
 static bool			null_run			=  false;
 static int			detailed_run			=  0;
+static bool			transaction_run;
 static bool			big_num				=  true;
 static int			big_num_opt			=  -1;
 static const char		*csv_sep			= NULL;
@@ -214,7 +251,10 @@ static struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
 static struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
 static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
 static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
+static struct stats runtime_cycles_in_tx_stats[MAX_NR_CPUS];
 static struct stats walltime_nsecs_stats;
+static struct stats runtime_transaction_stats[MAX_NR_CPUS];
+static struct stats runtime_elision_stats[MAX_NR_CPUS];
 
 static void perf_stat__reset_stats(struct perf_evlist *evlist)
 {
@@ -236,6 +276,11 @@ static void perf_stat__reset_stats(struct perf_evlist *evlist)
 	memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
 	memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
 	memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
+	memset(runtime_cycles_in_tx_stats, 0,
+			sizeof(runtime_cycles_in_tx_stats));
+	memset(runtime_transaction_stats, 0,
+		sizeof(runtime_transaction_stats));
+	memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats));
 	memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
 }
 
@@ -274,6 +319,29 @@ static inline int nsec_counter(struct perf_evsel *evsel)
 	return 0;
 }
 
+static struct perf_evsel *nth_evsel(int n)
+{
+	static struct perf_evsel **array;
+	static int array_len;
+	struct perf_evsel *ev;
+	int j;
+
+	/* Assumes this only called when evsel_list does not change anymore. */
+	if (!array) {
+		list_for_each_entry(ev, &evsel_list->entries, node)
+			array_len++;
+		array = malloc(array_len * sizeof(void *));
+		if (!array)
+			exit(ENOMEM);
+		j = 0;
+		list_for_each_entry(ev, &evsel_list->entries, node)
+			array[j++] = ev;
+	}
+	if (n < array_len)
+		return array[n];
+	return NULL;
+}
+
 /*
  * Update various tracking values we maintain to print
  * more semantic information such as miss/hit ratios,
@@ -285,6 +353,15 @@ static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
 		update_stats(&runtime_nsecs_stats[0], count[0]);
 	else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
 		update_stats(&runtime_cycles_stats[0], count[0]);
+	else if (transaction_run &&
+		 perf_evsel__cmp(counter, nth_evsel(T_CYCLES_IN_TX)))
+		update_stats(&runtime_cycles_in_tx_stats[0], count[0]);
+	else if (transaction_run &&
+		 perf_evsel__cmp(counter, nth_evsel(T_TRANSACTION_START)))
+		update_stats(&runtime_transaction_stats[0], count[0]);
+	else if (transaction_run &&
+		 perf_evsel__cmp(counter, nth_evsel(T_ELISION_START)))
+		update_stats(&runtime_elision_stats[0], count[0]);
 	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
 		update_stats(&runtime_stalled_cycles_front_stats[0], count[0]);
 	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
@@ -629,10 +706,13 @@ static void nsec_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
 {
 	double msecs = avg / 1e6;
 	const char *fmt = csv_output ? "%.6f%s%s" : "%18.6f%s%-25s";
+	char name[25];
 
 	aggr_printout(evsel, cpu, nr);
 
-	fprintf(output, fmt, msecs, csv_sep, perf_evsel__name(evsel));
+	scnprintf(name, sizeof(name), "%s%s",
+		  perf_evsel__name(evsel), csv_output ? "" : " (msec)");
+	fprintf(output, fmt, msecs, csv_sep, name);
 
 	if (evsel->cgrp)
 		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
@@ -828,7 +908,7 @@ static void print_ll_cache_misses(int cpu,
 
 static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
 {
-	double total, ratio = 0.0;
+	double total, ratio = 0.0, total2;
 	const char *fmt;
 
 	if (csv_output)
@@ -853,11 +933,10 @@ static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
 
 	if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
 		total = avg_stats(&runtime_cycles_stats[cpu]);
-		if (total)
+		if (total) {
 			ratio = avg / total;
-
-		fprintf(output, " #   %5.2f  insns per cycle        ", ratio);
-
+			fprintf(output, " #   %5.2f  insns per cycle        ", ratio);
+		}
 		total = avg_stats(&runtime_stalled_cycles_front_stats[cpu]);
 		total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[cpu]));
 
@@ -920,10 +999,47 @@ static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
 	} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
 		total = avg_stats(&runtime_nsecs_stats[cpu]);
 
+		if (total) {
+			ratio = avg / total;
+			fprintf(output, " # %8.3f GHz                    ", ratio);
+		}
+	} else if (transaction_run &&
+		   perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX))) {
+		total = avg_stats(&runtime_cycles_stats[cpu]);
+		if (total)
+			fprintf(output,
+				" #   %5.2f%% transactional cycles   ",
+				100.0 * (avg / total));
+	} else if (transaction_run &&
+		   perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX_CP))) {
+		total = avg_stats(&runtime_cycles_stats[cpu]);
+		total2 = avg_stats(&runtime_cycles_in_tx_stats[cpu]);
+		if (total2 < avg)
+			total2 = avg;
+		if (total)
+			fprintf(output,
+				" #   %5.2f%% aborted cycles         ",
+				100.0 * ((total2-avg) / total));
+	} else if (transaction_run &&
+		   perf_evsel__cmp(evsel, nth_evsel(T_TRANSACTION_START)) &&
+		   avg > 0 &&
+		   runtime_cycles_in_tx_stats[cpu].n != 0) {
+		total = avg_stats(&runtime_cycles_in_tx_stats[cpu]);
+
 		if (total)
-			ratio = 1.0 * avg / total;
+			ratio = total / avg;
 
-		fprintf(output, " # %8.3f GHz                    ", ratio);
+		fprintf(output, " # %8.0f cycles / transaction   ", ratio);
+	} else if (transaction_run &&
+		   perf_evsel__cmp(evsel, nth_evsel(T_ELISION_START)) &&
+		   avg > 0 &&
+		   runtime_cycles_in_tx_stats[cpu].n != 0) {
+		total = avg_stats(&runtime_cycles_in_tx_stats[cpu]);
+
+		if (total)
+			ratio = total / avg;
+
+		fprintf(output, " # %8.0f cycles / elision       ", ratio);
 	} else if (runtime_nsecs_stats[cpu].n != 0) {
 		char unit = 'M';
 
@@ -1116,7 +1232,11 @@ static void print_stat(int argc, const char **argv)
 	if (!csv_output) {
 		fprintf(output, "\n");
 		fprintf(output, " Performance counter stats for ");
-		if (!perf_target__has_task(&target)) {
+		if (target.system_wide)
+			fprintf(output, "\'system wide");
+		else if (target.cpu_list)
+			fprintf(output, "\'CPU(s) %s", target.cpu_list);
+		else if (!perf_target__has_task(&target)) {
 			fprintf(output, "\'%s", argv[0]);
 			for (i = 1; i < argc; i++)
 				fprintf(output, " %s", argv[i]);
@@ -1237,6 +1357,16 @@ static int perf_stat_init_aggr_mode(void)
 	return 0;
 }
 
+static int setup_events(const char * const *attrs, unsigned len)
+{
+	unsigned i;
+
+	for (i = 0; i < len; i++) {
+		if (parse_events(evsel_list, attrs[i]))
+			return -1;
+	}
+	return 0;
+}
 
 /*
  * Add default attributes, if there were no attributes specified or
@@ -1355,6 +1485,22 @@ static int add_default_attributes(void)
 	if (null_run)
 		return 0;
 
+	if (transaction_run) {
+		int err;
+		if (pmu_have_event("cpu", "cycles-ct") &&
+		    pmu_have_event("cpu", "el-start"))
+			err = setup_events(transaction_attrs,
+					ARRAY_SIZE(transaction_attrs));
+		else
+			err = setup_events(transaction_limited_attrs,
+				 ARRAY_SIZE(transaction_limited_attrs));
+		if (err < 0) {
+			fprintf(stderr, "Cannot set up transaction events\n");
+			return -1;
+		}
+		return 0;
+	}
+
 	if (!evsel_list->nr_entries) {
 		if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0)
 			return -1;
@@ -1389,6 +1535,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 	int output_fd = 0;
 	const char *output_name	= NULL;
 	const struct option options[] = {
+	OPT_BOOLEAN('T', "transaction", &transaction_run,
+		    "hardware transaction statistics"),
 	OPT_CALLBACK('e', "event", &evsel_list, "event",
 		     "event selector. use 'perf list' to list available events",
 		     parse_events_option),
@@ -1448,7 +1596,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 		"perf stat [<options>] [<command>]",
 		NULL
 	};
-	int status = -ENOMEM, run_idx;
+	int status = -EINVAL, run_idx;
 	const char *mode;
 
 	setlocale(LC_ALL, "");
@@ -1466,12 +1614,15 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 
 	if (output_name && output_fd) {
 		fprintf(stderr, "cannot use both --output and --log-fd\n");
-		usage_with_options(stat_usage, options);
+		parse_options_usage(stat_usage, options, "o", 1);
+		parse_options_usage(NULL, options, "log-fd", 0);
+		goto out;
 	}
 
 	if (output_fd < 0) {
 		fprintf(stderr, "argument to --log-fd must be a > 0\n");
-		usage_with_options(stat_usage, options);
+		parse_options_usage(stat_usage, options, "log-fd", 0);
+		goto out;
 	}
 
 	if (!output) {
@@ -1508,16 +1659,21 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 		/* User explicitly passed -B? */
 		if (big_num_opt == 1) {
 			fprintf(stderr, "-B option not supported with -x\n");
-			usage_with_options(stat_usage, options);
+			parse_options_usage(stat_usage, options, "B", 1);
+			parse_options_usage(NULL, options, "x", 1);
+			goto out;
 		} else /* Nope, so disable big number formatting */
 			big_num = false;
 	} else if (big_num_opt == 0) /* User passed --no-big-num */
 		big_num = false;
 
-	if (!argc && !perf_target__has_task(&target))
+	if (!argc && perf_target__none(&target))
 		usage_with_options(stat_usage, options);
+
 	if (run_count < 0) {
-		usage_with_options(stat_usage, options);
+		pr_err("Run count must be a positive number\n");
+		parse_options_usage(stat_usage, options, "r", 1);
+		goto out;
 	} else if (run_count == 0) {
 		forever = true;
 		run_count = 1;
@@ -1529,8 +1685,10 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 		fprintf(stderr, "both cgroup and no-aggregation "
 			"modes only available in system-wide mode\n");
 
-		usage_with_options(stat_usage, options);
-		return -1;
+		parse_options_usage(stat_usage, options, "G", 1);
+		parse_options_usage(NULL, options, "A", 1);
+		parse_options_usage(NULL, options, "a", 1);
+		goto out;
 	}
 
 	if (add_default_attributes())
@@ -1539,25 +1697,28 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 	perf_target__validate(&target);
 
 	if (perf_evlist__create_maps(evsel_list, &target) < 0) {
-		if (perf_target__has_task(&target))
+		if (perf_target__has_task(&target)) {
 			pr_err("Problems finding threads of monitor\n");
-		if (perf_target__has_cpu(&target))
+			parse_options_usage(stat_usage, options, "p", 1);
+			parse_options_usage(NULL, options, "t", 1);
+		} else if (perf_target__has_cpu(&target)) {
 			perror("failed to parse CPUs map");
-
-		usage_with_options(stat_usage, options);
-		return -1;
+			parse_options_usage(stat_usage, options, "C", 1);
+			parse_options_usage(NULL, options, "a", 1);
+		}
+		goto out;
 	}
 	if (interval && interval < 100) {
 		pr_err("print interval must be >= 100ms\n");
-		usage_with_options(stat_usage, options);
-		return -1;
+		parse_options_usage(stat_usage, options, "I", 1);
+		goto out_free_maps;
 	}
 
 	if (perf_evlist__alloc_stats(evsel_list, interval))
 		goto out_free_maps;
 
 	if (perf_stat_init_aggr_mode())
-		goto out;
+		goto out_free_maps;
 
 	/*
 	 * We dont want to block the signals - that would cause
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index c2e02319347abd86c74f6ab0b44c37106c0fb238..41c9bde2fb67f1418faceaaf86475a163694bdeb 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -36,6 +36,7 @@
 #include "util/session.h"
 #include "util/svghelper.h"
 #include "util/tool.h"
+#include "util/data.h"
 
 #define SUPPORT_OLD_POWER_EVENTS 1
 #define PWR_EVENT_EXIT -1
@@ -482,8 +483,8 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
 	if (sample->cpu > numcpus)
 		numcpus = sample->cpu;
 
-	if (evsel->handler.func != NULL) {
-		tracepoint_handler f = evsel->handler.func;
+	if (evsel->handler != NULL) {
+		tracepoint_handler f = evsel->handler;
 		return f(evsel, sample);
 	}
 
@@ -990,8 +991,13 @@ static int __cmd_timechart(const char *output_name)
 		{ "power:power_frequency",	process_sample_power_frequency },
 #endif
 	};
-	struct perf_session *session = perf_session__new(input_name, O_RDONLY,
-							 0, false, &perf_timechart);
+	struct perf_data_file file = {
+		.path = input_name,
+		.mode = PERF_DATA_MODE_READ,
+	};
+
+	struct perf_session *session = perf_session__new(&file, false,
+							 &perf_timechart);
 	int ret = -EINVAL;
 
 	if (session == NULL)
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 5a11f13e56f9f186cc7aa0926f4ee63008327358..9acca8856ccb2ba60ee3ae72cb954b6c1932da65 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -246,10 +246,10 @@ static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel,
 	struct hist_entry *he;
 
 	pthread_mutex_lock(&evsel->hists.lock);
-	he = __hists__add_entry(&evsel->hists, al, NULL, sample->period,
-				sample->weight);
+	he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL,
+				sample->period, sample->weight,
+				sample->transaction);
 	pthread_mutex_unlock(&evsel->hists.lock);
-
 	if (he == NULL)
 		return NULL;
 
@@ -287,7 +287,7 @@ static void perf_top__print_sym_table(struct perf_top *top)
 		return;
 	}
 
-	hists__collapse_resort(&top->sym_evsel->hists);
+	hists__collapse_resort(&top->sym_evsel->hists, NULL);
 	hists__output_resort(&top->sym_evsel->hists);
 	hists__decay_entries(&top->sym_evsel->hists,
 			     top->hide_user_symbols,
@@ -553,7 +553,7 @@ static void perf_top__sort_new_samples(void *arg)
 	if (t->evlist->selected != NULL)
 		t->sym_evsel = t->evlist->selected;
 
-	hists__collapse_resort(&t->sym_evsel->hists);
+	hists__collapse_resort(&t->sym_evsel->hists, NULL);
 	hists__output_resort(&t->sym_evsel->hists);
 	hists__decay_entries(&t->sym_evsel->hists,
 			     t->hide_user_symbols,
@@ -771,7 +771,8 @@ static void perf_event__process_sample(struct perf_tool *tool,
 		    sample->callchain) {
 			err = machine__resolve_callchain(machine, evsel,
 							 al.thread, sample,
-							 &parent, &al);
+							 &parent, &al,
+							 top->max_stack);
 			if (err)
 				return;
 		}
@@ -856,7 +857,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
 						   &sample, machine);
 		} else if (event->header.type < PERF_RECORD_MAX) {
 			hists__inc_nr_events(&evsel->hists, event->header.type);
-			machine__process_event(machine, event);
+			machine__process_event(machine, event, &sample);
 		} else
 			++session->stats.nr_unknown_events;
 next_event:
@@ -932,11 +933,8 @@ static int __cmd_top(struct perf_top *top)
 	struct perf_record_opts *opts = &top->record_opts;
 	pthread_t thread;
 	int ret;
-	/*
-	 * FIXME: perf_session__new should allow passing a O_MMAP, so that all this
-	 * mmap reading, etc is encapsulated in it. Use O_WRONLY for now.
-	 */
-	top->session = perf_session__new(NULL, O_WRONLY, false, false, NULL);
+
+	top->session = perf_session__new(NULL, false, NULL);
 	if (top->session == NULL)
 		return -ENOMEM;
 
@@ -1043,7 +1041,7 @@ parse_percent_limit(const struct option *opt, const char *arg,
 
 int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 {
-	int status;
+	int status = -1;
 	char errbuf[BUFSIZ];
 	struct perf_top top = {
 		.count_filter	     = 5,
@@ -1053,10 +1051,11 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 			.user_freq	= UINT_MAX,
 			.user_interval	= ULLONG_MAX,
 			.freq		= 4000, /* 4 KHz */
-			.target		     = {
+			.target		= {
 				.uses_mmap   = true,
 			},
 		},
+		.max_stack	     = PERF_MAX_STACK_DEPTH,
 		.sym_pcnt_filter     = 5,
 	};
 	struct perf_record_opts *opts = &top.record_opts;
@@ -1076,10 +1075,13 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 		    "list of cpus to monitor"),
 	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
 		   "file", "vmlinux pathname"),
+	OPT_BOOLEAN(0, "ignore-vmlinux", &symbol_conf.ignore_vmlinux,
+		    "don't load vmlinux even if found"),
 	OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols,
 		    "hide kernel symbols"),
-	OPT_UINTEGER('m', "mmap-pages", &opts->mmap_pages,
-		     "number of mmap data pages"),
+	OPT_CALLBACK('m', "mmap-pages", &opts->mmap_pages, "pages",
+		     "number of mmap data pages",
+		     perf_evlist__parse_mmap_pages),
 	OPT_INTEGER('r', "realtime", &top.realtime_prio,
 		    "collect data with this RT SCHED_FIFO priority"),
 	OPT_INTEGER('d', "delay", &top.delay_secs,
@@ -1105,7 +1107,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 	OPT_INCR('v', "verbose", &verbose,
 		    "be more verbose (show counter open errors, etc)"),
 	OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
-		   "sort by key(s): pid, comm, dso, symbol, parent, weight, local_weight"),
+		   "sort by key(s): pid, comm, dso, symbol, parent, weight, local_weight,"
+		   " abort, in_tx, transaction"),
 	OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
 		    "Show a column with the number of samples"),
 	OPT_CALLBACK_NOOPT('G', NULL, &top.record_opts,
@@ -1114,6 +1117,9 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 	OPT_CALLBACK(0, "call-graph", &top.record_opts,
 		     "mode[,dump_size]", record_callchain_help,
 		     &parse_callchain_opt),
+	OPT_INTEGER(0, "max-stack", &top.max_stack,
+		    "Set the maximum stack depth when parsing the callchain. "
+		    "Default: " __stringify(PERF_MAX_STACK_DEPTH)),
 	OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
 		   "ignore callees of these functions in call graphs",
 		   report_parse_ignore_callees_opt),
@@ -1154,8 +1160,10 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 	if (sort_order == default_sort_order)
 		sort_order = "dso,symbol";
 
-	if (setup_sorting() < 0)
-		usage_with_options(top_usage, options);
+	if (setup_sorting() < 0) {
+		parse_options_usage(top_usage, options, "s", 1);
+		goto out_delete_evlist;
+	}
 
 	/* display thread wants entries to be collapsed in a different tree */
 	sort__need_collapse = 1;
@@ -1201,20 +1209,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 	if (top.delay_secs < 1)
 		top.delay_secs = 1;
 
-	if (opts->user_interval != ULLONG_MAX)
-		opts->default_interval = opts->user_interval;
-	if (opts->user_freq != UINT_MAX)
-		opts->freq = opts->user_freq;
-
-	/*
-	 * User specified count overrides default frequency.
-	 */
-	if (opts->default_interval)
-		opts->freq = 0;
-	else if (opts->freq) {
-		opts->default_interval = opts->freq;
-	} else {
-		ui__error("frequency and count are zero, aborting\n");
+	if (perf_record_opts__config(opts)) {
 		status = -EINVAL;
 		goto out_delete_maps;
 	}
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 99c8d9ad6729cabf6bebe5b65c753af1a28d9024..329b7832b5dac7d46ace24cc597e7539cbfddd8d 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -10,9 +10,11 @@
 #include "util/strlist.h"
 #include "util/intlist.h"
 #include "util/thread_map.h"
+#include "util/stat.h"
 
 #include <libaudit.h>
 #include <stdlib.h>
+#include <sys/eventfd.h>
 #include <sys/mman.h>
 #include <linux/futex.h>
 
@@ -33,49 +35,279 @@
 # define MADV_UNMERGEABLE	13
 #endif
 
-static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
-					 unsigned long arg,
-					 u8 arg_idx __maybe_unused,
-					 u8 *arg_mask __maybe_unused)
+struct tp_field {
+	int offset;
+	union {
+		u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
+		void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
+	};
+};
+
+#define TP_UINT_FIELD(bits) \
+static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
+{ \
+	return *(u##bits *)(sample->raw_data + field->offset); \
+}
+
+TP_UINT_FIELD(8);
+TP_UINT_FIELD(16);
+TP_UINT_FIELD(32);
+TP_UINT_FIELD(64);
+
+#define TP_UINT_FIELD__SWAPPED(bits) \
+static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
+{ \
+	u##bits value = *(u##bits *)(sample->raw_data + field->offset); \
+	return bswap_##bits(value);\
+}
+
+TP_UINT_FIELD__SWAPPED(16);
+TP_UINT_FIELD__SWAPPED(32);
+TP_UINT_FIELD__SWAPPED(64);
+
+static int tp_field__init_uint(struct tp_field *field,
+			       struct format_field *format_field,
+			       bool needs_swap)
 {
-	return scnprintf(bf, size, "%#lx", arg);
+	field->offset = format_field->offset;
+
+	switch (format_field->size) {
+	case 1:
+		field->integer = tp_field__u8;
+		break;
+	case 2:
+		field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
+		break;
+	case 4:
+		field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
+		break;
+	case 8:
+		field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
+		break;
+	default:
+		return -1;
+	}
+
+	return 0;
 }
 
-#define SCA_HEX syscall_arg__scnprintf_hex
+static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
+{
+	return sample->raw_data + field->offset;
+}
 
-static size_t syscall_arg__scnprintf_whence(char *bf, size_t size,
-					    unsigned long arg,
-					    u8 arg_idx __maybe_unused,
-					    u8 *arg_mask __maybe_unused)
+static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
 {
-	int whence = arg;
+	field->offset = format_field->offset;
+	field->pointer = tp_field__ptr;
+	return 0;
+}
 
-	switch (whence) {
-#define P_WHENCE(n) case SEEK_##n: return scnprintf(bf, size, #n)
-	P_WHENCE(SET);
-	P_WHENCE(CUR);
-	P_WHENCE(END);
-#ifdef SEEK_DATA
-	P_WHENCE(DATA);
-#endif
-#ifdef SEEK_HOLE
-	P_WHENCE(HOLE);
-#endif
-#undef P_WHENCE
-	default: break;
+struct syscall_tp {
+	struct tp_field id;
+	union {
+		struct tp_field args, ret;
+	};
+};
+
+static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
+					  struct tp_field *field,
+					  const char *name)
+{
+	struct format_field *format_field = perf_evsel__field(evsel, name);
+
+	if (format_field == NULL)
+		return -1;
+
+	return tp_field__init_uint(field, format_field, evsel->needs_swap);
+}
+
+#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
+	({ struct syscall_tp *sc = evsel->priv;\
+	   perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
+
+static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
+					 struct tp_field *field,
+					 const char *name)
+{
+	struct format_field *format_field = perf_evsel__field(evsel, name);
+
+	if (format_field == NULL)
+		return -1;
+
+	return tp_field__init_ptr(field, format_field);
+}
+
+#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
+	({ struct syscall_tp *sc = evsel->priv;\
+	   perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
+
+static void perf_evsel__delete_priv(struct perf_evsel *evsel)
+{
+	free(evsel->priv);
+	evsel->priv = NULL;
+	perf_evsel__delete(evsel);
+}
+
+static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction,
+						    void *handler, int idx)
+{
+	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction, idx);
+
+	if (evsel) {
+		evsel->priv = malloc(sizeof(struct syscall_tp));
+
+		if (evsel->priv == NULL)
+			goto out_delete;
+
+		if (perf_evsel__init_sc_tp_uint_field(evsel, id))
+			goto out_delete;
+
+		evsel->handler = handler;
 	}
 
-	return scnprintf(bf, size, "%#x", whence);
+	return evsel;
+
+out_delete:
+	perf_evsel__delete_priv(evsel);
+	return NULL;
 }
 
-#define SCA_WHENCE syscall_arg__scnprintf_whence
+#define perf_evsel__sc_tp_uint(evsel, name, sample) \
+	({ struct syscall_tp *fields = evsel->priv; \
+	   fields->name.integer(&fields->name, sample); })
+
+#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
+	({ struct syscall_tp *fields = evsel->priv; \
+	   fields->name.pointer(&fields->name, sample); })
+
+static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
+					  void *sys_enter_handler,
+					  void *sys_exit_handler)
+{
+	int ret = -1;
+	int idx = evlist->nr_entries;
+	struct perf_evsel *sys_enter, *sys_exit;
+
+	sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler, idx++);
+	if (sys_enter == NULL)
+		goto out;
+
+	if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
+		goto out_delete_sys_enter;
+
+	sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler, idx++);
+	if (sys_exit == NULL)
+		goto out_delete_sys_enter;
+
+	if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
+		goto out_delete_sys_exit;
+
+	perf_evlist__add(evlist, sys_enter);
+	perf_evlist__add(evlist, sys_exit);
+
+	ret = 0;
+out:
+	return ret;
+
+out_delete_sys_exit:
+	perf_evsel__delete_priv(sys_exit);
+out_delete_sys_enter:
+	perf_evsel__delete_priv(sys_enter);
+	goto out;
+}
+
+
+struct syscall_arg {
+	unsigned long val;
+	struct thread *thread;
+	struct trace  *trace;
+	void	      *parm;
+	u8	      idx;
+	u8	      mask;
+};
+
+struct strarray {
+	int	    offset;
+	int	    nr_entries;
+	const char **entries;
+};
+
+#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
+	.nr_entries = ARRAY_SIZE(array), \
+	.entries = array, \
+}
+
+#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
+	.offset	    = off, \
+	.nr_entries = ARRAY_SIZE(array), \
+	.entries = array, \
+}
+
+static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
+						const char *intfmt,
+					        struct syscall_arg *arg)
+{
+	struct strarray *sa = arg->parm;
+	int idx = arg->val - sa->offset;
+
+	if (idx < 0 || idx >= sa->nr_entries)
+		return scnprintf(bf, size, intfmt, arg->val);
+
+	return scnprintf(bf, size, "%s", sa->entries[idx]);
+}
+
+static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
+					      struct syscall_arg *arg)
+{
+	return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
+}
+
+#define SCA_STRARRAY syscall_arg__scnprintf_strarray
+
+static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
+						 struct syscall_arg *arg)
+{
+	return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
+}
+
+#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
+
+static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
+					struct syscall_arg *arg);
+
+#define SCA_FD syscall_arg__scnprintf_fd
+
+static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
+					   struct syscall_arg *arg)
+{
+	int fd = arg->val;
+
+	if (fd == AT_FDCWD)
+		return scnprintf(bf, size, "CWD");
+
+	return syscall_arg__scnprintf_fd(bf, size, arg);
+}
+
+#define SCA_FDAT syscall_arg__scnprintf_fd_at
+
+static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
+					      struct syscall_arg *arg);
+
+#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
+
+static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
+					 struct syscall_arg *arg)
+{
+	return scnprintf(bf, size, "%#lx", arg->val);
+}
+
+#define SCA_HEX syscall_arg__scnprintf_hex
 
 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
-					       unsigned long arg,
-					       u8 arg_idx __maybe_unused,
-					       u8 *arg_mask __maybe_unused)
+					       struct syscall_arg *arg)
 {
-	int printed = 0, prot = arg;
+	int printed = 0, prot = arg->val;
 
 	if (prot == PROT_NONE)
 		return scnprintf(bf, size, "NONE");
@@ -104,10 +336,9 @@ static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
 
 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
-						unsigned long arg, u8 arg_idx __maybe_unused,
-						u8 *arg_mask __maybe_unused)
+						struct syscall_arg *arg)
 {
-	int printed = 0, flags = arg;
+	int printed = 0, flags = arg->val;
 
 #define	P_MMAP_FLAG(n) \
 	if (flags & MAP_##n) { \
@@ -148,10 +379,9 @@ static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
 
 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
-						      unsigned long arg, u8 arg_idx __maybe_unused,
-						      u8 *arg_mask __maybe_unused)
+						      struct syscall_arg *arg)
 {
-	int behavior = arg;
+	int behavior = arg->val;
 
 	switch (behavior) {
 #define	P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
@@ -190,8 +420,38 @@ static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
 
 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
 
-static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, unsigned long arg,
-					      u8 arg_idx __maybe_unused, u8 *arg_mask)
+static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
+					   struct syscall_arg *arg)
+{
+	int printed = 0, op = arg->val;
+
+	if (op == 0)
+		return scnprintf(bf, size, "NONE");
+#define	P_CMD(cmd) \
+	if ((op & LOCK_##cmd) == LOCK_##cmd) { \
+		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
+		op &= ~LOCK_##cmd; \
+	}
+
+	P_CMD(SH);
+	P_CMD(EX);
+	P_CMD(NB);
+	P_CMD(UN);
+	P_CMD(MAND);
+	P_CMD(RW);
+	P_CMD(READ);
+	P_CMD(WRITE);
+#undef P_OP
+
+	if (op)
+		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
+
+	return printed;
+}
+
+#define SCA_FLOCK syscall_arg__scnprintf_flock
+
+static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
 {
 	enum syscall_futex_args {
 		SCF_UADDR   = (1 << 0),
@@ -201,24 +461,24 @@ static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, unsigned lo
 		SCF_UADDR2  = (1 << 4),
 		SCF_VAL3    = (1 << 5),
 	};
-	int op = arg;
+	int op = arg->val;
 	int cmd = op & FUTEX_CMD_MASK;
 	size_t printed = 0;
 
 	switch (cmd) {
 #define	P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
-	P_FUTEX_OP(WAIT);	    *arg_mask |= SCF_VAL3|SCF_UADDR2;		  break;
-	P_FUTEX_OP(WAKE);	    *arg_mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
-	P_FUTEX_OP(FD);		    *arg_mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
-	P_FUTEX_OP(REQUEUE);	    *arg_mask |= SCF_VAL3|SCF_TIMEOUT;	          break;
-	P_FUTEX_OP(CMP_REQUEUE);    *arg_mask |= SCF_TIMEOUT;			  break;
-	P_FUTEX_OP(CMP_REQUEUE_PI); *arg_mask |= SCF_TIMEOUT;			  break;
+	P_FUTEX_OP(WAIT);	    arg->mask |= SCF_VAL3|SCF_UADDR2;		  break;
+	P_FUTEX_OP(WAKE);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
+	P_FUTEX_OP(FD);		    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
+	P_FUTEX_OP(REQUEUE);	    arg->mask |= SCF_VAL3|SCF_TIMEOUT;	          break;
+	P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;			  break;
+	P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;			  break;
 	P_FUTEX_OP(WAKE_OP);							  break;
-	P_FUTEX_OP(LOCK_PI);	    *arg_mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
-	P_FUTEX_OP(UNLOCK_PI);	    *arg_mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
-	P_FUTEX_OP(TRYLOCK_PI);	    *arg_mask |= SCF_VAL3|SCF_UADDR2;		  break;
-	P_FUTEX_OP(WAIT_BITSET);    *arg_mask |= SCF_UADDR2;			  break;
-	P_FUTEX_OP(WAKE_BITSET);    *arg_mask |= SCF_UADDR2;			  break;
+	P_FUTEX_OP(LOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
+	P_FUTEX_OP(UNLOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
+	P_FUTEX_OP(TRYLOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2;		  break;
+	P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;			  break;
+	P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;			  break;
 	P_FUTEX_OP(WAIT_REQUEUE_PI);						  break;
 	default: printed = scnprintf(bf, size, "%#x", cmd);			  break;
 	}
@@ -234,14 +494,194 @@ static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, unsigned lo
 
 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
 
+static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
+static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
+
+static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
+static DEFINE_STRARRAY(itimers);
+
+static const char *whences[] = { "SET", "CUR", "END",
+#ifdef SEEK_DATA
+"DATA",
+#endif
+#ifdef SEEK_HOLE
+"HOLE",
+#endif
+};
+static DEFINE_STRARRAY(whences);
+
+static const char *fcntl_cmds[] = {
+	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
+	"SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
+	"F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
+	"F_GETOWNER_UIDS",
+};
+static DEFINE_STRARRAY(fcntl_cmds);
+
+static const char *rlimit_resources[] = {
+	"CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
+	"MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
+	"RTTIME",
+};
+static DEFINE_STRARRAY(rlimit_resources);
+
+static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
+static DEFINE_STRARRAY(sighow);
+
+static const char *clockid[] = {
+	"REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
+	"MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
+};
+static DEFINE_STRARRAY(clockid);
+
+static const char *socket_families[] = {
+	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
+	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
+	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
+	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
+	"BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
+	"ALG", "NFC", "VSOCK",
+};
+static DEFINE_STRARRAY(socket_families);
+
+#ifndef SOCK_TYPE_MASK
+#define SOCK_TYPE_MASK 0xf
+#endif
+
+static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
+						      struct syscall_arg *arg)
+{
+	size_t printed;
+	int type = arg->val,
+	    flags = type & ~SOCK_TYPE_MASK;
+
+	type &= SOCK_TYPE_MASK;
+	/*
+ 	 * Can't use a strarray, MIPS may override for ABI reasons.
+ 	 */
+	switch (type) {
+#define	P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
+	P_SK_TYPE(STREAM);
+	P_SK_TYPE(DGRAM);
+	P_SK_TYPE(RAW);
+	P_SK_TYPE(RDM);
+	P_SK_TYPE(SEQPACKET);
+	P_SK_TYPE(DCCP);
+	P_SK_TYPE(PACKET);
+#undef P_SK_TYPE
+	default:
+		printed = scnprintf(bf, size, "%#x", type);
+	}
+
+#define	P_SK_FLAG(n) \
+	if (flags & SOCK_##n) { \
+		printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
+		flags &= ~SOCK_##n; \
+	}
+
+	P_SK_FLAG(CLOEXEC);
+	P_SK_FLAG(NONBLOCK);
+#undef P_SK_FLAG
+
+	if (flags)
+		printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
+
+	return printed;
+}
+
+#define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
+
+#ifndef MSG_PROBE
+#define MSG_PROBE	     0x10
+#endif
+#ifndef MSG_WAITFORONE
+#define MSG_WAITFORONE	0x10000
+#endif
+#ifndef MSG_SENDPAGE_NOTLAST
+#define MSG_SENDPAGE_NOTLAST 0x20000
+#endif
+#ifndef MSG_FASTOPEN
+#define MSG_FASTOPEN	     0x20000000
+#endif
+
+static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
+					       struct syscall_arg *arg)
+{
+	int printed = 0, flags = arg->val;
+
+	if (flags == 0)
+		return scnprintf(bf, size, "NONE");
+#define	P_MSG_FLAG(n) \
+	if (flags & MSG_##n) { \
+		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+		flags &= ~MSG_##n; \
+	}
+
+	P_MSG_FLAG(OOB);
+	P_MSG_FLAG(PEEK);
+	P_MSG_FLAG(DONTROUTE);
+	P_MSG_FLAG(TRYHARD);
+	P_MSG_FLAG(CTRUNC);
+	P_MSG_FLAG(PROBE);
+	P_MSG_FLAG(TRUNC);
+	P_MSG_FLAG(DONTWAIT);
+	P_MSG_FLAG(EOR);
+	P_MSG_FLAG(WAITALL);
+	P_MSG_FLAG(FIN);
+	P_MSG_FLAG(SYN);
+	P_MSG_FLAG(CONFIRM);
+	P_MSG_FLAG(RST);
+	P_MSG_FLAG(ERRQUEUE);
+	P_MSG_FLAG(NOSIGNAL);
+	P_MSG_FLAG(MORE);
+	P_MSG_FLAG(WAITFORONE);
+	P_MSG_FLAG(SENDPAGE_NOTLAST);
+	P_MSG_FLAG(FASTOPEN);
+	P_MSG_FLAG(CMSG_CLOEXEC);
+#undef P_MSG_FLAG
+
+	if (flags)
+		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+	return printed;
+}
+
+#define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
+
+static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
+						 struct syscall_arg *arg)
+{
+	size_t printed = 0;
+	int mode = arg->val;
+
+	if (mode == F_OK) /* 0 */
+		return scnprintf(bf, size, "F");
+#define	P_MODE(n) \
+	if (mode & n##_OK) { \
+		printed += scnprintf(bf + printed, size - printed, "%s", #n); \
+		mode &= ~n##_OK; \
+	}
+
+	P_MODE(R);
+	P_MODE(W);
+	P_MODE(X);
+#undef P_MODE
+
+	if (mode)
+		printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
+
+	return printed;
+}
+
+#define SCA_ACCMODE syscall_arg__scnprintf_access_mode
+
 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
-					       unsigned long arg,
-					       u8 arg_idx, u8 *arg_mask)
+					       struct syscall_arg *arg)
 {
-	int printed = 0, flags = arg;
+	int printed = 0, flags = arg->val;
 
 	if (!(flags & O_CREAT))
-		*arg_mask |= 1 << (arg_idx + 1); /* Mask the mode parm */
+		arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
 
 	if (flags == 0)
 		return scnprintf(bf, size, "RDONLY");
@@ -291,32 +731,225 @@ static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
 
 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
 
+static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
+						   struct syscall_arg *arg)
+{
+	int printed = 0, flags = arg->val;
+
+	if (flags == 0)
+		return scnprintf(bf, size, "NONE");
+#define	P_FLAG(n) \
+	if (flags & EFD_##n) { \
+		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+		flags &= ~EFD_##n; \
+	}
+
+	P_FLAG(SEMAPHORE);
+	P_FLAG(CLOEXEC);
+	P_FLAG(NONBLOCK);
+#undef P_FLAG
+
+	if (flags)
+		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+	return printed;
+}
+
+#define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
+
+static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
+						struct syscall_arg *arg)
+{
+	int printed = 0, flags = arg->val;
+
+#define	P_FLAG(n) \
+	if (flags & O_##n) { \
+		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+		flags &= ~O_##n; \
+	}
+
+	P_FLAG(CLOEXEC);
+	P_FLAG(NONBLOCK);
+#undef P_FLAG
+
+	if (flags)
+		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+	return printed;
+}
+
+#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
+
+static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
+{
+	int sig = arg->val;
+
+	switch (sig) {
+#define	P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
+	P_SIGNUM(HUP);
+	P_SIGNUM(INT);
+	P_SIGNUM(QUIT);
+	P_SIGNUM(ILL);
+	P_SIGNUM(TRAP);
+	P_SIGNUM(ABRT);
+	P_SIGNUM(BUS);
+	P_SIGNUM(FPE);
+	P_SIGNUM(KILL);
+	P_SIGNUM(USR1);
+	P_SIGNUM(SEGV);
+	P_SIGNUM(USR2);
+	P_SIGNUM(PIPE);
+	P_SIGNUM(ALRM);
+	P_SIGNUM(TERM);
+	P_SIGNUM(STKFLT);
+	P_SIGNUM(CHLD);
+	P_SIGNUM(CONT);
+	P_SIGNUM(STOP);
+	P_SIGNUM(TSTP);
+	P_SIGNUM(TTIN);
+	P_SIGNUM(TTOU);
+	P_SIGNUM(URG);
+	P_SIGNUM(XCPU);
+	P_SIGNUM(XFSZ);
+	P_SIGNUM(VTALRM);
+	P_SIGNUM(PROF);
+	P_SIGNUM(WINCH);
+	P_SIGNUM(IO);
+	P_SIGNUM(PWR);
+	P_SIGNUM(SYS);
+	default: break;
+	}
+
+	return scnprintf(bf, size, "%#x", sig);
+}
+
+#define SCA_SIGNUM syscall_arg__scnprintf_signum
+
+#define TCGETS		0x5401
+
+static const char *tioctls[] = {
+	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
+	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
+	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
+	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
+	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
+	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
+	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
+	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
+	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
+	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
+	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
+	[0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
+	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
+	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
+	"TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
+};
+
+static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
+
+#define STRARRAY(arg, name, array) \
+	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
+	  .arg_parm	 = { [arg] = &strarray__##array, }
+
 static struct syscall_fmt {
 	const char *name;
 	const char *alias;
-	size_t	   (*arg_scnprintf[6])(char *bf, size_t size, unsigned long arg, u8 arg_idx, u8 *arg_mask);
+	size_t	   (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
+	void	   *arg_parm[6];
 	bool	   errmsg;
 	bool	   timeout;
 	bool	   hexret;
 } syscall_fmts[] = {
-	{ .name	    = "access",	    .errmsg = true, },
+	{ .name	    = "access",	    .errmsg = true,
+	  .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
 	{ .name	    = "arch_prctl", .errmsg = true, .alias = "prctl", },
 	{ .name	    = "brk",	    .hexret = true,
 	  .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
-	{ .name	    = "mmap",	    .hexret = true, },
+	{ .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
+	{ .name	    = "close",	    .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, 
 	{ .name	    = "connect",    .errmsg = true, },
-	{ .name	    = "fstat",	    .errmsg = true, .alias = "newfstat", },
-	{ .name	    = "fstatat",    .errmsg = true, .alias = "newfstatat", },
+	{ .name	    = "dup",	    .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
+	{ .name	    = "dup2",	    .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
+	{ .name	    = "dup3",	    .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
+	{ .name	    = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
+	{ .name	    = "eventfd2",   .errmsg = true,
+	  .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
+	{ .name	    = "faccessat",  .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
+	{ .name	    = "fadvise64",  .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
+	{ .name	    = "fallocate",  .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
+	{ .name	    = "fchdir",	    .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
+	{ .name	    = "fchmod",	    .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
+	{ .name	    = "fchmodat",   .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
+	{ .name	    = "fchown",	    .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
+	{ .name	    = "fchownat",   .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
+	{ .name	    = "fcntl",	    .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
+			     [1] = SCA_STRARRAY, /* cmd */ },
+	  .arg_parm	 = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
+	{ .name	    = "fdatasync",  .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
+	{ .name	    = "flock",	    .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
+			     [1] = SCA_FLOCK, /* cmd */ }, },
+	{ .name	    = "fsetxattr",  .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
+	{ .name	    = "fstat",	    .errmsg = true, .alias = "newfstat",
+	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
+	{ .name	    = "fstatat",    .errmsg = true, .alias = "newfstatat",
+	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
+	{ .name	    = "fstatfs",    .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
+	{ .name	    = "fsync",    .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
+	{ .name	    = "ftruncate", .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
 	{ .name	    = "futex",	    .errmsg = true,
 	  .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
+	{ .name	    = "futimesat", .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
+	{ .name	    = "getdents",   .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
+	{ .name	    = "getdents64", .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
+	{ .name	    = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
+	{ .name	    = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
 	{ .name	    = "ioctl",	    .errmsg = true,
-	  .arg_scnprintf = { [2] = SCA_HEX, /* arg */ }, },
+	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ 
+			     [1] = SCA_STRHEXARRAY, /* cmd */
+			     [2] = SCA_HEX, /* arg */ },
+	  .arg_parm	 = { [1] = &strarray__tioctls, /* cmd */ }, },
+	{ .name	    = "kill",	    .errmsg = true,
+	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
+	{ .name	    = "linkat",	    .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
 	{ .name	    = "lseek",	    .errmsg = true,
-	  .arg_scnprintf = { [2] = SCA_WHENCE, /* whence */ }, },
+	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
+			     [2] = SCA_STRARRAY, /* whence */ },
+	  .arg_parm	 = { [2] = &strarray__whences, /* whence */ }, },
 	{ .name	    = "lstat",	    .errmsg = true, .alias = "newlstat", },
 	{ .name     = "madvise",    .errmsg = true,
 	  .arg_scnprintf = { [0] = SCA_HEX,	 /* start */
 			     [2] = SCA_MADV_BHV, /* behavior */ }, },
+	{ .name	    = "mkdirat",    .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
+	{ .name	    = "mknodat",    .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, 
+	{ .name	    = "mlock",	    .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
+	{ .name	    = "mlockall",   .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
 	{ .name	    = "mmap",	    .hexret = true,
 	  .arg_scnprintf = { [0] = SCA_HEX,	  /* addr */
 			     [2] = SCA_MMAP_PROT, /* prot */
@@ -327,24 +960,91 @@ static struct syscall_fmt {
 	{ .name	    = "mremap",	    .hexret = true,
 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */
 			     [4] = SCA_HEX, /* new_addr */ }, },
+	{ .name	    = "munlock",    .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
 	{ .name	    = "munmap",	    .errmsg = true,
 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
+	{ .name	    = "name_to_handle_at", .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
+	{ .name	    = "newfstatat", .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
 	{ .name	    = "open",	    .errmsg = true,
 	  .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
 	{ .name	    = "open_by_handle_at", .errmsg = true,
-	  .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
+	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
+			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
 	{ .name	    = "openat",	    .errmsg = true,
-	  .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
+	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
+			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
+	{ .name	    = "pipe2",	    .errmsg = true,
+	  .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
 	{ .name	    = "poll",	    .errmsg = true, .timeout = true, },
 	{ .name	    = "ppoll",	    .errmsg = true, .timeout = true, },
-	{ .name	    = "pread",	    .errmsg = true, .alias = "pread64", },
-	{ .name	    = "pwrite",	    .errmsg = true, .alias = "pwrite64", },
-	{ .name	    = "read",	    .errmsg = true, },
-	{ .name	    = "recvfrom",   .errmsg = true, },
+	{ .name	    = "pread",	    .errmsg = true, .alias = "pread64",
+	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
+	{ .name	    = "preadv",	    .errmsg = true, .alias = "pread",
+	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
+	{ .name	    = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
+	{ .name	    = "pwrite",	    .errmsg = true, .alias = "pwrite64",
+	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
+	{ .name	    = "pwritev",    .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
+	{ .name	    = "read",	    .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
+	{ .name	    = "readlinkat", .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
+	{ .name	    = "readv",	    .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
+	{ .name	    = "recvfrom",   .errmsg = true,
+	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
+	{ .name	    = "recvmmsg",   .errmsg = true,
+	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
+	{ .name	    = "recvmsg",    .errmsg = true,
+	  .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
+	{ .name	    = "renameat",   .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
+	{ .name	    = "rt_sigaction", .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
+	{ .name	    = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
+	{ .name	    = "rt_sigqueueinfo", .errmsg = true,
+	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
+	{ .name	    = "rt_tgsigqueueinfo", .errmsg = true,
+	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
 	{ .name	    = "select",	    .errmsg = true, .timeout = true, },
-	{ .name	    = "socket",	    .errmsg = true, },
+	{ .name	    = "sendmmsg",    .errmsg = true,
+	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
+	{ .name	    = "sendmsg",    .errmsg = true,
+	  .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
+	{ .name	    = "sendto",	    .errmsg = true,
+	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
+	{ .name	    = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
+	{ .name	    = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
+	{ .name	    = "shutdown",   .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
+	{ .name	    = "socket",	    .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
+			     [1] = SCA_SK_TYPE, /* type */ },
+	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
+	{ .name	    = "socketpair", .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
+			     [1] = SCA_SK_TYPE, /* type */ },
+	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
 	{ .name	    = "stat",	    .errmsg = true, .alias = "newstat", },
+	{ .name	    = "symlinkat",  .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, 
+	{ .name	    = "tgkill",	    .errmsg = true,
+	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
+	{ .name	    = "tkill",	    .errmsg = true,
+	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
 	{ .name	    = "uname",	    .errmsg = true, .alias = "newuname", },
+	{ .name	    = "unlinkat",   .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
+	{ .name	    = "utimensat",  .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
+	{ .name	    = "write",	    .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
+	{ .name	    = "writev",	    .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, 
 };
 
 static int syscall_fmt__cmp(const void *name, const void *fmtp)
@@ -364,8 +1064,8 @@ struct syscall {
 	const char	    *name;
 	bool		    filtered;
 	struct syscall_fmt  *fmt;
-	size_t		    (**arg_scnprintf)(char *bf, size_t size,
-					      unsigned long arg, u8 arg_idx, u8 *args_mask);
+	size_t		    (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
+	void		    **arg_parm;
 };
 
 static size_t fprintf_duration(unsigned long t, FILE *fp)
@@ -389,11 +1089,24 @@ struct thread_trace {
 	unsigned long	  nr_events;
 	char		  *entry_str;
 	double		  runtime_ms;
+	struct {
+		int	  max;
+		char	  **table;
+	} paths;
+
+	struct intlist *syscall_stats;
 };
 
 static struct thread_trace *thread_trace__new(void)
 {
-	return zalloc(sizeof(struct thread_trace));
+	struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
+
+	if (ttrace)
+		ttrace->paths.max = -1;
+
+	ttrace->syscall_stats = intlist__new(NULL);
+
+	return ttrace;
 }
 
 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
@@ -421,26 +1134,140 @@ static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
 
 struct trace {
 	struct perf_tool	tool;
-	int			audit_machine;
+	struct {
+		int		machine;
+		int		open_id;
+	}			audit;
 	struct {
 		int		max;
 		struct syscall  *table;
 	} syscalls;
 	struct perf_record_opts opts;
-	struct machine		host;
+	struct machine		*host;
 	u64			base_time;
+	bool			full_time;
 	FILE			*output;
 	unsigned long		nr_events;
 	struct strlist		*ev_qualifier;
 	bool			not_ev_qualifier;
+	bool			live;
+	const char 		*last_vfs_getname;
 	struct intlist		*tid_list;
 	struct intlist		*pid_list;
 	bool			sched;
 	bool			multiple_threads;
+	bool			summary;
+	bool			show_comm;
+	bool			show_tool_stats;
 	double			duration_filter;
 	double			runtime_ms;
+	struct {
+		u64		vfs_getname, proc_getname;
+	} stats;
 };
 
+static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
+{
+	struct thread_trace *ttrace = thread->priv;
+
+	if (fd > ttrace->paths.max) {
+		char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
+
+		if (npath == NULL)
+			return -1;
+
+		if (ttrace->paths.max != -1) {
+			memset(npath + ttrace->paths.max + 1, 0,
+			       (fd - ttrace->paths.max) * sizeof(char *));
+		} else {
+			memset(npath, 0, (fd + 1) * sizeof(char *));
+		}
+
+		ttrace->paths.table = npath;
+		ttrace->paths.max   = fd;
+	}
+
+	ttrace->paths.table[fd] = strdup(pathname);
+
+	return ttrace->paths.table[fd] != NULL ? 0 : -1;
+}
+
+static int thread__read_fd_path(struct thread *thread, int fd)
+{
+	char linkname[PATH_MAX], pathname[PATH_MAX];
+	struct stat st;
+	int ret;
+
+	if (thread->pid_ == thread->tid) {
+		scnprintf(linkname, sizeof(linkname),
+			  "/proc/%d/fd/%d", thread->pid_, fd);
+	} else {
+		scnprintf(linkname, sizeof(linkname),
+			  "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
+	}
+
+	if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
+		return -1;
+
+	ret = readlink(linkname, pathname, sizeof(pathname));
+
+	if (ret < 0 || ret > st.st_size)
+		return -1;
+
+	pathname[ret] = '\0';
+	return trace__set_fd_pathname(thread, fd, pathname);
+}
+
+static const char *thread__fd_path(struct thread *thread, int fd,
+				   struct trace *trace)
+{
+	struct thread_trace *ttrace = thread->priv;
+
+	if (ttrace == NULL)
+		return NULL;
+
+	if (fd < 0)
+		return NULL;
+
+	if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL))
+		if (!trace->live)
+			return NULL;
+		++trace->stats.proc_getname;
+		if (thread__read_fd_path(thread, fd)) {
+			return NULL;
+	}
+
+	return ttrace->paths.table[fd];
+}
+
+static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
+					struct syscall_arg *arg)
+{
+	int fd = arg->val;
+	size_t printed = scnprintf(bf, size, "%d", fd);
+	const char *path = thread__fd_path(arg->thread, fd, arg->trace);
+
+	if (path)
+		printed += scnprintf(bf + printed, size - printed, "<%s>", path);
+
+	return printed;
+}
+
+static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
+					      struct syscall_arg *arg)
+{
+	int fd = arg->val;
+	size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
+	struct thread_trace *ttrace = arg->thread->priv;
+
+	if (ttrace && fd >= 0 && fd <= ttrace->paths.max) {
+		free(ttrace->paths.table[fd]);
+		ttrace->paths.table[fd] = NULL;
+	}
+
+	return printed;
+}
+
 static bool trace__filter_duration(struct trace *trace, double t)
 {
 	return t < (trace->duration_filter * NSEC_PER_MSEC);
@@ -454,10 +1281,12 @@ static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
 }
 
 static bool done = false;
+static bool interrupted = false;
 
-static void sig_handler(int sig __maybe_unused)
+static void sig_handler(int sig)
 {
 	done = true;
+	interrupted = sig == SIGINT;
 }
 
 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
@@ -466,14 +1295,17 @@ static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thre
 	size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
 	printed += fprintf_duration(duration, fp);
 
-	if (trace->multiple_threads)
+	if (trace->multiple_threads) {
+		if (trace->show_comm)
+			printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
 		printed += fprintf(fp, "%d ", thread->tid);
+	}
 
 	return printed;
 }
 
 static int trace__process_event(struct trace *trace, struct machine *machine,
-				union perf_event *event)
+				union perf_event *event, struct perf_sample *sample)
 {
 	int ret = 0;
 
@@ -481,9 +1313,9 @@ static int trace__process_event(struct trace *trace, struct machine *machine,
 	case PERF_RECORD_LOST:
 		color_fprintf(trace->output, PERF_COLOR_RED,
 			      "LOST %" PRIu64 " events!\n", event->lost.lost);
-		ret = machine__process_lost_event(machine, event);
+		ret = machine__process_lost_event(machine, event, sample);
 	default:
-		ret = machine__process_event(machine, event);
+		ret = machine__process_event(machine, event, sample);
 		break;
 	}
 
@@ -492,11 +1324,11 @@ static int trace__process_event(struct trace *trace, struct machine *machine,
 
 static int trace__tool_process(struct perf_tool *tool,
 			       union perf_event *event,
-			       struct perf_sample *sample __maybe_unused,
+			       struct perf_sample *sample,
 			       struct machine *machine)
 {
 	struct trace *trace = container_of(tool, struct trace, tool);
-	return trace__process_event(trace, machine, event);
+	return trace__process_event(trace, machine, event, sample);
 }
 
 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
@@ -506,16 +1338,17 @@ static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
 	if (err)
 		return err;
 
-	machine__init(&trace->host, "", HOST_KERNEL_ID);
-	machine__create_kernel_maps(&trace->host);
+	trace->host = machine__new_host();
+	if (trace->host == NULL)
+		return -ENOMEM;
 
 	if (perf_target__has_task(&trace->opts.target)) {
 		err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads,
 							trace__tool_process,
-							&trace->host);
+							trace->host);
 	} else {
 		err = perf_event__synthesize_threads(&trace->tool, trace__tool_process,
-						     &trace->host);
+						     trace->host);
 	}
 
 	if (err)
@@ -533,6 +1366,9 @@ static int syscall__set_arg_fmts(struct syscall *sc)
 	if (sc->arg_scnprintf == NULL)
 		return -1;
 
+	if (sc->fmt)
+		sc->arg_parm = sc->fmt->arg_parm;
+
 	for (field = sc->tp_format->format.fields->next; field; field = field->next) {
 		if (sc->fmt && sc->fmt->arg_scnprintf[idx])
 			sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
@@ -548,7 +1384,7 @@ static int trace__read_syscall_info(struct trace *trace, int id)
 {
 	char tp_name[128];
 	struct syscall *sc;
-	const char *name = audit_syscall_to_name(id, trace->audit_machine);
+	const char *name = audit_syscall_to_name(id, trace->audit.machine);
 
 	if (name == NULL)
 		return -1;
@@ -603,32 +1439,52 @@ static int trace__read_syscall_info(struct trace *trace, int id)
 }
 
 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
-				      unsigned long *args)
+				      unsigned long *args, struct trace *trace,
+				      struct thread *thread)
 {
-	int i = 0;
 	size_t printed = 0;
 
 	if (sc->tp_format != NULL) {
 		struct format_field *field;
-		u8 mask = 0, bit = 1;
+		u8 bit = 1;
+		struct syscall_arg arg = {
+			.idx	= 0,
+			.mask	= 0,
+			.trace  = trace,
+			.thread = thread,
+		};
 
 		for (field = sc->tp_format->format.fields->next; field;
-		     field = field->next, ++i, bit <<= 1) {
-			if (mask & bit)
+		     field = field->next, ++arg.idx, bit <<= 1) {
+			if (arg.mask & bit)
+				continue;
+			/*
+ 			 * Suppress this argument if its value is zero and
+ 			 * and we don't have a string associated in an
+ 			 * strarray for it.
+ 			 */
+			if (args[arg.idx] == 0 &&
+			    !(sc->arg_scnprintf &&
+			      sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
+			      sc->arg_parm[arg.idx]))
 				continue;
 
 			printed += scnprintf(bf + printed, size - printed,
 					     "%s%s: ", printed ? ", " : "", field->name);
-
-			if (sc->arg_scnprintf && sc->arg_scnprintf[i]) {
-				printed += sc->arg_scnprintf[i](bf + printed, size - printed,
-								args[i], i, &mask);
+			if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
+				arg.val = args[arg.idx];
+				if (sc->arg_parm)
+					arg.parm = sc->arg_parm[arg.idx];
+				printed += sc->arg_scnprintf[arg.idx](bf + printed,
+								      size - printed, &arg);
 			} else {
 				printed += scnprintf(bf + printed, size - printed,
-						     "%ld", args[i]);
+						     "%ld", args[arg.idx]);
 			}
 		}
 	} else {
+		int i = 0;
+
 		while (i < 6) {
 			printed += scnprintf(bf + printed, size - printed,
 					     "%sarg%d: %ld",
@@ -644,10 +1500,8 @@ typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
 				  struct perf_sample *sample);
 
 static struct syscall *trace__syscall_info(struct trace *trace,
-					   struct perf_evsel *evsel,
-					   struct perf_sample *sample)
+					   struct perf_evsel *evsel, int id)
 {
-	int id = perf_evsel__intval(evsel, sample, "id");
 
 	if (id < 0) {
 
@@ -688,6 +1542,32 @@ static struct syscall *trace__syscall_info(struct trace *trace,
 	return NULL;
 }
 
+static void thread__update_stats(struct thread_trace *ttrace,
+				 int id, struct perf_sample *sample)
+{
+	struct int_node *inode;
+	struct stats *stats;
+	u64 duration = 0;
+
+	inode = intlist__findnew(ttrace->syscall_stats, id);
+	if (inode == NULL)
+		return;
+
+	stats = inode->priv;
+	if (stats == NULL) {
+		stats = malloc(sizeof(struct stats));
+		if (stats == NULL)
+			return;
+		init_stats(stats);
+		inode->priv = stats;
+	}
+
+	if (ttrace->entry_time && sample->time > ttrace->entry_time)
+		duration = sample->time - ttrace->entry_time;
+
+	update_stats(stats, duration);
+}
+
 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
 			    struct perf_sample *sample)
 {
@@ -695,7 +1575,8 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
 	void *args;
 	size_t printed = 0;
 	struct thread *thread;
-	struct syscall *sc = trace__syscall_info(trace, evsel, sample);
+	int id = perf_evsel__sc_tp_uint(evsel, id, sample);
+	struct syscall *sc = trace__syscall_info(trace, evsel, id);
 	struct thread_trace *ttrace;
 
 	if (sc == NULL)
@@ -704,18 +1585,12 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
 	if (sc->filtered)
 		return 0;
 
-	thread = machine__findnew_thread(&trace->host, sample->pid,
-					 sample->tid);
+	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
 	ttrace = thread__trace(thread, trace->output);
 	if (ttrace == NULL)
 		return -1;
 
-	args = perf_evsel__rawptr(evsel, sample, "args");
-	if (args == NULL) {
-		fprintf(trace->output, "Problems reading syscall arguments\n");
-		return -1;
-	}
-
+	args = perf_evsel__sc_tp_ptr(evsel, args, sample);
 	ttrace = thread->priv;
 
 	if (ttrace->entry_str == NULL) {
@@ -728,7 +1603,8 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
 	msg = ttrace->entry_str;
 	printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
 
-	printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,  args);
+	printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
+					   args, trace, thread);
 
 	if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
 		if (!trace->duration_filter) {
@@ -747,7 +1623,8 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
 	int ret;
 	u64 duration = 0;
 	struct thread *thread;
-	struct syscall *sc = trace__syscall_info(trace, evsel, sample);
+	int id = perf_evsel__sc_tp_uint(evsel, id, sample);
+	struct syscall *sc = trace__syscall_info(trace, evsel, id);
 	struct thread_trace *ttrace;
 
 	if (sc == NULL)
@@ -756,13 +1633,21 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
 	if (sc->filtered)
 		return 0;
 
-	thread = machine__findnew_thread(&trace->host, sample->pid,
-					 sample->tid);
+	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
 	ttrace = thread__trace(thread, trace->output);
 	if (ttrace == NULL)
 		return -1;
 
-	ret = perf_evsel__intval(evsel, sample, "ret");
+	if (trace->summary)
+		thread__update_stats(ttrace, id, sample);
+
+	ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
+
+	if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
+		trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
+		trace->last_vfs_getname = NULL;
+		++trace->stats.vfs_getname;
+	}
 
 	ttrace = thread->priv;
 
@@ -808,12 +1693,19 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
 	return 0;
 }
 
+static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
+			      struct perf_sample *sample)
+{
+	trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
+	return 0;
+}
+
 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
 				     struct perf_sample *sample)
 {
         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
 	double runtime_ms = (double)runtime / NSEC_PER_MSEC;
-	struct thread *thread = machine__findnew_thread(&trace->host,
+	struct thread *thread = machine__findnew_thread(trace->host,
 							sample->pid,
 							sample->tid);
 	struct thread_trace *ttrace = thread__trace(thread, trace->output);
@@ -856,12 +1748,12 @@ static int trace__process_sample(struct perf_tool *tool,
 	struct trace *trace = container_of(tool, struct trace, tool);
 	int err = 0;
 
-	tracepoint_handler handler = evsel->handler.func;
+	tracepoint_handler handler = evsel->handler;
 
 	if (skip_sample(trace, sample))
 		return 0;
 
-	if (trace->base_time == 0)
+	if (!trace->full_time && trace->base_time == 0)
 		trace->base_time = sample->time;
 
 	if (handler)
@@ -901,6 +1793,51 @@ static int parse_target_str(struct trace *trace)
 	return 0;
 }
 
+static int trace__record(int argc, const char **argv)
+{
+	unsigned int rec_argc, i, j;
+	const char **rec_argv;
+	const char * const record_args[] = {
+		"record",
+		"-R",
+		"-m", "1024",
+		"-c", "1",
+		"-e", "raw_syscalls:sys_enter,raw_syscalls:sys_exit",
+	};
+
+	rec_argc = ARRAY_SIZE(record_args) + argc;
+	rec_argv = calloc(rec_argc + 1, sizeof(char *));
+
+	if (rec_argv == NULL)
+		return -ENOMEM;
+
+	for (i = 0; i < ARRAY_SIZE(record_args); i++)
+		rec_argv[i] = record_args[i];
+
+	for (j = 0; j < (unsigned int)argc; j++, i++)
+		rec_argv[i] = argv[j];
+
+	return cmd_record(i, rec_argv, NULL);
+}
+
+static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
+
+static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname",
+						     evlist->nr_entries);
+	if (evsel == NULL)
+		return;
+
+	if (perf_evsel__field(evsel, "pathname") == NULL) {
+		perf_evsel__delete(evsel);
+		return;
+	}
+
+	evsel->handler = trace__vfs_getname;
+	perf_evlist__add(evlist, evsel);
+}
+
 static int trace__run(struct trace *trace, int argc, const char **argv)
 {
 	struct perf_evlist *evlist = perf_evlist__new();
@@ -909,23 +1846,22 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 	unsigned long before;
 	const bool forks = argc > 0;
 
+	trace->live = true;
+
 	if (evlist == NULL) {
 		fprintf(trace->output, "Not enough memory to run!\n");
 		goto out;
 	}
 
-	if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) ||
-	    perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) {
-		fprintf(trace->output, "Couldn't read the raw_syscalls tracepoints information!\n");
-		goto out_delete_evlist;
-	}
+	if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit))
+		goto out_error_tp;
+
+	perf_evlist__add_vfs_getname(evlist);
 
 	if (trace->sched &&
-	    perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
-				   trace__sched_stat_runtime)) {
-		fprintf(trace->output, "Couldn't read the sched_stat_runtime tracepoint information!\n");
-		goto out_delete_evlist;
-	}
+		perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
+				trace__sched_stat_runtime))
+		goto out_error_tp;
 
 	err = perf_evlist__create_maps(evlist, &trace->opts.target);
 	if (err < 0) {
@@ -954,10 +1890,8 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 	}
 
 	err = perf_evlist__open(evlist);
-	if (err < 0) {
-		fprintf(trace->output, "Couldn't create the events: %s\n", strerror(errno));
-		goto out_delete_maps;
-	}
+	if (err < 0)
+		goto out_error_open;
 
 	err = perf_evlist__mmap(evlist, UINT_MAX, false);
 	if (err < 0) {
@@ -990,11 +1924,11 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 				goto next_event;
 			}
 
-			if (trace->base_time == 0)
+			if (!trace->full_time && trace->base_time == 0)
 				trace->base_time = sample.time;
 
 			if (type != PERF_RECORD_SAMPLE) {
-				trace__process_event(trace, &trace->host, event);
+				trace__process_event(trace, trace->host, event, &sample);
 				continue;
 			}
 
@@ -1011,29 +1945,41 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 				goto next_event;
 			}
 
-			handler = evsel->handler.func;
+			handler = evsel->handler;
 			handler(trace, evsel, &sample);
 next_event:
 			perf_evlist__mmap_consume(evlist, i);
 
-			if (done)
-				goto out_unmap_evlist;
+			if (interrupted)
+				goto out_disable;
 		}
 	}
 
 	if (trace->nr_events == before) {
-		if (done)
-			goto out_unmap_evlist;
+		int timeout = done ? 100 : -1;
 
-		poll(evlist->pollfd, evlist->nr_fds, -1);
+		if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0)
+			goto again;
+	} else {
+		goto again;
 	}
 
-	if (done)
-		perf_evlist__disable(evlist);
+out_disable:
+	perf_evlist__disable(evlist);
 
-	goto again;
+	if (!err) {
+		if (trace->summary)
+			trace__fprintf_thread_summary(trace, trace->output);
+
+		if (trace->show_tool_stats) {
+			fprintf(trace->output, "Stats:\n "
+					       " vfs_getname : %" PRIu64 "\n"
+					       " proc_getname: %" PRIu64 "\n",
+				trace->stats.vfs_getname,
+				trace->stats.proc_getname);
+		}
+	}
 
-out_unmap_evlist:
 	perf_evlist__munmap(evlist);
 out_close_evlist:
 	perf_evlist__close(evlist);
@@ -1042,7 +1988,22 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 out_delete_evlist:
 	perf_evlist__delete(evlist);
 out:
+	trace->live = false;
 	return err;
+{
+	char errbuf[BUFSIZ];
+
+out_error_tp:
+	perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf));
+	goto out_error;
+
+out_error_open:
+	perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
+
+out_error:
+	fprintf(trace->output, "%s\n", errbuf);
+	goto out_delete_evlist;
+}
 }
 
 static int trace__replay(struct trace *trace)
@@ -1050,8 +2011,12 @@ static int trace__replay(struct trace *trace)
 	const struct perf_evsel_str_handler handlers[] = {
 		{ "raw_syscalls:sys_enter",  trace__sys_enter, },
 		{ "raw_syscalls:sys_exit",   trace__sys_exit, },
+		{ "probe:vfs_getname",	     trace__vfs_getname, },
+	};
+	struct perf_data_file file = {
+		.path  = input_name,
+		.mode  = PERF_DATA_MODE_READ,
 	};
-
 	struct perf_session *session;
 	int err = -1;
 
@@ -1074,11 +2039,12 @@ static int trace__replay(struct trace *trace)
 	if (symbol__init() < 0)
 		return -1;
 
-	session = perf_session__new(input_name, O_RDONLY, 0, false,
-				    &trace->tool);
+	session = perf_session__new(&file, false, &trace->tool);
 	if (session == NULL)
 		return -ENOMEM;
 
+	trace->host = &session->machines.host;
+
 	err = perf_session__set_tracepoints_handlers(session, handlers);
 	if (err)
 		goto out;
@@ -1103,6 +2069,9 @@ static int trace__replay(struct trace *trace)
 	if (err)
 		pr_err("Failed to process events, error %d", err);
 
+	else if (trace->summary)
+		trace__fprintf_thread_summary(trace, trace->output);
+
 out:
 	perf_session__delete(session);
 
@@ -1113,47 +2082,111 @@ static size_t trace__fprintf_threads_header(FILE *fp)
 {
 	size_t printed;
 
-	printed  = fprintf(fp, "\n _____________________________________________________________________\n");
-	printed += fprintf(fp," __)    Summary of events    (__\n\n");
-	printed += fprintf(fp,"              [ task - pid ]     [ events ] [ ratio ]  [ runtime ]\n");
-	printed += fprintf(fp," _____________________________________________________________________\n\n");
+	printed  = fprintf(fp, "\n _____________________________________________________________________________\n");
+	printed += fprintf(fp, " __)    Summary of events    (__\n\n");
+	printed += fprintf(fp, "              [ task - pid ]     [ events ] [ ratio ]  [ runtime ]\n");
+	printed += fprintf(fp, "                                  syscall  count    min     max    avg  stddev\n");
+	printed += fprintf(fp, "                                                   msec    msec   msec     %%\n");
+	printed += fprintf(fp, " _____________________________________________________________________________\n\n");
 
 	return printed;
 }
 
-static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
+static size_t thread__dump_stats(struct thread_trace *ttrace,
+				 struct trace *trace, FILE *fp)
 {
-	size_t printed = trace__fprintf_threads_header(fp);
-	struct rb_node *nd;
-
-	for (nd = rb_first(&trace->host.threads); nd; nd = rb_next(nd)) {
-		struct thread *thread = rb_entry(nd, struct thread, rb_node);
-		struct thread_trace *ttrace = thread->priv;
-		const char *color;
-		double ratio;
-
-		if (ttrace == NULL)
-			continue;
+	struct stats *stats;
+	size_t printed = 0;
+	struct syscall *sc;
+	struct int_node *inode = intlist__first(ttrace->syscall_stats);
 
-		ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
+	if (inode == NULL)
+		return 0;
 
-		color = PERF_COLOR_NORMAL;
-		if (ratio > 50.0)
-			color = PERF_COLOR_RED;
-		else if (ratio > 25.0)
-			color = PERF_COLOR_GREEN;
-		else if (ratio > 5.0)
-			color = PERF_COLOR_YELLOW;
+	printed += fprintf(fp, "\n");
+
+	/* each int_node is a syscall */
+	while (inode) {
+		stats = inode->priv;
+		if (stats) {
+			double min = (double)(stats->min) / NSEC_PER_MSEC;
+			double max = (double)(stats->max) / NSEC_PER_MSEC;
+			double avg = avg_stats(stats);
+			double pct;
+			u64 n = (u64) stats->n;
+
+			pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
+			avg /= NSEC_PER_MSEC;
+
+			sc = &trace->syscalls.table[inode->i];
+			printed += fprintf(fp, "%24s  %14s : ", "", sc->name);
+			printed += fprintf(fp, "%5" PRIu64 "  %8.3f  %8.3f",
+					   n, min, max);
+			printed += fprintf(fp, "  %8.3f  %6.2f\n", avg, pct);
+		}
 
-		printed += color_fprintf(fp, color, "%20s", thread->comm);
-		printed += fprintf(fp, " - %-5d :%11lu   [", thread->tid, ttrace->nr_events);
-		printed += color_fprintf(fp, color, "%5.1f%%", ratio);
-		printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
+		inode = intlist__next(inode);
 	}
 
+	printed += fprintf(fp, "\n\n");
+
 	return printed;
 }
 
+/* struct used to pass data to per-thread function */
+struct summary_data {
+	FILE *fp;
+	struct trace *trace;
+	size_t printed;
+};
+
+static int trace__fprintf_one_thread(struct thread *thread, void *priv)
+{
+	struct summary_data *data = priv;
+	FILE *fp = data->fp;
+	size_t printed = data->printed;
+	struct trace *trace = data->trace;
+	struct thread_trace *ttrace = thread->priv;
+	const char *color;
+	double ratio;
+
+	if (ttrace == NULL)
+		return 0;
+
+	ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
+
+	color = PERF_COLOR_NORMAL;
+	if (ratio > 50.0)
+		color = PERF_COLOR_RED;
+	else if (ratio > 25.0)
+		color = PERF_COLOR_GREEN;
+	else if (ratio > 5.0)
+		color = PERF_COLOR_YELLOW;
+
+	printed += color_fprintf(fp, color, "%20s", thread__comm_str(thread));
+	printed += fprintf(fp, " - %-5d :%11lu   [", thread->tid, ttrace->nr_events);
+	printed += color_fprintf(fp, color, "%5.1f%%", ratio);
+	printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
+	printed += thread__dump_stats(ttrace, trace, fp);
+
+	data->printed += printed;
+
+	return 0;
+}
+
+static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
+{
+	struct summary_data data = {
+		.fp = fp,
+		.trace = trace
+	};
+	data.printed = trace__fprintf_threads_header(fp);
+
+	machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
+
+	return data.printed;
+}
+
 static int trace__set_duration(const struct option *opt, const char *str,
 			       int unset __maybe_unused)
 {
@@ -1185,10 +2218,15 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 	const char * const trace_usage[] = {
 		"perf trace [<options>] [<command>]",
 		"perf trace [<options>] -- <command> [<options>]",
+		"perf trace record [<options>] [<command>]",
+		"perf trace record [<options>] -- <command> [<options>]",
 		NULL
 	};
 	struct trace trace = {
-		.audit_machine = audit_detect_machine(),
+		.audit = {
+			.machine = audit_detect_machine(),
+			.open_id = audit_name_to_syscall("open", trace.audit.machine),
+		},
 		.syscalls = {
 			. max = -1,
 		},
@@ -1203,10 +2241,14 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 			.mmap_pages    = 1024,
 		},
 		.output = stdout,
+		.show_comm = true,
 	};
 	const char *output_name = NULL;
 	const char *ev_qualifier_str = NULL;
 	const struct option trace_options[] = {
+	OPT_BOOLEAN(0, "comm", &trace.show_comm,
+		    "show the thread COMM next to its id"),
+	OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
 	OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
 		    "list of events to trace"),
 	OPT_STRING('o', "output", &output_name, "file", "output file name"),
@@ -1221,8 +2263,9 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 		    "list of cpus to monitor"),
 	OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
 		    "child tasks do not inherit counters"),
-	OPT_UINTEGER('m', "mmap-pages", &trace.opts.mmap_pages,
-		     "number of mmap data pages"),
+	OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
+		     "number of mmap data pages",
+		     perf_evlist__parse_mmap_pages),
 	OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
 		   "user to profile"),
 	OPT_CALLBACK(0, "duration", &trace, "float",
@@ -1230,11 +2273,18 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 		     trace__set_duration),
 	OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
 	OPT_INCR('v', "verbose", &verbose, "be more verbose"),
+	OPT_BOOLEAN('T', "time", &trace.full_time,
+		    "Show full timestamp, not time relative to first start"),
+	OPT_BOOLEAN(0, "summary", &trace.summary,
+		    "Show syscall summary with statistics"),
 	OPT_END()
 	};
 	int err;
 	char bf[BUFSIZ];
 
+	if ((argc > 1) && (strcmp(argv[1], "record") == 0))
+		return trace__record(argc-2, &argv[2]);
+
 	argc = parse_options(argc, argv, trace_options, trace_usage, 0);
 
 	if (output_name != NULL) {
@@ -1282,9 +2332,6 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 	else
 		err = trace__run(&trace, argc, argv);
 
-	if (trace.sched && !err)
-		trace__fprintf_thread_summary(&trace, trace.output);
-
 out_close:
 	if (output_name != NULL)
 		fclose(trace.output);
diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile
index 5f6f9b3271bb0657b77206f6723fd8b3786041bd..58b2d37ae23a19db3719ebfa29c98a79905a4c49 100644
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile
@@ -23,15 +23,17 @@ ifeq ($(ARCH),x86_64)
   endif
   ifeq (${IS_X86_64}, 1)
     RAW_ARCH := x86_64
-    CFLAGS += -DARCH_X86_64
+    CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT
     ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S
+    LIBUNWIND_LIBS = -lunwind -lunwind-x86_64
+  else
+    LIBUNWIND_LIBS = -lunwind -lunwind-x86
   endif
   NO_PERF_REGS := 0
-  LIBUNWIND_LIBS = -lunwind -lunwind-x86_64
 endif
 
 ifeq ($(NO_PERF_REGS),0)
-  CFLAGS += -DHAVE_PERF_REGS
+  CFLAGS += -DHAVE_PERF_REGS_SUPPORT
 endif
 
 ifeq ($(src-perf),)
@@ -51,7 +53,6 @@ LIB_INCLUDE := $(srctree)/tools/lib/
 # include ARCH specific config
 -include $(src-perf)/arch/$(ARCH)/Makefile
 
-include $(src-perf)/config/feature-tests.mak
 include $(src-perf)/config/utilities.mak
 
 ifeq ($(call get-executable,$(FLEX)),)
@@ -67,10 +68,11 @@ ifneq ($(WERROR),0)
   CFLAGS += -Werror
 endif
 
-ifeq ("$(origin DEBUG)", "command line")
-  PERF_DEBUG = $(DEBUG)
+ifndef DEBUG
+  DEBUG := 0
 endif
-ifndef PERF_DEBUG
+
+ifeq ($(DEBUG),0)
   CFLAGS += -O6
 endif
 
@@ -89,20 +91,125 @@ CFLAGS += -std=gnu99
 
 EXTLIBS = -lelf -lpthread -lrt -lm -ldl
 
-ifeq ($(call try-cc,$(SOURCE_HELLO),$(CFLAGS) -Werror -fstack-protector-all,-fstack-protector-all),y)
-  CFLAGS += -fstack-protector-all
+ifneq ($(OUTPUT),)
+  OUTPUT_FEATURES = $(OUTPUT)config/feature-checks/
+  $(shell mkdir -p $(OUTPUT_FEATURES))
 endif
 
-ifeq ($(call try-cc,$(SOURCE_HELLO),$(CFLAGS) -Werror -Wstack-protector,-Wstack-protector),y)
-  CFLAGS += -Wstack-protector
+feature_check = $(eval $(feature_check_code))
+define feature_check_code
+  feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS)" LDFLAGS="$(LDFLAGS)" LIBUNWIND_LIBS="$(LIBUNWIND_LIBS)" -C config/feature-checks test-$1 >/dev/null 2>/dev/null && echo 1 || echo 0)
+endef
+
+feature_set = $(eval $(feature_set_code))
+define feature_set_code
+  feature-$(1) := 1
+endef
+
+#
+# Build the feature check binaries in parallel, ignore errors, ignore return value and suppress output:
+#
+
+#
+# Note that this is not a complete list of all feature tests, just
+# those that are typically built on a fully configured system.
+#
+# [ Feature tests not mentioned here have to be built explicitly in
+#   the rule that uses them - an example for that is the 'bionic'
+#   feature check. ]
+#
+CORE_FEATURE_TESTS =			\
+	backtrace			\
+	dwarf				\
+	fortify-source			\
+	glibc				\
+	gtk2				\
+	gtk2-infobar			\
+	libaudit			\
+	libbfd				\
+	libelf				\
+	libelf-getphdrnum		\
+	libelf-mmap			\
+	libnuma				\
+	libperl				\
+	libpython			\
+	libpython-version		\
+	libslang			\
+	libunwind			\
+	on-exit				\
+	stackprotector			\
+	stackprotector-all
+
+#
+# So here we detect whether test-all was rebuilt, to be able
+# to skip the print-out of the long features list if the file
+# existed before and after it was built:
+#
+ifeq ($(wildcard $(OUTPUT)config/feature-checks/test-all),)
+  test-all-failed := 1
+else
+  test-all-failed := 0
+endif
+
+#
+# Special fast-path for the 'all features are available' case:
+#
+$(call feature_check,all,$(MSG))
+
+#
+# Just in case the build freshly failed, make sure we print the
+# feature matrix:
+#
+ifeq ($(feature-all), 0)
+  test-all-failed := 1
 endif
 
-ifeq ($(call try-cc,$(SOURCE_HELLO),$(CFLAGS) -Werror -Wvolatile-register-var,-Wvolatile-register-var),y)
-  CFLAGS += -Wvolatile-register-var
+ifeq ($(test-all-failed),1)
+  $(info )
+  $(info Auto-detecting system features:)
 endif
 
-ifndef PERF_DEBUG
-  ifeq ($(call try-cc,$(SOURCE_HELLO),$(CFLAGS) -D_FORTIFY_SOURCE=2,-D_FORTIFY_SOURCE=2),y)
+ifeq ($(feature-all), 1)
+  #
+  # test-all.c passed - just set all the core feature flags to 1:
+  #
+  $(foreach feat,$(CORE_FEATURE_TESTS),$(call feature_set,$(feat)))
+else
+  $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS)" LDFLAGS=$(LDFLAGS) -i -j -C config/feature-checks $(CORE_FEATURE_TESTS) >/dev/null 2>&1)
+  $(foreach feat,$(CORE_FEATURE_TESTS),$(call feature_check,$(feat)))
+endif
+
+#
+# Print the result of the feature test:
+#
+feature_print = $(eval $(feature_print_code)) $(info $(MSG))
+
+define feature_print_code
+  ifeq ($(feature-$(1)), 1)
+    MSG = $(shell printf '...%30s: [ \033[32mon\033[m  ]' $(1))
+  else
+    MSG = $(shell printf '...%30s: [ \033[31mOFF\033[m ]' $(1))
+  endif
+endef
+
+#
+# Only print out our features if we rebuilt the testcases or if a test failed:
+#
+ifeq ($(test-all-failed), 1)
+  $(foreach feat,$(CORE_FEATURE_TESTS),$(call feature_print,$(feat)))
+  $(info )
+endif
+
+ifeq ($(feature-stackprotector-all), 1)
+  CFLAGS += -fstack-protector-all
+endif
+
+ifeq ($(feature-stackprotector), 1)
+  CFLAGS += -Wstack-protector
+endif
+
+ifeq ($(DEBUG),0)
+  ifeq ($(feature-fortify-source), 1)
     CFLAGS += -D_FORTIFY_SOURCE=2
   endif
 endif
@@ -128,84 +235,74 @@ CFLAGS += -I$(LIB_INCLUDE)
 CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
 
 ifndef NO_BIONIC
-ifeq ($(call try-cc,$(SOURCE_BIONIC),$(CFLAGS),bionic),y)
-  BIONIC := 1
-  EXTLIBS := $(filter-out -lrt,$(EXTLIBS))
-  EXTLIBS := $(filter-out -lpthread,$(EXTLIBS))
+  $(call feature_check,bionic)
+  ifeq ($(feature-bionic), 1)
+    BIONIC := 1
+    EXTLIBS := $(filter-out -lrt,$(EXTLIBS))
+    EXTLIBS := $(filter-out -lpthread,$(EXTLIBS))
+  endif
 endif
-endif # NO_BIONIC
 
 ifdef NO_LIBELF
   NO_DWARF := 1
   NO_DEMANGLE := 1
   NO_LIBUNWIND := 1
 else
-FLAGS_LIBELF=$(CFLAGS) $(LDFLAGS) $(EXTLIBS)
-ifneq ($(call try-cc,$(SOURCE_LIBELF),$(FLAGS_LIBELF),libelf),y)
-  FLAGS_GLIBC=$(CFLAGS) $(LDFLAGS)
-  ifeq ($(call try-cc,$(SOURCE_GLIBC),$(FLAGS_GLIBC),glibc),y)
-    LIBC_SUPPORT := 1
-  endif
-  ifeq ($(BIONIC),1)
-    LIBC_SUPPORT := 1
-  endif
-  ifeq ($(LIBC_SUPPORT),1)
-    msg := $(warning No libelf found, disables 'probe' tool, please install elfutils-libelf-devel/libelf-dev);
+  ifeq ($(feature-libelf), 0)
+    ifeq ($(feature-glibc), 1)
+      LIBC_SUPPORT := 1
+    endif
+    ifeq ($(BIONIC),1)
+      LIBC_SUPPORT := 1
+    endif
+    ifeq ($(LIBC_SUPPORT),1)
+      msg := $(warning No libelf found, disables 'probe' tool, please install elfutils-libelf-devel/libelf-dev);
 
-    NO_LIBELF := 1
-    NO_DWARF := 1
-    NO_DEMANGLE := 1
+      NO_LIBELF := 1
+      NO_DWARF := 1
+      NO_DEMANGLE := 1
+    else
+      msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]/glibc-static);
+    endif
   else
-    msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]/glibc-static);
-  endif
-else
-  # for linking with debug library, run like:
-  # make DEBUG=1 LIBDW_DIR=/opt/libdw/
-  ifdef LIBDW_DIR
-    LIBDW_CFLAGS  := -I$(LIBDW_DIR)/include
-    LIBDW_LDFLAGS := -L$(LIBDW_DIR)/lib
-  endif
+    # for linking with debug library, run like:
+    # make DEBUG=1 LIBDW_DIR=/opt/libdw/
+    ifdef LIBDW_DIR
+      LIBDW_CFLAGS  := -I$(LIBDW_DIR)/include
+      LIBDW_LDFLAGS := -L$(LIBDW_DIR)/lib
+    endif
 
-  FLAGS_DWARF=$(CFLAGS) $(LIBDW_CFLAGS) -ldw -lz -lelf $(LIBDW_LDFLAGS) $(LDFLAGS) $(EXTLIBS)
-  ifneq ($(call try-cc,$(SOURCE_DWARF),$(FLAGS_DWARF),libdw),y)
-    msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev);
-    NO_DWARF := 1
-  endif # Dwarf support
-endif # SOURCE_LIBELF
+    ifneq ($(feature-dwarf), 1)
+      msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev);
+      NO_DWARF := 1
+    endif # Dwarf support
+  endif # libelf support
 endif # NO_LIBELF
 
 ifndef NO_LIBELF
-CFLAGS += -DLIBELF_SUPPORT
-FLAGS_LIBELF=$(CFLAGS) $(LDFLAGS) $(EXTLIBS)
-ifeq ($(call try-cc,$(SOURCE_ELF_MMAP),$(FLAGS_LIBELF),-DLIBELF_MMAP),y)
-  CFLAGS += -DLIBELF_MMAP
-endif
-ifeq ($(call try-cc,$(SOURCE_ELF_GETPHDRNUM),$(FLAGS_LIBELF),-DHAVE_ELF_GETPHDRNUM),y)
-  CFLAGS += -DHAVE_ELF_GETPHDRNUM
-endif
+  CFLAGS += -DHAVE_LIBELF_SUPPORT
 
-# include ARCH specific config
--include $(src-perf)/arch/$(ARCH)/Makefile
+  ifeq ($(feature-libelf-mmap), 1)
+    CFLAGS += -DHAVE_LIBELF_MMAP_SUPPORT
+  endif
 
-ifndef NO_DWARF
-ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined)
-  msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled);
-  NO_DWARF := 1
-else
-  CFLAGS += -DDWARF_SUPPORT $(LIBDW_CFLAGS)
-  LDFLAGS += $(LIBDW_LDFLAGS)
-  EXTLIBS += -lelf -ldw
-endif # PERF_HAVE_DWARF_REGS
-endif # NO_DWARF
+  ifeq ($(feature-libelf-getphdrnum), 1)
+    CFLAGS += -DHAVE_ELF_GETPHDRNUM_SUPPORT
+  endif
 
-endif # NO_LIBELF
+  # include ARCH specific config
+  -include $(src-perf)/arch/$(ARCH)/Makefile
 
-ifndef NO_LIBELF
-CFLAGS += -DLIBELF_SUPPORT
-FLAGS_LIBELF=$(CFLAGS) $(LDFLAGS) $(EXTLIBS)
-ifeq ($(call try-cc,$(SOURCE_ELF_MMAP),$(FLAGS_LIBELF),-DLIBELF_MMAP),y)
-  CFLAGS += -DLIBELF_MMAP
-endif # try-cc
+  ifndef NO_DWARF
+    ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined)
+      msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled);
+      NO_DWARF := 1
+    else
+      CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS)
+      LDFLAGS += $(LIBDW_LDFLAGS)
+      EXTLIBS += -lelf -ldw
+    endif # PERF_HAVE_DWARF_REGS
+  endif # NO_DWARF
 endif # NO_LIBELF
 
 # There's only x86 (both 32 and 64) support for CFI unwind so far
@@ -214,34 +311,35 @@ ifneq ($(ARCH),x86)
 endif
 
 ifndef NO_LIBUNWIND
-# for linking with debug library, run like:
-# make DEBUG=1 LIBUNWIND_DIR=/opt/libunwind/
-ifdef LIBUNWIND_DIR
-  LIBUNWIND_CFLAGS  := -I$(LIBUNWIND_DIR)/include
-  LIBUNWIND_LDFLAGS := -L$(LIBUNWIND_DIR)/lib
-endif
+  #
+  # For linking with debug library, run like:
+  #
+  #   make DEBUG=1 LIBUNWIND_DIR=/opt/libunwind/
+  #
+  ifdef LIBUNWIND_DIR
+    LIBUNWIND_CFLAGS  := -I$(LIBUNWIND_DIR)/include
+    LIBUNWIND_LDFLAGS := -L$(LIBUNWIND_DIR)/lib
+  endif
 
-FLAGS_UNWIND=$(LIBUNWIND_CFLAGS) $(CFLAGS) $(LIBUNWIND_LDFLAGS) $(LDFLAGS) $(EXTLIBS) $(LIBUNWIND_LIBS)
-ifneq ($(call try-cc,$(SOURCE_LIBUNWIND),$(FLAGS_UNWIND),libunwind),y)
-  msg := $(warning No libunwind found, disabling post unwind support. Please install libunwind-dev[el] >= 0.99);
-  NO_LIBUNWIND := 1
-endif # Libunwind support
-endif # NO_LIBUNWIND
+  ifneq ($(feature-libunwind), 1)
+    msg := $(warning No libunwind found, disabling post unwind support. Please install libunwind-dev[el] >= 0.99);
+    NO_LIBUNWIND := 1
+  endif
+endif
 
 ifndef NO_LIBUNWIND
-  CFLAGS += -DLIBUNWIND_SUPPORT
+  CFLAGS += -DHAVE_LIBUNWIND_SUPPORT
   EXTLIBS += $(LIBUNWIND_LIBS)
   CFLAGS += $(LIBUNWIND_CFLAGS)
   LDFLAGS += $(LIBUNWIND_LDFLAGS)
-endif # NO_LIBUNWIND
+endif
 
 ifndef NO_LIBAUDIT
-  FLAGS_LIBAUDIT = $(CFLAGS) $(LDFLAGS) -laudit
-  ifneq ($(call try-cc,$(SOURCE_LIBAUDIT),$(FLAGS_LIBAUDIT),libaudit),y)
+  ifneq ($(feature-libaudit), 1)
     msg := $(warning No libaudit.h found, disables 'trace' tool, please install audit-libs-devel or libaudit-dev);
     NO_LIBAUDIT := 1
   else
-    CFLAGS += -DLIBAUDIT_SUPPORT
+    CFLAGS += -DHAVE_LIBAUDIT_SUPPORT
     EXTLIBS += -laudit
   endif
 endif
@@ -251,30 +349,30 @@ ifdef NO_NEWT
 endif
 
 ifndef NO_SLANG
-  FLAGS_SLANG=$(CFLAGS) $(LDFLAGS) $(EXTLIBS) -I/usr/include/slang -lslang
-  ifneq ($(call try-cc,$(SOURCE_SLANG),$(FLAGS_SLANG),libslang),y)
+  ifneq ($(feature-libslang), 1)
     msg := $(warning slang not found, disables TUI support. Please install slang-devel or libslang-dev);
     NO_SLANG := 1
   else
     # Fedora has /usr/include/slang/slang.h, but ubuntu /usr/include/slang.h
     CFLAGS += -I/usr/include/slang
-    CFLAGS += -DSLANG_SUPPORT
+    CFLAGS += -DHAVE_SLANG_SUPPORT
     EXTLIBS += -lslang
   endif
 endif
 
 ifndef NO_GTK2
   FLAGS_GTK2=$(CFLAGS) $(LDFLAGS) $(EXTLIBS) $(shell pkg-config --libs --cflags gtk+-2.0 2>/dev/null)
-  ifneq ($(call try-cc,$(SOURCE_GTK2),$(FLAGS_GTK2),gtk2),y)
+  ifneq ($(feature-gtk2), 1)
     msg := $(warning GTK2 not found, disables GTK2 support. Please install gtk2-devel or libgtk2.0-dev);
     NO_GTK2 := 1
   else
-    ifeq ($(call try-cc,$(SOURCE_GTK2_INFOBAR),$(FLAGS_GTK2),-DHAVE_GTK_INFO_BAR),y)
-      CFLAGS += -DHAVE_GTK_INFO_BAR
+    ifeq ($(feature-gtk2-infobar), 1)
+      GTK_CFLAGS := -DHAVE_GTK_INFO_BAR_SUPPORT
     endif
-    CFLAGS += -DGTK2_SUPPORT
-    CFLAGS += $(shell pkg-config --cflags gtk+-2.0 2>/dev/null)
-    EXTLIBS += $(shell pkg-config --libs gtk+-2.0 2>/dev/null)
+    CFLAGS += -DHAVE_GTK2_SUPPORT
+    GTK_CFLAGS += $(shell pkg-config --cflags gtk+-2.0 2>/dev/null)
+    GTK_LIBS := $(shell pkg-config --libs gtk+-2.0 2>/dev/null)
+    EXTLIBS += -ldl
   endif
 endif
 
@@ -290,7 +388,7 @@ else
   PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null`
   FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS)
 
-  ifneq ($(call try-cc,$(SOURCE_PERL_EMBED),$(FLAGS_PERL_EMBED),perl),y)
+  ifneq ($(feature-libperl), 1)
     CFLAGS += -DNO_LIBPERL
     NO_LIBPERL := 1
   else
@@ -299,6 +397,13 @@ else
   endif
 endif
 
+$(call feature_check,timerfd)
+ifeq ($(feature-timerfd), 1)
+  CFLAGS += -DHAVE_TIMERFD_SUPPORT
+else
+  msg := $(warning No timerfd support. Disables 'perf kvm stat live');
+endif
+
 disable-python = $(eval $(disable-python_code))
 define disable-python_code
   CFLAGS += -DNO_LIBPYTHON
@@ -335,11 +440,11 @@ else
       PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null)
       FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
 
-      ifneq ($(call try-cc,$(SOURCE_PYTHON_EMBED),$(FLAGS_PYTHON_EMBED),python),y)
+      ifneq ($(feature-libpython), 1)
         $(call disable-python,Python.h (for Python 2.x))
       else
 
-        ifneq ($(call try-cc,$(SOURCE_PYTHON_VERSION),$(FLAGS_PYTHON_EMBED),python version),y)
+        ifneq ($(feature-libpython-version), 1)
           $(warning Python 3 is not yet supported; please set)
           $(warning PYTHON and/or PYTHON_CONFIG appropriately.)
           $(warning If you also have Python 2 installed, then)
@@ -362,33 +467,30 @@ else
   endif
 endif
 
+ifeq ($(feature-libbfd), 1)
+  EXTLIBS += -lbfd
+endif
+
 ifdef NO_DEMANGLE
   CFLAGS += -DNO_DEMANGLE
 else
-  ifdef HAVE_CPLUS_DEMANGLE
+  ifdef HAVE_CPLUS_DEMANGLE_SUPPORT
     EXTLIBS += -liberty
-    CFLAGS += -DHAVE_CPLUS_DEMANGLE
+    CFLAGS += -DHAVE_CPLUS_DEMANGLE_SUPPORT
   else
-    FLAGS_BFD=$(CFLAGS) $(LDFLAGS) $(EXTLIBS) -DPACKAGE='perf' -lbfd
-    has_bfd := $(call try-cc,$(SOURCE_BFD),$(FLAGS_BFD),libbfd)
-    ifeq ($(has_bfd),y)
-      EXTLIBS += -lbfd
-    else
-      FLAGS_BFD_IBERTY=$(FLAGS_BFD) -liberty
-      has_bfd_iberty := $(call try-cc,$(SOURCE_BFD),$(FLAGS_BFD_IBERTY),liberty)
-      ifeq ($(has_bfd_iberty),y)
+    ifneq ($(feature-libbfd), 1)
+      $(call feature_check,liberty)
+      ifeq ($(feature-liberty), 1)
         EXTLIBS += -lbfd -liberty
       else
-        FLAGS_BFD_IBERTY_Z=$(FLAGS_BFD_IBERTY) -lz
-        has_bfd_iberty_z := $(call try-cc,$(SOURCE_BFD),$(FLAGS_BFD_IBERTY_Z),libz)
-        ifeq ($(has_bfd_iberty_z),y)
+        $(call feature_check,liberty-z)
+        ifeq ($(feature-liberty-z), 1)
           EXTLIBS += -lbfd -liberty -lz
         else
-          FLAGS_CPLUS_DEMANGLE=$(CFLAGS) $(LDFLAGS) $(EXTLIBS) -liberty
-          has_cplus_demangle := $(call try-cc,$(SOURCE_CPLUS_DEMANGLE),$(FLAGS_CPLUS_DEMANGLE),demangle)
-          ifeq ($(has_cplus_demangle),y)
+          $(call feature_check,cplus-demangle)
+          ifeq ($(feature-cplus-demangle), 1)
             EXTLIBS += -liberty
-            CFLAGS += -DHAVE_CPLUS_DEMANGLE
+            CFLAGS += -DHAVE_CPLUS_DEMANGLE_SUPPORT
           else
             msg := $(warning No bfd.h/libbfd found, install binutils-dev[el]/zlib-static to gain symbol demangling)
             CFLAGS += -DNO_DEMANGLE
@@ -399,31 +501,28 @@ else
   endif
 endif
 
-ifndef NO_STRLCPY
-  ifeq ($(call try-cc,$(SOURCE_STRLCPY),,-DHAVE_STRLCPY),y)
-    CFLAGS += -DHAVE_STRLCPY
-  endif
+ifneq ($(filter -lbfd,$(EXTLIBS)),)
+  CFLAGS += -DHAVE_LIBBFD_SUPPORT
 endif
 
 ifndef NO_ON_EXIT
-  ifeq ($(call try-cc,$(SOURCE_ON_EXIT),,-DHAVE_ON_EXIT),y)
-    CFLAGS += -DHAVE_ON_EXIT
+  ifeq ($(feature-on-exit), 1)
+    CFLAGS += -DHAVE_ON_EXIT_SUPPORT
   endif
 endif
 
 ifndef NO_BACKTRACE
-  ifeq ($(call try-cc,$(SOURCE_BACKTRACE),,-DBACKTRACE_SUPPORT),y)
-    CFLAGS += -DBACKTRACE_SUPPORT
+  ifeq ($(feature-backtrace), 1)
+    CFLAGS += -DHAVE_BACKTRACE_SUPPORT
   endif
 endif
 
 ifndef NO_LIBNUMA
-  FLAGS_LIBNUMA = $(CFLAGS) $(LDFLAGS) -lnuma
-  ifneq ($(call try-cc,$(SOURCE_LIBNUMA),$(FLAGS_LIBNUMA),libnuma),y)
+  ifeq ($(feature-libnuma), 0)
     msg := $(warning No numa.h found, disables 'perf bench numa mem' benchmark, please install numa-libs-devel or libnuma-dev);
     NO_LIBNUMA := 1
   else
-    CFLAGS += -DLIBNUMA_SUPPORT
+    CFLAGS += -DHAVE_LIBNUMA_SUPPORT
     EXTLIBS += -lnuma
   endif
 endif
@@ -459,7 +558,12 @@ else
 sysconfdir = $(prefix)/etc
 ETC_PERFCONFIG = etc/perfconfig
 endif
+ifeq ($(IS_X86_64),1)
+lib = lib64
+else
 lib = lib
+endif
+libdir = $(prefix)/$(lib)
 
 # Shell quote (do not use $(call) to accommodate ancient setups);
 ETC_PERFCONFIG_SQ = $(subst ','\'',$(ETC_PERFCONFIG))
@@ -472,6 +576,7 @@ template_dir_SQ = $(subst ','\'',$(template_dir))
 htmldir_SQ = $(subst ','\'',$(htmldir))
 prefix_SQ = $(subst ','\'',$(prefix))
 sysconfdir_SQ = $(subst ','\'',$(sysconfdir))
+libdir_SQ = $(subst ','\'',$(libdir))
 
 ifneq ($(filter /%,$(firstword $(perfexecdir))),)
 perfexec_instdir = $(perfexecdir)
diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..c803f17fb9869648c97ba9efe93e3a05b4e4329d
--- /dev/null
+++ b/tools/perf/config/feature-checks/Makefile
@@ -0,0 +1,148 @@
+
+FILES=					\
+	test-all			\
+	test-backtrace			\
+	test-bionic			\
+	test-dwarf			\
+	test-fortify-source		\
+	test-glibc			\
+	test-gtk2			\
+	test-gtk2-infobar		\
+	test-hello			\
+	test-libaudit			\
+	test-libbfd			\
+	test-liberty			\
+	test-liberty-z			\
+	test-cplus-demangle		\
+	test-libelf			\
+	test-libelf-getphdrnum		\
+	test-libelf-mmap		\
+	test-libnuma			\
+	test-libperl			\
+	test-libpython			\
+	test-libpython-version		\
+	test-libslang			\
+	test-libunwind			\
+	test-on-exit			\
+	test-stackprotector-all		\
+	test-stackprotector		\
+	test-timerfd
+
+CC := $(CC) -MD
+
+all: $(FILES)
+
+BUILD = $(CC) $(CFLAGS) $(LDFLAGS) -o $(OUTPUT)$@ $@.c
+
+###############################
+
+test-all:
+	$(BUILD) -Werror -fstack-protector -fstack-protector-all -O2 -Werror -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma $(LIBUNWIND_LIBS) -lelf -laudit -I/usr/include/slang -lslang $(shell pkg-config --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl
+
+test-hello:
+	$(BUILD)
+
+test-stackprotector-all:
+	$(BUILD) -Werror -fstack-protector-all
+
+test-stackprotector:
+	$(BUILD) -Werror -fstack-protector -Wstack-protector
+
+test-fortify-source:
+	$(BUILD) -O2 -Werror -D_FORTIFY_SOURCE=2
+
+test-bionic:
+	$(BUILD)
+
+test-libelf:
+	$(BUILD) -lelf
+
+test-glibc:
+	$(BUILD)
+
+test-dwarf:
+	$(BUILD) -ldw
+
+test-libelf-mmap:
+	$(BUILD) -lelf
+
+test-libelf-getphdrnum:
+	$(BUILD) -lelf
+
+test-libnuma:
+	$(BUILD) -lnuma
+
+test-libunwind:
+	$(BUILD) $(LIBUNWIND_LIBS) -lelf
+
+test-libaudit:
+	$(BUILD) -laudit
+
+test-libslang:
+	$(BUILD) -I/usr/include/slang -lslang
+
+test-gtk2:
+	$(BUILD) $(shell pkg-config --libs --cflags gtk+-2.0 2>/dev/null)
+
+test-gtk2-infobar:
+	$(BUILD) $(shell pkg-config --libs --cflags gtk+-2.0 2>/dev/null)
+
+grep-libs  = $(filter -l%,$(1))
+strip-libs = $(filter-out -l%,$(1))
+
+PERL_EMBED_LDOPTS = $(shell perl -MExtUtils::Embed -e ldopts 2>/dev/null)
+PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS))
+PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS))
+PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null`
+FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS)
+
+test-libperl:
+	$(BUILD) $(FLAGS_PERL_EMBED)
+
+override PYTHON := python
+override PYTHON_CONFIG := python-config
+
+escape-for-shell-sq =  $(subst ','\'',$(1))
+shell-sq = '$(escape-for-shell-sq)'
+
+PYTHON_CONFIG_SQ = $(call shell-sq,$(PYTHON_CONFIG))
+
+PYTHON_EMBED_LDOPTS = $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null)
+PYTHON_EMBED_LDFLAGS = $(call strip-libs,$(PYTHON_EMBED_LDOPTS))
+PYTHON_EMBED_LIBADD = $(call grep-libs,$(PYTHON_EMBED_LDOPTS))
+PYTHON_EMBED_CCOPTS = $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null)
+FLAGS_PYTHON_EMBED = $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
+
+test-libpython:
+	$(BUILD) $(FLAGS_PYTHON_EMBED)
+
+test-libpython-version:
+	$(BUILD) $(FLAGS_PYTHON_EMBED)
+
+test-libbfd:
+	$(BUILD) -DPACKAGE='"perf"' -lbfd -ldl
+
+test-liberty:
+	$(CC) -o $(OUTPUT)$@ test-libbfd.c -DPACKAGE='"perf"' -lbfd -ldl -liberty
+
+test-liberty-z:
+	$(CC) -o $(OUTPUT)$@ test-libbfd.c -DPACKAGE='"perf"' -lbfd -ldl -liberty -lz
+
+test-cplus-demangle:
+	$(BUILD) -liberty
+
+test-on-exit:
+	$(BUILD)
+
+test-backtrace:
+	$(BUILD)
+
+test-timerfd:
+	$(BUILD)
+
+-include *.d
+
+###############################
+
+clean:
+	rm -f $(FILES) *.d
diff --git a/tools/perf/config/feature-checks/test-all.c b/tools/perf/config/feature-checks/test-all.c
new file mode 100644
index 0000000000000000000000000000000000000000..59e7a705e146d4eb8ea029ebf74878411f8e3898
--- /dev/null
+++ b/tools/perf/config/feature-checks/test-all.c
@@ -0,0 +1,111 @@
+/*
+ * test-all.c: Try to build all the main testcases at once.
+ *
+ * A well-configured system will have all the prereqs installed, so we can speed
+ * up auto-detection on such systems.
+ */
+
+/*
+ * Quirk: Python and Perl headers cannot be in arbitrary places, so keep
+ * these 3 testcases at the top:
+ */
+#define main main_test_libpython
+# include "test-libpython.c"
+#undef main
+
+#define main main_test_libpython_version
+# include "test-libpython-version.c"
+#undef main
+
+#define main main_test_libperl
+# include "test-libperl.c"
+#undef main
+
+#define main main_test_hello
+# include "test-hello.c"
+#undef main
+
+#define main main_test_libelf
+# include "test-libelf.c"
+#undef main
+
+#define main main_test_libelf_mmap
+# include "test-libelf-mmap.c"
+#undef main
+
+#define main main_test_glibc
+# include "test-glibc.c"
+#undef main
+
+#define main main_test_dwarf
+# include "test-dwarf.c"
+#undef main
+
+#define main main_test_libelf_getphdrnum
+# include "test-libelf-getphdrnum.c"
+#undef main
+
+#define main main_test_libunwind
+# include "test-libunwind.c"
+#undef main
+
+#define main main_test_libaudit
+# include "test-libaudit.c"
+#undef main
+
+#define main main_test_libslang
+# include "test-libslang.c"
+#undef main
+
+#define main main_test_gtk2
+# include "test-gtk2.c"
+#undef main
+
+#define main main_test_gtk2_infobar
+# include "test-gtk2-infobar.c"
+#undef main
+
+#define main main_test_libbfd
+# include "test-libbfd.c"
+#undef main
+
+#define main main_test_on_exit
+# include "test-on-exit.c"
+#undef main
+
+#define main main_test_backtrace
+# include "test-backtrace.c"
+#undef main
+
+#define main main_test_libnuma
+# include "test-libnuma.c"
+#undef main
+
+#define main main_test_timerfd
+# include "test-timerfd.c"
+#undef main
+
+int main(int argc, char *argv[])
+{
+	main_test_libpython();
+	main_test_libpython_version();
+	main_test_libperl();
+	main_test_hello();
+	main_test_libelf();
+	main_test_libelf_mmap();
+	main_test_glibc();
+	main_test_dwarf();
+	main_test_libelf_getphdrnum();
+	main_test_libunwind();
+	main_test_libaudit();
+	main_test_libslang();
+	main_test_gtk2(argc, argv);
+	main_test_gtk2_infobar(argc, argv);
+	main_test_libbfd();
+	main_test_on_exit();
+	main_test_backtrace();
+	main_test_libnuma();
+	main_test_timerfd();
+
+	return 0;
+}
diff --git a/tools/perf/config/feature-checks/test-backtrace.c b/tools/perf/config/feature-checks/test-backtrace.c
new file mode 100644
index 0000000000000000000000000000000000000000..7124aa1dc8fb77d5fe1adae0cc4a0dcaff2ff3a3
--- /dev/null
+++ b/tools/perf/config/feature-checks/test-backtrace.c
@@ -0,0 +1,13 @@
+#include <execinfo.h>
+#include <stdio.h>
+
+int main(void)
+{
+	void *backtrace_fns[10];
+	size_t entries;
+
+	entries = backtrace(backtrace_fns, 10);
+	backtrace_symbols_fd(backtrace_fns, entries, 1);
+
+	return 0;
+}
diff --git a/tools/perf/config/feature-checks/test-bionic.c b/tools/perf/config/feature-checks/test-bionic.c
new file mode 100644
index 0000000000000000000000000000000000000000..eac24e9513ebc0f35d5ee8ebbb450ab2f1fa5bc2
--- /dev/null
+++ b/tools/perf/config/feature-checks/test-bionic.c
@@ -0,0 +1,6 @@
+#include <android/api-level.h>
+
+int main(void)
+{
+	return __ANDROID_API__;
+}
diff --git a/tools/perf/config/feature-checks/test-cplus-demangle.c b/tools/perf/config/feature-checks/test-cplus-demangle.c
new file mode 100644
index 0000000000000000000000000000000000000000..610c686e0009a572d471954e32bea26a96a046b0
--- /dev/null
+++ b/tools/perf/config/feature-checks/test-cplus-demangle.c
@@ -0,0 +1,14 @@
+extern int printf(const char *format, ...);
+extern char *cplus_demangle(const char *, int);
+
+int main(void)
+{
+	char symbol[4096] = "FieldName__9ClassNameFd";
+	char *tmp;
+
+	tmp = cplus_demangle(symbol, 0);
+
+	printf("demangled symbol: {%s}\n", tmp);
+
+	return 0;
+}
diff --git a/tools/perf/config/feature-checks/test-dwarf.c b/tools/perf/config/feature-checks/test-dwarf.c
new file mode 100644
index 0000000000000000000000000000000000000000..3fc1801ce4a9760dce3a2db564747ace639e88aa
--- /dev/null
+++ b/tools/perf/config/feature-checks/test-dwarf.c
@@ -0,0 +1,10 @@
+#include <dwarf.h>
+#include <elfutils/libdw.h>
+#include <elfutils/version.h>
+
+int main(void)
+{
+	Dwarf *dbg = dwarf_begin(0, DWARF_C_READ);
+
+	return (long)dbg;
+}
diff --git a/tools/perf/config/feature-checks/test-fortify-source.c b/tools/perf/config/feature-checks/test-fortify-source.c
new file mode 100644
index 0000000000000000000000000000000000000000..c9f398d8786820588da652a7a89ef5c3e729127b
--- /dev/null
+++ b/tools/perf/config/feature-checks/test-fortify-source.c
@@ -0,0 +1,6 @@
+#include <stdio.h>
+
+int main(void)
+{
+	return puts("hi");
+}
diff --git a/tools/perf/config/feature-checks/test-glibc.c b/tools/perf/config/feature-checks/test-glibc.c
new file mode 100644
index 0000000000000000000000000000000000000000..b0820345cd9843776bad77dd92e926e226da1551
--- /dev/null
+++ b/tools/perf/config/feature-checks/test-glibc.c
@@ -0,0 +1,8 @@
+#include <gnu/libc-version.h>
+
+int main(void)
+{
+	const char *version = gnu_get_libc_version();
+
+	return (long)version;
+}
diff --git a/tools/perf/config/feature-checks/test-gtk2-infobar.c b/tools/perf/config/feature-checks/test-gtk2-infobar.c
new file mode 100644
index 0000000000000000000000000000000000000000..397b4646d066144d1db1f6bd2842b9f1af5bf617
--- /dev/null
+++ b/tools/perf/config/feature-checks/test-gtk2-infobar.c
@@ -0,0 +1,11 @@
+#pragma GCC diagnostic ignored "-Wstrict-prototypes"
+#include <gtk/gtk.h>
+#pragma GCC diagnostic error "-Wstrict-prototypes"
+
+int main(int argc, char *argv[])
+{
+	gtk_init(&argc, &argv);
+	gtk_info_bar_new();
+
+	return 0;
+}
diff --git a/tools/perf/config/feature-checks/test-gtk2.c b/tools/perf/config/feature-checks/test-gtk2.c
new file mode 100644
index 0000000000000000000000000000000000000000..6bd80e5094391f10e03abeb24d6c02953ba74a2c
--- /dev/null
+++ b/tools/perf/config/feature-checks/test-gtk2.c
@@ -0,0 +1,10 @@
+#pragma GCC diagnostic ignored "-Wstrict-prototypes"
+#include <gtk/gtk.h>
+#pragma GCC diagnostic error "-Wstrict-prototypes"
+
+int main(int argc, char *argv[])
+{
+	gtk_init(&argc, &argv);
+
+        return 0;
+}
diff --git a/tools/perf/config/feature-checks/test-hello.c b/tools/perf/config/feature-checks/test-hello.c
new file mode 100644
index 0000000000000000000000000000000000000000..c9f398d8786820588da652a7a89ef5c3e729127b
--- /dev/null
+++ b/tools/perf/config/feature-checks/test-hello.c
@@ -0,0 +1,6 @@
+#include <stdio.h>
+
+int main(void)
+{
+	return puts("hi");
+}
diff --git a/tools/perf/config/feature-checks/test-libaudit.c b/tools/perf/config/feature-checks/test-libaudit.c
new file mode 100644
index 0000000000000000000000000000000000000000..afc019f08641c5d9cd9d8cfeeb69aec7cd1d7687
--- /dev/null
+++ b/tools/perf/config/feature-checks/test-libaudit.c
@@ -0,0 +1,10 @@
+#include <libaudit.h>
+
+extern int printf(const char *format, ...);
+
+int main(void)
+{
+	printf("error message: %s\n", audit_errno_to_name(0));
+
+	return audit_open();
+}
diff --git a/tools/perf/config/feature-checks/test-libbfd.c b/tools/perf/config/feature-checks/test-libbfd.c
new file mode 100644
index 0000000000000000000000000000000000000000..24059907e990da208af957549b662e29f13695b9
--- /dev/null
+++ b/tools/perf/config/feature-checks/test-libbfd.c
@@ -0,0 +1,15 @@
+#include <bfd.h>
+
+extern int printf(const char *format, ...);
+
+int main(void)
+{
+	char symbol[4096] = "FieldName__9ClassNameFd";
+	char *tmp;
+
+	tmp = bfd_demangle(0, symbol, 0);
+
+	printf("demangled symbol: {%s}\n", tmp);
+
+	return 0;
+}
diff --git a/tools/perf/config/feature-checks/test-libelf-getphdrnum.c b/tools/perf/config/feature-checks/test-libelf-getphdrnum.c
new file mode 100644
index 0000000000000000000000000000000000000000..d710459306c3aad3d322be3ad9073d5f886691fd
--- /dev/null
+++ b/tools/perf/config/feature-checks/test-libelf-getphdrnum.c
@@ -0,0 +1,8 @@
+#include <libelf.h>
+
+int main(void)
+{
+	size_t dst;
+
+	return elf_getphdrnum(0, &dst);
+}
diff --git a/tools/perf/config/feature-checks/test-libelf-mmap.c b/tools/perf/config/feature-checks/test-libelf-mmap.c
new file mode 100644
index 0000000000000000000000000000000000000000..564427d7ef18d09a650f8cdc7cb21c09746e3d50
--- /dev/null
+++ b/tools/perf/config/feature-checks/test-libelf-mmap.c
@@ -0,0 +1,8 @@
+#include <libelf.h>
+
+int main(void)
+{
+	Elf *elf = elf_begin(0, ELF_C_READ_MMAP, 0);
+
+	return (long)elf;
+}
diff --git a/tools/perf/config/feature-checks/test-libelf.c b/tools/perf/config/feature-checks/test-libelf.c
new file mode 100644
index 0000000000000000000000000000000000000000..08db322d89576cf9cc6f459a151a5f154224e447
--- /dev/null
+++ b/tools/perf/config/feature-checks/test-libelf.c
@@ -0,0 +1,8 @@
+#include <libelf.h>
+
+int main(void)
+{
+	Elf *elf = elf_begin(0, ELF_C_READ, 0);
+
+	return (long)elf;
+}
diff --git a/tools/perf/config/feature-checks/test-libnuma.c b/tools/perf/config/feature-checks/test-libnuma.c
new file mode 100644
index 0000000000000000000000000000000000000000..4763d9cd587d51c8528ee9d5f80165dc93bb6565
--- /dev/null
+++ b/tools/perf/config/feature-checks/test-libnuma.c
@@ -0,0 +1,9 @@
+#include <numa.h>
+#include <numaif.h>
+
+int main(void)
+{
+	numa_available();
+
+	return 0;
+}
diff --git a/tools/perf/config/feature-checks/test-libperl.c b/tools/perf/config/feature-checks/test-libperl.c
new file mode 100644
index 0000000000000000000000000000000000000000..8871f6a0fdb4817e8fd40de71a7db40085e5588f
--- /dev/null
+++ b/tools/perf/config/feature-checks/test-libperl.c
@@ -0,0 +1,9 @@
+#include <EXTERN.h>
+#include <perl.h>
+
+int main(void)
+{
+	perl_alloc();
+
+	return 0;
+}
diff --git a/tools/perf/config/feature-checks/test-libpython-version.c b/tools/perf/config/feature-checks/test-libpython-version.c
new file mode 100644
index 0000000000000000000000000000000000000000..facea122d812eb27585037afe9b3a97737cb31bf
--- /dev/null
+++ b/tools/perf/config/feature-checks/test-libpython-version.c
@@ -0,0 +1,10 @@
+#include <Python.h>
+
+#if PY_VERSION_HEX >= 0x03000000
+	#error
+#endif
+
+int main(void)
+{
+	return 0;
+}
diff --git a/tools/perf/config/feature-checks/test-libpython.c b/tools/perf/config/feature-checks/test-libpython.c
new file mode 100644
index 0000000000000000000000000000000000000000..b24b28ad6324ecab7b3c4ffa3545a4aa1a68faae
--- /dev/null
+++ b/tools/perf/config/feature-checks/test-libpython.c
@@ -0,0 +1,8 @@
+#include <Python.h>
+
+int main(void)
+{
+	Py_Initialize();
+
+	return 0;
+}
diff --git a/tools/perf/config/feature-checks/test-libslang.c b/tools/perf/config/feature-checks/test-libslang.c
new file mode 100644
index 0000000000000000000000000000000000000000..22ff22ed94d1fe151df640ad3d089a048957fb6a
--- /dev/null
+++ b/tools/perf/config/feature-checks/test-libslang.c
@@ -0,0 +1,6 @@
+#include <slang.h>
+
+int main(void)
+{
+	return SLsmg_init_smg();
+}
diff --git a/tools/perf/config/feature-checks/test-libunwind.c b/tools/perf/config/feature-checks/test-libunwind.c
new file mode 100644
index 0000000000000000000000000000000000000000..43b9369bcab7d03125e19d6f4001080a6b92987f
--- /dev/null
+++ b/tools/perf/config/feature-checks/test-libunwind.c
@@ -0,0 +1,27 @@
+#include <libunwind.h>
+#include <stdlib.h>
+
+extern int UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as,
+                                      unw_word_t ip,
+                                      unw_dyn_info_t *di,
+                                      unw_proc_info_t *pi,
+                                      int need_unwind_info, void *arg);
+
+
+#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table)
+
+static unw_accessors_t accessors;
+
+int main(void)
+{
+	unw_addr_space_t addr_space;
+
+	addr_space = unw_create_addr_space(&accessors, 0);
+	if (addr_space)
+		return 0;
+
+	unw_init_remote(NULL, addr_space, NULL);
+	dwarf_search_unwind_table(addr_space, 0, NULL, NULL, 0, NULL);
+
+	return 0;
+}
diff --git a/tools/perf/config/feature-checks/test-on-exit.c b/tools/perf/config/feature-checks/test-on-exit.c
new file mode 100644
index 0000000000000000000000000000000000000000..8e88b16e6ded0a799038b3ce91ee7cf772d01031
--- /dev/null
+++ b/tools/perf/config/feature-checks/test-on-exit.c
@@ -0,0 +1,16 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+static void exit_fn(int status, void *__data)
+{
+	printf("exit status: %d, data: %d\n", status, *(int *)__data);
+}
+
+static int data = 123;
+
+int main(void)
+{
+	on_exit(exit_fn, &data);
+
+	return 321;
+}
diff --git a/tools/perf/config/feature-checks/test-stackprotector-all.c b/tools/perf/config/feature-checks/test-stackprotector-all.c
new file mode 100644
index 0000000000000000000000000000000000000000..c9f398d8786820588da652a7a89ef5c3e729127b
--- /dev/null
+++ b/tools/perf/config/feature-checks/test-stackprotector-all.c
@@ -0,0 +1,6 @@
+#include <stdio.h>
+
+int main(void)
+{
+	return puts("hi");
+}
diff --git a/tools/perf/config/feature-checks/test-stackprotector.c b/tools/perf/config/feature-checks/test-stackprotector.c
new file mode 100644
index 0000000000000000000000000000000000000000..c9f398d8786820588da652a7a89ef5c3e729127b
--- /dev/null
+++ b/tools/perf/config/feature-checks/test-stackprotector.c
@@ -0,0 +1,6 @@
+#include <stdio.h>
+
+int main(void)
+{
+	return puts("hi");
+}
diff --git a/tools/perf/config/feature-checks/test-timerfd.c b/tools/perf/config/feature-checks/test-timerfd.c
new file mode 100644
index 0000000000000000000000000000000000000000..8c5c083b4d3caadf8bc92b82910a261f3f91a16a
--- /dev/null
+++ b/tools/perf/config/feature-checks/test-timerfd.c
@@ -0,0 +1,18 @@
+/*
+ * test for timerfd functions used by perf-kvm-stat-live
+ */
+#include <sys/timerfd.h>
+
+int main(void)
+{
+	struct itimerspec new_value;
+
+	int fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK);
+	if (fd < 0)
+		return 1;
+
+	if (timerfd_settime(fd, 0, &new_value, NULL) != 0)
+		return 1;
+
+	return 0;
+}
diff --git a/tools/perf/config/feature-checks/test-volatile-register-var.c b/tools/perf/config/feature-checks/test-volatile-register-var.c
new file mode 100644
index 0000000000000000000000000000000000000000..c9f398d8786820588da652a7a89ef5c3e729127b
--- /dev/null
+++ b/tools/perf/config/feature-checks/test-volatile-register-var.c
@@ -0,0 +1,6 @@
+#include <stdio.h>
+
+int main(void)
+{
+	return puts("hi");
+}
diff --git a/tools/perf/config/feature-tests.mak b/tools/perf/config/feature-tests.mak
deleted file mode 100644
index f79305739eccdbea8a7ee7dc65be5862543808a4..0000000000000000000000000000000000000000
--- a/tools/perf/config/feature-tests.mak
+++ /dev/null
@@ -1,246 +0,0 @@
-define SOURCE_HELLO
-#include <stdio.h>
-int main(void)
-{
-	return puts(\"hi\");
-}
-endef
-
-ifndef NO_DWARF
-define SOURCE_DWARF
-#include <dwarf.h>
-#include <elfutils/libdw.h>
-#include <elfutils/version.h>
-#ifndef _ELFUTILS_PREREQ
-#error
-#endif
-
-int main(void)
-{
-	Dwarf *dbg = dwarf_begin(0, DWARF_C_READ);
-	return (long)dbg;
-}
-endef
-endif
-
-define SOURCE_LIBELF
-#include <libelf.h>
-
-int main(void)
-{
-	Elf *elf = elf_begin(0, ELF_C_READ, 0);
-	return (long)elf;
-}
-endef
-
-define SOURCE_GLIBC
-#include <gnu/libc-version.h>
-
-int main(void)
-{
-	const char *version = gnu_get_libc_version();
-	return (long)version;
-}
-endef
-
-define SOURCE_BIONIC
-#include <android/api-level.h>
-
-int main(void)
-{
-	return __ANDROID_API__;
-}
-endef
-
-define SOURCE_ELF_MMAP
-#include <libelf.h>
-int main(void)
-{
-	Elf *elf = elf_begin(0, ELF_C_READ_MMAP, 0);
-	return (long)elf;
-}
-endef
-
-define SOURCE_ELF_GETPHDRNUM
-#include <libelf.h>
-int main(void)
-{
-	size_t dst;
-	return elf_getphdrnum(0, &dst);
-}
-endef
-
-ifndef NO_SLANG
-define SOURCE_SLANG
-#include <slang.h>
-
-int main(void)
-{
-	return SLsmg_init_smg();
-}
-endef
-endif
-
-ifndef NO_GTK2
-define SOURCE_GTK2
-#pragma GCC diagnostic ignored \"-Wstrict-prototypes\"
-#include <gtk/gtk.h>
-#pragma GCC diagnostic error \"-Wstrict-prototypes\"
-
-int main(int argc, char *argv[])
-{
-        gtk_init(&argc, &argv);
-
-        return 0;
-}
-endef
-
-define SOURCE_GTK2_INFOBAR
-#pragma GCC diagnostic ignored \"-Wstrict-prototypes\"
-#include <gtk/gtk.h>
-#pragma GCC diagnostic error \"-Wstrict-prototypes\"
-
-int main(void)
-{
-	gtk_info_bar_new();
-
-	return 0;
-}
-endef
-endif
-
-ifndef NO_LIBPERL
-define SOURCE_PERL_EMBED
-#include <EXTERN.h>
-#include <perl.h>
-
-int main(void)
-{
-perl_alloc();
-return 0;
-}
-endef
-endif
-
-ifndef NO_LIBPYTHON
-define SOURCE_PYTHON_VERSION
-#include <Python.h>
-#if PY_VERSION_HEX >= 0x03000000
-	#error
-#endif
-int main(void)
-{
-	return 0;
-}
-endef
-define SOURCE_PYTHON_EMBED
-#include <Python.h>
-int main(void)
-{
-	Py_Initialize();
-	return 0;
-}
-endef
-endif
-
-define SOURCE_BFD
-#include <bfd.h>
-
-int main(void)
-{
-	bfd_demangle(0, 0, 0);
-	return 0;
-}
-endef
-
-define SOURCE_CPLUS_DEMANGLE
-extern char *cplus_demangle(const char *, int);
-
-int main(void)
-{
-	cplus_demangle(0, 0);
-	return 0;
-}
-endef
-
-define SOURCE_STRLCPY
-#include <stdlib.h>
-extern size_t strlcpy(char *dest, const char *src, size_t size);
-
-int main(void)
-{
-	strlcpy(NULL, NULL, 0);
-	return 0;
-}
-endef
-
-ifndef NO_LIBUNWIND
-define SOURCE_LIBUNWIND
-#include <libunwind.h>
-#include <stdlib.h>
-
-extern int UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as,
-                                      unw_word_t ip,
-                                      unw_dyn_info_t *di,
-                                      unw_proc_info_t *pi,
-                                      int need_unwind_info, void *arg);
-
-
-#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table)
-
-int main(void)
-{
-	unw_addr_space_t addr_space;
-	addr_space = unw_create_addr_space(NULL, 0);
-	unw_init_remote(NULL, addr_space, NULL);
-	dwarf_search_unwind_table(addr_space, 0, NULL, NULL, 0, NULL);
-	return 0;
-}
-endef
-endif
-
-ifndef NO_BACKTRACE
-define SOURCE_BACKTRACE
-#include <execinfo.h>
-#include <stdio.h>
-
-int main(void)
-{
-	backtrace(NULL, 0);
-	backtrace_symbols(NULL, 0);
-	return 0;
-}
-endef
-endif
-
-ifndef NO_LIBAUDIT
-define SOURCE_LIBAUDIT
-#include <libaudit.h>
-
-int main(void)
-{
-	printf(\"error message: %s\", audit_errno_to_name(0));
-	return audit_open();
-}
-endef
-endif
-
-define SOURCE_ON_EXIT
-#include <stdio.h>
-
-int main(void)
-{
-	return on_exit(NULL, NULL);
-}
-endef
-
-define SOURCE_LIBNUMA
-#include <numa.h>
-#include <numaif.h>
-
-int main(void)
-{
-	numa_available();
-	return 0;
-}
-endef
diff --git a/tools/perf/config/utilities.mak b/tools/perf/config/utilities.mak
index 94d2d4f9c35d0e39ed0b570b033aca5294611736..f168debc5be268a75456a3639ccbf120ded594a2 100644
--- a/tools/perf/config/utilities.mak
+++ b/tools/perf/config/utilities.mak
@@ -179,16 +179,9 @@ _ge_attempt = $(if $(get-executable),$(get-executable),$(_gea_warn)$(call _gea_e
 _gea_warn = $(warning The path '$(1)' is not executable.)
 _gea_err  = $(if $(1),$(error Please set '$(1)' appropriately))
 
-# try-cc
-# Usage: option = $(call try-cc, source-to-build, cc-options, msg)
-ifneq ($(V),1)
-TRY_CC_OUTPUT= > /dev/null 2>&1
+ifneq ($(findstring $(MAKEFLAGS),s),s)
+  ifneq ($(V),1)
+    QUIET_CLEAN		= @printf '  CLEAN    %s\n' $1;
+    QUIET_INSTALL	= @printf '  INSTALL  %s\n' $1;
+  endif
 endif
-TRY_CC_MSG=echo "    CHK $(3)" 1>&2;
-
-try-cc = $(shell sh -c						  \
-	'TMP="$(OUTPUT)$(TMPOUT).$$$$";				  \
-	 $(TRY_CC_MSG)						  \
-	 echo "$(1)" |						  \
-	 $(CC) -x c - $(2) -o "$$TMP" $(TRY_CC_OUTPUT) && echo y; \
-	 rm -f "$$TMP"')
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 85e1aed95204c21de59b2b80721735670839b814..8b38b4e80ec2c6f6de581022143c989ed6a0e955 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -49,14 +49,14 @@ static struct cmd_struct commands[] = {
 	{ "version",	cmd_version,	0 },
 	{ "script",	cmd_script,	0 },
 	{ "sched",	cmd_sched,	0 },
-#ifdef LIBELF_SUPPORT
+#ifdef HAVE_LIBELF_SUPPORT
 	{ "probe",	cmd_probe,	0 },
 #endif
 	{ "kmem",	cmd_kmem,	0 },
 	{ "lock",	cmd_lock,	0 },
 	{ "kvm",	cmd_kvm,	0 },
 	{ "test",	cmd_test,	0 },
-#ifdef LIBAUDIT_SUPPORT
+#ifdef HAVE_LIBAUDIT_SUPPORT
 	{ "trace",	cmd_trace,	0 },
 #endif
 	{ "inject",	cmd_inject,	0 },
@@ -456,6 +456,7 @@ int main(int argc, const char **argv)
 {
 	const char *cmd;
 
+	/* The page_size is placed in util object. */
 	page_size = sysconf(_SC_PAGE_SIZE);
 
 	cmd = perf_extract_argv0_path(argv[0]);
@@ -480,7 +481,14 @@ int main(int argc, const char **argv)
 		fprintf(stderr, "cannot handle %s internally", cmd);
 		goto out;
 	}
-
+#ifdef HAVE_LIBAUDIT_SUPPORT
+	if (!prefixcmp(cmd, "trace")) {
+		set_buildid_dir();
+		setup_path();
+		argv[0] = "trace";
+		return cmd_trace(argc, argv, NULL);
+	}
+#endif
 	/* Look for flags.. */
 	argv++;
 	argc--;
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index cf20187eee0a7345373dfc053c580bc1e77c4bb8..6a587e84fdfe2a238904434a8bdbc1edec7c12b8 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -4,6 +4,8 @@
 #include <asm/unistd.h>
 
 #if defined(__i386__)
+#define mb()		asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
+#define wmb()		asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
 #define rmb()		asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
 #define cpu_relax()	asm volatile("rep; nop" ::: "memory");
 #define CPUINFO_PROC	"model name"
@@ -13,6 +15,8 @@
 #endif
 
 #if defined(__x86_64__)
+#define mb()		asm volatile("mfence" ::: "memory")
+#define wmb()		asm volatile("sfence" ::: "memory")
 #define rmb()		asm volatile("lfence" ::: "memory")
 #define cpu_relax()	asm volatile("rep; nop" ::: "memory");
 #define CPUINFO_PROC	"model name"
@@ -23,45 +27,61 @@
 
 #ifdef __powerpc__
 #include "../../arch/powerpc/include/uapi/asm/unistd.h"
+#define mb()		asm volatile ("sync" ::: "memory")
+#define wmb()		asm volatile ("sync" ::: "memory")
 #define rmb()		asm volatile ("sync" ::: "memory")
-#define cpu_relax()	asm volatile ("" ::: "memory");
 #define CPUINFO_PROC	"cpu"
 #endif
 
 #ifdef __s390__
+#define mb()		asm volatile("bcr 15,0" ::: "memory")
+#define wmb()		asm volatile("bcr 15,0" ::: "memory")
 #define rmb()		asm volatile("bcr 15,0" ::: "memory")
-#define cpu_relax()	asm volatile("" ::: "memory");
 #endif
 
 #ifdef __sh__
 #if defined(__SH4A__) || defined(__SH5__)
+# define mb()		asm volatile("synco" ::: "memory")
+# define wmb()		asm volatile("synco" ::: "memory")
 # define rmb()		asm volatile("synco" ::: "memory")
 #else
+# define mb()		asm volatile("" ::: "memory")
+# define wmb()		asm volatile("" ::: "memory")
 # define rmb()		asm volatile("" ::: "memory")
 #endif
-#define cpu_relax()	asm volatile("" ::: "memory")
 #define CPUINFO_PROC	"cpu type"
 #endif
 
 #ifdef __hppa__
+#define mb()		asm volatile("" ::: "memory")
+#define wmb()		asm volatile("" ::: "memory")
 #define rmb()		asm volatile("" ::: "memory")
-#define cpu_relax()	asm volatile("" ::: "memory");
 #define CPUINFO_PROC	"cpu"
 #endif
 
 #ifdef __sparc__
+#ifdef __LP64__
+#define mb()		asm volatile("ba,pt %%xcc, 1f\n"	\
+				     "membar #StoreLoad\n"	\
+				     "1:\n":::"memory")
+#else
+#define mb()		asm volatile("":::"memory")
+#endif
+#define wmb()		asm volatile("":::"memory")
 #define rmb()		asm volatile("":::"memory")
-#define cpu_relax()	asm volatile("":::"memory")
 #define CPUINFO_PROC	"cpu"
 #endif
 
 #ifdef __alpha__
+#define mb()		asm volatile("mb" ::: "memory")
+#define wmb()		asm volatile("wmb" ::: "memory")
 #define rmb()		asm volatile("mb" ::: "memory")
-#define cpu_relax()	asm volatile("" ::: "memory")
 #define CPUINFO_PROC	"cpu model"
 #endif
 
 #ifdef __ia64__
+#define mb()		asm volatile ("mf" ::: "memory")
+#define wmb()		asm volatile ("mf" ::: "memory")
 #define rmb()		asm volatile ("mf" ::: "memory")
 #define cpu_relax()	asm volatile ("hint @pause" ::: "memory")
 #define CPUINFO_PROC	"model name"
@@ -72,40 +92,55 @@
  * Use the __kuser_memory_barrier helper in the CPU helper page. See
  * arch/arm/kernel/entry-armv.S in the kernel source for details.
  */
+#define mb()		((void(*)(void))0xffff0fa0)()
+#define wmb()		((void(*)(void))0xffff0fa0)()
 #define rmb()		((void(*)(void))0xffff0fa0)()
-#define cpu_relax()	asm volatile("":::"memory")
 #define CPUINFO_PROC	"Processor"
 #endif
 
 #ifdef __aarch64__
-#define rmb()		asm volatile("dmb ld" ::: "memory")
+#define mb()		asm volatile("dmb ish" ::: "memory")
+#define wmb()		asm volatile("dmb ishld" ::: "memory")
+#define rmb()		asm volatile("dmb ishst" ::: "memory")
 #define cpu_relax()	asm volatile("yield" ::: "memory")
 #endif
 
 #ifdef __mips__
-#define rmb()		asm volatile(					\
+#define mb()		asm volatile(					\
 				".set	mips2\n\t"			\
 				"sync\n\t"				\
 				".set	mips0"				\
 				: /* no output */			\
 				: /* no input */			\
 				: "memory")
-#define cpu_relax()	asm volatile("" ::: "memory")
+#define wmb()	mb()
+#define rmb()	mb()
 #define CPUINFO_PROC	"cpu model"
 #endif
 
 #ifdef __arc__
+#define mb()		asm volatile("" ::: "memory")
+#define wmb()		asm volatile("" ::: "memory")
 #define rmb()		asm volatile("" ::: "memory")
-#define cpu_relax()	rmb()
 #define CPUINFO_PROC	"Processor"
 #endif
 
 #ifdef __metag__
+#define mb()		asm volatile("" ::: "memory")
+#define wmb()		asm volatile("" ::: "memory")
 #define rmb()		asm volatile("" ::: "memory")
-#define cpu_relax()	asm volatile("" ::: "memory")
 #define CPUINFO_PROC	"CPU"
 #endif
 
+#define barrier() asm volatile ("" ::: "memory")
+
+#ifndef cpu_relax
+#define cpu_relax() barrier()
+#endif
+
+#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
+
+
 #include <time.h>
 #include <unistd.h>
 #include <sys/types.h>
@@ -182,7 +217,9 @@ struct ip_callchain {
 struct branch_flags {
 	u64 mispred:1;
 	u64 predicted:1;
-	u64 reserved:62;
+	u64 in_tx:1;
+	u64 abort:1;
+	u64 reserved:60;
 };
 
 struct branch_entry {
@@ -218,7 +255,6 @@ struct perf_record_opts {
 	bool	     no_delay;
 	bool	     no_inherit;
 	bool	     no_samples;
-	bool	     pipe_output;
 	bool	     raw_samples;
 	bool	     sample_address;
 	bool	     sample_weight;
@@ -231,6 +267,7 @@ struct perf_record_opts {
 	u64	     default_interval;
 	u64	     user_interval;
 	u16	     stack_dump_size;
+	bool	     sample_transaction;
 };
 
 #endif
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/Context.c b/tools/perf/scripts/python/Perf-Trace-Util/Context.c
index 315067b8f5522ae9cafbef1df506aca814e93012..fcd1dd66790623f4c6e08d62490f75e03b2e32dd 100644
--- a/tools/perf/scripts/python/Perf-Trace-Util/Context.c
+++ b/tools/perf/scripts/python/Perf-Trace-Util/Context.c
@@ -25,7 +25,7 @@
 
 PyMODINIT_FUNC initperf_trace_context(void);
 
-static PyObject *perf_trace_context_common_pc(PyObject *self, PyObject *args)
+static PyObject *perf_trace_context_common_pc(PyObject *obj, PyObject *args)
 {
 	static struct scripting_context *scripting_context;
 	PyObject *context;
@@ -40,7 +40,7 @@ static PyObject *perf_trace_context_common_pc(PyObject *self, PyObject *args)
 	return Py_BuildValue("i", retval);
 }
 
-static PyObject *perf_trace_context_common_flags(PyObject *self,
+static PyObject *perf_trace_context_common_flags(PyObject *obj,
 						 PyObject *args)
 {
 	static struct scripting_context *scripting_context;
@@ -56,7 +56,7 @@ static PyObject *perf_trace_context_common_flags(PyObject *self,
 	return Py_BuildValue("i", retval);
 }
 
-static PyObject *perf_trace_context_common_lock_depth(PyObject *self,
+static PyObject *perf_trace_context_common_lock_depth(PyObject *obj,
 						      PyObject *args)
 {
 	static struct scripting_context *scripting_context;
diff --git a/tools/perf/tests/attr/README b/tools/perf/tests/attr/README
index d102957cd59ae927b40f065579f96cf8c140a30e..430024f618f1b307381c43bc5e363f2dc6bb08cc 100644
--- a/tools/perf/tests/attr/README
+++ b/tools/perf/tests/attr/README
@@ -44,9 +44,9 @@ Following tests are defined (with perf commands):
   perf record -c 123 kill                       (test-record-count)
   perf record -d kill                           (test-record-data)
   perf record -F 100 kill                       (test-record-freq)
-  perf record -g -- kill                        (test-record-graph-default)
-  perf record -g dwarf -- kill                  (test-record-graph-dwarf)
-  perf record -g fp kill                        (test-record-graph-fp)
+  perf record -g kill                           (test-record-graph-default)
+  perf record --call-graph dwarf kill		(test-record-graph-dwarf)
+  perf record --call-graph fp kill              (test-record-graph-fp)
   perf record --group -e cycles,instructions kill (test-record-group)
   perf record -e '{cycles,instructions}' kill   (test-record-group1)
   perf record -D kill                           (test-record-no-delay)
diff --git a/tools/perf/tests/attr/test-record-graph-default b/tools/perf/tests/attr/test-record-graph-default
index 833d1849d76741bc82db78fef86fce6c8c6513ef..853597a9a8f6484a8446b9c8b94738ccaea37c2c 100644
--- a/tools/perf/tests/attr/test-record-graph-default
+++ b/tools/perf/tests/attr/test-record-graph-default
@@ -1,6 +1,6 @@
 [config]
 command = record
-args    = -g -- kill >/dev/null 2>&1
+args    = -g kill >/dev/null 2>&1
 
 [event:base-record]
 sample_type=295
diff --git a/tools/perf/tests/attr/test-record-graph-dwarf b/tools/perf/tests/attr/test-record-graph-dwarf
index e93e082f52080340eac2a5b6946106f2bbfbffe6..d6f324ea578cf903e0eb99c0c9cbb040855a2fdb 100644
--- a/tools/perf/tests/attr/test-record-graph-dwarf
+++ b/tools/perf/tests/attr/test-record-graph-dwarf
@@ -1,6 +1,6 @@
 [config]
 command = record
-args    = -g dwarf -- kill >/dev/null 2>&1
+args    = --call-graph dwarf -- kill >/dev/null 2>&1
 
 [event:base-record]
 sample_type=12583
diff --git a/tools/perf/tests/attr/test-record-graph-fp b/tools/perf/tests/attr/test-record-graph-fp
index 7cef3743f03f894561d4e039451af4ffa32dcfc3..055e3bee79935d50fde8773f84b0fcadd466b1d2 100644
--- a/tools/perf/tests/attr/test-record-graph-fp
+++ b/tools/perf/tests/attr/test-record-graph-fp
@@ -1,6 +1,6 @@
 [config]
 command = record
-args    = -g fp kill >/dev/null 2>&1
+args    = --call-graph fp kill >/dev/null 2>&1
 
 [event:base-record]
 sample_type=295
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index e3fedfa2906e5912d6dac6da9ef73daa53820b6d..49ccc3b2995ee30c8c2ad55a4c4b8308d9ad9055 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -276,7 +276,7 @@ static int process_event(struct machine *machine, struct perf_evlist *evlist,
 		return process_sample_event(machine, evlist, event, state);
 
 	if (event->header.type < PERF_RECORD_MAX)
-		return machine__process_event(machine, event);
+		return machine__process_event(machine, event, NULL);
 
 	return 0;
 }
diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c
index dffe0551acaa717add54e96bfb864c8fbf6cf2d6..9cc81a3eb9b456e6d2693bce799fedb0caba9862 100644
--- a/tools/perf/tests/dso-data.c
+++ b/tools/perf/tests/dso-data.c
@@ -35,6 +35,7 @@ static char *test_file(int size)
 	if (size != write(fd, buf, size))
 		templ = NULL;
 
+	free(buf);
 	close(fd);
 	return templ;
 }
diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c
index 4228ffc0d9681d7acda97ecacccf8bea9b76f1e4..173bf42cc03e73e8291e4a6a7cf99f5d8d6d3107 100644
--- a/tools/perf/tests/hists_link.c
+++ b/tools/perf/tests/hists_link.c
@@ -93,7 +93,7 @@ static struct machine *setup_fake_machine(struct machines *machines)
 		if (thread == NULL)
 			goto out;
 
-		thread__set_comm(thread, fake_threads[i].comm);
+		thread__set_comm(thread, fake_threads[i].comm, 0);
 	}
 
 	for (i = 0; i < ARRAY_SIZE(fake_mmap_info); i++) {
@@ -110,7 +110,7 @@ static struct machine *setup_fake_machine(struct machines *machines)
 		strcpy(fake_mmap_event.mmap.filename,
 		       fake_mmap_info[i].filename);
 
-		machine__process_mmap_event(machine, &fake_mmap_event);
+		machine__process_mmap_event(machine, &fake_mmap_event, NULL);
 	}
 
 	for (i = 0; i < ARRAY_SIZE(fake_symbols); i++) {
@@ -222,7 +222,8 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
 							  &sample) < 0)
 				goto out;
 
-			he = __hists__add_entry(&evsel->hists, &al, NULL, 1, 1);
+			he = __hists__add_entry(&evsel->hists, &al, NULL,
+						NULL, NULL, 1, 1, 0);
 			if (he == NULL)
 				goto out;
 
@@ -244,7 +245,8 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
 							  &sample) < 0)
 				goto out;
 
-			he = __hists__add_entry(&evsel->hists, &al, NULL, 1, 1);
+			he = __hists__add_entry(&evsel->hists, &al, NULL,
+						NULL, NULL, 1, 1, 0);
 			if (he == NULL)
 				goto out;
 
@@ -419,7 +421,7 @@ static void print_hists(struct hists *hists)
 		he = rb_entry(node, struct hist_entry, rb_node_in);
 
 		pr_info("%2d: entry: %-8s [%-8s] %20s: period = %"PRIu64"\n",
-			i, he->thread->comm, he->ms.map->dso->short_name,
+			i, thread__comm_str(he->thread), he->ms.map->dso->short_name,
 			he->ms.sym->name, he->stat.period);
 
 		i++;
@@ -465,7 +467,7 @@ int test__hists_link(void)
 		goto out;
 
 	list_for_each_entry(evsel, &evlist->entries, node) {
-		hists__collapse_resort(&evsel->hists);
+		hists__collapse_resort(&evsel->hists, NULL);
 
 		if (verbose > 2)
 			print_hists(&evsel->hists);
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 48114d164e9fb991898d18c28d027ba744181dad..ef671cd41bb3ef554478db74cecc61c6df28afca 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -2,7 +2,7 @@
 #include "parse-events.h"
 #include "evsel.h"
 #include "evlist.h"
-#include "sysfs.h"
+#include "fs.h"
 #include <lk/debugfs.h>
 #include "tests.h"
 #include <linux/hw_breakpoint.h>
@@ -1456,7 +1456,7 @@ static int test_pmu(void)
 	int ret;
 
 	snprintf(path, PATH_MAX, "%s/bus/event_source/devices/cpu/format/",
-		 sysfs_find_mountpoint());
+		 sysfs__mountpoint());
 
 	ret = stat(path, &st);
 	if (ret)
@@ -1473,7 +1473,7 @@ static int test_pmu_events(void)
 	int ret;
 
 	snprintf(path, PATH_MAX, "%s/bus/event_source/devices/cpu/events/",
-		 sysfs_find_mountpoint());
+		 sysfs__mountpoint());
 
 	ret = stat(path, &st);
 	if (ret) {
diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
index 7923b06ffc9198adde790901c8709dd8e66fa0b7..93a62b06c3afac27f7398f1572430cf29868b9d5 100644
--- a/tools/perf/tests/perf-record.c
+++ b/tools/perf/tests/perf-record.c
@@ -45,7 +45,7 @@ int test__PERF_RECORD(void)
 	};
 	cpu_set_t cpu_mask;
 	size_t cpu_mask_size = sizeof(cpu_mask);
-	struct perf_evlist *evlist = perf_evlist__new();
+	struct perf_evlist *evlist = perf_evlist__new_default();
 	struct perf_evsel *evsel;
 	struct perf_sample sample;
 	const char *cmd = "sleep";
@@ -65,16 +65,6 @@ int test__PERF_RECORD(void)
 		goto out;
 	}
 
-	/*
-	 * We need at least one evsel in the evlist, use the default
-	 * one: "cycles".
-	 */
-	err = perf_evlist__add_default(evlist);
-	if (err < 0) {
-		pr_debug("Not enough memory to create evsel\n");
-		goto out_delete_evlist;
-	}
-
 	/*
 	 * Create maps of threads and cpus to monitor. In this case
 	 * we start with all threads and cpus (-1, -1) but then in
diff --git a/tools/perf/tests/rdpmc.c b/tools/perf/tests/rdpmc.c
index ff94886aad995ec49e267c344885e34079dfdebd..46649c25fa5ed4982c364119792490c4ff861979 100644
--- a/tools/perf/tests/rdpmc.c
+++ b/tools/perf/tests/rdpmc.c
@@ -9,8 +9,6 @@
 
 #if defined(__x86_64__) || defined(__i386__)
 
-#define barrier() asm volatile("" ::: "memory")
-
 static u64 rdpmc(unsigned int counter)
 {
 	unsigned int low, high;
diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c
index 77f598dbd97a30b1f6429bdb131195b8766c963d..1b677202638d087b94903f3f395d9fab8ae15bc7 100644
--- a/tools/perf/tests/sample-parsing.c
+++ b/tools/perf/tests/sample-parsing.c
@@ -121,6 +121,9 @@ static bool samples_same(const struct perf_sample *s1,
 	if (type & PERF_SAMPLE_DATA_SRC)
 		COMP(data_src);
 
+	if (type & PERF_SAMPLE_TRANSACTION)
+		COMP(transaction);
+
 	return true;
 }
 
@@ -165,6 +168,7 @@ static int do_test(u64 sample_type, u64 sample_regs_user, u64 read_format)
 		.cpu		= 110,
 		.raw_size	= sizeof(raw_data),
 		.data_src	= 111,
+		.transaction	= 112,
 		.raw_data	= (void *)raw_data,
 		.callchain	= &callchain.callchain,
 		.branch_stack	= &branch_stack.branch_stack,
@@ -273,10 +277,11 @@ int test__sample_parsing(void)
 
 	/*
 	 * Fail the test if it has not been updated when new sample format bits
-	 * were added.
+	 * were added.  Please actually update the test rather than just change
+	 * the condition below.
 	 */
-	if (PERF_SAMPLE_MAX > PERF_SAMPLE_IDENTIFIER << 1) {
-		pr_debug("sample format has changed - test needs updating\n");
+	if (PERF_SAMPLE_MAX > PERF_SAMPLE_TRANSACTION << 1) {
+		pr_debug("sample format has changed, some new PERF_SAMPLE_ bit was introduced - test needs updating\n");
 		return -1;
 	}
 
diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c
index a3e64876e940020d93aa674ea064d735bf688713..c33d95f9559a0d08294b622600450646b9d0129c 100644
--- a/tools/perf/tests/task-exit.c
+++ b/tools/perf/tests/task-exit.c
@@ -37,20 +37,11 @@ int test__task_exit(void)
 	signal(SIGCHLD, sig_handler);
 	signal(SIGUSR1, sig_handler);
 
-	evlist = perf_evlist__new();
+	evlist = perf_evlist__new_default();
 	if (evlist == NULL) {
-		pr_debug("perf_evlist__new\n");
+		pr_debug("perf_evlist__new_default\n");
 		return -1;
 	}
-	/*
-	 * We need at least one evsel in the evlist, use the default
-	 * one: "cycles".
-	 */
-	err = perf_evlist__add_default(evlist);
-	if (err < 0) {
-		pr_debug("Not enough memory to create evsel\n");
-		goto out_free_evlist;
-	}
 
 	/*
 	 * Create maps of threads and cpus to monitor. In this case
@@ -117,7 +108,6 @@ int test__task_exit(void)
 	perf_evlist__close(evlist);
 out_delete_maps:
 	perf_evlist__delete_maps(evlist);
-out_free_evlist:
 	perf_evlist__delete(evlist);
 	return err;
 }
diff --git a/tools/perf/ui/browser.h b/tools/perf/ui/browser.h
index 404ff66a3e367a84e67c4b0896a28318b829e867..7d45d2f536013e49e230aee7856d9aeaf3964f6b 100644
--- a/tools/perf/ui/browser.h
+++ b/tools/perf/ui/browser.h
@@ -21,32 +21,32 @@ struct ui_browser {
 	void	      *priv;
 	const char    *title;
 	char	      *helpline;
-	unsigned int  (*refresh)(struct ui_browser *self);
-	void	      (*write)(struct ui_browser *self, void *entry, int row);
-	void	      (*seek)(struct ui_browser *self, off_t offset, int whence);
-	bool	      (*filter)(struct ui_browser *self, void *entry);
+	unsigned int  (*refresh)(struct ui_browser *browser);
+	void	      (*write)(struct ui_browser *browser, void *entry, int row);
+	void	      (*seek)(struct ui_browser *browser, off_t offset, int whence);
+	bool	      (*filter)(struct ui_browser *browser, void *entry);
 	u32	      nr_entries;
 	bool	      navkeypressed;
 	bool	      use_navkeypressed;
 };
 
 int  ui_browser__set_color(struct ui_browser *browser, int color);
-void ui_browser__set_percent_color(struct ui_browser *self,
+void ui_browser__set_percent_color(struct ui_browser *browser,
 				   double percent, bool current);
-bool ui_browser__is_current_entry(struct ui_browser *self, unsigned row);
-void ui_browser__refresh_dimensions(struct ui_browser *self);
-void ui_browser__reset_index(struct ui_browser *self);
+bool ui_browser__is_current_entry(struct ui_browser *browser, unsigned row);
+void ui_browser__refresh_dimensions(struct ui_browser *browser);
+void ui_browser__reset_index(struct ui_browser *browser);
 
-void ui_browser__gotorc(struct ui_browser *self, int y, int x);
+void ui_browser__gotorc(struct ui_browser *browser, int y, int x);
 void ui_browser__write_graph(struct ui_browser *browser, int graph);
 void __ui_browser__line_arrow(struct ui_browser *browser, unsigned int column,
 			      u64 start, u64 end);
 void __ui_browser__show_title(struct ui_browser *browser, const char *title);
 void ui_browser__show_title(struct ui_browser *browser, const char *title);
-int ui_browser__show(struct ui_browser *self, const char *title,
+int ui_browser__show(struct ui_browser *browser, const char *title,
 		     const char *helpline, ...);
-void ui_browser__hide(struct ui_browser *self);
-int ui_browser__refresh(struct ui_browser *self);
+void ui_browser__hide(struct ui_browser *browser);
+int ui_browser__refresh(struct ui_browser *browser);
 int ui_browser__run(struct ui_browser *browser, int delay_secs);
 void ui_browser__update_nr_entries(struct ui_browser *browser, u32 nr_entries);
 void ui_browser__handle_resize(struct ui_browser *browser);
@@ -63,11 +63,11 @@ int ui_browser__input_window(const char *title, const char *text, char *input,
 void ui_browser__argv_seek(struct ui_browser *browser, off_t offset, int whence);
 unsigned int ui_browser__argv_refresh(struct ui_browser *browser);
 
-void ui_browser__rb_tree_seek(struct ui_browser *self, off_t offset, int whence);
-unsigned int ui_browser__rb_tree_refresh(struct ui_browser *self);
+void ui_browser__rb_tree_seek(struct ui_browser *browser, off_t offset, int whence);
+unsigned int ui_browser__rb_tree_refresh(struct ui_browser *browser);
 
-void ui_browser__list_head_seek(struct ui_browser *self, off_t offset, int whence);
-unsigned int ui_browser__list_head_refresh(struct ui_browser *self);
+void ui_browser__list_head_seek(struct ui_browser *browser, off_t offset, int whence);
+unsigned int ui_browser__list_head_refresh(struct ui_browser *browser);
 
 void ui_browser__init(void);
 void annotate_browser__init(void);
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 08545ae46992cd5b284e279bf5ac131cf9c84e62..f0697a3aede04cb19f91d459cab62a6c08c5326f 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -442,35 +442,37 @@ static bool annotate_browser__callq(struct annotate_browser *browser,
 {
 	struct map_symbol *ms = browser->b.priv;
 	struct disasm_line *dl = browser->selection;
-	struct symbol *sym = ms->sym;
 	struct annotation *notes;
-	struct symbol *target;
-	u64 ip;
+	struct addr_map_symbol target = {
+		.map = ms->map,
+		.addr = map__objdump_2mem(ms->map, dl->ops.target.addr),
+	};
 	char title[SYM_TITLE_MAX_SIZE];
 
 	if (!ins__is_call(dl->ins))
 		return false;
 
-	ip = ms->map->map_ip(ms->map, dl->ops.target.addr);
-	target = map__find_symbol(ms->map, ip, NULL);
-	if (target == NULL) {
+	if (map_groups__find_ams(&target, NULL) ||
+	    map__rip_2objdump(target.map, target.map->map_ip(target.map,
+							     target.addr)) !=
+	    dl->ops.target.addr) {
 		ui_helpline__puts("The called function was not found.");
 		return true;
 	}
 
-	notes = symbol__annotation(target);
+	notes = symbol__annotation(target.sym);
 	pthread_mutex_lock(&notes->lock);
 
-	if (notes->src == NULL && symbol__alloc_hist(target) < 0) {
+	if (notes->src == NULL && symbol__alloc_hist(target.sym) < 0) {
 		pthread_mutex_unlock(&notes->lock);
 		ui__warning("Not enough memory for annotating '%s' symbol!\n",
-			    target->name);
+			    target.sym->name);
 		return true;
 	}
 
 	pthread_mutex_unlock(&notes->lock);
-	symbol__tui_annotate(target, ms->map, evsel, hbt);
-	sym_title(sym, ms->map, title, sizeof(title));
+	symbol__tui_annotate(target.sym, target.map, evsel, hbt);
+	sym_title(ms->sym, ms->map, title, sizeof(title));
 	ui_browser__show_title(&browser->b, title);
 	return true;
 }
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 7ef36c360471e3f2ccef82d4bd2a4e4fb684eb14..16848bb4c41842355be8c7a36d2df574a9c90214 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -1255,7 +1255,7 @@ static int hists__browser_title(struct hists *hists, char *bf, size_t size,
 	if (thread)
 		printed += scnprintf(bf + printed, size - printed,
 				    ", Thread: %s(%d)",
-				    (thread->comm_set ? thread->comm : ""),
+				     (thread->comm_set ? thread__comm_str(thread) : ""),
 				    thread->tid);
 	if (dso)
 		printed += scnprintf(bf + printed, size - printed,
@@ -1578,7 +1578,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 		if (thread != NULL &&
 		    asprintf(&options[nr_options], "Zoom %s %s(%d) thread",
 			     (browser->hists->thread_filter ? "out of" : "into"),
-			     (thread->comm_set ? thread->comm : ""),
+			     (thread->comm_set ? thread__comm_str(thread) : ""),
 			     thread->tid) > 0)
 			zoom_thread = nr_options++;
 
@@ -1598,7 +1598,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 			struct symbol *sym;
 
 			if (asprintf(&options[nr_options], "Run scripts for samples of thread [%s]",
-				browser->he_selection->thread->comm) > 0)
+				     thread__comm_str(browser->he_selection->thread)) > 0)
 				scripts_comm = nr_options++;
 
 			sym = browser->he_selection->ms.sym;
@@ -1701,7 +1701,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 				sort_thread.elide = false;
 			} else {
 				ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s(%d) thread\"",
-						   thread->comm_set ? thread->comm : "",
+						   thread->comm_set ? thread__comm_str(thread) : "",
 						   thread->tid);
 				browser->hists->thread_filter = thread;
 				sort_thread.elide = true;
@@ -1717,7 +1717,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 			memset(script_opt, 0, 64);
 
 			if (choice == scripts_comm)
-				sprintf(script_opt, " -c %s ", browser->he_selection->thread->comm);
+				sprintf(script_opt, " -c %s ", thread__comm_str(browser->he_selection->thread));
 
 			if (choice == scripts_symbol)
 				sprintf(script_opt, " -S %s ", browser->he_selection->ms.sym->name);
@@ -1889,7 +1889,7 @@ static int perf_evsel_menu__run(struct perf_evsel_menu *menu,
 	return key;
 }
 
-static bool filter_group_entries(struct ui_browser *self __maybe_unused,
+static bool filter_group_entries(struct ui_browser *browser __maybe_unused,
 				 void *entry)
 {
 	struct perf_evsel *evsel = list_entry(entry, struct perf_evsel, node);
diff --git a/tools/perf/ui/browsers/map.c b/tools/perf/ui/browsers/map.c
index 95c7cfb8f2c60577d75df489ea2ef1382375720d..b11639f3368272ae754853ae05a62accb1ee3d05 100644
--- a/tools/perf/ui/browsers/map.c
+++ b/tools/perf/ui/browsers/map.c
@@ -18,30 +18,30 @@ struct map_browser {
 	u8		  addrlen;
 };
 
-static void map_browser__write(struct ui_browser *self, void *nd, int row)
+static void map_browser__write(struct ui_browser *browser, void *nd, int row)
 {
 	struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
-	struct map_browser *mb = container_of(self, struct map_browser, b);
-	bool current_entry = ui_browser__is_current_entry(self, row);
+	struct map_browser *mb = container_of(browser, struct map_browser, b);
+	bool current_entry = ui_browser__is_current_entry(browser, row);
 	int width;
 
-	ui_browser__set_percent_color(self, 0, current_entry);
+	ui_browser__set_percent_color(browser, 0, current_entry);
 	slsmg_printf("%*" PRIx64 " %*" PRIx64 " %c ",
 		     mb->addrlen, sym->start, mb->addrlen, sym->end,
 		     sym->binding == STB_GLOBAL ? 'g' :
 		     sym->binding == STB_LOCAL  ? 'l' : 'w');
-	width = self->width - ((mb->addrlen * 2) + 4);
+	width = browser->width - ((mb->addrlen * 2) + 4);
 	if (width > 0)
 		slsmg_write_nstring(sym->name, width);
 }
 
 /* FIXME uber-kludgy, see comment on cmd_report... */
-static u32 *symbol__browser_index(struct symbol *self)
+static u32 *symbol__browser_index(struct symbol *browser)
 {
-	return ((void *)self) - sizeof(struct rb_node) - sizeof(u32);
+	return ((void *)browser) - sizeof(struct rb_node) - sizeof(u32);
 }
 
-static int map_browser__search(struct map_browser *self)
+static int map_browser__search(struct map_browser *browser)
 {
 	char target[512];
 	struct symbol *sym;
@@ -53,37 +53,37 @@ static int map_browser__search(struct map_browser *self)
 
 	if (target[0] == '0' && tolower(target[1]) == 'x') {
 		u64 addr = strtoull(target, NULL, 16);
-		sym = map__find_symbol(self->map, addr, NULL);
+		sym = map__find_symbol(browser->map, addr, NULL);
 	} else
-		sym = map__find_symbol_by_name(self->map, target, NULL);
+		sym = map__find_symbol_by_name(browser->map, target, NULL);
 
 	if (sym != NULL) {
 		u32 *idx = symbol__browser_index(sym);
 
-		self->b.top = &sym->rb_node;
-		self->b.index = self->b.top_idx = *idx;
+		browser->b.top = &sym->rb_node;
+		browser->b.index = browser->b.top_idx = *idx;
 	} else
 		ui_helpline__fpush("%s not found!", target);
 
 	return 0;
 }
 
-static int map_browser__run(struct map_browser *self)
+static int map_browser__run(struct map_browser *browser)
 {
 	int key;
 
-	if (ui_browser__show(&self->b, self->map->dso->long_name,
+	if (ui_browser__show(&browser->b, browser->map->dso->long_name,
 			     "Press <- or ESC to exit, %s / to search",
 			     verbose ? "" : "restart with -v to use") < 0)
 		return -1;
 
 	while (1) {
-		key = ui_browser__run(&self->b, 0);
+		key = ui_browser__run(&browser->b, 0);
 
 		switch (key) {
 		case '/':
 			if (verbose)
-				map_browser__search(self);
+				map_browser__search(browser);
 		default:
 			break;
                 case K_LEFT:
@@ -94,20 +94,20 @@ static int map_browser__run(struct map_browser *self)
 		}
 	}
 out:
-	ui_browser__hide(&self->b);
+	ui_browser__hide(&browser->b);
 	return key;
 }
 
-int map__browse(struct map *self)
+int map__browse(struct map *map)
 {
 	struct map_browser mb = {
 		.b = {
-			.entries = &self->dso->symbols[self->type],
+			.entries = &map->dso->symbols[map->type],
 			.refresh = ui_browser__rb_tree_refresh,
 			.seek	 = ui_browser__rb_tree_seek,
 			.write	 = map_browser__write,
 		},
-		.map = self,
+		.map = map,
 	};
 	struct rb_node *nd;
 	char tmp[BITS_PER_LONG / 4];
diff --git a/tools/perf/ui/browsers/map.h b/tools/perf/ui/browsers/map.h
index df8581a43e17bd2c956304074900d5f9968d2e6a..2d58e4b3eb6ffb295308098350a0997c8c5bea57 100644
--- a/tools/perf/ui/browsers/map.h
+++ b/tools/perf/ui/browsers/map.h
@@ -2,5 +2,5 @@
 #define _PERF_UI_MAP_BROWSER_H_ 1
 struct map;
 
-int map__browse(struct map *self);
+int map__browse(struct map *map);
 #endif /* _PERF_UI_MAP_BROWSER_H_ */
diff --git a/tools/perf/ui/browsers/scripts.c b/tools/perf/ui/browsers/scripts.c
index 12f009e61e94a7c60a6b2dbb1a9a959b690b9ad1..d63c68ea02a87e1adfbfaa2ea41ec52b373a0a0c 100644
--- a/tools/perf/ui/browsers/scripts.c
+++ b/tools/perf/ui/browsers/scripts.c
@@ -84,22 +84,22 @@ static void script_browser__write(struct ui_browser *browser,
 	slsmg_write_nstring(sline->line, browser->width);
 }
 
-static int script_browser__run(struct perf_script_browser *self)
+static int script_browser__run(struct perf_script_browser *browser)
 {
 	int key;
 
-	if (ui_browser__show(&self->b, self->script_name,
+	if (ui_browser__show(&browser->b, browser->script_name,
 			     "Press <- or ESC to exit") < 0)
 		return -1;
 
 	while (1) {
-		key = ui_browser__run(&self->b, 0);
+		key = ui_browser__run(&browser->b, 0);
 
 		/* We can add some special key handling here if needed */
 		break;
 	}
 
-	ui_browser__hide(&self->b);
+	ui_browser__hide(&browser->b);
 	return key;
 }
 
diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c
index f538794615dbb7c399371d93bd1b96cef77ea234..9c7ff8d31b274e32d22147664cb622437db514f1 100644
--- a/tools/perf/ui/gtk/annotate.c
+++ b/tools/perf/ui/gtk/annotate.c
@@ -154,9 +154,9 @@ static int perf_gtk__annotate_symbol(GtkWidget *window, struct symbol *sym,
 	return 0;
 }
 
-int symbol__gtk_annotate(struct symbol *sym, struct map *map,
-			 struct perf_evsel *evsel,
-			 struct hist_browser_timer *hbt)
+static int symbol__gtk_annotate(struct symbol *sym, struct map *map,
+				struct perf_evsel *evsel,
+				struct hist_browser_timer *hbt)
 {
 	GtkWidget *window;
 	GtkWidget *notebook;
@@ -226,6 +226,13 @@ int symbol__gtk_annotate(struct symbol *sym, struct map *map,
 	return 0;
 }
 
+int hist_entry__gtk_annotate(struct hist_entry *he,
+			     struct perf_evsel *evsel,
+			     struct hist_browser_timer *hbt)
+{
+	return symbol__gtk_annotate(he->ms.sym, he->ms.map, evsel, hbt);
+}
+
 void perf_gtk__show_annotations(void)
 {
 	GtkWidget *window;
diff --git a/tools/perf/ui/gtk/browser.c b/tools/perf/ui/gtk/browser.c
index c95012cdb4387f95a4125f7da3f58cf224f33dc8..c24d91221290e0049b409de582dfcf6ce7994ef5 100644
--- a/tools/perf/ui/gtk/browser.c
+++ b/tools/perf/ui/gtk/browser.c
@@ -43,7 +43,7 @@ const char *perf_gtk__get_percent_color(double percent)
 	return NULL;
 }
 
-#ifdef HAVE_GTK_INFO_BAR
+#ifdef HAVE_GTK_INFO_BAR_SUPPORT
 GtkWidget *perf_gtk__setup_info_bar(void)
 {
 	GtkWidget *info_bar;
diff --git a/tools/perf/ui/gtk/gtk.h b/tools/perf/ui/gtk/gtk.h
index 3d96785ef155044d006f4438bf35b17ee3717148..0a9173ff9a61d16e804f27f7e4734b16166f696c 100644
--- a/tools/perf/ui/gtk/gtk.h
+++ b/tools/perf/ui/gtk/gtk.h
@@ -12,7 +12,7 @@ struct perf_gtk_context {
 	GtkWidget *main_window;
 	GtkWidget *notebook;
 
-#ifdef HAVE_GTK_INFO_BAR
+#ifdef HAVE_GTK_INFO_BAR_SUPPORT
 	GtkWidget *info_bar;
 	GtkWidget *message_label;
 #endif
@@ -20,6 +20,9 @@ struct perf_gtk_context {
 	guint statbar_ctx_id;
 };
 
+int perf_gtk__init(void);
+void perf_gtk__exit(bool wait_for_ok);
+
 extern struct perf_gtk_context *pgctx;
 
 static inline bool perf_gtk__is_active_context(struct perf_gtk_context *ctx)
@@ -31,7 +34,7 @@ struct perf_gtk_context *perf_gtk__activate_context(GtkWidget *window);
 int perf_gtk__deactivate_context(struct perf_gtk_context **ctx);
 
 void perf_gtk__init_helpline(void);
-void perf_gtk__init_progress(void);
+void gtk_ui_progress__init(void);
 void perf_gtk__init_hpp(void);
 
 void perf_gtk__signal(int sig);
@@ -39,7 +42,7 @@ void perf_gtk__resize_window(GtkWidget *window);
 const char *perf_gtk__get_percent_color(double percent);
 GtkWidget *perf_gtk__setup_statusbar(void);
 
-#ifdef HAVE_GTK_INFO_BAR
+#ifdef HAVE_GTK_INFO_BAR_SUPPORT
 GtkWidget *perf_gtk__setup_info_bar(void);
 #else
 static inline GtkWidget *perf_gtk__setup_info_bar(void)
@@ -48,4 +51,17 @@ static inline GtkWidget *perf_gtk__setup_info_bar(void)
 }
 #endif
 
+struct perf_evsel;
+struct perf_evlist;
+struct hist_entry;
+struct hist_browser_timer;
+
+int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist, const char *help,
+				  struct hist_browser_timer *hbt,
+				  float min_pcnt);
+int hist_entry__gtk_annotate(struct hist_entry *he,
+			     struct perf_evsel *evsel,
+			     struct hist_browser_timer *hbt);
+void perf_gtk__show_annotations(void);
+
 #endif /* _PERF_GTK_H_ */
diff --git a/tools/perf/ui/gtk/progress.c b/tools/perf/ui/gtk/progress.c
index 482bcf3df9b7a293b16b4e2b4f0091d9ee87bb95..b656655fbc39e6c5f0cc23659957d8a963c9a107 100644
--- a/tools/perf/ui/gtk/progress.c
+++ b/tools/perf/ui/gtk/progress.c
@@ -7,14 +7,14 @@
 static GtkWidget *dialog;
 static GtkWidget *progress;
 
-static void gtk_progress_update(u64 curr, u64 total, const char *title)
+static void gtk_ui_progress__update(struct ui_progress *p)
 {
-	double fraction = total ? 1.0 * curr / total : 0.0;
+	double fraction = p->total ? 1.0 * p->curr / p->total : 0.0;
 	char buf[1024];
 
 	if (dialog == NULL) {
 		GtkWidget *vbox = gtk_vbox_new(TRUE, 5);
-		GtkWidget *label = gtk_label_new(title);
+		GtkWidget *label = gtk_label_new(p->title);
 
 		dialog = gtk_window_new(GTK_WINDOW_TOPLEVEL);
 		progress = gtk_progress_bar_new();
@@ -32,7 +32,7 @@ static void gtk_progress_update(u64 curr, u64 total, const char *title)
 	}
 
 	gtk_progress_bar_set_fraction(GTK_PROGRESS_BAR(progress), fraction);
-	snprintf(buf, sizeof(buf), "%"PRIu64" / %"PRIu64, curr, total);
+	snprintf(buf, sizeof(buf), "%"PRIu64" / %"PRIu64, p->curr, p->total);
 	gtk_progress_bar_set_text(GTK_PROGRESS_BAR(progress), buf);
 
 	/* we didn't call gtk_main yet, so do it manually */
@@ -40,7 +40,7 @@ static void gtk_progress_update(u64 curr, u64 total, const char *title)
 		gtk_main_iteration();
 }
 
-static void gtk_progress_finish(void)
+static void gtk_ui_progress__finish(void)
 {
 	/* this will also destroy all of its children */
 	gtk_widget_destroy(dialog);
@@ -48,12 +48,12 @@ static void gtk_progress_finish(void)
 	dialog = NULL;
 }
 
-static struct ui_progress gtk_progress_fns = {
-	.update		= gtk_progress_update,
-	.finish		= gtk_progress_finish,
+static struct ui_progress_ops gtk_ui_progress__ops = {
+	.update		= gtk_ui_progress__update,
+	.finish		= gtk_ui_progress__finish,
 };
 
-void perf_gtk__init_progress(void)
+void gtk_ui_progress__init(void)
 {
-	progress_fns = &gtk_progress_fns;
+	ui_progress__ops = &gtk_ui_progress__ops;
 }
diff --git a/tools/perf/ui/gtk/setup.c b/tools/perf/ui/gtk/setup.c
index 6c2dd2e423f3a6a160bcbd879b71c6134ef9d460..1d57676f82124d82e18d032e349e879e09a060e8 100644
--- a/tools/perf/ui/gtk/setup.c
+++ b/tools/perf/ui/gtk/setup.c
@@ -8,7 +8,7 @@ int perf_gtk__init(void)
 {
 	perf_error__register(&perf_gtk_eops);
 	perf_gtk__init_helpline();
-	perf_gtk__init_progress();
+	gtk_ui_progress__init();
 	perf_gtk__init_hpp();
 
 	return gtk_init_check(NULL, NULL) ? 0 : -1;
diff --git a/tools/perf/ui/gtk/util.c b/tools/perf/ui/gtk/util.c
index c06942a41c782e0ac029dba3719c638a4563b772..696c1fbe42482db03b4fb460b488860435a4edf5 100644
--- a/tools/perf/ui/gtk/util.c
+++ b/tools/perf/ui/gtk/util.c
@@ -53,7 +53,7 @@ static int perf_gtk__error(const char *format, va_list args)
 	return 0;
 }
 
-#ifdef HAVE_GTK_INFO_BAR
+#ifdef HAVE_GTK_INFO_BAR_SUPPORT
 static int perf_gtk__warning_info_bar(const char *format, va_list args)
 {
 	char *msg;
@@ -105,7 +105,7 @@ static int perf_gtk__warning_statusbar(const char *format, va_list args)
 
 struct perf_error_ops perf_gtk_eops = {
 	.error		= perf_gtk__error,
-#ifdef HAVE_GTK_INFO_BAR
+#ifdef HAVE_GTK_INFO_BAR_SUPPORT
 	.warning	= perf_gtk__warning_info_bar,
 #else
 	.warning	= perf_gtk__warning_statusbar,
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index 0a193281eba85ec12bbed36d85803b51d7d24464..78f4c92e9b73c1c55e5699bce4b628f551348beb 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -117,7 +117,7 @@ static int hpp__color_##_type(struct perf_hpp_fmt *fmt __maybe_unused,		\
 			      struct perf_hpp *hpp, struct hist_entry *he) 	\
 {										\
 	return __hpp__fmt(hpp, he, he_get_##_field, " %6.2f%%",			\
-			  (hpp_snprint_fn)percent_color_snprintf, true);	\
+			  percent_color_snprintf, true);			\
 }
 
 #define __HPP_ENTRY_PERCENT_FN(_type, _field)					\
diff --git a/tools/perf/ui/progress.c b/tools/perf/ui/progress.c
index 3ec695607a4d074f844f4922805d52556b2ec645..a0f24c7115c59f891fa894739ef4da157a9e33cf 100644
--- a/tools/perf/ui/progress.c
+++ b/tools/perf/ui/progress.c
@@ -1,26 +1,38 @@
 #include "../cache.h"
 #include "progress.h"
 
-static void nop_progress_update(u64 curr __maybe_unused,
-				u64 total __maybe_unused,
-				const char *title __maybe_unused)
+static void null_progress__update(struct ui_progress *p __maybe_unused)
 {
 }
 
-static struct ui_progress default_progress_fns =
+static struct ui_progress_ops null_progress__ops =
 {
-	.update		= nop_progress_update,
+	.update = null_progress__update,
 };
 
-struct ui_progress *progress_fns = &default_progress_fns;
+struct ui_progress_ops *ui_progress__ops = &null_progress__ops;
 
-void ui_progress__update(u64 curr, u64 total, const char *title)
+void ui_progress__update(struct ui_progress *p, u64 adv)
 {
-	return progress_fns->update(curr, total, title);
+	p->curr += adv;
+
+	if (p->curr >= p->next) {
+		p->next += p->step;
+		ui_progress__ops->update(p);
+	}
+}
+
+void ui_progress__init(struct ui_progress *p, u64 total, const char *title)
+{
+	p->curr = 0;
+	p->next = p->step = total / 16;
+	p->total = total;
+	p->title = title;
+
 }
 
 void ui_progress__finish(void)
 {
-	if (progress_fns->finish)
-		progress_fns->finish();
+	if (ui_progress__ops->finish)
+		ui_progress__ops->finish();
 }
diff --git a/tools/perf/ui/progress.h b/tools/perf/ui/progress.h
index 257cc224f9cff6d29acc81c0db9d315ed4b2b39a..29ec8efffefb7ec9e0ac1936304257cc8c5ff9ef 100644
--- a/tools/perf/ui/progress.h
+++ b/tools/perf/ui/progress.h
@@ -3,16 +3,21 @@
 
 #include <../types.h>
 
+void ui_progress__finish(void);
+ 
 struct ui_progress {
-	void (*update)(u64, u64, const char *);
-	void (*finish)(void);
+	const char *title;
+	u64 curr, next, step, total;
 };
+ 
+void ui_progress__init(struct ui_progress *p, u64 total, const char *title);
+void ui_progress__update(struct ui_progress *p, u64 adv);
 
-extern struct ui_progress *progress_fns;
-
-void ui_progress__init(void);
+struct ui_progress_ops {
+	void (*update)(struct ui_progress *p);
+	void (*finish)(void);
+};
 
-void ui_progress__update(u64 curr, u64 total, const char *title);
-void ui_progress__finish(void);
+extern struct ui_progress_ops *ui_progress__ops;
 
 #endif
diff --git a/tools/perf/ui/setup.c b/tools/perf/ui/setup.c
index 47d9a571f261da9c65e36aa82f8dbdb70a838acc..5df5140a9f29e466dd055b8873711e722875f646 100644
--- a/tools/perf/ui/setup.c
+++ b/tools/perf/ui/setup.c
@@ -1,10 +1,64 @@
 #include <pthread.h>
+#include <dlfcn.h>
 
 #include "../util/cache.h"
 #include "../util/debug.h"
 #include "../util/hist.h"
 
 pthread_mutex_t ui__lock = PTHREAD_MUTEX_INITIALIZER;
+void *perf_gtk_handle;
+
+#ifdef HAVE_GTK2_SUPPORT
+static int setup_gtk_browser(void)
+{
+	int (*perf_ui_init)(void);
+
+	if (perf_gtk_handle)
+		return 0;
+
+	perf_gtk_handle = dlopen(PERF_GTK_DSO, RTLD_LAZY);
+	if (perf_gtk_handle == NULL) {
+		char buf[PATH_MAX];
+		scnprintf(buf, sizeof(buf), "%s/%s", LIBDIR, PERF_GTK_DSO);
+		perf_gtk_handle = dlopen(buf, RTLD_LAZY);
+	}
+	if (perf_gtk_handle == NULL)
+		return -1;
+
+	perf_ui_init = dlsym(perf_gtk_handle, "perf_gtk__init");
+	if (perf_ui_init == NULL)
+		goto out_close;
+
+	if (perf_ui_init() == 0)
+		return 0;
+
+out_close:
+	dlclose(perf_gtk_handle);
+	return -1;
+}
+
+static void exit_gtk_browser(bool wait_for_ok)
+{
+	void (*perf_ui_exit)(bool);
+
+	if (perf_gtk_handle == NULL)
+		return;
+
+	perf_ui_exit = dlsym(perf_gtk_handle, "perf_gtk__exit");
+	if (perf_ui_exit == NULL)
+		goto out_close;
+
+	perf_ui_exit(wait_for_ok);
+
+out_close:
+	dlclose(perf_gtk_handle);
+
+	perf_gtk_handle = NULL;
+}
+#else
+static inline int setup_gtk_browser(void) { return -1; }
+static inline void exit_gtk_browser(bool wait_for_ok __maybe_unused) {}
+#endif
 
 void setup_browser(bool fallback_to_pager)
 {
@@ -17,8 +71,11 @@ void setup_browser(bool fallback_to_pager)
 
 	switch (use_browser) {
 	case 2:
-		if (perf_gtk__init() == 0)
+		if (setup_gtk_browser() == 0)
 			break;
+		printf("GTK browser requested but could not find %s\n",
+		       PERF_GTK_DSO);
+		sleep(1);
 		/* fall through */
 	case 1:
 		use_browser = 1;
@@ -39,7 +96,7 @@ void exit_browser(bool wait_for_ok)
 {
 	switch (use_browser) {
 	case 2:
-		perf_gtk__exit(wait_for_ok);
+		exit_gtk_browser(wait_for_ok);
 		break;
 
 	case 1:
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index 6c152686e83763a9b18983d06f52e2a14b739b91..c244cb524ef2cc9d4e3eafabe065c8eef60b60a6 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -213,20 +213,19 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root,
 	return ret;
 }
 
-static size_t __callchain__fprintf_flat(FILE *fp,
-					struct callchain_node *self,
+static size_t __callchain__fprintf_flat(FILE *fp, struct callchain_node *node,
 					u64 total_samples)
 {
 	struct callchain_list *chain;
 	size_t ret = 0;
 
-	if (!self)
+	if (!node)
 		return 0;
 
-	ret += __callchain__fprintf_flat(fp, self->parent, total_samples);
+	ret += __callchain__fprintf_flat(fp, node->parent, total_samples);
 
 
-	list_for_each_entry(chain, &self->val, list) {
+	list_for_each_entry(chain, &node->val, list) {
 		if (chain->ip >= PERF_CONTEXT_MAX)
 			continue;
 		if (chain->ms.sym)
@@ -239,15 +238,14 @@ static size_t __callchain__fprintf_flat(FILE *fp,
 	return ret;
 }
 
-static size_t callchain__fprintf_flat(FILE *fp, struct rb_root *self,
+static size_t callchain__fprintf_flat(FILE *fp, struct rb_root *tree,
 				      u64 total_samples)
 {
 	size_t ret = 0;
 	u32 entries_printed = 0;
-	struct rb_node *rb_node;
 	struct callchain_node *chain;
+	struct rb_node *rb_node = rb_first(tree);
 
-	rb_node = rb_first(self);
 	while (rb_node) {
 		double percent;
 
diff --git a/tools/perf/ui/tui/progress.c b/tools/perf/ui/tui/progress.c
index 6c2184d53cbf9a16d378d084984613d19ec99752..3e2d936d7443e9b31f349594a4a9935a98d69299 100644
--- a/tools/perf/ui/tui/progress.c
+++ b/tools/perf/ui/tui/progress.c
@@ -2,9 +2,10 @@
 #include "../progress.h"
 #include "../libslang.h"
 #include "../ui.h"
+#include "tui.h"
 #include "../browser.h"
 
-static void tui_progress__update(u64 curr, u64 total, const char *title)
+static void tui_progress__update(struct ui_progress *p)
 {
 	int bar, y;
 	/*
@@ -14,7 +15,7 @@ static void tui_progress__update(u64 curr, u64 total, const char *title)
 	if (use_browser <= 0)
 		return;
 
-	if (total == 0)
+	if (p->total == 0)
 		return;
 
 	ui__refresh_dimensions(true);
@@ -23,20 +24,20 @@ static void tui_progress__update(u64 curr, u64 total, const char *title)
 	SLsmg_set_color(0);
 	SLsmg_draw_box(y, 0, 3, SLtt_Screen_Cols);
 	SLsmg_gotorc(y++, 1);
-	SLsmg_write_string((char *)title);
+	SLsmg_write_string((char *)p->title);
 	SLsmg_set_color(HE_COLORSET_SELECTED);
-	bar = ((SLtt_Screen_Cols - 2) * curr) / total;
+	bar = ((SLtt_Screen_Cols - 2) * p->curr) / p->total;
 	SLsmg_fill_region(y, 1, 1, bar, ' ');
 	SLsmg_refresh();
 	pthread_mutex_unlock(&ui__lock);
 }
 
-static struct ui_progress tui_progress_fns =
+static struct ui_progress_ops tui_progress__ops =
 {
 	.update		= tui_progress__update,
 };
 
-void ui_progress__init(void)
+void tui_progress__init(void)
 {
-	progress_fns = &tui_progress_fns;
+	ui_progress__ops = &tui_progress__ops;
 }
diff --git a/tools/perf/ui/tui/setup.c b/tools/perf/ui/tui/setup.c
index b9401482d110bad0f5d39946c04b881cd9b779d9..2f612562978cdc13c7e89b6dbddd24c9f928d626 100644
--- a/tools/perf/ui/tui/setup.c
+++ b/tools/perf/ui/tui/setup.c
@@ -9,6 +9,7 @@
 #include "../util.h"
 #include "../libslang.h"
 #include "../keysyms.h"
+#include "tui.h"
 
 static volatile int ui__need_resize;
 
@@ -119,7 +120,7 @@ int ui__init(void)
 
 	ui_helpline__init();
 	ui_browser__init();
-	ui_progress__init();
+	tui_progress__init();
 
 	signal(SIGSEGV, ui__signal);
 	signal(SIGFPE, ui__signal);
diff --git a/tools/perf/ui/tui/tui.h b/tools/perf/ui/tui/tui.h
new file mode 100644
index 0000000000000000000000000000000000000000..18961c7b6ec5dac67b72c8e8891754bf7aacae25
--- /dev/null
+++ b/tools/perf/ui/tui/tui.h
@@ -0,0 +1,6 @@
+#ifndef _PERF_TUI_H_
+#define _PERF_TUI_H_ 1
+
+void tui_progress__init(void);
+
+#endif /* _PERF_TUI_H_ */
diff --git a/tools/perf/ui/ui.h b/tools/perf/ui/ui.h
index 70cb0d4eb8aa390c008a4d84c575662c2515280d..ab88383f8be85d5278dfaba5b6b6b07b11a6b2d2 100644
--- a/tools/perf/ui/ui.h
+++ b/tools/perf/ui/ui.h
@@ -6,13 +6,14 @@
 #include <linux/compiler.h>
 
 extern pthread_mutex_t ui__lock;
+extern void *perf_gtk_handle;
 
 extern int use_browser;
 
 void setup_browser(bool fallback_to_pager);
 void exit_browser(bool wait_for_ok);
 
-#ifdef SLANG_SUPPORT
+#ifdef HAVE_SLANG_SUPPORT
 int ui__init(void);
 void ui__exit(bool wait_for_ok);
 #else
@@ -23,17 +24,6 @@ static inline int ui__init(void)
 static inline void ui__exit(bool wait_for_ok __maybe_unused) {}
 #endif
 
-#ifdef GTK2_SUPPORT
-int perf_gtk__init(void);
-void perf_gtk__exit(bool wait_for_ok);
-#else
-static inline int perf_gtk__init(void)
-{
-	return -1;
-}
-static inline void perf_gtk__exit(bool wait_for_ok __maybe_unused) {}
-#endif
-
 void ui__refresh_dimensions(bool force);
 
 #endif /* _PERF_UI_H_ */
diff --git a/tools/perf/util/PERF-VERSION-GEN b/tools/perf/util/PERF-VERSION-GEN
index 15a77b7c0e36f155c968cbf59780aec089c53f3a..39f17507578dae0c650dc91335e888d9a907ef4a 100755
--- a/tools/perf/util/PERF-VERSION-GEN
+++ b/tools/perf/util/PERF-VERSION-GEN
@@ -19,6 +19,9 @@ if test -d ../../.git -o -f ../../.git
 then
 	TAG=$(git describe --abbrev=0 --match "v[0-9].[0-9]*" 2>/dev/null )
 	CID=$(git log -1 --abbrev=4 --pretty=format:"%h" 2>/dev/null) && CID="-g$CID"
+elif test -f ../../PERF-VERSION-FILE
+then
+	TAG=$(cut -d' ' -f3 ../../PERF-VERSION-FILE | sed -e 's/\"//g')
 fi
 if test -z "$TAG"
 then
@@ -40,7 +43,7 @@ else
 	VC=unset
 fi
 test "$VN" = "$VC" || {
-	echo >&2 "PERF_VERSION = $VN"
+	echo >&2 "  PERF_VERSION = $VN"
 	echo "#define PERF_VERSION \"$VN\"" >$GVF
 }
 
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 7eae5488ecea47344cac10677104cb9e6cb2cc44..cf6242c92ee244ab45498993cab477af7950853f 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -825,20 +825,16 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map,
 		dl->ops.target.offset = dl->ops.target.addr -
 					map__rip_2objdump(map, sym->start);
 
-	/*
-	 * kcore has no symbols, so add the call target name if it is on the
-	 * same map.
-	 */
+	/* kcore has no symbols, so add the call target name */
 	if (dl->ins && ins__is_call(dl->ins) && !dl->ops.target.name) {
-		struct symbol *s;
-		u64 ip = dl->ops.target.addr;
-
-		if (ip >= map->start && ip <= map->end) {
-			ip = map->map_ip(map, ip);
-			s = map__find_symbol(map, ip, NULL);
-			if (s && s->start == ip)
-				dl->ops.target.name = strdup(s->name);
-		}
+		struct addr_map_symbol target = {
+			.map = map,
+			.addr = dl->ops.target.addr,
+		};
+
+		if (!map_groups__find_ams(&target, NULL) &&
+		    target.sym->start == target.al_addr)
+			dl->ops.target.name = strdup(target.sym->name);
 	}
 
 	disasm__add(&notes->src->source, dl);
@@ -879,6 +875,8 @@ int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize)
 	FILE *file;
 	int err = 0;
 	char symfs_filename[PATH_MAX];
+	struct kcore_extract kce;
+	bool delete_extract = false;
 
 	if (filename) {
 		snprintf(symfs_filename, sizeof(symfs_filename), "%s%s",
@@ -940,6 +938,23 @@ int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize)
 	pr_debug("annotating [%p] %30s : [%p] %30s\n",
 		 dso, dso->long_name, sym, sym->name);
 
+	if (dso__is_kcore(dso)) {
+		kce.kcore_filename = symfs_filename;
+		kce.addr = map__rip_2objdump(map, sym->start);
+		kce.offs = sym->start;
+		kce.len = sym->end + 1 - sym->start;
+		if (!kcore_extract__create(&kce)) {
+			delete_extract = true;
+			strlcpy(symfs_filename, kce.extract_filename,
+				sizeof(symfs_filename));
+			if (free_filename) {
+				free(filename);
+				free_filename = false;
+			}
+			filename = symfs_filename;
+		}
+	}
+
 	snprintf(command, sizeof(command),
 		 "%s %s%s --start-address=0x%016" PRIx64
 		 " --stop-address=0x%016" PRIx64
@@ -972,6 +987,8 @@ int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize)
 
 	pclose(file);
 out_free_filename:
+	if (delete_extract)
+		kcore_extract__delete(&kce);
 	if (free_filename)
 		free(filename);
 	return err;
@@ -1070,7 +1087,7 @@ static void symbol__free_source_line(struct symbol *sym, int len)
 			  (sizeof(src_line->p) * (src_line->nr_pcnt - 1));
 
 	for (i = 0; i < len; i++) {
-		free(src_line->path);
+		free_srcline(src_line->path);
 		src_line = (void *)src_line + sizeof_src_line;
 	}
 
@@ -1081,13 +1098,11 @@ static void symbol__free_source_line(struct symbol *sym, int len)
 /* Get the filename:line for the colored entries */
 static int symbol__get_source_line(struct symbol *sym, struct map *map,
 				   struct perf_evsel *evsel,
-				   struct rb_root *root, int len,
-				   const char *filename)
+				   struct rb_root *root, int len)
 {
 	u64 start;
 	int i, k;
 	int evidx = evsel->idx;
-	char cmd[PATH_MAX * 2];
 	struct source_line *src_line;
 	struct annotation *notes = symbol__annotation(sym);
 	struct sym_hist *h = annotation__histogram(notes, evidx);
@@ -1115,10 +1130,7 @@ static int symbol__get_source_line(struct symbol *sym, struct map *map,
 	start = map__rip_2objdump(map, sym->start);
 
 	for (i = 0; i < len; i++) {
-		char *path = NULL;
-		size_t line_len;
 		u64 offset;
-		FILE *fp;
 		double percent_max = 0.0;
 
 		src_line->nr_pcnt = nr_pcnt;
@@ -1135,23 +1147,9 @@ static int symbol__get_source_line(struct symbol *sym, struct map *map,
 			goto next;
 
 		offset = start + i;
-		sprintf(cmd, "addr2line -e %s %016" PRIx64, filename, offset);
-		fp = popen(cmd, "r");
-		if (!fp)
-			goto next;
-
-		if (getline(&path, &line_len, fp) < 0 || !line_len)
-			goto next_close;
-
-		src_line->path = malloc(sizeof(char) * line_len + 1);
-		if (!src_line->path)
-			goto next_close;
-
-		strcpy(src_line->path, path);
+		src_line->path = get_srcline(map->dso, offset);
 		insert_source_line(&tmp_root, src_line);
 
-	next_close:
-		pclose(fp);
 	next:
 		src_line = (void *)src_line + sizeof_src_line;
 	}
@@ -1192,7 +1190,7 @@ static void print_summary(struct rb_root *root, const char *filename)
 
 		path = src_line->path;
 		color = get_percent_color(percent_max);
-		color_fprintf(stdout, color, " %s", path);
+		color_fprintf(stdout, color, " %s\n", path);
 
 		node = rb_next(node);
 	}
@@ -1356,7 +1354,6 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map,
 			 bool full_paths, int min_pcnt, int max_lines)
 {
 	struct dso *dso = map->dso;
-	const char *filename = dso->long_name;
 	struct rb_root source_line = RB_ROOT;
 	u64 len;
 
@@ -1366,9 +1363,8 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map,
 	len = symbol__size(sym);
 
 	if (print_lines) {
-		symbol__get_source_line(sym, map, evsel, &source_line,
-					len, filename);
-		print_summary(&source_line, filename);
+		symbol__get_source_line(sym, map, evsel, &source_line, len);
+		print_summary(&source_line, dso->long_name);
 	}
 
 	symbol__annotate_printf(sym, map, evsel, full_paths,
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index af755156d2785b7963d834adcd5e1ee3cc07b4be..834b7b57b7884f7b833291caa6bcaad2a1eff1d1 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -150,7 +150,7 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map,
 			 struct perf_evsel *evsel, bool print_lines,
 			 bool full_paths, int min_pcnt, int max_lines);
 
-#ifdef SLANG_SUPPORT
+#ifdef HAVE_SLANG_SUPPORT
 int symbol__tui_annotate(struct symbol *sym, struct map *map,
 			 struct perf_evsel *evsel,
 			 struct hist_browser_timer *hbt);
@@ -165,30 +165,6 @@ static inline int symbol__tui_annotate(struct symbol *sym __maybe_unused,
 }
 #endif
 
-#ifdef GTK2_SUPPORT
-int symbol__gtk_annotate(struct symbol *sym, struct map *map,
-			 struct perf_evsel *evsel,
-			 struct hist_browser_timer *hbt);
-
-static inline int hist_entry__gtk_annotate(struct hist_entry *he,
-					   struct perf_evsel *evsel,
-					   struct hist_browser_timer *hbt)
-{
-	return symbol__gtk_annotate(he->ms.sym, he->ms.map, evsel, hbt);
-}
-
-void perf_gtk__show_annotations(void);
-#else
-static inline int hist_entry__gtk_annotate(struct hist_entry *he __maybe_unused,
-				struct perf_evsel *evsel __maybe_unused,
-				struct hist_browser_timer *hbt __maybe_unused)
-{
-	return 0;
-}
-
-static inline void perf_gtk__show_annotations(void) {}
-#endif
-
 extern const char	*disassembler_style;
 
 #endif	/* __PERF_ANNOTATE_H */
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 7ded71d19d75323ab02184c460afde37fe17d831..a92770c98cc7c00dac6fdd57258eb120960a5294 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -89,14 +89,14 @@ int build_id__sprintf(const u8 *build_id, int len, char *bf)
 	return raw - build_id;
 }
 
-char *dso__build_id_filename(struct dso *self, char *bf, size_t size)
+char *dso__build_id_filename(struct dso *dso, char *bf, size_t size)
 {
 	char build_id_hex[BUILD_ID_SIZE * 2 + 1];
 
-	if (!self->has_build_id)
+	if (!dso->has_build_id)
 		return NULL;
 
-	build_id__sprintf(self->build_id, sizeof(self->build_id), build_id_hex);
+	build_id__sprintf(dso->build_id, sizeof(dso->build_id), build_id_hex);
 	if (bf == NULL) {
 		if (asprintf(&bf, "%s/.build-id/%.2s/%s", buildid_dir,
 			     build_id_hex, build_id_hex + 2) < 0)
diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h
index a811f5c62e18b3988be2e4d806886a5352f81a65..929f28a7c14dc05cb1c3fd174e17520dda95b7ec 100644
--- a/tools/perf/util/build-id.h
+++ b/tools/perf/util/build-id.h
@@ -10,10 +10,9 @@ extern struct perf_tool build_id__mark_dso_hit_ops;
 struct dso;
 
 int build_id__sprintf(const u8 *build_id, int len, char *bf);
-char *dso__build_id_filename(struct dso *self, char *bf, size_t size);
+char *dso__build_id_filename(struct dso *dso, char *bf, size_t size);
 
 int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event,
 			   struct perf_sample *sample, struct perf_evsel *evsel,
 			   struct machine *machine);
-
 #endif
diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h
index 26e367239873fadbdcde27610d6ee2577c0e9f79..7b176dd02e1ae94e6f5479b78ae4c2e84bb7836c 100644
--- a/tools/perf/util/cache.h
+++ b/tools/perf/util/cache.h
@@ -70,8 +70,7 @@ extern char *perf_path(const char *fmt, ...) __attribute__((format (printf, 1, 2
 extern char *perf_pathdup(const char *fmt, ...)
 	__attribute__((format (printf, 1, 2)));
 
-#ifndef HAVE_STRLCPY
+/* Matches the libc/libbsd function attribute so we declare this unconditionally: */
 extern size_t strlcpy(char *dest, const char *src, size_t size);
-#endif
 
 #endif /* __PERF_CACHE_H */
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 482f68081cd8d50a7c84fd25845841c18fcfb870..e3970e3eaacf45bc7a10db4a86314c2fdd79fbb5 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -21,12 +21,6 @@
 
 __thread struct callchain_cursor callchain_cursor;
 
-#define chain_for_each_child(child, parent)	\
-	list_for_each_entry(child, &parent->children, siblings)
-
-#define chain_for_each_child_safe(child, next, parent)	\
-	list_for_each_entry_safe(child, next, &parent->children, siblings)
-
 static void
 rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
 		    enum chain_mode mode)
@@ -71,10 +65,16 @@ static void
 __sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node,
 		  u64 min_hit)
 {
+	struct rb_node *n;
 	struct callchain_node *child;
 
-	chain_for_each_child(child, node)
+	n = rb_first(&node->rb_root_in);
+	while (n) {
+		child = rb_entry(n, struct callchain_node, rb_node_in);
+		n = rb_next(n);
+
 		__sort_chain_flat(rb_root, child, min_hit);
+	}
 
 	if (node->hit && node->hit >= min_hit)
 		rb_insert_callchain(rb_root, node, CHAIN_FLAT);
@@ -94,11 +94,16 @@ sort_chain_flat(struct rb_root *rb_root, struct callchain_root *root,
 static void __sort_chain_graph_abs(struct callchain_node *node,
 				   u64 min_hit)
 {
+	struct rb_node *n;
 	struct callchain_node *child;
 
 	node->rb_root = RB_ROOT;
+	n = rb_first(&node->rb_root_in);
+
+	while (n) {
+		child = rb_entry(n, struct callchain_node, rb_node_in);
+		n = rb_next(n);
 
-	chain_for_each_child(child, node) {
 		__sort_chain_graph_abs(child, min_hit);
 		if (callchain_cumul_hits(child) >= min_hit)
 			rb_insert_callchain(&node->rb_root, child,
@@ -117,13 +122,18 @@ sort_chain_graph_abs(struct rb_root *rb_root, struct callchain_root *chain_root,
 static void __sort_chain_graph_rel(struct callchain_node *node,
 				   double min_percent)
 {
+	struct rb_node *n;
 	struct callchain_node *child;
 	u64 min_hit;
 
 	node->rb_root = RB_ROOT;
 	min_hit = ceil(node->children_hit * min_percent);
 
-	chain_for_each_child(child, node) {
+	n = rb_first(&node->rb_root_in);
+	while (n) {
+		child = rb_entry(n, struct callchain_node, rb_node_in);
+		n = rb_next(n);
+
 		__sort_chain_graph_rel(child, min_percent);
 		if (callchain_cumul_hits(child) >= min_hit)
 			rb_insert_callchain(&node->rb_root, child,
@@ -173,19 +183,26 @@ create_child(struct callchain_node *parent, bool inherit_children)
 		return NULL;
 	}
 	new->parent = parent;
-	INIT_LIST_HEAD(&new->children);
 	INIT_LIST_HEAD(&new->val);
 
 	if (inherit_children) {
-		struct callchain_node *next;
+		struct rb_node *n;
+		struct callchain_node *child;
+
+		new->rb_root_in = parent->rb_root_in;
+		parent->rb_root_in = RB_ROOT;
 
-		list_splice(&parent->children, &new->children);
-		INIT_LIST_HEAD(&parent->children);
+		n = rb_first(&new->rb_root_in);
+		while (n) {
+			child = rb_entry(n, struct callchain_node, rb_node_in);
+			child->parent = new;
+			n = rb_next(n);
+		}
 
-		chain_for_each_child(next, new)
-			next->parent = new;
+		/* make it the first child */
+		rb_link_node(&new->rb_node_in, NULL, &parent->rb_root_in.rb_node);
+		rb_insert_color(&new->rb_node_in, &parent->rb_root_in);
 	}
-	list_add_tail(&new->siblings, &parent->children);
 
 	return new;
 }
@@ -223,7 +240,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
 	}
 }
 
-static void
+static struct callchain_node *
 add_child(struct callchain_node *parent,
 	  struct callchain_cursor *cursor,
 	  u64 period)
@@ -235,6 +252,19 @@ add_child(struct callchain_node *parent,
 
 	new->children_hit = 0;
 	new->hit = period;
+	return new;
+}
+
+static s64 match_chain(struct callchain_cursor_node *node,
+		      struct callchain_list *cnode)
+{
+	struct symbol *sym = node->sym;
+
+	if (cnode->ms.sym && sym &&
+	    callchain_param.key == CCKEY_FUNCTION)
+		return cnode->ms.sym->start - sym->start;
+	else
+		return cnode->ip - node->ip;
 }
 
 /*
@@ -272,9 +302,33 @@ split_add_child(struct callchain_node *parent,
 
 	/* create a new child for the new branch if any */
 	if (idx_total < cursor->nr) {
+		struct callchain_node *first;
+		struct callchain_list *cnode;
+		struct callchain_cursor_node *node;
+		struct rb_node *p, **pp;
+
 		parent->hit = 0;
-		add_child(parent, cursor, period);
 		parent->children_hit += period;
+
+		node = callchain_cursor_current(cursor);
+		new = add_child(parent, cursor, period);
+
+		/*
+		 * This is second child since we moved parent's children
+		 * to new (first) child above.
+		 */
+		p = parent->rb_root_in.rb_node;
+		first = rb_entry(p, struct callchain_node, rb_node_in);
+		cnode = list_first_entry(&first->val, struct callchain_list,
+					 list);
+
+		if (match_chain(node, cnode) < 0)
+			pp = &p->rb_left;
+		else
+			pp = &p->rb_right;
+
+		rb_link_node(&new->rb_node_in, p, pp);
+		rb_insert_color(&new->rb_node_in, &parent->rb_root_in);
 	} else {
 		parent->hit = period;
 	}
@@ -291,16 +345,40 @@ append_chain_children(struct callchain_node *root,
 		      u64 period)
 {
 	struct callchain_node *rnode;
+	struct callchain_cursor_node *node;
+	struct rb_node **p = &root->rb_root_in.rb_node;
+	struct rb_node *parent = NULL;
+
+	node = callchain_cursor_current(cursor);
+	if (!node)
+		return;
 
 	/* lookup in childrens */
-	chain_for_each_child(rnode, root) {
-		unsigned int ret = append_chain(rnode, cursor, period);
+	while (*p) {
+		s64 ret;
+		struct callchain_list *cnode;
 
-		if (!ret)
+		parent = *p;
+		rnode = rb_entry(parent, struct callchain_node, rb_node_in);
+		cnode = list_first_entry(&rnode->val, struct callchain_list,
+					 list);
+
+		/* just check first entry */
+		ret = match_chain(node, cnode);
+		if (ret == 0) {
+			append_chain(rnode, cursor, period);
 			goto inc_children_hit;
+		}
+
+		if (ret < 0)
+			p = &parent->rb_left;
+		else
+			p = &parent->rb_right;
 	}
 	/* nothing in children, add to the current node */
-	add_child(root, cursor, period);
+	rnode = add_child(root, cursor, period);
+	rb_link_node(&rnode->rb_node_in, parent, p);
+	rb_insert_color(&rnode->rb_node_in, &root->rb_root_in);
 
 inc_children_hit:
 	root->children_hit += period;
@@ -325,28 +403,20 @@ append_chain(struct callchain_node *root,
 	 */
 	list_for_each_entry(cnode, &root->val, list) {
 		struct callchain_cursor_node *node;
-		struct symbol *sym;
 
 		node = callchain_cursor_current(cursor);
 		if (!node)
 			break;
 
-		sym = node->sym;
-
-		if (cnode->ms.sym && sym &&
-		    callchain_param.key == CCKEY_FUNCTION) {
-			if (cnode->ms.sym->start != sym->start)
-				break;
-		} else if (cnode->ip != node->ip)
+		if (match_chain(node, cnode) != 0)
 			break;
 
-		if (!found)
-			found = true;
+		found = true;
 
 		callchain_cursor_advance(cursor);
 	}
 
-	/* matches not, relay on the parent */
+	/* matches not, relay no the parent */
 	if (!found) {
 		cursor->curr = curr_snap;
 		cursor->pos = start;
@@ -395,8 +465,9 @@ merge_chain_branch(struct callchain_cursor *cursor,
 		   struct callchain_node *dst, struct callchain_node *src)
 {
 	struct callchain_cursor_node **old_last = cursor->last;
-	struct callchain_node *child, *next_child;
+	struct callchain_node *child;
 	struct callchain_list *list, *next_list;
+	struct rb_node *n;
 	int old_pos = cursor->nr;
 	int err = 0;
 
@@ -412,12 +483,16 @@ merge_chain_branch(struct callchain_cursor *cursor,
 		append_chain_children(dst, cursor, src->hit);
 	}
 
-	chain_for_each_child_safe(child, next_child, src) {
+	n = rb_first(&src->rb_root_in);
+	while (n) {
+		child = container_of(n, struct callchain_node, rb_node_in);
+		n = rb_next(n);
+		rb_erase(&child->rb_node_in, &src->rb_root_in);
+
 		err = merge_chain_branch(cursor, dst, child);
 		if (err)
 			break;
 
-		list_del(&child->siblings);
 		free(child);
 	}
 
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 9e99060408ae759e39401f686300090b76e52edf..4f7f989876ec7ba9b5a505334c15160e7e36aa40 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -21,11 +21,11 @@ enum chain_order {
 
 struct callchain_node {
 	struct callchain_node	*parent;
-	struct list_head	siblings;
-	struct list_head	children;
 	struct list_head	val;
-	struct rb_node		rb_node; /* to sort nodes in an rbtree */
-	struct rb_root		rb_root; /* sorted tree of children */
+	struct rb_node		rb_node_in; /* to insert nodes in an rbtree */
+	struct rb_node		rb_node;    /* to sort nodes in an output tree */
+	struct rb_root		rb_root_in; /* input tree of children */
+	struct rb_root		rb_root;    /* sorted output tree of children */
 	unsigned int		val_nr;
 	u64			hit;
 	u64			children_hit;
@@ -86,13 +86,12 @@ extern __thread struct callchain_cursor callchain_cursor;
 
 static inline void callchain_init(struct callchain_root *root)
 {
-	INIT_LIST_HEAD(&root->node.siblings);
-	INIT_LIST_HEAD(&root->node.children);
 	INIT_LIST_HEAD(&root->node.val);
 
 	root->node.parent = NULL;
 	root->node.hit = 0;
 	root->node.children_hit = 0;
+	root->node.rb_root_in = RB_ROOT;
 	root->max_depth = 0;
 }
 
diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c
index 11e46da17bbb3c73ac53c1aac9aaa1ab50471a31..66e44a5019d538fff04d338bbf304b3dccecb4e2 100644
--- a/tools/perf/util/color.c
+++ b/tools/perf/util/color.c
@@ -318,8 +318,15 @@ int percent_color_fprintf(FILE *fp, const char *fmt, double percent)
 	return r;
 }
 
-int percent_color_snprintf(char *bf, size_t size, const char *fmt, double percent)
+int percent_color_snprintf(char *bf, size_t size, const char *fmt, ...)
 {
-	const char *color = get_percent_color(percent);
+	va_list args;
+	double percent;
+	const char *color;
+
+	va_start(args, fmt);
+	percent = va_arg(args, double);
+	va_end(args);
+	color = get_percent_color(percent);
 	return color_snprintf(bf, size, color, fmt, percent);
 }
diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h
index dea082b7960210a17ccd2ee12d71a61a6192a503..fced3840e99c53bda892a466068c78e57c5982eb 100644
--- a/tools/perf/util/color.h
+++ b/tools/perf/util/color.h
@@ -39,7 +39,7 @@ int color_fprintf(FILE *fp, const char *color, const char *fmt, ...);
 int color_snprintf(char *bf, size_t size, const char *color, const char *fmt, ...);
 int color_fprintf_ln(FILE *fp, const char *color, const char *fmt, ...);
 int color_fwrite_lines(FILE *fp, const char *color, size_t count, const char *buf);
-int percent_color_snprintf(char *bf, size_t size, const char *fmt, double percent);
+int percent_color_snprintf(char *bf, size_t size, const char *fmt, ...);
 int percent_color_fprintf(FILE *fp, const char *fmt, double percent);
 const char *get_percent_color(double percent);
 
diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c
new file mode 100644
index 0000000000000000000000000000000000000000..ee0df0e24cdbd4f37e33aaa088c3b1465941d4b1
--- /dev/null
+++ b/tools/perf/util/comm.c
@@ -0,0 +1,121 @@
+#include "comm.h"
+#include "util.h"
+#include <stdlib.h>
+#include <stdio.h>
+
+struct comm_str {
+	char *str;
+	struct rb_node rb_node;
+	int ref;
+};
+
+/* Should perhaps be moved to struct machine */
+static struct rb_root comm_str_root;
+
+static void comm_str__get(struct comm_str *cs)
+{
+	cs->ref++;
+}
+
+static void comm_str__put(struct comm_str *cs)
+{
+	if (!--cs->ref) {
+		rb_erase(&cs->rb_node, &comm_str_root);
+		free(cs->str);
+		free(cs);
+	}
+}
+
+static struct comm_str *comm_str__alloc(const char *str)
+{
+	struct comm_str *cs;
+
+	cs = zalloc(sizeof(*cs));
+	if (!cs)
+		return NULL;
+
+	cs->str = strdup(str);
+	if (!cs->str) {
+		free(cs);
+		return NULL;
+	}
+
+	return cs;
+}
+
+static struct comm_str *comm_str__findnew(const char *str, struct rb_root *root)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct comm_str *iter, *new;
+	int cmp;
+
+	while (*p != NULL) {
+		parent = *p;
+		iter = rb_entry(parent, struct comm_str, rb_node);
+
+		cmp = strcmp(str, iter->str);
+		if (!cmp)
+			return iter;
+
+		if (cmp < 0)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	new = comm_str__alloc(str);
+	if (!new)
+		return NULL;
+
+	rb_link_node(&new->rb_node, parent, p);
+	rb_insert_color(&new->rb_node, root);
+
+	return new;
+}
+
+struct comm *comm__new(const char *str, u64 timestamp)
+{
+	struct comm *comm = zalloc(sizeof(*comm));
+
+	if (!comm)
+		return NULL;
+
+	comm->start = timestamp;
+
+	comm->comm_str = comm_str__findnew(str, &comm_str_root);
+	if (!comm->comm_str) {
+		free(comm);
+		return NULL;
+	}
+
+	comm_str__get(comm->comm_str);
+
+	return comm;
+}
+
+void comm__override(struct comm *comm, const char *str, u64 timestamp)
+{
+	struct comm_str *old = comm->comm_str;
+
+	comm->comm_str = comm_str__findnew(str, &comm_str_root);
+	if (!comm->comm_str) {
+		comm->comm_str = old;
+		return;
+	}
+
+	comm->start = timestamp;
+	comm_str__get(comm->comm_str);
+	comm_str__put(old);
+}
+
+void comm__free(struct comm *comm)
+{
+	comm_str__put(comm->comm_str);
+	free(comm);
+}
+
+const char *comm__str(const struct comm *comm)
+{
+	return comm->comm_str->str;
+}
diff --git a/tools/perf/util/comm.h b/tools/perf/util/comm.h
new file mode 100644
index 0000000000000000000000000000000000000000..7a86e5656710a76176a2b00eb8891d24ef117edd
--- /dev/null
+++ b/tools/perf/util/comm.h
@@ -0,0 +1,21 @@
+#ifndef __PERF_COMM_H
+#define __PERF_COMM_H
+
+#include "../perf.h"
+#include <linux/rbtree.h>
+#include <linux/list.h>
+
+struct comm_str;
+
+struct comm {
+	struct comm_str *comm_str;
+	u64 start;
+	struct list_head list;
+};
+
+void comm__free(struct comm *comm);
+struct comm *comm__new(const char *str, u64 timestamp);
+const char *comm__str(const struct comm *comm);
+void comm__override(struct comm *comm, const char *str, u64 timestamp);
+
+#endif  /* __PERF_COMM_H */
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index beb8cf9f9976df75e77f38dbc5860b37acb5a9a9..a9b48c42e81eb36e34cce24e091be66437270d44 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -1,5 +1,5 @@
 #include "util.h"
-#include "sysfs.h"
+#include "fs.h"
 #include "../perf.h"
 #include "cpumap.h"
 #include <assert.h>
@@ -216,7 +216,7 @@ int cpu_map__get_socket(struct cpu_map *map, int idx)
 
 	cpu = map->map[idx];
 
-	mnt = sysfs_find_mountpoint();
+	mnt = sysfs__mountpoint();
 	if (!mnt)
 		return -1;
 
@@ -279,7 +279,7 @@ int cpu_map__get_core(struct cpu_map *map, int idx)
 
 	cpu = map->map[idx];
 
-	mnt = sysfs_find_mountpoint();
+	mnt = sysfs__mountpoint();
 	if (!mnt)
 		return -1;
 
diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c
new file mode 100644
index 0000000000000000000000000000000000000000..7d09faf85cf16f2b4c9dead1f4e3b36a5c7c9c06
--- /dev/null
+++ b/tools/perf/util/data.c
@@ -0,0 +1,120 @@
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <string.h>
+
+#include "data.h"
+#include "util.h"
+
+static bool check_pipe(struct perf_data_file *file)
+{
+	struct stat st;
+	bool is_pipe = false;
+	int fd = perf_data_file__is_read(file) ?
+		 STDIN_FILENO : STDOUT_FILENO;
+
+	if (!file->path) {
+		if (!fstat(fd, &st) && S_ISFIFO(st.st_mode))
+			is_pipe = true;
+	} else {
+		if (!strcmp(file->path, "-"))
+			is_pipe = true;
+	}
+
+	if (is_pipe)
+		file->fd = fd;
+
+	return file->is_pipe = is_pipe;
+}
+
+static int check_backup(struct perf_data_file *file)
+{
+	struct stat st;
+
+	if (!stat(file->path, &st) && st.st_size) {
+		/* TODO check errors properly */
+		char oldname[PATH_MAX];
+		snprintf(oldname, sizeof(oldname), "%s.old",
+			 file->path);
+		unlink(oldname);
+		rename(file->path, oldname);
+	}
+
+	return 0;
+}
+
+static int open_file_read(struct perf_data_file *file)
+{
+	struct stat st;
+	int fd;
+
+	fd = open(file->path, O_RDONLY);
+	if (fd < 0) {
+		int err = errno;
+
+		pr_err("failed to open %s: %s", file->path, strerror(err));
+		if (err == ENOENT && !strcmp(file->path, "perf.data"))
+			pr_err("  (try 'perf record' first)");
+		pr_err("\n");
+		return -err;
+	}
+
+	if (fstat(fd, &st) < 0)
+		goto out_close;
+
+	if (!file->force && st.st_uid && (st.st_uid != geteuid())) {
+		pr_err("file %s not owned by current user or root\n",
+		       file->path);
+		goto out_close;
+	}
+
+	if (!st.st_size) {
+		pr_info("zero-sized file (%s), nothing to do!\n",
+			file->path);
+		goto out_close;
+	}
+
+	file->size = st.st_size;
+	return fd;
+
+ out_close:
+	close(fd);
+	return -1;
+}
+
+static int open_file_write(struct perf_data_file *file)
+{
+	if (check_backup(file))
+		return -1;
+
+	return open(file->path, O_CREAT|O_RDWR|O_TRUNC, S_IRUSR|S_IWUSR);
+}
+
+static int open_file(struct perf_data_file *file)
+{
+	int fd;
+
+	fd = perf_data_file__is_read(file) ?
+	     open_file_read(file) : open_file_write(file);
+
+	file->fd = fd;
+	return fd < 0 ? -1 : 0;
+}
+
+int perf_data_file__open(struct perf_data_file *file)
+{
+	if (check_pipe(file))
+		return 0;
+
+	if (!file->path)
+		file->path = "perf.data";
+
+	return open_file(file);
+}
+
+void perf_data_file__close(struct perf_data_file *file)
+{
+	close(file->fd);
+}
diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h
new file mode 100644
index 0000000000000000000000000000000000000000..8c2df80152a519f02c2e699bf703840ce46efd1c
--- /dev/null
+++ b/tools/perf/util/data.h
@@ -0,0 +1,48 @@
+#ifndef __PERF_DATA_H
+#define __PERF_DATA_H
+
+#include <stdbool.h>
+
+enum perf_data_mode {
+	PERF_DATA_MODE_WRITE,
+	PERF_DATA_MODE_READ,
+};
+
+struct perf_data_file {
+	const char *path;
+	int fd;
+	bool is_pipe;
+	bool force;
+	unsigned long size;
+	enum perf_data_mode mode;
+};
+
+static inline bool perf_data_file__is_read(struct perf_data_file *file)
+{
+	return file->mode == PERF_DATA_MODE_READ;
+}
+
+static inline bool perf_data_file__is_write(struct perf_data_file *file)
+{
+	return file->mode == PERF_DATA_MODE_WRITE;
+}
+
+static inline int perf_data_file__is_pipe(struct perf_data_file *file)
+{
+	return file->is_pipe;
+}
+
+static inline int perf_data_file__fd(struct perf_data_file *file)
+{
+	return file->fd;
+}
+
+static inline unsigned long perf_data_file__size(struct perf_data_file *file)
+{
+	return file->size;
+}
+
+int perf_data_file__open(struct perf_data_file *file);
+void perf_data_file__close(struct perf_data_file *file);
+
+#endif /* __PERF_DATA_H */
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index e3c1ff8512c827330d4afde013e19b7c5eeb58f2..af4c687cc49b54279c95c10ed48fe12843700658 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -7,19 +7,20 @@
 char dso__symtab_origin(const struct dso *dso)
 {
 	static const char origin[] = {
-		[DSO_BINARY_TYPE__KALLSYMS]		= 'k',
-		[DSO_BINARY_TYPE__VMLINUX]		= 'v',
-		[DSO_BINARY_TYPE__JAVA_JIT]		= 'j',
-		[DSO_BINARY_TYPE__DEBUGLINK]		= 'l',
-		[DSO_BINARY_TYPE__BUILD_ID_CACHE]	= 'B',
-		[DSO_BINARY_TYPE__FEDORA_DEBUGINFO]	= 'f',
-		[DSO_BINARY_TYPE__UBUNTU_DEBUGINFO]	= 'u',
-		[DSO_BINARY_TYPE__BUILDID_DEBUGINFO]	= 'b',
-		[DSO_BINARY_TYPE__SYSTEM_PATH_DSO]	= 'd',
-		[DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE]	= 'K',
-		[DSO_BINARY_TYPE__GUEST_KALLSYMS]	= 'g',
-		[DSO_BINARY_TYPE__GUEST_KMODULE]	= 'G',
-		[DSO_BINARY_TYPE__GUEST_VMLINUX]	= 'V',
+		[DSO_BINARY_TYPE__KALLSYMS]			= 'k',
+		[DSO_BINARY_TYPE__VMLINUX]			= 'v',
+		[DSO_BINARY_TYPE__JAVA_JIT]			= 'j',
+		[DSO_BINARY_TYPE__DEBUGLINK]			= 'l',
+		[DSO_BINARY_TYPE__BUILD_ID_CACHE]		= 'B',
+		[DSO_BINARY_TYPE__FEDORA_DEBUGINFO]		= 'f',
+		[DSO_BINARY_TYPE__UBUNTU_DEBUGINFO]		= 'u',
+		[DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO]	= 'o',
+		[DSO_BINARY_TYPE__BUILDID_DEBUGINFO]		= 'b',
+		[DSO_BINARY_TYPE__SYSTEM_PATH_DSO]		= 'd',
+		[DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE]		= 'K',
+		[DSO_BINARY_TYPE__GUEST_KALLSYMS]		= 'g',
+		[DSO_BINARY_TYPE__GUEST_KMODULE]		= 'G',
+		[DSO_BINARY_TYPE__GUEST_VMLINUX]		= 'V',
 	};
 
 	if (dso == NULL || dso->symtab_type == DSO_BINARY_TYPE__NOT_FOUND)
@@ -64,6 +65,28 @@ int dso__binary_type_file(struct dso *dso, enum dso_binary_type type,
 			 symbol_conf.symfs, dso->long_name);
 		break;
 
+	case DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO:
+	{
+		char *last_slash;
+		size_t len;
+		size_t dir_size;
+
+		last_slash = dso->long_name + dso->long_name_len;
+		while (last_slash != dso->long_name && *last_slash != '/')
+			last_slash--;
+
+		len = scnprintf(file, size, "%s", symbol_conf.symfs);
+		dir_size = last_slash - dso->long_name + 2;
+		if (dir_size > (size - len)) {
+			ret = -1;
+			break;
+		}
+		len += scnprintf(file + len, dir_size, "%s",  dso->long_name);
+		len += scnprintf(file + len , size - len, ".debug%s",
+								last_slash);
+		break;
+	}
+
 	case DSO_BINARY_TYPE__BUILDID_DEBUGINFO:
 		if (!dso->has_build_id) {
 			ret = -1;
@@ -427,6 +450,7 @@ struct dso *dso__new(const char *name)
 		dso->rel = 0;
 		dso->sorted_by_name = 0;
 		dso->has_build_id = 0;
+		dso->has_srcline = 1;
 		dso->kernel = DSO_TYPE_USER;
 		dso->needs_swap = DSO_SWAP__UNSET;
 		INIT_LIST_HEAD(&dso->node);
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index b793053335d60fca3302fd34b27543cd6151b0fb..9ac666abbe7ef988f30630c053f9c4e709eb3edb 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -6,6 +6,7 @@
 #include <stdbool.h>
 #include "types.h"
 #include "map.h"
+#include "build-id.h"
 
 enum dso_binary_type {
 	DSO_BINARY_TYPE__KALLSYMS = 0,
@@ -23,6 +24,7 @@ enum dso_binary_type {
 	DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE,
 	DSO_BINARY_TYPE__KCORE,
 	DSO_BINARY_TYPE__GUEST_KCORE,
+	DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO,
 	DSO_BINARY_TYPE__NOT_FOUND,
 };
 
@@ -81,6 +83,7 @@ struct dso {
 	enum dso_binary_type	data_type;
 	u8		 adjust_symbols:1;
 	u8		 has_build_id:1;
+	u8		 has_srcline:1;
 	u8		 hit:1;
 	u8		 annotate_warned:1;
 	u8		 sname_alloc:1;
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 49096ea58a15dec22dc1e20b0de5d69bf2c2ac3a..ec9ae1114ed4ca57492948686cf2858d3f2591d8 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -512,18 +512,18 @@ size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp)
 
 int perf_event__process_comm(struct perf_tool *tool __maybe_unused,
 			     union perf_event *event,
-			     struct perf_sample *sample __maybe_unused,
+			     struct perf_sample *sample,
 			     struct machine *machine)
 {
-	return machine__process_comm_event(machine, event);
+	return machine__process_comm_event(machine, event, sample);
 }
 
 int perf_event__process_lost(struct perf_tool *tool __maybe_unused,
 			     union perf_event *event,
-			     struct perf_sample *sample __maybe_unused,
+			     struct perf_sample *sample,
 			     struct machine *machine)
 {
-	return machine__process_lost_event(machine, event);
+	return machine__process_lost_event(machine, event, sample);
 }
 
 size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp)
@@ -546,18 +546,18 @@ size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp)
 
 int perf_event__process_mmap(struct perf_tool *tool __maybe_unused,
 			     union perf_event *event,
-			     struct perf_sample *sample __maybe_unused,
+			     struct perf_sample *sample,
 			     struct machine *machine)
 {
-	return machine__process_mmap_event(machine, event);
+	return machine__process_mmap_event(machine, event, sample);
 }
 
 int perf_event__process_mmap2(struct perf_tool *tool __maybe_unused,
 			     union perf_event *event,
-			     struct perf_sample *sample __maybe_unused,
+			     struct perf_sample *sample,
 			     struct machine *machine)
 {
-	return machine__process_mmap2_event(machine, event);
+	return machine__process_mmap2_event(machine, event, sample);
 }
 
 size_t perf_event__fprintf_task(union perf_event *event, FILE *fp)
@@ -569,18 +569,18 @@ size_t perf_event__fprintf_task(union perf_event *event, FILE *fp)
 
 int perf_event__process_fork(struct perf_tool *tool __maybe_unused,
 			     union perf_event *event,
-			     struct perf_sample *sample __maybe_unused,
+			     struct perf_sample *sample,
 			     struct machine *machine)
 {
-	return machine__process_fork_event(machine, event);
+	return machine__process_fork_event(machine, event, sample);
 }
 
 int perf_event__process_exit(struct perf_tool *tool __maybe_unused,
 			     union perf_event *event,
-			     struct perf_sample *sample __maybe_unused,
+			     struct perf_sample *sample,
 			     struct machine *machine)
 {
-	return machine__process_exit_event(machine, event);
+	return machine__process_exit_event(machine, event, sample);
 }
 
 size_t perf_event__fprintf(union perf_event *event, FILE *fp)
@@ -611,21 +611,21 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp)
 
 int perf_event__process(struct perf_tool *tool __maybe_unused,
 			union perf_event *event,
-			struct perf_sample *sample __maybe_unused,
+			struct perf_sample *sample,
 			struct machine *machine)
 {
-	return machine__process_event(machine, event);
+	return machine__process_event(machine, event, sample);
 }
 
-void thread__find_addr_map(struct thread *self,
+void thread__find_addr_map(struct thread *thread,
 			   struct machine *machine, u8 cpumode,
 			   enum map_type type, u64 addr,
 			   struct addr_location *al)
 {
-	struct map_groups *mg = &self->mg;
+	struct map_groups *mg = &thread->mg;
 	bool load_map = false;
 
-	al->thread = self;
+	al->thread = thread;
 	al->addr = addr;
 	al->cpumode = cpumode;
 	al->filtered = false;
@@ -721,10 +721,10 @@ int perf_event__preprocess_sample(const union perf_event *event,
 		return -1;
 
 	if (symbol_conf.comm_list &&
-	    !strlist__has_entry(symbol_conf.comm_list, thread->comm))
+	    !strlist__has_entry(symbol_conf.comm_list, thread__comm_str(thread)))
 		goto out_filtered;
 
-	dump_printf(" ... thread: %s:%d\n", thread->comm, thread->tid);
+	dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid);
 	/*
 	 * Have we already created the kernel maps for this machine?
 	 *
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index c67ecc457d295d029a2307c1b0ad7e1531b944d1..f8d70f3003ab6995a6522846f5807fecb20f2fce 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -61,6 +61,12 @@ struct read_event {
 	u64 id;
 };
 
+struct throttle_event {
+	struct perf_event_header header;
+	u64 time;
+	u64 id;
+	u64 stream_id;
+};
 
 #define PERF_SAMPLE_MASK				\
 	(PERF_SAMPLE_IP | PERF_SAMPLE_TID |		\
@@ -69,6 +75,9 @@ struct read_event {
 	 PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD |		\
 	 PERF_SAMPLE_IDENTIFIER)
 
+/* perf sample has 16 bits size limit */
+#define PERF_SAMPLE_MAX_SIZE (1 << 16)
+
 struct sample_event {
 	struct perf_event_header        header;
 	u64 array[];
@@ -111,6 +120,7 @@ struct perf_sample {
 	u64 stream_id;
 	u64 period;
 	u64 weight;
+	u64 transaction;
 	u32 cpu;
 	u32 raw_size;
 	u64 data_src;
@@ -177,6 +187,7 @@ union perf_event {
 	struct fork_event		fork;
 	struct lost_event		lost;
 	struct read_event		read;
+	struct throttle_event		throttle;
 	struct sample_event		sample;
 	struct attr_event		attr;
 	struct event_type_event		event_type;
@@ -240,7 +251,8 @@ int perf_event__process(struct perf_tool *tool,
 			struct machine *machine);
 
 struct addr_location;
-int perf_event__preprocess_sample(const union perf_event *self,
+
+int perf_event__preprocess_sample(const union perf_event *event,
 				  struct machine *machine,
 				  struct addr_location *al,
 				  struct perf_sample *sample);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index e584cd30b0f2dca4866919e578e4c8e9e3f71ade..b939221efd8dea0b670d1d968e7fa9086c7034a0 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -18,6 +18,7 @@
 #include <unistd.h>
 
 #include "parse-events.h"
+#include "parse-options.h"
 
 #include <sys/mman.h>
 
@@ -49,6 +50,18 @@ struct perf_evlist *perf_evlist__new(void)
 	return evlist;
 }
 
+struct perf_evlist *perf_evlist__new_default(void)
+{
+	struct perf_evlist *evlist = perf_evlist__new();
+
+	if (evlist && perf_evlist__add_default(evlist)) {
+		perf_evlist__delete(evlist);
+		evlist = NULL;
+	}
+
+	return evlist;
+}
+
 /**
  * perf_evlist__set_id_pos - set the positions of event ids.
  * @evlist: selected event list
@@ -242,7 +255,7 @@ int perf_evlist__add_newtp(struct perf_evlist *evlist,
 	if (evsel == NULL)
 		return -1;
 
-	evsel->handler.func = handler;
+	evsel->handler = handler;
 	perf_evlist__add(evlist, evsel);
 	return 0;
 }
@@ -527,7 +540,7 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
 		if ((old & md->mask) + size != ((old + size) & md->mask)) {
 			unsigned int offset = old;
 			unsigned int len = min(sizeof(*event), size), cpy;
-			void *dst = &md->event_copy;
+			void *dst = md->event_copy;
 
 			do {
 				cpy = min(md->mask + 1 - (offset & md->mask), len);
@@ -537,7 +550,7 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
 				len -= cpy;
 			} while (len);
 
-			event = &md->event_copy;
+			event = (union perf_event *) md->event_copy;
 		}
 
 		old += size;
@@ -594,6 +607,8 @@ static int __perf_evlist__mmap(struct perf_evlist *evlist,
 	evlist->mmap[idx].base = mmap(NULL, evlist->mmap_len, prot,
 				      MAP_SHARED, fd, 0);
 	if (evlist->mmap[idx].base == MAP_FAILED) {
+		pr_debug2("failed to mmap perf event ring buffer, error %d\n",
+			  errno);
 		evlist->mmap[idx].base = NULL;
 		return -1;
 	}
@@ -602,9 +617,36 @@ static int __perf_evlist__mmap(struct perf_evlist *evlist,
 	return 0;
 }
 
-static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist, int prot, int mask)
+static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
+				       int prot, int mask, int cpu, int thread,
+				       int *output)
 {
 	struct perf_evsel *evsel;
+
+	list_for_each_entry(evsel, &evlist->entries, node) {
+		int fd = FD(evsel, cpu, thread);
+
+		if (*output == -1) {
+			*output = fd;
+			if (__perf_evlist__mmap(evlist, idx, prot, mask,
+						*output) < 0)
+				return -1;
+		} else {
+			if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
+				return -1;
+		}
+
+		if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
+		    perf_evlist__id_add_fd(evlist, evsel, cpu, thread, fd) < 0)
+			return -1;
+	}
+
+	return 0;
+}
+
+static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist, int prot,
+				     int mask)
+{
 	int cpu, thread;
 	int nr_cpus = cpu_map__nr(evlist->cpus);
 	int nr_threads = thread_map__nr(evlist->threads);
@@ -614,23 +656,9 @@ static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist, int prot, int m
 		int output = -1;
 
 		for (thread = 0; thread < nr_threads; thread++) {
-			list_for_each_entry(evsel, &evlist->entries, node) {
-				int fd = FD(evsel, cpu, thread);
-
-				if (output == -1) {
-					output = fd;
-					if (__perf_evlist__mmap(evlist, cpu,
-								prot, mask, output) < 0)
-						goto out_unmap;
-				} else {
-					if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, output) != 0)
-						goto out_unmap;
-				}
-
-				if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
-				    perf_evlist__id_add_fd(evlist, evsel, cpu, thread, fd) < 0)
-					goto out_unmap;
-			}
+			if (perf_evlist__mmap_per_evsel(evlist, cpu, prot, mask,
+							cpu, thread, &output))
+				goto out_unmap;
 		}
 	}
 
@@ -642,9 +670,9 @@ static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist, int prot, int m
 	return -1;
 }
 
-static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist, int prot, int mask)
+static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist, int prot,
+					int mask)
 {
-	struct perf_evsel *evsel;
 	int thread;
 	int nr_threads = thread_map__nr(evlist->threads);
 
@@ -652,23 +680,9 @@ static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist, int prot, in
 	for (thread = 0; thread < nr_threads; thread++) {
 		int output = -1;
 
-		list_for_each_entry(evsel, &evlist->entries, node) {
-			int fd = FD(evsel, 0, thread);
-
-			if (output == -1) {
-				output = fd;
-				if (__perf_evlist__mmap(evlist, thread,
-							prot, mask, output) < 0)
-					goto out_unmap;
-			} else {
-				if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, output) != 0)
-					goto out_unmap;
-			}
-
-			if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
-			    perf_evlist__id_add_fd(evlist, evsel, 0, thread, fd) < 0)
-				goto out_unmap;
-		}
+		if (perf_evlist__mmap_per_evsel(evlist, thread, prot, mask, 0,
+						thread, &output))
+			goto out_unmap;
 	}
 
 	return 0;
@@ -679,20 +693,76 @@ static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist, int prot, in
 	return -1;
 }
 
-/** perf_evlist__mmap - Create per cpu maps to receive events
- *
- * @evlist - list of events
- * @pages - map length in pages
- * @overwrite - overwrite older events?
- *
- * If overwrite is false the user needs to signal event consuption using:
- *
- *	struct perf_mmap *m = &evlist->mmap[cpu];
- *	unsigned int head = perf_mmap__read_head(m);
+static size_t perf_evlist__mmap_size(unsigned long pages)
+{
+	/* 512 kiB: default amount of unprivileged mlocked memory */
+	if (pages == UINT_MAX)
+		pages = (512 * 1024) / page_size;
+	else if (!is_power_of_2(pages))
+		return 0;
+
+	return (pages + 1) * page_size;
+}
+
+int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
+				  int unset __maybe_unused)
+{
+	unsigned int *mmap_pages = opt->value;
+	unsigned long pages, val;
+	size_t size;
+	static struct parse_tag tags[] = {
+		{ .tag  = 'B', .mult = 1       },
+		{ .tag  = 'K', .mult = 1 << 10 },
+		{ .tag  = 'M', .mult = 1 << 20 },
+		{ .tag  = 'G', .mult = 1 << 30 },
+		{ .tag  = 0 },
+	};
+
+	val = parse_tag_value(str, tags);
+	if (val != (unsigned long) -1) {
+		/* we got file size value */
+		pages = PERF_ALIGN(val, page_size) / page_size;
+		if (pages < (1UL << 31) && !is_power_of_2(pages)) {
+			pages = next_pow2(pages);
+			pr_info("rounding mmap pages size to %lu (%lu pages)\n",
+				pages * page_size, pages);
+		}
+	} else {
+		/* we got pages count value */
+		char *eptr;
+		pages = strtoul(str, &eptr, 10);
+		if (*eptr != '\0') {
+			pr_err("failed to parse --mmap_pages/-m value\n");
+			return -1;
+		}
+	}
+
+	if (pages > UINT_MAX || pages > SIZE_MAX / page_size) {
+		pr_err("--mmap_pages/-m value too big\n");
+		return -1;
+	}
+
+	size = perf_evlist__mmap_size(pages);
+	if (!size) {
+		pr_err("--mmap_pages/-m value must be a power of two.");
+		return -1;
+	}
+
+	*mmap_pages = pages;
+	return 0;
+}
+
+/**
+ * perf_evlist__mmap - Create mmaps to receive events.
+ * @evlist: list of events
+ * @pages: map length in pages
+ * @overwrite: overwrite older events?
  *
- *	perf_mmap__write_tail(m, head)
+ * If @overwrite is %false the user needs to signal event consumption using
+ * perf_mmap__write_tail().  Using perf_evlist__mmap_read() does this
+ * automatically.
  *
- * Using perf_evlist__read_on_cpu does this automatically.
+ * Return: %0 on success, negative error code otherwise.
  */
 int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
 		      bool overwrite)
@@ -702,14 +772,6 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
 	const struct thread_map *threads = evlist->threads;
 	int prot = PROT_READ | (overwrite ? 0 : PROT_WRITE), mask;
 
-        /* 512 kiB: default amount of unprivileged mlocked memory */
-        if (pages == UINT_MAX)
-                pages = (512 * 1024) / page_size;
-	else if (!is_power_of_2(pages))
-		return -EINVAL;
-
-	mask = pages * page_size - 1;
-
 	if (evlist->mmap == NULL && perf_evlist__alloc_mmap(evlist) < 0)
 		return -ENOMEM;
 
@@ -717,7 +779,9 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
 		return -ENOMEM;
 
 	evlist->overwrite = overwrite;
-	evlist->mmap_len = (pages + 1) * page_size;
+	evlist->mmap_len = perf_evlist__mmap_size(pages);
+	pr_debug("mmap size %zuB\n", evlist->mmap_len);
+	mask = evlist->mmap_len - page_size - 1;
 
 	list_for_each_entry(evsel, &evlist->entries, node) {
 		if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
@@ -1073,3 +1137,66 @@ size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp)
 
 	return printed + fprintf(fp, "\n");;
 }
+
+int perf_evlist__strerror_tp(struct perf_evlist *evlist __maybe_unused,
+			     int err, char *buf, size_t size)
+{
+	char sbuf[128];
+
+	switch (err) {
+	case ENOENT:
+		scnprintf(buf, size, "%s",
+			  "Error:\tUnable to find debugfs\n"
+			  "Hint:\tWas your kernel was compiled with debugfs support?\n"
+			  "Hint:\tIs the debugfs filesystem mounted?\n"
+			  "Hint:\tTry 'sudo mount -t debugfs nodev /sys/kernel/debug'");
+		break;
+	case EACCES:
+		scnprintf(buf, size,
+			  "Error:\tNo permissions to read %s/tracing/events/raw_syscalls\n"
+			  "Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n",
+			  debugfs_mountpoint, debugfs_mountpoint);
+		break;
+	default:
+		scnprintf(buf, size, "%s", strerror_r(err, sbuf, sizeof(sbuf)));
+		break;
+	}
+
+	return 0;
+}
+
+int perf_evlist__strerror_open(struct perf_evlist *evlist __maybe_unused,
+			       int err, char *buf, size_t size)
+{
+	int printed, value;
+	char sbuf[128], *emsg = strerror_r(err, sbuf, sizeof(sbuf));
+
+	switch (err) {
+	case EACCES:
+	case EPERM:
+		printed = scnprintf(buf, size,
+				    "Error:\t%s.\n"
+				    "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg);
+
+		if (filename__read_int("/proc/sys/kernel/perf_event_paranoid", &value))
+			break;
+
+		printed += scnprintf(buf + printed, size - printed, "\nHint:\t");
+
+		if (value >= 2) {
+			printed += scnprintf(buf + printed, size - printed,
+					     "For your workloads it needs to be <= 1\nHint:\t");
+		}
+		printed += scnprintf(buf + printed, size - printed,
+				     "For system wide tracing it needs to be set to -1");
+
+		printed += scnprintf(buf + printed, size - printed,
+				    ".\nHint:\tThe current value is %d.", value);
+		break;
+	default:
+		scnprintf(buf, size, "%s", emsg);
+		break;
+	}
+
+	return 0;
+}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 206d093393069012421a65b6dc893f570de32dd9..ecaa582f40e265b5022c35075da162464df538d2 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -21,7 +21,7 @@ struct perf_mmap {
 	void		 *base;
 	int		 mask;
 	unsigned int	 prev;
-	union perf_event event_copy;
+	char		 event_copy[PERF_SAMPLE_MAX_SIZE];
 };
 
 struct perf_evlist {
@@ -31,7 +31,7 @@ struct perf_evlist {
 	int		 nr_groups;
 	int		 nr_fds;
 	int		 nr_mmaps;
-	int		 mmap_len;
+	size_t		 mmap_len;
 	int		 id_pos;
 	int		 is_pos;
 	u64		 combined_sample_type;
@@ -53,6 +53,7 @@ struct perf_evsel_str_handler {
 };
 
 struct perf_evlist *perf_evlist__new(void);
+struct perf_evlist *perf_evlist__new_default(void);
 void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
 		       struct thread_map *threads);
 void perf_evlist__exit(struct perf_evlist *evlist);
@@ -87,7 +88,7 @@ struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id);
 
 struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id);
 
-union perf_event *perf_evlist__mmap_read(struct perf_evlist *self, int idx);
+union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx);
 
 void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx);
 
@@ -98,6 +99,7 @@ void perf_evlist__set_id_pos(struct perf_evlist *evlist);
 bool perf_can_sample_identifier(void);
 void perf_evlist__config(struct perf_evlist *evlist,
 			 struct perf_record_opts *opts);
+int perf_record_opts__config(struct perf_record_opts *opts);
 
 int perf_evlist__prepare_workload(struct perf_evlist *evlist,
 				  struct perf_target *target,
@@ -105,6 +107,10 @@ int perf_evlist__prepare_workload(struct perf_evlist *evlist,
 				  bool want_signal);
 int perf_evlist__start_workload(struct perf_evlist *evlist);
 
+int perf_evlist__parse_mmap_pages(const struct option *opt,
+				  const char *str,
+				  int unset);
+
 int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
 		      bool overwrite);
 void perf_evlist__munmap(struct perf_evlist *evlist);
@@ -165,10 +171,13 @@ static inline struct perf_evsel *perf_evlist__last(struct perf_evlist *evlist)
 
 size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp);
 
+int perf_evlist__strerror_tp(struct perf_evlist *evlist, int err, char *buf, size_t size);
+int perf_evlist__strerror_open(struct perf_evlist *evlist, int err, char *buf, size_t size);
+
 static inline unsigned int perf_mmap__read_head(struct perf_mmap *mm)
 {
 	struct perf_event_mmap_page *pc = mm->base;
-	int head = pc->data_head;
+	int head = ACCESS_ONCE(pc->data_head);
 	rmb();
 	return head;
 }
@@ -181,7 +190,7 @@ static inline void perf_mmap__write_tail(struct perf_mmap *md,
 	/*
 	 * ensure all reads are done before we write the tail out.
 	 */
-	/* mb(); */
+	mb();
 	pc->data_tail = tail;
 }
 
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 9f1ef9bee2d0c96583dac31d4c12bcc220b519bf..5280820ed3897ea9ffddd1ce5f5c4e007071bf16 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -663,7 +663,7 @@ void perf_evsel__config(struct perf_evsel *evsel,
 	}
 
 	if (opts->sample_address)
-		attr->sample_type	|= PERF_SAMPLE_DATA_SRC;
+		perf_evsel__set_sample_bit(evsel, DATA_SRC);
 
 	if (opts->no_delay) {
 		attr->watermark = 0;
@@ -675,11 +675,14 @@ void perf_evsel__config(struct perf_evsel *evsel,
 	}
 
 	if (opts->sample_weight)
-		attr->sample_type	|= PERF_SAMPLE_WEIGHT;
+		perf_evsel__set_sample_bit(evsel, WEIGHT);
 
 	attr->mmap  = track;
 	attr->comm  = track;
 
+	if (opts->sample_transaction)
+		perf_evsel__set_sample_bit(evsel, TRANSACTION);
+
 	/*
 	 * XXX see the function comment above
 	 *
@@ -982,6 +985,7 @@ static size_t perf_event_attr__fprintf(struct perf_event_attr *attr, FILE *fp)
 	ret += PRINT_ATTR2(exclude_host, exclude_guest);
 	ret += PRINT_ATTR2N("excl.callchain_kern", exclude_callchain_kernel,
 			    "excl.callchain_user", exclude_callchain_user);
+	ret += PRINT_ATTR_U32(mmap2);
 
 	ret += PRINT_ATTR_U32(wakeup_events);
 	ret += PRINT_ATTR_U32(wakeup_watermark);
@@ -1047,6 +1051,8 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
 								     group_fd, flags);
 			if (FD(evsel, cpu, thread) < 0) {
 				err = -errno;
+				pr_debug2("perf_event_open failed, error %d\n",
+					  err);
 				goto try_fallback;
 			}
 			set_rlimit = NO_CHANGE;
@@ -1213,6 +1219,7 @@ static int perf_evsel__parse_id_sample(const struct perf_evsel *evsel,
 
 		sample->pid = u.val32[0];
 		sample->tid = u.val32[1];
+		array--;
 	}
 
 	return 0;
@@ -1452,6 +1459,9 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
 			array = (void *)array + sz;
 			OVERFLOW_CHECK_u64(array);
 			data->user_stack.size = *array++;
+			if (WARN_ONCE(data->user_stack.size > sz,
+				      "user stack dump failure\n"))
+				return -EFAULT;
 		}
 	}
 
@@ -1469,6 +1479,13 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
 		array++;
 	}
 
+	data->transaction = 0;
+	if (type & PERF_SAMPLE_TRANSACTION) {
+		OVERFLOW_CHECK_u64(array);
+		data->transaction = *array;
+		array++;
+	}
+
 	return 0;
 }
 
@@ -1561,6 +1578,9 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
 	if (type & PERF_SAMPLE_DATA_SRC)
 		result += sizeof(u64);
 
+	if (type & PERF_SAMPLE_TRANSACTION)
+		result += sizeof(u64);
+
 	return result;
 }
 
@@ -1734,6 +1754,11 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type,
 		array++;
 	}
 
+	if (type & PERF_SAMPLE_TRANSACTION) {
+		*array = sample->transaction;
+		array++;
+	}
+
 	return 0;
 }
 
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 4a7bdc713bab2fa4266069388ed79d530ba64b94..64ec8e1a7a2879a21c0592cbe1b50bd7b13f7ea0 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -74,10 +74,7 @@ struct perf_evsel {
 		off_t		id_offset;
 	};
 	struct cgroup_sel	*cgrp;
-	struct {
-		void		*func;
-		void		*data;
-	} handler;
+	void			*handler;
 	struct cpu_map		*cpus;
 	unsigned int		sample_size;
 	int			id_pos;
@@ -197,6 +194,12 @@ static inline bool perf_evsel__match2(struct perf_evsel *e1,
 	       (e1->attr.config == e2->attr.config);
 }
 
+#define perf_evsel__cmp(a, b)			\
+	((a) &&					\
+	 (b) &&					\
+	 (a)->attr.type == (b)->attr.type &&	\
+	 (a)->attr.config == (b)->attr.config)
+
 int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
 			      int cpu, int thread, bool scale);
 
diff --git a/tools/perf/util/fs.c b/tools/perf/util/fs.c
new file mode 100644
index 0000000000000000000000000000000000000000..f5be1f26e724d303ff4a98051b8cf933e1ec1cb4
--- /dev/null
+++ b/tools/perf/util/fs.c
@@ -0,0 +1,119 @@
+
+/* TODO merge/factor into tools/lib/lk/debugfs.c */
+
+#include "util.h"
+#include "util/fs.h"
+
+static const char * const sysfs__fs_known_mountpoints[] = {
+	"/sys",
+	0,
+};
+
+static const char * const procfs__known_mountpoints[] = {
+	"/proc",
+	0,
+};
+
+struct fs {
+	const char		*name;
+	const char * const	*mounts;
+	char			 path[PATH_MAX + 1];
+	bool			 found;
+	long			 magic;
+};
+
+enum {
+	FS__SYSFS  = 0,
+	FS__PROCFS = 1,
+};
+
+static struct fs fs__entries[] = {
+	[FS__SYSFS] = {
+		.name	= "sysfs",
+		.mounts	= sysfs__fs_known_mountpoints,
+		.magic	= SYSFS_MAGIC,
+	},
+	[FS__PROCFS] = {
+		.name	= "proc",
+		.mounts	= procfs__known_mountpoints,
+		.magic	= PROC_SUPER_MAGIC,
+	},
+};
+
+static bool fs__read_mounts(struct fs *fs)
+{
+	bool found = false;
+	char type[100];
+	FILE *fp;
+
+	fp = fopen("/proc/mounts", "r");
+	if (fp == NULL)
+		return NULL;
+
+	while (!found &&
+	       fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n",
+		      fs->path, type) == 2) {
+
+		if (strcmp(type, fs->name) == 0)
+			found = true;
+	}
+
+	fclose(fp);
+	return fs->found = found;
+}
+
+static int fs__valid_mount(const char *fs, long magic)
+{
+	struct statfs st_fs;
+
+	if (statfs(fs, &st_fs) < 0)
+		return -ENOENT;
+	else if (st_fs.f_type != magic)
+		return -ENOENT;
+
+	return 0;
+}
+
+static bool fs__check_mounts(struct fs *fs)
+{
+	const char * const *ptr;
+
+	ptr = fs->mounts;
+	while (*ptr) {
+		if (fs__valid_mount(*ptr, fs->magic) == 0) {
+			fs->found = true;
+			strcpy(fs->path, *ptr);
+			return true;
+		}
+		ptr++;
+	}
+
+	return false;
+}
+
+static const char *fs__get_mountpoint(struct fs *fs)
+{
+	if (fs__check_mounts(fs))
+		return fs->path;
+
+	return fs__read_mounts(fs) ? fs->path : NULL;
+}
+
+static const char *fs__mountpoint(int idx)
+{
+	struct fs *fs = &fs__entries[idx];
+
+	if (fs->found)
+		return (const char *)fs->path;
+
+	return fs__get_mountpoint(fs);
+}
+
+#define FS__MOUNTPOINT(name, idx)	\
+const char *name##__mountpoint(void)	\
+{					\
+	return fs__mountpoint(idx);	\
+}
+
+FS__MOUNTPOINT(sysfs,  FS__SYSFS);
+FS__MOUNTPOINT(procfs, FS__PROCFS);
diff --git a/tools/perf/util/fs.h b/tools/perf/util/fs.h
new file mode 100644
index 0000000000000000000000000000000000000000..5e09ce1bab0ec05ceaacce5979e92f65bd1b6b52
--- /dev/null
+++ b/tools/perf/util/fs.h
@@ -0,0 +1,7 @@
+#ifndef __PERF_FS
+#define __PERF_FS
+
+const char *sysfs__mountpoint(void);
+const char *procfs__mountpoint(void);
+
+#endif /* __PERF_FS */
diff --git a/tools/perf/util/generate-cmdlist.sh b/tools/perf/util/generate-cmdlist.sh
index 3ac38031d53400337815560562a52093aaeee9e2..36a885d2cd22d12ed004177f375bcaf4aac86a28 100755
--- a/tools/perf/util/generate-cmdlist.sh
+++ b/tools/perf/util/generate-cmdlist.sh
@@ -22,7 +22,7 @@ do
      }' "Documentation/perf-$cmd.txt"
 done
 
-echo "#ifdef LIBELF_SUPPORT"
+echo "#ifdef HAVE_LIBELF_SUPPORT"
 sed -n -e 's/^perf-\([^ 	]*\)[ 	].* full.*/\1/p' command-list.txt |
 sort |
 while read cmd
@@ -35,5 +35,5 @@ do
 	    p
      }' "Documentation/perf-$cmd.txt"
 done
-echo "#endif /* LIBELF_SUPPORT */"
+echo "#endif /* HAVE_LIBELF_SUPPORT */"
 echo "};"
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index c3e5a3b817ab714497dc7f6f39520945dc886b1a..26d9520a0c1b6ebf550bfd001bcd278fa8736d9c 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -22,6 +22,7 @@
 #include "vdso.h"
 #include "strbuf.h"
 #include "build-id.h"
+#include "data.h"
 
 static bool no_buildid_cache = false;
 
@@ -2189,7 +2190,7 @@ int perf_header__fprintf_info(struct perf_session *session, FILE *fp, bool full)
 {
 	struct header_print_data hd;
 	struct perf_header *header = &session->header;
-	int fd = session->fd;
+	int fd = perf_data_file__fd(session->file);
 	hd.fp = fp;
 	hd.full = full;
 
@@ -2650,7 +2651,8 @@ static int perf_header__read_pipe(struct perf_session *session)
 	struct perf_header *header = &session->header;
 	struct perf_pipe_file_header f_header;
 
-	if (perf_file_header__read_pipe(&f_header, header, session->fd,
+	if (perf_file_header__read_pipe(&f_header, header,
+					perf_data_file__fd(session->file),
 					session->repipe) < 0) {
 		pr_debug("incompatible file format\n");
 		return -EINVAL;
@@ -2751,18 +2753,19 @@ static int perf_evlist__prepare_tracepoint_events(struct perf_evlist *evlist,
 
 int perf_session__read_header(struct perf_session *session)
 {
+	struct perf_data_file *file = session->file;
 	struct perf_header *header = &session->header;
 	struct perf_file_header	f_header;
 	struct perf_file_attr	f_attr;
 	u64			f_id;
 	int nr_attrs, nr_ids, i, j;
-	int fd = session->fd;
+	int fd = perf_data_file__fd(file);
 
 	session->evlist = perf_evlist__new();
 	if (session->evlist == NULL)
 		return -ENOMEM;
 
-	if (session->fd_pipe)
+	if (perf_data_file__is_pipe(file))
 		return perf_header__read_pipe(session);
 
 	if (perf_file_header__read(&f_header, header, fd) < 0)
@@ -2777,7 +2780,7 @@ int perf_session__read_header(struct perf_session *session)
 	if (f_header.data.size == 0) {
 		pr_warning("WARNING: The %s file's data size field is 0 which is unexpected.\n"
 			   "Was the 'perf record' command properly terminated?\n",
-			   session->filename);
+			   file->path);
 	}
 
 	nr_attrs = f_header.attrs.size / f_header.attr_size;
@@ -2990,18 +2993,19 @@ int perf_event__process_tracing_data(struct perf_tool *tool __maybe_unused,
 				     struct perf_session *session)
 {
 	ssize_t size_read, padding, size = event->tracing_data.size;
-	off_t offset = lseek(session->fd, 0, SEEK_CUR);
+	int fd = perf_data_file__fd(session->file);
+	off_t offset = lseek(fd, 0, SEEK_CUR);
 	char buf[BUFSIZ];
 
 	/* setup for reading amidst mmap */
-	lseek(session->fd, offset + sizeof(struct tracing_data_event),
+	lseek(fd, offset + sizeof(struct tracing_data_event),
 	      SEEK_SET);
 
-	size_read = trace_report(session->fd, &session->pevent,
+	size_read = trace_report(fd, &session->pevent,
 				 session->repipe);
 	padding = PERF_ALIGN(size_read, sizeof(u64)) - size_read;
 
-	if (readn(session->fd, buf, padding) < 0) {
+	if (readn(fd, buf, padding) < 0) {
 		pr_err("%s: reading input file", __func__);
 		return -1;
 	}
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 9ff6cf3e9a99f69596b37372f60203c11bec88a3..822903eaa201f3ec81648206eaf3c2f3ae2710c0 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -160,6 +160,10 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
 	hists__new_col_len(hists, HISTC_MEM_LVL, 21 + 3);
 	hists__new_col_len(hists, HISTC_LOCAL_WEIGHT, 12);
 	hists__new_col_len(hists, HISTC_GLOBAL_WEIGHT, 12);
+
+	if (h->transaction)
+		hists__new_col_len(hists, HISTC_TRANSACTION,
+				   hist_entry__transaction_len());
 }
 
 void hists__output_recalc_col_len(struct hists *hists, int max_rows)
@@ -346,7 +350,7 @@ static struct hist_entry *add_hist_entry(struct hists *hists,
 	struct rb_node **p;
 	struct rb_node *parent = NULL;
 	struct hist_entry *he;
-	int cmp;
+	int64_t cmp;
 
 	p = &hists->entries_in->rb_node;
 
@@ -395,6 +399,7 @@ static struct hist_entry *add_hist_entry(struct hists *hists,
 	if (!he)
 		return NULL;
 
+	hists->nr_entries++;
 	rb_link_node(&he->rb_node_in, parent, p);
 	rb_insert_color(&he->rb_node_in, hists->entries_in);
 out:
@@ -402,74 +407,16 @@ static struct hist_entry *add_hist_entry(struct hists *hists,
 	return he;
 }
 
-struct hist_entry *__hists__add_mem_entry(struct hists *self,
-					  struct addr_location *al,
-					  struct symbol *sym_parent,
-					  struct mem_info *mi,
-					  u64 period,
-					  u64 weight)
-{
-	struct hist_entry entry = {
-		.thread	= al->thread,
-		.ms = {
-			.map	= al->map,
-			.sym	= al->sym,
-		},
-		.stat = {
-			.period	= period,
-			.weight = weight,
-			.nr_events = 1,
-		},
-		.cpu	= al->cpu,
-		.ip	= al->addr,
-		.level	= al->level,
-		.parent = sym_parent,
-		.filtered = symbol__parent_filter(sym_parent),
-		.hists = self,
-		.mem_info = mi,
-		.branch_info = NULL,
-	};
-	return add_hist_entry(self, &entry, al, period, weight);
-}
-
-struct hist_entry *__hists__add_branch_entry(struct hists *self,
-					     struct addr_location *al,
-					     struct symbol *sym_parent,
-					     struct branch_info *bi,
-					     u64 period,
-					     u64 weight)
-{
-	struct hist_entry entry = {
-		.thread	= al->thread,
-		.ms = {
-			.map	= bi->to.map,
-			.sym	= bi->to.sym,
-		},
-		.cpu	= al->cpu,
-		.ip	= bi->to.addr,
-		.level	= al->level,
-		.stat = {
-			.period	= period,
-			.nr_events = 1,
-			.weight = weight,
-		},
-		.parent = sym_parent,
-		.filtered = symbol__parent_filter(sym_parent),
-		.branch_info = bi,
-		.hists	= self,
-		.mem_info = NULL,
-	};
-
-	return add_hist_entry(self, &entry, al, period, weight);
-}
-
-struct hist_entry *__hists__add_entry(struct hists *self,
+struct hist_entry *__hists__add_entry(struct hists *hists,
 				      struct addr_location *al,
-				      struct symbol *sym_parent, u64 period,
-				      u64 weight)
+				      struct symbol *sym_parent,
+				      struct branch_info *bi,
+				      struct mem_info *mi,
+				      u64 period, u64 weight, u64 transaction)
 {
 	struct hist_entry entry = {
 		.thread	= al->thread,
+		.comm = thread__comm(al->thread),
 		.ms = {
 			.map	= al->map,
 			.sym	= al->sym,
@@ -478,18 +425,19 @@ struct hist_entry *__hists__add_entry(struct hists *self,
 		.ip	= al->addr,
 		.level	= al->level,
 		.stat = {
-			.period	= period,
 			.nr_events = 1,
+			.period	= period,
 			.weight = weight,
 		},
 		.parent = sym_parent,
 		.filtered = symbol__parent_filter(sym_parent),
-		.hists	= self,
-		.branch_info = NULL,
-		.mem_info = NULL,
+		.hists	= hists,
+		.branch_info = bi,
+		.mem_info = mi,
+		.transaction = transaction,
 	};
 
-	return add_hist_entry(self, &entry, al, period, weight);
+	return add_hist_entry(hists, &entry, al, period, weight);
 }
 
 int64_t
@@ -530,6 +478,7 @@ void hist_entry__free(struct hist_entry *he)
 {
 	free(he->branch_info);
 	free(he->mem_info);
+	free_srcline(he->srcline);
 	free(he);
 }
 
@@ -598,7 +547,7 @@ static void hists__apply_filters(struct hists *hists, struct hist_entry *he)
 	hists__filter_entry_by_symbol(hists, he);
 }
 
-void hists__collapse_resort(struct hists *hists)
+void hists__collapse_resort(struct hists *hists, struct ui_progress *prog)
 {
 	struct rb_root *root;
 	struct rb_node *next;
@@ -625,6 +574,8 @@ void hists__collapse_resort(struct hists *hists)
 			 */
 			hists__apply_filters(hists, n);
 		}
+		if (prog)
+			ui_progress__update(prog, 1);
 	}
 }
 
@@ -884,7 +835,7 @@ static struct hist_entry *hists__add_dummy_entry(struct hists *hists,
 	struct rb_node **p;
 	struct rb_node *parent = NULL;
 	struct hist_entry *he;
-	int cmp;
+	int64_t cmp;
 
 	if (sort__need_collapse)
 		root = &hists->entries_collapsed;
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index ce8dc61ce2c358ddea5851079916292e6838a370..b621347a1585db4a469b8a08d10acb107406c20f 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -6,6 +6,7 @@
 #include "callchain.h"
 #include "header.h"
 #include "color.h"
+#include "ui/progress.h"
 
 extern struct callchain_param callchain_param;
 
@@ -46,6 +47,8 @@ enum hist_column {
 	HISTC_CPU,
 	HISTC_SRCLINE,
 	HISTC_MISPREDICT,
+	HISTC_IN_TX,
+	HISTC_ABORT,
 	HISTC_SYMBOL_FROM,
 	HISTC_SYMBOL_TO,
 	HISTC_DSO_FROM,
@@ -58,6 +61,7 @@ enum hist_column {
 	HISTC_MEM_TLB,
 	HISTC_MEM_LVL,
 	HISTC_MEM_SNOOP,
+	HISTC_TRANSACTION,
 	HISTC_NR_COLS, /* Last entry */
 };
 
@@ -80,54 +84,43 @@ struct hists {
 	u16			col_len[HISTC_NR_COLS];
 };
 
-struct hist_entry *__hists__add_entry(struct hists *self,
+struct hist_entry *__hists__add_entry(struct hists *hists,
 				      struct addr_location *al,
-				      struct symbol *parent, u64 period,
-				      u64 weight);
+				      struct symbol *parent,
+				      struct branch_info *bi,
+				      struct mem_info *mi, u64 period,
+				      u64 weight, u64 transaction);
 int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
 int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right);
-int hist_entry__sort_snprintf(struct hist_entry *self, char *bf, size_t size,
+int hist_entry__transaction_len(void);
+int hist_entry__sort_snprintf(struct hist_entry *he, char *bf, size_t size,
 			      struct hists *hists);
 void hist_entry__free(struct hist_entry *);
 
-struct hist_entry *__hists__add_branch_entry(struct hists *self,
-					     struct addr_location *al,
-					     struct symbol *sym_parent,
-					     struct branch_info *bi,
-					     u64 period,
-					     u64 weight);
-
-struct hist_entry *__hists__add_mem_entry(struct hists *self,
-					  struct addr_location *al,
-					  struct symbol *sym_parent,
-					  struct mem_info *mi,
-					  u64 period,
-					  u64 weight);
-
-void hists__output_resort(struct hists *self);
-void hists__collapse_resort(struct hists *self);
+void hists__output_resort(struct hists *hists);
+void hists__collapse_resort(struct hists *hists, struct ui_progress *prog);
 
 void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel);
 void hists__output_recalc_col_len(struct hists *hists, int max_rows);
 
 void hists__inc_nr_entries(struct hists *hists, struct hist_entry *h);
-void hists__inc_nr_events(struct hists *self, u32 type);
+void hists__inc_nr_events(struct hists *hists, u32 type);
 void events_stats__inc(struct events_stats *stats, u32 type);
 size_t events_stats__fprintf(struct events_stats *stats, FILE *fp);
 
-size_t hists__fprintf(struct hists *self, bool show_header, int max_rows,
+size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
 		      int max_cols, float min_pcnt, FILE *fp);
 
-int hist_entry__inc_addr_samples(struct hist_entry *self, int evidx, u64 addr);
-int hist_entry__annotate(struct hist_entry *self, size_t privsize);
+int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 addr);
+int hist_entry__annotate(struct hist_entry *he, size_t privsize);
 
 void hists__filter_by_dso(struct hists *hists);
 void hists__filter_by_thread(struct hists *hists);
 void hists__filter_by_symbol(struct hists *hists);
 
-u16 hists__col_len(struct hists *self, enum hist_column col);
-void hists__set_col_len(struct hists *self, enum hist_column col, u16 len);
-bool hists__new_col_len(struct hists *self, enum hist_column col, u16 len);
+u16 hists__col_len(struct hists *hists, enum hist_column col);
+void hists__set_col_len(struct hists *hists, enum hist_column col, u16 len);
+bool hists__new_col_len(struct hists *hists, enum hist_column col, u16 len);
 void hists__reset_col_len(struct hists *hists);
 void hists__calc_col_len(struct hists *hists, struct hist_entry *he);
 
@@ -196,7 +189,7 @@ struct hist_browser_timer {
 	int refresh;
 };
 
-#ifdef SLANG_SUPPORT
+#ifdef HAVE_SLANG_SUPPORT
 #include "../ui/keysyms.h"
 int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel,
 			     struct hist_browser_timer *hbt);
@@ -217,12 +210,9 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __maybe_unused,
 	return 0;
 }
 
-static inline int hist_entry__tui_annotate(struct hist_entry *self
-					   __maybe_unused,
-					   struct perf_evsel *evsel
-					   __maybe_unused,
-					   struct hist_browser_timer *hbt
-					   __maybe_unused)
+static inline int hist_entry__tui_annotate(struct hist_entry *he __maybe_unused,
+					   struct perf_evsel *evsel __maybe_unused,
+					   struct hist_browser_timer *hbt __maybe_unused)
 {
 	return 0;
 }
@@ -237,20 +227,5 @@ static inline int script_browse(const char *script_opt __maybe_unused)
 #define K_SWITCH_INPUT_DATA -3000
 #endif
 
-#ifdef GTK2_SUPPORT
-int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist, const char *help,
-				  struct hist_browser_timer *hbt __maybe_unused,
-				  float min_pcnt);
-#else
-static inline
-int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist __maybe_unused,
-				  const char *help __maybe_unused,
-				  struct hist_browser_timer *hbt __maybe_unused,
-				  float min_pcnt __maybe_unused)
-{
-	return 0;
-}
-#endif
-
-unsigned int hists__sort_list_width(struct hists *self);
+unsigned int hists__sort_list_width(struct hists *hists);
 #endif	/* __PERF_HIST_H */
diff --git a/tools/perf/util/include/dwarf-regs.h b/tools/perf/util/include/dwarf-regs.h
index cf6727e99c440657b3cd6f29ac05c4f6f38bd89b..8f149655f497302f39b5352cf099fbd71d4d02d7 100644
--- a/tools/perf/util/include/dwarf-regs.h
+++ b/tools/perf/util/include/dwarf-regs.h
@@ -1,7 +1,7 @@
 #ifndef _PERF_DWARF_REGS_H_
 #define _PERF_DWARF_REGS_H_
 
-#ifdef DWARF_SUPPORT
+#ifdef HAVE_DWARF_SUPPORT
 const char *get_arch_regstr(unsigned int n);
 #endif
 
diff --git a/tools/perf/util/include/linux/compiler.h b/tools/perf/util/include/linux/compiler.h
index 96b919dae11c27bbbae7e14f1c6eb7cb0cd91a89..b003ad7200b230702b133071ab3d402395ab57e6 100644
--- a/tools/perf/util/include/linux/compiler.h
+++ b/tools/perf/util/include/linux/compiler.h
@@ -2,20 +2,29 @@
 #define _PERF_LINUX_COMPILER_H_
 
 #ifndef __always_inline
-#define __always_inline	inline
+# define __always_inline	inline __attribute__((always_inline))
 #endif
+
 #define __user
+
 #ifndef __attribute_const__
-#define __attribute_const__
+# define __attribute_const__
 #endif
 
 #ifndef __maybe_unused
-#define __maybe_unused		__attribute__((unused))
+# define __maybe_unused		__attribute__((unused))
+#endif
+
+#ifndef __packed
+# define __packed		__attribute__((__packed__))
 #endif
-#define __packed	__attribute__((__packed__))
 
 #ifndef __force
-#define __force
+# define __force
+#endif
+
+#ifndef __weak
+# define __weak			__attribute__((weak))
 #endif
 
 #endif
diff --git a/tools/perf/util/include/linux/magic.h b/tools/perf/util/include/linux/magic.h
index 58b64ed4da12e442c49e0b3069a1f4a11f83dc8e..07d63cf3e0f6f5d0860c7aa8a8531b49367fd468 100644
--- a/tools/perf/util/include/linux/magic.h
+++ b/tools/perf/util/include/linux/magic.h
@@ -9,4 +9,8 @@
 #define SYSFS_MAGIC            0x62656572
 #endif
 
+#ifndef PROC_SUPER_MAGIC
+#define PROC_SUPER_MAGIC       0x9fa0
+#endif
+
 #endif
diff --git a/tools/perf/util/intlist.c b/tools/perf/util/intlist.c
index 11a8d86f7fea3bf4ec5fb8cb05f27dc4c491d948..89715b64a31578e2f26fc98a4e9c778bb72710d5 100644
--- a/tools/perf/util/intlist.c
+++ b/tools/perf/util/intlist.c
@@ -20,6 +20,7 @@ static struct rb_node *intlist__node_new(struct rblist *rblist __maybe_unused,
 
 	if (node != NULL) {
 		node->i = i;
+		node->priv = NULL;
 		rc = &node->rb_node;
 	}
 
@@ -57,22 +58,36 @@ void intlist__remove(struct intlist *ilist, struct int_node *node)
 	rblist__remove_node(&ilist->rblist, &node->rb_node);
 }
 
-struct int_node *intlist__find(struct intlist *ilist, int i)
+static struct int_node *__intlist__findnew(struct intlist *ilist,
+					   int i, bool create)
 {
-	struct int_node *node;
+	struct int_node *node = NULL;
 	struct rb_node *rb_node;
 
 	if (ilist == NULL)
 		return NULL;
 
-	node = NULL;
-	rb_node = rblist__find(&ilist->rblist, (void *)((long)i));
+	if (create)
+		rb_node = rblist__findnew(&ilist->rblist, (void *)((long)i));
+	else
+		rb_node = rblist__find(&ilist->rblist, (void *)((long)i));
+
 	if (rb_node)
 		node = container_of(rb_node, struct int_node, rb_node);
 
 	return node;
 }
 
+struct int_node *intlist__find(struct intlist *ilist, int i)
+{
+	return __intlist__findnew(ilist, i, false);
+}
+
+struct int_node *intlist__findnew(struct intlist *ilist, int i)
+{
+	return __intlist__findnew(ilist, i, true);
+}
+
 static int intlist__parse_list(struct intlist *ilist, const char *s)
 {
 	char *sep;
diff --git a/tools/perf/util/intlist.h b/tools/perf/util/intlist.h
index 62351dad848f76c478cced833c315cc71ff1ed63..aa6877d36858288c9027d83ab2390b82a2370fc2 100644
--- a/tools/perf/util/intlist.h
+++ b/tools/perf/util/intlist.h
@@ -9,6 +9,7 @@
 struct int_node {
 	struct rb_node rb_node;
 	int i;
+	void *priv;
 };
 
 struct intlist {
@@ -23,6 +24,7 @@ int intlist__add(struct intlist *ilist, int i);
 
 struct int_node *intlist__entry(const struct intlist *ilist, unsigned int idx);
 struct int_node *intlist__find(struct intlist *ilist, int i);
+struct int_node *intlist__findnew(struct intlist *ilist, int i);
 
 static inline bool intlist__has_entry(struct intlist *ilist, int i)
 {
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 6188d2876a7128aaa68e426c3334dfcae099dc41..ce034c183a7ede78535ee8f11d13a18b2fe6617f 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -40,12 +40,29 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
 			return -ENOMEM;
 
 		snprintf(comm, sizeof(comm), "[guest/%d]", pid);
-		thread__set_comm(thread, comm);
+		thread__set_comm(thread, comm, 0);
 	}
 
 	return 0;
 }
 
+struct machine *machine__new_host(void)
+{
+	struct machine *machine = malloc(sizeof(*machine));
+
+	if (machine != NULL) {
+		machine__init(machine, "", HOST_KERNEL_ID);
+
+		if (machine__create_kernel_maps(machine) < 0)
+			goto out_delete;
+	}
+
+	return machine;
+out_delete:
+	free(machine);
+	return NULL;
+}
+
 static void dsos__delete(struct list_head *dsos)
 {
 	struct dso *pos, *n;
@@ -314,7 +331,8 @@ struct thread *machine__find_thread(struct machine *machine, pid_t tid)
 	return __machine__findnew_thread(machine, 0, tid, false);
 }
 
-int machine__process_comm_event(struct machine *machine, union perf_event *event)
+int machine__process_comm_event(struct machine *machine, union perf_event *event,
+				struct perf_sample *sample)
 {
 	struct thread *thread = machine__findnew_thread(machine,
 							event->comm.pid,
@@ -323,7 +341,7 @@ int machine__process_comm_event(struct machine *machine, union perf_event *event
 	if (dump_trace)
 		perf_event__fprintf_comm(event, stdout);
 
-	if (thread == NULL || thread__set_comm(thread, event->comm.comm)) {
+	if (thread == NULL || thread__set_comm(thread, event->comm.comm, sample->time)) {
 		dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n");
 		return -1;
 	}
@@ -332,7 +350,7 @@ int machine__process_comm_event(struct machine *machine, union perf_event *event
 }
 
 int machine__process_lost_event(struct machine *machine __maybe_unused,
-				union perf_event *event)
+				union perf_event *event, struct perf_sample *sample __maybe_unused)
 {
 	dump_printf(": id:%" PRIu64 ": lost:%" PRIu64 "\n",
 		    event->lost.id, event->lost.lost);
@@ -776,75 +794,44 @@ static int machine__set_modules_path(struct machine *machine)
 	return map_groups__set_modules_path_dir(&machine->kmaps, modules_path);
 }
 
-static int machine__create_modules(struct machine *machine)
+static int machine__create_module(void *arg, const char *name, u64 start)
 {
-	char *line = NULL;
-	size_t n;
-	FILE *file;
+	struct machine *machine = arg;
 	struct map *map;
+
+	map = machine__new_module(machine, start, name);
+	if (map == NULL)
+		return -1;
+
+	dso__kernel_module_get_build_id(map->dso, machine->root_dir);
+
+	return 0;
+}
+
+static int machine__create_modules(struct machine *machine)
+{
 	const char *modules;
 	char path[PATH_MAX];
 
-	if (machine__is_default_guest(machine))
+	if (machine__is_default_guest(machine)) {
 		modules = symbol_conf.default_guest_modules;
-	else {
-		sprintf(path, "%s/proc/modules", machine->root_dir);
+	} else {
+		snprintf(path, PATH_MAX, "%s/proc/modules", machine->root_dir);
 		modules = path;
 	}
 
 	if (symbol__restricted_filename(modules, "/proc/modules"))
 		return -1;
 
-	file = fopen(modules, "r");
-	if (file == NULL)
+	if (modules__parse(modules, machine, machine__create_module))
 		return -1;
 
-	while (!feof(file)) {
-		char name[PATH_MAX];
-		u64 start;
-		char *sep;
-		int line_len;
-
-		line_len = getline(&line, &n, file);
-		if (line_len < 0)
-			break;
-
-		if (!line)
-			goto out_failure;
-
-		line[--line_len] = '\0'; /* \n */
-
-		sep = strrchr(line, 'x');
-		if (sep == NULL)
-			continue;
-
-		hex2u64(sep + 1, &start);
-
-		sep = strchr(line, ' ');
-		if (sep == NULL)
-			continue;
-
-		*sep = '\0';
-
-		snprintf(name, sizeof(name), "[%s]", line);
-		map = machine__new_module(machine, start, name);
-		if (map == NULL)
-			goto out_delete_line;
-		dso__kernel_module_get_build_id(map->dso, machine->root_dir);
-	}
+	if (!machine__set_modules_path(machine))
+		return 0;
 
-	free(line);
-	fclose(file);
+	pr_debug("Problems setting modules path maps, continuing anyway...\n");
 
-	if (machine__set_modules_path(machine) < 0) {
-		pr_debug("Problems setting modules path maps, continuing anyway...\n");
-	}
 	return 0;
-
-out_delete_line:
-	free(line);
-out_failure:
-	return -1;
 }
 
 int machine__create_kernel_maps(struct machine *machine)
@@ -998,7 +985,8 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
 }
 
 int machine__process_mmap2_event(struct machine *machine,
-				 union perf_event *event)
+				 union perf_event *event,
+				 struct perf_sample *sample __maybe_unused)
 {
 	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 	struct thread *thread;
@@ -1045,7 +1033,8 @@ int machine__process_mmap2_event(struct machine *machine,
 	return 0;
 }
 
-int machine__process_mmap_event(struct machine *machine, union perf_event *event)
+int machine__process_mmap_event(struct machine *machine, union perf_event *event,
+				struct perf_sample *sample __maybe_unused)
 {
 	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 	struct thread *thread;
@@ -1102,7 +1091,8 @@ static void machine__remove_thread(struct machine *machine, struct thread *th)
 	list_add_tail(&th->node, &machine->dead_threads);
 }
 
-int machine__process_fork_event(struct machine *machine, union perf_event *event)
+int machine__process_fork_event(struct machine *machine, union perf_event *event,
+				struct perf_sample *sample)
 {
 	struct thread *thread = machine__find_thread(machine, event->fork.tid);
 	struct thread *parent = machine__findnew_thread(machine,
@@ -1119,7 +1109,7 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event
 		perf_event__fprintf_task(event, stdout);
 
 	if (thread == NULL || parent == NULL ||
-	    thread__fork(thread, parent) < 0) {
+	    thread__fork(thread, parent, sample->time) < 0) {
 		dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n");
 		return -1;
 	}
@@ -1127,8 +1117,8 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event
 	return 0;
 }
 
-int machine__process_exit_event(struct machine *machine __maybe_unused,
-				union perf_event *event)
+int machine__process_exit_event(struct machine *machine, union perf_event *event,
+				struct perf_sample *sample __maybe_unused)
 {
 	struct thread *thread = machine__find_thread(machine, event->fork.tid);
 
@@ -1141,23 +1131,24 @@ int machine__process_exit_event(struct machine *machine __maybe_unused,
 	return 0;
 }
 
-int machine__process_event(struct machine *machine, union perf_event *event)
+int machine__process_event(struct machine *machine, union perf_event *event,
+			   struct perf_sample *sample)
 {
 	int ret;
 
 	switch (event->header.type) {
 	case PERF_RECORD_COMM:
-		ret = machine__process_comm_event(machine, event); break;
+		ret = machine__process_comm_event(machine, event, sample); break;
 	case PERF_RECORD_MMAP:
-		ret = machine__process_mmap_event(machine, event); break;
+		ret = machine__process_mmap_event(machine, event, sample); break;
 	case PERF_RECORD_MMAP2:
-		ret = machine__process_mmap2_event(machine, event); break;
+		ret = machine__process_mmap2_event(machine, event, sample); break;
 	case PERF_RECORD_FORK:
-		ret = machine__process_fork_event(machine, event); break;
+		ret = machine__process_fork_event(machine, event, sample); break;
 	case PERF_RECORD_EXIT:
-		ret = machine__process_exit_event(machine, event); break;
+		ret = machine__process_exit_event(machine, event, sample); break;
 	case PERF_RECORD_LOST:
-		ret = machine__process_lost_event(machine, event); break;
+		ret = machine__process_lost_event(machine, event, sample); break;
 	default:
 		ret = -1;
 		break;
@@ -1267,10 +1258,12 @@ static int machine__resolve_callchain_sample(struct machine *machine,
 					     struct thread *thread,
 					     struct ip_callchain *chain,
 					     struct symbol **parent,
-					     struct addr_location *root_al)
+					     struct addr_location *root_al,
+					     int max_stack)
 {
 	u8 cpumode = PERF_RECORD_MISC_USER;
-	unsigned int i;
+	int chain_nr = min(max_stack, (int)chain->nr);
+	int i;
 	int err;
 
 	callchain_cursor_reset(&callchain_cursor);
@@ -1280,7 +1273,7 @@ static int machine__resolve_callchain_sample(struct machine *machine,
 		return 0;
 	}
 
-	for (i = 0; i < chain->nr; i++) {
+	for (i = 0; i < chain_nr; i++) {
 		u64 ip;
 		struct addr_location al;
 
@@ -1352,12 +1345,14 @@ int machine__resolve_callchain(struct machine *machine,
 			       struct thread *thread,
 			       struct perf_sample *sample,
 			       struct symbol **parent,
-			       struct addr_location *root_al)
+			       struct addr_location *root_al,
+			       int max_stack)
 {
 	int ret;
 
 	ret = machine__resolve_callchain_sample(machine, thread,
-						sample->callchain, parent, root_al);
+						sample->callchain, parent,
+						root_al, max_stack);
 	if (ret)
 		return ret;
 
@@ -1376,3 +1371,26 @@ int machine__resolve_callchain(struct machine *machine,
 				   sample);
 
 }
+
+int machine__for_each_thread(struct machine *machine,
+			     int (*fn)(struct thread *thread, void *p),
+			     void *priv)
+{
+	struct rb_node *nd;
+	struct thread *thread;
+	int rc = 0;
+
+	for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) {
+		thread = rb_entry(nd, struct thread, rb_node);
+		rc = fn(thread, priv);
+		if (rc != 0)
+			return rc;
+	}
+
+	list_for_each_entry(thread, &machine->dead_threads, node) {
+		rc = fn(thread, priv);
+		if (rc != 0)
+			return rc;
+	}
+	return rc;
+}
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 58a6be1fc739ba8e543ebd7d1bb6ea5185991703..2389ba81fafedf018be283e6aea14295d610613e 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -40,13 +40,20 @@ struct map *machine__kernel_map(struct machine *machine, enum map_type type)
 
 struct thread *machine__find_thread(struct machine *machine, pid_t tid);
 
-int machine__process_comm_event(struct machine *machine, union perf_event *event);
-int machine__process_exit_event(struct machine *machine, union perf_event *event);
-int machine__process_fork_event(struct machine *machine, union perf_event *event);
-int machine__process_lost_event(struct machine *machine, union perf_event *event);
-int machine__process_mmap_event(struct machine *machine, union perf_event *event);
-int machine__process_mmap2_event(struct machine *machine, union perf_event *event);
-int machine__process_event(struct machine *machine, union perf_event *event);
+int machine__process_comm_event(struct machine *machine, union perf_event *event,
+				struct perf_sample *sample);
+int machine__process_exit_event(struct machine *machine, union perf_event *event,
+				struct perf_sample *sample);
+int machine__process_fork_event(struct machine *machine, union perf_event *event,
+				struct perf_sample *sample);
+int machine__process_lost_event(struct machine *machine, union perf_event *event,
+				struct perf_sample *sample);
+int machine__process_mmap_event(struct machine *machine, union perf_event *event,
+				struct perf_sample *sample);
+int machine__process_mmap2_event(struct machine *machine, union perf_event *event,
+				 struct perf_sample *sample);
+int machine__process_event(struct machine *machine, union perf_event *event,
+				struct perf_sample *sample);
 
 typedef void (*machine__process_t)(struct machine *machine, void *data);
 
@@ -74,6 +81,7 @@ char *machine__mmap_name(struct machine *machine, char *bf, size_t size);
 void machines__set_symbol_filter(struct machines *machines,
 				 symbol_filter_t symbol_filter);
 
+struct machine *machine__new_host(void);
 int machine__init(struct machine *machine, const char *root_dir, pid_t pid);
 void machine__exit(struct machine *machine);
 void machine__delete_dead_threads(struct machine *machine);
@@ -91,7 +99,8 @@ int machine__resolve_callchain(struct machine *machine,
 			       struct thread *thread,
 			       struct perf_sample *sample,
 			       struct symbol **parent,
-			       struct addr_location *root_al);
+			       struct addr_location *root_al,
+			       int max_stack);
 
 /*
  * Default guest kernel is defined by parameter --guestkallsyms
@@ -165,4 +174,8 @@ void machines__destroy_kernel_maps(struct machines *machines);
 
 size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp);
 
+int machine__for_each_thread(struct machine *machine,
+			     int (*fn)(struct thread *thread, void *p),
+			     void *priv);
+
 #endif /* __PERF_MACHINE_H */
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 4f6680d2043b1e68ef7b9dfbc63a37326fcc6f22..ef5bc913ca7a9fdda7946401e55f9b0295dddfb6 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -172,7 +172,7 @@ int map__load(struct map *map, symbol_filter_t filter)
 		pr_warning(", continuing without symbols\n");
 		return -1;
 	} else if (nr == 0) {
-#ifdef LIBELF_SUPPORT
+#ifdef HAVE_LIBELF_SUPPORT
 		const size_t len = strlen(name);
 		const size_t real_len = len - sizeof(DSO__DELETED);
 
@@ -252,10 +252,16 @@ size_t map__fprintf_dsoname(struct map *map, FILE *fp)
 	return fprintf(fp, "%s", dsoname);
 }
 
-/*
+/**
+ * map__rip_2objdump - convert symbol start address to objdump address.
+ * @map: memory map
+ * @rip: symbol start address
+ *
  * objdump wants/reports absolute IPs for ET_EXEC, and RIPs for ET_DYN.
  * map->dso->adjust_symbols==1 for ET_EXEC-like cases except ET_REL which is
  * relative to section start.
+ *
+ * Return: Address suitable for passing to "objdump --start-address="
  */
 u64 map__rip_2objdump(struct map *map, u64 rip)
 {
@@ -268,6 +274,29 @@ u64 map__rip_2objdump(struct map *map, u64 rip)
 	return map->unmap_ip(map, rip);
 }
 
+/**
+ * map__objdump_2mem - convert objdump address to a memory address.
+ * @map: memory map
+ * @ip: objdump address
+ *
+ * Closely related to map__rip_2objdump(), this function takes an address from
+ * objdump and converts it to a memory address.  Note this assumes that @map
+ * contains the address.  To be sure the result is valid, check it forwards
+ * e.g. map__rip_2objdump(map->map_ip(map, map__objdump_2mem(map, ip))) == ip
+ *
+ * Return: Memory address.
+ */
+u64 map__objdump_2mem(struct map *map, u64 ip)
+{
+	if (!map->dso->adjust_symbols)
+		return map->unmap_ip(map, ip);
+
+	if (map->dso->rel)
+		return map->unmap_ip(map, ip + map->pgoff);
+
+	return ip;
+}
+
 void map_groups__init(struct map_groups *mg)
 {
 	int i;
@@ -371,6 +400,23 @@ struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg,
 	return NULL;
 }
 
+int map_groups__find_ams(struct addr_map_symbol *ams, symbol_filter_t filter)
+{
+	if (ams->addr < ams->map->start || ams->addr > ams->map->end) {
+		if (ams->map->groups == NULL)
+			return -1;
+		ams->map = map_groups__find(ams->map->groups, ams->map->type,
+					    ams->addr);
+		if (ams->map == NULL)
+			return -1;
+	}
+
+	ams->al_addr = ams->map->map_ip(ams->map, ams->addr);
+	ams->sym = map__find_symbol(ams->map, ams->al_addr, filter);
+
+	return ams->sym ? 0 : -1;
+}
+
 size_t __map_groups__fprintf_maps(struct map_groups *mg,
 				  enum map_type type, int verbose, FILE *fp)
 {
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index 4886ca2805361df87a57c9f4406ed5c38fe418b5..e4e259c3ba167d77836e67d85c63e6d110f61550 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -84,6 +84,9 @@ static inline u64 identity__map_ip(struct map *map __maybe_unused, u64 ip)
 /* rip/ip <-> addr suitable for passing to `objdump --start-address=` */
 u64 map__rip_2objdump(struct map *map, u64 rip);
 
+/* objdump address -> memory address */
+u64 map__objdump_2mem(struct map *map, u64 ip);
+
 struct symbol;
 
 typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym);
@@ -167,6 +170,10 @@ struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg,
 					       struct map **mapp,
 					       symbol_filter_t filter);
 
+struct addr_map_symbol;
+
+int map_groups__find_ams(struct addr_map_symbol *ams, symbol_filter_t filter);
+
 static inline
 struct symbol *map_groups__find_function_by_name(struct map_groups *mg,
 						 const char *name, struct map **mapp,
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 98125319b158b7e6e588ba4adf861494b69e18d6..c90e55cf7e82ebc134fb827ca68f45385138f161 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -998,8 +998,10 @@ void print_tracepoint_events(const char *subsys_glob, const char *event_glob,
 	char evt_path[MAXPATHLEN];
 	char dir_path[MAXPATHLEN];
 
-	if (debugfs_valid_mountpoint(tracing_events_path))
+	if (debugfs_valid_mountpoint(tracing_events_path)) {
+		printf("  [ Tracepoints not available: %s ]\n", strerror(errno));
 		return;
+	}
 
 	sys_dir = opendir(tracing_events_path);
 	if (!sys_dir)
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 91346b7539607fb9e3b6dcb8f944af36f9e88572..343299575b303fb5443b99288fd487aab26e3915 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -126,6 +126,37 @@ modifier_bp	[rwx]{1,3}
 
 }
 
+<config>{
+config			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG); }
+config1			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG1); }
+config2			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG2); }
+name			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NAME); }
+period			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); }
+branch_type		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE); }
+,			{ return ','; }
+"/"			{ BEGIN(INITIAL); return '/'; }
+{name_minus}		{ return str(yyscanner, PE_NAME); }
+}
+
+<mem>{
+{modifier_bp}		{ return str(yyscanner, PE_MODIFIER_BP); }
+:			{ return ':'; }
+{num_dec}		{ return value(yyscanner, 10); }
+{num_hex}		{ return value(yyscanner, 16); }
+	/*
+	 * We need to separate 'mem:' scanner part, in order to get specific
+	 * modifier bits parsed out. Otherwise we would need to handle PE_NAME
+	 * and we'd need to parse it manually. During the escape from <mem>
+	 * state we need to put the escaping char back, so we dont miss it.
+	 */
+.			{ unput(*yytext); BEGIN(INITIAL); }
+	/*
+	 * We destroy the scanner after reaching EOF,
+	 * but anyway just to be sure get back to INIT state.
+	 */
+<<EOF>>			{ BEGIN(INITIAL); }
+}
+
 cpu-cycles|cycles				{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES); }
 stalled-cycles-frontend|idle-cycles-frontend	{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); }
 stalled-cycles-backend|idle-cycles-backend	{ return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); }
@@ -162,18 +193,6 @@ speculative-read|speculative-load	|
 refs|Reference|ops|access		|
 misses|miss				{ return str(yyscanner, PE_NAME_CACHE_OP_RESULT); }
 
-<config>{
-config			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG); }
-config1			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG1); }
-config2			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG2); }
-name			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NAME); }
-period			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); }
-branch_type		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE); }
-,			{ return ','; }
-"/"			{ BEGIN(INITIAL); return '/'; }
-{name_minus}		{ return str(yyscanner, PE_NAME); }
-}
-
 mem:			{ BEGIN(mem); return PE_PREFIX_MEM; }
 r{num_raw_hex}		{ return raw(yyscanner); }
 {num_dec}		{ return value(yyscanner, 10); }
@@ -189,25 +208,7 @@ r{num_raw_hex}		{ return raw(yyscanner); }
 "}"			{ return '}'; }
 =			{ return '='; }
 \n			{ }
-
-<mem>{
-{modifier_bp}		{ return str(yyscanner, PE_MODIFIER_BP); }
-:			{ return ':'; }
-{num_dec}		{ return value(yyscanner, 10); }
-{num_hex}		{ return value(yyscanner, 16); }
-	/*
-	 * We need to separate 'mem:' scanner part, in order to get specific
-	 * modifier bits parsed out. Otherwise we would need to handle PE_NAME
-	 * and we'd need to parse it manually. During the escape from <mem>
-	 * state we need to put the escaping char back, so we dont miss it.
-	 */
-.			{ unput(*yytext); BEGIN(INITIAL); }
-	/*
-	 * We destroy the scanner after reaching EOF,
-	 * but anyway just to be sure get back to INIT state.
-	 */
-<<EOF>>			{ BEGIN(INITIAL); }
-}
+.			{ }
 
 %%
 
diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c
index 2bc9e70df7e2552ecc14857d1f26c2311bb2fde7..31f404a032a90499e5d366123f3185972edf3b49 100644
--- a/tools/perf/util/parse-options.c
+++ b/tools/perf/util/parse-options.c
@@ -339,10 +339,10 @@ int parse_options_step(struct parse_opt_ctx_t *ctx,
 		if (arg[1] != '-') {
 			ctx->opt = arg + 1;
 			if (internal_help && *ctx->opt == 'h')
-				return parse_options_usage(usagestr, options);
+				return usage_with_options_internal(usagestr, options, 0);
 			switch (parse_short_opt(ctx, options)) {
 			case -1:
-				return parse_options_usage(usagestr, options);
+				return parse_options_usage(usagestr, options, arg + 1, 1);
 			case -2:
 				goto unknown;
 			default:
@@ -352,10 +352,11 @@ int parse_options_step(struct parse_opt_ctx_t *ctx,
 				check_typos(arg + 1, options);
 			while (ctx->opt) {
 				if (internal_help && *ctx->opt == 'h')
-					return parse_options_usage(usagestr, options);
+					return usage_with_options_internal(usagestr, options, 0);
+				arg = ctx->opt;
 				switch (parse_short_opt(ctx, options)) {
 				case -1:
-					return parse_options_usage(usagestr, options);
+					return parse_options_usage(usagestr, options, arg, 1);
 				case -2:
 					/* fake a short option thing to hide the fact that we may have
 					 * started to parse aggregated stuff
@@ -383,12 +384,12 @@ int parse_options_step(struct parse_opt_ctx_t *ctx,
 		if (internal_help && !strcmp(arg + 2, "help-all"))
 			return usage_with_options_internal(usagestr, options, 1);
 		if (internal_help && !strcmp(arg + 2, "help"))
-			return parse_options_usage(usagestr, options);
+			return usage_with_options_internal(usagestr, options, 0);
 		if (!strcmp(arg + 2, "list-opts"))
 			return PARSE_OPT_LIST;
 		switch (parse_long_opt(ctx, arg + 2, options)) {
 		case -1:
-			return parse_options_usage(usagestr, options);
+			return parse_options_usage(usagestr, options, arg + 2, 0);
 		case -2:
 			goto unknown;
 		default:
@@ -445,6 +446,89 @@ int parse_options(int argc, const char **argv, const struct option *options,
 #define USAGE_OPTS_WIDTH 24
 #define USAGE_GAP         2
 
+static void print_option_help(const struct option *opts, int full)
+{
+	size_t pos;
+	int pad;
+
+	if (opts->type == OPTION_GROUP) {
+		fputc('\n', stderr);
+		if (*opts->help)
+			fprintf(stderr, "%s\n", opts->help);
+		return;
+	}
+	if (!full && (opts->flags & PARSE_OPT_HIDDEN))
+		return;
+
+	pos = fprintf(stderr, "    ");
+	if (opts->short_name)
+		pos += fprintf(stderr, "-%c", opts->short_name);
+	else
+		pos += fprintf(stderr, "    ");
+
+	if (opts->long_name && opts->short_name)
+		pos += fprintf(stderr, ", ");
+	if (opts->long_name)
+		pos += fprintf(stderr, "--%s", opts->long_name);
+
+	switch (opts->type) {
+	case OPTION_ARGUMENT:
+		break;
+	case OPTION_LONG:
+	case OPTION_U64:
+	case OPTION_INTEGER:
+	case OPTION_UINTEGER:
+		if (opts->flags & PARSE_OPT_OPTARG)
+			if (opts->long_name)
+				pos += fprintf(stderr, "[=<n>]");
+			else
+				pos += fprintf(stderr, "[<n>]");
+		else
+			pos += fprintf(stderr, " <n>");
+		break;
+	case OPTION_CALLBACK:
+		if (opts->flags & PARSE_OPT_NOARG)
+			break;
+		/* FALLTHROUGH */
+	case OPTION_STRING:
+		if (opts->argh) {
+			if (opts->flags & PARSE_OPT_OPTARG)
+				if (opts->long_name)
+					pos += fprintf(stderr, "[=<%s>]", opts->argh);
+				else
+					pos += fprintf(stderr, "[<%s>]", opts->argh);
+			else
+				pos += fprintf(stderr, " <%s>", opts->argh);
+		} else {
+			if (opts->flags & PARSE_OPT_OPTARG)
+				if (opts->long_name)
+					pos += fprintf(stderr, "[=...]");
+				else
+					pos += fprintf(stderr, "[...]");
+			else
+				pos += fprintf(stderr, " ...");
+		}
+		break;
+	default: /* OPTION_{BIT,BOOLEAN,SET_UINT,SET_PTR} */
+	case OPTION_END:
+	case OPTION_GROUP:
+	case OPTION_BIT:
+	case OPTION_BOOLEAN:
+	case OPTION_INCR:
+	case OPTION_SET_UINT:
+	case OPTION_SET_PTR:
+		break;
+	}
+
+	if (pos <= USAGE_OPTS_WIDTH)
+		pad = USAGE_OPTS_WIDTH - pos;
+	else {
+		fputc('\n', stderr);
+		pad = USAGE_OPTS_WIDTH;
+	}
+	fprintf(stderr, "%*s%s\n", pad + USAGE_GAP, "", opts->help);
+}
+
 int usage_with_options_internal(const char * const *usagestr,
 				const struct option *opts, int full)
 {
@@ -464,87 +548,9 @@ int usage_with_options_internal(const char * const *usagestr,
 	if (opts->type != OPTION_GROUP)
 		fputc('\n', stderr);
 
-	for (; opts->type != OPTION_END; opts++) {
-		size_t pos;
-		int pad;
-
-		if (opts->type == OPTION_GROUP) {
-			fputc('\n', stderr);
-			if (*opts->help)
-				fprintf(stderr, "%s\n", opts->help);
-			continue;
-		}
-		if (!full && (opts->flags & PARSE_OPT_HIDDEN))
-			continue;
-
-		pos = fprintf(stderr, "    ");
-		if (opts->short_name)
-			pos += fprintf(stderr, "-%c", opts->short_name);
-		else
-			pos += fprintf(stderr, "    ");
-
-		if (opts->long_name && opts->short_name)
-			pos += fprintf(stderr, ", ");
-		if (opts->long_name)
-			pos += fprintf(stderr, "--%s", opts->long_name);
-
-		switch (opts->type) {
-		case OPTION_ARGUMENT:
-			break;
-		case OPTION_LONG:
-		case OPTION_U64:
-		case OPTION_INTEGER:
-		case OPTION_UINTEGER:
-			if (opts->flags & PARSE_OPT_OPTARG)
-				if (opts->long_name)
-					pos += fprintf(stderr, "[=<n>]");
-				else
-					pos += fprintf(stderr, "[<n>]");
-			else
-				pos += fprintf(stderr, " <n>");
-			break;
-		case OPTION_CALLBACK:
-			if (opts->flags & PARSE_OPT_NOARG)
-				break;
-			/* FALLTHROUGH */
-		case OPTION_STRING:
-			if (opts->argh) {
-				if (opts->flags & PARSE_OPT_OPTARG)
-					if (opts->long_name)
-						pos += fprintf(stderr, "[=<%s>]", opts->argh);
-					else
-						pos += fprintf(stderr, "[<%s>]", opts->argh);
-				else
-					pos += fprintf(stderr, " <%s>", opts->argh);
-			} else {
-				if (opts->flags & PARSE_OPT_OPTARG)
-					if (opts->long_name)
-						pos += fprintf(stderr, "[=...]");
-					else
-						pos += fprintf(stderr, "[...]");
-				else
-					pos += fprintf(stderr, " ...");
-			}
-			break;
-		default: /* OPTION_{BIT,BOOLEAN,SET_UINT,SET_PTR} */
-		case OPTION_END:
-		case OPTION_GROUP:
-		case OPTION_BIT:
-		case OPTION_BOOLEAN:
-		case OPTION_INCR:
-		case OPTION_SET_UINT:
-		case OPTION_SET_PTR:
-			break;
-		}
+	for (  ; opts->type != OPTION_END; opts++)
+		print_option_help(opts, full);
 
-		if (pos <= USAGE_OPTS_WIDTH)
-			pad = USAGE_OPTS_WIDTH - pos;
-		else {
-			fputc('\n', stderr);
-			pad = USAGE_OPTS_WIDTH;
-		}
-		fprintf(stderr, "%*s%s\n", pad + USAGE_GAP, "", opts->help);
-	}
 	fputc('\n', stderr);
 
 	return PARSE_OPT_HELP;
@@ -559,9 +565,45 @@ void usage_with_options(const char * const *usagestr,
 }
 
 int parse_options_usage(const char * const *usagestr,
-			const struct option *opts)
+			const struct option *opts,
+			const char *optstr, bool short_opt)
 {
-	return usage_with_options_internal(usagestr, opts, 0);
+	if (!usagestr)
+		goto opt;
+
+	fprintf(stderr, "\n usage: %s\n", *usagestr++);
+	while (*usagestr && **usagestr)
+		fprintf(stderr, "    or: %s\n", *usagestr++);
+	while (*usagestr) {
+		fprintf(stderr, "%s%s\n",
+				**usagestr ? "    " : "",
+				*usagestr);
+		usagestr++;
+	}
+	fputc('\n', stderr);
+
+opt:
+	for (  ; opts->type != OPTION_END; opts++) {
+		if (short_opt) {
+			if (opts->short_name == *optstr)
+				break;
+			continue;
+		}
+
+		if (opts->long_name == NULL)
+			continue;
+
+		if (!prefixcmp(optstr, opts->long_name))
+			break;
+		if (!prefixcmp(optstr, "no-") &&
+		    !prefixcmp(optstr + 3, opts->long_name))
+			break;
+	}
+
+	if (opts->type != OPTION_END)
+		print_option_help(opts, 0);
+
+	return PARSE_OPT_HELP;
 }
 
 
diff --git a/tools/perf/util/parse-options.h b/tools/perf/util/parse-options.h
index 7bb5999940ca925f60482bbfa10aea1a6e685b10..b0241e28eaf7dd7b81469e22416e8f1e16e26c63 100644
--- a/tools/perf/util/parse-options.h
+++ b/tools/perf/util/parse-options.h
@@ -158,7 +158,9 @@ struct parse_opt_ctx_t {
 };
 
 extern int parse_options_usage(const char * const *usagestr,
-			       const struct option *opts);
+			       const struct option *opts,
+			       const char *optstr,
+			       bool short_opt);
 
 extern void parse_options_start(struct parse_opt_ctx_t *ctx,
 				int argc, const char **argv, int flags);
diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c
index a8c49548ca48f5bc4f7ae14134ee57d7779b46e8..5d13cb45b3171a98a171b0f7228d1fff470bf8c0 100644
--- a/tools/perf/util/path.c
+++ b/tools/perf/util/path.c
@@ -22,19 +22,23 @@ static const char *get_perf_dir(void)
 	return ".";
 }
 
-#ifndef HAVE_STRLCPY
-size_t strlcpy(char *dest, const char *src, size_t size)
+/*
+ * If libc has strlcpy() then that version will override this
+ * implementation:
+ */
+size_t __weak strlcpy(char *dest, const char *src, size_t size)
 {
 	size_t ret = strlen(src);
 
 	if (size) {
 		size_t len = (ret >= size) ? size - 1 : ret;
+
 		memcpy(dest, src, len);
 		dest[len] = '\0';
 	}
+
 	return ret;
 }
-#endif
 
 static char *get_pathname(void)
 {
diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h
index 5a4f2b6f3738c1966b180484dcfb71f70d0e0afd..a3d42cd749196b83c4c8226bfd069a68cd3a6917 100644
--- a/tools/perf/util/perf_regs.h
+++ b/tools/perf/util/perf_regs.h
@@ -1,7 +1,7 @@
 #ifndef __PERF_REGS_H
 #define __PERF_REGS_H
 
-#ifdef HAVE_PERF_REGS
+#ifdef HAVE_PERF_REGS_SUPPORT
 #include <perf_regs.h>
 #else
 #define PERF_REGS_MASK	0
@@ -10,5 +10,5 @@ static inline const char *perf_reg_name(int id __maybe_unused)
 {
 	return NULL;
 }
-#endif /* HAVE_PERF_REGS */
+#endif /* HAVE_PERF_REGS_SUPPORT */
 #endif /* __PERF_REGS_H */
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index bc9d8069d37637835758d02e13ec3bc751d0f5e3..c232d8dd410bf6483ba8fcd850380bbd562f03ee 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -4,7 +4,7 @@
 #include <unistd.h>
 #include <stdio.h>
 #include <dirent.h>
-#include "sysfs.h"
+#include "fs.h"
 #include "util.h"
 #include "pmu.h"
 #include "parse-events.h"
@@ -77,9 +77,8 @@ static int pmu_format(const char *name, struct list_head *format)
 {
 	struct stat st;
 	char path[PATH_MAX];
-	const char *sysfs;
+	const char *sysfs = sysfs__mountpoint();
 
-	sysfs = sysfs_find_mountpoint();
 	if (!sysfs)
 		return -1;
 
@@ -166,9 +165,8 @@ static int pmu_aliases(const char *name, struct list_head *head)
 {
 	struct stat st;
 	char path[PATH_MAX];
-	const char *sysfs;
+	const char *sysfs = sysfs__mountpoint();
 
-	sysfs = sysfs_find_mountpoint();
 	if (!sysfs)
 		return -1;
 
@@ -212,11 +210,10 @@ static int pmu_type(const char *name, __u32 *type)
 {
 	struct stat st;
 	char path[PATH_MAX];
-	const char *sysfs;
 	FILE *file;
 	int ret = 0;
+	const char *sysfs = sysfs__mountpoint();
 
-	sysfs = sysfs_find_mountpoint();
 	if (!sysfs)
 		return -1;
 
@@ -241,11 +238,10 @@ static int pmu_type(const char *name, __u32 *type)
 static void pmu_read_sysfs(void)
 {
 	char path[PATH_MAX];
-	const char *sysfs;
 	DIR *dir;
 	struct dirent *dent;
+	const char *sysfs = sysfs__mountpoint();
 
-	sysfs = sysfs_find_mountpoint();
 	if (!sysfs)
 		return;
 
@@ -270,11 +266,10 @@ static struct cpu_map *pmu_cpumask(const char *name)
 {
 	struct stat st;
 	char path[PATH_MAX];
-	const char *sysfs;
 	FILE *file;
 	struct cpu_map *cpus;
+	const char *sysfs = sysfs__mountpoint();
 
-	sysfs = sysfs_find_mountpoint();
 	if (!sysfs)
 		return NULL;
 
@@ -637,3 +632,19 @@ void print_pmu_events(const char *event_glob, bool name_only)
 		printf("\n");
 	free(aliases);
 }
+
+bool pmu_have_event(const char *pname, const char *name)
+{
+	struct perf_pmu *pmu;
+	struct perf_pmu_alias *alias;
+
+	pmu = NULL;
+	while ((pmu = perf_pmu__scan(pmu)) != NULL) {
+		if (strcmp(pname, pmu->name))
+			continue;
+		list_for_each_entry(alias, &pmu->aliases, list)
+			if (!strcmp(alias->name, name))
+				return true;
+	}
+	return false;
+}
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 6b2cbe2d4cc3a6d59a3d692770793c19ea984930..1179b26f244a31280e5454787a51a8f01ff9ac2e 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -42,6 +42,7 @@ int perf_pmu__format_parse(char *dir, struct list_head *head);
 struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu);
 
 void print_pmu_events(const char *event_glob, bool name_only);
+bool pmu_have_event(const char *pname, const char *name);
 
 int perf_pmu__test(void);
 #endif /* __PMU_H */
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index aa04bf9c9ad791a00faf7546cf75163a0dabd60a..9c6989ca2bea0242edcfed3f75803dd4e923068d 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -47,7 +47,6 @@
 #include "session.h"
 
 #define MAX_CMDLEN 256
-#define MAX_PROBE_ARGS 128
 #define PERFPROBE_GROUP "probe"
 
 bool probe_event_dry_run;	/* Dry run flag */
@@ -201,7 +200,7 @@ static int convert_to_perf_probe_point(struct probe_trace_point *tp,
 	return 0;
 }
 
-#ifdef DWARF_SUPPORT
+#ifdef HAVE_DWARF_SUPPORT
 /* Open new debuginfo of given module */
 static struct debuginfo *open_debuginfo(const char *module)
 {
@@ -630,7 +629,7 @@ int show_available_vars(struct perf_probe_event *pevs, int npevs,
 	return ret;
 }
 
-#else	/* !DWARF_SUPPORT */
+#else	/* !HAVE_DWARF_SUPPORT */
 
 static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp,
 					struct perf_probe_point *pp)
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index f0692737ebf1237373a140c0ec76a7a4a7a96f9d..ffb657ffd327b1779b328f4546f4d45c0784e243 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -115,7 +115,7 @@ static const Dwfl_Callbacks offline_callbacks = {
 };
 
 /* Get a Dwarf from offline image */
-static int debuginfo__init_offline_dwarf(struct debuginfo *self,
+static int debuginfo__init_offline_dwarf(struct debuginfo *dbg,
 					 const char *path)
 {
 	int fd;
@@ -124,25 +124,25 @@ static int debuginfo__init_offline_dwarf(struct debuginfo *self,
 	if (fd < 0)
 		return fd;
 
-	self->dwfl = dwfl_begin(&offline_callbacks);
-	if (!self->dwfl)
+	dbg->dwfl = dwfl_begin(&offline_callbacks);
+	if (!dbg->dwfl)
 		goto error;
 
-	self->mod = dwfl_report_offline(self->dwfl, "", "", fd);
-	if (!self->mod)
+	dbg->mod = dwfl_report_offline(dbg->dwfl, "", "", fd);
+	if (!dbg->mod)
 		goto error;
 
-	self->dbg = dwfl_module_getdwarf(self->mod, &self->bias);
-	if (!self->dbg)
+	dbg->dbg = dwfl_module_getdwarf(dbg->mod, &dbg->bias);
+	if (!dbg->dbg)
 		goto error;
 
 	return 0;
 error:
-	if (self->dwfl)
-		dwfl_end(self->dwfl);
+	if (dbg->dwfl)
+		dwfl_end(dbg->dwfl);
 	else
 		close(fd);
-	memset(self, 0, sizeof(*self));
+	memset(dbg, 0, sizeof(*dbg));
 
 	return -ENOENT;
 }
@@ -180,24 +180,24 @@ static const Dwfl_Callbacks kernel_callbacks = {
 };
 
 /* Get a Dwarf from live kernel image */
-static int debuginfo__init_online_kernel_dwarf(struct debuginfo *self,
+static int debuginfo__init_online_kernel_dwarf(struct debuginfo *dbg,
 					       Dwarf_Addr addr)
 {
-	self->dwfl = dwfl_begin(&kernel_callbacks);
-	if (!self->dwfl)
+	dbg->dwfl = dwfl_begin(&kernel_callbacks);
+	if (!dbg->dwfl)
 		return -EINVAL;
 
 	/* Load the kernel dwarves: Don't care the result here */
-	dwfl_linux_kernel_report_kernel(self->dwfl);
-	dwfl_linux_kernel_report_modules(self->dwfl);
+	dwfl_linux_kernel_report_kernel(dbg->dwfl);
+	dwfl_linux_kernel_report_modules(dbg->dwfl);
 
-	self->dbg = dwfl_addrdwarf(self->dwfl, addr, &self->bias);
+	dbg->dbg = dwfl_addrdwarf(dbg->dwfl, addr, &dbg->bias);
 	/* Here, check whether we could get a real dwarf */
-	if (!self->dbg) {
+	if (!dbg->dbg) {
 		pr_debug("Failed to find kernel dwarf at %lx\n",
 			 (unsigned long)addr);
-		dwfl_end(self->dwfl);
-		memset(self, 0, sizeof(*self));
+		dwfl_end(dbg->dwfl);
+		memset(dbg, 0, sizeof(*dbg));
 		return -ENOENT;
 	}
 
@@ -205,7 +205,7 @@ static int debuginfo__init_online_kernel_dwarf(struct debuginfo *self,
 }
 #else
 /* With older elfutils, this just support kernel module... */
-static int debuginfo__init_online_kernel_dwarf(struct debuginfo *self,
+static int debuginfo__init_online_kernel_dwarf(struct debuginfo *dbg,
 					       Dwarf_Addr addr __maybe_unused)
 {
 	const char *path = kernel_get_module_path("kernel");
@@ -216,44 +216,45 @@ static int debuginfo__init_online_kernel_dwarf(struct debuginfo *self,
 	}
 
 	pr_debug2("Use file %s for debuginfo\n", path);
-	return debuginfo__init_offline_dwarf(self, path);
+	return debuginfo__init_offline_dwarf(dbg, path);
 }
 #endif
 
 struct debuginfo *debuginfo__new(const char *path)
 {
-	struct debuginfo *self = zalloc(sizeof(struct debuginfo));
-	if (!self)
+	struct debuginfo *dbg = zalloc(sizeof(*dbg));
+	if (!dbg)
 		return NULL;
 
-	if (debuginfo__init_offline_dwarf(self, path) < 0) {
-		free(self);
-		self = NULL;
+	if (debuginfo__init_offline_dwarf(dbg, path) < 0) {
+		free(dbg);
+		dbg = NULL;
 	}
 
-	return self;
+	return dbg;
 }
 
 struct debuginfo *debuginfo__new_online_kernel(unsigned long addr)
 {
-	struct debuginfo *self = zalloc(sizeof(struct debuginfo));
-	if (!self)
+	struct debuginfo *dbg = zalloc(sizeof(*dbg));
+
+	if (!dbg)
 		return NULL;
 
-	if (debuginfo__init_online_kernel_dwarf(self, (Dwarf_Addr)addr) < 0) {
-		free(self);
-		self = NULL;
+	if (debuginfo__init_online_kernel_dwarf(dbg, (Dwarf_Addr)addr) < 0) {
+		free(dbg);
+		dbg = NULL;
 	}
 
-	return self;
+	return dbg;
 }
 
-void debuginfo__delete(struct debuginfo *self)
+void debuginfo__delete(struct debuginfo *dbg)
 {
-	if (self) {
-		if (self->dwfl)
-			dwfl_end(self->dwfl);
-		free(self);
+	if (dbg) {
+		if (dbg->dwfl)
+			dwfl_end(dbg->dwfl);
+		free(dbg);
 	}
 }
 
@@ -273,12 +274,15 @@ static struct probe_trace_arg_ref *alloc_trace_arg_ref(long offs)
 /*
  * Convert a location into trace_arg.
  * If tvar == NULL, this just checks variable can be converted.
+ * If fentry == true and vr_die is a parameter, do huristic search
+ * for the location fuzzed by function entry mcount.
  */
 static int convert_variable_location(Dwarf_Die *vr_die, Dwarf_Addr addr,
-				     Dwarf_Op *fb_ops,
+				     Dwarf_Op *fb_ops, Dwarf_Die *sp_die,
 				     struct probe_trace_arg *tvar)
 {
 	Dwarf_Attribute attr;
+	Dwarf_Addr tmp = 0;
 	Dwarf_Op *op;
 	size_t nops;
 	unsigned int regn;
@@ -291,12 +295,29 @@ static int convert_variable_location(Dwarf_Die *vr_die, Dwarf_Addr addr,
 		goto static_var;
 
 	/* TODO: handle more than 1 exprs */
-	if (dwarf_attr(vr_die, DW_AT_location, &attr) == NULL ||
-	    dwarf_getlocation_addr(&attr, addr, &op, &nops, 1) <= 0 ||
-	    nops == 0) {
-		/* TODO: Support const_value */
+	if (dwarf_attr(vr_die, DW_AT_location, &attr) == NULL)
+		return -EINVAL;	/* Broken DIE ? */
+	if (dwarf_getlocation_addr(&attr, addr, &op, &nops, 1) <= 0) {
+		ret = dwarf_entrypc(sp_die, &tmp);
+		if (ret || addr != tmp ||
+		    dwarf_tag(vr_die) != DW_TAG_formal_parameter ||
+		    dwarf_highpc(sp_die, &tmp))
+			return -ENOENT;
+		/*
+		 * This is fuzzed by fentry mcount. We try to find the
+		 * parameter location at the earliest address.
+		 */
+		for (addr += 1; addr <= tmp; addr++) {
+			if (dwarf_getlocation_addr(&attr, addr, &op,
+						   &nops, 1) > 0)
+				goto found;
+		}
 		return -ENOENT;
 	}
+found:
+	if (nops == 0)
+		/* TODO: Support const_value */
+		return -ENOENT;
 
 	if (op->atom == DW_OP_addr) {
 static_var:
@@ -563,7 +584,7 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname,
 	}
 
 	if (die_find_member(&type, field->name, die_mem) == NULL) {
-		pr_warning("%s(tyep:%s) has no member %s.\n", varname,
+		pr_warning("%s(type:%s) has no member %s.\n", varname,
 			   dwarf_diename(&type), field->name);
 		return -EINVAL;
 	}
@@ -600,7 +621,7 @@ static int convert_variable(Dwarf_Die *vr_die, struct probe_finder *pf)
 		 dwarf_diename(vr_die));
 
 	ret = convert_variable_location(vr_die, pf->addr, pf->fb_ops,
-					pf->tvar);
+					&pf->sp_die, pf->tvar);
 	if (ret == -ENOENT)
 		pr_err("Failed to find the location of %s at this address.\n"
 		       " Perhaps, it has been optimized out.\n", pf->pvar->var);
@@ -1063,7 +1084,7 @@ static int pubname_search_cb(Dwarf *dbg, Dwarf_Global *gl, void *data)
 }
 
 /* Find probe points from debuginfo */
-static int debuginfo__find_probes(struct debuginfo *self,
+static int debuginfo__find_probes(struct debuginfo *dbg,
 				  struct probe_finder *pf)
 {
 	struct perf_probe_point *pp = &pf->pev->point;
@@ -1074,7 +1095,7 @@ static int debuginfo__find_probes(struct debuginfo *self,
 
 #if _ELFUTILS_PREREQ(0, 142)
 	/* Get the call frame information from this dwarf */
-	pf->cfi = dwarf_getcfi(self->dbg);
+	pf->cfi = dwarf_getcfi(dbg->dbg);
 #endif
 
 	off = 0;
@@ -1093,7 +1114,7 @@ static int debuginfo__find_probes(struct debuginfo *self,
 			.data = pf,
 		};
 
-		dwarf_getpubnames(self->dbg, pubname_search_cb,
+		dwarf_getpubnames(dbg->dbg, pubname_search_cb,
 				  &pubname_param, 0);
 		if (pubname_param.found) {
 			ret = probe_point_search_cb(&pf->sp_die, &probe_param);
@@ -1103,9 +1124,9 @@ static int debuginfo__find_probes(struct debuginfo *self,
 	}
 
 	/* Loop on CUs (Compilation Unit) */
-	while (!dwarf_nextcu(self->dbg, off, &noff, &cuhl, NULL, NULL, NULL)) {
+	while (!dwarf_nextcu(dbg->dbg, off, &noff, &cuhl, NULL, NULL, NULL)) {
 		/* Get the DIE(Debugging Information Entry) of this CU */
-		diep = dwarf_offdie(self->dbg, off + cuhl, &pf->cu_die);
+		diep = dwarf_offdie(dbg->dbg, off + cuhl, &pf->cu_die);
 		if (!diep)
 			continue;
 
@@ -1136,12 +1157,80 @@ static int debuginfo__find_probes(struct debuginfo *self,
 	return ret;
 }
 
+struct local_vars_finder {
+	struct probe_finder *pf;
+	struct perf_probe_arg *args;
+	int max_args;
+	int nargs;
+	int ret;
+};
+
+/* Collect available variables in this scope */
+static int copy_variables_cb(Dwarf_Die *die_mem, void *data)
+{
+	struct local_vars_finder *vf = data;
+	struct probe_finder *pf = vf->pf;
+	int tag;
+
+	tag = dwarf_tag(die_mem);
+	if (tag == DW_TAG_formal_parameter ||
+	    tag == DW_TAG_variable) {
+		if (convert_variable_location(die_mem, vf->pf->addr,
+					      vf->pf->fb_ops, &pf->sp_die,
+					      NULL) == 0) {
+			vf->args[vf->nargs].var = (char *)dwarf_diename(die_mem);
+			if (vf->args[vf->nargs].var == NULL) {
+				vf->ret = -ENOMEM;
+				return DIE_FIND_CB_END;
+			}
+			pr_debug(" %s", vf->args[vf->nargs].var);
+			vf->nargs++;
+		}
+	}
+
+	if (dwarf_haspc(die_mem, vf->pf->addr))
+		return DIE_FIND_CB_CONTINUE;
+	else
+		return DIE_FIND_CB_SIBLING;
+}
+
+static int expand_probe_args(Dwarf_Die *sc_die, struct probe_finder *pf,
+			     struct perf_probe_arg *args)
+{
+	Dwarf_Die die_mem;
+	int i;
+	int n = 0;
+	struct local_vars_finder vf = {.pf = pf, .args = args,
+				.max_args = MAX_PROBE_ARGS, .ret = 0};
+
+	for (i = 0; i < pf->pev->nargs; i++) {
+		/* var never be NULL */
+		if (strcmp(pf->pev->args[i].var, "$vars") == 0) {
+			pr_debug("Expanding $vars into:");
+			vf.nargs = n;
+			/* Special local variables */
+			die_find_child(sc_die, copy_variables_cb, (void *)&vf,
+				       &die_mem);
+			pr_debug(" (%d)\n", vf.nargs - n);
+			if (vf.ret < 0)
+				return vf.ret;
+			n = vf.nargs;
+		} else {
+			/* Copy normal argument */
+			args[n] = pf->pev->args[i];
+			n++;
+		}
+	}
+	return n;
+}
+
 /* Add a found probe point into trace event list */
 static int add_probe_trace_event(Dwarf_Die *sc_die, struct probe_finder *pf)
 {
 	struct trace_event_finder *tf =
 			container_of(pf, struct trace_event_finder, pf);
 	struct probe_trace_event *tev;
+	struct perf_probe_arg *args;
 	int ret, i;
 
 	/* Check number of tevs */
@@ -1161,31 +1250,45 @@ static int add_probe_trace_event(Dwarf_Die *sc_die, struct probe_finder *pf)
 	pr_debug("Probe point found: %s+%lu\n", tev->point.symbol,
 		 tev->point.offset);
 
-	/* Find each argument */
-	tev->nargs = pf->pev->nargs;
-	tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs);
-	if (tev->args == NULL)
+	/* Expand special probe argument if exist */
+	args = zalloc(sizeof(struct perf_probe_arg) * MAX_PROBE_ARGS);
+	if (args == NULL)
 		return -ENOMEM;
-	for (i = 0; i < pf->pev->nargs; i++) {
-		pf->pvar = &pf->pev->args[i];
+
+	ret = expand_probe_args(sc_die, pf, args);
+	if (ret < 0)
+		goto end;
+
+	tev->nargs = ret;
+	tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs);
+	if (tev->args == NULL) {
+		ret = -ENOMEM;
+		goto end;
+	}
+
+	/* Find each argument */
+	for (i = 0; i < tev->nargs; i++) {
+		pf->pvar = &args[i];
 		pf->tvar = &tev->args[i];
 		/* Variable should be found from scope DIE */
 		ret = find_variable(sc_die, pf);
 		if (ret != 0)
-			return ret;
+			break;
 	}
 
-	return 0;
+end:
+	free(args);
+	return ret;
 }
 
 /* Find probe_trace_events specified by perf_probe_event from debuginfo */
-int debuginfo__find_trace_events(struct debuginfo *self,
+int debuginfo__find_trace_events(struct debuginfo *dbg,
 				 struct perf_probe_event *pev,
 				 struct probe_trace_event **tevs, int max_tevs)
 {
 	struct trace_event_finder tf = {
 			.pf = {.pev = pev, .callback = add_probe_trace_event},
-			.mod = self->mod, .max_tevs = max_tevs};
+			.mod = dbg->mod, .max_tevs = max_tevs};
 	int ret;
 
 	/* Allocate result tevs array */
@@ -1196,7 +1299,7 @@ int debuginfo__find_trace_events(struct debuginfo *self,
 	tf.tevs = *tevs;
 	tf.ntevs = 0;
 
-	ret = debuginfo__find_probes(self, &tf.pf);
+	ret = debuginfo__find_probes(dbg, &tf.pf);
 	if (ret < 0) {
 		free(*tevs);
 		*tevs = NULL;
@@ -1222,7 +1325,8 @@ static int collect_variables_cb(Dwarf_Die *die_mem, void *data)
 	if (tag == DW_TAG_formal_parameter ||
 	    tag == DW_TAG_variable) {
 		ret = convert_variable_location(die_mem, af->pf.addr,
-						af->pf.fb_ops, NULL);
+						af->pf.fb_ops, &af->pf.sp_die,
+						NULL);
 		if (ret == 0) {
 			ret = die_get_varname(die_mem, buf, MAX_VAR_LEN);
 			pr_debug2("Add new var: %s\n", buf);
@@ -1286,14 +1390,14 @@ static int add_available_vars(Dwarf_Die *sc_die, struct probe_finder *pf)
 }
 
 /* Find available variables at given probe point */
-int debuginfo__find_available_vars_at(struct debuginfo *self,
+int debuginfo__find_available_vars_at(struct debuginfo *dbg,
 				      struct perf_probe_event *pev,
 				      struct variable_list **vls,
 				      int max_vls, bool externs)
 {
 	struct available_var_finder af = {
 			.pf = {.pev = pev, .callback = add_available_vars},
-			.mod = self->mod,
+			.mod = dbg->mod,
 			.max_vls = max_vls, .externs = externs};
 	int ret;
 
@@ -1305,7 +1409,7 @@ int debuginfo__find_available_vars_at(struct debuginfo *self,
 	af.vls = *vls;
 	af.nvls = 0;
 
-	ret = debuginfo__find_probes(self, &af.pf);
+	ret = debuginfo__find_probes(dbg, &af.pf);
 	if (ret < 0) {
 		/* Free vlist for error */
 		while (af.nvls--) {
@@ -1323,7 +1427,7 @@ int debuginfo__find_available_vars_at(struct debuginfo *self,
 }
 
 /* Reverse search */
-int debuginfo__find_probe_point(struct debuginfo *self, unsigned long addr,
+int debuginfo__find_probe_point(struct debuginfo *dbg, unsigned long addr,
 				struct perf_probe_point *ppt)
 {
 	Dwarf_Die cudie, spdie, indie;
@@ -1332,10 +1436,10 @@ int debuginfo__find_probe_point(struct debuginfo *self, unsigned long addr,
 	int baseline = 0, lineno = 0, ret = 0;
 
 	/* Adjust address with bias */
-	addr += self->bias;
+	addr += dbg->bias;
 
 	/* Find cu die */
-	if (!dwarf_addrdie(self->dbg, (Dwarf_Addr)addr - self->bias, &cudie)) {
+	if (!dwarf_addrdie(dbg->dbg, (Dwarf_Addr)addr - dbg->bias, &cudie)) {
 		pr_warning("Failed to find debug information for address %lx\n",
 			   addr);
 		ret = -EINVAL;
@@ -1536,7 +1640,7 @@ static int find_line_range_by_func(struct line_finder *lf)
 	return param.retval;
 }
 
-int debuginfo__find_line_range(struct debuginfo *self, struct line_range *lr)
+int debuginfo__find_line_range(struct debuginfo *dbg, struct line_range *lr)
 {
 	struct line_finder lf = {.lr = lr, .found = 0};
 	int ret = 0;
@@ -1553,7 +1657,7 @@ int debuginfo__find_line_range(struct debuginfo *self, struct line_range *lr)
 		struct dwarf_callback_param line_range_param = {
 			.data = (void *)&lf, .retval = 0};
 
-		dwarf_getpubnames(self->dbg, pubname_search_cb,
+		dwarf_getpubnames(dbg->dbg, pubname_search_cb,
 				  &pubname_param, 0);
 		if (pubname_param.found) {
 			line_range_search_cb(&lf.sp_die, &line_range_param);
@@ -1564,12 +1668,12 @@ int debuginfo__find_line_range(struct debuginfo *self, struct line_range *lr)
 
 	/* Loop on CUs (Compilation Unit) */
 	while (!lf.found && ret >= 0) {
-		if (dwarf_nextcu(self->dbg, off, &noff, &cuhl,
+		if (dwarf_nextcu(dbg->dbg, off, &noff, &cuhl,
 				 NULL, NULL, NULL) != 0)
 			break;
 
 		/* Get the DIE(Debugging Information Entry) of this CU */
-		diep = dwarf_offdie(self->dbg, off + cuhl, &lf.cu_die);
+		diep = dwarf_offdie(dbg->dbg, off + cuhl, &lf.cu_die);
 		if (!diep)
 			continue;
 
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
index 3b7d63018960d7ff6a6808ce81eff2d34b40bd09..ffc33cdd25cc343816fd9e05586832bf48b82205 100644
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -7,6 +7,7 @@
 
 #define MAX_PROBE_BUFFER	1024
 #define MAX_PROBES		 128
+#define MAX_PROBE_ARGS		 128
 
 static inline int is_c_varname(const char *name)
 {
@@ -14,7 +15,7 @@ static inline int is_c_varname(const char *name)
 	return isalpha(name[0]) || name[0] == '_';
 }
 
-#ifdef DWARF_SUPPORT
+#ifdef HAVE_DWARF_SUPPORT
 
 #include "dwarf-aux.h"
 
@@ -30,25 +31,25 @@ struct debuginfo {
 
 extern struct debuginfo *debuginfo__new(const char *path);
 extern struct debuginfo *debuginfo__new_online_kernel(unsigned long addr);
-extern void debuginfo__delete(struct debuginfo *self);
+extern void debuginfo__delete(struct debuginfo *dbg);
 
 /* Find probe_trace_events specified by perf_probe_event from debuginfo */
-extern int debuginfo__find_trace_events(struct debuginfo *self,
+extern int debuginfo__find_trace_events(struct debuginfo *dbg,
 					struct perf_probe_event *pev,
 					struct probe_trace_event **tevs,
 					int max_tevs);
 
 /* Find a perf_probe_point from debuginfo */
-extern int debuginfo__find_probe_point(struct debuginfo *self,
+extern int debuginfo__find_probe_point(struct debuginfo *dbg,
 				       unsigned long addr,
 				       struct perf_probe_point *ppt);
 
 /* Find a line range */
-extern int debuginfo__find_line_range(struct debuginfo *self,
+extern int debuginfo__find_line_range(struct debuginfo *dbg,
 				      struct line_range *lr);
 
 /* Find available variables */
-extern int debuginfo__find_available_vars_at(struct debuginfo *self,
+extern int debuginfo__find_available_vars_at(struct debuginfo *dbg,
 					     struct perf_probe_event *pev,
 					     struct variable_list **vls,
 					     int max_points, bool externs);
@@ -105,6 +106,6 @@ struct line_finder {
 	int			found;
 };
 
-#endif /* DWARF_SUPPORT */
+#endif /* HAVE_DWARF_SUPPORT */
 
 #endif /*_PROBE_FINDER_H */
diff --git a/tools/perf/util/pstack.h b/tools/perf/util/pstack.h
index 4cedea59f518ce7c1fcb61b0368598453bdf3a63..c3cb6584d52763f24c3074886de50c9ff741c71c 100644
--- a/tools/perf/util/pstack.h
+++ b/tools/perf/util/pstack.h
@@ -5,10 +5,10 @@
 
 struct pstack;
 struct pstack *pstack__new(unsigned short max_nr_entries);
-void pstack__delete(struct pstack *self);
-bool pstack__empty(const struct pstack *self);
-void pstack__remove(struct pstack *self, void *key);
-void pstack__push(struct pstack *self, void *key);
-void *pstack__pop(struct pstack *self);
+void pstack__delete(struct pstack *pstack);
+bool pstack__empty(const struct pstack *pstack);
+void pstack__remove(struct pstack *pstack, void *key);
+void pstack__push(struct pstack *pstack, void *key);
+void *pstack__pop(struct pstack *pstack);
 
 #endif /* _PERF_PSTACK_ */
diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources
index f75ae1b9900c435af147f24e6c29dd414ca9692c..239036fb2b2c0b08b9352dcbce368904917d2521 100644
--- a/tools/perf/util/python-ext-sources
+++ b/tools/perf/util/python-ext-sources
@@ -17,5 +17,5 @@ util/xyarray.c
 util/cgroup.c
 util/rblist.c
 util/strlist.c
-util/sysfs.c
+util/fs.c
 ../../lib/rbtree.c
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 2ac4bc92bb1ff2b130e47a59e43c3127ba0568b7..4bf8ace7f5116a3b7536ba9c13592bb4f8e7a7e1 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -33,13 +33,6 @@ int eprintf(int level, const char *fmt, ...)
 # define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size,
 #endif
 
-struct throttle_event {
-	struct perf_event_header header;
-	u64			 time;
-	u64			 id;
-	u64			 stream_id;
-};
-
 PyMODINIT_FUNC initperf(void);
 
 #define member_def(type, member, ptype, help) \
@@ -1038,6 +1031,7 @@ PyMODINIT_FUNC initperf(void)
 	    pyrf_cpu_map__setup_types() < 0)
 		return;
 
+	/* The page_size is placed in util object. */
 	page_size = sysconf(_SC_PAGE_SIZE);
 
 	Py_INCREF(&pyrf_evlist__type);
diff --git a/tools/perf/util/rblist.c b/tools/perf/util/rblist.c
index a16cdd2625ad2b4a0ff8ff39d6b91b331f3a3a1e..0dfe27d99458c845991e490283c067d0d0706221 100644
--- a/tools/perf/util/rblist.c
+++ b/tools/perf/util/rblist.c
@@ -48,10 +48,12 @@ void rblist__remove_node(struct rblist *rblist, struct rb_node *rb_node)
 	rblist->node_delete(rblist, rb_node);
 }
 
-struct rb_node *rblist__find(struct rblist *rblist, const void *entry)
+static struct rb_node *__rblist__findnew(struct rblist *rblist,
+					 const void *entry,
+					 bool create)
 {
 	struct rb_node **p = &rblist->entries.rb_node;
-	struct rb_node *parent = NULL;
+	struct rb_node *parent = NULL, *new_node = NULL;
 
 	while (*p != NULL) {
 		int rc;
@@ -67,7 +69,26 @@ struct rb_node *rblist__find(struct rblist *rblist, const void *entry)
 			return parent;
 	}
 
-	return NULL;
+	if (create) {
+		new_node = rblist->node_new(rblist, entry);
+		if (new_node) {
+			rb_link_node(new_node, parent, p);
+			rb_insert_color(new_node, &rblist->entries);
+			++rblist->nr_entries;
+		}
+	}
+
+	return new_node;
+}
+
+struct rb_node *rblist__find(struct rblist *rblist, const void *entry)
+{
+	return __rblist__findnew(rblist, entry, false);
+}
+
+struct rb_node *rblist__findnew(struct rblist *rblist, const void *entry)
+{
+	return __rblist__findnew(rblist, entry, true);
 }
 
 void rblist__init(struct rblist *rblist)
diff --git a/tools/perf/util/rblist.h b/tools/perf/util/rblist.h
index 6d0cae5ae83d5e848b56a9518575ad3c7a114444..ff9913b994c26126203e303f694021f9d24e739a 100644
--- a/tools/perf/util/rblist.h
+++ b/tools/perf/util/rblist.h
@@ -32,6 +32,7 @@ void rblist__delete(struct rblist *rblist);
 int rblist__add_node(struct rblist *rblist, const void *new_entry);
 void rblist__remove_node(struct rblist *rblist, struct rb_node *rb_node);
 struct rb_node *rblist__find(struct rblist *rblist, const void *entry);
+struct rb_node *rblist__findnew(struct rblist *rblist, const void *entry);
 struct rb_node *rblist__entry(const struct rblist *rblist, unsigned int idx);
 
 static inline bool rblist__empty(const struct rblist *rblist)
diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
index 18d73aa2f0f89679537b5cb77bdf914a597cd1ad..c8845b107f6085cfacb907fce862c2e76dc6db5b 100644
--- a/tools/perf/util/record.c
+++ b/tools/perf/util/record.c
@@ -2,6 +2,8 @@
 #include "evsel.h"
 #include "cpumap.h"
 #include "parse-events.h"
+#include "fs.h"
+#include "util.h"
 
 typedef void (*setup_probe_fn_t)(struct perf_evsel *evsel);
 
@@ -106,3 +108,72 @@ void perf_evlist__config(struct perf_evlist *evlist,
 
 	perf_evlist__set_id_pos(evlist);
 }
+
+static int get_max_rate(unsigned int *rate)
+{
+	char path[PATH_MAX];
+	const char *procfs = procfs__mountpoint();
+
+	if (!procfs)
+		return -1;
+
+	snprintf(path, PATH_MAX,
+		 "%s/sys/kernel/perf_event_max_sample_rate", procfs);
+
+	return filename__read_int(path, (int *) rate);
+}
+
+static int perf_record_opts__config_freq(struct perf_record_opts *opts)
+{
+	bool user_freq = opts->user_freq != UINT_MAX;
+	unsigned int max_rate;
+
+	if (opts->user_interval != ULLONG_MAX)
+		opts->default_interval = opts->user_interval;
+	if (user_freq)
+		opts->freq = opts->user_freq;
+
+	/*
+	 * User specified count overrides default frequency.
+	 */
+	if (opts->default_interval)
+		opts->freq = 0;
+	else if (opts->freq) {
+		opts->default_interval = opts->freq;
+	} else {
+		pr_err("frequency and count are zero, aborting\n");
+		return -1;
+	}
+
+	if (get_max_rate(&max_rate))
+		return 0;
+
+	/*
+	 * User specified frequency is over current maximum.
+	 */
+	if (user_freq && (max_rate < opts->freq)) {
+		pr_err("Maximum frequency rate (%u) reached.\n"
+		   "Please use -F freq option with lower value or consider\n"
+		   "tweaking /proc/sys/kernel/perf_event_max_sample_rate.\n",
+		   max_rate);
+		return -1;
+	}
+
+	/*
+	 * Default frequency is over current maximum.
+	 */
+	if (max_rate < opts->freq) {
+		pr_warning("Lowering default frequency rate to %u.\n"
+			   "Please consider tweaking "
+			   "/proc/sys/kernel/perf_event_max_sample_rate.\n",
+			   max_rate);
+		opts->freq = max_rate;
+	}
+
+	return 0;
+}
+
+int perf_record_opts__config(struct perf_record_opts *opts)
+{
+	return perf_record_opts__config_freq(opts);
+}
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index c0c9795c4f0235b51848e01db33a7950069b182a..d5e5969f6fea4fce43974a748f3069e869ab9fca 100644
--- a/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -273,7 +273,7 @@ static void perl_process_tracepoint(union perf_event *perf_event __maybe_unused,
 	int cpu = sample->cpu;
 	void *data = sample->raw_data;
 	unsigned long long nsecs = sample->time;
-	char *comm = thread->comm;
+	const char *comm = thread__comm_str(thread);
 
 	dSP;
 
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 95d91a0b23afe01a45463a02dcc8eadbb5ba5169..53c20e7fd90037ce46dc1c53d20e99264d66c47a 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -250,7 +250,7 @@ static void python_process_tracepoint(union perf_event *perf_event
 	int cpu = sample->cpu;
 	void *data = sample->raw_data;
 	unsigned long long nsecs = sample->time;
-	char *comm = thread->comm;
+	const char *comm = thread__comm_str(thread);
 
 	t = PyTuple_New(MAX_FIELDS);
 	if (!t)
@@ -389,7 +389,7 @@ static void python_process_general_event(union perf_event *perf_event
 	pydict_set_item_string_decref(dict, "raw_buf", PyString_FromStringAndSize(
 			(const char *)sample->raw_data, sample->raw_size));
 	pydict_set_item_string_decref(dict, "comm",
-			PyString_FromString(thread->comm));
+			PyString_FromString(thread__comm_str(thread)));
 	if (al->map) {
 		pydict_set_item_string_decref(dict, "dso",
 			PyString_FromString(al->map->dso->name));
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 568b750c01f60b3d81cda29966ed40534a7bc8e1..f36d24a024453f6e2777d1bc4cfbb10fb9a3cf4a 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -16,73 +16,34 @@
 #include "perf_regs.h"
 #include "vdso.h"
 
-static int perf_session__open(struct perf_session *self, bool force)
+static int perf_session__open(struct perf_session *session)
 {
-	struct stat input_stat;
+	struct perf_data_file *file = session->file;
 
-	if (!strcmp(self->filename, "-")) {
-		self->fd_pipe = true;
-		self->fd = STDIN_FILENO;
-
-		if (perf_session__read_header(self) < 0)
-			pr_err("incompatible file format (rerun with -v to learn more)");
-
-		return 0;
-	}
-
-	self->fd = open(self->filename, O_RDONLY);
-	if (self->fd < 0) {
-		int err = errno;
-
-		pr_err("failed to open %s: %s", self->filename, strerror(err));
-		if (err == ENOENT && !strcmp(self->filename, "perf.data"))
-			pr_err("  (try 'perf record' first)");
-		pr_err("\n");
-		return -errno;
-	}
-
-	if (fstat(self->fd, &input_stat) < 0)
-		goto out_close;
-
-	if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) {
-		pr_err("file %s not owned by current user or root\n",
-		       self->filename);
-		goto out_close;
-	}
-
-	if (!input_stat.st_size) {
-		pr_info("zero-sized file (%s), nothing to do!\n",
-			self->filename);
-		goto out_close;
-	}
-
-	if (perf_session__read_header(self) < 0) {
+	if (perf_session__read_header(session) < 0) {
 		pr_err("incompatible file format (rerun with -v to learn more)");
-		goto out_close;
+		return -1;
 	}
 
-	if (!perf_evlist__valid_sample_type(self->evlist)) {
+	if (perf_data_file__is_pipe(file))
+		return 0;
+
+	if (!perf_evlist__valid_sample_type(session->evlist)) {
 		pr_err("non matching sample_type");
-		goto out_close;
+		return -1;
 	}
 
-	if (!perf_evlist__valid_sample_id_all(self->evlist)) {
+	if (!perf_evlist__valid_sample_id_all(session->evlist)) {
 		pr_err("non matching sample_id_all");
-		goto out_close;
+		return -1;
 	}
 
-	if (!perf_evlist__valid_read_format(self->evlist)) {
+	if (!perf_evlist__valid_read_format(session->evlist)) {
 		pr_err("non matching read_format");
-		goto out_close;
+		return -1;
 	}
 
-	self->size = input_stat.st_size;
 	return 0;
-
-out_close:
-	close(self->fd);
-	self->fd = -1;
-	return -1;
 }
 
 void perf_session__set_id_hdr_size(struct perf_session *session)
@@ -92,71 +53,70 @@ void perf_session__set_id_hdr_size(struct perf_session *session)
 	machines__set_id_hdr_size(&session->machines, id_hdr_size);
 }
 
-int perf_session__create_kernel_maps(struct perf_session *self)
+int perf_session__create_kernel_maps(struct perf_session *session)
 {
-	int ret = machine__create_kernel_maps(&self->machines.host);
+	int ret = machine__create_kernel_maps(&session->machines.host);
 
 	if (ret >= 0)
-		ret = machines__create_guest_kernel_maps(&self->machines);
+		ret = machines__create_guest_kernel_maps(&session->machines);
 	return ret;
 }
 
-static void perf_session__destroy_kernel_maps(struct perf_session *self)
+static void perf_session__destroy_kernel_maps(struct perf_session *session)
 {
-	machines__destroy_kernel_maps(&self->machines);
+	machines__destroy_kernel_maps(&session->machines);
 }
 
-struct perf_session *perf_session__new(const char *filename, int mode,
-				       bool force, bool repipe,
-				       struct perf_tool *tool)
+struct perf_session *perf_session__new(struct perf_data_file *file,
+				       bool repipe, struct perf_tool *tool)
 {
-	struct perf_session *self;
-	struct stat st;
-	size_t len;
-
-	if (!filename || !strlen(filename)) {
-		if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode))
-			filename = "-";
-		else
-			filename = "perf.data";
-	}
+	struct perf_session *session = zalloc(sizeof(*session));
 
-	len = strlen(filename);
-	self = zalloc(sizeof(*self) + len);
-
-	if (self == NULL)
+	if (!session)
 		goto out;
 
-	memcpy(self->filename, filename, len);
-	self->repipe = repipe;
-	INIT_LIST_HEAD(&self->ordered_samples.samples);
-	INIT_LIST_HEAD(&self->ordered_samples.sample_cache);
-	INIT_LIST_HEAD(&self->ordered_samples.to_free);
-	machines__init(&self->machines);
+	session->repipe = repipe;
+	INIT_LIST_HEAD(&session->ordered_samples.samples);
+	INIT_LIST_HEAD(&session->ordered_samples.sample_cache);
+	INIT_LIST_HEAD(&session->ordered_samples.to_free);
+	machines__init(&session->machines);
 
-	if (mode == O_RDONLY) {
-		if (perf_session__open(self, force) < 0)
+	if (file) {
+		if (perf_data_file__open(file))
 			goto out_delete;
-		perf_session__set_id_hdr_size(self);
-	} else if (mode == O_WRONLY) {
+
+		session->file = file;
+
+		if (perf_data_file__is_read(file)) {
+			if (perf_session__open(session) < 0)
+				goto out_close;
+
+			perf_session__set_id_hdr_size(session);
+		}
+	}
+
+	if (!file || perf_data_file__is_write(file)) {
 		/*
 		 * In O_RDONLY mode this will be performed when reading the
 		 * kernel MMAP event, in perf_event__process_mmap().
 		 */
-		if (perf_session__create_kernel_maps(self) < 0)
+		if (perf_session__create_kernel_maps(session) < 0)
 			goto out_delete;
 	}
 
 	if (tool && tool->ordering_requires_timestamps &&
-	    tool->ordered_samples && !perf_evlist__sample_id_all(self->evlist)) {
+	    tool->ordered_samples && !perf_evlist__sample_id_all(session->evlist)) {
 		dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n");
 		tool->ordered_samples = false;
 	}
 
-out:
-	return self;
-out_delete:
-	perf_session__delete(self);
+	return session;
+
+ out_close:
+	perf_data_file__close(file);
+ out_delete:
+	perf_session__delete(session);
+ out:
 	return NULL;
 }
 
@@ -186,15 +146,16 @@ static void perf_session_env__delete(struct perf_session_env *env)
 	free(env->pmu_mappings);
 }
 
-void perf_session__delete(struct perf_session *self)
+void perf_session__delete(struct perf_session *session)
 {
-	perf_session__destroy_kernel_maps(self);
-	perf_session__delete_dead_threads(self);
-	perf_session__delete_threads(self);
-	perf_session_env__delete(&self->header.env);
-	machines__exit(&self->machines);
-	close(self->fd);
-	free(self);
+	perf_session__destroy_kernel_maps(session);
+	perf_session__delete_dead_threads(session);
+	perf_session__delete_threads(session);
+	perf_session_env__delete(&session->header.env);
+	machines__exit(&session->machines);
+	if (session->file)
+		perf_data_file__close(session->file);
+	free(session);
 	vdso__exit();
 }
 
@@ -397,6 +358,17 @@ static void perf_event__read_swap(union perf_event *event, bool sample_id_all)
 		swap_sample_id_all(event, &event->read + 1);
 }
 
+static void perf_event__throttle_swap(union perf_event *event,
+				      bool sample_id_all)
+{
+	event->throttle.time	  = bswap_64(event->throttle.time);
+	event->throttle.id	  = bswap_64(event->throttle.id);
+	event->throttle.stream_id = bswap_64(event->throttle.stream_id);
+
+	if (sample_id_all)
+		swap_sample_id_all(event, &event->throttle + 1);
+}
+
 static u8 revbyte(u8 b)
 {
 	int rev = (b >> 4) | ((b & 0xf) << 4);
@@ -442,6 +414,9 @@ void perf_event__attr_swap(struct perf_event_attr *attr)
 	attr->bp_type		= bswap_32(attr->bp_type);
 	attr->bp_addr		= bswap_64(attr->bp_addr);
 	attr->bp_len		= bswap_64(attr->bp_len);
+	attr->branch_sample_type = bswap_64(attr->branch_sample_type);
+	attr->sample_regs_user	 = bswap_64(attr->sample_regs_user);
+	attr->sample_stack_user  = bswap_32(attr->sample_stack_user);
 
 	swap_bitfield((u8 *) (&attr->read_format + 1), sizeof(u64));
 }
@@ -482,6 +457,8 @@ static perf_event__swap_op perf_event__swap_ops[] = {
 	[PERF_RECORD_EXIT]		  = perf_event__task_swap,
 	[PERF_RECORD_LOST]		  = perf_event__all64_swap,
 	[PERF_RECORD_READ]		  = perf_event__read_swap,
+	[PERF_RECORD_THROTTLE]		  = perf_event__throttle_swap,
+	[PERF_RECORD_UNTHROTTLE]	  = perf_event__throttle_swap,
 	[PERF_RECORD_SAMPLE]		  = perf_event__all64_swap,
 	[PERF_RECORD_HEADER_ATTR]	  = perf_event__hdr_attr_swap,
 	[PERF_RECORD_HEADER_EVENT_TYPE]	  = perf_event__event_type_swap,
@@ -525,13 +502,16 @@ static int flush_sample_queue(struct perf_session *s,
 	struct perf_sample sample;
 	u64 limit = os->next_flush;
 	u64 last_ts = os->last_sample ? os->last_sample->timestamp : 0ULL;
-	unsigned idx = 0, progress_next = os->nr_samples / 16;
 	bool show_progress = limit == ULLONG_MAX;
+	struct ui_progress prog;
 	int ret;
 
 	if (!tool->ordered_samples || !limit)
 		return 0;
 
+	if (show_progress)
+		ui_progress__init(&prog, os->nr_samples, "Processing time ordered events...");
+
 	list_for_each_entry_safe(iter, tmp, head, list) {
 		if (session_done())
 			return 0;
@@ -552,11 +532,9 @@ static int flush_sample_queue(struct perf_session *s,
 		os->last_flush = iter->timestamp;
 		list_del(&iter->list);
 		list_add(&iter->list, &os->sample_cache);
-		if (show_progress && (++idx >= progress_next)) {
-			progress_next += os->nr_samples / 16;
-			ui_progress__update(idx, os->nr_samples,
-					    "Processing time ordered events...");
-		}
+
+		if (show_progress)
+			ui_progress__update(&prog, 1);
 	}
 
 	if (list_empty(head)) {
@@ -860,6 +838,9 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event,
 	if (sample_type & PERF_SAMPLE_DATA_SRC)
 		printf(" . data_src: 0x%"PRIx64"\n", sample->data_src);
 
+	if (sample_type & PERF_SAMPLE_TRANSACTION)
+		printf("... transaction: %" PRIx64 "\n", sample->transaction);
+
 	if (sample_type & PERF_SAMPLE_READ)
 		sample_read__printf(sample, evsel->attr.read_format);
 }
@@ -1031,6 +1012,7 @@ static int perf_session_deliver_event(struct perf_session *session,
 static int perf_session__process_user_event(struct perf_session *session, union perf_event *event,
 					    struct perf_tool *tool, u64 file_offset)
 {
+	int fd = perf_data_file__fd(session->file);
 	int err;
 
 	dump_event(session, event, file_offset, NULL);
@@ -1044,7 +1026,7 @@ static int perf_session__process_user_event(struct perf_session *session, union
 		return err;
 	case PERF_RECORD_HEADER_TRACING_DATA:
 		/* setup for reading amidst mmap */
-		lseek(session->fd, file_offset, SEEK_SET);
+		lseek(fd, file_offset, SEEK_SET);
 		return tool->tracing_data(tool, event, session);
 	case PERF_RECORD_HEADER_BUILD_ID:
 		return tool->build_id(tool, event, session);
@@ -1101,11 +1083,11 @@ static int perf_session__process_event(struct perf_session *session,
 					  file_offset);
 }
 
-void perf_event_header__bswap(struct perf_event_header *self)
+void perf_event_header__bswap(struct perf_event_header *hdr)
 {
-	self->type = bswap_32(self->type);
-	self->misc = bswap_16(self->misc);
-	self->size = bswap_16(self->size);
+	hdr->type = bswap_32(hdr->type);
+	hdr->misc = bswap_16(hdr->misc);
+	hdr->size = bswap_16(hdr->size);
 }
 
 struct thread *perf_session__findnew(struct perf_session *session, pid_t pid)
@@ -1113,11 +1095,11 @@ struct thread *perf_session__findnew(struct perf_session *session, pid_t pid)
 	return machine__findnew_thread(&session->machines.host, 0, pid);
 }
 
-static struct thread *perf_session__register_idle_thread(struct perf_session *self)
+static struct thread *perf_session__register_idle_thread(struct perf_session *session)
 {
-	struct thread *thread = perf_session__findnew(self, 0);
+	struct thread *thread = perf_session__findnew(session, 0);
 
-	if (thread == NULL || thread__set_comm(thread, "swapper")) {
+	if (thread == NULL || thread__set_comm(thread, "swapper", 0)) {
 		pr_err("problem inserting idle task.\n");
 		thread = NULL;
 	}
@@ -1167,9 +1149,10 @@ static void perf_session__warn_about_errors(const struct perf_session *session,
 
 volatile int session_done;
 
-static int __perf_session__process_pipe_events(struct perf_session *self,
+static int __perf_session__process_pipe_events(struct perf_session *session,
 					       struct perf_tool *tool)
 {
+	int fd = perf_data_file__fd(session->file);
 	union perf_event *event;
 	uint32_t size, cur_size = 0;
 	void *buf = NULL;
@@ -1188,7 +1171,7 @@ static int __perf_session__process_pipe_events(struct perf_session *self,
 		return -errno;
 more:
 	event = buf;
-	err = readn(self->fd, event, sizeof(struct perf_event_header));
+	err = readn(fd, event, sizeof(struct perf_event_header));
 	if (err <= 0) {
 		if (err == 0)
 			goto done;
@@ -1197,7 +1180,7 @@ static int __perf_session__process_pipe_events(struct perf_session *self,
 		goto out_err;
 	}
 
-	if (self->header.needs_swap)
+	if (session->header.needs_swap)
 		perf_event_header__bswap(&event->header);
 
 	size = event->header.size;
@@ -1220,7 +1203,7 @@ static int __perf_session__process_pipe_events(struct perf_session *self,
 	p += sizeof(struct perf_event_header);
 
 	if (size - sizeof(struct perf_event_header)) {
-		err = readn(self->fd, p, size - sizeof(struct perf_event_header));
+		err = readn(fd, p, size - sizeof(struct perf_event_header));
 		if (err <= 0) {
 			if (err == 0) {
 				pr_err("unexpected end of event stream\n");
@@ -1232,7 +1215,7 @@ static int __perf_session__process_pipe_events(struct perf_session *self,
 		}
 	}
 
-	if ((skip = perf_session__process_event(self, event, tool, head)) < 0) {
+	if ((skip = perf_session__process_event(session, event, tool, head)) < 0) {
 		pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n",
 		       head, event->header.size, event->header.type);
 		err = -EINVAL;
@@ -1247,11 +1230,13 @@ static int __perf_session__process_pipe_events(struct perf_session *self,
 	if (!session_done())
 		goto more;
 done:
-	err = 0;
+	/* do the final flush for ordered samples */
+	session->ordered_samples.next_flush = ULLONG_MAX;
+	err = flush_sample_queue(session, tool);
 out_err:
 	free(buf);
-	perf_session__warn_about_errors(self, tool);
-	perf_session_free_sample_buffers(self);
+	perf_session__warn_about_errors(session, tool);
+	perf_session_free_sample_buffers(session);
 	return err;
 }
 
@@ -1299,12 +1284,14 @@ int __perf_session__process_events(struct perf_session *session,
 				   u64 data_offset, u64 data_size,
 				   u64 file_size, struct perf_tool *tool)
 {
-	u64 head, page_offset, file_offset, file_pos, progress_next;
+	int fd = perf_data_file__fd(session->file);
+	u64 head, page_offset, file_offset, file_pos;
 	int err, mmap_prot, mmap_flags, map_idx = 0;
 	size_t	mmap_size;
 	char *buf, *mmaps[NUM_MMAPS];
 	union perf_event *event;
 	uint32_t size;
+	struct ui_progress prog;
 
 	perf_tool__fill_defaults(tool);
 
@@ -1315,7 +1302,7 @@ int __perf_session__process_events(struct perf_session *session,
 	if (data_size && (data_offset + data_size < file_size))
 		file_size = data_offset + data_size;
 
-	progress_next = file_size / 16;
+	ui_progress__init(&prog, file_size, "Processing events...");
 
 	mmap_size = MMAP_SIZE;
 	if (mmap_size > file_size)
@@ -1331,7 +1318,7 @@ int __perf_session__process_events(struct perf_session *session,
 		mmap_flags = MAP_PRIVATE;
 	}
 remap:
-	buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, session->fd,
+	buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, fd,
 		   file_offset);
 	if (buf == MAP_FAILED) {
 		pr_err("failed to mmap file\n");
@@ -1370,19 +1357,15 @@ int __perf_session__process_events(struct perf_session *session,
 	head += size;
 	file_pos += size;
 
-	if (file_pos >= progress_next) {
-		progress_next += file_size / 16;
-		ui_progress__update(file_pos, file_size,
-				    "Processing events...");
-	}
+	ui_progress__update(&prog, size);
 
-	err = 0;
 	if (session_done())
-		goto out_err;
+		goto out;
 
 	if (file_pos < file_size)
 		goto more;
 
+out:
 	/* do the final flush for ordered samples */
 	session->ordered_samples.next_flush = ULLONG_MAX;
 	err = flush_sample_queue(session, tool);
@@ -1393,21 +1376,22 @@ int __perf_session__process_events(struct perf_session *session,
 	return err;
 }
 
-int perf_session__process_events(struct perf_session *self,
+int perf_session__process_events(struct perf_session *session,
 				 struct perf_tool *tool)
 {
+	u64 size = perf_data_file__size(session->file);
 	int err;
 
-	if (perf_session__register_idle_thread(self) == NULL)
+	if (perf_session__register_idle_thread(session) == NULL)
 		return -ENOMEM;
 
-	if (!self->fd_pipe)
-		err = __perf_session__process_events(self,
-						     self->header.data_offset,
-						     self->header.data_size,
-						     self->size, tool);
+	if (!perf_data_file__is_pipe(session->file))
+		err = __perf_session__process_events(session,
+						     session->header.data_offset,
+						     session->header.data_size,
+						     size, tool);
 	else
-		err = __perf_session__process_pipe_events(self, tool);
+		err = __perf_session__process_pipe_events(session, tool);
 
 	return err;
 }
@@ -1456,15 +1440,15 @@ int maps__set_kallsyms_ref_reloc_sym(struct map **maps,
 	return 0;
 }
 
-size_t perf_session__fprintf_dsos(struct perf_session *self, FILE *fp)
+size_t perf_session__fprintf_dsos(struct perf_session *session, FILE *fp)
 {
-	return machines__fprintf_dsos(&self->machines, fp);
+	return machines__fprintf_dsos(&session->machines, fp);
 }
 
-size_t perf_session__fprintf_dsos_buildid(struct perf_session *self, FILE *fp,
+size_t perf_session__fprintf_dsos_buildid(struct perf_session *session, FILE *fp,
 					  bool (skip)(struct dso *dso, int parm), int parm)
 {
-	return machines__fprintf_dsos_buildid(&self->machines, fp, skip, parm);
+	return machines__fprintf_dsos_buildid(&session->machines, fp, skip, parm);
 }
 
 size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp)
@@ -1525,7 +1509,8 @@ void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event,
 	if (symbol_conf.use_callchain && sample->callchain) {
 
 		if (machine__resolve_callchain(machine, evsel, al.thread,
-					       sample, NULL, NULL) != 0) {
+					       sample, NULL, NULL,
+					       PERF_MAX_STACK_DEPTH) != 0) {
 			if (verbose)
 				error("Failed to resolve callchain. Skipping\n");
 			return;
@@ -1629,13 +1614,14 @@ int perf_session__cpu_bitmap(struct perf_session *session,
 void perf_session__fprintf_info(struct perf_session *session, FILE *fp,
 				bool full)
 {
+	int fd = perf_data_file__fd(session->file);
 	struct stat st;
 	int ret;
 
 	if (session == NULL || fp == NULL)
 		return;
 
-	ret = fstat(session->fd, &st);
+	ret = fstat(fd, &st);
 	if (ret == -1)
 		return;
 
@@ -1664,9 +1650,9 @@ int __perf_session__set_tracepoints_handlers(struct perf_session *session,
 			continue;
 
 		err = -EEXIST;
-		if (evsel->handler.func != NULL)
+		if (evsel->handler != NULL)
 			goto out;
-		evsel->handler.func = assocs[i].handler;
+		evsel->handler = assocs[i].handler;
 	}
 
 	err = 0;
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 04bf7373a7e5fb04222b1a9cdb5c295045c0626f..50f640958f0f8aec87c7f5b3ef7346ad27363542 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -7,6 +7,7 @@
 #include "machine.h"
 #include "symbol.h"
 #include "thread.h"
+#include "data.h"
 #include <linux/rbtree.h>
 #include <linux/perf_event.h>
 
@@ -29,16 +30,13 @@ struct ordered_samples {
 
 struct perf_session {
 	struct perf_header	header;
-	unsigned long		size;
 	struct machines		machines;
 	struct perf_evlist	*evlist;
 	struct pevent		*pevent;
 	struct events_stats	stats;
-	int			fd;
-	bool			fd_pipe;
 	bool			repipe;
 	struct ordered_samples	ordered_samples;
-	char			filename[1];
+	struct perf_data_file	*file;
 };
 
 #define PRINT_IP_OPT_IP		(1<<0)
@@ -49,17 +47,16 @@ struct perf_session {
 
 struct perf_tool;
 
-struct perf_session *perf_session__new(const char *filename, int mode,
-				       bool force, bool repipe,
-				       struct perf_tool *tool);
+struct perf_session *perf_session__new(struct perf_data_file *file,
+				       bool repipe, struct perf_tool *tool);
 void perf_session__delete(struct perf_session *session);
 
-void perf_event_header__bswap(struct perf_event_header *self);
+void perf_event_header__bswap(struct perf_event_header *hdr);
 
-int __perf_session__process_events(struct perf_session *self,
+int __perf_session__process_events(struct perf_session *session,
 				   u64 data_offset, u64 data_size, u64 size,
 				   struct perf_tool *tool);
-int perf_session__process_events(struct perf_session *self,
+int perf_session__process_events(struct perf_session *session,
 				 struct perf_tool *tool);
 
 int perf_session_queue_event(struct perf_session *s, union perf_event *event,
@@ -67,37 +64,38 @@ int perf_session_queue_event(struct perf_session *s, union perf_event *event,
 
 void perf_tool__fill_defaults(struct perf_tool *tool);
 
-int perf_session__resolve_callchain(struct perf_session *self, struct perf_evsel *evsel,
+int perf_session__resolve_callchain(struct perf_session *session,
+				    struct perf_evsel *evsel,
 				    struct thread *thread,
 				    struct ip_callchain *chain,
 				    struct symbol **parent);
 
-bool perf_session__has_traces(struct perf_session *self, const char *msg);
+bool perf_session__has_traces(struct perf_session *session, const char *msg);
 
 void mem_bswap_64(void *src, int byte_size);
 void mem_bswap_32(void *src, int byte_size);
 void perf_event__attr_swap(struct perf_event_attr *attr);
 
-int perf_session__create_kernel_maps(struct perf_session *self);
+int perf_session__create_kernel_maps(struct perf_session *session);
 
 void perf_session__set_id_hdr_size(struct perf_session *session);
 
 static inline
-struct machine *perf_session__find_machine(struct perf_session *self, pid_t pid)
+struct machine *perf_session__find_machine(struct perf_session *session, pid_t pid)
 {
-	return machines__find(&self->machines, pid);
+	return machines__find(&session->machines, pid);
 }
 
 static inline
-struct machine *perf_session__findnew_machine(struct perf_session *self, pid_t pid)
+struct machine *perf_session__findnew_machine(struct perf_session *session, pid_t pid)
 {
-	return machines__findnew(&self->machines, pid);
+	return machines__findnew(&session->machines, pid);
 }
 
-struct thread *perf_session__findnew(struct perf_session *self, pid_t pid);
-size_t perf_session__fprintf(struct perf_session *self, FILE *fp);
+struct thread *perf_session__findnew(struct perf_session *session, pid_t pid);
+size_t perf_session__fprintf(struct perf_session *session, FILE *fp);
 
-size_t perf_session__fprintf_dsos(struct perf_session *self, FILE *fp);
+size_t perf_session__fprintf_dsos(struct perf_session *session, FILE *fp);
 
 size_t perf_session__fprintf_dsos_buildid(struct perf_session *session, FILE *fp,
 					  bool (fn)(struct dso *dso, int parm), int parm);
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 5f118a089519a46bb6b370536660476bc6880eda..3c1b75c8b9a65e15834ee31c07dabb3f19b5fe4c 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -1,5 +1,6 @@
 #include "sort.h"
 #include "hist.h"
+#include "comm.h"
 #include "symbol.h"
 
 regex_t		parent_regex;
@@ -42,7 +43,7 @@ static int repsep_snprintf(char *bf, size_t size, const char *fmt, ...)
 	return n;
 }
 
-static int64_t cmp_null(void *l, void *r)
+static int64_t cmp_null(const void *l, const void *r)
 {
 	if (!l && !r)
 		return 0;
@@ -60,11 +61,12 @@ sort__thread_cmp(struct hist_entry *left, struct hist_entry *right)
 	return right->thread->tid - left->thread->tid;
 }
 
-static int hist_entry__thread_snprintf(struct hist_entry *self, char *bf,
+static int hist_entry__thread_snprintf(struct hist_entry *he, char *bf,
 				       size_t size, unsigned int width)
 {
+	const char *comm = thread__comm_str(he->thread);
 	return repsep_snprintf(bf, size, "%*s:%5d", width - 6,
-			      self->thread->comm ?: "", self->thread->tid);
+			       comm ?: "", he->thread->tid);
 }
 
 struct sort_entry sort_thread = {
@@ -79,25 +81,21 @@ struct sort_entry sort_thread = {
 static int64_t
 sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
 {
-	return right->thread->tid - left->thread->tid;
+	/* Compare the addr that should be unique among comm */
+	return comm__str(right->comm) - comm__str(left->comm);
 }
 
 static int64_t
 sort__comm_collapse(struct hist_entry *left, struct hist_entry *right)
 {
-	char *comm_l = left->thread->comm;
-	char *comm_r = right->thread->comm;
-
-	if (!comm_l || !comm_r)
-		return cmp_null(comm_l, comm_r);
-
-	return strcmp(comm_l, comm_r);
+	/* Compare the addr that should be unique among comm */
+	return comm__str(right->comm) - comm__str(left->comm);
 }
 
-static int hist_entry__comm_snprintf(struct hist_entry *self, char *bf,
+static int hist_entry__comm_snprintf(struct hist_entry *he, char *bf,
 				     size_t size, unsigned int width)
 {
-	return repsep_snprintf(bf, size, "%*s", width, self->thread->comm);
+	return repsep_snprintf(bf, size, "%*s", width, comm__str(he->comm));
 }
 
 struct sort_entry sort_comm = {
@@ -148,10 +146,10 @@ static int _hist_entry__dso_snprintf(struct map *map, char *bf,
 	return repsep_snprintf(bf, size, "%-*s", width, "[unknown]");
 }
 
-static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf,
+static int hist_entry__dso_snprintf(struct hist_entry *he, char *bf,
 				    size_t size, unsigned int width)
 {
-	return _hist_entry__dso_snprintf(self->ms.map, bf, size, width);
+	return _hist_entry__dso_snprintf(he->ms.map, bf, size, width);
 }
 
 struct sort_entry sort_dso = {
@@ -182,9 +180,19 @@ static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r)
 static int64_t
 sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
 {
+	int64_t ret;
+
 	if (!left->ms.sym && !right->ms.sym)
 		return right->level - left->level;
 
+	/*
+	 * comparing symbol address alone is not enough since it's a
+	 * relative address within a dso.
+	 */
+	ret = sort__dso_cmp(left, right);
+	if (ret != 0)
+		return ret;
+
 	return _sort__sym_cmp(left->ms.sym, right->ms.sym);
 }
 
@@ -224,11 +232,11 @@ static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym,
 	return ret;
 }
 
-static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf,
+static int hist_entry__sym_snprintf(struct hist_entry *he, char *bf,
 				    size_t size, unsigned int width)
 {
-	return _hist_entry__sym_snprintf(self->ms.map, self->ms.sym, self->ip,
-					 self->level, bf, size, width);
+	return _hist_entry__sym_snprintf(he->ms.map, he->ms.sym, he->ip,
+					 he->level, bf, size, width);
 }
 
 struct sort_entry sort_sym = {
@@ -243,50 +251,32 @@ struct sort_entry sort_sym = {
 static int64_t
 sort__srcline_cmp(struct hist_entry *left, struct hist_entry *right)
 {
-	return (int64_t)(right->ip - left->ip);
+	if (!left->srcline) {
+		if (!left->ms.map)
+			left->srcline = SRCLINE_UNKNOWN;
+		else {
+			struct map *map = left->ms.map;
+			left->srcline = get_srcline(map->dso,
+					    map__rip_2objdump(map, left->ip));
+		}
+	}
+	if (!right->srcline) {
+		if (!right->ms.map)
+			right->srcline = SRCLINE_UNKNOWN;
+		else {
+			struct map *map = right->ms.map;
+			right->srcline = get_srcline(map->dso,
+					    map__rip_2objdump(map, right->ip));
+		}
+	}
+	return strcmp(left->srcline, right->srcline);
 }
 
-static int hist_entry__srcline_snprintf(struct hist_entry *self, char *bf,
+static int hist_entry__srcline_snprintf(struct hist_entry *he, char *bf,
 					size_t size,
 					unsigned int width __maybe_unused)
 {
-	FILE *fp = NULL;
-	char cmd[PATH_MAX + 2], *path = self->srcline, *nl;
-	size_t line_len;
-
-	if (path != NULL)
-		goto out_path;
-
-	if (!self->ms.map)
-		goto out_ip;
-
-	if (!strncmp(self->ms.map->dso->long_name, "/tmp/perf-", 10))
-		goto out_ip;
-
-	snprintf(cmd, sizeof(cmd), "addr2line -e %s %016" PRIx64,
-		 self->ms.map->dso->long_name, self->ip);
-	fp = popen(cmd, "r");
-	if (!fp)
-		goto out_ip;
-
-	if (getline(&path, &line_len, fp) < 0 || !line_len)
-		goto out_ip;
-	self->srcline = strdup(path);
-	if (self->srcline == NULL)
-		goto out_ip;
-
-	nl = strchr(self->srcline, '\n');
-	if (nl != NULL)
-		*nl = '\0';
-	path = self->srcline;
-out_path:
-	if (fp)
-		pclose(fp);
-	return repsep_snprintf(bf, size, "%s", path);
-out_ip:
-	if (fp)
-		pclose(fp);
-	return repsep_snprintf(bf, size, "%-#*llx", BITS_PER_LONG / 4, self->ip);
+	return repsep_snprintf(bf, size, "%s", he->srcline);
 }
 
 struct sort_entry sort_srcline = {
@@ -310,11 +300,11 @@ sort__parent_cmp(struct hist_entry *left, struct hist_entry *right)
 	return strcmp(sym_l->name, sym_r->name);
 }
 
-static int hist_entry__parent_snprintf(struct hist_entry *self, char *bf,
+static int hist_entry__parent_snprintf(struct hist_entry *he, char *bf,
 				       size_t size, unsigned int width)
 {
 	return repsep_snprintf(bf, size, "%-*s", width,
-			      self->parent ? self->parent->name : "[other]");
+			      he->parent ? he->parent->name : "[other]");
 }
 
 struct sort_entry sort_parent = {
@@ -332,10 +322,10 @@ sort__cpu_cmp(struct hist_entry *left, struct hist_entry *right)
 	return right->cpu - left->cpu;
 }
 
-static int hist_entry__cpu_snprintf(struct hist_entry *self, char *bf,
-				       size_t size, unsigned int width)
+static int hist_entry__cpu_snprintf(struct hist_entry *he, char *bf,
+				    size_t size, unsigned int width)
 {
-	return repsep_snprintf(bf, size, "%*d", width, self->cpu);
+	return repsep_snprintf(bf, size, "%*d", width, he->cpu);
 }
 
 struct sort_entry sort_cpu = {
@@ -354,10 +344,10 @@ sort__dso_from_cmp(struct hist_entry *left, struct hist_entry *right)
 			      right->branch_info->from.map);
 }
 
-static int hist_entry__dso_from_snprintf(struct hist_entry *self, char *bf,
+static int hist_entry__dso_from_snprintf(struct hist_entry *he, char *bf,
 				    size_t size, unsigned int width)
 {
-	return _hist_entry__dso_snprintf(self->branch_info->from.map,
+	return _hist_entry__dso_snprintf(he->branch_info->from.map,
 					 bf, size, width);
 }
 
@@ -368,10 +358,10 @@ sort__dso_to_cmp(struct hist_entry *left, struct hist_entry *right)
 			      right->branch_info->to.map);
 }
 
-static int hist_entry__dso_to_snprintf(struct hist_entry *self, char *bf,
+static int hist_entry__dso_to_snprintf(struct hist_entry *he, char *bf,
 				       size_t size, unsigned int width)
 {
-	return _hist_entry__dso_snprintf(self->branch_info->to.map,
+	return _hist_entry__dso_snprintf(he->branch_info->to.map,
 					 bf, size, width);
 }
 
@@ -399,21 +389,21 @@ sort__sym_to_cmp(struct hist_entry *left, struct hist_entry *right)
 	return _sort__sym_cmp(to_l->sym, to_r->sym);
 }
 
-static int hist_entry__sym_from_snprintf(struct hist_entry *self, char *bf,
+static int hist_entry__sym_from_snprintf(struct hist_entry *he, char *bf,
 					 size_t size, unsigned int width)
 {
-	struct addr_map_symbol *from = &self->branch_info->from;
+	struct addr_map_symbol *from = &he->branch_info->from;
 	return _hist_entry__sym_snprintf(from->map, from->sym, from->addr,
-					 self->level, bf, size, width);
+					 he->level, bf, size, width);
 
 }
 
-static int hist_entry__sym_to_snprintf(struct hist_entry *self, char *bf,
+static int hist_entry__sym_to_snprintf(struct hist_entry *he, char *bf,
 				       size_t size, unsigned int width)
 {
-	struct addr_map_symbol *to = &self->branch_info->to;
+	struct addr_map_symbol *to = &he->branch_info->to;
 	return _hist_entry__sym_snprintf(to->map, to->sym, to->addr,
-					 self->level, bf, size, width);
+					 he->level, bf, size, width);
 
 }
 
@@ -456,13 +446,13 @@ sort__mispredict_cmp(struct hist_entry *left, struct hist_entry *right)
 	return mp || p;
 }
 
-static int hist_entry__mispredict_snprintf(struct hist_entry *self, char *bf,
+static int hist_entry__mispredict_snprintf(struct hist_entry *he, char *bf,
 				    size_t size, unsigned int width){
 	static const char *out = "N/A";
 
-	if (self->branch_info->flags.predicted)
+	if (he->branch_info->flags.predicted)
 		out = "N";
-	else if (self->branch_info->flags.mispred)
+	else if (he->branch_info->flags.mispred)
 		out = "Y";
 
 	return repsep_snprintf(bf, size, "%-*s", width, out);
@@ -482,19 +472,19 @@ sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right)
 	return (int64_t)(r - l);
 }
 
-static int hist_entry__daddr_snprintf(struct hist_entry *self, char *bf,
+static int hist_entry__daddr_snprintf(struct hist_entry *he, char *bf,
 				    size_t size, unsigned int width)
 {
 	uint64_t addr = 0;
 	struct map *map = NULL;
 	struct symbol *sym = NULL;
 
-	if (self->mem_info) {
-		addr = self->mem_info->daddr.addr;
-		map = self->mem_info->daddr.map;
-		sym = self->mem_info->daddr.sym;
+	if (he->mem_info) {
+		addr = he->mem_info->daddr.addr;
+		map = he->mem_info->daddr.map;
+		sym = he->mem_info->daddr.sym;
 	}
-	return _hist_entry__sym_snprintf(map, sym, addr, self->level, bf, size,
+	return _hist_entry__sym_snprintf(map, sym, addr, he->level, bf, size,
 					 width);
 }
 
@@ -512,13 +502,13 @@ sort__dso_daddr_cmp(struct hist_entry *left, struct hist_entry *right)
 	return _sort__dso_cmp(map_l, map_r);
 }
 
-static int hist_entry__dso_daddr_snprintf(struct hist_entry *self, char *bf,
+static int hist_entry__dso_daddr_snprintf(struct hist_entry *he, char *bf,
 				    size_t size, unsigned int width)
 {
 	struct map *map = NULL;
 
-	if (self->mem_info)
-		map = self->mem_info->daddr.map;
+	if (he->mem_info)
+		map = he->mem_info->daddr.map;
 
 	return _hist_entry__dso_snprintf(map, bf, size, width);
 }
@@ -542,14 +532,14 @@ sort__locked_cmp(struct hist_entry *left, struct hist_entry *right)
 	return (int64_t)(data_src_r.mem_lock - data_src_l.mem_lock);
 }
 
-static int hist_entry__locked_snprintf(struct hist_entry *self, char *bf,
+static int hist_entry__locked_snprintf(struct hist_entry *he, char *bf,
 				    size_t size, unsigned int width)
 {
 	const char *out;
 	u64 mask = PERF_MEM_LOCK_NA;
 
-	if (self->mem_info)
-		mask = self->mem_info->data_src.mem_lock;
+	if (he->mem_info)
+		mask = he->mem_info->data_src.mem_lock;
 
 	if (mask & PERF_MEM_LOCK_NA)
 		out = "N/A";
@@ -591,7 +581,7 @@ static const char * const tlb_access[] = {
 };
 #define NUM_TLB_ACCESS (sizeof(tlb_access)/sizeof(const char *))
 
-static int hist_entry__tlb_snprintf(struct hist_entry *self, char *bf,
+static int hist_entry__tlb_snprintf(struct hist_entry *he, char *bf,
 				    size_t size, unsigned int width)
 {
 	char out[64];
@@ -602,8 +592,8 @@ static int hist_entry__tlb_snprintf(struct hist_entry *self, char *bf,
 
 	out[0] = '\0';
 
-	if (self->mem_info)
-		m = self->mem_info->data_src.mem_dtlb;
+	if (he->mem_info)
+		m = he->mem_info->data_src.mem_dtlb;
 
 	hit = m & PERF_MEM_TLB_HIT;
 	miss = m & PERF_MEM_TLB_MISS;
@@ -668,7 +658,7 @@ static const char * const mem_lvl[] = {
 };
 #define NUM_MEM_LVL (sizeof(mem_lvl)/sizeof(const char *))
 
-static int hist_entry__lvl_snprintf(struct hist_entry *self, char *bf,
+static int hist_entry__lvl_snprintf(struct hist_entry *he, char *bf,
 				    size_t size, unsigned int width)
 {
 	char out[64];
@@ -677,8 +667,8 @@ static int hist_entry__lvl_snprintf(struct hist_entry *self, char *bf,
 	u64 m =  PERF_MEM_LVL_NA;
 	u64 hit, miss;
 
-	if (self->mem_info)
-		m  = self->mem_info->data_src.mem_lvl;
+	if (he->mem_info)
+		m  = he->mem_info->data_src.mem_lvl;
 
 	out[0] = '\0';
 
@@ -736,7 +726,7 @@ static const char * const snoop_access[] = {
 };
 #define NUM_SNOOP_ACCESS (sizeof(snoop_access)/sizeof(const char *))
 
-static int hist_entry__snoop_snprintf(struct hist_entry *self, char *bf,
+static int hist_entry__snoop_snprintf(struct hist_entry *he, char *bf,
 				    size_t size, unsigned int width)
 {
 	char out[64];
@@ -746,8 +736,8 @@ static int hist_entry__snoop_snprintf(struct hist_entry *self, char *bf,
 
 	out[0] = '\0';
 
-	if (self->mem_info)
-		m = self->mem_info->data_src.mem_snoop;
+	if (he->mem_info)
+		m = he->mem_info->data_src.mem_snoop;
 
 	for (i = 0; m && i < NUM_SNOOP_ACCESS; i++, m >>= 1) {
 		if (!(m & 0x1))
@@ -784,10 +774,10 @@ sort__local_weight_cmp(struct hist_entry *left, struct hist_entry *right)
 	return he_weight(left) - he_weight(right);
 }
 
-static int hist_entry__local_weight_snprintf(struct hist_entry *self, char *bf,
+static int hist_entry__local_weight_snprintf(struct hist_entry *he, char *bf,
 				    size_t size, unsigned int width)
 {
-	return repsep_snprintf(bf, size, "%-*llu", width, he_weight(self));
+	return repsep_snprintf(bf, size, "%-*llu", width, he_weight(he));
 }
 
 struct sort_entry sort_local_weight = {
@@ -803,10 +793,10 @@ sort__global_weight_cmp(struct hist_entry *left, struct hist_entry *right)
 	return left->stat.weight - right->stat.weight;
 }
 
-static int hist_entry__global_weight_snprintf(struct hist_entry *self, char *bf,
+static int hist_entry__global_weight_snprintf(struct hist_entry *he, char *bf,
 					      size_t size, unsigned int width)
 {
-	return repsep_snprintf(bf, size, "%-*llu", width, self->stat.weight);
+	return repsep_snprintf(bf, size, "%-*llu", width, he->stat.weight);
 }
 
 struct sort_entry sort_global_weight = {
@@ -858,6 +848,127 @@ struct sort_entry sort_mem_snoop = {
 	.se_width_idx	= HISTC_MEM_SNOOP,
 };
 
+static int64_t
+sort__abort_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	return left->branch_info->flags.abort !=
+		right->branch_info->flags.abort;
+}
+
+static int hist_entry__abort_snprintf(struct hist_entry *he, char *bf,
+				    size_t size, unsigned int width)
+{
+	static const char *out = ".";
+
+	if (he->branch_info->flags.abort)
+		out = "A";
+	return repsep_snprintf(bf, size, "%-*s", width, out);
+}
+
+struct sort_entry sort_abort = {
+	.se_header	= "Transaction abort",
+	.se_cmp		= sort__abort_cmp,
+	.se_snprintf	= hist_entry__abort_snprintf,
+	.se_width_idx	= HISTC_ABORT,
+};
+
+static int64_t
+sort__in_tx_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	return left->branch_info->flags.in_tx !=
+		right->branch_info->flags.in_tx;
+}
+
+static int hist_entry__in_tx_snprintf(struct hist_entry *he, char *bf,
+				    size_t size, unsigned int width)
+{
+	static const char *out = ".";
+
+	if (he->branch_info->flags.in_tx)
+		out = "T";
+
+	return repsep_snprintf(bf, size, "%-*s", width, out);
+}
+
+struct sort_entry sort_in_tx = {
+	.se_header	= "Branch in transaction",
+	.se_cmp		= sort__in_tx_cmp,
+	.se_snprintf	= hist_entry__in_tx_snprintf,
+	.se_width_idx	= HISTC_IN_TX,
+};
+
+static int64_t
+sort__transaction_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	return left->transaction - right->transaction;
+}
+
+static inline char *add_str(char *p, const char *str)
+{
+	strcpy(p, str);
+	return p + strlen(str);
+}
+
+static struct txbit {
+	unsigned flag;
+	const char *name;
+	int skip_for_len;
+} txbits[] = {
+	{ PERF_TXN_ELISION,        "EL ",        0 },
+	{ PERF_TXN_TRANSACTION,    "TX ",        1 },
+	{ PERF_TXN_SYNC,           "SYNC ",      1 },
+	{ PERF_TXN_ASYNC,          "ASYNC ",     0 },
+	{ PERF_TXN_RETRY,          "RETRY ",     0 },
+	{ PERF_TXN_CONFLICT,       "CON ",       0 },
+	{ PERF_TXN_CAPACITY_WRITE, "CAP-WRITE ", 1 },
+	{ PERF_TXN_CAPACITY_READ,  "CAP-READ ",  0 },
+	{ 0, NULL, 0 }
+};
+
+int hist_entry__transaction_len(void)
+{
+	int i;
+	int len = 0;
+
+	for (i = 0; txbits[i].name; i++) {
+		if (!txbits[i].skip_for_len)
+			len += strlen(txbits[i].name);
+	}
+	len += 4; /* :XX<space> */
+	return len;
+}
+
+static int hist_entry__transaction_snprintf(struct hist_entry *he, char *bf,
+					    size_t size, unsigned int width)
+{
+	u64 t = he->transaction;
+	char buf[128];
+	char *p = buf;
+	int i;
+
+	buf[0] = 0;
+	for (i = 0; txbits[i].name; i++)
+		if (txbits[i].flag & t)
+			p = add_str(p, txbits[i].name);
+	if (t && !(t & (PERF_TXN_SYNC|PERF_TXN_ASYNC)))
+		p = add_str(p, "NEITHER ");
+	if (t & PERF_TXN_ABORT_MASK) {
+		sprintf(p, ":%" PRIx64,
+			(t & PERF_TXN_ABORT_MASK) >>
+			PERF_TXN_ABORT_SHIFT);
+		p += strlen(p);
+	}
+
+	return repsep_snprintf(bf, size, "%-*s", width, buf);
+}
+
+struct sort_entry sort_transaction = {
+	.se_header	= "Transaction                ",
+	.se_cmp		= sort__transaction_cmp,
+	.se_snprintf	= hist_entry__transaction_snprintf,
+	.se_width_idx	= HISTC_TRANSACTION,
+};
+
 struct sort_dimension {
 	const char		*name;
 	struct sort_entry	*entry;
@@ -876,6 +987,7 @@ static struct sort_dimension common_sort_dimensions[] = {
 	DIM(SORT_SRCLINE, "srcline", sort_srcline),
 	DIM(SORT_LOCAL_WEIGHT, "local_weight", sort_local_weight),
 	DIM(SORT_GLOBAL_WEIGHT, "weight", sort_global_weight),
+	DIM(SORT_TRANSACTION, "transaction", sort_transaction),
 };
 
 #undef DIM
@@ -888,6 +1000,8 @@ static struct sort_dimension bstack_sort_dimensions[] = {
 	DIM(SORT_SYM_FROM, "symbol_from", sort_sym_from),
 	DIM(SORT_SYM_TO, "symbol_to", sort_sym_to),
 	DIM(SORT_MISPREDICT, "mispredict", sort_mispredict),
+	DIM(SORT_IN_TX, "in_tx", sort_in_tx),
+	DIM(SORT_ABORT, "abort", sort_abort),
 };
 
 #undef DIM
@@ -1009,7 +1123,7 @@ int setup_sorting(void)
 	return ret;
 }
 
-static void sort_entry__setup_elide(struct sort_entry *self,
+static void sort_entry__setup_elide(struct sort_entry *se,
 				    struct strlist *list,
 				    const char *list_name, FILE *fp)
 {
@@ -1017,7 +1131,7 @@ static void sort_entry__setup_elide(struct sort_entry *self,
 		if (fp != NULL)
 			fprintf(fp, "# %s: %s\n", list_name,
 				strlist__entry(list, 0)->s);
-		self->elide = true;
+		se->elide = true;
 	}
 }
 
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 4e80dbd271e77e245d41f2f36afdffdf8ae7ab4e..43e5ff42a609a6cb61675bd3cd3bacbd3d9b1c29 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -22,7 +22,6 @@
 #include "parse-events.h"
 
 #include "thread.h"
-#include "sort.h"
 
 extern regex_t parent_regex;
 extern const char *sort_order;
@@ -84,7 +83,9 @@ struct hist_entry {
 	struct he_stat		stat;
 	struct map_symbol	ms;
 	struct thread		*thread;
+	struct comm		*comm;
 	u64			ip;
+	u64			transaction;
 	s32			cpu;
 
 	struct hist_entry_diff	diff;
@@ -145,6 +146,7 @@ enum sort_type {
 	SORT_SRCLINE,
 	SORT_LOCAL_WEIGHT,
 	SORT_GLOBAL_WEIGHT,
+	SORT_TRANSACTION,
 
 	/* branch stack specific sort keys */
 	__SORT_BRANCH_STACK,
@@ -153,6 +155,8 @@ enum sort_type {
 	SORT_SYM_FROM,
 	SORT_SYM_TO,
 	SORT_MISPREDICT,
+	SORT_ABORT,
+	SORT_IN_TX,
 
 	/* memory mode specific sort keys */
 	__SORT_MEMORY_MODE,
@@ -175,7 +179,7 @@ struct sort_entry {
 
 	int64_t (*se_cmp)(struct hist_entry *, struct hist_entry *);
 	int64_t (*se_collapse)(struct hist_entry *, struct hist_entry *);
-	int	(*se_snprintf)(struct hist_entry *self, char *bf, size_t size,
+	int	(*se_snprintf)(struct hist_entry *he, char *bf, size_t size,
 			       unsigned int width);
 	u8	se_width_idx;
 	bool	elide;
diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c
new file mode 100644
index 0000000000000000000000000000000000000000..d11aefbc4b8dcb6f8d0592897776f9717bdb7133
--- /dev/null
+++ b/tools/perf/util/srcline.c
@@ -0,0 +1,265 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <linux/kernel.h>
+
+#include "util/dso.h"
+#include "util/util.h"
+#include "util/debug.h"
+
+#ifdef HAVE_LIBBFD_SUPPORT
+
+/*
+ * Implement addr2line using libbfd.
+ */
+#define PACKAGE "perf"
+#include <bfd.h>
+
+struct a2l_data {
+	const char 	*input;
+	unsigned long 	addr;
+
+	bool 		found;
+	const char 	*filename;
+	const char 	*funcname;
+	unsigned 	line;
+
+	bfd 		*abfd;
+	asymbol 	**syms;
+};
+
+static int bfd_error(const char *string)
+{
+	const char *errmsg;
+
+	errmsg = bfd_errmsg(bfd_get_error());
+	fflush(stdout);
+
+	if (string)
+		pr_debug("%s: %s\n", string, errmsg);
+	else
+		pr_debug("%s\n", errmsg);
+
+	return -1;
+}
+
+static int slurp_symtab(bfd *abfd, struct a2l_data *a2l)
+{
+	long storage;
+	long symcount;
+	asymbol **syms;
+	bfd_boolean dynamic = FALSE;
+
+	if ((bfd_get_file_flags(abfd) & HAS_SYMS) == 0)
+		return bfd_error(bfd_get_filename(abfd));
+
+	storage = bfd_get_symtab_upper_bound(abfd);
+	if (storage == 0L) {
+		storage = bfd_get_dynamic_symtab_upper_bound(abfd);
+		dynamic = TRUE;
+	}
+	if (storage < 0L)
+		return bfd_error(bfd_get_filename(abfd));
+
+	syms = malloc(storage);
+	if (dynamic)
+		symcount = bfd_canonicalize_dynamic_symtab(abfd, syms);
+	else
+		symcount = bfd_canonicalize_symtab(abfd, syms);
+
+	if (symcount < 0) {
+		free(syms);
+		return bfd_error(bfd_get_filename(abfd));
+	}
+
+	a2l->syms = syms;
+	return 0;
+}
+
+static void find_address_in_section(bfd *abfd, asection *section, void *data)
+{
+	bfd_vma pc, vma;
+	bfd_size_type size;
+	struct a2l_data *a2l = data;
+
+	if (a2l->found)
+		return;
+
+	if ((bfd_get_section_flags(abfd, section) & SEC_ALLOC) == 0)
+		return;
+
+	pc = a2l->addr;
+	vma = bfd_get_section_vma(abfd, section);
+	size = bfd_get_section_size(section);
+
+	if (pc < vma || pc >= vma + size)
+		return;
+
+	a2l->found = bfd_find_nearest_line(abfd, section, a2l->syms, pc - vma,
+					   &a2l->filename, &a2l->funcname,
+					   &a2l->line);
+}
+
+static struct a2l_data *addr2line_init(const char *path)
+{
+	bfd *abfd;
+	struct a2l_data *a2l = NULL;
+
+	abfd = bfd_openr(path, NULL);
+	if (abfd == NULL)
+		return NULL;
+
+	if (!bfd_check_format(abfd, bfd_object))
+		goto out;
+
+	a2l = zalloc(sizeof(*a2l));
+	if (a2l == NULL)
+		goto out;
+
+	a2l->abfd = abfd;
+	a2l->input = strdup(path);
+	if (a2l->input == NULL)
+		goto out;
+
+	if (slurp_symtab(abfd, a2l))
+		goto out;
+
+	return a2l;
+
+out:
+	if (a2l) {
+		free((void *)a2l->input);
+		free(a2l);
+	}
+	bfd_close(abfd);
+	return NULL;
+}
+
+static void addr2line_cleanup(struct a2l_data *a2l)
+{
+	if (a2l->abfd)
+		bfd_close(a2l->abfd);
+	free((void *)a2l->input);
+	free(a2l->syms);
+	free(a2l);
+}
+
+static int addr2line(const char *dso_name, unsigned long addr,
+		     char **file, unsigned int *line)
+{
+	int ret = 0;
+	struct a2l_data *a2l;
+
+	a2l = addr2line_init(dso_name);
+	if (a2l == NULL) {
+		pr_warning("addr2line_init failed for %s\n", dso_name);
+		return 0;
+	}
+
+	a2l->addr = addr;
+	bfd_map_over_sections(a2l->abfd, find_address_in_section, a2l);
+
+	if (a2l->found && a2l->filename) {
+		*file = strdup(a2l->filename);
+		*line = a2l->line;
+
+		if (*file)
+			ret = 1;
+	}
+
+	addr2line_cleanup(a2l);
+	return ret;
+}
+
+#else /* HAVE_LIBBFD_SUPPORT */
+
+static int addr2line(const char *dso_name, unsigned long addr,
+		     char **file, unsigned int *line_nr)
+{
+	FILE *fp;
+	char cmd[PATH_MAX];
+	char *filename = NULL;
+	size_t len;
+	char *sep;
+	int ret = 0;
+
+	scnprintf(cmd, sizeof(cmd), "addr2line -e %s %016"PRIx64,
+		  dso_name, addr);
+
+	fp = popen(cmd, "r");
+	if (fp == NULL) {
+		pr_warning("popen failed for %s\n", dso_name);
+		return 0;
+	}
+
+	if (getline(&filename, &len, fp) < 0 || !len) {
+		pr_warning("addr2line has no output for %s\n", dso_name);
+		goto out;
+	}
+
+	sep = strchr(filename, '\n');
+	if (sep)
+		*sep = '\0';
+
+	if (!strcmp(filename, "??:0")) {
+		pr_debug("no debugging info in %s\n", dso_name);
+		free(filename);
+		goto out;
+	}
+
+	sep = strchr(filename, ':');
+	if (sep) {
+		*sep++ = '\0';
+		*file = filename;
+		*line_nr = strtoul(sep, NULL, 0);
+		ret = 1;
+	}
+out:
+	pclose(fp);
+	return ret;
+}
+#endif /* HAVE_LIBBFD_SUPPORT */
+
+char *get_srcline(struct dso *dso, unsigned long addr)
+{
+	char *file = NULL;
+	unsigned line = 0;
+	char *srcline;
+	char *dso_name = dso->long_name;
+	size_t size;
+
+	if (!dso->has_srcline)
+		return SRCLINE_UNKNOWN;
+
+	if (dso_name[0] == '[')
+		goto out;
+
+	if (!strncmp(dso_name, "/tmp/perf-", 10))
+		goto out;
+
+	if (!addr2line(dso_name, addr, &file, &line))
+		goto out;
+
+	/* just calculate actual length */
+	size = snprintf(NULL, 0, "%s:%u", file, line) + 1;
+
+	srcline = malloc(size);
+	if (srcline)
+		snprintf(srcline, size, "%s:%u", file, line);
+	else
+		srcline = SRCLINE_UNKNOWN;
+
+	free(file);
+	return srcline;
+
+out:
+	dso->has_srcline = 0;
+	return SRCLINE_UNKNOWN;
+}
+
+void free_srcline(char *srcline)
+{
+	if (srcline && strcmp(srcline, SRCLINE_UNKNOWN) != 0)
+		free(srcline);
+}
diff --git a/tools/perf/util/strfilter.c b/tools/perf/util/strfilter.c
index 834c8ebfe38e961169e07eb8d89bf17ffe42782c..3edd0538161f3556f49ca1e10b1f3a1edd2e6468 100644
--- a/tools/perf/util/strfilter.c
+++ b/tools/perf/util/strfilter.c
@@ -10,22 +10,22 @@ static const char *OP_not	= "!";	/* Logical NOT */
 #define is_operator(c)	((c) == '|' || (c) == '&' || (c) == '!')
 #define is_separator(c)	(is_operator(c) || (c) == '(' || (c) == ')')
 
-static void strfilter_node__delete(struct strfilter_node *self)
+static void strfilter_node__delete(struct strfilter_node *node)
 {
-	if (self) {
-		if (self->p && !is_operator(*self->p))
-			free((char *)self->p);
-		strfilter_node__delete(self->l);
-		strfilter_node__delete(self->r);
-		free(self);
+	if (node) {
+		if (node->p && !is_operator(*node->p))
+			free((char *)node->p);
+		strfilter_node__delete(node->l);
+		strfilter_node__delete(node->r);
+		free(node);
 	}
 }
 
-void strfilter__delete(struct strfilter *self)
+void strfilter__delete(struct strfilter *filter)
 {
-	if (self) {
-		strfilter_node__delete(self->root);
-		free(self);
+	if (filter) {
+		strfilter_node__delete(filter->root);
+		free(filter);
 	}
 }
 
@@ -62,15 +62,15 @@ static struct strfilter_node *strfilter_node__alloc(const char *op,
 						    struct strfilter_node *l,
 						    struct strfilter_node *r)
 {
-	struct strfilter_node *ret = zalloc(sizeof(struct strfilter_node));
+	struct strfilter_node *node = zalloc(sizeof(*node));
 
-	if (ret) {
-		ret->p = op;
-		ret->l = l;
-		ret->r = r;
+	if (node) {
+		node->p = op;
+		node->l = l;
+		node->r = r;
 	}
 
-	return ret;
+	return node;
 }
 
 static struct strfilter_node *strfilter_node__new(const char *s,
@@ -154,46 +154,46 @@ static struct strfilter_node *strfilter_node__new(const char *s,
  */
 struct strfilter *strfilter__new(const char *rules, const char **err)
 {
-	struct strfilter *ret = zalloc(sizeof(struct strfilter));
+	struct strfilter *filter = zalloc(sizeof(*filter));
 	const char *ep = NULL;
 
-	if (ret)
-		ret->root = strfilter_node__new(rules, &ep);
+	if (filter)
+		filter->root = strfilter_node__new(rules, &ep);
 
-	if (!ret || !ret->root || *ep != '\0') {
+	if (!filter || !filter->root || *ep != '\0') {
 		if (err)
 			*err = ep;
-		strfilter__delete(ret);
-		ret = NULL;
+		strfilter__delete(filter);
+		filter = NULL;
 	}
 
-	return ret;
+	return filter;
 }
 
-static bool strfilter_node__compare(struct strfilter_node *self,
+static bool strfilter_node__compare(struct strfilter_node *node,
 				    const char *str)
 {
-	if (!self || !self->p)
+	if (!node || !node->p)
 		return false;
 
-	switch (*self->p) {
+	switch (*node->p) {
 	case '|':	/* OR */
-		return strfilter_node__compare(self->l, str) ||
-			strfilter_node__compare(self->r, str);
+		return strfilter_node__compare(node->l, str) ||
+			strfilter_node__compare(node->r, str);
 	case '&':	/* AND */
-		return strfilter_node__compare(self->l, str) &&
-			strfilter_node__compare(self->r, str);
+		return strfilter_node__compare(node->l, str) &&
+			strfilter_node__compare(node->r, str);
 	case '!':	/* NOT */
-		return !strfilter_node__compare(self->r, str);
+		return !strfilter_node__compare(node->r, str);
 	default:
-		return strglobmatch(str, self->p);
+		return strglobmatch(str, node->p);
 	}
 }
 
 /* Return true if STR matches the filter rules */
-bool strfilter__compare(struct strfilter *self, const char *str)
+bool strfilter__compare(struct strfilter *filter, const char *str)
 {
-	if (!self)
+	if (!filter)
 		return false;
-	return strfilter_node__compare(self->root, str);
+	return strfilter_node__compare(filter->root, str);
 }
diff --git a/tools/perf/util/strfilter.h b/tools/perf/util/strfilter.h
index 00f58a7506dea45ddcef2f9319324d8813ef2668..fe611f3c9e3965007a7ea5ed9f3fbbc768b5f271 100644
--- a/tools/perf/util/strfilter.h
+++ b/tools/perf/util/strfilter.h
@@ -30,19 +30,19 @@ struct strfilter *strfilter__new(const char *rules, const char **err);
 
 /**
  * strfilter__compare - compare given string and a string filter
- * @self: String filter
+ * @filter: String filter
  * @str: target string
  *
- * Compare @str and @self. Return true if the str match the rule
+ * Compare @str and @filter. Return true if the str match the rule
  */
-bool strfilter__compare(struct strfilter *self, const char *str);
+bool strfilter__compare(struct strfilter *filter, const char *str);
 
 /**
  * strfilter__delete - delete a string filter
- * @self: String filter to delete
+ * @filter: String filter to delete
  *
- * Delete @self.
+ * Delete @filter.
  */
-void strfilter__delete(struct strfilter *self);
+void strfilter__delete(struct strfilter *filter);
 
 #endif
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index a9c829be52169eac5b9f00d9382fd9281152b9e6..eed0b96302af189fa4b7c75f73ef538b8493336e 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -8,7 +8,7 @@
 #include "symbol.h"
 #include "debug.h"
 
-#ifndef HAVE_ELF_GETPHDRNUM
+#ifndef HAVE_ELF_GETPHDRNUM_SUPPORT
 static int elf_getphdrnum(Elf *elf, size_t *dst)
 {
 	GElf_Ehdr gehdr;
@@ -487,27 +487,27 @@ int filename__read_debuglink(const char *filename, char *debuglink,
 
 	ek = elf_kind(elf);
 	if (ek != ELF_K_ELF)
-		goto out_close;
+		goto out_elf_end;
 
 	if (gelf_getehdr(elf, &ehdr) == NULL) {
 		pr_err("%s: cannot get elf header.\n", __func__);
-		goto out_close;
+		goto out_elf_end;
 	}
 
 	sec = elf_section_by_name(elf, &ehdr, &shdr,
 				  ".gnu_debuglink", NULL);
 	if (sec == NULL)
-		goto out_close;
+		goto out_elf_end;
 
 	data = elf_getdata(sec, NULL);
 	if (data == NULL)
-		goto out_close;
+		goto out_elf_end;
 
 	/* the start of this section is a zero-terminated string */
 	strncpy(debuglink, data->d_buf, size);
 
+out_elf_end:
 	elf_end(elf);
-
 out_close:
 	close(fd);
 out:
@@ -1018,6 +1018,601 @@ int file__read_maps(int fd, bool exe, mapfn_t mapfn, void *data,
 	return err;
 }
 
+static int copy_bytes(int from, off_t from_offs, int to, off_t to_offs, u64 len)
+{
+	ssize_t r;
+	size_t n;
+	int err = -1;
+	char *buf = malloc(page_size);
+
+	if (buf == NULL)
+		return -1;
+
+	if (lseek(to, to_offs, SEEK_SET) != to_offs)
+		goto out;
+
+	if (lseek(from, from_offs, SEEK_SET) != from_offs)
+		goto out;
+
+	while (len) {
+		n = page_size;
+		if (len < n)
+			n = len;
+		/* Use read because mmap won't work on proc files */
+		r = read(from, buf, n);
+		if (r < 0)
+			goto out;
+		if (!r)
+			break;
+		n = r;
+		r = write(to, buf, n);
+		if (r < 0)
+			goto out;
+		if ((size_t)r != n)
+			goto out;
+		len -= n;
+	}
+
+	err = 0;
+out:
+	free(buf);
+	return err;
+}
+
+struct kcore {
+	int fd;
+	int elfclass;
+	Elf *elf;
+	GElf_Ehdr ehdr;
+};
+
+static int kcore__open(struct kcore *kcore, const char *filename)
+{
+	GElf_Ehdr *ehdr;
+
+	kcore->fd = open(filename, O_RDONLY);
+	if (kcore->fd == -1)
+		return -1;
+
+	kcore->elf = elf_begin(kcore->fd, ELF_C_READ, NULL);
+	if (!kcore->elf)
+		goto out_close;
+
+	kcore->elfclass = gelf_getclass(kcore->elf);
+	if (kcore->elfclass == ELFCLASSNONE)
+		goto out_end;
+
+	ehdr = gelf_getehdr(kcore->elf, &kcore->ehdr);
+	if (!ehdr)
+		goto out_end;
+
+	return 0;
+
+out_end:
+	elf_end(kcore->elf);
+out_close:
+	close(kcore->fd);
+	return -1;
+}
+
+static int kcore__init(struct kcore *kcore, char *filename, int elfclass,
+		       bool temp)
+{
+	GElf_Ehdr *ehdr;
+
+	kcore->elfclass = elfclass;
+
+	if (temp)
+		kcore->fd = mkstemp(filename);
+	else
+		kcore->fd = open(filename, O_WRONLY | O_CREAT | O_EXCL, 0400);
+	if (kcore->fd == -1)
+		return -1;
+
+	kcore->elf = elf_begin(kcore->fd, ELF_C_WRITE, NULL);
+	if (!kcore->elf)
+		goto out_close;
+
+	if (!gelf_newehdr(kcore->elf, elfclass))
+		goto out_end;
+
+	ehdr = gelf_getehdr(kcore->elf, &kcore->ehdr);
+	if (!ehdr)
+		goto out_end;
+
+	return 0;
+
+out_end:
+	elf_end(kcore->elf);
+out_close:
+	close(kcore->fd);
+	unlink(filename);
+	return -1;
+}
+
+static void kcore__close(struct kcore *kcore)
+{
+	elf_end(kcore->elf);
+	close(kcore->fd);
+}
+
+static int kcore__copy_hdr(struct kcore *from, struct kcore *to, size_t count)
+{
+	GElf_Ehdr *ehdr = &to->ehdr;
+	GElf_Ehdr *kehdr = &from->ehdr;
+
+	memcpy(ehdr->e_ident, kehdr->e_ident, EI_NIDENT);
+	ehdr->e_type      = kehdr->e_type;
+	ehdr->e_machine   = kehdr->e_machine;
+	ehdr->e_version   = kehdr->e_version;
+	ehdr->e_entry     = 0;
+	ehdr->e_shoff     = 0;
+	ehdr->e_flags     = kehdr->e_flags;
+	ehdr->e_phnum     = count;
+	ehdr->e_shentsize = 0;
+	ehdr->e_shnum     = 0;
+	ehdr->e_shstrndx  = 0;
+
+	if (from->elfclass == ELFCLASS32) {
+		ehdr->e_phoff     = sizeof(Elf32_Ehdr);
+		ehdr->e_ehsize    = sizeof(Elf32_Ehdr);
+		ehdr->e_phentsize = sizeof(Elf32_Phdr);
+	} else {
+		ehdr->e_phoff     = sizeof(Elf64_Ehdr);
+		ehdr->e_ehsize    = sizeof(Elf64_Ehdr);
+		ehdr->e_phentsize = sizeof(Elf64_Phdr);
+	}
+
+	if (!gelf_update_ehdr(to->elf, ehdr))
+		return -1;
+
+	if (!gelf_newphdr(to->elf, count))
+		return -1;
+
+	return 0;
+}
+
+static int kcore__add_phdr(struct kcore *kcore, int idx, off_t offset,
+			   u64 addr, u64 len)
+{
+	GElf_Phdr gphdr;
+	GElf_Phdr *phdr;
+
+	phdr = gelf_getphdr(kcore->elf, idx, &gphdr);
+	if (!phdr)
+		return -1;
+
+	phdr->p_type	= PT_LOAD;
+	phdr->p_flags	= PF_R | PF_W | PF_X;
+	phdr->p_offset	= offset;
+	phdr->p_vaddr	= addr;
+	phdr->p_paddr	= 0;
+	phdr->p_filesz	= len;
+	phdr->p_memsz	= len;
+	phdr->p_align	= page_size;
+
+	if (!gelf_update_phdr(kcore->elf, idx, phdr))
+		return -1;
+
+	return 0;
+}
+
+static off_t kcore__write(struct kcore *kcore)
+{
+	return elf_update(kcore->elf, ELF_C_WRITE);
+}
+
+struct phdr_data {
+	off_t offset;
+	u64 addr;
+	u64 len;
+};
+
+struct kcore_copy_info {
+	u64 stext;
+	u64 etext;
+	u64 first_symbol;
+	u64 last_symbol;
+	u64 first_module;
+	u64 last_module_symbol;
+	struct phdr_data kernel_map;
+	struct phdr_data modules_map;
+};
+
+static int kcore_copy__process_kallsyms(void *arg, const char *name, char type,
+					u64 start)
+{
+	struct kcore_copy_info *kci = arg;
+
+	if (!symbol_type__is_a(type, MAP__FUNCTION))
+		return 0;
+
+	if (strchr(name, '[')) {
+		if (start > kci->last_module_symbol)
+			kci->last_module_symbol = start;
+		return 0;
+	}
+
+	if (!kci->first_symbol || start < kci->first_symbol)
+		kci->first_symbol = start;
+
+	if (!kci->last_symbol || start > kci->last_symbol)
+		kci->last_symbol = start;
+
+	if (!strcmp(name, "_stext")) {
+		kci->stext = start;
+		return 0;
+	}
+
+	if (!strcmp(name, "_etext")) {
+		kci->etext = start;
+		return 0;
+	}
+
+	return 0;
+}
+
+static int kcore_copy__parse_kallsyms(struct kcore_copy_info *kci,
+				      const char *dir)
+{
+	char kallsyms_filename[PATH_MAX];
+
+	scnprintf(kallsyms_filename, PATH_MAX, "%s/kallsyms", dir);
+
+	if (symbol__restricted_filename(kallsyms_filename, "/proc/kallsyms"))
+		return -1;
+
+	if (kallsyms__parse(kallsyms_filename, kci,
+			    kcore_copy__process_kallsyms) < 0)
+		return -1;
+
+	return 0;
+}
+
+static int kcore_copy__process_modules(void *arg,
+				       const char *name __maybe_unused,
+				       u64 start)
+{
+	struct kcore_copy_info *kci = arg;
+
+	if (!kci->first_module || start < kci->first_module)
+		kci->first_module = start;
+
+	return 0;
+}
+
+static int kcore_copy__parse_modules(struct kcore_copy_info *kci,
+				     const char *dir)
+{
+	char modules_filename[PATH_MAX];
+
+	scnprintf(modules_filename, PATH_MAX, "%s/modules", dir);
+
+	if (symbol__restricted_filename(modules_filename, "/proc/modules"))
+		return -1;
+
+	if (modules__parse(modules_filename, kci,
+			   kcore_copy__process_modules) < 0)
+		return -1;
+
+	return 0;
+}
+
+static void kcore_copy__map(struct phdr_data *p, u64 start, u64 end, u64 pgoff,
+			    u64 s, u64 e)
+{
+	if (p->addr || s < start || s >= end)
+		return;
+
+	p->addr = s;
+	p->offset = (s - start) + pgoff;
+	p->len = e < end ? e - s : end - s;
+}
+
+static int kcore_copy__read_map(u64 start, u64 len, u64 pgoff, void *data)
+{
+	struct kcore_copy_info *kci = data;
+	u64 end = start + len;
+
+	kcore_copy__map(&kci->kernel_map, start, end, pgoff, kci->stext,
+			kci->etext);
+
+	kcore_copy__map(&kci->modules_map, start, end, pgoff, kci->first_module,
+			kci->last_module_symbol);
+
+	return 0;
+}
+
+static int kcore_copy__read_maps(struct kcore_copy_info *kci, Elf *elf)
+{
+	if (elf_read_maps(elf, true, kcore_copy__read_map, kci) < 0)
+		return -1;
+
+	return 0;
+}
+
+static int kcore_copy__calc_maps(struct kcore_copy_info *kci, const char *dir,
+				 Elf *elf)
+{
+	if (kcore_copy__parse_kallsyms(kci, dir))
+		return -1;
+
+	if (kcore_copy__parse_modules(kci, dir))
+		return -1;
+
+	if (kci->stext)
+		kci->stext = round_down(kci->stext, page_size);
+	else
+		kci->stext = round_down(kci->first_symbol, page_size);
+
+	if (kci->etext) {
+		kci->etext = round_up(kci->etext, page_size);
+	} else if (kci->last_symbol) {
+		kci->etext = round_up(kci->last_symbol, page_size);
+		kci->etext += page_size;
+	}
+
+	kci->first_module = round_down(kci->first_module, page_size);
+
+	if (kci->last_module_symbol) {
+		kci->last_module_symbol = round_up(kci->last_module_symbol,
+						   page_size);
+		kci->last_module_symbol += page_size;
+	}
+
+	if (!kci->stext || !kci->etext)
+		return -1;
+
+	if (kci->first_module && !kci->last_module_symbol)
+		return -1;
+
+	return kcore_copy__read_maps(kci, elf);
+}
+
+static int kcore_copy__copy_file(const char *from_dir, const char *to_dir,
+				 const char *name)
+{
+	char from_filename[PATH_MAX];
+	char to_filename[PATH_MAX];
+
+	scnprintf(from_filename, PATH_MAX, "%s/%s", from_dir, name);
+	scnprintf(to_filename, PATH_MAX, "%s/%s", to_dir, name);
+
+	return copyfile_mode(from_filename, to_filename, 0400);
+}
+
+static int kcore_copy__unlink(const char *dir, const char *name)
+{
+	char filename[PATH_MAX];
+
+	scnprintf(filename, PATH_MAX, "%s/%s", dir, name);
+
+	return unlink(filename);
+}
+
+static int kcore_copy__compare_fds(int from, int to)
+{
+	char *buf_from;
+	char *buf_to;
+	ssize_t ret;
+	size_t len;
+	int err = -1;
+
+	buf_from = malloc(page_size);
+	buf_to = malloc(page_size);
+	if (!buf_from || !buf_to)
+		goto out;
+
+	while (1) {
+		/* Use read because mmap won't work on proc files */
+		ret = read(from, buf_from, page_size);
+		if (ret < 0)
+			goto out;
+
+		if (!ret)
+			break;
+
+		len = ret;
+
+		if (readn(to, buf_to, len) != (int)len)
+			goto out;
+
+		if (memcmp(buf_from, buf_to, len))
+			goto out;
+	}
+
+	err = 0;
+out:
+	free(buf_to);
+	free(buf_from);
+	return err;
+}
+
+static int kcore_copy__compare_files(const char *from_filename,
+				     const char *to_filename)
+{
+	int from, to, err = -1;
+
+	from = open(from_filename, O_RDONLY);
+	if (from < 0)
+		return -1;
+
+	to = open(to_filename, O_RDONLY);
+	if (to < 0)
+		goto out_close_from;
+
+	err = kcore_copy__compare_fds(from, to);
+
+	close(to);
+out_close_from:
+	close(from);
+	return err;
+}
+
+static int kcore_copy__compare_file(const char *from_dir, const char *to_dir,
+				    const char *name)
+{
+	char from_filename[PATH_MAX];
+	char to_filename[PATH_MAX];
+
+	scnprintf(from_filename, PATH_MAX, "%s/%s", from_dir, name);
+	scnprintf(to_filename, PATH_MAX, "%s/%s", to_dir, name);
+
+	return kcore_copy__compare_files(from_filename, to_filename);
+}
+
+/**
+ * kcore_copy - copy kallsyms, modules and kcore from one directory to another.
+ * @from_dir: from directory
+ * @to_dir: to directory
+ *
+ * This function copies kallsyms, modules and kcore files from one directory to
+ * another.  kallsyms and modules are copied entirely.  Only code segments are
+ * copied from kcore.  It is assumed that two segments suffice: one for the
+ * kernel proper and one for all the modules.  The code segments are determined
+ * from kallsyms and modules files.  The kernel map starts at _stext or the
+ * lowest function symbol, and ends at _etext or the highest function symbol.
+ * The module map starts at the lowest module address and ends at the highest
+ * module symbol.  Start addresses are rounded down to the nearest page.  End
+ * addresses are rounded up to the nearest page.  An extra page is added to the
+ * highest kernel symbol and highest module symbol to, hopefully, encompass that
+ * symbol too.  Because it contains only code sections, the resulting kcore is
+ * unusual.  One significant peculiarity is that the mapping (start -> pgoff)
+ * is not the same for the kernel map and the modules map.  That happens because
+ * the data is copied adjacently whereas the original kcore has gaps.  Finally,
+ * kallsyms and modules files are compared with their copies to check that
+ * modules have not been loaded or unloaded while the copies were taking place.
+ *
+ * Return: %0 on success, %-1 on failure.
+ */
+int kcore_copy(const char *from_dir, const char *to_dir)
+{
+	struct kcore kcore;
+	struct kcore extract;
+	size_t count = 2;
+	int idx = 0, err = -1;
+	off_t offset = page_size, sz, modules_offset = 0;
+	struct kcore_copy_info kci = { .stext = 0, };
+	char kcore_filename[PATH_MAX];
+	char extract_filename[PATH_MAX];
+
+	if (kcore_copy__copy_file(from_dir, to_dir, "kallsyms"))
+		return -1;
+
+	if (kcore_copy__copy_file(from_dir, to_dir, "modules"))
+		goto out_unlink_kallsyms;
+
+	scnprintf(kcore_filename, PATH_MAX, "%s/kcore", from_dir);
+	scnprintf(extract_filename, PATH_MAX, "%s/kcore", to_dir);
+
+	if (kcore__open(&kcore, kcore_filename))
+		goto out_unlink_modules;
+
+	if (kcore_copy__calc_maps(&kci, from_dir, kcore.elf))
+		goto out_kcore_close;
+
+	if (kcore__init(&extract, extract_filename, kcore.elfclass, false))
+		goto out_kcore_close;
+
+	if (!kci.modules_map.addr)
+		count -= 1;
+
+	if (kcore__copy_hdr(&kcore, &extract, count))
+		goto out_extract_close;
+
+	if (kcore__add_phdr(&extract, idx++, offset, kci.kernel_map.addr,
+			    kci.kernel_map.len))
+		goto out_extract_close;
+
+	if (kci.modules_map.addr) {
+		modules_offset = offset + kci.kernel_map.len;
+		if (kcore__add_phdr(&extract, idx, modules_offset,
+				    kci.modules_map.addr, kci.modules_map.len))
+			goto out_extract_close;
+	}
+
+	sz = kcore__write(&extract);
+	if (sz < 0 || sz > offset)
+		goto out_extract_close;
+
+	if (copy_bytes(kcore.fd, kci.kernel_map.offset, extract.fd, offset,
+		       kci.kernel_map.len))
+		goto out_extract_close;
+
+	if (modules_offset && copy_bytes(kcore.fd, kci.modules_map.offset,
+					 extract.fd, modules_offset,
+					 kci.modules_map.len))
+		goto out_extract_close;
+
+	if (kcore_copy__compare_file(from_dir, to_dir, "modules"))
+		goto out_extract_close;
+
+	if (kcore_copy__compare_file(from_dir, to_dir, "kallsyms"))
+		goto out_extract_close;
+
+	err = 0;
+
+out_extract_close:
+	kcore__close(&extract);
+	if (err)
+		unlink(extract_filename);
+out_kcore_close:
+	kcore__close(&kcore);
+out_unlink_modules:
+	if (err)
+		kcore_copy__unlink(to_dir, "modules");
+out_unlink_kallsyms:
+	if (err)
+		kcore_copy__unlink(to_dir, "kallsyms");
+
+	return err;
+}
+
+int kcore_extract__create(struct kcore_extract *kce)
+{
+	struct kcore kcore;
+	struct kcore extract;
+	size_t count = 1;
+	int idx = 0, err = -1;
+	off_t offset = page_size, sz;
+
+	if (kcore__open(&kcore, kce->kcore_filename))
+		return -1;
+
+	strcpy(kce->extract_filename, PERF_KCORE_EXTRACT);
+	if (kcore__init(&extract, kce->extract_filename, kcore.elfclass, true))
+		goto out_kcore_close;
+
+	if (kcore__copy_hdr(&kcore, &extract, count))
+		goto out_extract_close;
+
+	if (kcore__add_phdr(&extract, idx, offset, kce->addr, kce->len))
+		goto out_extract_close;
+
+	sz = kcore__write(&extract);
+	if (sz < 0 || sz > offset)
+		goto out_extract_close;
+
+	if (copy_bytes(kcore.fd, kce->offs, extract.fd, offset, kce->len))
+		goto out_extract_close;
+
+	err = 0;
+
+out_extract_close:
+	kcore__close(&extract);
+	if (err)
+		unlink(kce->extract_filename);
+out_kcore_close:
+	kcore__close(&kcore);
+
+	return err;
+}
+
+void kcore_extract__delete(struct kcore_extract *kce)
+{
+	unlink(kce->extract_filename);
+}
+
 void symbol__elf_init(void)
 {
 	elf_version(EV_CURRENT);
diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c
index 3a802c300fc564adce713aebbd848a1b936e78c1..2d2dd0532b5a971e588086c0605ae0d6ed4080df 100644
--- a/tools/perf/util/symbol-minimal.c
+++ b/tools/perf/util/symbol-minimal.c
@@ -308,6 +308,21 @@ int file__read_maps(int fd __maybe_unused, bool exe __maybe_unused,
 	return -1;
 }
 
+int kcore_extract__create(struct kcore_extract *kce __maybe_unused)
+{
+	return -1;
+}
+
+void kcore_extract__delete(struct kcore_extract *kce __maybe_unused)
+{
+}
+
+int kcore_copy(const char *from_dir __maybe_unused,
+	       const char *to_dir __maybe_unused)
+{
+	return -1;
+}
+
 void symbol__elf_init(void)
 {
 }
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 7eb0362f4ffd2468c81f6ae1a93686d00dfefe50..c0c36965fff0f0060b2f2a077edc17c569648bf2 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -51,6 +51,7 @@ static enum dso_binary_type binary_type_symtab[] = {
 	DSO_BINARY_TYPE__SYSTEM_PATH_DSO,
 	DSO_BINARY_TYPE__GUEST_KMODULE,
 	DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE,
+	DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO,
 	DSO_BINARY_TYPE__NOT_FOUND,
 };
 
@@ -159,10 +160,12 @@ void symbols__fixup_duplicate(struct rb_root *symbols)
 
 		if (choose_best_symbol(curr, next) == SYMBOL_A) {
 			rb_erase(&next->rb_node, symbols);
+			symbol__delete(next);
 			goto again;
 		} else {
 			nd = rb_next(&curr->rb_node);
 			rb_erase(&curr->rb_node, symbols);
+			symbol__delete(curr);
 		}
 	}
 }
@@ -499,6 +502,64 @@ int kallsyms__parse(const char *filename, void *arg,
 	return -1;
 }
 
+int modules__parse(const char *filename, void *arg,
+		   int (*process_module)(void *arg, const char *name,
+					 u64 start))
+{
+	char *line = NULL;
+	size_t n;
+	FILE *file;
+	int err = 0;
+
+	file = fopen(filename, "r");
+	if (file == NULL)
+		return -1;
+
+	while (1) {
+		char name[PATH_MAX];
+		u64 start;
+		char *sep;
+		ssize_t line_len;
+
+		line_len = getline(&line, &n, file);
+		if (line_len < 0) {
+			if (feof(file))
+				break;
+			err = -1;
+			goto out;
+		}
+
+		if (!line) {
+			err = -1;
+			goto out;
+		}
+
+		line[--line_len] = '\0'; /* \n */
+
+		sep = strrchr(line, 'x');
+		if (sep == NULL)
+			continue;
+
+		hex2u64(sep + 1, &start);
+
+		sep = strchr(line, ' ');
+		if (sep == NULL)
+			continue;
+
+		*sep = '\0';
+
+		scnprintf(name, sizeof(name), "[%s]", line);
+
+		err = process_module(arg, name, start);
+		if (err)
+			break;
+	}
+out:
+	free(line);
+	fclose(file);
+	return err;
+}
+
 struct process_kallsyms_args {
 	struct map *map;
 	struct dso *dso;
@@ -739,51 +800,242 @@ bool symbol__restricted_filename(const char *filename,
 	return restricted;
 }
 
-struct kcore_mapfn_data {
-	struct dso *dso;
-	enum map_type type;
-	struct list_head maps;
+struct module_info {
+	struct rb_node rb_node;
+	char *name;
+	u64 start;
 };
 
-static int kcore_mapfn(u64 start, u64 len, u64 pgoff, void *data)
+static void add_module(struct module_info *mi, struct rb_root *modules)
 {
-	struct kcore_mapfn_data *md = data;
-	struct map *map;
+	struct rb_node **p = &modules->rb_node;
+	struct rb_node *parent = NULL;
+	struct module_info *m;
 
-	map = map__new2(start, md->dso, md->type);
-	if (map == NULL)
+	while (*p != NULL) {
+		parent = *p;
+		m = rb_entry(parent, struct module_info, rb_node);
+		if (strcmp(mi->name, m->name) < 0)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+	rb_link_node(&mi->rb_node, parent, p);
+	rb_insert_color(&mi->rb_node, modules);
+}
+
+static void delete_modules(struct rb_root *modules)
+{
+	struct module_info *mi;
+	struct rb_node *next = rb_first(modules);
+
+	while (next) {
+		mi = rb_entry(next, struct module_info, rb_node);
+		next = rb_next(&mi->rb_node);
+		rb_erase(&mi->rb_node, modules);
+		free(mi->name);
+		free(mi);
+	}
+}
+
+static struct module_info *find_module(const char *name,
+				       struct rb_root *modules)
+{
+	struct rb_node *n = modules->rb_node;
+
+	while (n) {
+		struct module_info *m;
+		int cmp;
+
+		m = rb_entry(n, struct module_info, rb_node);
+		cmp = strcmp(name, m->name);
+		if (cmp < 0)
+			n = n->rb_left;
+		else if (cmp > 0)
+			n = n->rb_right;
+		else
+			return m;
+	}
+
+	return NULL;
+}
+
+static int __read_proc_modules(void *arg, const char *name, u64 start)
+{
+	struct rb_root *modules = arg;
+	struct module_info *mi;
+
+	mi = zalloc(sizeof(struct module_info));
+	if (!mi)
 		return -ENOMEM;
 
-	map->end = map->start + len;
-	map->pgoff = pgoff;
+	mi->name = strdup(name);
+	mi->start = start;
 
-	list_add(&map->node, &md->maps);
+	if (!mi->name) {
+		free(mi);
+		return -ENOMEM;
+	}
+
+	add_module(mi, modules);
+
+	return 0;
+}
+
+static int read_proc_modules(const char *filename, struct rb_root *modules)
+{
+	if (symbol__restricted_filename(filename, "/proc/modules"))
+		return -1;
+
+	if (modules__parse(filename, modules, __read_proc_modules)) {
+		delete_modules(modules);
+		return -1;
+	}
 
 	return 0;
 }
 
+int compare_proc_modules(const char *from, const char *to)
+{
+	struct rb_root from_modules = RB_ROOT;
+	struct rb_root to_modules = RB_ROOT;
+	struct rb_node *from_node, *to_node;
+	struct module_info *from_m, *to_m;
+	int ret = -1;
+
+	if (read_proc_modules(from, &from_modules))
+		return -1;
+
+	if (read_proc_modules(to, &to_modules))
+		goto out_delete_from;
+
+	from_node = rb_first(&from_modules);
+	to_node = rb_first(&to_modules);
+	while (from_node) {
+		if (!to_node)
+			break;
+
+		from_m = rb_entry(from_node, struct module_info, rb_node);
+		to_m = rb_entry(to_node, struct module_info, rb_node);
+
+		if (from_m->start != to_m->start ||
+		    strcmp(from_m->name, to_m->name))
+			break;
+
+		from_node = rb_next(from_node);
+		to_node = rb_next(to_node);
+	}
+
+	if (!from_node && !to_node)
+		ret = 0;
+
+	delete_modules(&to_modules);
+out_delete_from:
+	delete_modules(&from_modules);
+
+	return ret;
+}
+
+static int do_validate_kcore_modules(const char *filename, struct map *map,
+				  struct map_groups *kmaps)
+{
+	struct rb_root modules = RB_ROOT;
+	struct map *old_map;
+	int err;
+
+	err = read_proc_modules(filename, &modules);
+	if (err)
+		return err;
+
+	old_map = map_groups__first(kmaps, map->type);
+	while (old_map) {
+		struct map *next = map_groups__next(old_map);
+		struct module_info *mi;
+
+		if (old_map == map || old_map->start == map->start) {
+			/* The kernel map */
+			old_map = next;
+			continue;
+		}
+
+		/* Module must be in memory at the same address */
+		mi = find_module(old_map->dso->short_name, &modules);
+		if (!mi || mi->start != old_map->start) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		old_map = next;
+	}
+out:
+	delete_modules(&modules);
+	return err;
+}
+
 /*
- * If kallsyms is referenced by name then we look for kcore in the same
+ * If kallsyms is referenced by name then we look for filename in the same
  * directory.
  */
-static bool kcore_filename_from_kallsyms_filename(char *kcore_filename,
-						  const char *kallsyms_filename)
+static bool filename_from_kallsyms_filename(char *filename,
+					    const char *base_name,
+					    const char *kallsyms_filename)
 {
 	char *name;
 
-	strcpy(kcore_filename, kallsyms_filename);
-	name = strrchr(kcore_filename, '/');
+	strcpy(filename, kallsyms_filename);
+	name = strrchr(filename, '/');
 	if (!name)
 		return false;
 
-	if (!strcmp(name, "/kallsyms")) {
-		strcpy(name, "/kcore");
+	name += 1;
+
+	if (!strcmp(name, "kallsyms")) {
+		strcpy(name, base_name);
 		return true;
 	}
 
 	return false;
 }
 
+static int validate_kcore_modules(const char *kallsyms_filename,
+				  struct map *map)
+{
+	struct map_groups *kmaps = map__kmap(map)->kmaps;
+	char modules_filename[PATH_MAX];
+
+	if (!filename_from_kallsyms_filename(modules_filename, "modules",
+					     kallsyms_filename))
+		return -EINVAL;
+
+	if (do_validate_kcore_modules(modules_filename, map, kmaps))
+		return -EINVAL;
+
+	return 0;
+}
+
+struct kcore_mapfn_data {
+	struct dso *dso;
+	enum map_type type;
+	struct list_head maps;
+};
+
+static int kcore_mapfn(u64 start, u64 len, u64 pgoff, void *data)
+{
+	struct kcore_mapfn_data *md = data;
+	struct map *map;
+
+	map = map__new2(start, md->dso, md->type);
+	if (map == NULL)
+		return -ENOMEM;
+
+	map->end = map->start + len;
+	map->pgoff = pgoff;
+
+	list_add(&map->node, &md->maps);
+
+	return 0;
+}
+
 static int dso__load_kcore(struct dso *dso, struct map *map,
 			   const char *kallsyms_filename)
 {
@@ -800,8 +1052,12 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
 	if (map != machine->vmlinux_maps[map->type])
 		return -EINVAL;
 
-	if (!kcore_filename_from_kallsyms_filename(kcore_filename,
-						   kallsyms_filename))
+	if (!filename_from_kallsyms_filename(kcore_filename, "kcore",
+					     kallsyms_filename))
+		return -EINVAL;
+
+	/* All modules must be present at their original addresses */
+	if (validate_kcore_modules(kallsyms_filename, map))
 		return -EINVAL;
 
 	md.dso = dso;
@@ -1188,6 +1444,105 @@ int dso__load_vmlinux_path(struct dso *dso, struct map *map,
 	return err;
 }
 
+static int find_matching_kcore(struct map *map, char *dir, size_t dir_sz)
+{
+	char kallsyms_filename[PATH_MAX];
+	struct dirent *dent;
+	int ret = -1;
+	DIR *d;
+
+	d = opendir(dir);
+	if (!d)
+		return -1;
+
+	while (1) {
+		dent = readdir(d);
+		if (!dent)
+			break;
+		if (dent->d_type != DT_DIR)
+			continue;
+		scnprintf(kallsyms_filename, sizeof(kallsyms_filename),
+			  "%s/%s/kallsyms", dir, dent->d_name);
+		if (!validate_kcore_modules(kallsyms_filename, map)) {
+			strlcpy(dir, kallsyms_filename, dir_sz);
+			ret = 0;
+			break;
+		}
+	}
+
+	closedir(d);
+
+	return ret;
+}
+
+static char *dso__find_kallsyms(struct dso *dso, struct map *map)
+{
+	u8 host_build_id[BUILD_ID_SIZE];
+	char sbuild_id[BUILD_ID_SIZE * 2 + 1];
+	bool is_host = false;
+	char path[PATH_MAX];
+
+	if (!dso->has_build_id) {
+		/*
+		 * Last resort, if we don't have a build-id and couldn't find
+		 * any vmlinux file, try the running kernel kallsyms table.
+		 */
+		goto proc_kallsyms;
+	}
+
+	if (sysfs__read_build_id("/sys/kernel/notes", host_build_id,
+				 sizeof(host_build_id)) == 0)
+		is_host = dso__build_id_equal(dso, host_build_id);
+
+	build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id);
+
+	/* Use /proc/kallsyms if possible */
+	if (is_host) {
+		DIR *d;
+		int fd;
+
+		/* If no cached kcore go with /proc/kallsyms */
+		scnprintf(path, sizeof(path), "%s/[kernel.kcore]/%s",
+			  buildid_dir, sbuild_id);
+		d = opendir(path);
+		if (!d)
+			goto proc_kallsyms;
+		closedir(d);
+
+		/*
+		 * Do not check the build-id cache, until we know we cannot use
+		 * /proc/kcore.
+		 */
+		fd = open("/proc/kcore", O_RDONLY);
+		if (fd != -1) {
+			close(fd);
+			/* If module maps match go with /proc/kallsyms */
+			if (!validate_kcore_modules("/proc/kallsyms", map))
+				goto proc_kallsyms;
+		}
+
+		/* Find kallsyms in build-id cache with kcore */
+		if (!find_matching_kcore(map, path, sizeof(path)))
+			return strdup(path);
+
+		goto proc_kallsyms;
+	}
+
+	scnprintf(path, sizeof(path), "%s/[kernel.kallsyms]/%s",
+		  buildid_dir, sbuild_id);
+
+	if (access(path, F_OK)) {
+		pr_err("No kallsyms or vmlinux with build-id %s was found\n",
+		       sbuild_id);
+		return NULL;
+	}
+
+	return strdup(path);
+
+proc_kallsyms:
+	return strdup("/proc/kallsyms");
+}
+
 static int dso__load_kernel_sym(struct dso *dso, struct map *map,
 				symbol_filter_t filter)
 {
@@ -1214,7 +1569,7 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map,
 		goto do_kallsyms;
 	}
 
-	if (symbol_conf.vmlinux_name != NULL) {
+	if (!symbol_conf.ignore_vmlinux && symbol_conf.vmlinux_name != NULL) {
 		err = dso__load_vmlinux(dso, map,
 					symbol_conf.vmlinux_name, filter);
 		if (err > 0) {
@@ -1226,7 +1581,7 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map,
 		return err;
 	}
 
-	if (vmlinux_path != NULL) {
+	if (!symbol_conf.ignore_vmlinux && vmlinux_path != NULL) {
 		err = dso__load_vmlinux_path(dso, map, filter);
 		if (err > 0)
 			return err;
@@ -1236,51 +1591,11 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map,
 	if (symbol_conf.symfs[0] != 0)
 		return -1;
 
-	/*
-	 * Say the kernel DSO was created when processing the build-id header table,
-	 * we have a build-id, so check if it is the same as the running kernel,
-	 * using it if it is.
-	 */
-	if (dso->has_build_id) {
-		u8 kallsyms_build_id[BUILD_ID_SIZE];
-		char sbuild_id[BUILD_ID_SIZE * 2 + 1];
-
-		if (sysfs__read_build_id("/sys/kernel/notes", kallsyms_build_id,
-					 sizeof(kallsyms_build_id)) == 0) {
-			if (dso__build_id_equal(dso, kallsyms_build_id)) {
-				kallsyms_filename = "/proc/kallsyms";
-				goto do_kallsyms;
-			}
-		}
-		/*
-		 * Now look if we have it on the build-id cache in
-		 * $HOME/.debug/[kernel.kallsyms].
-		 */
-		build_id__sprintf(dso->build_id, sizeof(dso->build_id),
-				  sbuild_id);
-
-		if (asprintf(&kallsyms_allocated_filename,
-			     "%s/.debug/[kernel.kallsyms]/%s",
-			     getenv("HOME"), sbuild_id) == -1) {
-			pr_err("Not enough memory for kallsyms file lookup\n");
-			return -1;
-		}
-
-		kallsyms_filename = kallsyms_allocated_filename;
+	kallsyms_allocated_filename = dso__find_kallsyms(dso, map);
+	if (!kallsyms_allocated_filename)
+		return -1;
 
-		if (access(kallsyms_filename, F_OK)) {
-			pr_err("No kallsyms or vmlinux with build-id %s "
-			       "was found\n", sbuild_id);
-			free(kallsyms_allocated_filename);
-			return -1;
-		}
-	} else {
-		/*
-		 * Last resort, if we don't have a build-id and couldn't find
-		 * any vmlinux file, try the running kernel kallsyms table.
-		 */
-		kallsyms_filename = "/proc/kallsyms";
-	}
+	kallsyms_filename = kallsyms_allocated_filename;
 
 do_kallsyms:
 	err = dso__load_kallsyms(dso, kallsyms_filename, map, filter);
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index fd5b70ea29812334572fc7a6f5fad2c3ec7058e7..07de8fea2f48dbc346124539e7393307b839051a 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -13,7 +13,7 @@
 #include <libgen.h>
 #include "build-id.h"
 
-#ifdef LIBELF_SUPPORT
+#ifdef HAVE_LIBELF_SUPPORT
 #include <libelf.h>
 #include <gelf.h>
 #endif
@@ -21,7 +21,7 @@
 
 #include "dso.h"
 
-#ifdef HAVE_CPLUS_DEMANGLE
+#ifdef HAVE_CPLUS_DEMANGLE_SUPPORT
 extern char *cplus_demangle(const char *, int);
 
 static inline char *bfd_demangle(void __maybe_unused *v, const char *c, int i)
@@ -46,7 +46,7 @@ static inline char *bfd_demangle(void __maybe_unused *v,
  * libelf 0.8.x and earlier do not support ELF_C_READ_MMAP;
  * for newer versions we can use mmap to reduce memory usage:
  */
-#ifdef LIBELF_MMAP
+#ifdef HAVE_LIBELF_MMAP_SUPPORT
 # define PERF_ELF_C_READ_MMAP ELF_C_READ_MMAP
 #else
 # define PERF_ELF_C_READ_MMAP ELF_C_READ
@@ -85,6 +85,7 @@ struct symbol_conf {
 	unsigned short	priv_size;
 	unsigned short	nr_events;
 	bool		try_vmlinux_path,
+			ignore_vmlinux,
 			show_kernel_path,
 			use_modules,
 			sort_by_name,
@@ -178,7 +179,7 @@ struct symsrc {
 	int fd;
 	enum dso_binary_type type;
 
-#ifdef LIBELF_SUPPORT
+#ifdef HAVE_LIBELF_SUPPORT
 	Elf *elf;
 	GElf_Ehdr ehdr;
 
@@ -222,6 +223,9 @@ int sysfs__read_build_id(const char *filename, void *bf, size_t size);
 int kallsyms__parse(const char *filename, void *arg,
 		    int (*process_symbol)(void *arg, const char *name,
 					  char type, u64 start));
+int modules__parse(const char *filename, void *arg,
+		   int (*process_module)(void *arg, const char *name,
+					 u64 start));
 int filename__read_debuglink(const char *filename, char *debuglink,
 			     size_t size);
 
@@ -252,4 +256,21 @@ typedef int (*mapfn_t)(u64 start, u64 len, u64 pgoff, void *data);
 int file__read_maps(int fd, bool exe, mapfn_t mapfn, void *data,
 		    bool *is_64_bit);
 
+#define PERF_KCORE_EXTRACT "/tmp/perf-kcore-XXXXXX"
+
+struct kcore_extract {
+	char *kcore_filename;
+	u64 addr;
+	u64 offs;
+	u64 len;
+	char extract_filename[sizeof(PERF_KCORE_EXTRACT)];
+	int fd;
+};
+
+int kcore_extract__create(struct kcore_extract *kce);
+void kcore_extract__delete(struct kcore_extract *kce);
+
+int kcore_copy(const char *from_dir, const char *to_dir);
+int compare_proc_modules(const char *from, const char *to);
+
 #endif /* __PERF_SYMBOL */
diff --git a/tools/perf/util/sysfs.c b/tools/perf/util/sysfs.c
deleted file mode 100644
index f71e9eafe15a7323e127145c13cd0240daae6136..0000000000000000000000000000000000000000
--- a/tools/perf/util/sysfs.c
+++ /dev/null
@@ -1,60 +0,0 @@
-
-#include "util.h"
-#include "sysfs.h"
-
-static const char * const sysfs_known_mountpoints[] = {
-	"/sys",
-	0,
-};
-
-static int sysfs_found;
-char sysfs_mountpoint[PATH_MAX + 1];
-
-static int sysfs_valid_mountpoint(const char *sysfs)
-{
-	struct statfs st_fs;
-
-	if (statfs(sysfs, &st_fs) < 0)
-		return -ENOENT;
-	else if (st_fs.f_type != (long) SYSFS_MAGIC)
-		return -ENOENT;
-
-	return 0;
-}
-
-const char *sysfs_find_mountpoint(void)
-{
-	const char * const *ptr;
-	char type[100];
-	FILE *fp;
-
-	if (sysfs_found)
-		return (const char *) sysfs_mountpoint;
-
-	ptr = sysfs_known_mountpoints;
-	while (*ptr) {
-		if (sysfs_valid_mountpoint(*ptr) == 0) {
-			sysfs_found = 1;
-			strcpy(sysfs_mountpoint, *ptr);
-			return sysfs_mountpoint;
-		}
-		ptr++;
-	}
-
-	/* give up and parse /proc/mounts */
-	fp = fopen("/proc/mounts", "r");
-	if (fp == NULL)
-		return NULL;
-
-	while (!sysfs_found &&
-	       fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n",
-		      sysfs_mountpoint, type) == 2) {
-
-		if (strcmp(type, "sysfs") == 0)
-			sysfs_found = 1;
-	}
-
-	fclose(fp);
-
-	return sysfs_found ? sysfs_mountpoint : NULL;
-}
diff --git a/tools/perf/util/sysfs.h b/tools/perf/util/sysfs.h
deleted file mode 100644
index a813b720393852e9638885af77ef6e33f88d0811..0000000000000000000000000000000000000000
--- a/tools/perf/util/sysfs.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __SYSFS_H__
-#define __SYSFS_H__
-
-const char *sysfs_find_mountpoint(void);
-
-#endif /* __DEBUGFS_H__ */
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index e3d4a550a703211d8fabeb9807c28a86466fe6d8..cd8e2f59271969f410daf28afdf93523699f475c 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -6,86 +6,137 @@
 #include "thread.h"
 #include "util.h"
 #include "debug.h"
+#include "comm.h"
 
 struct thread *thread__new(pid_t pid, pid_t tid)
 {
-	struct thread *self = zalloc(sizeof(*self));
-
-	if (self != NULL) {
-		map_groups__init(&self->mg);
-		self->pid_ = pid;
-		self->tid = tid;
-		self->ppid = -1;
-		self->comm = malloc(32);
-		if (self->comm)
-			snprintf(self->comm, 32, ":%d", self->tid);
+	char *comm_str;
+	struct comm *comm;
+	struct thread *thread = zalloc(sizeof(*thread));
+
+	if (thread != NULL) {
+		map_groups__init(&thread->mg);
+		thread->pid_ = pid;
+		thread->tid = tid;
+		thread->ppid = -1;
+		INIT_LIST_HEAD(&thread->comm_list);
+
+		comm_str = malloc(32);
+		if (!comm_str)
+			goto err_thread;
+
+		snprintf(comm_str, 32, ":%d", tid);
+		comm = comm__new(comm_str, 0);
+		free(comm_str);
+		if (!comm)
+			goto err_thread;
+
+		list_add(&comm->list, &thread->comm_list);
+	}
+
+	return thread;
+
+err_thread:
+	free(thread);
+	return NULL;
+}
+
+void thread__delete(struct thread *thread)
+{
+	struct comm *comm, *tmp;
+
+	map_groups__exit(&thread->mg);
+	list_for_each_entry_safe(comm, tmp, &thread->comm_list, list) {
+		list_del(&comm->list);
+		comm__free(comm);
 	}
 
-	return self;
+	free(thread);
 }
 
-void thread__delete(struct thread *self)
+struct comm *thread__comm(const struct thread *thread)
 {
-	map_groups__exit(&self->mg);
-	free(self->comm);
-	free(self);
+	if (list_empty(&thread->comm_list))
+		return NULL;
+
+	return list_first_entry(&thread->comm_list, struct comm, list);
 }
 
-int thread__set_comm(struct thread *self, const char *comm)
+/* CHECKME: time should always be 0 if event aren't ordered */
+int thread__set_comm(struct thread *thread, const char *str, u64 timestamp)
 {
-	int err;
-
-	if (self->comm)
-		free(self->comm);
-	self->comm = strdup(comm);
-	err = self->comm == NULL ? -ENOMEM : 0;
-	if (!err) {
-		self->comm_set = true;
+	struct comm *new, *curr = thread__comm(thread);
+
+	/* Override latest entry if it had no specific time coverage */
+	if (!curr->start) {
+		comm__override(curr, str, timestamp);
+		return 0;
 	}
-	return err;
+
+	new = comm__new(str, timestamp);
+	if (!new)
+		return -ENOMEM;
+
+	list_add(&new->list, &thread->comm_list);
+	thread->comm_set = true;
+
+	return 0;
+}
+
+const char *thread__comm_str(const struct thread *thread)
+{
+	const struct comm *comm = thread__comm(thread);
+
+	if (!comm)
+		return NULL;
+
+	return comm__str(comm);
 }
 
-int thread__comm_len(struct thread *self)
+/* CHECKME: it should probably better return the max comm len from its comm list */
+int thread__comm_len(struct thread *thread)
 {
-	if (!self->comm_len) {
-		if (!self->comm)
+	if (!thread->comm_len) {
+		const char *comm = thread__comm_str(thread);
+		if (!comm)
 			return 0;
-		self->comm_len = strlen(self->comm);
+		thread->comm_len = strlen(comm);
 	}
 
-	return self->comm_len;
+	return thread->comm_len;
 }
 
 size_t thread__fprintf(struct thread *thread, FILE *fp)
 {
-	return fprintf(fp, "Thread %d %s\n", thread->tid, thread->comm) +
+	return fprintf(fp, "Thread %d %s\n", thread->tid, thread__comm_str(thread)) +
 	       map_groups__fprintf(&thread->mg, verbose, fp);
 }
 
-void thread__insert_map(struct thread *self, struct map *map)
+void thread__insert_map(struct thread *thread, struct map *map)
 {
-	map_groups__fixup_overlappings(&self->mg, map, verbose, stderr);
-	map_groups__insert(&self->mg, map);
+	map_groups__fixup_overlappings(&thread->mg, map, verbose, stderr);
+	map_groups__insert(&thread->mg, map);
 }
 
-int thread__fork(struct thread *self, struct thread *parent)
+int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp)
 {
-	int i;
+	int i, err;
 
 	if (parent->comm_set) {
-		if (self->comm)
-			free(self->comm);
-		self->comm = strdup(parent->comm);
-		if (!self->comm)
+		const char *comm = thread__comm_str(parent);
+		if (!comm)
 			return -ENOMEM;
-		self->comm_set = true;
+		err = thread__set_comm(thread, comm, timestamp);
+		if (!err)
+			return err;
+		thread->comm_set = true;
 	}
 
 	for (i = 0; i < MAP__NR_TYPES; ++i)
-		if (map_groups__clone(&self->mg, &parent->mg, i) < 0)
+		if (map_groups__clone(&thread->mg, &parent->mg, i) < 0)
 			return -ENOMEM;
 
-	self->ppid = parent->tid;
+	thread->ppid = parent->tid;
 
 	return 0;
 }
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 4ebbb40d46d43e13cc78010601611eb30e5c7498..897c1b2a750a278c8cecf000c892d5f6bba8f0af 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -2,6 +2,7 @@
 #define __PERF_THREAD_H
 
 #include <linux/rbtree.h>
+#include <linux/list.h>
 #include <unistd.h>
 #include <sys/types.h>
 #include "symbol.h"
@@ -18,31 +19,34 @@ struct thread {
 	char			shortname[3];
 	bool			comm_set;
 	bool			dead; /* if set thread has exited */
-	char			*comm;
+	struct list_head	comm_list;
 	int			comm_len;
 
 	void			*priv;
 };
 
 struct machine;
+struct comm;
 
 struct thread *thread__new(pid_t pid, pid_t tid);
-void thread__delete(struct thread *self);
+void thread__delete(struct thread *thread);
 static inline void thread__exited(struct thread *thread)
 {
 	thread->dead = true;
 }
 
-int thread__set_comm(struct thread *self, const char *comm);
-int thread__comm_len(struct thread *self);
-void thread__insert_map(struct thread *self, struct map *map);
-int thread__fork(struct thread *self, struct thread *parent);
+int thread__set_comm(struct thread *thread, const char *comm, u64 timestamp);
+int thread__comm_len(struct thread *thread);
+struct comm *thread__comm(const struct thread *thread);
+const char *thread__comm_str(const struct thread *thread);
+void thread__insert_map(struct thread *thread, struct map *map);
+int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp);
 size_t thread__fprintf(struct thread *thread, FILE *fp);
 
-static inline struct map *thread__find_map(struct thread *self,
+static inline struct map *thread__find_map(struct thread *thread,
 					   enum map_type type, u64 addr)
 {
-	return self ? map_groups__find(&self->mg, type, addr) : NULL;
+	return thread ? map_groups__find(&thread->mg, type, addr) : NULL;
 }
 
 void thread__find_addr_map(struct thread *thread, struct machine *machine,
diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h
index b554ffc462b653e73b7e8de84cfa53e2609be989..88cfeaff600b334390842e725b9a7f792b0fd9af 100644
--- a/tools/perf/util/top.h
+++ b/tools/perf/util/top.h
@@ -24,6 +24,7 @@ struct perf_top {
 	u64		   exact_samples;
 	u64		   guest_us_samples, guest_kernel_samples;
 	int		   print_entries, count_filter, delay_secs;
+	int		   max_stack;
 	bool		   hide_kernel_symbols, hide_user_symbols, zero;
 	bool		   use_tui, use_stdio;
 	bool		   kptr_restrict_warned;
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index e9e1c03f927d27bce1e041a9cc61ca47dae2b987..6681f71f2f95e329ddc5eba636d05ce67d3591ee 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -120,42 +120,6 @@ raw_field_value(struct event_format *event, const char *name, void *data)
 	return val;
 }
 
-void *raw_field_ptr(struct event_format *event, const char *name, void *data)
-{
-	struct format_field *field;
-
-	field = pevent_find_any_field(event, name);
-	if (!field)
-		return NULL;
-
-	if (field->flags & FIELD_IS_DYNAMIC) {
-		int offset;
-
-		offset = *(int *)(data + field->offset);
-		offset &= 0xffff;
-
-		return data + offset;
-	}
-
-	return data + field->offset;
-}
-
-int trace_parse_common_type(struct pevent *pevent, void *data)
-{
-	struct pevent_record record;
-
-	record.data = data;
-	return pevent_data_type(pevent, &record);
-}
-
-int trace_parse_common_pid(struct pevent *pevent, void *data)
-{
-	struct pevent_record record;
-
-	record.data = data;
-	return pevent_data_pid(pevent, &record);
-}
-
 unsigned long long read_size(struct event_format *event, void *ptr, int size)
 {
 	return pevent_read_number(event->pevent, ptr, size);
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index fafe1a40444a2b0785e4d41b048ee0926786fcf9..04df63114109604403f2f57a84ed03ec0b765707 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -11,8 +11,6 @@ union perf_event;
 struct perf_tool;
 struct thread;
 
-extern struct pevent *perf_pevent;
-
 int bigendian(void);
 
 struct pevent *read_trace_init(int file_bigendian, int host_bigendian);
@@ -23,26 +21,19 @@ int parse_ftrace_file(struct pevent *pevent, char *buf, unsigned long size);
 int parse_event_file(struct pevent *pevent,
 		     char *buf, unsigned long size, char *sys);
 
-struct pevent_record *trace_peek_data(struct pevent *pevent, int cpu);
-
 unsigned long long
 raw_field_value(struct event_format *event, const char *name, void *data);
-void *raw_field_ptr(struct event_format *event, const char *name, void *data);
 
 void parse_proc_kallsyms(struct pevent *pevent, char *file, unsigned int size);
 void parse_ftrace_printk(struct pevent *pevent, char *file, unsigned int size);
 
 ssize_t trace_report(int fd, struct pevent **pevent, bool repipe);
 
-int trace_parse_common_type(struct pevent *pevent, void *data);
-int trace_parse_common_pid(struct pevent *pevent, void *data);
-
 struct event_format *trace_find_next_event(struct pevent *pevent,
 					   struct event_format *event);
 unsigned long long read_size(struct event_format *event, void *ptr, int size);
 unsigned long long eval_flag(const char *flag);
 
-struct pevent_record *trace_read_data(struct pevent *pevent, int cpu);
 int read_tracing_data(int fd, struct list_head *pattrs);
 
 struct tracing_data {
diff --git a/tools/perf/util/unwind.h b/tools/perf/util/unwind.h
index cb6bc503a792413dd701fec8dafbf60d9e921ed6..ec0c71a2ca2e22f2c78ef90726d0e9afe623b43c 100644
--- a/tools/perf/util/unwind.h
+++ b/tools/perf/util/unwind.h
@@ -13,7 +13,7 @@ struct unwind_entry {
 
 typedef int (*unwind_entry_cb_t)(struct unwind_entry *entry, void *arg);
 
-#ifdef LIBUNWIND_SUPPORT
+#ifdef HAVE_LIBUNWIND_SUPPORT
 int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
 			struct machine *machine,
 			struct thread *thread,
@@ -31,5 +31,5 @@ unwind__get_entries(unwind_entry_cb_t cb __maybe_unused,
 {
 	return 0;
 }
-#endif /* LIBUNWIND_SUPPORT */
+#endif /* HAVE_LIBUNWIND_SUPPORT */
 #endif /* __UNWIND_H */
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 6d17b18e915d56b47e02d16c4c27cae2a85764db..28a0a89c1f739749a59c8b80aa3d3b37d8f4000f 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -1,7 +1,7 @@
 #include "../perf.h"
 #include "util.h"
 #include <sys/mman.h>
-#ifdef BACKTRACE_SUPPORT
+#ifdef HAVE_BACKTRACE_SUPPORT
 #include <execinfo.h>
 #endif
 #include <stdio.h>
@@ -55,17 +55,20 @@ int mkdir_p(char *path, mode_t mode)
 	return (stat(path, &st) && mkdir(path, mode)) ? -1 : 0;
 }
 
-static int slow_copyfile(const char *from, const char *to)
+static int slow_copyfile(const char *from, const char *to, mode_t mode)
 {
-	int err = 0;
+	int err = -1;
 	char *line = NULL;
 	size_t n;
 	FILE *from_fp = fopen(from, "r"), *to_fp;
+	mode_t old_umask;
 
 	if (from_fp == NULL)
 		goto out;
 
+	old_umask = umask(mode ^ 0777);
 	to_fp = fopen(to, "w");
+	umask(old_umask);
 	if (to_fp == NULL)
 		goto out_fclose_from;
 
@@ -82,7 +85,7 @@ static int slow_copyfile(const char *from, const char *to)
 	return err;
 }
 
-int copyfile(const char *from, const char *to)
+int copyfile_mode(const char *from, const char *to, mode_t mode)
 {
 	int fromfd, tofd;
 	struct stat st;
@@ -93,13 +96,13 @@ int copyfile(const char *from, const char *to)
 		goto out;
 
 	if (st.st_size == 0) /* /proc? do it slowly... */
-		return slow_copyfile(from, to);
+		return slow_copyfile(from, to, mode);
 
 	fromfd = open(from, O_RDONLY);
 	if (fromfd < 0)
 		goto out;
 
-	tofd = creat(to, 0755);
+	tofd = creat(to, mode);
 	if (tofd < 0)
 		goto out_close_from;
 
@@ -121,6 +124,11 @@ int copyfile(const char *from, const char *to)
 	return err;
 }
 
+int copyfile(const char *from, const char *to)
+{
+	return copyfile_mode(from, to, 0755);
+}
+
 unsigned long convert_unit(unsigned long value, char *unit)
 {
 	*unit = ' ';
@@ -204,7 +212,7 @@ int hex2u64(const char *ptr, u64 *long_val)
 }
 
 /* Obtain a backtrace and print it to stdout. */
-#ifdef BACKTRACE_SUPPORT
+#ifdef HAVE_BACKTRACE_SUPPORT
 void dump_stack(void)
 {
 	void *array[16];
@@ -361,3 +369,47 @@ int parse_nsec_time(const char *str, u64 *ptime)
 	*ptime = time_sec * NSEC_PER_SEC + time_nsec;
 	return 0;
 }
+
+unsigned long parse_tag_value(const char *str, struct parse_tag *tags)
+{
+	struct parse_tag *i = tags;
+
+	while (i->tag) {
+		char *s;
+
+		s = strchr(str, i->tag);
+		if (s) {
+			unsigned long int value;
+			char *endptr;
+
+			value = strtoul(str, &endptr, 10);
+			if (s != endptr)
+				break;
+
+			if (value > ULONG_MAX / i->mult)
+				break;
+			value *= i->mult;
+			return value;
+		}
+		i++;
+	}
+
+	return (unsigned long) -1;
+}
+
+int filename__read_int(const char *filename, int *value)
+{
+	char line[64];
+	int fd = open(filename, O_RDONLY), err = -1;
+
+	if (fd < 0)
+		return -1;
+
+	if (read(fd, line, sizeof(line)) > 0) {
+		*value = atoi(line);
+		err = 0;
+	}
+
+	close(fd);
+	return err;
+}
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index a53535949043f32654a181c3728a421c52baf276..c8f362daba8755f5b054d70c90f52211f6d8ccc8 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -128,6 +128,8 @@ void put_tracing_file(char *file);
 #endif
 #endif
 
+#define PERF_GTK_DSO  "libperf-gtk.so"
+
 /* General helper functions */
 extern void usage(const char *err) NORETURN;
 extern void die(const char *err, ...) NORETURN __attribute__((format (printf, 1, 2)));
@@ -241,6 +243,7 @@ static inline int sane_case(int x, int high)
 
 int mkdir_p(char *path, mode_t mode);
 int copyfile(const char *from, const char *to);
+int copyfile_mode(const char *from, const char *to, mode_t mode);
 
 s64 perf_atoll(const char *str);
 char **argv_split(const char *str, int *argcp);
@@ -270,6 +273,13 @@ bool is_power_of_2(unsigned long n)
 	return (n != 0 && ((n & (n - 1)) == 0));
 }
 
+static inline unsigned next_pow2(unsigned x)
+{
+	if (!x)
+		return 1;
+	return 1ULL << (32 - __builtin_clz(x - 1));
+}
+
 size_t hex_width(u64 v);
 int hex2u64(const char *ptr, u64 *val);
 
@@ -281,4 +291,20 @@ void dump_stack(void);
 extern unsigned int page_size;
 
 void get_term_dimensions(struct winsize *ws);
+
+struct parse_tag {
+	char tag;
+	int mult;
+};
+
+unsigned long parse_tag_value(const char *str, struct parse_tag *tags);
+
+#define SRCLINE_UNKNOWN  ((char *) "??:0")
+
+struct dso;
+
+char *get_srcline(struct dso *dso, unsigned long addr);
+void free_srcline(char *srcline);
+
+int filename__read_int(const char *filename, int *value);
 #endif /* GIT_COMPAT_UTIL_H */
diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include
index 0d0506d55c71348a3ddce72fcec03824ed987bf3..ee76544deecb4138c7fdd2fee2ef4b410d39e5cc 100644
--- a/tools/scripts/Makefile.include
+++ b/tools/scripts/Makefile.include
@@ -59,21 +59,22 @@ QUIET_SUBDIR0  = +$(MAKE) $(COMMAND_O) -C # space to separate -C and subdir
 QUIET_SUBDIR1  =
 
 ifneq ($(findstring $(MAKEFLAGS),s),s)
-ifneq ($(V),1)
-	QUIET_CC       = @echo '   ' CC $@;
-	QUIET_AR       = @echo '   ' AR $@;
-	QUIET_LINK     = @echo '   ' LINK $@;
-	QUIET_MKDIR    = @echo '   ' MKDIR $@;
-	QUIET_GEN      = @echo '   ' GEN $@;
+  ifneq ($(V),1)
+	QUIET_CC       = @echo '  CC       '$@;
+	QUIET_AR       = @echo '  AR       '$@;
+	QUIET_LINK     = @echo '  LINK     '$@;
+	QUIET_MKDIR    = @echo '  MKDIR    '$@;
+	QUIET_GEN      = @echo '  GEN      '$@;
 	QUIET_SUBDIR0  = +@subdir=
-	QUIET_SUBDIR1  = ;$(NO_SUBDIR) echo '   ' SUBDIR $$subdir; \
+	QUIET_SUBDIR1  = ;$(NO_SUBDIR) \
+			  echo '  SUBDIR   '$$subdir; \
 			 $(MAKE) $(PRINT_DIR) -C $$subdir
-	QUIET_FLEX     = @echo '   ' FLEX $@;
-	QUIET_BISON    = @echo '   ' BISON $@;
+	QUIET_FLEX     = @echo '  FLEX     '$@;
+	QUIET_BISON    = @echo '  BISON    '$@;
 
 	descend = \
-		+@echo '   ' DESCEND $(1); \
+		+@echo	       '  DESCEND  '$(1); \
 		mkdir -p $(OUTPUT)$(1) && \
 		$(MAKE) $(COMMAND_O) subdir=$(if $(subdir),$(subdir)/$(1),$(1)) $(PRINT_DIR) -C $(1) $(2)
-endif
+  endif
 endif