diff options
Diffstat (limited to 'arch/x86/events/intel/core.c')
-rw-r--r-- | arch/x86/events/intel/core.c | 296 |
1 files changed, 277 insertions, 19 deletions
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index f9451566cd9b..4b4dac089635 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -239,6 +239,35 @@ static struct extra_reg intel_skl_extra_regs[] __read_mostly = { EVENT_EXTRA_END }; +static struct event_constraint intel_icl_event_constraints[] = { + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ + INTEL_UEVENT_CONSTRAINT(0x1c0, 0), /* INST_RETIRED.PREC_DIST */ + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ + FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */ + INTEL_EVENT_CONSTRAINT_RANGE(0x03, 0x0a, 0xf), + INTEL_EVENT_CONSTRAINT_RANGE(0x1f, 0x28, 0xf), + INTEL_EVENT_CONSTRAINT(0x32, 0xf), /* SW_PREFETCH_ACCESS.* */ + INTEL_EVENT_CONSTRAINT_RANGE(0x48, 0x54, 0xf), + INTEL_EVENT_CONSTRAINT_RANGE(0x60, 0x8b, 0xf), + INTEL_UEVENT_CONSTRAINT(0x04a3, 0xff), /* CYCLE_ACTIVITY.STALLS_TOTAL */ + INTEL_UEVENT_CONSTRAINT(0x10a3, 0xff), /* CYCLE_ACTIVITY.STALLS_MEM_ANY */ + INTEL_EVENT_CONSTRAINT(0xa3, 0xf), /* CYCLE_ACTIVITY.* */ + INTEL_EVENT_CONSTRAINT_RANGE(0xa8, 0xb0, 0xf), + INTEL_EVENT_CONSTRAINT_RANGE(0xb7, 0xbd, 0xf), + INTEL_EVENT_CONSTRAINT_RANGE(0xd0, 0xe6, 0xf), + INTEL_EVENT_CONSTRAINT_RANGE(0xf0, 0xf4, 0xf), + EVENT_CONSTRAINT_END +}; + +static struct extra_reg intel_icl_extra_regs[] __read_mostly = { + INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff9fffull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff9fffull, RSP_1), + INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), + INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE), + EVENT_EXTRA_END +}; + EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3"); EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3"); EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2"); @@ -1827,6 +1856,45 @@ static __initconst const u64 glp_hw_cache_extra_regs }, }; +#define TNT_LOCAL_DRAM BIT_ULL(26) +#define TNT_DEMAND_READ GLM_DEMAND_DATA_RD +#define TNT_DEMAND_WRITE GLM_DEMAND_RFO +#define TNT_LLC_ACCESS GLM_ANY_RESPONSE +#define TNT_SNP_ANY (SNB_SNP_NOT_NEEDED|SNB_SNP_MISS| \ + SNB_NO_FWD|SNB_SNP_FWD|SNB_HITM) +#define TNT_LLC_MISS (TNT_SNP_ANY|SNB_NON_DRAM|TNT_LOCAL_DRAM) + +static __initconst const u64 tnt_hw_cache_extra_regs + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = TNT_DEMAND_READ| + TNT_LLC_ACCESS, + [C(RESULT_MISS)] = TNT_DEMAND_READ| + TNT_LLC_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = TNT_DEMAND_WRITE| + TNT_LLC_ACCESS, + [C(RESULT_MISS)] = TNT_DEMAND_WRITE| + TNT_LLC_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0x0, + [C(RESULT_MISS)] = 0x0, + }, + }, +}; + +static struct extra_reg intel_tnt_extra_regs[] __read_mostly = { + /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ + INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffffff9fffull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0xffffff9fffull, RSP_1), + EVENT_EXTRA_END +}; + #define KNL_OT_L2_HITE BIT_ULL(19) /* Other Tile L2 Hit */ #define KNL_OT_L2_HITF BIT_ULL(20) /* Other Tile L2 Hit */ #define KNL_MCDRAM_LOCAL BIT_ULL(21) @@ -2015,7 +2083,7 @@ static void intel_tfa_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int /* * We're going to use PMC3, make sure TFA is set before we touch it. */ - if (cntr == 3 && !cpuc->is_fake) + if (cntr == 3) intel_set_tfa(cpuc, true); } @@ -2145,6 +2213,11 @@ static void intel_pmu_enable_fixed(struct perf_event *event) bits <<= (idx * 4); mask = 0xfULL << (idx * 4); + if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip) { + bits |= ICL_FIXED_0_ADAPTIVE << (idx * 4); + mask |= ICL_FIXED_0_ADAPTIVE << (idx * 4); + } + rdmsrl(hwc->config_base, ctrl_val); ctrl_val &= ~mask; ctrl_val |= bits; @@ -2688,7 +2761,7 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx, if (x86_pmu.event_constraints) { for_each_event_constraint(c, x86_pmu.event_constraints) { - if ((event->hw.config & c->cmask) == c->code) { + if (constraint_match(c, event->hw.config)) { event->hw.flags |= c->flags; return c; } @@ -2838,7 +2911,7 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; struct intel_excl_states *xlo; int tid = cpuc->excl_thread_id; - int is_excl, i; + int is_excl, i, w; /* * validating a group does not require @@ -2894,36 +2967,40 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, * SHARED : sibling counter measuring non-exclusive event * UNUSED : sibling counter unused */ + w = c->weight; for_each_set_bit(i, c->idxmsk, X86_PMC_IDX_MAX) { /* * exclusive event in sibling counter * our corresponding counter cannot be used * regardless of our event */ - if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE) + if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE) { __clear_bit(i, c->idxmsk); + w--; + continue; + } /* * if measuring an exclusive event, sibling * measuring non-exclusive, then counter cannot * be used */ - if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED) + if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED) { __clear_bit(i, c->idxmsk); + w--; + continue; + } } /* - * recompute actual bit weight for scheduling algorithm - */ - c->weight = hweight64(c->idxmsk64); - - /* * if we return an empty mask, then switch * back to static empty constraint to avoid * the cost of freeing later on */ - if (c->weight == 0) + if (!w) c = &emptyconstraint; + c->weight = w; + return c; } @@ -2931,11 +3008,9 @@ static struct event_constraint * intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct perf_event *event) { - struct event_constraint *c1 = NULL; - struct event_constraint *c2; + struct event_constraint *c1, *c2; - if (idx >= 0) /* fake does < 0 */ - c1 = cpuc->event_constraint[idx]; + c1 = cpuc->event_constraint[idx]; /* * first time only @@ -2943,7 +3018,8 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx, * - dynamic constraint: handled by intel_get_excl_constraints() */ c2 = __intel_get_event_constraints(cpuc, idx, event); - if (c1 && (c1->flags & PERF_X86_EVENT_DYNAMIC)) { + if (c1) { + WARN_ON_ONCE(!(c1->flags & PERF_X86_EVENT_DYNAMIC)); bitmap_copy(c1->idxmsk, c2->idxmsk, X86_PMC_IDX_MAX); c1->weight = c2->weight; c2 = c1; @@ -3366,6 +3442,12 @@ static struct event_constraint counter0_constraint = static struct event_constraint counter2_constraint = EVENT_CONSTRAINT(0, 0x4, 0); +static struct event_constraint fixed0_constraint = + FIXED_EVENT_CONSTRAINT(0x00c0, 0); + +static struct event_constraint fixed0_counter0_constraint = + INTEL_ALL_EVENT_CONSTRAINT(0, 0x100000001ULL); + static struct event_constraint * hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct perf_event *event) @@ -3385,6 +3467,21 @@ hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx, } static struct event_constraint * +icl_get_event_constraints(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) +{ + /* + * Fixed counter 0 has less skid. + * Force instruction:ppp in Fixed counter 0 + */ + if ((event->attr.precise_ip == 3) && + constraint_match(&fixed0_constraint, event->hw.config)) + return &fixed0_constraint; + + return hsw_get_event_constraints(cpuc, idx, event); +} + +static struct event_constraint * glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct perf_event *event) { @@ -3399,6 +3496,29 @@ glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx, return c; } +static struct event_constraint * +tnt_get_event_constraints(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) +{ + struct event_constraint *c; + + /* + * :ppp means to do reduced skid PEBS, + * which is available on PMC0 and fixed counter 0. + */ + if (event->attr.precise_ip == 3) { + /* Force instruction:ppp on PMC0 and Fixed counter 0 */ + if (constraint_match(&fixed0_constraint, event->hw.config)) + return &fixed0_counter0_constraint; + + return &counter0_constraint; + } + + c = intel_get_event_constraints(cpuc, idx, event); + + return c; +} + static bool allow_tsx_force_abort = true; static struct event_constraint * @@ -3410,7 +3530,7 @@ tfa_get_event_constraints(struct cpu_hw_events *cpuc, int idx, /* * Without TFA we must not use PMC3. */ - if (!allow_tsx_force_abort && test_bit(3, c->idxmsk) && idx >= 0) { + if (!allow_tsx_force_abort && test_bit(3, c->idxmsk)) { c = dyn_constraint(cpuc, c, idx); c->idxmsk64 &= ~(1ULL << 3); c->weight--; @@ -3507,6 +3627,8 @@ static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu) int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu) { + cpuc->pebs_record_size = x86_pmu.pebs_record_size; + if (x86_pmu.extra_regs || x86_pmu.lbr_sel_map) { cpuc->shared_regs = allocate_shared_regs(cpu); if (!cpuc->shared_regs) @@ -4114,6 +4236,42 @@ static struct attribute *hsw_tsx_events_attrs[] = { NULL }; +EVENT_ATTR_STR(tx-capacity-read, tx_capacity_read, "event=0x54,umask=0x80"); +EVENT_ATTR_STR(tx-capacity-write, tx_capacity_write, "event=0x54,umask=0x2"); +EVENT_ATTR_STR(el-capacity-read, el_capacity_read, "event=0x54,umask=0x80"); +EVENT_ATTR_STR(el-capacity-write, el_capacity_write, "event=0x54,umask=0x2"); + +static struct attribute *icl_events_attrs[] = { + EVENT_PTR(mem_ld_hsw), + EVENT_PTR(mem_st_hsw), + NULL, +}; + +static struct attribute *icl_tsx_events_attrs[] = { + EVENT_PTR(tx_start), + EVENT_PTR(tx_abort), + EVENT_PTR(tx_commit), + EVENT_PTR(tx_capacity_read), + EVENT_PTR(tx_capacity_write), + EVENT_PTR(tx_conflict), + EVENT_PTR(el_start), + EVENT_PTR(el_abort), + EVENT_PTR(el_commit), + EVENT_PTR(el_capacity_read), + EVENT_PTR(el_capacity_write), + EVENT_PTR(el_conflict), + EVENT_PTR(cycles_t), + EVENT_PTR(cycles_ct), + NULL, +}; + +static __init struct attribute **get_icl_events_attrs(void) +{ + return boot_cpu_has(X86_FEATURE_RTM) ? + merge_attr(icl_events_attrs, icl_tsx_events_attrs) : + icl_events_attrs; +} + static ssize_t freeze_on_smi_show(struct device *cdev, struct device_attribute *attr, char *buf) @@ -4153,6 +4311,50 @@ done: return count; } +static void update_tfa_sched(void *ignored) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + /* + * check if PMC3 is used + * and if so force schedule out for all event types all contexts + */ + if (test_bit(3, cpuc->active_mask)) + perf_pmu_resched(x86_get_pmu()); +} + +static ssize_t show_sysctl_tfa(struct device *cdev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, 40, "%d\n", allow_tsx_force_abort); +} + +static ssize_t set_sysctl_tfa(struct device *cdev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + bool val; + ssize_t ret; + + ret = kstrtobool(buf, &val); + if (ret) + return ret; + + /* no change */ + if (val == allow_tsx_force_abort) + return count; + + allow_tsx_force_abort = val; + + get_online_cpus(); + on_each_cpu(update_tfa_sched, NULL, 1); + put_online_cpus(); + + return count; +} + + static DEVICE_ATTR_RW(freeze_on_smi); static ssize_t branches_show(struct device *cdev, @@ -4185,7 +4387,9 @@ static struct attribute *intel_pmu_caps_attrs[] = { NULL }; -static DEVICE_BOOL_ATTR(allow_tsx_force_abort, 0644, allow_tsx_force_abort); +static DEVICE_ATTR(allow_tsx_force_abort, 0644, + show_sysctl_tfa, + set_sysctl_tfa); static struct attribute *intel_pmu_attrs[] = { &dev_attr_freeze_on_smi.attr, @@ -4446,6 +4650,32 @@ __init int intel_pmu_init(void) name = "goldmont_plus"; break; + case INTEL_FAM6_ATOM_TREMONT_X: + x86_pmu.late_ack = true; + memcpy(hw_cache_event_ids, glp_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, tnt_hw_cache_extra_regs, + sizeof(hw_cache_extra_regs)); + hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1; + + intel_pmu_lbr_init_skl(); + + x86_pmu.event_constraints = intel_slm_event_constraints; + x86_pmu.extra_regs = intel_tnt_extra_regs; + /* + * It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS + * for precise cycles. + */ + x86_pmu.pebs_aliases = NULL; + x86_pmu.pebs_prec_dist = true; + x86_pmu.lbr_pt_coexist = true; + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.get_event_constraints = tnt_get_event_constraints; + extra_attr = slm_format_attr; + pr_cont("Tremont events, "); + name = "Tremont"; + break; + case INTEL_FAM6_WESTMERE: case INTEL_FAM6_WESTMERE_EP: case INTEL_FAM6_WESTMERE_EX: @@ -4694,13 +4924,41 @@ __init int intel_pmu_init(void) x86_pmu.get_event_constraints = tfa_get_event_constraints; x86_pmu.enable_all = intel_tfa_pmu_enable_all; x86_pmu.commit_scheduling = intel_tfa_commit_scheduling; - intel_pmu_attrs[1] = &dev_attr_allow_tsx_force_abort.attr.attr; + intel_pmu_attrs[1] = &dev_attr_allow_tsx_force_abort.attr; } pr_cont("Skylake events, "); name = "skylake"; break; + case INTEL_FAM6_ICELAKE_MOBILE: + x86_pmu.late_ack = true; + memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); + hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1; + intel_pmu_lbr_init_skl(); + + x86_pmu.event_constraints = intel_icl_event_constraints; + x86_pmu.pebs_constraints = intel_icl_pebs_event_constraints; + x86_pmu.extra_regs = intel_icl_extra_regs; + x86_pmu.pebs_aliases = NULL; + x86_pmu.pebs_prec_dist = true; + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.flags |= PMU_FL_NO_HT_SHARING; + + x86_pmu.hw_config = hsw_hw_config; + x86_pmu.get_event_constraints = icl_get_event_constraints; + extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? + hsw_format_attr : nhm_format_attr; + extra_attr = merge_attr(extra_attr, skl_format_attr); + x86_pmu.cpu_events = get_icl_events_attrs(); + x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xca, .umask=0x02); + x86_pmu.lbr_pt_coexist = true; + intel_pmu_pebs_data_source_skl(false); + pr_cont("Icelake events, "); + name = "icelake"; + break; + default: switch (x86_pmu.version) { case 1: |