diff options
Diffstat (limited to 'drivers/gpu/drm/i915/gt/intel_rps.c')
-rw-r--r-- | drivers/gpu/drm/i915/gt/intel_rps.c | 449 |
1 files changed, 305 insertions, 144 deletions
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 19542fd9e207..2f59fc6df3c2 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -8,12 +8,15 @@ #include "i915_drv.h" #include "intel_gt.h" +#include "intel_gt_clock_utils.h" #include "intel_gt_irq.h" #include "intel_gt_pm_irq.h" #include "intel_rps.h" #include "intel_sideband.h" #include "../../../platform/x86/intel_ips.h" +#define BUSY_MAX_EI 20u /* ms */ + /* * Lock protecting IPS related data structures */ @@ -44,6 +47,100 @@ static inline void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val) intel_uncore_write_fw(uncore, reg, val); } +static void rps_timer(struct timer_list *t) +{ + struct intel_rps *rps = from_timer(rps, t, timer); + struct intel_engine_cs *engine; + enum intel_engine_id id; + s64 max_busy[3] = {}; + ktime_t dt, last; + + for_each_engine(engine, rps_to_gt(rps), id) { + s64 busy; + int i; + + dt = intel_engine_get_busy_time(engine); + last = engine->stats.rps; + engine->stats.rps = dt; + + busy = ktime_to_ns(ktime_sub(dt, last)); + for (i = 0; i < ARRAY_SIZE(max_busy); i++) { + if (busy > max_busy[i]) + swap(busy, max_busy[i]); + } + } + + dt = ktime_get(); + last = rps->pm_timestamp; + rps->pm_timestamp = dt; + + if (intel_rps_is_active(rps)) { + s64 busy; + int i; + + dt = ktime_sub(dt, last); + + /* + * Our goal is to evaluate each engine independently, so we run + * at the lowest clocks required to sustain the heaviest + * workload. However, a task may be split into sequential + * dependent operations across a set of engines, such that + * the independent contributions do not account for high load, + * but overall the task is GPU bound. For example, consider + * video decode on vcs followed by colour post-processing + * on vecs, followed by general post-processing on rcs. + * Since multi-engines being active does imply a single + * continuous workload across all engines, we hedge our + * bets by only contributing a factor of the distributed + * load into our busyness calculation. + */ + busy = max_busy[0]; + for (i = 1; i < ARRAY_SIZE(max_busy); i++) { + if (!max_busy[i]) + break; + + busy += div_u64(max_busy[i], 1 << i); + } + GT_TRACE(rps_to_gt(rps), + "busy:%lld [%d%%], max:[%lld, %lld, %lld], interval:%d\n", + busy, (int)div64_u64(100 * busy, dt), + max_busy[0], max_busy[1], max_busy[2], + rps->pm_interval); + + if (100 * busy > rps->power.up_threshold * dt && + rps->cur_freq < rps->max_freq_softlimit) { + rps->pm_iir |= GEN6_PM_RP_UP_THRESHOLD; + rps->pm_interval = 1; + schedule_work(&rps->work); + } else if (100 * busy < rps->power.down_threshold * dt && + rps->cur_freq > rps->min_freq_softlimit) { + rps->pm_iir |= GEN6_PM_RP_DOWN_THRESHOLD; + rps->pm_interval = 1; + schedule_work(&rps->work); + } else { + rps->last_adj = 0; + } + + mod_timer(&rps->timer, + jiffies + msecs_to_jiffies(rps->pm_interval)); + rps->pm_interval = min(rps->pm_interval * 2, BUSY_MAX_EI); + } +} + +static void rps_start_timer(struct intel_rps *rps) +{ + rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp); + rps->pm_interval = 1; + mod_timer(&rps->timer, jiffies + 1); +} + +static void rps_stop_timer(struct intel_rps *rps) +{ + del_timer_sync(&rps->timer); + rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp); + cancel_work_sync(&rps->work); +} + static u32 rps_pm_mask(struct intel_rps *rps, u8 val) { u32 mask = 0; @@ -57,7 +154,7 @@ static u32 rps_pm_mask(struct intel_rps *rps, u8 val) if (val < rps->max_freq_softlimit) mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD; - mask &= READ_ONCE(rps->pm_events); + mask &= rps->pm_events; return rps_pm_sanitize_mask(rps, ~mask); } @@ -70,18 +167,11 @@ static void rps_reset_ei(struct intel_rps *rps) static void rps_enable_interrupts(struct intel_rps *rps) { struct intel_gt *gt = rps_to_gt(rps); - u32 events; - rps_reset_ei(rps); + GT_TRACE(gt, "interrupts:on rps->pm_events: %x, rps_pm_mask:%x\n", + rps->pm_events, rps_pm_mask(rps, rps->last_freq)); - if (IS_VALLEYVIEW(gt->i915)) - /* WaGsvRC0ResidencyMethod:vlv */ - events = GEN6_PM_RP_UP_EI_EXPIRED; - else - events = (GEN6_PM_RP_UP_THRESHOLD | - GEN6_PM_RP_DOWN_THRESHOLD | - GEN6_PM_RP_DOWN_TIMEOUT); - WRITE_ONCE(rps->pm_events, events); + rps_reset_ei(rps); spin_lock_irq(>->irq_lock); gen6_gt_pm_enable_irq(gt, rps->pm_events); @@ -120,8 +210,6 @@ static void rps_disable_interrupts(struct intel_rps *rps) { struct intel_gt *gt = rps_to_gt(rps); - WRITE_ONCE(rps->pm_events, 0); - intel_uncore_write(gt->uncore, GEN6_PMINTRMSK, rps_pm_sanitize_mask(rps, ~0u)); @@ -140,6 +228,7 @@ static void rps_disable_interrupts(struct intel_rps *rps) cancel_work_sync(&rps->work); rps_reset_interrupts(rps); + GT_TRACE(gt, "interrupts:off\n"); } static const struct cparams { @@ -186,14 +275,12 @@ static void gen5_rps_init(struct intel_rps *rps) fmin = (rgvmodectl & MEMMODE_FMIN_MASK); fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >> MEMMODE_FSTART_SHIFT; - DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n", - fmax, fmin, fstart); + drm_dbg(&i915->drm, "fmax: %d, fmin: %d, fstart: %d\n", + fmax, fmin, fstart); rps->min_freq = fmax; + rps->efficient_freq = fstart; rps->max_freq = fmin; - - rps->idle_freq = rps->min_freq; - rps->cur_freq = rps->idle_freq; } static unsigned long @@ -456,7 +543,8 @@ static bool gen5_rps_enable(struct intel_rps *rps) if (wait_for_atomic((intel_uncore_read(uncore, MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10)) - DRM_ERROR("stuck trying to change perf mode\n"); + drm_err(&uncore->i915->drm, + "stuck trying to change perf mode\n"); mdelay(1); gen5_rps_set(rps, rps->cur_freq); @@ -533,8 +621,8 @@ static u32 rps_limits(struct intel_rps *rps, u8 val) static void rps_set_power(struct intel_rps *rps, int new_power) { - struct intel_uncore *uncore = rps_to_uncore(rps); - struct drm_i915_private *i915 = rps_to_i915(rps); + struct intel_gt *gt = rps_to_gt(rps); + struct intel_uncore *uncore = gt->uncore; u32 threshold_up = 0, threshold_down = 0; /* in % */ u32 ei_up = 0, ei_down = 0; @@ -543,55 +631,49 @@ static void rps_set_power(struct intel_rps *rps, int new_power) if (new_power == rps->power.mode) return; + threshold_up = 95; + threshold_down = 85; + /* Note the units here are not exactly 1us, but 1280ns. */ switch (new_power) { case LOW_POWER: - /* Upclock if more than 95% busy over 16ms */ ei_up = 16000; - threshold_up = 95; - - /* Downclock if less than 85% busy over 32ms */ ei_down = 32000; - threshold_down = 85; break; case BETWEEN: - /* Upclock if more than 90% busy over 13ms */ ei_up = 13000; - threshold_up = 90; - - /* Downclock if less than 75% busy over 32ms */ ei_down = 32000; - threshold_down = 75; break; case HIGH_POWER: - /* Upclock if more than 85% busy over 10ms */ ei_up = 10000; - threshold_up = 85; - - /* Downclock if less than 60% busy over 32ms */ ei_down = 32000; - threshold_down = 60; break; } /* When byt can survive without system hang with dynamic * sw freq adjustments, this restriction can be lifted. */ - if (IS_VALLEYVIEW(i915)) + if (IS_VALLEYVIEW(gt->i915)) goto skip_hw_write; - set(uncore, GEN6_RP_UP_EI, GT_INTERVAL_FROM_US(i915, ei_up)); + GT_TRACE(gt, + "changing power mode [%d], up %d%% @ %dus, down %d%% @ %dus\n", + new_power, threshold_up, ei_up, threshold_down, ei_down); + + set(uncore, GEN6_RP_UP_EI, + intel_gt_ns_to_pm_interval(gt, ei_up * 1000)); set(uncore, GEN6_RP_UP_THRESHOLD, - GT_INTERVAL_FROM_US(i915, ei_up * threshold_up / 100)); + intel_gt_ns_to_pm_interval(gt, ei_up * threshold_up * 10)); - set(uncore, GEN6_RP_DOWN_EI, GT_INTERVAL_FROM_US(i915, ei_down)); + set(uncore, GEN6_RP_DOWN_EI, + intel_gt_ns_to_pm_interval(gt, ei_down * 1000)); set(uncore, GEN6_RP_DOWN_THRESHOLD, - GT_INTERVAL_FROM_US(i915, ei_down * threshold_down / 100)); + intel_gt_ns_to_pm_interval(gt, ei_down * threshold_down * 10)); set(uncore, GEN6_RP_CONTROL, - (INTEL_GEN(i915) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) | + (INTEL_GEN(gt->i915) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) | GEN6_RP_MEDIA_HW_NORMAL_MODE | GEN6_RP_MEDIA_IS_GFX | GEN6_RP_ENABLE | @@ -646,9 +728,11 @@ static void gen6_rps_set_thresholds(struct intel_rps *rps, u8 val) void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive) { + GT_TRACE(rps_to_gt(rps), "mark interactive: %s\n", yesno(interactive)); + mutex_lock(&rps->power.mutex); if (interactive) { - if (!rps->power.interactive++ && READ_ONCE(rps->active)) + if (!rps->power.interactive++ && intel_rps_is_active(rps)) rps_set_power(rps, HIGH_POWER); } else { GEM_BUG_ON(!rps->power.interactive); @@ -673,6 +757,9 @@ static int gen6_rps_set(struct intel_rps *rps, u8 val) GEN6_AGGRESSIVE_TURBO); set(uncore, GEN6_RPNSWREQ, swreq); + GT_TRACE(rps_to_gt(rps), "set val:%x, freq:%d, swreq:%x\n", + val, intel_gpu_freq(rps, val), swreq); + return 0; } @@ -685,6 +772,9 @@ static int vlv_rps_set(struct intel_rps *rps, u8 val) err = vlv_punit_write(i915, PUNIT_REG_GPU_FREQ_REQ, val); vlv_punit_put(i915); + GT_TRACE(rps_to_gt(rps), "set val:%x, freq:%d\n", + val, intel_gpu_freq(rps, val)); + return err; } @@ -715,29 +805,30 @@ static int rps_set(struct intel_rps *rps, u8 val, bool update) void intel_rps_unpark(struct intel_rps *rps) { - u8 freq; - - if (!rps->enabled) + if (!intel_rps_is_enabled(rps)) return; + GT_TRACE(rps_to_gt(rps), "unpark:%x\n", rps->cur_freq); + /* * Use the user's desired frequency as a guide, but for better * performance, jump directly to RPe as our starting frequency. */ mutex_lock(&rps->lock); - WRITE_ONCE(rps->active, true); - - freq = max(rps->cur_freq, rps->efficient_freq), - freq = clamp(freq, rps->min_freq_softlimit, rps->max_freq_softlimit); - intel_rps_set(rps, freq); - - rps->last_adj = 0; + intel_rps_set_active(rps); + intel_rps_set(rps, + clamp(rps->cur_freq, + rps->min_freq_softlimit, + rps->max_freq_softlimit)); mutex_unlock(&rps->lock); - if (INTEL_GEN(rps_to_i915(rps)) >= 6) + rps->pm_iir = 0; + if (intel_rps_has_interrupts(rps)) rps_enable_interrupts(rps); + if (intel_rps_uses_timer(rps)) + rps_start_timer(rps); if (IS_GEN(rps_to_i915(rps), 5)) gen5_rps_update(rps); @@ -745,15 +836,16 @@ void intel_rps_unpark(struct intel_rps *rps) void intel_rps_park(struct intel_rps *rps) { - struct drm_i915_private *i915 = rps_to_i915(rps); + int adj; - if (!rps->enabled) + if (!intel_rps_clear_active(rps)) return; - if (INTEL_GEN(i915) >= 6) + if (intel_rps_uses_timer(rps)) + rps_stop_timer(rps); + if (intel_rps_has_interrupts(rps)) rps_disable_interrupts(rps); - WRITE_ONCE(rps->active, false); if (rps->last_freq <= rps->idle_freq) return; @@ -784,8 +876,15 @@ void intel_rps_park(struct intel_rps *rps) * (Note we accommodate Cherryview's limitation of only using an * even bin by applying it to all.) */ - rps->cur_freq = - max_t(int, round_down(rps->cur_freq - 1, 2), rps->min_freq); + adj = rps->last_adj; + if (adj < 0) + adj *= 2; + else /* CHV needs even encode values */ + adj = -2; + rps->last_adj = adj; + rps->cur_freq = max_t(int, rps->cur_freq + adj, rps->min_freq); + + GT_TRACE(rps_to_gt(rps), "park:%x\n", rps->cur_freq); } void intel_rps_boost(struct i915_request *rq) @@ -793,7 +892,7 @@ void intel_rps_boost(struct i915_request *rq) struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps; unsigned long flags; - if (i915_request_signaled(rq) || !READ_ONCE(rps->active)) + if (i915_request_signaled(rq) || !intel_rps_is_active(rps)) return; /* Serializes with i915_request_retire() */ @@ -802,6 +901,9 @@ void intel_rps_boost(struct i915_request *rq) !dma_fence_is_signaled_locked(&rq->fence)) { set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags); + GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n", + rq->fence.context, rq->fence.seqno); + if (!atomic_fetch_inc(&rps->num_waiters) && READ_ONCE(rps->cur_freq) < rps->boost_freq) schedule_work(&rps->work); @@ -819,7 +921,7 @@ int intel_rps_set(struct intel_rps *rps, u8 val) GEM_BUG_ON(val > rps->max_freq); GEM_BUG_ON(val < rps->min_freq); - if (rps->active) { + if (intel_rps_is_active(rps)) { err = rps_set(rps, val, true); if (err) return err; @@ -828,7 +930,7 @@ int intel_rps_set(struct intel_rps *rps, u8 val) * Make sure we continue to get interrupts * until we hit the minimum or maximum frequencies. */ - if (INTEL_GEN(rps_to_i915(rps)) >= 6) { + if (intel_rps_has_interrupts(rps)) { struct intel_uncore *uncore = rps_to_uncore(rps); set(uncore, @@ -896,12 +998,14 @@ static void gen6_rps_init(struct intel_rps *rps) static bool rps_reset(struct intel_rps *rps) { + struct drm_i915_private *i915 = rps_to_i915(rps); + /* force a reset */ rps->power.mode = -1; rps->last_freq = -1; if (rps_set(rps, rps->min_freq, true)) { - DRM_ERROR("Failed to reset RPS to initial values\n"); + drm_err(&i915->drm, "Failed to reset RPS to initial values\n"); return false; } @@ -912,20 +1016,18 @@ static bool rps_reset(struct intel_rps *rps) /* See the Gen9_GT_PM_Programming_Guide doc for the below */ static bool gen9_rps_enable(struct intel_rps *rps) { - struct drm_i915_private *i915 = rps_to_i915(rps); - struct intel_uncore *uncore = rps_to_uncore(rps); + struct intel_gt *gt = rps_to_gt(rps); + struct intel_uncore *uncore = gt->uncore; /* Program defaults and thresholds for RPS */ - if (IS_GEN(i915, 9)) + if (IS_GEN(gt->i915, 9)) intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ, GEN9_FREQUENCY(rps->rp1_freq)); - /* 1 second timeout */ - intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, - GT_INTERVAL_FROM_US(i915, 1000000)); - intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 0xa); + rps->pm_events = GEN6_PM_RP_UP_THRESHOLD | GEN6_PM_RP_DOWN_THRESHOLD; + return rps_reset(rps); } @@ -936,12 +1038,10 @@ static bool gen8_rps_enable(struct intel_rps *rps) intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ, HSW_FREQUENCY(rps->rp1_freq)); - /* NB: Docs say 1s, and 1000000 - which aren't equivalent */ - intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, - 100000000 / 128); /* 1 second timeout */ - intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); + rps->pm_events = GEN6_PM_RP_UP_THRESHOLD | GEN6_PM_RP_DOWN_THRESHOLD; + return rps_reset(rps); } @@ -953,6 +1053,10 @@ static bool gen6_rps_enable(struct intel_rps *rps) intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 50000); intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); + rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD | + GEN6_PM_RP_DOWN_THRESHOLD | + GEN6_PM_RP_DOWN_TIMEOUT); + return rps_reset(rps); } @@ -1038,6 +1142,10 @@ static bool chv_rps_enable(struct intel_rps *rps) GEN6_RP_UP_BUSY_AVG | GEN6_RP_DOWN_IDLE_AVG); + rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD | + GEN6_PM_RP_DOWN_THRESHOLD | + GEN6_PM_RP_DOWN_TIMEOUT); + /* Setting Fixed Bias */ vlv_punit_get(i915); @@ -1052,8 +1160,8 @@ static bool chv_rps_enable(struct intel_rps *rps) drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0, "GPLL not enabled\n"); - DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE)); - DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); + drm_dbg(&i915->drm, "GPLL enabled? %s\n", yesno(val & GPLLENABLE)); + drm_dbg(&i915->drm, "GPU status: 0x%08x\n", val); return rps_reset(rps); } @@ -1136,6 +1244,9 @@ static bool vlv_rps_enable(struct intel_rps *rps) GEN6_RP_UP_BUSY_AVG | GEN6_RP_DOWN_IDLE_CONT); + /* WaGsvRC0ResidencyMethod:vlv */ + rps->pm_events = GEN6_PM_RP_UP_EI_EXPIRED; + vlv_punit_get(i915); /* Setting Fixed Bias */ @@ -1150,8 +1261,8 @@ static bool vlv_rps_enable(struct intel_rps *rps) drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0, "GPLL not enabled\n"); - DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE)); - DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); + drm_dbg(&i915->drm, "GPLL enabled? %s\n", yesno(val & GPLLENABLE)); + drm_dbg(&i915->drm, "GPU status: 0x%08x\n", val); return rps_reset(rps); } @@ -1194,33 +1305,71 @@ static unsigned long __ips_gfx_val(struct intel_ips *ips) return ips->gfx_power + state2; } +static bool has_busy_stats(struct intel_rps *rps) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, rps_to_gt(rps), id) { + if (!intel_engine_supports_stats(engine)) + return false; + } + + return true; +} + void intel_rps_enable(struct intel_rps *rps) { struct drm_i915_private *i915 = rps_to_i915(rps); struct intel_uncore *uncore = rps_to_uncore(rps); + bool enabled = false; + + if (!HAS_RPS(i915)) + return; + + intel_gt_check_clock_frequency(rps_to_gt(rps)); intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); - if (IS_CHERRYVIEW(i915)) - rps->enabled = chv_rps_enable(rps); + if (rps->max_freq <= rps->min_freq) + /* leave disabled, no room for dynamic reclocking */; + else if (IS_CHERRYVIEW(i915)) + enabled = chv_rps_enable(rps); else if (IS_VALLEYVIEW(i915)) - rps->enabled = vlv_rps_enable(rps); + enabled = vlv_rps_enable(rps); else if (INTEL_GEN(i915) >= 9) - rps->enabled = gen9_rps_enable(rps); + enabled = gen9_rps_enable(rps); else if (INTEL_GEN(i915) >= 8) - rps->enabled = gen8_rps_enable(rps); + enabled = gen8_rps_enable(rps); else if (INTEL_GEN(i915) >= 6) - rps->enabled = gen6_rps_enable(rps); + enabled = gen6_rps_enable(rps); else if (IS_IRONLAKE_M(i915)) - rps->enabled = gen5_rps_enable(rps); + enabled = gen5_rps_enable(rps); + else + MISSING_CASE(INTEL_GEN(i915)); intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); - if (!rps->enabled) + if (!enabled) return; - drm_WARN_ON(&i915->drm, rps->max_freq < rps->min_freq); - drm_WARN_ON(&i915->drm, rps->idle_freq > rps->max_freq); + GT_TRACE(rps_to_gt(rps), + "min:%x, max:%x, freq:[%d, %d]\n", + rps->min_freq, rps->max_freq, + intel_gpu_freq(rps, rps->min_freq), + intel_gpu_freq(rps, rps->max_freq)); - drm_WARN_ON(&i915->drm, rps->efficient_freq < rps->min_freq); - drm_WARN_ON(&i915->drm, rps->efficient_freq > rps->max_freq); + GEM_BUG_ON(rps->max_freq < rps->min_freq); + GEM_BUG_ON(rps->idle_freq > rps->max_freq); + + GEM_BUG_ON(rps->efficient_freq < rps->min_freq); + GEM_BUG_ON(rps->efficient_freq > rps->max_freq); + + if (has_busy_stats(rps)) + intel_rps_set_timer(rps); + else if (INTEL_GEN(i915) >= 6) + intel_rps_set_interrupts(rps); + else + /* Ironlake currently uses intel_ips.ko */ {} + + intel_rps_set_enabled(rps); } static void gen6_rps_disable(struct intel_rps *rps) @@ -1232,7 +1381,9 @@ void intel_rps_disable(struct intel_rps *rps) { struct drm_i915_private *i915 = rps_to_i915(rps); - rps->enabled = false; + intel_rps_clear_enabled(rps); + intel_rps_clear_interrupts(rps); + intel_rps_clear_timer(rps); if (INTEL_GEN(i915) >= 6) gen6_rps_disable(rps); @@ -1308,7 +1459,8 @@ static void vlv_init_gpll_ref_freq(struct intel_rps *rps) CCK_GPLL_CLOCK_CONTROL, i915->czclk_freq); - DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n", rps->gpll_ref_freq); + drm_dbg(&i915->drm, "GPLL reference freq: %d kHz\n", + rps->gpll_ref_freq); } static void vlv_rps_init(struct intel_rps *rps) @@ -1336,28 +1488,24 @@ static void vlv_rps_init(struct intel_rps *rps) i915->mem_freq = 1333; break; } - DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", i915->mem_freq); + drm_dbg(&i915->drm, "DDR speed: %d MHz\n", i915->mem_freq); rps->max_freq = vlv_rps_max_freq(rps); rps->rp0_freq = rps->max_freq; - DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", - intel_gpu_freq(rps, rps->max_freq), - rps->max_freq); + drm_dbg(&i915->drm, "max GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->max_freq), rps->max_freq); rps->efficient_freq = vlv_rps_rpe_freq(rps); - DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", - intel_gpu_freq(rps, rps->efficient_freq), - rps->efficient_freq); + drm_dbg(&i915->drm, "RPe GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->efficient_freq), rps->efficient_freq); rps->rp1_freq = vlv_rps_guar_freq(rps); - DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n", - intel_gpu_freq(rps, rps->rp1_freq), - rps->rp1_freq); + drm_dbg(&i915->drm, "RP1(Guar Freq) GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->rp1_freq), rps->rp1_freq); rps->min_freq = vlv_rps_min_freq(rps); - DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", - intel_gpu_freq(rps, rps->min_freq), - rps->min_freq); + drm_dbg(&i915->drm, "min GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->min_freq), rps->min_freq); vlv_iosf_sb_put(i915, BIT(VLV_IOSF_SB_PUNIT) | @@ -1387,28 +1535,24 @@ static void chv_rps_init(struct intel_rps *rps) i915->mem_freq = 1600; break; } - DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", i915->mem_freq); + drm_dbg(&i915->drm, "DDR speed: %d MHz\n", i915->mem_freq); rps->max_freq = chv_rps_max_freq(rps); rps->rp0_freq = rps->max_freq; - DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", - intel_gpu_freq(rps, rps->max_freq), - rps->max_freq); + drm_dbg(&i915->drm, "max GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->max_freq), rps->max_freq); rps->efficient_freq = chv_rps_rpe_freq(rps); - DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", - intel_gpu_freq(rps, rps->efficient_freq), - rps->efficient_freq); + drm_dbg(&i915->drm, "RPe GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->efficient_freq), rps->efficient_freq); rps->rp1_freq = chv_rps_guar_freq(rps); - DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n", - intel_gpu_freq(rps, rps->rp1_freq), - rps->rp1_freq); + drm_dbg(&i915->drm, "RP1(Guar) GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->rp1_freq), rps->rp1_freq); rps->min_freq = chv_rps_min_freq(rps); - DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", - intel_gpu_freq(rps, rps->min_freq), - rps->min_freq); + drm_dbg(&i915->drm, "min GPU freq: %d MHz (%u)\n", + intel_gpu_freq(rps, rps->min_freq), rps->min_freq); vlv_iosf_sb_put(i915, BIT(VLV_IOSF_SB_PUNIT) | @@ -1471,12 +1615,13 @@ static void rps_work(struct work_struct *work) { struct intel_rps *rps = container_of(work, typeof(*rps), work); struct intel_gt *gt = rps_to_gt(rps); + struct drm_i915_private *i915 = rps_to_i915(rps); bool client_boost = false; int new_freq, adj, min, max; u32 pm_iir = 0; spin_lock_irq(>->irq_lock); - pm_iir = fetch_and_zero(&rps->pm_iir) & READ_ONCE(rps->pm_events); + pm_iir = fetch_and_zero(&rps->pm_iir) & rps->pm_events; client_boost = atomic_read(&rps->num_waiters); spin_unlock_irq(>->irq_lock); @@ -1485,6 +1630,10 @@ static void rps_work(struct work_struct *work) goto out; mutex_lock(&rps->lock); + if (!intel_rps_is_active(rps)) { + mutex_unlock(&rps->lock); + return; + } pm_iir |= vlv_wa_c0_ei(rps, pm_iir); @@ -1494,6 +1643,12 @@ static void rps_work(struct work_struct *work) max = rps->max_freq_softlimit; if (client_boost) max = rps->max_freq; + + GT_TRACE(gt, + "pm_iir:%x, client_boost:%s, last:%d, cur:%x, min:%x, max:%x\n", + pm_iir, yesno(client_boost), + adj, new_freq, min, max); + if (client_boost && new_freq < rps->boost_freq) { new_freq = rps->boost_freq; adj = 0; @@ -1525,30 +1680,18 @@ static void rps_work(struct work_struct *work) adj = 0; } - rps->last_adj = adj; - /* - * Limit deboosting and boosting to keep ourselves at the extremes - * when in the respective power modes (i.e. slowly decrease frequencies - * while in the HIGH_POWER zone and slowly increase frequencies while - * in the LOW_POWER zone). On idle, we will hit the timeout and drop - * to the next level quickly, and conversely if busy we expect to - * hit a waitboost and rapidly switch into max power. - */ - if ((adj < 0 && rps->power.mode == HIGH_POWER) || - (adj > 0 && rps->power.mode == LOW_POWER)) - rps->last_adj = 0; - - /* sysfs frequency interfaces may have snuck in while servicing the - * interrupt + * sysfs frequency limits may have snuck in while + * servicing the interrupt */ new_freq += adj; new_freq = clamp_t(int, new_freq, min, max); if (intel_rps_set(rps, new_freq)) { - DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n"); - rps->last_adj = 0; + drm_dbg(&i915->drm, "Failed to set new GPU frequency\n"); + adj = 0; } + rps->last_adj = adj; mutex_unlock(&rps->lock); @@ -1568,6 +1711,8 @@ void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) if (unlikely(!events)) return; + GT_TRACE(gt, "irq events:%x\n", events); + gen6_gt_pm_mask_irq(gt, events); rps->pm_iir |= events; @@ -1579,10 +1724,12 @@ void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) struct intel_gt *gt = rps_to_gt(rps); u32 events; - events = pm_iir & READ_ONCE(rps->pm_events); + events = pm_iir & rps->pm_events; if (events) { spin_lock(>->irq_lock); + GT_TRACE(gt, "irq events:%x\n", events); + gen6_gt_pm_mask_irq(gt, events); rps->pm_iir |= events; @@ -1640,6 +1787,7 @@ void intel_rps_init_early(struct intel_rps *rps) mutex_init(&rps->power.mutex); INIT_WORK(&rps->work, rps_work); + timer_setup(&rps->timer, rps_timer, 0); atomic_set(&rps->num_waiters, 0); } @@ -1668,9 +1816,10 @@ void intel_rps_init(struct intel_rps *rps) sandybridge_pcode_read(i915, GEN6_READ_OC_PARAMS, ¶ms, NULL); if (params & BIT(31)) { /* OC supported */ - DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n", - (rps->max_freq & 0xff) * 50, - (params & 0xff) * 50); + drm_dbg(&i915->drm, + "Overclocking supported, max: %dMHz, overclock: %dMHz\n", + (rps->max_freq & 0xff) * 50, + (params & 0xff) * 50); rps->max_freq = params & 0xff; } } @@ -1678,7 +1827,9 @@ void intel_rps_init(struct intel_rps *rps) /* Finally allow us to boost to max by default */ rps->boost_freq = rps->max_freq; rps->idle_freq = rps->min_freq; - rps->cur_freq = rps->idle_freq; + + /* Start in the middle, from here we will autotune based on workload */ + rps->cur_freq = rps->efficient_freq; rps->pm_intrmsk_mbz = 0; @@ -1695,6 +1846,12 @@ void intel_rps_init(struct intel_rps *rps) rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; } +void intel_rps_sanitize(struct intel_rps *rps) +{ + if (INTEL_GEN(rps_to_i915(rps)) >= 6) + rps_disable_interrupts(rps); +} + u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat) { struct drm_i915_private *i915 = rps_to_i915(rps); @@ -1722,7 +1879,7 @@ static u32 read_cagf(struct intel_rps *rps) freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); vlv_punit_put(i915); } else { - freq = intel_uncore_read(rps_to_gt(rps)->uncore, GEN6_RPSTAT1); + freq = intel_uncore_read(rps_to_uncore(rps), GEN6_RPSTAT1); } return intel_rps_get_cagf(rps, freq); @@ -1730,7 +1887,7 @@ static u32 read_cagf(struct intel_rps *rps) u32 intel_rps_read_actual_frequency(struct intel_rps *rps) { - struct intel_runtime_pm *rpm = rps_to_gt(rps)->uncore->rpm; + struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm; intel_wakeref_t wakeref; u32 freq = 0; @@ -1930,3 +2087,7 @@ bool i915_gpu_turbo_disable(void) return ret; } EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable); + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_rps.c" +#endif |