diff options
author | Maxime Ripard <maxime@cerno.tech> | 2022-05-03 11:53:42 +0200 |
---|---|---|
committer | Maxime Ripard <maxime@cerno.tech> | 2022-05-03 11:53:42 +0200 |
commit | b812f646bb818ca0e1806072eb7f0006f3a65dde (patch) | |
tree | 3ba2134dcefd8127f5ad286e0fe1135588fbd0b3 /drivers | |
parent | a5fc012e6ee75a899173398573e77207542f588a (diff) | |
parent | e954d2c94d007afe487044ecfa48f2518643df0e (diff) | |
download | linux-b812f646bb818ca0e1806072eb7f0006f3a65dde.tar.gz |
Merge drm/drm-next into drm-misc-next
Christian needs a backmerge to avoid a merge conflict for amdgpu.
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Diffstat (limited to 'drivers')
625 files changed, 15810 insertions, 8658 deletions
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 4556c86c3465..eb95e188d62b 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -96,11 +96,6 @@ static const struct dmi_system_id processor_power_dmi_table[] = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer Inc."), DMI_MATCH(DMI_PRODUCT_NAME,"L8400B series Notebook PC")}, (void *)1}, - /* T40 can not handle C3 idle state */ - { set_max_cstate, "IBM ThinkPad T40", { - DMI_MATCH(DMI_SYS_VENDOR, "IBM"), - DMI_MATCH(DMI_PRODUCT_NAME, "23737CU")}, - (void *)2}, {}, }; @@ -795,7 +790,8 @@ static int acpi_processor_setup_cstates(struct acpi_processor *pr) if (cx->type == ACPI_STATE_C1 || cx->type == ACPI_STATE_C2 || cx->type == ACPI_STATE_C3) { state->enter_dead = acpi_idle_play_dead; - drv->safe_state_index = count; + if (cx->type != ACPI_STATE_C3) + drv->safe_state_index = count; } /* * Halt-induced C1 is not good for ->enter_s2idle, because it diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 8351c5638880..f3b639e89dd8 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2295,6 +2295,7 @@ static int binder_do_deferred_txn_copies(struct binder_alloc *alloc, { int ret = 0; struct binder_sg_copy *sgc, *tmpsgc; + struct binder_ptr_fixup *tmppf; struct binder_ptr_fixup *pf = list_first_entry_or_null(pf_head, struct binder_ptr_fixup, node); @@ -2349,7 +2350,11 @@ static int binder_do_deferred_txn_copies(struct binder_alloc *alloc, list_del(&sgc->node); kfree(sgc); } - BUG_ON(!list_empty(pf_head)); + list_for_each_entry_safe(pf, tmppf, pf_head, node) { + BUG_ON(pf->skip_size == 0); + list_del(&pf->node); + kfree(pf); + } BUG_ON(!list_empty(sgc_head)); return ret > 0 ? -EINVAL : ret; @@ -2486,6 +2491,9 @@ static int binder_translate_fd_array(struct list_head *pf_head, struct binder_proc *proc = thread->proc; int ret; + if (fda->num_fds == 0) + return 0; + fd_buf_size = sizeof(u32) * fda->num_fds; if (fda->num_fds >= SIZE_MAX / sizeof(u32)) { binder_user_error("%d:%d got transaction with invalid number of fds (%lld)\n", diff --git a/drivers/ata/pata_marvell.c b/drivers/ata/pata_marvell.c index 0c5a51970fbf..014ccb0f45dc 100644 --- a/drivers/ata/pata_marvell.c +++ b/drivers/ata/pata_marvell.c @@ -77,6 +77,8 @@ static int marvell_cable_detect(struct ata_port *ap) switch(ap->port_no) { case 0: + if (!ap->ioaddr.bmdma_addr) + return ATA_CBL_PATA_UNK; if (ioread8(ap->ioaddr.bmdma_addr + 1) & 1) return ATA_CBL_PATA40; return ATA_CBL_PATA80; diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 1d6636ebaac5..f73b836047cf 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -667,6 +667,15 @@ const struct cpumask *cpu_coregroup_mask(int cpu) core_mask = &cpu_topology[cpu].llc_sibling; } + /* + * For systems with no shared cpu-side LLC but with clusters defined, + * extend core_mask to cluster_siblings. The sched domain builder will + * then remove MC as redundant with CLS if SCHED_CLUSTER is enabled. + */ + if (IS_ENABLED(CONFIG_SCHED_CLUSTER) && + cpumask_subset(core_mask, &cpu_topology[cpu].cluster_sibling)) + core_mask = &cpu_topology[cpu].cluster_sibling; + return core_mask; } @@ -684,7 +693,7 @@ void update_siblings_masks(unsigned int cpuid) for_each_online_cpu(cpu) { cpu_topo = &cpu_topology[cpu]; - if (cpuid_topo->llc_id == cpu_topo->llc_id) { + if (cpu_topo->llc_id != -1 && cpuid_topo->llc_id == cpu_topo->llc_id) { cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling); cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling); } diff --git a/drivers/base/dd.c b/drivers/base/dd.c index af6bea56f4e2..3fc3b5940bb3 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -296,6 +296,7 @@ int driver_deferred_probe_check_state(struct device *dev) return -EPROBE_DEFER; } +EXPORT_SYMBOL_GPL(driver_deferred_probe_check_state); static void deferred_probe_timeout_work_func(struct work_struct *work) { diff --git a/drivers/base/topology.c b/drivers/base/topology.c index e9d1efcda89b..ac6ad9ab67f9 100644 --- a/drivers/base/topology.c +++ b/drivers/base/topology.c @@ -152,9 +152,19 @@ static struct attribute *default_attrs[] = { NULL }; +static umode_t topology_is_visible(struct kobject *kobj, + struct attribute *attr, int unused) +{ + if (attr == &dev_attr_ppin.attr && !topology_ppin(kobj_to_dev(kobj)->id)) + return 0; + + return attr->mode; +} + static const struct attribute_group topology_attr_group = { .attrs = default_attrs, .bin_attrs = bin_attrs, + .is_visible = topology_is_visible, .name = "topology" }; diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 519b6d38d4df..fdb81f2794cd 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -33,6 +33,22 @@ config BLK_DEV_FD To compile this driver as a module, choose M here: the module will be called floppy. +config BLK_DEV_FD_RAWCMD + bool "Support for raw floppy disk commands (DEPRECATED)" + depends on BLK_DEV_FD + help + If you want to use actual physical floppies and expect to do + special low-level hardware accesses to them (access and use + non-standard formats, for example), then enable this. + + Note that the code enabled by this option is rarely used and + might be unstable or insecure, and distros should not enable it. + + Note: FDRAWCMD is deprecated and will be removed from the kernel + in the near future. + + If unsure, say N. + config AMIGA_FLOPPY tristate "Amiga floppy support" depends on AMIGA diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 8c647532e3ce..d5b9ff9bcbb2 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -2982,6 +2982,8 @@ static const char *drive_name(int type, int drive) return "(null)"; } +#ifdef CONFIG_BLK_DEV_FD_RAWCMD + /* raw commands */ static void raw_cmd_done(int flag) { @@ -3181,6 +3183,35 @@ static int raw_cmd_ioctl(int cmd, void __user *param) return ret; } +static int floppy_raw_cmd_ioctl(int type, int drive, int cmd, + void __user *param) +{ + int ret; + + pr_warn_once("Note: FDRAWCMD is deprecated and will be removed from the kernel in the near future.\n"); + + if (type) + return -EINVAL; + if (lock_fdc(drive)) + return -EINTR; + set_floppy(drive); + ret = raw_cmd_ioctl(cmd, param); + if (ret == -EINTR) + return -EINTR; + process_fd_request(); + return ret; +} + +#else /* CONFIG_BLK_DEV_FD_RAWCMD */ + +static int floppy_raw_cmd_ioctl(int type, int drive, int cmd, + void __user *param) +{ + return -EOPNOTSUPP; +} + +#endif + static int invalidate_drive(struct block_device *bdev) { /* invalidate the buffer track to force a reread */ @@ -3369,7 +3400,6 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int { int drive = (long)bdev->bd_disk->private_data; int type = ITYPE(drive_state[drive].fd_device); - int i; int ret; int size; union inparam { @@ -3520,16 +3550,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int outparam = &write_errors[drive]; break; case FDRAWCMD: - if (type) - return -EINVAL; - if (lock_fdc(drive)) - return -EINTR; - set_floppy(drive); - i = raw_cmd_ioctl(cmd, (void __user *)param); - if (i == -EINTR) - return -EINTR; - process_fd_request(); - return i; + return floppy_raw_cmd_ioctl(type, drive, cmd, (void __user *)param); case FDTWADDLE: if (lock_fdc(drive)) return -EINTR; diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 05b1120e6623..c441a4972064 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -1600,7 +1600,7 @@ static enum blk_eh_timer_return null_timeout_rq(struct request *rq, bool res) * Only fake timeouts need to execute blk_mq_complete_request() here. */ cmd->error = BLK_STS_TIMEOUT; - if (cmd->fake_timeout) + if (cmd->fake_timeout || hctx->type == HCTX_TYPE_POLL) blk_mq_complete_request(rq); return BLK_EH_DONE; } diff --git a/drivers/bus/fsl-mc/fsl-mc-msi.c b/drivers/bus/fsl-mc/fsl-mc-msi.c index 5e0e4393ce4d..0cfe859a4ac4 100644 --- a/drivers/bus/fsl-mc/fsl-mc-msi.c +++ b/drivers/bus/fsl-mc/fsl-mc-msi.c @@ -224,8 +224,12 @@ int fsl_mc_msi_domain_alloc_irqs(struct device *dev, unsigned int irq_count) if (error) return error; + msi_lock_descs(dev); if (msi_first_desc(dev, MSI_DESC_ALL)) - return -EINVAL; + error = -EINVAL; + msi_unlock_descs(dev); + if (error) + return error; /* * NOTE: Calling this function will trigger the invocation of the diff --git a/drivers/bus/imx-weim.c b/drivers/bus/imx-weim.c index 60fbd42041dd..828c66bbaa67 100644 --- a/drivers/bus/imx-weim.c +++ b/drivers/bus/imx-weim.c @@ -352,8 +352,7 @@ static int of_weim_notify(struct notifier_block *nb, unsigned long action, pdev = of_find_device_by_node(rd->dn); if (!pdev) { - dev_err(&pdev->dev, - "Could not find platform device for '%pOF'\n", + pr_err("Could not find platform device for '%pOF'\n", rd->dn); ret = notifier_from_errno(-EINVAL); @@ -370,7 +369,7 @@ static int of_weim_notify(struct notifier_block *nb, unsigned long action, return ret; } -struct notifier_block weim_of_notifier = { +static struct notifier_block weim_of_notifier = { .notifier_call = of_weim_notify, }; #endif /* IS_ENABLED(CONFIG_OF_DYNAMIC) */ diff --git a/drivers/bus/mhi/host/pci_generic.c b/drivers/bus/mhi/host/pci_generic.c index 9527b7d63840..541ced27d941 100644 --- a/drivers/bus/mhi/host/pci_generic.c +++ b/drivers/bus/mhi/host/pci_generic.c @@ -1060,6 +1060,7 @@ static int __maybe_unused mhi_pci_freeze(struct device *dev) * the intermediate restore kernel reinitializes MHI device with new * context. */ + flush_work(&mhi_pdev->recovery_work); if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) { mhi_power_down(mhi_cntrl, true); mhi_unprepare_after_power_down(mhi_cntrl); @@ -1085,6 +1086,7 @@ static const struct dev_pm_ops mhi_pci_pm_ops = { .resume = mhi_pci_resume, .freeze = mhi_pci_freeze, .thaw = mhi_pci_restore, + .poweroff = mhi_pci_freeze, .restore = mhi_pci_restore, #endif }; diff --git a/drivers/bus/sunxi-rsb.c b/drivers/bus/sunxi-rsb.c index 4566e730ef2b..60b082fe2ed0 100644 --- a/drivers/bus/sunxi-rsb.c +++ b/drivers/bus/sunxi-rsb.c @@ -227,6 +227,8 @@ static struct sunxi_rsb_device *sunxi_rsb_device_create(struct sunxi_rsb *rsb, dev_dbg(&rdev->dev, "device %s registered\n", dev_name(&rdev->dev)); + return rdev; + err_device_add: put_device(&rdev->dev); diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c index 54c0ee6dda30..7a1b1f9e4933 100644 --- a/drivers/bus/ti-sysc.c +++ b/drivers/bus/ti-sysc.c @@ -3232,13 +3232,27 @@ static int sysc_check_disabled_devices(struct sysc *ddata) */ static int sysc_check_active_timer(struct sysc *ddata) { + int error; + if (ddata->cap->type != TI_SYSC_OMAP2_TIMER && ddata->cap->type != TI_SYSC_OMAP4_TIMER) return 0; + /* + * Quirk for omap3 beagleboard revision A to B4 to use gpt12. + * Revision C and later are fixed with commit 23885389dbbb ("ARM: + * dts: Fix timer regression for beagleboard revision c"). This all + * can be dropped if we stop supporting old beagleboard revisions + * A to B4 at some point. + */ + if (sysc_soc->soc == SOC_3430) + error = -ENXIO; + else + error = -EBUSY; + if ((ddata->cfg.quirks & SYSC_QUIRK_NO_RESET_ON_INIT) && (ddata->cfg.quirks & SYSC_QUIRK_NO_IDLE)) - return -ENXIO; + return error; return 0; } diff --git a/drivers/char/random.c b/drivers/char/random.c index e15063d61460..4c9adb4f3d5d 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -318,6 +318,13 @@ static void crng_reseed(bool force) * the resultant ChaCha state to the user, along with the second * half of the block containing 32 bytes of random data that may * be used; random_data_len may not be greater than 32. + * + * The returned ChaCha state contains within it a copy of the old + * key value, at index 4, so the state should always be zeroed out + * immediately after using in order to maintain forward secrecy. + * If the state cannot be erased in a timely manner, then it is + * safer to set the random_data parameter to &chacha_state[4] so + * that this function overwrites it before returning. */ static void crng_fast_key_erasure(u8 key[CHACHA_KEY_SIZE], u32 chacha_state[CHACHA_STATE_WORDS], @@ -523,8 +530,7 @@ EXPORT_SYMBOL(get_random_bytes); static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes) { - ssize_t ret = 0; - size_t len; + size_t len, left, ret = 0; u32 chacha_state[CHACHA_STATE_WORDS]; u8 output[CHACHA_BLOCK_SIZE]; @@ -543,37 +549,40 @@ static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes) * the user directly. */ if (nbytes <= CHACHA_KEY_SIZE) { - ret = copy_to_user(buf, &chacha_state[4], nbytes) ? -EFAULT : nbytes; + ret = nbytes - copy_to_user(buf, &chacha_state[4], nbytes); goto out_zero_chacha; } - do { + for (;;) { chacha20_block(chacha_state, output); if (unlikely(chacha_state[12] == 0)) ++chacha_state[13]; len = min_t(size_t, nbytes, CHACHA_BLOCK_SIZE); - if (copy_to_user(buf, output, len)) { - ret = -EFAULT; + left = copy_to_user(buf, output, len); + if (left) { + ret += len - left; break; } - nbytes -= len; buf += len; ret += len; + nbytes -= len; + if (!nbytes) + break; BUILD_BUG_ON(PAGE_SIZE % CHACHA_BLOCK_SIZE != 0); - if (!(ret % PAGE_SIZE) && nbytes) { + if (ret % PAGE_SIZE == 0) { if (signal_pending(current)) break; cond_resched(); } - } while (nbytes); + } memzero_explicit(output, sizeof(output)); out_zero_chacha: memzero_explicit(chacha_state, sizeof(chacha_state)); - return ret; + return ret ? ret : -EFAULT; } /* @@ -1016,7 +1025,7 @@ int __init rand_initialize(void) */ void add_device_randomness(const void *buf, size_t size) { - cycles_t cycles = random_get_entropy(); + unsigned long cycles = random_get_entropy(); unsigned long flags, now = jiffies; if (crng_init == 0 && size) @@ -1047,8 +1056,7 @@ struct timer_rand_state { */ static void add_timer_randomness(struct timer_rand_state *state, unsigned int num) { - cycles_t cycles = random_get_entropy(); - unsigned long flags, now = jiffies; + unsigned long cycles = random_get_entropy(), now = jiffies, flags; long delta, delta2, delta3; spin_lock_irqsave(&input_pool.lock, flags); @@ -1337,8 +1345,7 @@ static void mix_interrupt_randomness(struct work_struct *work) void add_interrupt_randomness(int irq) { enum { MIX_INFLIGHT = 1U << 31 }; - cycles_t cycles = random_get_entropy(); - unsigned long now = jiffies; + unsigned long cycles = random_get_entropy(), now = jiffies; struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness); struct pt_regs *regs = get_irq_regs(); unsigned int new_count; @@ -1351,16 +1358,12 @@ void add_interrupt_randomness(int irq) if (cycles == 0) cycles = get_reg(fast_pool, regs); - if (sizeof(cycles) == 8) + if (sizeof(unsigned long) == 8) { irq_data.u64[0] = cycles ^ rol64(now, 32) ^ irq; - else { + irq_data.u64[1] = regs ? instruction_pointer(regs) : _RET_IP_; + } else { irq_data.u32[0] = cycles ^ irq; irq_data.u32[1] = now; - } - - if (sizeof(unsigned long) == 8) - irq_data.u64[1] = regs ? instruction_pointer(regs) : _RET_IP_; - else { irq_data.u32[2] = regs ? instruction_pointer(regs) : _RET_IP_; irq_data.u32[3] = get_reg(fast_pool, regs); } @@ -1407,7 +1410,7 @@ static void entropy_timer(struct timer_list *t) static void try_to_generate_entropy(void) { struct { - cycles_t cycles; + unsigned long cycles; struct timer_list timer; } stack; diff --git a/drivers/clk/microchip/clk-mpfs.c b/drivers/clk/microchip/clk-mpfs.c index aa1561b773d6..070c3b896559 100644 --- a/drivers/clk/microchip/clk-mpfs.c +++ b/drivers/clk/microchip/clk-mpfs.c @@ -11,20 +11,48 @@ #include <dt-bindings/clock/microchip,mpfs-clock.h> /* address offset of control registers */ +#define REG_MSSPLL_REF_CR 0x08u +#define REG_MSSPLL_POSTDIV_CR 0x10u +#define REG_MSSPLL_SSCG_2_CR 0x2Cu #define REG_CLOCK_CONFIG_CR 0x08u +#define REG_RTC_CLOCK_CR 0x0Cu #define REG_SUBBLK_CLOCK_CR 0x84u #define REG_SUBBLK_RESET_CR 0x88u +#define MSSPLL_FBDIV_SHIFT 0x00u +#define MSSPLL_FBDIV_WIDTH 0x0Cu +#define MSSPLL_REFDIV_SHIFT 0x08u +#define MSSPLL_REFDIV_WIDTH 0x06u +#define MSSPLL_POSTDIV_SHIFT 0x08u +#define MSSPLL_POSTDIV_WIDTH 0x07u +#define MSSPLL_FIXED_DIV 4u + struct mpfs_clock_data { void __iomem *base; + void __iomem *msspll_base; struct clk_hw_onecell_data hw_data; }; +struct mpfs_msspll_hw_clock { + void __iomem *base; + unsigned int id; + u32 reg_offset; + u32 shift; + u32 width; + u32 flags; + struct clk_hw hw; + struct clk_init_data init; +}; + +#define to_mpfs_msspll_clk(_hw) container_of(_hw, struct mpfs_msspll_hw_clock, hw) + struct mpfs_cfg_clock { const struct clk_div_table *table; unsigned int id; + u32 reg_offset; u8 shift; u8 width; + u8 flags; }; struct mpfs_cfg_hw_clock { @@ -55,7 +83,7 @@ struct mpfs_periph_hw_clock { */ static DEFINE_SPINLOCK(mpfs_clk_lock); -static const struct clk_parent_data mpfs_cfg_parent[] = { +static const struct clk_parent_data mpfs_ext_ref[] = { { .index = 0 }, }; @@ -69,6 +97,86 @@ static const struct clk_div_table mpfs_div_ahb_table[] = { { 0, 0 } }; +/* + * The only two supported reference clock frequencies for the PolarFire SoC are + * 100 and 125 MHz, as the rtc reference is required to be 1 MHz. + * It therefore only needs to have divider table entries corresponding to + * divide by 100 and 125. + */ +static const struct clk_div_table mpfs_div_rtcref_table[] = { + { 100, 100 }, { 125, 125 }, + { 0, 0 } +}; + +static unsigned long mpfs_clk_msspll_recalc_rate(struct clk_hw *hw, unsigned long prate) +{ + struct mpfs_msspll_hw_clock *msspll_hw = to_mpfs_msspll_clk(hw); + void __iomem *mult_addr = msspll_hw->base + msspll_hw->reg_offset; + void __iomem *ref_div_addr = msspll_hw->base + REG_MSSPLL_REF_CR; + void __iomem *postdiv_addr = msspll_hw->base + REG_MSSPLL_POSTDIV_CR; + u32 mult, ref_div, postdiv; + + mult = readl_relaxed(mult_addr) >> MSSPLL_FBDIV_SHIFT; + mult &= clk_div_mask(MSSPLL_FBDIV_WIDTH); + ref_div = readl_relaxed(ref_div_addr) >> MSSPLL_REFDIV_SHIFT; + ref_div &= clk_div_mask(MSSPLL_REFDIV_WIDTH); + postdiv = readl_relaxed(postdiv_addr) >> MSSPLL_POSTDIV_SHIFT; + postdiv &= clk_div_mask(MSSPLL_POSTDIV_WIDTH); + + return prate * mult / (ref_div * MSSPLL_FIXED_DIV * postdiv); +} + +static const struct clk_ops mpfs_clk_msspll_ops = { + .recalc_rate = mpfs_clk_msspll_recalc_rate, +}; + +#define CLK_PLL(_id, _name, _parent, _shift, _width, _flags, _offset) { \ + .id = _id, \ + .shift = _shift, \ + .width = _width, \ + .reg_offset = _offset, \ + .flags = _flags, \ + .hw.init = CLK_HW_INIT_PARENTS_DATA(_name, _parent, &mpfs_clk_msspll_ops, 0), \ +} + +static struct mpfs_msspll_hw_clock mpfs_msspll_clks[] = { + CLK_PLL(CLK_MSSPLL, "clk_msspll", mpfs_ext_ref, MSSPLL_FBDIV_SHIFT, + MSSPLL_FBDIV_WIDTH, 0, REG_MSSPLL_SSCG_2_CR), +}; + +static int mpfs_clk_register_msspll(struct device *dev, struct mpfs_msspll_hw_clock *msspll_hw, + void __iomem *base) +{ + msspll_hw->base = base; + + return devm_clk_hw_register(dev, &msspll_hw->hw); +} + +static int mpfs_clk_register_mssplls(struct device *dev, struct mpfs_msspll_hw_clock *msspll_hws, + unsigned int num_clks, struct mpfs_clock_data *data) +{ + void __iomem *base = data->msspll_base; + unsigned int i; + int ret; + + for (i = 0; i < num_clks; i++) { + struct mpfs_msspll_hw_clock *msspll_hw = &msspll_hws[i]; + + ret = mpfs_clk_register_msspll(dev, msspll_hw, base); + if (ret) + return dev_err_probe(dev, ret, "failed to register msspll id: %d\n", + CLK_MSSPLL); + + data->hw_data.hws[msspll_hw->id] = &msspll_hw->hw; + } + + return 0; +} + +/* + * "CFG" clocks + */ + static unsigned long mpfs_cfg_clk_recalc_rate(struct clk_hw *hw, unsigned long prate) { struct mpfs_cfg_hw_clock *cfg_hw = to_mpfs_cfg_clk(hw); @@ -76,10 +184,10 @@ static unsigned long mpfs_cfg_clk_recalc_rate(struct clk_hw *hw, unsigned long p void __iomem *base_addr = cfg_hw->sys_base; u32 val; - val = readl_relaxed(base_addr + REG_CLOCK_CONFIG_CR) >> cfg->shift; + val = readl_relaxed(base_addr + cfg->reg_offset) >> cfg->shift; val &= clk_div_mask(cfg->width); - return prate / (1u << val); + return divider_recalc_rate(hw, prate, val, cfg->table, cfg->flags, cfg->width); } static long mpfs_cfg_clk_round_rate(struct clk_hw *hw, unsigned long rate, unsigned long *prate) @@ -105,11 +213,10 @@ static int mpfs_cfg_clk_set_rate(struct clk_hw *hw, unsigned long rate, unsigned return divider_setting; spin_lock_irqsave(&mpfs_clk_lock, flags); - - val = readl_relaxed(base_addr + REG_CLOCK_CONFIG_CR); + val = readl_relaxed(base_addr + cfg->reg_offset); val &= ~(clk_div_mask(cfg->width) << cfg_hw->cfg.shift); val |= divider_setting << cfg->shift; - writel_relaxed(val, base_addr + REG_CLOCK_CONFIG_CR); + writel_relaxed(val, base_addr + cfg->reg_offset); spin_unlock_irqrestore(&mpfs_clk_lock, flags); @@ -122,19 +229,33 @@ static const struct clk_ops mpfs_clk_cfg_ops = { .set_rate = mpfs_cfg_clk_set_rate, }; -#define CLK_CFG(_id, _name, _parent, _shift, _width, _table, _flags) { \ - .cfg.id = _id, \ - .cfg.shift = _shift, \ - .cfg.width = _width, \ - .cfg.table = _table, \ - .hw.init = CLK_HW_INIT_PARENTS_DATA(_name, _parent, &mpfs_clk_cfg_ops, \ - _flags), \ +#define CLK_CFG(_id, _name, _parent, _shift, _width, _table, _flags, _offset) { \ + .cfg.id = _id, \ + .cfg.shift = _shift, \ + .cfg.width = _width, \ + .cfg.table = _table, \ + .cfg.reg_offset = _offset, \ + .cfg.flags = _flags, \ + .hw.init = CLK_HW_INIT(_name, _parent, &mpfs_clk_cfg_ops, 0), \ } static struct mpfs_cfg_hw_clock mpfs_cfg_clks[] = { - CLK_CFG(CLK_CPU, "clk_cpu", mpfs_cfg_parent, 0, 2, mpfs_div_cpu_axi_table, 0), - CLK_CFG(CLK_AXI, "clk_axi", mpfs_cfg_parent, 2, 2, mpfs_div_cpu_axi_table, 0), - CLK_CFG(CLK_AHB, "clk_ahb", mpfs_cfg_parent, 4, 2, mpfs_div_ahb_table, 0), + CLK_CFG(CLK_CPU, "clk_cpu", "clk_msspll", 0, 2, mpfs_div_cpu_axi_table, 0, + REG_CLOCK_CONFIG_CR), + CLK_CFG(CLK_AXI, "clk_axi", "clk_msspll", 2, 2, mpfs_div_cpu_axi_table, 0, + REG_CLOCK_CONFIG_CR), + CLK_CFG(CLK_AHB, "clk_ahb", "clk_msspll", 4, 2, mpfs_div_ahb_table, 0, + REG_CLOCK_CONFIG_CR), + { + .cfg.id = CLK_RTCREF, + .cfg.shift = 0, + .cfg.width = 12, + .cfg.table = mpfs_div_rtcref_table, + .cfg.reg_offset = REG_RTC_CLOCK_CR, + .cfg.flags = CLK_DIVIDER_ONE_BASED, + .hw.init = + CLK_HW_INIT_PARENTS_DATA("clk_rtcref", mpfs_ext_ref, &mpfs_clk_cfg_ops, 0), + } }; static int mpfs_clk_register_cfg(struct device *dev, struct mpfs_cfg_hw_clock *cfg_hw, @@ -160,13 +281,17 @@ static int mpfs_clk_register_cfgs(struct device *dev, struct mpfs_cfg_hw_clock * return dev_err_probe(dev, ret, "failed to register clock id: %d\n", cfg_hw->cfg.id); - id = cfg_hws[i].cfg.id; + id = cfg_hw->cfg.id; data->hw_data.hws[id] = &cfg_hw->hw; } return 0; } +/* + * peripheral clocks - devices connected to axi or ahb buses. + */ + static int mpfs_periph_clk_enable(struct clk_hw *hw) { struct mpfs_periph_hw_clock *periph_hw = to_mpfs_periph_clk(hw); @@ -200,10 +325,6 @@ static void mpfs_periph_clk_disable(struct clk_hw *hw) spin_lock_irqsave(&mpfs_clk_lock, flags); - reg = readl_relaxed(base_addr + REG_SUBBLK_RESET_CR); - val = reg | (1u << periph->shift); - writel_relaxed(val, base_addr + REG_SUBBLK_RESET_CR); - reg = readl_relaxed(base_addr + REG_SUBBLK_CLOCK_CR); val = reg & ~(1u << periph->shift); writel_relaxed(val, base_addr + REG_SUBBLK_CLOCK_CR); @@ -249,8 +370,10 @@ static const struct clk_ops mpfs_periph_clk_ops = { * trap handler * - CLK_MMUART0: reserved by the hss * - CLK_DDRC: provides clock to the ddr subsystem - * - CLK_FICx: these provide clocks for sections of the fpga fabric, disabling them would - * cause the fabric to go into reset + * - CLK_FICx: these provide the processor side clocks to the "FIC" (Fabric InterConnect) + * clock domain crossers which provide the interface to the FPGA fabric. Disabling them + * causes the FPGA fabric to go into reset. + * - CLK_ATHENA: The athena clock is FIC4, which is reserved for the Athena TeraFire. */ static struct mpfs_periph_hw_clock mpfs_periph_clks[] = { @@ -258,7 +381,7 @@ static struct mpfs_periph_hw_clock mpfs_periph_clks[] = { CLK_PERIPH(CLK_MAC0, "clk_periph_mac0", PARENT_CLK(AHB), 1, 0), CLK_PERIPH(CLK_MAC1, "clk_periph_mac1", PARENT_CLK(AHB), 2, 0), CLK_PERIPH(CLK_MMC, "clk_periph_mmc", PARENT_CLK(AHB), 3, 0), - CLK_PERIPH(CLK_TIMER, "clk_periph_timer", PARENT_CLK(AHB), 4, 0), + CLK_PERIPH(CLK_TIMER, "clk_periph_timer", PARENT_CLK(RTCREF), 4, 0), CLK_PERIPH(CLK_MMUART0, "clk_periph_mmuart0", PARENT_CLK(AHB), 5, CLK_IS_CRITICAL), CLK_PERIPH(CLK_MMUART1, "clk_periph_mmuart1", PARENT_CLK(AHB), 6, 0), CLK_PERIPH(CLK_MMUART2, "clk_periph_mmuart2", PARENT_CLK(AHB), 7, 0), @@ -277,11 +400,11 @@ static struct mpfs_periph_hw_clock mpfs_periph_clks[] = { CLK_PERIPH(CLK_GPIO1, "clk_periph_gpio1", PARENT_CLK(AHB), 21, 0), CLK_PERIPH(CLK_GPIO2, "clk_periph_gpio2", PARENT_CLK(AHB), 22, 0), CLK_PERIPH(CLK_DDRC, "clk_periph_ddrc", PARENT_CLK(AHB), 23, CLK_IS_CRITICAL), - CLK_PERIPH(CLK_FIC0, "clk_periph_fic0", PARENT_CLK(AHB), 24, CLK_IS_CRITICAL), - CLK_PERIPH(CLK_FIC1, "clk_periph_fic1", PARENT_CLK(AHB), 25, CLK_IS_CRITICAL), - CLK_PERIPH(CLK_FIC2, "clk_periph_fic2", PARENT_CLK(AHB), 26, CLK_IS_CRITICAL), - CLK_PERIPH(CLK_FIC3, "clk_periph_fic3", PARENT_CLK(AHB), 27, CLK_IS_CRITICAL), - CLK_PERIPH(CLK_ATHENA, "clk_periph_athena", PARENT_CLK(AHB), 28, 0), + CLK_PERIPH(CLK_FIC0, "clk_periph_fic0", PARENT_CLK(AXI), 24, CLK_IS_CRITICAL), + CLK_PERIPH(CLK_FIC1, "clk_periph_fic1", PARENT_CLK(AXI), 25, CLK_IS_CRITICAL), + CLK_PERIPH(CLK_FIC2, "clk_periph_fic2", PARENT_CLK(AXI), 26, CLK_IS_CRITICAL), + CLK_PERIPH(CLK_FIC3, "clk_periph_fic3", PARENT_CLK(AXI), 27, CLK_IS_CRITICAL), + CLK_PERIPH(CLK_ATHENA, "clk_periph_athena", PARENT_CLK(AXI), 28, CLK_IS_CRITICAL), CLK_PERIPH(CLK_CFM, "clk_periph_cfm", PARENT_CLK(AHB), 29, 0), }; @@ -322,8 +445,9 @@ static int mpfs_clk_probe(struct platform_device *pdev) unsigned int num_clks; int ret; - /* CLK_RESERVED is not part of cfg_clks nor periph_clks, so add 1 */ - num_clks = ARRAY_SIZE(mpfs_cfg_clks) + ARRAY_SIZE(mpfs_periph_clks) + 1; + /* CLK_RESERVED is not part of clock arrays, so add 1 */ + num_clks = ARRAY_SIZE(mpfs_msspll_clks) + ARRAY_SIZE(mpfs_cfg_clks) + + ARRAY_SIZE(mpfs_periph_clks) + 1; clk_data = devm_kzalloc(dev, struct_size(clk_data, hw_data.hws, num_clks), GFP_KERNEL); if (!clk_data) @@ -333,8 +457,17 @@ static int mpfs_clk_probe(struct platform_device *pdev) if (IS_ERR(clk_data->base)) return PTR_ERR(clk_data->base); + clk_data->msspll_base = devm_platform_ioremap_resource(pdev, 1); + if (IS_ERR(clk_data->msspll_base)) + return PTR_ERR(clk_data->msspll_base); + clk_data->hw_data.num = num_clks; + ret = mpfs_clk_register_mssplls(dev, mpfs_msspll_clks, ARRAY_SIZE(mpfs_msspll_clks), + clk_data); + if (ret) + return ret; + ret = mpfs_clk_register_cfgs(dev, mpfs_cfg_clks, ARRAY_SIZE(mpfs_cfg_clks), clk_data); if (ret) return ret; diff --git a/drivers/clk/qcom/clk-rcg2.c b/drivers/clk/qcom/clk-rcg2.c index f675fd969c4d..e9c357309fd9 100644 --- a/drivers/clk/qcom/clk-rcg2.c +++ b/drivers/clk/qcom/clk-rcg2.c @@ -818,7 +818,7 @@ EXPORT_SYMBOL_GPL(clk_pixel_ops); static int clk_gfx3d_determine_rate(struct clk_hw *hw, struct clk_rate_request *req) { - struct clk_rate_request parent_req = { }; + struct clk_rate_request parent_req = { .min_rate = 0, .max_rate = ULONG_MAX }; struct clk_rcg2_gfx3d *cgfx = to_clk_rcg2_gfx3d(hw); struct clk_hw *xo, *p0, *p1, *p2; unsigned long p0_rate; diff --git a/drivers/clk/sunxi-ng/ccu-sun6i-rtc.c b/drivers/clk/sunxi-ng/ccu-sun6i-rtc.c index 8a10bade7e0d..2f3ddc908ebd 100644 --- a/drivers/clk/sunxi-ng/ccu-sun6i-rtc.c +++ b/drivers/clk/sunxi-ng/ccu-sun6i-rtc.c @@ -241,6 +241,7 @@ static struct clk_init_data rtc_32k_init_data = { .ops = &ccu_mux_ops, .parent_hws = rtc_32k_parents, .num_parents = ARRAY_SIZE(rtc_32k_parents), /* updated during probe */ + .flags = CLK_IS_CRITICAL, }; static struct ccu_mux rtc_32k_clk = { @@ -346,6 +347,7 @@ static const struct of_device_id sun6i_rtc_ccu_match[] = { .compatible = "allwinner,sun50i-r329-rtc", .data = &sun50i_r329_rtc_ccu_data, }, + {}, }; int sun6i_rtc_ccu_probe(struct device *dev, void __iomem *reg) diff --git a/drivers/clk/sunxi/clk-sun9i-mmc.c b/drivers/clk/sunxi/clk-sun9i-mmc.c index 542b31d6e96d..636bcf2439ef 100644 --- a/drivers/clk/sunxi/clk-sun9i-mmc.c +++ b/drivers/clk/sunxi/clk-sun9i-mmc.c @@ -109,6 +109,8 @@ static int sun9i_a80_mmc_config_clk_probe(struct platform_device *pdev) spin_lock_init(&data->lock); r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!r) + return -EINVAL; /* one clock/reset pair per word */ count = DIV_ROUND_UP((resource_size(r)), SUN9I_MMC_WIDTH); data->membase = devm_ioremap_resource(&pdev->dev, r); diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index f9d593ff4718..0253731d6d25 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -24,13 +24,17 @@ #define CLK_HW_DIV 2 #define LUT_TURBO_IND 1 +#define GT_IRQ_STATUS BIT(2) + #define HZ_PER_KHZ 1000 struct qcom_cpufreq_soc_data { u32 reg_enable; + u32 reg_domain_state; u32 reg_dcvs_ctrl; u32 reg_freq_lut; u32 reg_volt_lut; + u32 reg_intr_clr; u32 reg_current_vote; u32 reg_perf_state; u8 lut_row_size; @@ -280,37 +284,46 @@ static void qcom_get_related_cpus(int index, struct cpumask *m) } } -static unsigned int qcom_lmh_get_throttle_freq(struct qcom_cpufreq_data *data) +static unsigned long qcom_lmh_get_throttle_freq(struct qcom_cpufreq_data *data) { - unsigned int val = readl_relaxed(data->base + data->soc_data->reg_current_vote); + unsigned int lval; + + if (data->soc_data->reg_current_vote) + lval = readl_relaxed(data->base + data->soc_data->reg_current_vote) & 0x3ff; + else + lval = readl_relaxed(data->base + data->soc_data->reg_domain_state) & 0xff; - return (val & 0x3FF) * 19200; + return lval * xo_rate; } static void qcom_lmh_dcvs_notify(struct qcom_cpufreq_data *data) { struct cpufreq_policy *policy = data->policy; - int cpu = cpumask_first(policy->cpus); + int cpu = cpumask_first(policy->related_cpus); struct device *dev = get_cpu_device(cpu); unsigned long freq_hz, throttled_freq; struct dev_pm_opp *opp; - unsigned int freq; /* * Get the h/w throttled frequency, normalize it using the * registered opp table and use it to calculate thermal pressure. */ - freq = qcom_lmh_get_throttle_freq(data); - freq_hz = freq * HZ_PER_KHZ; + freq_hz = qcom_lmh_get_throttle_freq(data); opp = dev_pm_opp_find_freq_floor(dev, &freq_hz); if (IS_ERR(opp) && PTR_ERR(opp) == -ERANGE) - dev_pm_opp_find_freq_ceil(dev, &freq_hz); + opp = dev_pm_opp_find_freq_ceil(dev, &freq_hz); + + if (IS_ERR(opp)) { + dev_warn(dev, "Can't find the OPP for throttling: %pe!\n", opp); + } else { + throttled_freq = freq_hz / HZ_PER_KHZ; - throttled_freq = freq_hz / HZ_PER_KHZ; + /* Update thermal pressure (the boost frequencies are accepted) */ + arch_update_thermal_pressure(policy->related_cpus, throttled_freq); - /* Update thermal pressure (the boost frequencies are accepted) */ - arch_update_thermal_pressure(policy->related_cpus, throttled_freq); + dev_pm_opp_put(opp); + } /* * In the unlikely case policy is unregistered do not enable @@ -350,6 +363,10 @@ static irqreturn_t qcom_lmh_dcvs_handle_irq(int irq, void *data) disable_irq_nosync(c_data->throttle_irq); schedule_delayed_work(&c_data->throttle_work, 0); + if (c_data->soc_data->reg_intr_clr) + writel_relaxed(GT_IRQ_STATUS, + c_data->base + c_data->soc_data->reg_intr_clr); + return IRQ_HANDLED; } @@ -365,9 +382,11 @@ static const struct qcom_cpufreq_soc_data qcom_soc_data = { static const struct qcom_cpufreq_soc_data epss_soc_data = { .reg_enable = 0x0, + .reg_domain_state = 0x20, .reg_dcvs_ctrl = 0xb0, .reg_freq_lut = 0x100, .reg_volt_lut = 0x200, + .reg_intr_clr = 0x308, .reg_perf_state = 0x320, .lut_row_size = 4, }; @@ -417,16 +436,39 @@ static int qcom_cpufreq_hw_lmh_init(struct cpufreq_policy *policy, int index) return 0; } -static void qcom_cpufreq_hw_lmh_exit(struct qcom_cpufreq_data *data) +static int qcom_cpufreq_hw_cpu_online(struct cpufreq_policy *policy) +{ + struct qcom_cpufreq_data *data = policy->driver_data; + struct platform_device *pdev = cpufreq_get_driver_data(); + int ret; + + ret = irq_set_affinity_hint(data->throttle_irq, policy->cpus); + if (ret) + dev_err(&pdev->dev, "Failed to set CPU affinity of %s[%d]\n", + data->irq_name, data->throttle_irq); + + return ret; +} + +static int qcom_cpufreq_hw_cpu_offline(struct cpufreq_policy *policy) { + struct qcom_cpufreq_data *data = policy->driver_data; + if (data->throttle_irq <= 0) - return; + return 0; mutex_lock(&data->throttle_lock); data->cancel_throttle = true; mutex_unlock(&data->throttle_lock); cancel_delayed_work_sync(&data->throttle_work); + irq_set_affinity_hint(data->throttle_irq, NULL); + + return 0; +} + +static void qcom_cpufreq_hw_lmh_exit(struct qcom_cpufreq_data *data) +{ free_irq(data->throttle_irq, data); } @@ -583,6 +625,8 @@ static struct cpufreq_driver cpufreq_qcom_hw_driver = { .get = qcom_cpufreq_hw_get, .init = qcom_cpufreq_hw_cpu_init, .exit = qcom_cpufreq_hw_cpu_exit, + .online = qcom_cpufreq_hw_cpu_online, + .offline = qcom_cpufreq_hw_cpu_offline, .register_em = cpufreq_register_em_with_opp, .fast_switch = qcom_cpufreq_hw_fast_switch, .name = "qcom-cpufreq-hw", diff --git a/drivers/cpufreq/sun50i-cpufreq-nvmem.c b/drivers/cpufreq/sun50i-cpufreq-nvmem.c index 2deed8d8773f..75e1bf3a08f7 100644 --- a/drivers/cpufreq/sun50i-cpufreq-nvmem.c +++ b/drivers/cpufreq/sun50i-cpufreq-nvmem.c @@ -98,8 +98,10 @@ static int sun50i_cpufreq_nvmem_probe(struct platform_device *pdev) return -ENOMEM; ret = sun50i_cpufreq_get_efuse(&speed); - if (ret) + if (ret) { + kfree(opp_tables); return ret; + } snprintf(name, MAX_NAME_LEN, "speed%d", speed); diff --git a/drivers/cpuidle/cpuidle-riscv-sbi.c b/drivers/cpuidle/cpuidle-riscv-sbi.c index b459eda2cd37..5c852e671992 100644 --- a/drivers/cpuidle/cpuidle-riscv-sbi.c +++ b/drivers/cpuidle/cpuidle-riscv-sbi.c @@ -22,6 +22,7 @@ #include <linux/pm_runtime.h> #include <asm/cpuidle.h> #include <asm/sbi.h> +#include <asm/smp.h> #include <asm/suspend.h> #include "dt_idle_states.h" diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index 1476156af74b..def564d1e8fa 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -1453,7 +1453,7 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie, { struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); struct at_xdmac *atxdmac = to_at_xdmac(atchan->chan.device); - struct at_xdmac_desc *desc, *_desc; + struct at_xdmac_desc *desc, *_desc, *iter; struct list_head *descs_list; enum dma_status ret; int residue, retry; @@ -1568,11 +1568,13 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie, * microblock. */ descs_list = &desc->descs_list; - list_for_each_entry_safe(desc, _desc, descs_list, desc_node) { - dwidth = at_xdmac_get_dwidth(desc->lld.mbr_cfg); - residue -= (desc->lld.mbr_ubc & 0xffffff) << dwidth; - if ((desc->lld.mbr_nda & 0xfffffffc) == cur_nda) + list_for_each_entry_safe(iter, _desc, descs_list, desc_node) { + dwidth = at_xdmac_get_dwidth(iter->lld.mbr_cfg); + residue -= (iter->lld.mbr_ubc & 0xffffff) << dwidth; + if ((iter->lld.mbr_nda & 0xfffffffc) == cur_nda) { + desc = iter; break; + } } residue += cur_ubc << dwidth; diff --git a/drivers/dma/dw-edma/dw-edma-v0-core.c b/drivers/dma/dw-edma/dw-edma-v0-core.c index 329fc2e57b70..33bc1e6c4cf2 100644 --- a/drivers/dma/dw-edma/dw-edma-v0-core.c +++ b/drivers/dma/dw-edma/dw-edma-v0-core.c @@ -414,14 +414,18 @@ void dw_edma_v0_core_start(struct dw_edma_chunk *chunk, bool first) SET_CH_32(dw, chan->dir, chan->id, ch_control1, (DW_EDMA_V0_CCS | DW_EDMA_V0_LLE)); /* Linked list */ + #ifdef CONFIG_64BIT - SET_CH_64(dw, chan->dir, chan->id, llp.reg, - chunk->ll_region.paddr); + /* llp is not aligned on 64bit -> keep 32bit accesses */ + SET_CH_32(dw, chan->dir, chan->id, llp.lsb, + lower_32_bits(chunk->ll_region.paddr)); + SET_CH_32(dw, chan->dir, chan->id, llp.msb, + upper_32_bits(chunk->ll_region.paddr)); #else /* CONFIG_64BIT */ - SET_CH_32(dw, chan->dir, chan->id, llp.lsb, - lower_32_bits(chunk->ll_region.paddr)); - SET_CH_32(dw, chan->dir, chan->id, llp.msb, - upper_32_bits(chunk->ll_region.paddr)); + SET_CH_32(dw, chan->dir, chan->id, llp.lsb, + lower_32_bits(chunk->ll_region.paddr)); + SET_CH_32(dw, chan->dir, chan->id, llp.msb, + upper_32_bits(chunk->ll_region.paddr)); #endif /* CONFIG_64BIT */ } /* Doorbell */ diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 3061fe857d69..f652da6ab47d 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -373,7 +373,6 @@ static void idxd_wq_device_reset_cleanup(struct idxd_wq *wq) { lockdep_assert_held(&wq->wq_lock); - idxd_wq_disable_cleanup(wq); wq->size = 0; wq->group = NULL; } @@ -701,14 +700,17 @@ static void idxd_device_wqs_clear_state(struct idxd_device *idxd) if (wq->state == IDXD_WQ_ENABLED) { idxd_wq_disable_cleanup(wq); - idxd_wq_device_reset_cleanup(wq); wq->state = IDXD_WQ_DISABLED; } + idxd_wq_device_reset_cleanup(wq); } } void idxd_device_clear_state(struct idxd_device *idxd) { + if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) + return; + idxd_groups_clear_state(idxd); idxd_engines_clear_state(idxd); idxd_device_wqs_clear_state(idxd); diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index e289fd48711a..c01db23e3333 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -150,14 +150,15 @@ static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie, */ int idxd_enqcmds(struct idxd_wq *wq, void __iomem *portal, const void *desc) { - int rc, retries = 0; + unsigned int retries = wq->enqcmds_retries; + int rc; do { rc = enqcmds(portal, desc); if (rc == 0) break; cpu_relax(); - } while (retries++ < wq->enqcmds_retries); + } while (retries--); return rc; } diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 7e19ab92b61a..dfd549685c46 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -905,6 +905,9 @@ static ssize_t wq_max_transfer_size_store(struct device *dev, struct device_attr u64 xfer_size; int rc; + if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) + return -EPERM; + if (wq->state != IDXD_WQ_DISABLED) return -EPERM; @@ -939,6 +942,9 @@ static ssize_t wq_max_batch_size_store(struct device *dev, struct device_attribu u64 batch_size; int rc; + if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) + return -EPERM; + if (wq->state != IDXD_WQ_DISABLED) return -EPERM; diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index 70c0aa931ddf..6196a7b3956b 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -198,12 +198,12 @@ struct sdma_script_start_addrs { s32 per_2_firi_addr; s32 mcu_2_firi_addr; s32 uart_2_per_addr; - s32 uart_2_mcu_ram_addr; + s32 uart_2_mcu_addr; s32 per_2_app_addr; s32 mcu_2_app_addr; s32 per_2_per_addr; s32 uartsh_2_per_addr; - s32 uartsh_2_mcu_ram_addr; + s32 uartsh_2_mcu_addr; s32 per_2_shp_addr; s32 mcu_2_shp_addr; s32 ata_2_mcu_addr; @@ -232,8 +232,8 @@ struct sdma_script_start_addrs { s32 mcu_2_ecspi_addr; s32 mcu_2_sai_addr; s32 sai_2_mcu_addr; - s32 uart_2_mcu_addr; - s32 uartsh_2_mcu_addr; + s32 uart_2_mcu_rom_addr; + s32 uartsh_2_mcu_rom_addr; /* End of v3 array */ s32 mcu_2_zqspi_addr; /* End of v4 array */ @@ -1796,17 +1796,17 @@ static void sdma_add_scripts(struct sdma_engine *sdma, saddr_arr[i] = addr_arr[i]; /* - * get uart_2_mcu_addr/uartsh_2_mcu_addr rom script specially because - * they are now replaced by uart_2_mcu_ram_addr/uartsh_2_mcu_ram_addr - * to be compatible with legacy freescale/nxp sdma firmware, and they - * are located in the bottom part of sdma_script_start_addrs which are - * beyond the SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V1. + * For compatibility with NXP internal legacy kernel before 4.19 which + * is based on uart ram script and mainline kernel based on uart rom + * script, both uart ram/rom scripts are present in newer sdma + * firmware. Use the rom versions if they are present (V3 or newer). */ - if (addr->uart_2_mcu_addr) - sdma->script_addrs->uart_2_mcu_addr = addr->uart_2_mcu_addr; - if (addr->uartsh_2_mcu_addr) - sdma->script_addrs->uartsh_2_mcu_addr = addr->uartsh_2_mcu_addr; - + if (sdma->script_number >= SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V3) { + if (addr->uart_2_mcu_rom_addr) + sdma->script_addrs->uart_2_mcu_addr = addr->uart_2_mcu_rom_addr; + if (addr->uartsh_2_mcu_rom_addr) + sdma->script_addrs->uartsh_2_mcu_addr = addr->uartsh_2_mcu_rom_addr; + } } static void sdma_load_firmware(const struct firmware *fw, void *context) @@ -1885,7 +1885,7 @@ static int sdma_event_remap(struct sdma_engine *sdma) u32 reg, val, shift, num_map, i; int ret = 0; - if (IS_ERR(np) || IS_ERR(gpr_np)) + if (IS_ERR(np) || !gpr_np) goto out; event_remap = of_find_property(np, propname, NULL); @@ -1933,7 +1933,7 @@ static int sdma_event_remap(struct sdma_engine *sdma) } out: - if (!IS_ERR(gpr_np)) + if (gpr_np) of_node_put(gpr_np); return ret; diff --git a/drivers/dma/mediatek/mtk-uart-apdma.c b/drivers/dma/mediatek/mtk-uart-apdma.c index 375e7e647df6..a1517ef1f4a0 100644 --- a/drivers/dma/mediatek/mtk-uart-apdma.c +++ b/drivers/dma/mediatek/mtk-uart-apdma.c @@ -274,7 +274,7 @@ static int mtk_uart_apdma_alloc_chan_resources(struct dma_chan *chan) unsigned int status; int ret; - ret = pm_runtime_get_sync(mtkd->ddev.dev); + ret = pm_runtime_resume_and_get(mtkd->ddev.dev); if (ret < 0) { pm_runtime_put_noidle(chan->device->dev); return ret; @@ -288,18 +288,21 @@ static int mtk_uart_apdma_alloc_chan_resources(struct dma_chan *chan) ret = readx_poll_timeout(readl, c->base + VFF_EN, status, !status, 10, 100); if (ret) - return ret; + goto err_pm; ret = request_irq(c->irq, mtk_uart_apdma_irq_handler, IRQF_TRIGGER_NONE, KBUILD_MODNAME, chan); if (ret < 0) { dev_err(chan->device->dev, "Can't request dma IRQ\n"); - return -EINVAL; + ret = -EINVAL; + goto err_pm; } if (mtkd->support_33bits) mtk_uart_apdma_write(c, VFF_4G_SUPPORT, VFF_4G_SUPPORT_CLR_B); +err_pm: + pm_runtime_put_noidle(mtkd->ddev.dev); return ret; } diff --git a/drivers/edac/synopsys_edac.c b/drivers/edac/synopsys_edac.c index f05ff02c0656..40b1abeca856 100644 --- a/drivers/edac/synopsys_edac.c +++ b/drivers/edac/synopsys_edac.c @@ -164,6 +164,11 @@ #define ECC_STAT_CECNT_SHIFT 8 #define ECC_STAT_BITNUM_MASK 0x7F +/* ECC error count register definitions */ +#define ECC_ERRCNT_UECNT_MASK 0xFFFF0000 +#define ECC_ERRCNT_UECNT_SHIFT 16 +#define ECC_ERRCNT_CECNT_MASK 0xFFFF + /* DDR QOS Interrupt register definitions */ #define DDR_QOS_IRQ_STAT_OFST 0x20200 #define DDR_QOSUE_MASK 0x4 @@ -423,15 +428,16 @@ static int zynqmp_get_error_info(struct synps_edac_priv *priv) base = priv->baseaddr; p = &priv->stat; + regval = readl(base + ECC_ERRCNT_OFST); + p->ce_cnt = regval & ECC_ERRCNT_CECNT_MASK; + p->ue_cnt = (regval & ECC_ERRCNT_UECNT_MASK) >> ECC_ERRCNT_UECNT_SHIFT; + if (!p->ce_cnt) + goto ue_err; + regval = readl(base + ECC_STAT_OFST); if (!regval) return 1; - p->ce_cnt = (regval & ECC_STAT_CECNT_MASK) >> ECC_STAT_CECNT_SHIFT; - p->ue_cnt = (regval & ECC_STAT_UECNT_MASK) >> ECC_STAT_UECNT_SHIFT; - if (!p->ce_cnt) - goto ue_err; - p->ceinfo.bitpos = (regval & ECC_STAT_BITNUM_MASK); regval = readl(base + ECC_CEADDR0_OFST); diff --git a/drivers/firmware/arm_scmi/clock.c b/drivers/firmware/arm_scmi/clock.c index cf6fed6dec77..45600acc0f45 100644 --- a/drivers/firmware/arm_scmi/clock.c +++ b/drivers/firmware/arm_scmi/clock.c @@ -49,7 +49,7 @@ struct scmi_msg_resp_clock_describe_rates { struct { __le32 value_low; __le32 value_high; - } rate[0]; + } rate[]; #define RATE_TO_U64(X) \ ({ \ typeof(X) x = (X); \ @@ -210,7 +210,8 @@ scmi_clock_describe_rates_get(const struct scmi_protocol_handle *ph, u32 clk_id, if (rate_discrete && rate) { clk->list.num_rates = tot_rate_cnt; - sort(rate, tot_rate_cnt, sizeof(*rate), rate_cmp_func, NULL); + sort(clk->list.rates, tot_rate_cnt, sizeof(*rate), + rate_cmp_func, NULL); } clk->rate_discrete = rate_discrete; diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index 46118300a4d1..e17c6568344d 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -679,7 +679,8 @@ static void scmi_handle_response(struct scmi_chan_info *cinfo, xfer = scmi_xfer_command_acquire(cinfo, msg_hdr); if (IS_ERR(xfer)) { - scmi_clear_channel(info, cinfo); + if (MSG_XTRACT_TYPE(msg_hdr) == MSG_TYPE_DELAYED_RESP) + scmi_clear_channel(info, cinfo); return; } diff --git a/drivers/firmware/arm_scmi/optee.c b/drivers/firmware/arm_scmi/optee.c index 734f1eeee161..8302a2b4aeeb 100644 --- a/drivers/firmware/arm_scmi/optee.c +++ b/drivers/firmware/arm_scmi/optee.c @@ -405,8 +405,8 @@ static int scmi_optee_chan_free(int id, void *p, void *data) return 0; } -static struct scmi_shared_mem *get_channel_shm(struct scmi_optee_channel *chan, - struct scmi_xfer *xfer) +static struct scmi_shared_mem __iomem * +get_channel_shm(struct scmi_optee_channel *chan, struct scmi_xfer *xfer) { if (!chan) return NULL; @@ -419,7 +419,7 @@ static int scmi_optee_send_message(struct scmi_chan_info *cinfo, struct scmi_xfer *xfer) { struct scmi_optee_channel *channel = cinfo->transport_info; - struct scmi_shared_mem *shmem = get_channel_shm(channel, xfer); + struct scmi_shared_mem __iomem *shmem = get_channel_shm(channel, xfer); int ret; mutex_lock(&channel->mu); @@ -436,7 +436,7 @@ static void scmi_optee_fetch_response(struct scmi_chan_info *cinfo, struct scmi_xfer *xfer) { struct scmi_optee_channel *channel = cinfo->transport_info; - struct scmi_shared_mem *shmem = get_channel_shm(channel, xfer); + struct scmi_shared_mem __iomem *shmem = get_channel_shm(channel, xfer); shmem_fetch_response(shmem, xfer); } diff --git a/drivers/firmware/cirrus/cs_dsp.c b/drivers/firmware/cirrus/cs_dsp.c index e48108e694f8..7dad6f57d970 100644 --- a/drivers/firmware/cirrus/cs_dsp.c +++ b/drivers/firmware/cirrus/cs_dsp.c @@ -955,8 +955,7 @@ static int cs_dsp_create_control(struct cs_dsp *dsp, ctl->alg_region = *alg_region; if (subname && dsp->fw_ver >= 2) { ctl->subname_len = subname_len; - ctl->subname = kmemdup(subname, - strlen(subname) + 1, GFP_KERNEL); + ctl->subname = kasprintf(GFP_KERNEL, "%.*s", subname_len, subname); if (!ctl->subname) { ret = -ENOMEM; goto err_ctl; diff --git a/drivers/gpio/gpio-sim.c b/drivers/gpio/gpio-sim.c index 8e5d87984a48..41c31b10ae84 100644 --- a/drivers/gpio/gpio-sim.c +++ b/drivers/gpio/gpio-sim.c @@ -134,7 +134,7 @@ static int gpio_sim_get_multiple(struct gpio_chip *gc, struct gpio_sim_chip *chip = gpiochip_get_data(gc); mutex_lock(&chip->lock); - bitmap_copy(bits, chip->value_map, gc->ngpio); + bitmap_replace(bits, bits, chip->value_map, mask, gc->ngpio); mutex_unlock(&chip->lock); return 0; @@ -146,7 +146,7 @@ static void gpio_sim_set_multiple(struct gpio_chip *gc, struct gpio_sim_chip *chip = gpiochip_get_data(gc); mutex_lock(&chip->lock); - bitmap_copy(chip->value_map, bits, gc->ngpio); + bitmap_replace(chip->value_map, chip->value_map, bits, mask, gc->ngpio); mutex_unlock(&chip->lock); } diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index a5495ad31c9c..c2523ac26fac 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -108,7 +108,7 @@ static int acpi_gpiochip_find(struct gpio_chip *gc, void *data) * controller does not have GPIO chip registered at the moment. This is to * support probe deferral. */ -static struct gpio_desc *acpi_get_gpiod(char *path, int pin) +static struct gpio_desc *acpi_get_gpiod(char *path, unsigned int pin) { struct gpio_chip *chip; acpi_handle handle; @@ -136,7 +136,7 @@ static struct gpio_desc *acpi_get_gpiod(char *path, int pin) * as it is intended for use outside of the GPIO layer (in a similar fashion to * gpiod_get_index() for example) it also holds a reference to the GPIO device. */ -struct gpio_desc *acpi_get_and_request_gpiod(char *path, int pin, char *label) +struct gpio_desc *acpi_get_and_request_gpiod(char *path, unsigned int pin, char *label) { struct gpio_desc *gpio; int ret; @@ -317,11 +317,12 @@ static struct gpio_desc *acpi_request_own_gpiod(struct gpio_chip *chip, return desc; } -static bool acpi_gpio_in_ignore_list(const char *controller_in, int pin_in) +static bool acpi_gpio_in_ignore_list(const char *controller_in, unsigned int pin_in) { const char *controller, *pin_str; - int len, pin; + unsigned int pin; char *endp; + int len; controller = ignore_wake; while (controller) { @@ -354,13 +355,13 @@ err: static bool acpi_gpio_irq_is_wake(struct device *parent, struct acpi_resource_gpio *agpio) { - int pin = agpio->pin_table[0]; + unsigned int pin = agpio->pin_table[0]; if (agpio->wake_capable != ACPI_WAKE_CAPABLE) return false; if (acpi_gpio_in_ignore_list(dev_name(parent), pin)) { - dev_info(parent, "Ignoring wakeup on pin %d\n", pin); + dev_info(parent, "Ignoring wakeup on pin %u\n", pin); return false; } @@ -378,7 +379,8 @@ static acpi_status acpi_gpiochip_alloc_event(struct acpi_resource *ares, struct acpi_gpio_event *event; irq_handler_t handler = NULL; struct gpio_desc *desc; - int ret, pin, irq; + unsigned int pin; + int ret, irq; if (!acpi_gpio_get_irq_resource(ares, &agpio)) return AE_OK; @@ -387,8 +389,8 @@ static acpi_status acpi_gpiochip_alloc_event(struct acpi_resource *ares, pin = agpio->pin_table[0]; if (pin <= 255) { - char ev_name[5]; - sprintf(ev_name, "_%c%02hhX", + char ev_name[8]; + sprintf(ev_name, "_%c%02X", agpio->triggering == ACPI_EDGE_SENSITIVE ? 'E' : 'L', pin); if (ACPI_SUCCESS(acpi_get_handle(handle, ev_name, &evt_handle))) @@ -1098,7 +1100,7 @@ acpi_gpio_adr_space_handler(u32 function, acpi_physical_address address, length = min_t(u16, agpio->pin_table_length, pin_index + bits); for (i = pin_index; i < length; ++i) { - int pin = agpio->pin_table[i]; + unsigned int pin = agpio->pin_table[i]; struct acpi_gpio_connection *conn; struct gpio_desc *desc; bool found; diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 085348e08986..b7694171655c 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1601,8 +1601,6 @@ static int gpiochip_add_irqchip(struct gpio_chip *gc, gpiochip_set_irq_hooks(gc); - acpi_gpiochip_request_interrupts(gc); - /* * Using barrier() here to prevent compiler from reordering * gc->irq.initialized before initialization of above @@ -1612,6 +1610,8 @@ static int gpiochip_add_irqchip(struct gpio_chip *gc, gc->irq.initialized = true; + acpi_gpiochip_request_interrupts(gc); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 40e2c6e2df79..b525f9be9326 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -49,7 +49,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o \ atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \ atombios_encoders.o amdgpu_sa.o atombios_i2c.o \ - amdgpu_dma_buf.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \ + amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_ib.o amdgpu_pll.o \ amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ amdgpu_gtt_mgr.o amdgpu_preempt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o \ amdgpu_atomfirmware.o amdgpu_vf_error.o amdgpu_sched.o \ @@ -58,7 +58,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \ amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \ amdgpu_fw_attestation.o amdgpu_securedisplay.o \ - amdgpu_eeprom.o amdgpu_mca.o + amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index cdf0818088b3..bffd24845765 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -179,7 +179,7 @@ extern int amdgpu_sched_jobs; extern int amdgpu_sched_hw_submission; extern uint amdgpu_pcie_gen_cap; extern uint amdgpu_pcie_lane_cap; -extern uint amdgpu_cg_mask; +extern u64 amdgpu_cg_mask; extern uint amdgpu_pg_mask; extern uint amdgpu_sdma_phase_quantum; extern char *amdgpu_disable_cu; @@ -322,7 +322,7 @@ int amdgpu_device_ip_set_powergating_state(void *dev, enum amd_ip_block_type block_type, enum amd_powergating_state state); void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev, - u32 *flags); + u64 *flags); int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev, enum amd_ip_block_type block_type); bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev, @@ -860,7 +860,7 @@ struct amdgpu_device { /* powerplay */ struct amd_powerplay powerplay; struct amdgpu_pm pm; - u32 cg_flags; + u64 cg_flags; u32 pg_flags; /* nbio */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 6ca1db3c243f..64c6664b34e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -724,3 +724,11 @@ void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, bo else if (reset) amdgpu_amdkfd_gpu_reset(adev); } + +bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev) +{ + if (adev->gfx.ras && adev->gfx.ras->query_utcl2_poison_status) + return adev->gfx.ras->query_utcl2_poison_status(adev); + else + return false; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 4cb14c2fe53f..f8b9f27adcf5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -273,9 +273,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv, uint64_t *size); -int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( - struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv, - bool *table_freed); +int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(struct amdgpu_device *adev, + struct kgd_mem *mem, void *drm_priv); int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv); int amdgpu_amdkfd_gpuvm_sync_memory( @@ -301,6 +300,7 @@ void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *mem); void amdgpu_amdkfd_block_mmu_notifications(void *p); int amdgpu_amdkfd_criu_resume(void *p); +bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev); #if IS_ENABLED(CONFIG_HSA_AMD) void amdgpu_amdkfd_gpuvm_init_mem_limits(void); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 3dc5ab2764ff..80b6b8e432fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1058,8 +1058,7 @@ static void unmap_bo_from_gpuvm(struct kgd_mem *mem, static int update_gpuvm_pte(struct kgd_mem *mem, struct kfd_mem_attachment *entry, - struct amdgpu_sync *sync, - bool *table_freed) + struct amdgpu_sync *sync) { struct amdgpu_bo_va *bo_va = entry->bo_va; struct amdgpu_device *adev = entry->adev; @@ -1070,7 +1069,7 @@ static int update_gpuvm_pte(struct kgd_mem *mem, return ret; /* Update the page tables */ - ret = amdgpu_vm_bo_update(adev, bo_va, false, table_freed); + ret = amdgpu_vm_bo_update(adev, bo_va, false); if (ret) { pr_err("amdgpu_vm_bo_update failed\n"); return ret; @@ -1082,8 +1081,7 @@ static int update_gpuvm_pte(struct kgd_mem *mem, static int map_bo_to_gpuvm(struct kgd_mem *mem, struct kfd_mem_attachment *entry, struct amdgpu_sync *sync, - bool no_update_pte, - bool *table_freed) + bool no_update_pte) { int ret; @@ -1100,7 +1098,7 @@ static int map_bo_to_gpuvm(struct kgd_mem *mem, if (no_update_pte) return 0; - ret = update_gpuvm_pte(mem, entry, sync, table_freed); + ret = update_gpuvm_pte(mem, entry, sync); if (ret) { pr_err("update_gpuvm_pte() failed\n"); goto update_gpuvm_pte_failed; @@ -1710,7 +1708,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( struct amdgpu_device *adev, struct kgd_mem *mem, - void *drm_priv, bool *table_freed) + void *drm_priv) { struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); int ret; @@ -1797,7 +1795,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( entry->va, entry->va + bo_size, entry); ret = map_bo_to_gpuvm(mem, entry, ctx.sync, - is_invalid_userptr, table_freed); + is_invalid_userptr); if (ret) { pr_err("Failed to map bo to gpuvm\n"); goto out_unreserve; @@ -2265,7 +2263,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) continue; kfd_mem_dmaunmap_attachment(mem, attachment); - ret = update_gpuvm_pte(mem, attachment, &sync, NULL); + ret = update_gpuvm_pte(mem, attachment, &sync); if (ret) { pr_err("%s: update PTE failed\n", __func__); /* make sure this gets validated again */ @@ -2476,7 +2474,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) continue; kfd_mem_dmaunmap_attachment(mem, attachment); - ret = update_gpuvm_pte(mem, attachment, &sync_obj, NULL); + ret = update_gpuvm_pte(mem, attachment, &sync_obj); if (ret) { pr_debug("Memory eviction: update PTE failed. Try again\n"); goto validate_map_fail; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 8de283997769..01853431249d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -128,6 +128,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs goto free_chunk; } + mutex_lock(&p->ctx->lock); + /* skip guilty context job */ if (atomic_read(&p->ctx->guilty) == 1) { ret = -ECANCELED; @@ -688,6 +690,7 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, dma_fence_put(parser->fence); if (parser->ctx) { + mutex_unlock(&parser->ctx->lock); amdgpu_ctx_put(parser->ctx); } if (parser->bo_list) @@ -785,22 +788,22 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false, NULL); + r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false); if (r) return r; - r = amdgpu_sync_vm_fence(&p->job->sync, fpriv->prt_va->last_pt_update); + r = amdgpu_sync_fence(&p->job->sync, fpriv->prt_va->last_pt_update); if (r) return r; if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) { bo_va = fpriv->csa_va; BUG_ON(!bo_va); - r = amdgpu_vm_bo_update(adev, bo_va, false, NULL); + r = amdgpu_vm_bo_update(adev, bo_va, false); if (r) return r; - r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update); + r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update); if (r) return r; } @@ -815,11 +818,11 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (bo_va == NULL) continue; - r = amdgpu_vm_bo_update(adev, bo_va, false, NULL); + r = amdgpu_vm_bo_update(adev, bo_va, false); if (r) return r; - r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update); + r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update); if (r) return r; } @@ -832,7 +835,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_sync_vm_fence(&p->job->sync, vm->last_update); + r = amdgpu_sync_fence(&p->job->sync, vm->last_update); if (r) return r; @@ -1136,6 +1139,9 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, { int i, r; + /* TODO: Investigate why we still need the context lock */ + mutex_unlock(&p->ctx->lock); + for (i = 0; i < p->nchunks; ++i) { struct amdgpu_cs_chunk *chunk; @@ -1146,32 +1152,34 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES: r = amdgpu_cs_process_fence_dep(p, chunk); if (r) - return r; + goto out; break; case AMDGPU_CHUNK_ID_SYNCOBJ_IN: r = amdgpu_cs_process_syncobj_in_dep(p, chunk); if (r) - return r; + goto out; break; case AMDGPU_CHUNK_ID_SYNCOBJ_OUT: r = amdgpu_cs_process_syncobj_out_dep(p, chunk); if (r) - return r; + goto out; break; case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT: r = amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk); if (r) - return r; + goto out; break; case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL: r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk); if (r) - return r; + goto out; break; } } - return 0; +out: + mutex_lock(&p->ctx->lock); + return r; } static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p) @@ -1332,6 +1340,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) goto out; r = amdgpu_cs_submit(&parser, cs); + out: amdgpu_cs_parser_fini(&parser, r, reserved_buffers); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 5981c7d9bd48..8f0e6d93bb9c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -237,6 +237,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, kref_init(&ctx->refcount); spin_lock_init(&ctx->ring_lock); + mutex_init(&ctx->lock); ctx->reset_counter = atomic_read(&adev->gpu_reset_counter); ctx->reset_counter_query = ctx->reset_counter; @@ -357,6 +358,7 @@ static void amdgpu_ctx_fini(struct kref *ref) drm_dev_exit(idx); } + mutex_destroy(&ctx->lock); kfree(ctx); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h index d0cbfcea90f7..142f2f87d44c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h @@ -49,6 +49,7 @@ struct amdgpu_ctx { bool preamble_presented; int32_t init_priority; int32_t override_priority; + struct mutex lock; atomic_t guilty; unsigned long ras_counter_ce; unsigned long ras_counter_ue; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 5d04d24a0d5f..eedb12f6b8a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -38,6 +38,7 @@ #include "amdgpu_umr.h" #include "amdgpu_reset.h" +#include "amdgpu_psp_ta.h" #if defined(CONFIG_DEBUG_FS) @@ -730,7 +731,7 @@ static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf, return -ENOMEM; /* version, increment each time something is added */ - config[no_regs++] = 4; + config[no_regs++] = 5; config[no_regs++] = adev->gfx.config.max_shader_engines; config[no_regs++] = adev->gfx.config.max_tile_pipes; config[no_regs++] = adev->gfx.config.max_cu_per_sh; @@ -757,8 +758,8 @@ static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf, /* rev==1 */ config[no_regs++] = adev->rev_id; - config[no_regs++] = adev->pg_flags; - config[no_regs++] = adev->cg_flags; + config[no_regs++] = lower_32_bits(adev->pg_flags); + config[no_regs++] = lower_32_bits(adev->cg_flags); /* rev==2 */ config[no_regs++] = adev->family; @@ -773,6 +774,10 @@ static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf, /* rev==4 APU flag */ config[no_regs++] = adev->flags & AMD_IS_APU ? 1 : 0; + /* rev==5 PG/CG flag upper 32bit */ + config[no_regs++] = upper_32_bits(adev->pg_flags); + config[no_regs++] = upper_32_bits(adev->cg_flags); + while (size && (*pos < no_regs * 4)) { uint32_t value; @@ -1763,6 +1768,7 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) DRM_ERROR("registering register debugfs failed (%d).\n", r); amdgpu_debugfs_firmware_init(adev); + amdgpu_ta_if_debugfs_init(adev); #if defined(CONFIG_DRM_AMD_DC) if (amdgpu_device_has_dc_support(adev)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 49f734137f15..94666c2417c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1703,7 +1703,7 @@ int amdgpu_device_ip_set_powergating_state(void *dev, * clockgating is enabled. */ void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev, - u32 *flags) + u64 *flags) { int i; @@ -3700,7 +3700,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, /* enable PCIE atomic ops */ if (amdgpu_sriov_vf(adev)) adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *) - adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_enabled_flags == + adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags == (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64); else adev->have_atomics_support = diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h index 6b25837955c4..1538b2dbfff1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h @@ -40,7 +40,7 @@ struct amdgpu_df_funcs { void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev, bool enable); void (*get_clockgating_state)(struct amdgpu_device *adev, - u32 *flags); + u64 *flags); void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev, bool enable); int (*pmc_start)(struct amdgpu_device *adev, uint64_t config, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index e4fcbb385a62..aaf2fc6b1a82 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -430,7 +430,7 @@ static void amdgpu_discovery_read_harvest_bit_per_ip(struct amdgpu_device *adev, } } next_ip: - ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1); + ip_offset += struct_size(ip, base_address, ip->num_base_address); } } } @@ -798,7 +798,7 @@ static int amdgpu_discovery_sysfs_ips(struct amdgpu_device *adev, res = kobject_add(&ip_hw_instance->kobj, NULL, "%d", ip_hw_instance->num_instance); next_ip: - ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1); + ip_offset += struct_size(ip, base_address, ip->num_base_address); } } @@ -1063,7 +1063,7 @@ int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) } next_ip: - ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1); + ip_offset += struct_size(ip, base_address, ip->num_base_address); } } @@ -1113,7 +1113,7 @@ int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, int n *revision = ip->revision; return 0; } - ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1); + ip_offset += struct_size(ip, base_address, ip->num_base_address); } } @@ -1150,13 +1150,6 @@ void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev) adev->harvest_ip_mask |= AMD_HARVEST_IP_VCN_MASK; adev->harvest_ip_mask |= AMD_HARVEST_IP_JPEG_MASK; } - if ((adev->pdev->device == 0x731E && - (adev->pdev->revision == 0xC6 || adev->pdev->revision == 0xC7)) || - (adev->pdev->device == 0x7340 && adev->pdev->revision == 0xC9) || - (adev->pdev->device == 0x7360 && adev->pdev->revision == 0xC7)) { - adev->harvest_ip_mask |= AMD_HARVEST_IP_VCN_MASK; - adev->harvest_ip_mask |= AMD_HARVEST_IP_JPEG_MASK; - } } union gc_info { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 7a6908d71820..17c9bbe0cbc5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -41,6 +41,11 @@ #include <drm/drm_fourcc.h> #include <drm/drm_vblank.h> +static int amdgpu_display_framebuffer_init(struct drm_device *dev, + struct amdgpu_framebuffer *rfb, + const struct drm_mode_fb_cmd2 *mode_cmd, + struct drm_gem_object *obj); + static void amdgpu_display_flip_callback(struct dma_fence *f, struct dma_fence_cb *cb) { @@ -113,8 +118,9 @@ static void amdgpu_display_flip_work_func(struct work_struct *__work) spin_unlock_irqrestore(&crtc->dev->event_lock, flags); - DRM_DEBUG_DRIVER("crtc:%d[%p], pflip_stat:AMDGPU_FLIP_SUBMITTED, work: %p,\n", - amdgpu_crtc->crtc_id, amdgpu_crtc, work); + drm_dbg_vbl(adev_to_drm(adev), + "crtc:%d[%p], pflip_stat:AMDGPU_FLIP_SUBMITTED, work: %p,\n", + amdgpu_crtc->crtc_id, amdgpu_crtc, work); } @@ -1038,35 +1044,11 @@ static int amdgpu_display_get_fb_info(const struct amdgpu_framebuffer *amdgpu_fb return r; } -int amdgpu_display_gem_fb_init(struct drm_device *dev, - struct amdgpu_framebuffer *rfb, - const struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_gem_object *obj) -{ - int ret; - - rfb->base.obj[0] = obj; - drm_helper_mode_fill_fb_struct(dev, &rfb->base, mode_cmd); - - ret = amdgpu_display_framebuffer_init(dev, rfb, mode_cmd, obj); - if (ret) - goto err; - - ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs); - if (ret) - goto err; - - return 0; -err: - drm_dbg_kms(dev, "Failed to init gem fb: %d\n", ret); - rfb->base.obj[0] = NULL; - return ret; -} - -int amdgpu_display_gem_fb_verify_and_init( - struct drm_device *dev, struct amdgpu_framebuffer *rfb, - struct drm_file *file_priv, const struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_gem_object *obj) +static int amdgpu_display_gem_fb_verify_and_init(struct drm_device *dev, + struct amdgpu_framebuffer *rfb, + struct drm_file *file_priv, + const struct drm_mode_fb_cmd2 *mode_cmd, + struct drm_gem_object *obj) { int ret; @@ -1098,10 +1080,10 @@ err: return ret; } -int amdgpu_display_framebuffer_init(struct drm_device *dev, - struct amdgpu_framebuffer *rfb, - const struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_gem_object *obj) +static int amdgpu_display_framebuffer_init(struct drm_device *dev, + struct amdgpu_framebuffer *rfb, + const struct drm_mode_fb_cmd2 *mode_cmd, + struct drm_gem_object *obj) { struct amdgpu_device *adev = drm_to_adev(dev); int ret, i; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index b03663f42cc9..ebd37fb19cdb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -136,7 +136,7 @@ int amdgpu_sched_jobs = 32; int amdgpu_sched_hw_submission = 2; uint amdgpu_pcie_gen_cap; uint amdgpu_pcie_lane_cap; -uint amdgpu_cg_mask = 0xffffffff; +u64 amdgpu_cg_mask = 0xffffffffffffffff; uint amdgpu_pg_mask = 0xffffffff; uint amdgpu_sdma_phase_quantum = 32; char *amdgpu_disable_cu = NULL; @@ -454,12 +454,12 @@ MODULE_PARM_DESC(pcie_lane_cap, "PCIE Lane Caps (0: autodetect (default))"); module_param_named(pcie_lane_cap, amdgpu_pcie_lane_cap, uint, 0444); /** - * DOC: cg_mask (uint) + * DOC: cg_mask (ullong) * Override Clockgating features enabled on GPU (0 = disable clock gating). See the AMD_CG_SUPPORT flags in - * drivers/gpu/drm/amd/include/amd_shared.h. The default is 0xffffffff (all enabled). + * drivers/gpu/drm/amd/include/amd_shared.h. The default is 0xffffffffffffffff (all enabled). */ MODULE_PARM_DESC(cg_mask, "Clockgating flags mask (0 = disable clock gating)"); -module_param_named(cg_mask, amdgpu_cg_mask, uint, 0444); +module_param_named(cg_mask, amdgpu_cg_mask, ullong, 0444); /** * DOC: pg_mask (uint) @@ -2323,18 +2323,23 @@ static int amdgpu_pmops_suspend(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(drm_dev); - int r; if (amdgpu_acpi_is_s0ix_active(adev)) adev->in_s0ix = true; else adev->in_s3 = true; - r = amdgpu_device_suspend(drm_dev, true); - if (r) - return r; + return amdgpu_device_suspend(drm_dev, true); +} + +static int amdgpu_pmops_suspend_noirq(struct device *dev) +{ + struct drm_device *drm_dev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(drm_dev); + if (!adev->in_s0ix) - r = amdgpu_asic_reset(adev); - return r; + return amdgpu_asic_reset(adev); + + return 0; } static int amdgpu_pmops_resume(struct device *dev) @@ -2390,6 +2395,71 @@ static int amdgpu_pmops_restore(struct device *dev) return amdgpu_device_resume(drm_dev, true); } +static int amdgpu_runtime_idle_check_display(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct drm_device *drm_dev = pci_get_drvdata(pdev); + struct amdgpu_device *adev = drm_to_adev(drm_dev); + + if (adev->mode_info.num_crtc) { + struct drm_connector *list_connector; + struct drm_connector_list_iter iter; + int ret = 0; + + /* XXX: Return busy if any displays are connected to avoid + * possible display wakeups after runtime resume due to + * hotplug events in case any displays were connected while + * the GPU was in suspend. Remove this once that is fixed. + */ + mutex_lock(&drm_dev->mode_config.mutex); + drm_connector_list_iter_begin(drm_dev, &iter); + drm_for_each_connector_iter(list_connector, &iter) { + if (list_connector->status == connector_status_connected) { + ret = -EBUSY; + break; + } + } + drm_connector_list_iter_end(&iter); + mutex_unlock(&drm_dev->mode_config.mutex); + + if (ret) + return ret; + + if (amdgpu_device_has_dc_support(adev)) { + struct drm_crtc *crtc; + + drm_for_each_crtc(crtc, drm_dev) { + drm_modeset_lock(&crtc->mutex, NULL); + if (crtc->state->active) + ret = -EBUSY; + drm_modeset_unlock(&crtc->mutex); + if (ret < 0) + break; + } + } else { + mutex_lock(&drm_dev->mode_config.mutex); + drm_modeset_lock(&drm_dev->mode_config.connection_mutex, NULL); + + drm_connector_list_iter_begin(drm_dev, &iter); + drm_for_each_connector_iter(list_connector, &iter) { + if (list_connector->dpms == DRM_MODE_DPMS_ON) { + ret = -EBUSY; + break; + } + } + + drm_connector_list_iter_end(&iter); + + drm_modeset_unlock(&drm_dev->mode_config.connection_mutex); + mutex_unlock(&drm_dev->mode_config.mutex); + } + if (ret) + return ret; + } + + return 0; +} + static int amdgpu_pmops_runtime_suspend(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); @@ -2402,6 +2472,10 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) return -EBUSY; } + ret = amdgpu_runtime_idle_check_display(dev); + if (ret) + return ret; + /* wait for all rings to drain before suspending */ for (i = 0; i < AMDGPU_MAX_RINGS; i++) { struct amdgpu_ring *ring = adev->rings[i]; @@ -2511,41 +2585,7 @@ static int amdgpu_pmops_runtime_idle(struct device *dev) return -EBUSY; } - if (amdgpu_device_has_dc_support(adev)) { - struct drm_crtc *crtc; - - drm_for_each_crtc(crtc, drm_dev) { - drm_modeset_lock(&crtc->mutex, NULL); - if (crtc->state->active) - ret = -EBUSY; - drm_modeset_unlock(&crtc->mutex); - if (ret < 0) - break; - } - - } else { - struct drm_connector *list_connector; - struct drm_connector_list_iter iter; - - mutex_lock(&drm_dev->mode_config.mutex); - drm_modeset_lock(&drm_dev->mode_config.connection_mutex, NULL); - - drm_connector_list_iter_begin(drm_dev, &iter); - drm_for_each_connector_iter(list_connector, &iter) { - if (list_connector->dpms == DRM_MODE_DPMS_ON) { - ret = -EBUSY; - break; - } - } - - drm_connector_list_iter_end(&iter); - - drm_modeset_unlock(&drm_dev->mode_config.connection_mutex); - mutex_unlock(&drm_dev->mode_config.mutex); - } - - if (ret == -EBUSY) - DRM_DEBUG_DRIVER("failing to power off - crtc active\n"); + ret = amdgpu_runtime_idle_check_display(dev); pm_runtime_mark_last_busy(dev); pm_runtime_autosuspend(dev); @@ -2575,6 +2615,7 @@ static const struct dev_pm_ops amdgpu_pm_ops = { .prepare = amdgpu_pmops_prepare, .complete = amdgpu_pmops_complete, .suspend = amdgpu_pmops_suspend, + .suspend_noirq = amdgpu_pmops_suspend_noirq, .resume = amdgpu_pmops_resume, .freeze = amdgpu_pmops_freeze, .thaw = amdgpu_pmops_thaw, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 84a53758e18e..652571267077 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -613,7 +613,7 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, if (operation == AMDGPU_VA_OP_MAP || operation == AMDGPU_VA_OP_REPLACE) { - r = amdgpu_vm_bo_update(adev, bo_va, false, NULL); + r = amdgpu_vm_bo_update(adev, bo_va, false); if (r) goto error; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index dcb3c7871c73..5ed9b8a4c571 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -202,6 +202,7 @@ struct amdgpu_cu_info { struct amdgpu_gfx_ras { struct amdgpu_ras_block_object ras_block; void (*enable_watchdog_timer)(struct amdgpu_device *adev); + bool (*query_utcl2_poison_status)(struct amdgpu_device *adev); }; struct amdgpu_gfx_funcs { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index a66a0881a934..88b852b3a2cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -25,6 +25,9 @@ */ #include <linux/io-64-nonatomic-lo-hi.h> +#ifdef CONFIG_X86 +#include <asm/hypervisor.h> +#endif #include "amdgpu.h" #include "amdgpu_gmc.h" @@ -647,12 +650,14 @@ void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev) case CHIP_VEGA10: adev->mman.keep_stolen_vga_memory = true; /* - * VEGA10 SRIOV VF needs some firmware reserved area. + * VEGA10 SRIOV VF with MS_HYPERV host needs some firmware reserved area. */ - if (amdgpu_sriov_vf(adev)) { - adev->mman.stolen_reserved_offset = 0x100000; - adev->mman.stolen_reserved_size = 0x600000; +#ifdef CONFIG_X86 + if (amdgpu_sriov_vf(adev) && hypervisor_is_type(X86_HYPER_MS_HYPERV)) { + adev->mman.stolen_reserved_offset = 0x500000; + adev->mman.stolen_reserved_size = 0x200000; } +#endif break; case CHIP_RAVEN: case CHIP_RENOIR: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h index 9181c7bef7c6..ac5c61d3de2b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h @@ -33,7 +33,7 @@ struct amdgpu_hdp_funcs { void (*invalidate_hdp)(struct amdgpu_device *adev, struct amdgpu_ring *ring); void (*update_clock_gating)(struct amdgpu_device *adev, bool enable); - void (*get_clock_gating_state)(struct amdgpu_device *adev, u32 *flags); + void (*get_clock_gating_state)(struct amdgpu_device *adev, u64 *flags); void (*init_registers)(struct amdgpu_device *adev); }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 4ba4b54092f1..03d115d2b5ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -260,19 +260,15 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, struct amdgpu_device *adev = ring->adev; unsigned vmhub = ring->funcs->vmhub; uint64_t fence_context = adev->fence_context + ring->idx; - struct dma_fence *updates = sync->last_vm_update; bool needs_flush = vm->use_cpu_for_update; - int r = 0; + uint64_t updates = amdgpu_vm_tlb_seq(vm); + int r; *id = vm->reserved_vmid[vmhub]; - if (updates && (*id)->flushed_updates && - updates->context == (*id)->flushed_updates->context && - !dma_fence_is_later(updates, (*id)->flushed_updates)) - updates = NULL; - if ((*id)->owner != vm->immediate.fence_context || - job->vm_pd_addr != (*id)->pd_gpu_addr || - updates || !(*id)->last_flush || + (*id)->pd_gpu_addr != job->vm_pd_addr || + (*id)->flushed_updates < updates || + !(*id)->last_flush || ((*id)->last_flush->context != fence_context && !dma_fence_is_signaled((*id)->last_flush))) { struct dma_fence *tmp; @@ -286,8 +282,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, tmp = amdgpu_sync_peek_fence(&(*id)->active, ring); if (tmp) { *id = NULL; - r = amdgpu_sync_fence(sync, tmp); - return r; + return amdgpu_sync_fence(sync, tmp); } needs_flush = true; } @@ -299,10 +294,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, if (r) return r; - if (updates) { - dma_fence_put((*id)->flushed_updates); - (*id)->flushed_updates = dma_fence_get(updates); - } + (*id)->flushed_updates = updates; job->vm_needs_flush = needs_flush; return 0; } @@ -330,7 +322,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, unsigned vmhub = ring->funcs->vmhub; struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; uint64_t fence_context = adev->fence_context + ring->idx; - struct dma_fence *updates = sync->last_vm_update; + uint64_t updates = amdgpu_vm_tlb_seq(vm); int r; job->vm_needs_flush = vm->use_cpu_for_update; @@ -338,7 +330,6 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, /* Check if we can use a VMID already assigned to this VM */ list_for_each_entry_reverse((*id), &id_mgr->ids_lru, list) { bool needs_flush = vm->use_cpu_for_update; - struct dma_fence *flushed; /* Check all the prerequisites to using this VMID */ if ((*id)->owner != vm->immediate.fence_context) @@ -352,8 +343,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, !dma_fence_is_signaled((*id)->last_flush))) needs_flush = true; - flushed = (*id)->flushed_updates; - if (updates && (!flushed || dma_fence_is_later(updates, flushed))) + if ((*id)->flushed_updates < updates) needs_flush = true; if (needs_flush && !adev->vm_manager.concurrent_flush) @@ -366,11 +356,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, if (r) return r; - if (updates && (!flushed || dma_fence_is_later(updates, flushed))) { - dma_fence_put((*id)->flushed_updates); - (*id)->flushed_updates = dma_fence_get(updates); - } - + (*id)->flushed_updates = updates; job->vm_needs_flush |= needs_flush; return 0; } @@ -416,8 +402,6 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, goto error; if (!id) { - struct dma_fence *updates = sync->last_vm_update; - /* Still no ID to use? Then use the idle one found earlier */ id = idle; @@ -426,8 +410,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, if (r) goto error; - dma_fence_put(id->flushed_updates); - id->flushed_updates = dma_fence_get(updates); + id->flushed_updates = amdgpu_vm_tlb_seq(vm); job->vm_needs_flush = true; } @@ -594,7 +577,6 @@ void amdgpu_vmid_mgr_fini(struct amdgpu_device *adev) struct amdgpu_vmid *id = &id_mgr->ids[j]; amdgpu_sync_free(&id->active); - dma_fence_put(id->flushed_updates); dma_fence_put(id->last_flush); dma_fence_put(id->pasid_mapping); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h index 0c3b4fa1f936..06c8a0034fa5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h @@ -47,7 +47,7 @@ struct amdgpu_vmid { uint64_t pd_gpu_addr; /* last flushed PD/PT update */ - struct dma_fence *flushed_updates; + uint64_t flushed_updates; uint32_t current_gpu_reset_count; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h index 55fbff2be761..b6c7fb00e05a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h @@ -24,6 +24,8 @@ #ifndef __AMDGPU_JPEG_H__ #define __AMDGPU_JPEG_H__ +#include "amdgpu_ras.h" + #define AMDGPU_MAX_JPEG_INSTANCES 2 #define AMDGPU_JPEG_HARVEST_JPEG0 (1 << 0) @@ -39,6 +41,10 @@ struct amdgpu_jpeg_inst { struct amdgpu_jpeg_reg external; }; +struct amdgpu_jpeg_ras { + struct amdgpu_ras_block_object ras_block; +}; + struct amdgpu_jpeg { uint8_t num_jpeg_inst; struct amdgpu_jpeg_inst inst[AMDGPU_MAX_JPEG_INSTANCES]; @@ -48,6 +54,8 @@ struct amdgpu_jpeg { enum amd_powergating_state cur_state; struct mutex jpeg_pg_lock; atomic_t total_submission_cnt; + struct ras_common_if *ras_if; + struct amdgpu_jpeg_ras *ras; }; int amdgpu_jpeg_sw_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h index 9f1540f0ebf9..f939395c5914 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h @@ -34,7 +34,7 @@ struct amdgpu_mmhub_funcs { void (*gart_disable)(struct amdgpu_device *adev); int (*set_clockgating)(struct amdgpu_device *adev, enum amd_clockgating_state state); - void (*get_clockgating)(struct amdgpu_device *adev, u32 *flags); + void (*get_clockgating)(struct amdgpu_device *adev, u64 *flags); void (*setup_vm_pt_regs)(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base); void (*update_power_gating)(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 72c6f6cb7a44..f80b4838cea1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -592,19 +592,6 @@ int amdgpu_display_get_crtc_scanoutpos(struct drm_device *dev, int *hpos, ktime_t *stime, ktime_t *etime, const struct drm_display_mode *mode); -int amdgpu_display_gem_fb_init(struct drm_device *dev, - struct amdgpu_framebuffer *rfb, - const struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_gem_object *obj); -int amdgpu_display_gem_fb_verify_and_init( - struct drm_device *dev, struct amdgpu_framebuffer *rfb, - struct drm_file *file_priv, const struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_gem_object *obj); -int amdgpu_display_framebuffer_init(struct drm_device *dev, - struct amdgpu_framebuffer *rfb, - const struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_gem_object *obj); - int amdgpufb_remove(struct drm_device *dev, struct drm_framebuffer *fb); void amdgpu_enc_destroy(struct drm_encoder *encoder); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h index 3d13e601fc35..03439083182a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h @@ -83,7 +83,7 @@ struct amdgpu_nbio_funcs { void (*update_medium_grain_light_sleep)(struct amdgpu_device *adev, bool enable); void (*get_clockgating_state)(struct amdgpu_device *adev, - u32 *flags); + u64 *flags); void (*ih_control)(struct amdgpu_device *adev); void (*init_registers)(struct amdgpu_device *adev); void (*remap_hdp_registers)(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index e92ecabfa7bd..5444515c1476 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -472,7 +472,7 @@ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev, fail: DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size, - man->size << PAGE_SHIFT); + man->size); return false; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index a6acec1a6155..f6527aa19238 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -46,8 +46,6 @@ static int psp_sysfs_init(struct amdgpu_device *adev); static void psp_sysfs_fini(struct amdgpu_device *adev); static int psp_load_smu_fw(struct psp_context *psp); -static int psp_ta_unload(struct psp_context *psp, struct ta_context *context); -static int psp_ta_load(struct psp_context *psp, struct ta_context *context); static int psp_rap_terminate(struct psp_context *psp); static int psp_securedisplay_terminate(struct psp_context *psp); @@ -862,7 +860,7 @@ static void psp_prep_ta_unload_cmd_buf(struct psp_gfx_cmd_resp *cmd, cmd->cmd.cmd_unload_ta.session_id = session_id; } -static int psp_ta_unload(struct psp_context *psp, struct ta_context *context) +int psp_ta_unload(struct psp_context *psp, struct ta_context *context) { int ret; struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp); @@ -944,7 +942,7 @@ static void psp_prep_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd, cmd->cmd.cmd_load_ta.cmd_buf_len = context->mem_context.shared_mem_size; } -static int psp_ta_init_shared_buf(struct psp_context *psp, +int psp_ta_init_shared_buf(struct psp_context *psp, struct ta_mem_context *mem_ctx) { /* @@ -958,7 +956,7 @@ static int psp_ta_init_shared_buf(struct psp_context *psp, &mem_ctx->shared_buf); } -static void psp_ta_free_shared_buf(struct ta_mem_context *mem_ctx) +void psp_ta_free_shared_buf(struct ta_mem_context *mem_ctx) { amdgpu_bo_free_kernel(&mem_ctx->shared_bo, &mem_ctx->shared_mc_addr, &mem_ctx->shared_buf); @@ -969,6 +967,42 @@ static int psp_xgmi_init_shared_buf(struct psp_context *psp) return psp_ta_init_shared_buf(psp, &psp->xgmi_context.context.mem_context); } +static void psp_prep_ta_invoke_indirect_cmd_buf(struct psp_gfx_cmd_resp *cmd, + uint32_t ta_cmd_id, + struct ta_context *context) +{ + cmd->cmd_id = GFX_CMD_ID_INVOKE_CMD; + cmd->cmd.cmd_invoke_cmd.session_id = context->session_id; + cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id; + + cmd->cmd.cmd_invoke_cmd.buf.num_desc = 1; + cmd->cmd.cmd_invoke_cmd.buf.total_size = context->mem_context.shared_mem_size; + cmd->cmd.cmd_invoke_cmd.buf.buf_desc[0].buf_size = context->mem_context.shared_mem_size; + cmd->cmd.cmd_invoke_cmd.buf.buf_desc[0].buf_phy_addr_lo = + lower_32_bits(context->mem_context.shared_mc_addr); + cmd->cmd.cmd_invoke_cmd.buf.buf_desc[0].buf_phy_addr_hi = + upper_32_bits(context->mem_context.shared_mc_addr); +} + +int psp_ta_invoke_indirect(struct psp_context *psp, + uint32_t ta_cmd_id, + struct ta_context *context) +{ + int ret; + struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp); + + psp_prep_ta_invoke_indirect_cmd_buf(cmd, ta_cmd_id, context); + + ret = psp_cmd_submit_buf(psp, NULL, cmd, + psp->fence_buf_mc_addr); + + context->resp_status = cmd->resp.status; + + release_psp_cmd_buf(psp); + + return ret; +} + static void psp_prep_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd, uint32_t ta_cmd_id, uint32_t session_id) @@ -978,7 +1012,7 @@ static void psp_prep_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd, cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id; } -static int psp_ta_invoke(struct psp_context *psp, +int psp_ta_invoke(struct psp_context *psp, uint32_t ta_cmd_id, struct ta_context *context) { @@ -990,12 +1024,14 @@ static int psp_ta_invoke(struct psp_context *psp, ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); + context->resp_status = cmd->resp.status; + release_psp_cmd_buf(psp); return ret; } -static int psp_ta_load(struct psp_context *psp, struct ta_context *context) +int psp_ta_load(struct psp_context *psp, struct ta_context *context) { int ret; struct psp_gfx_cmd_resp *cmd; @@ -1010,6 +1046,8 @@ static int psp_ta_load(struct psp_context *psp, struct ta_context *context) ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); + context->resp_status = cmd->resp.status; + if (!ret) { context->session_id = cmd->resp.session_id; } @@ -1415,7 +1453,7 @@ int psp_ras_enable_features(struct psp_context *psp, return 0; } -static int psp_ras_terminate(struct psp_context *psp) +int psp_ras_terminate(struct psp_context *psp) { int ret; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index ff7d533eb746..cf8d3199b35b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -48,6 +48,17 @@ enum psp_shared_mem_size { PSP_SECUREDISPLAY_SHARED_MEM_SIZE = 0x4000, }; +enum ta_type_id { + TA_TYPE_XGMI = 1, + TA_TYPE_RAS, + TA_TYPE_HDCP, + TA_TYPE_DTM, + TA_TYPE_RAP, + TA_TYPE_SECUREDISPLAY, + + TA_TYPE_MAX_INDEX, +}; + struct psp_context; struct psp_xgmi_node_info; struct psp_xgmi_topology_info; @@ -151,9 +162,11 @@ struct ta_mem_context { struct ta_context { bool initialized; uint32_t session_id; + uint32_t resp_status; struct ta_mem_context mem_context; struct psp_bin_desc bin_desc; enum psp_gfx_cmd_id ta_load_type; + enum ta_type_id ta_type; }; struct ta_cp_context { @@ -407,6 +420,18 @@ int psp_gpu_reset(struct amdgpu_device *adev); int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx, uint64_t cmd_gpu_addr, int cmd_size); +int psp_ta_init_shared_buf(struct psp_context *psp, + struct ta_mem_context *mem_ctx); +void psp_ta_free_shared_buf(struct ta_mem_context *mem_ctx); +int psp_ta_unload(struct psp_context *psp, struct ta_context *context); +int psp_ta_load(struct psp_context *psp, struct ta_context *context); +int psp_ta_invoke(struct psp_context *psp, + uint32_t ta_cmd_id, + struct ta_context *context); +int psp_ta_invoke_indirect(struct psp_context *psp, + uint32_t ta_cmd_id, + struct ta_context *context); + int psp_xgmi_initialize(struct psp_context *psp, bool set_extended_data, bool load_ta); int psp_xgmi_terminate(struct psp_context *psp); int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id); @@ -425,6 +450,7 @@ int psp_ras_enable_features(struct psp_context *psp, union ta_ras_cmd_input *info, bool enable); int psp_ras_trigger_error(struct psp_context *psp, struct ta_ras_trigger_error_input *info); +int psp_ras_terminate(struct psp_context *psp); int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id); int psp_dtm_invoke(struct psp_context *psp, uint32_t ta_cmd_id); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c new file mode 100644 index 000000000000..247a476e6354 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c @@ -0,0 +1,308 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" +#include "amdgpu_psp_ta.h" + +static const char *TA_IF_FS_NAME = "ta_if"; + +struct dentry *dir; +static struct dentry *ta_load_debugfs_dentry; +static struct dentry *ta_unload_debugfs_dentry; +static struct dentry *ta_invoke_debugfs_dentry; + +static ssize_t ta_if_load_debugfs_write(struct file *fp, const char *buf, + size_t len, loff_t *off); +static ssize_t ta_if_unload_debugfs_write(struct file *fp, const char *buf, + size_t len, loff_t *off); +static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, + size_t len, loff_t *off); + + +static uint32_t get_bin_version(const uint8_t *bin) +{ + const struct common_firmware_header *hdr = + (const struct common_firmware_header *)bin; + + return hdr->ucode_version; +} + +static void prep_ta_mem_context(struct psp_context *psp, + struct ta_context *context, + uint8_t *shared_buf, + uint32_t shared_buf_len) +{ + context->mem_context.shared_mem_size = PAGE_ALIGN(shared_buf_len); + psp_ta_init_shared_buf(psp, &context->mem_context); + + memcpy((void *)context->mem_context.shared_buf, shared_buf, shared_buf_len); +} + +static bool is_ta_type_valid(enum ta_type_id ta_type) +{ + bool ret = false; + + switch (ta_type) { + case TA_TYPE_RAS: + ret = true; + break; + default: + break; + } + + return ret; +} + +static const struct file_operations ta_load_debugfs_fops = { + .write = ta_if_load_debugfs_write, + .llseek = default_llseek, + .owner = THIS_MODULE +}; + +static const struct file_operations ta_unload_debugfs_fops = { + .write = ta_if_unload_debugfs_write, + .llseek = default_llseek, + .owner = THIS_MODULE +}; + +static const struct file_operations ta_invoke_debugfs_fops = { + .write = ta_if_invoke_debugfs_write, + .llseek = default_llseek, + .owner = THIS_MODULE +}; + + +/** + * DOC: AMDGPU TA debugfs interfaces + * + * Three debugfs interfaces can be opened by a program to + * load/invoke/unload TA, + * + * - /sys/kernel/debug/dri/<N>/ta_if/ta_load + * - /sys/kernel/debug/dri/<N>/ta_if/ta_invoke + * - /sys/kernel/debug/dri/<N>/ta_if/ta_unload + * + * How to use the interfaces in a program? + * + * A program needs to provide transmit buffer to the interfaces + * and will receive buffer from the interfaces below, + * + * - For TA load debugfs interface: + * Transmit buffer: + * - TA type (4bytes) + * - TA bin length (4bytes) + * - TA bin + * Receive buffer: + * - TA ID (4bytes) + * + * - For TA invoke debugfs interface: + * Transmit buffer: + * - TA ID (4bytes) + * - TA CMD ID (4bytes) + * - TA shard buf length (4bytes) + * - TA shared buf + * Receive buffer: + * - TA shared buf + * + * - For TA unload debugfs interface: + * Transmit buffer: + * - TA ID (4bytes) + */ + +static ssize_t ta_if_load_debugfs_write(struct file *fp, const char *buf, size_t len, loff_t *off) +{ + uint32_t ta_type = 0; + uint32_t ta_bin_len = 0; + uint8_t *ta_bin = NULL; + uint32_t copy_pos = 0; + int ret = 0; + + struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(fp)->i_private; + struct psp_context *psp = &adev->psp; + struct ta_context context = {0}; + + if (!buf) + return -EINVAL; + + ret = copy_from_user((void *)&ta_type, &buf[copy_pos], sizeof(uint32_t)); + if (ret || (!is_ta_type_valid(ta_type))) + return -EINVAL; + + copy_pos += sizeof(uint32_t); + + ret = copy_from_user((void *)&ta_bin_len, &buf[copy_pos], sizeof(uint32_t)); + if (ret) + return -EINVAL; + + copy_pos += sizeof(uint32_t); + + ta_bin = kzalloc(ta_bin_len, GFP_KERNEL); + if (!ta_bin) + ret = -ENOMEM; + ret = copy_from_user((void *)ta_bin, &buf[copy_pos], ta_bin_len); + if (ret) + goto err_free_bin; + + ret = psp_ras_terminate(psp); + if (ret) { + dev_err(adev->dev, "Failed to unload embedded RAS TA\n"); + goto err_free_bin; + } + + context.ta_type = ta_type; + context.ta_load_type = GFX_CMD_ID_LOAD_TA; + context.bin_desc.fw_version = get_bin_version(ta_bin); + context.bin_desc.size_bytes = ta_bin_len; + context.bin_desc.start_addr = ta_bin; + + ret = psp_ta_load(psp, &context); + + if (ret || context.resp_status) { + dev_err(adev->dev, "TA load via debugfs failed (%d) status %d\n", + ret, context.resp_status); + goto err_free_bin; + } + + context.initialized = true; + ret = copy_to_user((char *)buf, (void *)&context.session_id, sizeof(uint32_t)); + +err_free_bin: + kfree(ta_bin); + + return ret; +} + +static ssize_t ta_if_unload_debugfs_write(struct file *fp, const char *buf, size_t len, loff_t *off) +{ + uint32_t ta_id = 0; + int ret = 0; + + struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(fp)->i_private; + struct psp_context *psp = &adev->psp; + struct ta_context context = {0}; + + if (!buf) + return -EINVAL; + + ret = copy_from_user((void *)&ta_id, buf, sizeof(uint32_t)); + if (ret) + return -EINVAL; + + context.session_id = ta_id; + + ret = psp_ta_unload(psp, &context); + if (!ret) + context.initialized = false; + + return ret; +} + +static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size_t len, loff_t *off) +{ + uint32_t ta_id = 0; + uint32_t cmd_id = 0; + uint32_t shared_buf_len = 0; + uint8_t *shared_buf = NULL; + uint32_t copy_pos = 0; + int ret = 0; + + struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(fp)->i_private; + struct psp_context *psp = &adev->psp; + struct ta_context context = {0}; + + if (!buf) + return -EINVAL; + + ret = copy_from_user((void *)&ta_id, &buf[copy_pos], sizeof(uint32_t)); + if (ret) + return -EINVAL; + copy_pos += sizeof(uint32_t); + + ret = copy_from_user((void *)&cmd_id, &buf[copy_pos], sizeof(uint32_t)); + if (ret) + return -EINVAL; + copy_pos += sizeof(uint32_t); + + ret = copy_from_user((void *)&shared_buf_len, &buf[copy_pos], sizeof(uint32_t)); + if (ret) + return -EINVAL; + copy_pos += sizeof(uint32_t); + + shared_buf = kzalloc(shared_buf_len, GFP_KERNEL); + if (!shared_buf) + ret = -ENOMEM; + ret = copy_from_user((void *)shared_buf, &buf[copy_pos], shared_buf_len); + if (ret) + goto err_free_shared_buf; + + context.session_id = ta_id; + + prep_ta_mem_context(psp, &context, shared_buf, shared_buf_len); + + ret = psp_ta_invoke_indirect(psp, cmd_id, &context); + + if (ret || context.resp_status) { + dev_err(adev->dev, "TA invoke via debugfs failed (%d) status %d\n", + ret, context.resp_status); + goto err_free_ta_shared_buf; + } + + ret = copy_to_user((char *)buf, context.mem_context.shared_buf, shared_buf_len); + +err_free_ta_shared_buf: + psp_ta_free_shared_buf(&context.mem_context); + +err_free_shared_buf: + kfree(shared_buf); + + return ret; +} + +static struct dentry *amdgpu_ta_if_debugfs_create(struct amdgpu_device *adev) +{ + struct drm_minor *minor = adev_to_drm(adev)->primary; + + dir = debugfs_create_dir(TA_IF_FS_NAME, minor->debugfs_root); + + ta_load_debugfs_dentry = debugfs_create_file("ta_load", 0200, dir, adev, + &ta_load_debugfs_fops); + + ta_unload_debugfs_dentry = debugfs_create_file("ta_unload", 0200, dir, + adev, &ta_unload_debugfs_fops); + + ta_invoke_debugfs_dentry = debugfs_create_file("ta_invoke", 0200, dir, + adev, &ta_invoke_debugfs_fops); + return dir; +} + +void amdgpu_ta_if_debugfs_init(struct amdgpu_device *adev) +{ +#if defined(CONFIG_DEBUG_FS) + dir = amdgpu_ta_if_debugfs_create(adev); +#endif +} + +void amdgpu_ta_if_debugfs_remove(void) +{ + debugfs_remove_recursive(dir); +} diff --git a/drivers/gpu/drm/radeon/r600_blit_shaders.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.h index f437d36dd98c..883f89d57616 100644 --- a/drivers/gpu/drm/radeon/r600_blit_shaders.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.h @@ -1,6 +1,5 @@ /* - * Copyright 2009 Advanced Micro Devices, Inc. - * Copyright 2009 Red Hat Inc. + * Copyright 2022 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -9,30 +8,23 @@ * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. * */ -#ifndef R600_BLIT_SHADERS_H -#define R600_BLIT_SHADERS_H +#ifndef __AMDGPU_PSP_TA_H__ +#define __AMDGPU_PSP_TA_H__ -extern const u32 r6xx_ps[]; -extern const u32 r6xx_vs[]; -extern const u32 r7xx_default_state[]; -extern const u32 r6xx_default_state[]; - - -extern const u32 r6xx_ps_size, r6xx_vs_size; -extern const u32 r6xx_default_size, r7xx_default_size; +void amdgpu_ta_if_debugfs_init(struct amdgpu_device *adev); +void amdgpu_ta_if_debugfs_remove(void); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 424c22a841f4..ec709997c9c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -66,6 +66,8 @@ const char *ras_block_string[] = { "mp1", "fuse", "mca", + "vcn", + "jpeg", }; const char *ras_mca_block_string[] = { @@ -2205,6 +2207,13 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev) dev_info(adev->dev, "SRAM ECC is active.\n"); adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC | 1 << AMDGPU_RAS_BLOCK__DF); + + if (adev->ip_versions[VCN_HWIP][0] == IP_VERSION(2, 6, 0)) + adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN | + 1 << AMDGPU_RAS_BLOCK__JPEG); + else + adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__VCN | + 1 << AMDGPU_RAS_BLOCK__JPEG); } else { dev_info(adev->dev, "SRAM ECC is not presented.\n"); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 9314fde81e68..606df8869b89 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -49,6 +49,8 @@ enum amdgpu_ras_block { AMDGPU_RAS_BLOCK__MP1, AMDGPU_RAS_BLOCK__FUSE, AMDGPU_RAS_BLOCK__MCA, + AMDGPU_RAS_BLOCK__VCN, + AMDGPU_RAS_BLOCK__JPEG, AMDGPU_RAS_BLOCK__LAST }; @@ -506,6 +508,7 @@ struct amdgpu_ras_block_hw_ops { void (*query_ras_error_address)(struct amdgpu_device *adev, void *ras_error_status); void (*reset_ras_error_count)(struct amdgpu_device *adev); void (*reset_ras_error_status)(struct amdgpu_device *adev); + bool (*query_poison_status)(struct amdgpu_device *adev); }; /* work flow diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h index 484bb3dcec47..c7a823f3f2c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h @@ -27,7 +27,7 @@ struct amdgpu_smuio_funcs { u32 (*get_rom_index_offset)(struct amdgpu_device *adev); u32 (*get_rom_data_offset)(struct amdgpu_device *adev); void (*update_rom_clock_gating)(struct amdgpu_device *adev, bool enable); - void (*get_clock_gating_state)(struct amdgpu_device *adev, u32 *flags); + void (*get_clock_gating_state)(struct amdgpu_device *adev, u64 *flags); u32 (*get_die_id)(struct amdgpu_device *adev); u32 (*get_socket_id)(struct amdgpu_device *adev); bool (*is_host_gpu_xgmi_supported)(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 11c46b3e4c60..504af1b93bfa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -51,7 +51,6 @@ static struct kmem_cache *amdgpu_sync_slab; void amdgpu_sync_create(struct amdgpu_sync *sync) { hash_init(sync->fences); - sync->last_vm_update = NULL; } /** @@ -171,23 +170,6 @@ int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f) return 0; } -/** - * amdgpu_sync_vm_fence - remember to sync to this VM fence - * - * @sync: sync object to add fence to - * @fence: the VM fence to add - * - * Add the fence to the sync object and remember it as VM update. - */ -int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence) -{ - if (!fence) - return 0; - - amdgpu_sync_keep_later(&sync->last_vm_update, fence); - return amdgpu_sync_fence(sync, fence); -} - /* Determine based on the owner and mode if we should sync to a fence or not */ static bool amdgpu_sync_test_fence(struct amdgpu_device *adev, enum amdgpu_sync_mode mode, @@ -377,9 +359,6 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone) } } - dma_fence_put(clone->last_vm_update); - clone->last_vm_update = dma_fence_get(source->last_vm_update); - return 0; } @@ -420,8 +399,6 @@ void amdgpu_sync_free(struct amdgpu_sync *sync) dma_fence_put(e->fence); kmem_cache_free(amdgpu_sync_slab, e); } - - dma_fence_put(sync->last_vm_update); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h index 7c0fe20c470d..2d5c613cda10 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h @@ -43,12 +43,10 @@ enum amdgpu_sync_mode { */ struct amdgpu_sync { DECLARE_HASHTABLE(fences, 4); - struct dma_fence *last_vm_update; }; void amdgpu_sync_create(struct amdgpu_sync *sync); int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f); -int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence); int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, struct dma_resv *resv, enum amdgpu_sync_mode mode, void *owner); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index f99093f2ebc7..a0ee828a4a97 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -52,7 +52,7 @@ #define FIRMWARE_ALDEBARAN "amdgpu/aldebaran_vcn.bin" #define FIRMWARE_BEIGE_GOBY "amdgpu/beige_goby_vcn.bin" #define FIRMWARE_YELLOW_CARP "amdgpu/yellow_carp_vcn.bin" -#define FIRMWARE_VCN_3_1_2 "amdgpu/vcn_3_1_2_vcn.bin" +#define FIRMWARE_VCN_3_1_2 "amdgpu/vcn_3_1_2.bin" MODULE_FIRMWARE(FIRMWARE_RAVEN); MODULE_FIRMWARE(FIRMWARE_PICASSO); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index f06fb7f882e2..fb39065a96bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -24,6 +24,8 @@ #ifndef __AMDGPU_VCN_H__ #define __AMDGPU_VCN_H__ +#include "amdgpu_ras.h" + #define AMDGPU_VCN_STACK_SIZE (128*1024) #define AMDGPU_VCN_CONTEXT_SIZE (512*1024) @@ -233,6 +235,10 @@ struct amdgpu_vcn_inst { struct amdgpu_vcn_fw_shared fw_shared; }; +struct amdgpu_vcn_ras { + struct amdgpu_ras_block_object ras_block; +}; + struct amdgpu_vcn { unsigned fw_version; struct delayed_work idle_work; @@ -252,6 +258,9 @@ struct amdgpu_vcn { unsigned harvest_config; int (*pause_dpg_mode)(struct amdgpu_device *adev, int inst_idx, struct dpg_pause_state *new_state); + + struct ras_common_if *ras_if; + struct amdgpu_vcn_ras *ras; }; struct amdgpu_fw_shared_rb_ptrs_struct { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index a025f080aa6a..ea92edcc0432 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -23,6 +23,10 @@ #include <linux/module.h> +#ifdef CONFIG_X86 +#include <asm/hypervisor.h> +#endif + #include <drm/drm_drv.h> #include "amdgpu.h" @@ -723,8 +727,12 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev) break; case CHIP_VEGA10: soc15_set_virt_ops(adev); - /* send a dummy GPU_INIT_DATA request to host on vega10 */ - amdgpu_virt_request_init_data(adev); +#ifdef CONFIG_X86 + /* not send GPU_INIT_DATA with MS_HYPERV*/ + if (!hypervisor_is_type(X86_HYPER_MS_HYPERV)) +#endif + /* send a dummy GPU_INIT_DATA request to host on vega10 */ + amdgpu_virt_request_init_data(adev); break; case CHIP_VEGA20: case CHIP_ARCTURUS: @@ -862,11 +870,11 @@ static u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v uint32_t timeout = 50000; uint32_t i, tmp; uint32_t ret = 0; - static void *scratch_reg0; - static void *scratch_reg1; - static void *scratch_reg2; - static void *scratch_reg3; - static void *spare_int; + void *scratch_reg0; + void *scratch_reg1; + void *scratch_reg2; + void *scratch_reg3; + void *spare_int; if (!adev->gfx.rlc.rlcg_reg_access_supported) { dev_err(adev->dev, @@ -919,7 +927,7 @@ static u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v "wrong operation type, rlcg failed to program reg: 0x%05x\n", offset); } else if (tmp & AMDGPU_RLCG_REG_NOT_IN_RANGE) { dev_err(adev->dev, - "regiser is not in range, rlcg failed to program reg: 0x%05x\n", offset); + "register is not in range, rlcg failed to program reg: 0x%05x\n", offset); } else { dev_err(adev->dev, "unknown error type, rlcg failed to program reg: 0x%05x\n", offset); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 5277c10d901d..f9479e23de18 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -89,6 +89,21 @@ struct amdgpu_prt_cb { }; /** + * struct amdgpu_vm_tlb_seq_cb - Helper to increment the TLB flush sequence + */ +struct amdgpu_vm_tlb_seq_cb { + /** + * @vm: pointer to the amdgpu_vm structure to set the fence sequence on + */ + struct amdgpu_vm *vm; + + /** + * @cb: callback + */ + struct dma_fence_cb cb; +}; + +/** * amdgpu_vm_set_pasid - manage pasid and vm ptr mapping * * @adev: amdgpu_device pointer @@ -155,108 +170,6 @@ static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm) } /** - * amdgpu_vm_level_shift - return the addr shift for each level - * - * @adev: amdgpu_device pointer - * @level: VMPT level - * - * Returns: - * The number of bits the pfn needs to be right shifted for a level. - */ -static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev, - unsigned level) -{ - switch (level) { - case AMDGPU_VM_PDB2: - case AMDGPU_VM_PDB1: - case AMDGPU_VM_PDB0: - return 9 * (AMDGPU_VM_PDB0 - level) + - adev->vm_manager.block_size; - case AMDGPU_VM_PTB: - return 0; - default: - return ~0; - } -} - -/** - * amdgpu_vm_num_entries - return the number of entries in a PD/PT - * - * @adev: amdgpu_device pointer - * @level: VMPT level - * - * Returns: - * The number of entries in a page directory or page table. - */ -static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev, - unsigned level) -{ - unsigned shift = amdgpu_vm_level_shift(adev, - adev->vm_manager.root_level); - - if (level == adev->vm_manager.root_level) - /* For the root directory */ - return round_up(adev->vm_manager.max_pfn, 1ULL << shift) - >> shift; - else if (level != AMDGPU_VM_PTB) - /* Everything in between */ - return 512; - else - /* For the page tables on the leaves */ - return AMDGPU_VM_PTE_COUNT(adev); -} - -/** - * amdgpu_vm_num_ats_entries - return the number of ATS entries in the root PD - * - * @adev: amdgpu_device pointer - * - * Returns: - * The number of entries in the root page directory which needs the ATS setting. - */ -static unsigned amdgpu_vm_num_ats_entries(struct amdgpu_device *adev) -{ - unsigned shift; - - shift = amdgpu_vm_level_shift(adev, adev->vm_manager.root_level); - return AMDGPU_GMC_HOLE_START >> (shift + AMDGPU_GPU_PAGE_SHIFT); -} - -/** - * amdgpu_vm_entries_mask - the mask to get the entry number of a PD/PT - * - * @adev: amdgpu_device pointer - * @level: VMPT level - * - * Returns: - * The mask to extract the entry number of a PD/PT from an address. - */ -static uint32_t amdgpu_vm_entries_mask(struct amdgpu_device *adev, - unsigned int level) -{ - if (level <= adev->vm_manager.root_level) - return 0xffffffff; - else if (level != AMDGPU_VM_PTB) - return 0x1ff; - else - return AMDGPU_VM_PTE_COUNT(adev) - 1; -} - -/** - * amdgpu_vm_bo_size - returns the size of the BOs in bytes - * - * @adev: amdgpu_device pointer - * @level: VMPT level - * - * Returns: - * The size of the BO for a page directory or page table in bytes. - */ -static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level) -{ - return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_entries(adev, level) * 8); -} - -/** * amdgpu_vm_bo_evicted - vm_bo is evicted * * @vm_bo: vm_bo which is evicted @@ -358,9 +271,8 @@ static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo) * Initialize a bo_va_base structure and add it to the appropriate lists * */ -static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, - struct amdgpu_vm *vm, - struct amdgpu_bo *bo) +void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, + struct amdgpu_vm *vm, struct amdgpu_bo *bo) { base->vm = vm; base->bo = bo; @@ -396,228 +308,6 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, } /** - * amdgpu_vm_pt_parent - get the parent page directory - * - * @pt: child page table - * - * Helper to get the parent entry for the child page table. NULL if we are at - * the root page directory. - */ -static struct amdgpu_vm_bo_base *amdgpu_vm_pt_parent(struct amdgpu_vm_bo_base *pt) -{ - struct amdgpu_bo *parent = pt->bo->parent; - - if (!parent) - return NULL; - - return parent->vm_bo; -} - -/* - * amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt - */ -struct amdgpu_vm_pt_cursor { - uint64_t pfn; - struct amdgpu_vm_bo_base *parent; - struct amdgpu_vm_bo_base *entry; - unsigned level; -}; - -/** - * amdgpu_vm_pt_start - start PD/PT walk - * - * @adev: amdgpu_device pointer - * @vm: amdgpu_vm structure - * @start: start address of the walk - * @cursor: state to initialize - * - * Initialize a amdgpu_vm_pt_cursor to start a walk. - */ -static void amdgpu_vm_pt_start(struct amdgpu_device *adev, - struct amdgpu_vm *vm, uint64_t start, - struct amdgpu_vm_pt_cursor *cursor) -{ - cursor->pfn = start; - cursor->parent = NULL; - cursor->entry = &vm->root; - cursor->level = adev->vm_manager.root_level; -} - -/** - * amdgpu_vm_pt_descendant - go to child node - * - * @adev: amdgpu_device pointer - * @cursor: current state - * - * Walk to the child node of the current node. - * Returns: - * True if the walk was possible, false otherwise. - */ -static bool amdgpu_vm_pt_descendant(struct amdgpu_device *adev, - struct amdgpu_vm_pt_cursor *cursor) -{ - unsigned mask, shift, idx; - - if ((cursor->level == AMDGPU_VM_PTB) || !cursor->entry || - !cursor->entry->bo) - return false; - - mask = amdgpu_vm_entries_mask(adev, cursor->level); - shift = amdgpu_vm_level_shift(adev, cursor->level); - - ++cursor->level; - idx = (cursor->pfn >> shift) & mask; - cursor->parent = cursor->entry; - cursor->entry = &to_amdgpu_bo_vm(cursor->entry->bo)->entries[idx]; - return true; -} - -/** - * amdgpu_vm_pt_sibling - go to sibling node - * - * @adev: amdgpu_device pointer - * @cursor: current state - * - * Walk to the sibling node of the current node. - * Returns: - * True if the walk was possible, false otherwise. - */ -static bool amdgpu_vm_pt_sibling(struct amdgpu_device *adev, - struct amdgpu_vm_pt_cursor *cursor) -{ - unsigned shift, num_entries; - - /* Root doesn't have a sibling */ - if (!cursor->parent) - return false; - - /* Go to our parents and see if we got a sibling */ - shift = amdgpu_vm_level_shift(adev, cursor->level - 1); - num_entries = amdgpu_vm_num_entries(adev, cursor->level - 1); - - if (cursor->entry == &to_amdgpu_bo_vm(cursor->parent->bo)->entries[num_entries - 1]) - return false; - - cursor->pfn += 1ULL << shift; - cursor->pfn &= ~((1ULL << shift) - 1); - ++cursor->entry; - return true; -} - -/** - * amdgpu_vm_pt_ancestor - go to parent node - * - * @cursor: current state - * - * Walk to the parent node of the current node. - * Returns: - * True if the walk was possible, false otherwise. - */ -static bool amdgpu_vm_pt_ancestor(struct amdgpu_vm_pt_cursor *cursor) -{ - if (!cursor->parent) - return false; - - --cursor->level; - cursor->entry = cursor->parent; - cursor->parent = amdgpu_vm_pt_parent(cursor->parent); - return true; -} - -/** - * amdgpu_vm_pt_next - get next PD/PT in hieratchy - * - * @adev: amdgpu_device pointer - * @cursor: current state - * - * Walk the PD/PT tree to the next node. - */ -static void amdgpu_vm_pt_next(struct amdgpu_device *adev, - struct amdgpu_vm_pt_cursor *cursor) -{ - /* First try a newborn child */ - if (amdgpu_vm_pt_descendant(adev, cursor)) - return; - - /* If that didn't worked try to find a sibling */ - while (!amdgpu_vm_pt_sibling(adev, cursor)) { - /* No sibling, go to our parents and grandparents */ - if (!amdgpu_vm_pt_ancestor(cursor)) { - cursor->pfn = ~0ll; - return; - } - } -} - -/** - * amdgpu_vm_pt_first_dfs - start a deep first search - * - * @adev: amdgpu_device structure - * @vm: amdgpu_vm structure - * @start: optional cursor to start with - * @cursor: state to initialize - * - * Starts a deep first traversal of the PD/PT tree. - */ -static void amdgpu_vm_pt_first_dfs(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_vm_pt_cursor *start, - struct amdgpu_vm_pt_cursor *cursor) -{ - if (start) - *cursor = *start; - else - amdgpu_vm_pt_start(adev, vm, 0, cursor); - while (amdgpu_vm_pt_descendant(adev, cursor)); -} - -/** - * amdgpu_vm_pt_continue_dfs - check if the deep first search should continue - * - * @start: starting point for the search - * @entry: current entry - * - * Returns: - * True when the search should continue, false otherwise. - */ -static bool amdgpu_vm_pt_continue_dfs(struct amdgpu_vm_pt_cursor *start, - struct amdgpu_vm_bo_base *entry) -{ - return entry && (!start || entry != start->entry); -} - -/** - * amdgpu_vm_pt_next_dfs - get the next node for a deep first search - * - * @adev: amdgpu_device structure - * @cursor: current state - * - * Move the cursor to the next node in a deep first search. - */ -static void amdgpu_vm_pt_next_dfs(struct amdgpu_device *adev, - struct amdgpu_vm_pt_cursor *cursor) -{ - if (!cursor->entry) - return; - - if (!cursor->parent) - cursor->entry = NULL; - else if (amdgpu_vm_pt_sibling(adev, cursor)) - while (amdgpu_vm_pt_descendant(adev, cursor)); - else - amdgpu_vm_pt_ancestor(cursor); -} - -/* - * for_each_amdgpu_vm_pt_dfs_safe - safe deep first search of all PDs/PTs - */ -#define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) \ - for (amdgpu_vm_pt_first_dfs((adev), (vm), (start), &(cursor)), \ - (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor));\ - amdgpu_vm_pt_continue_dfs((start), (entry)); \ - (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor))) - -/** * amdgpu_vm_get_pd_bo - add the VM PD to a validation list * * @vm: vm providing the BOs @@ -726,316 +416,6 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm) } /** - * amdgpu_vm_clear_bo - initially clear the PDs/PTs - * - * @adev: amdgpu_device pointer - * @vm: VM to clear BO from - * @vmbo: BO to clear - * @immediate: use an immediate update - * - * Root PD needs to be reserved when calling this. - * - * Returns: - * 0 on success, errno otherwise. - */ -static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_bo_vm *vmbo, - bool immediate) -{ - struct ttm_operation_ctx ctx = { true, false }; - unsigned level = adev->vm_manager.root_level; - struct amdgpu_vm_update_params params; - struct amdgpu_bo *ancestor = &vmbo->bo; - struct amdgpu_bo *bo = &vmbo->bo; - unsigned entries, ats_entries; - uint64_t addr; - int r, idx; - - /* Figure out our place in the hierarchy */ - if (ancestor->parent) { - ++level; - while (ancestor->parent->parent) { - ++level; - ancestor = ancestor->parent; - } - } - - entries = amdgpu_bo_size(bo) / 8; - if (!vm->pte_support_ats) { - ats_entries = 0; - - } else if (!bo->parent) { - ats_entries = amdgpu_vm_num_ats_entries(adev); - ats_entries = min(ats_entries, entries); - entries -= ats_entries; - - } else { - struct amdgpu_vm_bo_base *pt; - - pt = ancestor->vm_bo; - ats_entries = amdgpu_vm_num_ats_entries(adev); - if ((pt - to_amdgpu_bo_vm(vm->root.bo)->entries) >= ats_entries) { - ats_entries = 0; - } else { - ats_entries = entries; - entries = 0; - } - } - - r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); - if (r) - return r; - - if (vmbo->shadow) { - struct amdgpu_bo *shadow = vmbo->shadow; - - r = ttm_bo_validate(&shadow->tbo, &shadow->placement, &ctx); - if (r) - return r; - } - - if (!drm_dev_enter(adev_to_drm(adev), &idx)) - return -ENODEV; - - r = vm->update_funcs->map_table(vmbo); - if (r) - goto exit; - - memset(¶ms, 0, sizeof(params)); - params.adev = adev; - params.vm = vm; - params.immediate = immediate; - - r = vm->update_funcs->prepare(¶ms, NULL, AMDGPU_SYNC_EXPLICIT); - if (r) - goto exit; - - addr = 0; - if (ats_entries) { - uint64_t value = 0, flags; - - flags = AMDGPU_PTE_DEFAULT_ATC; - if (level != AMDGPU_VM_PTB) { - /* Handle leaf PDEs as PTEs */ - flags |= AMDGPU_PDE_PTE; - amdgpu_gmc_get_vm_pde(adev, level, &value, &flags); - } - - r = vm->update_funcs->update(¶ms, vmbo, addr, 0, ats_entries, - value, flags); - if (r) - goto exit; - - addr += ats_entries * 8; - } - - if (entries) { - uint64_t value = 0, flags = 0; - - if (adev->asic_type >= CHIP_VEGA10) { - if (level != AMDGPU_VM_PTB) { - /* Handle leaf PDEs as PTEs */ - flags |= AMDGPU_PDE_PTE; - amdgpu_gmc_get_vm_pde(adev, level, - &value, &flags); - } else { - /* Workaround for fault priority problem on GMC9 */ - flags = AMDGPU_PTE_EXECUTABLE; - } - } - - r = vm->update_funcs->update(¶ms, vmbo, addr, 0, entries, - value, flags); - if (r) - goto exit; - } - - r = vm->update_funcs->commit(¶ms, NULL); -exit: - drm_dev_exit(idx); - return r; -} - -/** - * amdgpu_vm_pt_create - create bo for PD/PT - * - * @adev: amdgpu_device pointer - * @vm: requesting vm - * @level: the page table level - * @immediate: use a immediate update - * @vmbo: pointer to the buffer object pointer - */ -static int amdgpu_vm_pt_create(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - int level, bool immediate, - struct amdgpu_bo_vm **vmbo) -{ - struct amdgpu_bo_param bp; - struct amdgpu_bo *bo; - struct dma_resv *resv; - unsigned int num_entries; - int r; - - memset(&bp, 0, sizeof(bp)); - - bp.size = amdgpu_vm_bo_size(adev, level); - bp.byte_align = AMDGPU_GPU_PAGE_SIZE; - bp.domain = AMDGPU_GEM_DOMAIN_VRAM; - bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain); - bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | - AMDGPU_GEM_CREATE_CPU_GTT_USWC; - - if (level < AMDGPU_VM_PTB) - num_entries = amdgpu_vm_num_entries(adev, level); - else - num_entries = 0; - - bp.bo_ptr_size = struct_size((*vmbo), entries, num_entries); - - if (vm->use_cpu_for_update) - bp.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; - - bp.type = ttm_bo_type_kernel; - bp.no_wait_gpu = immediate; - if (vm->root.bo) - bp.resv = vm->root.bo->tbo.base.resv; - - r = amdgpu_bo_create_vm(adev, &bp, vmbo); - if (r) - return r; - - bo = &(*vmbo)->bo; - if (vm->is_compute_context || (adev->flags & AMD_IS_APU)) { - (*vmbo)->shadow = NULL; - return 0; - } - - if (!bp.resv) - WARN_ON(dma_resv_lock(bo->tbo.base.resv, - NULL)); - resv = bp.resv; - memset(&bp, 0, sizeof(bp)); - bp.size = amdgpu_vm_bo_size(adev, level); - bp.domain = AMDGPU_GEM_DOMAIN_GTT; - bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC; - bp.type = ttm_bo_type_kernel; - bp.resv = bo->tbo.base.resv; - bp.bo_ptr_size = sizeof(struct amdgpu_bo); - - r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow); - - if (!resv) - dma_resv_unlock(bo->tbo.base.resv); - - if (r) { - amdgpu_bo_unref(&bo); - return r; - } - - (*vmbo)->shadow->parent = amdgpu_bo_ref(bo); - amdgpu_bo_add_to_shadow_list(*vmbo); - - return 0; -} - -/** - * amdgpu_vm_alloc_pts - Allocate a specific page table - * - * @adev: amdgpu_device pointer - * @vm: VM to allocate page tables for - * @cursor: Which page table to allocate - * @immediate: use an immediate update - * - * Make sure a specific page table or directory is allocated. - * - * Returns: - * 1 if page table needed to be allocated, 0 if page table was already - * allocated, negative errno if an error occurred. - */ -static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_vm_pt_cursor *cursor, - bool immediate) -{ - struct amdgpu_vm_bo_base *entry = cursor->entry; - struct amdgpu_bo *pt_bo; - struct amdgpu_bo_vm *pt; - int r; - - if (entry->bo) - return 0; - - r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt); - if (r) - return r; - - /* Keep a reference to the root directory to avoid - * freeing them up in the wrong order. - */ - pt_bo = &pt->bo; - pt_bo->parent = amdgpu_bo_ref(cursor->parent->bo); - amdgpu_vm_bo_base_init(entry, vm, pt_bo); - r = amdgpu_vm_clear_bo(adev, vm, pt, immediate); - if (r) - goto error_free_pt; - - return 0; - -error_free_pt: - amdgpu_bo_unref(&pt->shadow); - amdgpu_bo_unref(&pt_bo); - return r; -} - -/** - * amdgpu_vm_free_table - fre one PD/PT - * - * @entry: PDE to free - */ -static void amdgpu_vm_free_table(struct amdgpu_vm_bo_base *entry) -{ - struct amdgpu_bo *shadow; - - if (!entry->bo) - return; - - shadow = amdgpu_bo_shadowed(entry->bo); - if (shadow) { - ttm_bo_set_bulk_move(&shadow->tbo, NULL); - amdgpu_bo_unref(&shadow); - } - - ttm_bo_set_bulk_move(&entry->bo->tbo, NULL); - entry->bo->vm_bo = NULL; - list_del(&entry->vm_status); - amdgpu_bo_unref(&entry->bo); -} - -/** - * amdgpu_vm_free_pts - free PD/PT levels - * - * @adev: amdgpu device structure - * @vm: amdgpu vm structure - * @start: optional cursor where to start freeing PDs/PTs - * - * Free the page directory or page table level and all sub levels. - */ -static void amdgpu_vm_free_pts(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_vm_pt_cursor *start) -{ - struct amdgpu_vm_pt_cursor cursor; - struct amdgpu_vm_bo_base *entry; - - for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) - amdgpu_vm_free_table(entry); - - if (start) - amdgpu_vm_free_table(start->entry); -} - -/** * amdgpu_vm_check_compute_bug - check whether asic has compute vm bug * * @adev: amdgpu_device pointer @@ -1282,53 +662,6 @@ uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) } /** - * amdgpu_vm_update_pde - update a single level in the hierarchy - * - * @params: parameters for the update - * @vm: requested vm - * @entry: entry to update - * - * Makes sure the requested entry in parent is up to date. - */ -static int amdgpu_vm_update_pde(struct amdgpu_vm_update_params *params, - struct amdgpu_vm *vm, - struct amdgpu_vm_bo_base *entry) -{ - struct amdgpu_vm_bo_base *parent = amdgpu_vm_pt_parent(entry); - struct amdgpu_bo *bo = parent->bo, *pbo; - uint64_t pde, pt, flags; - unsigned level; - - for (level = 0, pbo = bo->parent; pbo; ++level) - pbo = pbo->parent; - - level += params->adev->vm_manager.root_level; - amdgpu_gmc_get_pde_for_bo(entry->bo, level, &pt, &flags); - pde = (entry - to_amdgpu_bo_vm(parent->bo)->entries) * 8; - return vm->update_funcs->update(params, to_amdgpu_bo_vm(bo), pde, pt, - 1, 0, flags); -} - -/** - * amdgpu_vm_invalidate_pds - mark all PDs as invalid - * - * @adev: amdgpu_device pointer - * @vm: related vm - * - * Mark all PD level as invalid after an error. - */ -static void amdgpu_vm_invalidate_pds(struct amdgpu_device *adev, - struct amdgpu_vm *vm) -{ - struct amdgpu_vm_pt_cursor cursor; - struct amdgpu_vm_bo_base *entry; - - for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry) - if (entry->bo && !entry->moved) - amdgpu_vm_bo_relocated(entry); -} - -/** * amdgpu_vm_update_pdes - make sure that all directories are valid * * @adev: amdgpu_device pointer @@ -1344,6 +677,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, struct amdgpu_vm *vm, bool immediate) { struct amdgpu_vm_update_params params; + struct amdgpu_vm_bo_base *entry; int r, idx; if (list_empty(&vm->relocated)) @@ -1359,17 +693,10 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, r = vm->update_funcs->prepare(¶ms, NULL, AMDGPU_SYNC_EXPLICIT); if (r) - goto exit; - - while (!list_empty(&vm->relocated)) { - struct amdgpu_vm_bo_base *entry; - - entry = list_first_entry(&vm->relocated, - struct amdgpu_vm_bo_base, - vm_status); - amdgpu_vm_bo_idle(entry); + goto error; - r = amdgpu_vm_update_pde(¶ms, vm, entry); + list_for_each_entry(entry, &vm->relocated, vm_status) { + r = amdgpu_vm_pde_update(¶ms, entry); if (r) goto error; } @@ -1377,297 +704,68 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, r = vm->update_funcs->commit(¶ms, &vm->last_update); if (r) goto error; - drm_dev_exit(idx); - return 0; + + while (!list_empty(&vm->relocated)) { + entry = list_first_entry(&vm->relocated, + struct amdgpu_vm_bo_base, + vm_status); + amdgpu_vm_bo_idle(entry); + } error: - amdgpu_vm_invalidate_pds(adev, vm); -exit: drm_dev_exit(idx); return r; } -/* - * amdgpu_vm_update_flags - figure out flags for PTE updates - * - * Make sure to set the right flags for the PTEs at the desired level. - */ -static void amdgpu_vm_update_flags(struct amdgpu_vm_update_params *params, - struct amdgpu_bo_vm *pt, unsigned int level, - uint64_t pe, uint64_t addr, - unsigned int count, uint32_t incr, - uint64_t flags) - -{ - if (level != AMDGPU_VM_PTB) { - flags |= AMDGPU_PDE_PTE; - amdgpu_gmc_get_vm_pde(params->adev, level, &addr, &flags); - - } else if (params->adev->asic_type >= CHIP_VEGA10 && - !(flags & AMDGPU_PTE_VALID) && - !(flags & AMDGPU_PTE_PRT)) { - - /* Workaround for fault priority problem on GMC9 */ - flags |= AMDGPU_PTE_EXECUTABLE; - } - - params->vm->update_funcs->update(params, pt, pe, addr, count, incr, - flags); -} - /** - * amdgpu_vm_fragment - get fragment for PTEs + * amdgpu_vm_tlb_seq_cb - make sure to increment tlb sequence + * @fence: unused + * @cb: the callback structure * - * @params: see amdgpu_vm_update_params definition - * @start: first PTE to handle - * @end: last PTE to handle - * @flags: hw mapping flags - * @frag: resulting fragment size - * @frag_end: end of this fragment - * - * Returns the first possible fragment for the start and end address. - */ -static void amdgpu_vm_fragment(struct amdgpu_vm_update_params *params, - uint64_t start, uint64_t end, uint64_t flags, - unsigned int *frag, uint64_t *frag_end) -{ - /** - * The MC L1 TLB supports variable sized pages, based on a fragment - * field in the PTE. When this field is set to a non-zero value, page - * granularity is increased from 4KB to (1 << (12 + frag)). The PTE - * flags are considered valid for all PTEs within the fragment range - * and corresponding mappings are assumed to be physically contiguous. - * - * The L1 TLB can store a single PTE for the whole fragment, - * significantly increasing the space available for translation - * caching. This leads to large improvements in throughput when the - * TLB is under pressure. - * - * The L2 TLB distributes small and large fragments into two - * asymmetric partitions. The large fragment cache is significantly - * larger. Thus, we try to use large fragments wherever possible. - * Userspace can support this by aligning virtual base address and - * allocation size to the fragment size. - * - * Starting with Vega10 the fragment size only controls the L1. The L2 - * is now directly feed with small/huge/giant pages from the walker. - */ - unsigned max_frag; - - if (params->adev->asic_type < CHIP_VEGA10) - max_frag = params->adev->vm_manager.fragment_size; - else - max_frag = 31; - - /* system pages are non continuously */ - if (params->pages_addr) { - *frag = 0; - *frag_end = end; - return; - } - - /* This intentionally wraps around if no bit is set */ - *frag = min((unsigned)ffs(start) - 1, (unsigned)fls64(end - start) - 1); - if (*frag >= max_frag) { - *frag = max_frag; - *frag_end = end & ~((1ULL << max_frag) - 1); - } else { - *frag_end = start + (1 << *frag); - } -} - -/** - * amdgpu_vm_update_ptes - make sure that page tables are valid - * - * @params: see amdgpu_vm_update_params definition - * @start: start of GPU address range - * @end: end of GPU address range - * @dst: destination address to map to, the next dst inside the function - * @flags: mapping flags - * - * Update the page tables in the range @start - @end. - * - * Returns: - * 0 for success, -EINVAL for failure. + * Increments the tlb sequence to make sure that future CS execute a VM flush. */ -static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, - uint64_t start, uint64_t end, - uint64_t dst, uint64_t flags) +static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence, + struct dma_fence_cb *cb) { - struct amdgpu_device *adev = params->adev; - struct amdgpu_vm_pt_cursor cursor; - uint64_t frag_start = start, frag_end; - unsigned int frag; - int r; - - /* figure out the initial fragment */ - amdgpu_vm_fragment(params, frag_start, end, flags, &frag, &frag_end); - - /* walk over the address space and update the PTs */ - amdgpu_vm_pt_start(adev, params->vm, start, &cursor); - while (cursor.pfn < end) { - unsigned shift, parent_shift, mask; - uint64_t incr, entry_end, pe_start; - struct amdgpu_bo *pt; - - if (!params->unlocked) { - /* make sure that the page tables covering the - * address range are actually allocated - */ - r = amdgpu_vm_alloc_pts(params->adev, params->vm, - &cursor, params->immediate); - if (r) - return r; - } - - shift = amdgpu_vm_level_shift(adev, cursor.level); - parent_shift = amdgpu_vm_level_shift(adev, cursor.level - 1); - if (params->unlocked) { - /* Unlocked updates are only allowed on the leaves */ - if (amdgpu_vm_pt_descendant(adev, &cursor)) - continue; - } else if (adev->asic_type < CHIP_VEGA10 && - (flags & AMDGPU_PTE_VALID)) { - /* No huge page support before GMC v9 */ - if (cursor.level != AMDGPU_VM_PTB) { - if (!amdgpu_vm_pt_descendant(adev, &cursor)) - return -ENOENT; - continue; - } - } else if (frag < shift) { - /* We can't use this level when the fragment size is - * smaller than the address shift. Go to the next - * child entry and try again. - */ - if (amdgpu_vm_pt_descendant(adev, &cursor)) - continue; - } else if (frag >= parent_shift) { - /* If the fragment size is even larger than the parent - * shift we should go up one level and check it again. - */ - if (!amdgpu_vm_pt_ancestor(&cursor)) - return -EINVAL; - continue; - } + struct amdgpu_vm_tlb_seq_cb *tlb_cb; - pt = cursor.entry->bo; - if (!pt) { - /* We need all PDs and PTs for mapping something, */ - if (flags & AMDGPU_PTE_VALID) - return -ENOENT; - - /* but unmapping something can happen at a higher - * level. - */ - if (!amdgpu_vm_pt_ancestor(&cursor)) - return -EINVAL; - - pt = cursor.entry->bo; - shift = parent_shift; - frag_end = max(frag_end, ALIGN(frag_start + 1, - 1ULL << shift)); - } - - /* Looks good so far, calculate parameters for the update */ - incr = (uint64_t)AMDGPU_GPU_PAGE_SIZE << shift; - mask = amdgpu_vm_entries_mask(adev, cursor.level); - pe_start = ((cursor.pfn >> shift) & mask) * 8; - entry_end = ((uint64_t)mask + 1) << shift; - entry_end += cursor.pfn & ~(entry_end - 1); - entry_end = min(entry_end, end); - - do { - struct amdgpu_vm *vm = params->vm; - uint64_t upd_end = min(entry_end, frag_end); - unsigned nptes = (upd_end - frag_start) >> shift; - uint64_t upd_flags = flags | AMDGPU_PTE_FRAG(frag); - - /* This can happen when we set higher level PDs to - * silent to stop fault floods. - */ - nptes = max(nptes, 1u); - - trace_amdgpu_vm_update_ptes(params, frag_start, upd_end, - min(nptes, 32u), dst, incr, upd_flags, - vm->task_info.pid, - vm->immediate.fence_context); - amdgpu_vm_update_flags(params, to_amdgpu_bo_vm(pt), - cursor.level, pe_start, dst, - nptes, incr, upd_flags); - - pe_start += nptes * 8; - dst += nptes * incr; - - frag_start = upd_end; - if (frag_start >= frag_end) { - /* figure out the next fragment */ - amdgpu_vm_fragment(params, frag_start, end, - flags, &frag, &frag_end); - if (frag < shift) - break; - } - } while (frag_start < entry_end); - - if (amdgpu_vm_pt_descendant(adev, &cursor)) { - /* Free all child entries. - * Update the tables with the flags and addresses and free up subsequent - * tables in the case of huge pages or freed up areas. - * This is the maximum you can free, because all other page tables are not - * completely covered by the range and so potentially still in use. - */ - while (cursor.pfn < frag_start) { - /* Make sure previous mapping is freed */ - if (cursor.entry->bo) { - params->table_freed = true; - amdgpu_vm_free_pts(adev, params->vm, &cursor); - } - amdgpu_vm_pt_next(adev, &cursor); - } - - } else if (frag >= shift) { - /* or just move on to the next on the same level. */ - amdgpu_vm_pt_next(adev, &cursor); - } - } - - return 0; + tlb_cb = container_of(cb, typeof(*tlb_cb), cb); + atomic64_inc(&tlb_cb->vm->tlb_seq); + kfree(tlb_cb); } /** - * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table + * amdgpu_vm_update_range - update a range in the vm page table * - * @adev: amdgpu_device pointer of the VM - * @bo_adev: amdgpu_device pointer of the mapped BO - * @vm: requested vm + * @adev: amdgpu_device pointer to use for commands + * @vm: the VM to update the range * @immediate: immediate submission in a page fault * @unlocked: unlocked invalidation during MM callback + * @flush_tlb: trigger tlb invalidation after update completed * @resv: fences we need to sync to * @start: start of mapped range * @last: last mapped entry * @flags: flags for the entries * @offset: offset into nodes and pages_addr + * @vram_base: base for vram mappings * @res: ttm_resource to map * @pages_addr: DMA addresses to use for mapping * @fence: optional resulting fence - * @table_freed: return true if page table is freed * * Fill in the page table entries between @start and @last. * * Returns: - * 0 for success, -EINVAL for failure. + * 0 for success, negative erro code for failure. */ -int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, - struct amdgpu_device *bo_adev, - struct amdgpu_vm *vm, bool immediate, - bool unlocked, struct dma_resv *resv, - uint64_t start, uint64_t last, - uint64_t flags, uint64_t offset, - struct ttm_resource *res, - dma_addr_t *pages_addr, - struct dma_fence **fence, - bool *table_freed) +int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, + bool immediate, bool unlocked, bool flush_tlb, + struct dma_resv *resv, uint64_t start, uint64_t last, + uint64_t flags, uint64_t offset, uint64_t vram_base, + struct ttm_resource *res, dma_addr_t *pages_addr, + struct dma_fence **fence) { struct amdgpu_vm_update_params params; + struct amdgpu_vm_tlb_seq_cb *tlb_cb; struct amdgpu_res_cursor cursor; enum amdgpu_sync_mode sync_mode; int r, idx; @@ -1675,6 +773,18 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (!drm_dev_enter(adev_to_drm(adev), &idx)) return -ENODEV; + tlb_cb = kmalloc(sizeof(*tlb_cb), GFP_KERNEL); + if (!tlb_cb) { + r = -ENOMEM; + goto error_unlock; + } + + /* Vega20+XGMI where PTEs get inadvertently cached in L2 texture cache, + * heavy-weight flush TLB unconditionally. + */ + flush_tlb |= adev->gmc.xgmi.num_physical_nodes && + adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0); + memset(¶ms, 0, sizeof(params)); params.adev = adev; params.vm = vm; @@ -1693,7 +803,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, amdgpu_vm_eviction_lock(vm); if (vm->evicting) { r = -EBUSY; - goto error_unlock; + goto error_free; } if (!unlocked && !dma_fence_is_signaled(vm->last_unlocked)) { @@ -1706,7 +816,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, r = vm->update_funcs->prepare(¶ms, resv, sync_mode); if (r) - goto error_unlock; + goto error_free; amdgpu_res_first(pages_addr ? NULL : res, offset, (last - start + 1) * AMDGPU_GPU_PAGE_SIZE, &cursor); @@ -1746,16 +856,15 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, } } else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) { - addr = bo_adev->vm_manager.vram_base_offset + - cursor.start; + addr = vram_base + cursor.start; } else { addr = 0; } tmp = start + num_entries; - r = amdgpu_vm_update_ptes(¶ms, start, tmp, addr, flags); + r = amdgpu_vm_ptes_update(¶ms, start, tmp, addr, flags); if (r) - goto error_unlock; + goto error_free; amdgpu_res_next(&cursor, num_entries * AMDGPU_GPU_PAGE_SIZE); start = tmp; @@ -1763,8 +872,21 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, r = vm->update_funcs->commit(¶ms, fence); - if (table_freed) - *table_freed = *table_freed || params.table_freed; + if (flush_tlb || params.table_freed) { + tlb_cb->vm = vm; + if (fence && *fence && + !dma_fence_add_callback(*fence, &tlb_cb->cb, + amdgpu_vm_tlb_seq_cb)) { + dma_fence_put(vm->last_tlb_flush); + vm->last_tlb_flush = dma_fence_get(*fence); + } else { + amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb); + } + tlb_cb = NULL; + } + +error_free: + kfree(tlb_cb); error_unlock: amdgpu_vm_eviction_unlock(vm); @@ -1822,7 +944,6 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem, * @adev: amdgpu_device pointer * @bo_va: requested BO and VM object * @clear: if true clear the entries - * @table_freed: return true if page table is freed * * Fill in the page table entries for @bo_va. * @@ -1830,7 +951,7 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem, * 0 for success, -EINVAL for failure. */ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, - bool clear, bool *table_freed) + bool clear) { struct amdgpu_bo *bo = bo_va->base.bo; struct amdgpu_vm *vm = bo_va->base.vm; @@ -1838,9 +959,10 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, dma_addr_t *pages_addr = NULL; struct ttm_resource *mem; struct dma_fence **last_update; + bool flush_tlb = clear; struct dma_resv *resv; + uint64_t vram_base; uint64_t flags; - struct amdgpu_device *bo_adev = adev; int r; if (clear || !bo) { @@ -1865,14 +987,18 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, } if (bo) { + struct amdgpu_device *bo_adev; + flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem); if (amdgpu_bo_encrypted(bo)) flags |= AMDGPU_PTE_TMZ; bo_adev = amdgpu_ttm_adev(bo->tbo.bdev); + vram_base = bo_adev->vm_manager.vram_base_offset; } else { flags = 0x0; + vram_base = 0; } if (clear || (bo && bo->tbo.base.resv == @@ -1882,7 +1008,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, last_update = &bo_va->last_pt_update; if (!clear && bo_va->base.moved) { - bo_va->base.moved = false; + flush_tlb = true; list_splice_init(&bo_va->valids, &bo_va->invalids); } else if (bo_va->cleared != clear) { @@ -1905,11 +1031,11 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, trace_amdgpu_vm_bo_update(mapping); - r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, - resv, mapping->start, - mapping->last, update_flags, - mapping->offset, mem, - pages_addr, last_update, table_freed); + r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, + resv, mapping->start, mapping->last, + update_flags, mapping->offset, + vram_base, mem, pages_addr, + last_update); if (r) return r; } @@ -1932,6 +1058,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, list_splice_init(&bo_va->invalids, &bo_va->valids); bo_va->cleared = clear; + bo_va->base.moved = false; if (trace_amdgpu_vm_bo_mapping_enabled()) { list_for_each_entry(mapping, &bo_va->valids, list) @@ -2100,10 +1227,10 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, mapping->start < AMDGPU_GMC_HOLE_START) init_pte_value = AMDGPU_PTE_DEFAULT_ATC; - r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false, false, - resv, mapping->start, - mapping->last, init_pte_value, - 0, NULL, NULL, &f, NULL); + r = amdgpu_vm_update_range(adev, vm, false, false, true, resv, + mapping->start, mapping->last, + init_pte_value, 0, 0, NULL, NULL, + &f); amdgpu_vm_free_mapping(adev, vm, mapping, f); if (r) { dma_fence_put(f); @@ -2145,7 +1272,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) { /* Per VM BOs never need to bo cleared in the page tables */ - r = amdgpu_vm_bo_update(adev, bo_va, false, NULL); + r = amdgpu_vm_bo_update(adev, bo_va, false); if (r) return r; } @@ -2164,7 +1291,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, else clear = true; - r = amdgpu_vm_bo_update(adev, bo_va, clear, NULL); + r = amdgpu_vm_bo_update(adev, bo_va, clear); if (r) return r; @@ -2914,6 +2041,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) vm->update_funcs = &amdgpu_vm_sdma_funcs; vm->last_update = NULL; vm->last_unlocked = dma_fence_get_stub(); + vm->last_tlb_flush = dma_fence_get_stub(); mutex_init(&vm->eviction_lock); vm->evicting = false; @@ -2933,7 +2061,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) amdgpu_vm_bo_base_init(&vm->root, vm, root_bo); - r = amdgpu_vm_clear_bo(adev, vm, root, false); + r = amdgpu_vm_pt_clear(adev, vm, root, false); if (r) goto error_unreserve; @@ -2952,6 +2080,7 @@ error_free_root: vm->root.bo = NULL; error_free_delayed: + dma_fence_put(vm->last_tlb_flush); dma_fence_put(vm->last_unlocked); drm_sched_entity_destroy(&vm->delayed); @@ -2962,34 +2091,6 @@ error_free_immediate: } /** - * amdgpu_vm_check_clean_reserved - check if a VM is clean - * - * @adev: amdgpu_device pointer - * @vm: the VM to check - * - * check all entries of the root PD, if any subsequent PDs are allocated, - * it means there are page table creating and filling, and is no a clean - * VM - * - * Returns: - * 0 if this VM is clean - */ -static int amdgpu_vm_check_clean_reserved(struct amdgpu_device *adev, - struct amdgpu_vm *vm) -{ - enum amdgpu_vm_level root = adev->vm_manager.root_level; - unsigned int entries = amdgpu_vm_num_entries(adev, root); - unsigned int i = 0; - - for (i = 0; i < entries; i++) { - if (to_amdgpu_bo_vm(vm->root.bo)->entries[i].bo) - return -EINVAL; - } - - return 0; -} - -/** * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM * * @adev: amdgpu_device pointer @@ -3018,17 +2119,17 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) return r; /* Sanity checks */ - r = amdgpu_vm_check_clean_reserved(adev, vm); - if (r) + if (!amdgpu_vm_pt_is_root_clean(adev, vm)) { + r = -EINVAL; goto unreserve_bo; + } /* Check if PD needs to be reinitialized and do it before * changing any other state, in case it fails. */ if (pte_support_ats != vm->pte_support_ats) { vm->pte_support_ats = pte_support_ats; - r = amdgpu_vm_clear_bo(adev, vm, - to_amdgpu_bo_vm(vm->root.bo), + r = amdgpu_vm_pt_clear(adev, vm, to_amdgpu_bo_vm(vm->root.bo), false); if (r) goto unreserve_bo; @@ -3096,6 +2197,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) struct amdgpu_bo_va_mapping *mapping, *tmp; bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt; struct amdgpu_bo *root; + unsigned long flags; int i; amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm); @@ -3105,6 +2207,11 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) amdgpu_vm_set_pasid(adev, vm, 0); dma_fence_wait(vm->last_unlocked, false); dma_fence_put(vm->last_unlocked); + dma_fence_wait(vm->last_tlb_flush, false); + /* Make sure that all fence callbacks have completed */ + spin_lock_irqsave(vm->last_tlb_flush->lock, flags); + spin_unlock_irqrestore(vm->last_tlb_flush->lock, flags); + dma_fence_put(vm->last_tlb_flush); list_for_each_entry_safe(mapping, tmp, &vm->freed, list) { if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) { @@ -3116,7 +2223,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) amdgpu_vm_free_mapping(adev, vm, mapping, NULL); } - amdgpu_vm_free_pts(adev, vm, NULL); + amdgpu_vm_pt_free_root(adev, vm); amdgpu_bo_unreserve(root); amdgpu_bo_unref(&root); WARN_ON(vm->root.bo); @@ -3376,9 +2483,8 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, goto error_unlock; } - r = amdgpu_vm_bo_update_mapping(adev, adev, vm, true, false, NULL, addr, - addr, flags, value, NULL, NULL, NULL, - NULL); + r = amdgpu_vm_update_range(adev, vm, true, false, false, NULL, addr, + addr, flags, value, 0, NULL, NULL, NULL); if (r) goto error_unlock; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index bd7892482bbf..9ecb7f663e19 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -284,6 +284,10 @@ struct amdgpu_vm { struct drm_sched_entity immediate; struct drm_sched_entity delayed; + /* Last finished delayed update */ + atomic64_t tlb_seq; + struct dma_fence *last_tlb_flush; + /* Last unlocked submission to the scheduler entities */ struct dma_fence *last_unlocked; @@ -395,18 +399,17 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct dma_fence **fence); int amdgpu_vm_handle_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm); -int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, - struct amdgpu_device *bo_adev, - struct amdgpu_vm *vm, bool immediate, - bool unlocked, struct dma_resv *resv, - uint64_t start, uint64_t last, - uint64_t flags, uint64_t offset, - struct ttm_resource *res, - dma_addr_t *pages_addr, - struct dma_fence **fence, bool *free_table); +void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, + struct amdgpu_vm *vm, struct amdgpu_bo *bo); +int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, + bool immediate, bool unlocked, bool flush_tlb, + struct dma_resv *resv, uint64_t start, uint64_t last, + uint64_t flags, uint64_t offset, uint64_t vram_base, + struct ttm_resource *res, dma_addr_t *pages_addr, + struct dma_fence **fence); int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, - bool clear, bool *table_freed); + bool clear); bool amdgpu_vm_evictable(struct amdgpu_bo *bo); void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, struct amdgpu_bo *bo, bool evicted); @@ -455,8 +458,34 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem, uint64_t *gtt_mem, uint64_t *cpu_mem); +int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct amdgpu_bo_vm *vmbo, bool immediate); +int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, + int level, bool immediate, struct amdgpu_bo_vm **vmbo); +void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm); +bool amdgpu_vm_pt_is_root_clean(struct amdgpu_device *adev, + struct amdgpu_vm *vm); + +int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params, + struct amdgpu_vm_bo_base *entry); +int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params, + uint64_t start, uint64_t end, + uint64_t dst, uint64_t flags); + #if defined(CONFIG_DEBUG_FS) void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m); #endif +/** + * amdgpu_vm_tlb_seq - return tlb flush sequence number + * @vm: the amdgpu_vm structure to query + * + * Returns the tlb flush sequence number which indicates that the VM TLBs needs + * to be invalidated whenever the sequence number change. + */ +static inline uint64_t amdgpu_vm_tlb_seq(struct amdgpu_vm *vm) +{ + return atomic64_read(&vm->tlb_seq); +} + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c new file mode 100644 index 000000000000..7761a3ea172e --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -0,0 +1,977 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <drm/drm_drv.h> + +#include "amdgpu.h" +#include "amdgpu_trace.h" +#include "amdgpu_vm.h" + +/* + * amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt + */ +struct amdgpu_vm_pt_cursor { + uint64_t pfn; + struct amdgpu_vm_bo_base *parent; + struct amdgpu_vm_bo_base *entry; + unsigned int level; +}; + +/** + * amdgpu_vm_pt_level_shift - return the addr shift for each level + * + * @adev: amdgpu_device pointer + * @level: VMPT level + * + * Returns: + * The number of bits the pfn needs to be right shifted for a level. + */ +static unsigned int amdgpu_vm_pt_level_shift(struct amdgpu_device *adev, + unsigned int level) +{ + switch (level) { + case AMDGPU_VM_PDB2: + case AMDGPU_VM_PDB1: + case AMDGPU_VM_PDB0: + return 9 * (AMDGPU_VM_PDB0 - level) + + adev->vm_manager.block_size; + case AMDGPU_VM_PTB: + return 0; + default: + return ~0; + } +} + +/** + * amdgpu_vm_pt_num_entries - return the number of entries in a PD/PT + * + * @adev: amdgpu_device pointer + * @level: VMPT level + * + * Returns: + * The number of entries in a page directory or page table. + */ +static unsigned int amdgpu_vm_pt_num_entries(struct amdgpu_device *adev, + unsigned int level) +{ + unsigned int shift; + + shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level); + if (level == adev->vm_manager.root_level) + /* For the root directory */ + return round_up(adev->vm_manager.max_pfn, 1ULL << shift) + >> shift; + else if (level != AMDGPU_VM_PTB) + /* Everything in between */ + return 512; + + /* For the page tables on the leaves */ + return AMDGPU_VM_PTE_COUNT(adev); +} + +/** + * amdgpu_vm_pt_num_ats_entries - return the number of ATS entries in the root PD + * + * @adev: amdgpu_device pointer + * + * Returns: + * The number of entries in the root page directory which needs the ATS setting. + */ +static unsigned int amdgpu_vm_pt_num_ats_entries(struct amdgpu_device *adev) +{ + unsigned int shift; + + shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level); + return AMDGPU_GMC_HOLE_START >> (shift + AMDGPU_GPU_PAGE_SHIFT); +} + +/** + * amdgpu_vm_pt_entries_mask - the mask to get the entry number of a PD/PT + * + * @adev: amdgpu_device pointer + * @level: VMPT level + * + * Returns: + * The mask to extract the entry number of a PD/PT from an address. + */ +static uint32_t amdgpu_vm_pt_entries_mask(struct amdgpu_device *adev, + unsigned int level) +{ + if (level <= adev->vm_manager.root_level) + return 0xffffffff; + else if (level != AMDGPU_VM_PTB) + return 0x1ff; + else + return AMDGPU_VM_PTE_COUNT(adev) - 1; +} + +/** + * amdgpu_vm_pt_size - returns the size of the page table in bytes + * + * @adev: amdgpu_device pointer + * @level: VMPT level + * + * Returns: + * The size of the BO for a page directory or page table in bytes. + */ +static unsigned int amdgpu_vm_pt_size(struct amdgpu_device *adev, + unsigned int level) +{ + return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_pt_num_entries(adev, level) * 8); +} + +/** + * amdgpu_vm_pt_parent - get the parent page directory + * + * @pt: child page table + * + * Helper to get the parent entry for the child page table. NULL if we are at + * the root page directory. + */ +static struct amdgpu_vm_bo_base * +amdgpu_vm_pt_parent(struct amdgpu_vm_bo_base *pt) +{ + struct amdgpu_bo *parent = pt->bo->parent; + + if (!parent) + return NULL; + + return parent->vm_bo; +} + +/** + * amdgpu_vm_pt_start - start PD/PT walk + * + * @adev: amdgpu_device pointer + * @vm: amdgpu_vm structure + * @start: start address of the walk + * @cursor: state to initialize + * + * Initialize a amdgpu_vm_pt_cursor to start a walk. + */ +static void amdgpu_vm_pt_start(struct amdgpu_device *adev, + struct amdgpu_vm *vm, uint64_t start, + struct amdgpu_vm_pt_cursor *cursor) +{ + cursor->pfn = start; + cursor->parent = NULL; + cursor->entry = &vm->root; + cursor->level = adev->vm_manager.root_level; +} + +/** + * amdgpu_vm_pt_descendant - go to child node + * + * @adev: amdgpu_device pointer + * @cursor: current state + * + * Walk to the child node of the current node. + * Returns: + * True if the walk was possible, false otherwise. + */ +static bool amdgpu_vm_pt_descendant(struct amdgpu_device *adev, + struct amdgpu_vm_pt_cursor *cursor) +{ + unsigned int mask, shift, idx; + + if ((cursor->level == AMDGPU_VM_PTB) || !cursor->entry || + !cursor->entry->bo) + return false; + + mask = amdgpu_vm_pt_entries_mask(adev, cursor->level); + shift = amdgpu_vm_pt_level_shift(adev, cursor->level); + + ++cursor->level; + idx = (cursor->pfn >> shift) & mask; + cursor->parent = cursor->entry; + cursor->entry = &to_amdgpu_bo_vm(cursor->entry->bo)->entries[idx]; + return true; +} + +/** + * amdgpu_vm_pt_sibling - go to sibling node + * + * @adev: amdgpu_device pointer + * @cursor: current state + * + * Walk to the sibling node of the current node. + * Returns: + * True if the walk was possible, false otherwise. + */ +static bool amdgpu_vm_pt_sibling(struct amdgpu_device *adev, + struct amdgpu_vm_pt_cursor *cursor) +{ + + unsigned int shift, num_entries; + struct amdgpu_bo_vm *parent; + + /* Root doesn't have a sibling */ + if (!cursor->parent) + return false; + + /* Go to our parents and see if we got a sibling */ + shift = amdgpu_vm_pt_level_shift(adev, cursor->level - 1); + num_entries = amdgpu_vm_pt_num_entries(adev, cursor->level - 1); + parent = to_amdgpu_bo_vm(cursor->parent->bo); + + if (cursor->entry == &parent->entries[num_entries - 1]) + return false; + + cursor->pfn += 1ULL << shift; + cursor->pfn &= ~((1ULL << shift) - 1); + ++cursor->entry; + return true; +} + +/** + * amdgpu_vm_pt_ancestor - go to parent node + * + * @cursor: current state + * + * Walk to the parent node of the current node. + * Returns: + * True if the walk was possible, false otherwise. + */ +static bool amdgpu_vm_pt_ancestor(struct amdgpu_vm_pt_cursor *cursor) +{ + if (!cursor->parent) + return false; + + --cursor->level; + cursor->entry = cursor->parent; + cursor->parent = amdgpu_vm_pt_parent(cursor->parent); + return true; +} + +/** + * amdgpu_vm_pt_next - get next PD/PT in hieratchy + * + * @adev: amdgpu_device pointer + * @cursor: current state + * + * Walk the PD/PT tree to the next node. + */ +static void amdgpu_vm_pt_next(struct amdgpu_device *adev, + struct amdgpu_vm_pt_cursor *cursor) +{ + /* First try a newborn child */ + if (amdgpu_vm_pt_descendant(adev, cursor)) + return; + + /* If that didn't worked try to find a sibling */ + while (!amdgpu_vm_pt_sibling(adev, cursor)) { + /* No sibling, go to our parents and grandparents */ + if (!amdgpu_vm_pt_ancestor(cursor)) { + cursor->pfn = ~0ll; + return; + } + } +} + +/** + * amdgpu_vm_pt_first_dfs - start a deep first search + * + * @adev: amdgpu_device structure + * @vm: amdgpu_vm structure + * @start: optional cursor to start with + * @cursor: state to initialize + * + * Starts a deep first traversal of the PD/PT tree. + */ +static void amdgpu_vm_pt_first_dfs(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + struct amdgpu_vm_pt_cursor *start, + struct amdgpu_vm_pt_cursor *cursor) +{ + if (start) + *cursor = *start; + else + amdgpu_vm_pt_start(adev, vm, 0, cursor); + + while (amdgpu_vm_pt_descendant(adev, cursor)) + ; +} + +/** + * amdgpu_vm_pt_continue_dfs - check if the deep first search should continue + * + * @start: starting point for the search + * @entry: current entry + * + * Returns: + * True when the search should continue, false otherwise. + */ +static bool amdgpu_vm_pt_continue_dfs(struct amdgpu_vm_pt_cursor *start, + struct amdgpu_vm_bo_base *entry) +{ + return entry && (!start || entry != start->entry); +} + +/** + * amdgpu_vm_pt_next_dfs - get the next node for a deep first search + * + * @adev: amdgpu_device structure + * @cursor: current state + * + * Move the cursor to the next node in a deep first search. + */ +static void amdgpu_vm_pt_next_dfs(struct amdgpu_device *adev, + struct amdgpu_vm_pt_cursor *cursor) +{ + if (!cursor->entry) + return; + + if (!cursor->parent) + cursor->entry = NULL; + else if (amdgpu_vm_pt_sibling(adev, cursor)) + while (amdgpu_vm_pt_descendant(adev, cursor)) + ; + else + amdgpu_vm_pt_ancestor(cursor); +} + +/* + * for_each_amdgpu_vm_pt_dfs_safe - safe deep first search of all PDs/PTs + */ +#define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) \ + for (amdgpu_vm_pt_first_dfs((adev), (vm), (start), &(cursor)), \ + (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor));\ + amdgpu_vm_pt_continue_dfs((start), (entry)); \ + (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor))) + +/** + * amdgpu_vm_pt_clear - initially clear the PDs/PTs + * + * @adev: amdgpu_device pointer + * @vm: VM to clear BO from + * @vmbo: BO to clear + * @immediate: use an immediate update + * + * Root PD needs to be reserved when calling this. + * + * Returns: + * 0 on success, errno otherwise. + */ +int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct amdgpu_bo_vm *vmbo, bool immediate) +{ + unsigned int level = adev->vm_manager.root_level; + struct ttm_operation_ctx ctx = { true, false }; + struct amdgpu_vm_update_params params; + struct amdgpu_bo *ancestor = &vmbo->bo; + unsigned int entries, ats_entries; + struct amdgpu_bo *bo = &vmbo->bo; + uint64_t addr; + int r, idx; + + /* Figure out our place in the hierarchy */ + if (ancestor->parent) { + ++level; + while (ancestor->parent->parent) { + ++level; + ancestor = ancestor->parent; + } + } + + entries = amdgpu_bo_size(bo) / 8; + if (!vm->pte_support_ats) { + ats_entries = 0; + + } else if (!bo->parent) { + ats_entries = amdgpu_vm_pt_num_ats_entries(adev); + ats_entries = min(ats_entries, entries); + entries -= ats_entries; + + } else { + struct amdgpu_vm_bo_base *pt; + + pt = ancestor->vm_bo; + ats_entries = amdgpu_vm_pt_num_ats_entries(adev); + if ((pt - to_amdgpu_bo_vm(vm->root.bo)->entries) >= + ats_entries) { + ats_entries = 0; + } else { + ats_entries = entries; + entries = 0; + } + } + + r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (r) + return r; + + if (vmbo->shadow) { + struct amdgpu_bo *shadow = vmbo->shadow; + + r = ttm_bo_validate(&shadow->tbo, &shadow->placement, &ctx); + if (r) + return r; + } + + if (!drm_dev_enter(adev_to_drm(adev), &idx)) + return -ENODEV; + + r = vm->update_funcs->map_table(vmbo); + if (r) + goto exit; + + memset(¶ms, 0, sizeof(params)); + params.adev = adev; + params.vm = vm; + params.immediate = immediate; + + r = vm->update_funcs->prepare(¶ms, NULL, AMDGPU_SYNC_EXPLICIT); + if (r) + goto exit; + + addr = 0; + if (ats_entries) { + uint64_t value = 0, flags; + + flags = AMDGPU_PTE_DEFAULT_ATC; + if (level != AMDGPU_VM_PTB) { + /* Handle leaf PDEs as PTEs */ + flags |= AMDGPU_PDE_PTE; + amdgpu_gmc_get_vm_pde(adev, level, &value, &flags); + } + + r = vm->update_funcs->update(¶ms, vmbo, addr, 0, + ats_entries, value, flags); + if (r) + goto exit; + + addr += ats_entries * 8; + } + + if (entries) { + uint64_t value = 0, flags = 0; + + if (adev->asic_type >= CHIP_VEGA10) { + if (level != AMDGPU_VM_PTB) { + /* Handle leaf PDEs as PTEs */ + flags |= AMDGPU_PDE_PTE; + amdgpu_gmc_get_vm_pde(adev, level, + &value, &flags); + } else { + /* Workaround for fault priority problem on GMC9 */ + flags = AMDGPU_PTE_EXECUTABLE; + } + } + + r = vm->update_funcs->update(¶ms, vmbo, addr, 0, entries, + value, flags); + if (r) + goto exit; + } + + r = vm->update_funcs->commit(¶ms, NULL); +exit: + drm_dev_exit(idx); + return r; +} + +/** + * amdgpu_vm_pt_create - create bo for PD/PT + * + * @adev: amdgpu_device pointer + * @vm: requesting vm + * @level: the page table level + * @immediate: use a immediate update + * @vmbo: pointer to the buffer object pointer + */ +int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, + int level, bool immediate, struct amdgpu_bo_vm **vmbo) +{ + struct amdgpu_bo_param bp; + struct amdgpu_bo *bo; + struct dma_resv *resv; + unsigned int num_entries; + int r; + + memset(&bp, 0, sizeof(bp)); + + bp.size = amdgpu_vm_pt_size(adev, level); + bp.byte_align = AMDGPU_GPU_PAGE_SIZE; + bp.domain = AMDGPU_GEM_DOMAIN_VRAM; + bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain); + bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | + AMDGPU_GEM_CREATE_CPU_GTT_USWC; + + if (level < AMDGPU_VM_PTB) + num_entries = amdgpu_vm_pt_num_entries(adev, level); + else + num_entries = 0; + + bp.bo_ptr_size = struct_size((*vmbo), entries, num_entries); + + if (vm->use_cpu_for_update) + bp.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + + bp.type = ttm_bo_type_kernel; + bp.no_wait_gpu = immediate; + if (vm->root.bo) + bp.resv = vm->root.bo->tbo.base.resv; + + r = amdgpu_bo_create_vm(adev, &bp, vmbo); + if (r) + return r; + + bo = &(*vmbo)->bo; + if (vm->is_compute_context || (adev->flags & AMD_IS_APU)) { + (*vmbo)->shadow = NULL; + return 0; + } + + if (!bp.resv) + WARN_ON(dma_resv_lock(bo->tbo.base.resv, + NULL)); + resv = bp.resv; + memset(&bp, 0, sizeof(bp)); + bp.size = amdgpu_vm_pt_size(adev, level); + bp.domain = AMDGPU_GEM_DOMAIN_GTT; + bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC; + bp.type = ttm_bo_type_kernel; + bp.resv = bo->tbo.base.resv; + bp.bo_ptr_size = sizeof(struct amdgpu_bo); + + r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow); + + if (!resv) + dma_resv_unlock(bo->tbo.base.resv); + + if (r) { + amdgpu_bo_unref(&bo); + return r; + } + + (*vmbo)->shadow->parent = amdgpu_bo_ref(bo); + amdgpu_bo_add_to_shadow_list(*vmbo); + + return 0; +} + +/** + * amdgpu_vm_pt_alloc - Allocate a specific page table + * + * @adev: amdgpu_device pointer + * @vm: VM to allocate page tables for + * @cursor: Which page table to allocate + * @immediate: use an immediate update + * + * Make sure a specific page table or directory is allocated. + * + * Returns: + * 1 if page table needed to be allocated, 0 if page table was already + * allocated, negative errno if an error occurred. + */ +static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + struct amdgpu_vm_pt_cursor *cursor, + bool immediate) +{ + struct amdgpu_vm_bo_base *entry = cursor->entry; + struct amdgpu_bo *pt_bo; + struct amdgpu_bo_vm *pt; + int r; + + if (entry->bo) + return 0; + + r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt); + if (r) + return r; + + /* Keep a reference to the root directory to avoid + * freeing them up in the wrong order. + */ + pt_bo = &pt->bo; + pt_bo->parent = amdgpu_bo_ref(cursor->parent->bo); + amdgpu_vm_bo_base_init(entry, vm, pt_bo); + r = amdgpu_vm_pt_clear(adev, vm, pt, immediate); + if (r) + goto error_free_pt; + + return 0; + +error_free_pt: + amdgpu_bo_unref(&pt->shadow); + amdgpu_bo_unref(&pt_bo); + return r; +} + +/** + * amdgpu_vm_pt_free - free one PD/PT + * + * @entry: PDE to free + */ +static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry) +{ + struct amdgpu_bo *shadow; + + if (!entry->bo) + return; + shadow = amdgpu_bo_shadowed(entry->bo); + entry->bo->vm_bo = NULL; + list_del(&entry->vm_status); + amdgpu_bo_unref(&shadow); + amdgpu_bo_unref(&entry->bo); +} + +/** + * amdgpu_vm_pt_free_dfs - free PD/PT levels + * + * @adev: amdgpu device structure + * @vm: amdgpu vm structure + * @start: optional cursor where to start freeing PDs/PTs + * + * Free the page directory or page table level and all sub levels. + */ +static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + struct amdgpu_vm_pt_cursor *start) +{ + struct amdgpu_vm_pt_cursor cursor; + struct amdgpu_vm_bo_base *entry; + + for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) + amdgpu_vm_pt_free(entry); + + if (start) + amdgpu_vm_pt_free(start->entry); +} + +/** + * amdgpu_vm_pt_free_root - free root PD + * @adev: amdgpu device structure + * @vm: amdgpu vm structure + * + * Free the root page directory and everything below it. + */ +void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm) +{ + amdgpu_vm_pt_free_dfs(adev, vm, NULL); +} + +/** + * amdgpu_vm_pt_is_root_clean - check if a root PD is clean + * + * @adev: amdgpu_device pointer + * @vm: the VM to check + * + * Check all entries of the root PD, if any subsequent PDs are allocated, + * it means there are page table creating and filling, and is no a clean + * VM + * + * Returns: + * 0 if this VM is clean + */ +bool amdgpu_vm_pt_is_root_clean(struct amdgpu_device *adev, + struct amdgpu_vm *vm) +{ + enum amdgpu_vm_level root = adev->vm_manager.root_level; + unsigned int entries = amdgpu_vm_pt_num_entries(adev, root); + unsigned int i = 0; + + for (i = 0; i < entries; i++) { + if (to_amdgpu_bo_vm(vm->root.bo)->entries[i].bo) + return false; + } + return true; +} + +/** + * amdgpu_vm_pde_update - update a single level in the hierarchy + * + * @params: parameters for the update + * @entry: entry to update + * + * Makes sure the requested entry in parent is up to date. + */ +int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params, + struct amdgpu_vm_bo_base *entry) +{ + struct amdgpu_vm_bo_base *parent = amdgpu_vm_pt_parent(entry); + struct amdgpu_bo *bo = parent->bo, *pbo; + struct amdgpu_vm *vm = params->vm; + uint64_t pde, pt, flags; + unsigned int level; + + for (level = 0, pbo = bo->parent; pbo; ++level) + pbo = pbo->parent; + + level += params->adev->vm_manager.root_level; + amdgpu_gmc_get_pde_for_bo(entry->bo, level, &pt, &flags); + pde = (entry - to_amdgpu_bo_vm(parent->bo)->entries) * 8; + return vm->update_funcs->update(params, to_amdgpu_bo_vm(bo), pde, pt, + 1, 0, flags); +} + +/* + * amdgpu_vm_pte_update_flags - figure out flags for PTE updates + * + * Make sure to set the right flags for the PTEs at the desired level. + */ +static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params, + struct amdgpu_bo_vm *pt, + unsigned int level, + uint64_t pe, uint64_t addr, + unsigned int count, uint32_t incr, + uint64_t flags) + +{ + if (level != AMDGPU_VM_PTB) { + flags |= AMDGPU_PDE_PTE; + amdgpu_gmc_get_vm_pde(params->adev, level, &addr, &flags); + + } else if (params->adev->asic_type >= CHIP_VEGA10 && + !(flags & AMDGPU_PTE_VALID) && + !(flags & AMDGPU_PTE_PRT)) { + + /* Workaround for fault priority problem on GMC9 */ + flags |= AMDGPU_PTE_EXECUTABLE; + } + + params->vm->update_funcs->update(params, pt, pe, addr, count, incr, + flags); +} + +/** + * amdgpu_vm_pte_fragment - get fragment for PTEs + * + * @params: see amdgpu_vm_update_params definition + * @start: first PTE to handle + * @end: last PTE to handle + * @flags: hw mapping flags + * @frag: resulting fragment size + * @frag_end: end of this fragment + * + * Returns the first possible fragment for the start and end address. + */ +static void amdgpu_vm_pte_fragment(struct amdgpu_vm_update_params *params, + uint64_t start, uint64_t end, uint64_t flags, + unsigned int *frag, uint64_t *frag_end) +{ + /** + * The MC L1 TLB supports variable sized pages, based on a fragment + * field in the PTE. When this field is set to a non-zero value, page + * granularity is increased from 4KB to (1 << (12 + frag)). The PTE + * flags are considered valid for all PTEs within the fragment range + * and corresponding mappings are assumed to be physically contiguous. + * + * The L1 TLB can store a single PTE for the whole fragment, + * significantly increasing the space available for translation + * caching. This leads to large improvements in throughput when the + * TLB is under pressure. + * + * The L2 TLB distributes small and large fragments into two + * asymmetric partitions. The large fragment cache is significantly + * larger. Thus, we try to use large fragments wherever possible. + * Userspace can support this by aligning virtual base address and + * allocation size to the fragment size. + * + * Starting with Vega10 the fragment size only controls the L1. The L2 + * is now directly feed with small/huge/giant pages from the walker. + */ + unsigned int max_frag; + + if (params->adev->asic_type < CHIP_VEGA10) + max_frag = params->adev->vm_manager.fragment_size; + else + max_frag = 31; + + /* system pages are non continuously */ + if (params->pages_addr) { + *frag = 0; + *frag_end = end; + return; + } + + /* This intentionally wraps around if no bit is set */ + *frag = min_t(unsigned int, ffs(start) - 1, fls64(end - start) - 1); + if (*frag >= max_frag) { + *frag = max_frag; + *frag_end = end & ~((1ULL << max_frag) - 1); + } else { + *frag_end = start + (1 << *frag); + } +} + +/** + * amdgpu_vm_ptes_update - make sure that page tables are valid + * + * @params: see amdgpu_vm_update_params definition + * @start: start of GPU address range + * @end: end of GPU address range + * @dst: destination address to map to, the next dst inside the function + * @flags: mapping flags + * + * Update the page tables in the range @start - @end. + * + * Returns: + * 0 for success, -EINVAL for failure. + */ +int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params, + uint64_t start, uint64_t end, + uint64_t dst, uint64_t flags) +{ + struct amdgpu_device *adev = params->adev; + struct amdgpu_vm_pt_cursor cursor; + uint64_t frag_start = start, frag_end; + unsigned int frag; + int r; + + /* figure out the initial fragment */ + amdgpu_vm_pte_fragment(params, frag_start, end, flags, &frag, + &frag_end); + + /* walk over the address space and update the PTs */ + amdgpu_vm_pt_start(adev, params->vm, start, &cursor); + while (cursor.pfn < end) { + unsigned int shift, parent_shift, mask; + uint64_t incr, entry_end, pe_start; + struct amdgpu_bo *pt; + + if (!params->unlocked) { + /* make sure that the page tables covering the + * address range are actually allocated + */ + r = amdgpu_vm_pt_alloc(params->adev, params->vm, + &cursor, params->immediate); + if (r) + return r; + } + + shift = amdgpu_vm_pt_level_shift(adev, cursor.level); + parent_shift = amdgpu_vm_pt_level_shift(adev, cursor.level - 1); + if (params->unlocked) { + /* Unlocked updates are only allowed on the leaves */ + if (amdgpu_vm_pt_descendant(adev, &cursor)) + continue; + } else if (adev->asic_type < CHIP_VEGA10 && + (flags & AMDGPU_PTE_VALID)) { + /* No huge page support before GMC v9 */ + if (cursor.level != AMDGPU_VM_PTB) { + if (!amdgpu_vm_pt_descendant(adev, &cursor)) + return -ENOENT; + continue; + } + } else if (frag < shift) { + /* We can't use this level when the fragment size is + * smaller than the address shift. Go to the next + * child entry and try again. + */ + if (amdgpu_vm_pt_descendant(adev, &cursor)) + continue; + } else if (frag >= parent_shift) { + /* If the fragment size is even larger than the parent + * shift we should go up one level and check it again. + */ + if (!amdgpu_vm_pt_ancestor(&cursor)) + return -EINVAL; + continue; + } + + pt = cursor.entry->bo; + if (!pt) { + /* We need all PDs and PTs for mapping something, */ + if (flags & AMDGPU_PTE_VALID) + return -ENOENT; + + /* but unmapping something can happen at a higher + * level. + */ + if (!amdgpu_vm_pt_ancestor(&cursor)) + return -EINVAL; + + pt = cursor.entry->bo; + shift = parent_shift; + frag_end = max(frag_end, ALIGN(frag_start + 1, + 1ULL << shift)); + } + + /* Looks good so far, calculate parameters for the update */ + incr = (uint64_t)AMDGPU_GPU_PAGE_SIZE << shift; + mask = amdgpu_vm_pt_entries_mask(adev, cursor.level); + pe_start = ((cursor.pfn >> shift) & mask) * 8; + entry_end = ((uint64_t)mask + 1) << shift; + entry_end += cursor.pfn & ~(entry_end - 1); + entry_end = min(entry_end, end); + + do { + struct amdgpu_vm *vm = params->vm; + uint64_t upd_end = min(entry_end, frag_end); + unsigned int nptes = (upd_end - frag_start) >> shift; + uint64_t upd_flags = flags | AMDGPU_PTE_FRAG(frag); + + /* This can happen when we set higher level PDs to + * silent to stop fault floods. + */ + nptes = max(nptes, 1u); + + trace_amdgpu_vm_update_ptes(params, frag_start, upd_end, + min(nptes, 32u), dst, incr, + upd_flags, + vm->task_info.pid, + vm->immediate.fence_context); + amdgpu_vm_pte_update_flags(params, to_amdgpu_bo_vm(pt), + cursor.level, pe_start, dst, + nptes, incr, upd_flags); + + pe_start += nptes * 8; + dst += nptes * incr; + + frag_start = upd_end; + if (frag_start >= frag_end) { + /* figure out the next fragment */ + amdgpu_vm_pte_fragment(params, frag_start, end, + flags, &frag, &frag_end); + if (frag < shift) + break; + } + } while (frag_start < entry_end); + + if (amdgpu_vm_pt_descendant(adev, &cursor)) { + /* Free all child entries. + * Update the tables with the flags and addresses and free up subsequent + * tables in the case of huge pages or freed up areas. + * This is the maximum you can free, because all other page tables are not + * completely covered by the range and so potentially still in use. + */ + while (cursor.pfn < frag_start) { + /* Make sure previous mapping is freed */ + if (cursor.entry->bo) { + params->table_freed = true; + amdgpu_vm_pt_free_dfs(adev, params->vm, + &cursor); + } + amdgpu_vm_pt_next(adev, &cursor); + } + + } else if (frag >= shift) { + /* or just move on to the next on the same level. */ + amdgpu_vm_pt_next(adev, &cursor); + } + } + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c index bdb44cee19d3..1fd3cbca20a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -109,7 +109,7 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p, if (p->unlocked) { struct dma_fence *tmp = dma_fence_get(f); - swap(p->vm->last_unlocked, f); + swap(p->vm->last_unlocked, tmp); dma_fence_put(tmp); } else { amdgpu_bo_fence(p->vm->root.bo, f, true); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h index 7326b6c1b71c..e78e4c27b62a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h @@ -1,34 +1,33 @@ /* - * Copyright 2018-2019 Advanced Micro Devices, Inc. + * Copyright (c) 2018-2021 Advanced Micro Devices, Inc. All rights reserved. * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. */ #ifndef AMDGV_SRIOV_MSG__H_ #define AMDGV_SRIOV_MSG__H_ /* unit in kilobytes */ -#define AMD_SRIOV_MSG_VBIOS_OFFSET 0 -#define AMD_SRIOV_MSG_VBIOS_SIZE_KB 64 -#define AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB AMD_SRIOV_MSG_VBIOS_SIZE_KB -#define AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB 4 +#define AMD_SRIOV_MSG_VBIOS_OFFSET 0 +#define AMD_SRIOV_MSG_VBIOS_SIZE_KB 64 +#define AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB AMD_SRIOV_MSG_VBIOS_SIZE_KB +#define AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB 4 /* * layout @@ -51,10 +50,10 @@ * v2 defined in amdgim * v3 current */ -#define AMD_SRIOV_MSG_FW_VRAM_PF2VF_VER 2 -#define AMD_SRIOV_MSG_FW_VRAM_VF2PF_VER 3 +#define AMD_SRIOV_MSG_FW_VRAM_PF2VF_VER 2 +#define AMD_SRIOV_MSG_FW_VRAM_VF2PF_VER 3 -#define AMD_SRIOV_MSG_RESERVE_UCODE 24 +#define AMD_SRIOV_MSG_RESERVE_UCODE 24 #define AMD_SRIOV_MSG_RESERVE_VCN_INST 4 @@ -83,19 +82,19 @@ enum amd_sriov_ucode_engine_id { AMD_SRIOV_UCODE_ID__MAX }; -#pragma pack(push, 1) // PF2VF / VF2PF data areas are byte packed +#pragma pack(push, 1) // PF2VF / VF2PF data areas are byte packed union amd_sriov_msg_feature_flags { struct { - uint32_t error_log_collect : 1; - uint32_t host_load_ucodes : 1; - uint32_t host_flr_vramlost : 1; - uint32_t mm_bw_management : 1; - uint32_t pp_one_vf_mode : 1; - uint32_t reg_indirect_acc : 1; - uint32_t reserved : 26; + uint32_t error_log_collect : 1; + uint32_t host_load_ucodes : 1; + uint32_t host_flr_vramlost : 1; + uint32_t mm_bw_management : 1; + uint32_t pp_one_vf_mode : 1; + uint32_t reg_indirect_acc : 1; + uint32_t reserved : 26; } flags; - uint32_t all; + uint32_t all; }; union amd_sriov_reg_access_flags { @@ -110,10 +109,10 @@ union amd_sriov_reg_access_flags { union amd_sriov_msg_os_info { struct { - uint32_t windows : 1; - uint32_t reserved : 31; + uint32_t windows : 1; + uint32_t reserved : 31; } info; - uint32_t all; + uint32_t all; }; struct amd_sriov_msg_uuid_info { @@ -156,6 +155,7 @@ struct amd_sriov_msg_pf2vf_info_header { uint32_t reserved[2]; }; +#define AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE (48) struct amd_sriov_msg_pf2vf_info { /* header contains size and version */ struct amd_sriov_msg_pf2vf_info_header header; @@ -204,10 +204,10 @@ struct amd_sriov_msg_pf2vf_info { } mm_bw_management[AMD_SRIOV_MSG_RESERVE_VCN_INST]; /* UUID info */ struct amd_sriov_msg_uuid_info uuid_info; - /* pcie atomic Ops info */ - uint32_t pcie_atomic_ops_enabled_flags; + /* PCIE atomic ops support flag */ + uint32_t pcie_atomic_ops_support_flags; /* reserved */ - uint32_t reserved[256 - 48]; + uint32_t reserved[256 - AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE]; }; struct amd_sriov_msg_vf2pf_info_header { @@ -219,12 +219,13 @@ struct amd_sriov_msg_vf2pf_info_header { uint32_t reserved[2]; }; +#define AMD_SRIOV_MSG_VF2PF_INFO_FILLED_SIZE (70) struct amd_sriov_msg_vf2pf_info { /* header contains size and version */ struct amd_sriov_msg_vf2pf_info_header header; uint32_t checksum; /* driver version */ - uint8_t driver_version[64]; + uint8_t driver_version[64]; /* driver certification, 1=WHQL, 0=None */ uint32_t driver_cert; /* guest OS type and version */ @@ -258,13 +259,13 @@ struct amd_sriov_msg_vf2pf_info { uint32_t fb_size; /* guest ucode data, each one is 1.25 Dword */ struct { - uint8_t id; + uint8_t id; uint32_t version; } ucode_info[AMD_SRIOV_MSG_RESERVE_UCODE]; uint64_t dummy_page_addr; /* reserved */ - uint32_t reserved[256-70]; + uint32_t reserved[256 - AMD_SRIOV_MSG_VF2PF_INFO_FILLED_SIZE]; }; /* mailbox message send from guest to host */ @@ -276,7 +277,7 @@ enum amd_sriov_mailbox_request_message { MB_REQ_MSG_REQ_GPU_RESET_ACCESS, MB_REQ_MSG_REQ_GPU_INIT_DATA, - MB_REQ_MSG_LOG_VF_ERROR = 200, + MB_REQ_MSG_LOG_VF_ERROR = 200, }; /* mailbox message send from host to guest */ @@ -298,17 +299,15 @@ enum amd_sriov_gpu_init_data_version { GPU_INIT_DATA_READY_V1 = 1, }; -#pragma pack(pop) // Restore previous packing option +#pragma pack(pop) // Restore previous packing option /* checksum function between host and guest */ -unsigned int amd_sriov_msg_checksum(void *obj, - unsigned long obj_size, - unsigned int key, - unsigned int checksum); +unsigned int amd_sriov_msg_checksum(void *obj, unsigned long obj_size, unsigned int key, + unsigned int checksum); /* assertion at compile time */ #ifdef __linux__ -#define stringification(s) _stringification(s) +#define stringification(s) _stringification(s) #define _stringification(s) #s _Static_assert( @@ -319,13 +318,11 @@ _Static_assert( sizeof(struct amd_sriov_msg_pf2vf_info) == AMD_SRIOV_MSG_SIZE_KB << 10, "amd_sriov_msg_pf2vf_info must be " stringification(AMD_SRIOV_MSG_SIZE_KB) " KB"); -_Static_assert( - AMD_SRIOV_MSG_RESERVE_UCODE % 4 == 0, - "AMD_SRIOV_MSG_RESERVE_UCODE must be multiple of 4"); +_Static_assert(AMD_SRIOV_MSG_RESERVE_UCODE % 4 == 0, + "AMD_SRIOV_MSG_RESERVE_UCODE must be multiple of 4"); -_Static_assert( - AMD_SRIOV_MSG_RESERVE_UCODE > AMD_SRIOV_UCODE_ID__MAX, - "AMD_SRIOV_MSG_RESERVE_UCODE must be bigger than AMD_SRIOV_UCODE_ID__MAX"); +_Static_assert(AMD_SRIOV_MSG_RESERVE_UCODE > AMD_SRIOV_UCODE_ID__MAX, + "AMD_SRIOV_MSG_RESERVE_UCODE must be bigger than AMD_SRIOV_UCODE_ID__MAX"); #undef _stringification #undef stringification diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c index 88642e7ecdf4..a13c443ea10f 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c @@ -87,7 +87,7 @@ int athub_v1_0_set_clockgating(struct amdgpu_device *adev, return 0; } -void athub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags) +void athub_v1_0_get_clockgating(struct amdgpu_device *adev, u64 *flags) { int data; diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h index b279af59e34f..6be0a6704ea7 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h +++ b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h @@ -25,6 +25,6 @@ int athub_v1_0_set_clockgating(struct amdgpu_device *adev, enum amd_clockgating_state state); -void athub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags); +void athub_v1_0_get_clockgating(struct amdgpu_device *adev, u64 *flags); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c index a720436857b4..a9521c98e7f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c @@ -93,7 +93,7 @@ int athub_v2_0_set_clockgating(struct amdgpu_device *adev, return 0; } -void athub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags) +void athub_v2_0_get_clockgating(struct amdgpu_device *adev, u64 *flags) { int data; diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.h b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.h index 02932c1c8bab..8b763f6dfd81 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.h +++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.h @@ -25,6 +25,6 @@ int athub_v2_0_set_clockgating(struct amdgpu_device *adev, enum amd_clockgating_state state); -void athub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags); +void athub_v2_0_get_clockgating(struct amdgpu_device *adev, u64 *flags); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c index ad8e87d3d2cb..78508ae6a670 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c +++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c @@ -85,7 +85,7 @@ int athub_v2_1_set_clockgating(struct amdgpu_device *adev, return 0; } -void athub_v2_1_get_clockgating(struct amdgpu_device *adev, u32 *flags) +void athub_v2_1_get_clockgating(struct amdgpu_device *adev, u64 *flags) { int data; diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.h b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.h index 5e6824c0f591..b799f14bce03 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.h +++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.h @@ -25,6 +25,6 @@ int athub_v2_1_set_clockgating(struct amdgpu_device *adev, enum amd_clockgating_state state); -void athub_v2_1_get_clockgating(struct amdgpu_device *adev, u32 *flags); +void athub_v2_1_get_clockgating(struct amdgpu_device *adev, u64 *flags); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c index a92d86e12718..d4f5a584075d 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c @@ -765,7 +765,6 @@ amdgpu_atombios_encoder_setup_dig_transmitter(struct drm_encoder *encoder, int a int dp_clock = 0; int dp_lane_count = 0; int connector_object_id = 0; - int igp_lane_info = 0; int dig_encoder = dig->dig_encoder; int hpd_id = AMDGPU_HPD_NONE; @@ -848,26 +847,6 @@ amdgpu_atombios_encoder_setup_dig_transmitter(struct drm_encoder *encoder, int a else args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_DIG1_ENCODER; - if ((adev->flags & AMD_IS_APU) && - (amdgpu_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_UNIPHY)) { - if (is_dp || - !amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock)) { - if (igp_lane_info & 0x1) - args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_0_3; - else if (igp_lane_info & 0x2) - args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_4_7; - else if (igp_lane_info & 0x4) - args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_8_11; - else if (igp_lane_info & 0x8) - args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_12_15; - } else { - if (igp_lane_info & 0x3) - args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_0_7; - else if (igp_lane_info & 0xc) - args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_8_15; - } - } - if (dig->linkb) args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LINKB; else diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c index 2d01ac0d4c11..b991609f46c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c @@ -99,7 +99,7 @@ static void df_v1_7_update_medium_grain_clock_gating(struct amdgpu_device *adev, } static void df_v1_7_get_clockgating_state(struct amdgpu_device *adev, - u32 *flags) + u64 *flags) { u32 tmp; diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c index f4dfca013ec5..483a441b46aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c @@ -332,7 +332,7 @@ static void df_v3_6_update_medium_grain_clock_gating(struct amdgpu_device *adev, } static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev, - u32 *flags) + u64 *flags) { u32 tmp; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 9426e252d8aa..54446162db8b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4741,7 +4741,7 @@ static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) + ring->pipe; hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ? - AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; + AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT; /* type-2 packets are deprecated on MEC, use type-3 instead */ r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, hw_prio, NULL); @@ -8451,7 +8451,7 @@ static int gfx_v10_0_set_clockgating_state(void *handle, return 0; } -static void gfx_v10_0_get_clockgating_state(void *handle, u32 *flags) +static void gfx_v10_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 5f112efda634..25dc729d0ec2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1925,7 +1925,7 @@ static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, + ring->pipe; hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ? - AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_RING_PRIO_DEFAULT; + AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT; /* type-2 packets are deprecated on MEC, use type-3 instead */ r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, hw_prio, NULL); @@ -5475,7 +5475,7 @@ static int gfx_v8_0_set_powergating_state(void *handle, return 0; } -static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags) +static void gfx_v8_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 46d4bf27ebbb..d58fd83524ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1205,6 +1205,8 @@ static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = { { 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 }, /* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */ { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 }, + /* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */ + { 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 }, { 0, 0, 0, 0, 0 }, }; @@ -2274,7 +2276,7 @@ static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) + ring->pipe; hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ? - AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; + AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT; /* type-2 packets are deprecated on MEC, use type-3 instead */ return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, hw_prio, NULL); @@ -5231,7 +5233,7 @@ static int gfx_v9_0_set_clockgating_state(void *handle, return 0; } -static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags) +static void gfx_v9_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c index 7653ebd0e67b..3a797424579c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c @@ -1930,6 +1930,19 @@ static void gfx_v9_4_2_reset_sq_timeout_status(struct amdgpu_device *adev) mutex_unlock(&adev->grbm_idx_mutex); } +static bool gfx_v9_4_2_query_uctl2_poison_status(struct amdgpu_device *adev) +{ + u32 status = 0; + struct amdgpu_vmhub *hub; + + hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + status = RREG32(hub->vm_l2_pro_fault_status); + /* reset page fault status */ + WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); + + return REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED); +} + struct amdgpu_ras_block_hw_ops gfx_v9_4_2_ras_ops = { .ras_error_inject = &gfx_v9_4_2_ras_error_inject, .query_ras_error_count = &gfx_v9_4_2_query_ras_error_count, @@ -1943,4 +1956,5 @@ struct amdgpu_gfx_ras gfx_v9_4_2_ras = { .hw_ops = &gfx_v9_4_2_ras_ops, }, .enable_watchdog_timer = &gfx_v9_4_2_enable_watchdog_timer, + .query_utcl2_poison_status = gfx_v9_4_2_query_uctl2_poison_status, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 5228421b0f72..20946bc7fc93 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -1151,6 +1151,16 @@ static int gmc_v10_0_set_clockgating_state(void *handle, int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + /* + * The issue mmhub can't disconnect from DF with MMHUB clock gating being disabled + * is a new problem observed at DF 3.0.3, however with the same suspend sequence not + * seen any issue on the DF 3.0.2 series platform. + */ + if (adev->in_s0ix && adev->ip_versions[DF_HWIP][0] > IP_VERSION(3, 0, 2)) { + dev_dbg(adev->dev, "keep mmhub clock gating being enabled for s0ix\n"); + return 0; + } + r = adev->mmhub.funcs->set_clockgating(adev, state); if (r) return r; @@ -1161,7 +1171,7 @@ static int gmc_v10_0_set_clockgating_state(void *handle, return athub_v2_0_set_clockgating(adev, state); } -static void gmc_v10_0_get_clockgating_state(void *handle, u32 *flags) +static void gmc_v10_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 1932a3e4af7e..382dde1ce74c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -1690,7 +1690,7 @@ static int gmc_v8_0_set_powergating_state(void *handle, return 0; } -static void gmc_v8_0_get_clockgating_state(void *handle, u32 *flags) +static void gmc_v8_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 6009fbfdcc19..22761a3bb818 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1948,7 +1948,7 @@ static int gmc_v9_0_set_clockgating_state(void *handle, return 0; } -static void gmc_v9_0_get_clockgating_state(void *handle, u32 *flags) +static void gmc_v9_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c index 046216635262..adf89680f53e 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c @@ -124,7 +124,7 @@ static void hdp_v4_0_update_clock_gating(struct amdgpu_device *adev, } static void hdp_v4_0_get_clockgating_state(struct amdgpu_device *adev, - u32 *flags) + u64 *flags) { int data; diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c index 5793977953cc..a9ea23fa0def 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c @@ -181,7 +181,7 @@ static void hdp_v5_0_update_clock_gating(struct amdgpu_device *adev, } static void hdp_v5_0_get_clockgating_state(struct amdgpu_device *adev, - u32 *flags) + u64 *flags) { uint32_t tmp; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index a29c86617fb5..8c3227d0b8b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -26,6 +26,7 @@ #include "soc15.h" #include "soc15d.h" #include "jpeg_v2_0.h" +#include "jpeg_v2_5.h" #include "vcn/vcn_2_5_offset.h" #include "vcn/vcn_2_5_sh_mask.h" @@ -39,6 +40,7 @@ static void jpeg_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev); static void jpeg_v2_5_set_irq_funcs(struct amdgpu_device *adev); static int jpeg_v2_5_set_powergating_state(void *handle, enum amd_powergating_state state); +static void jpeg_v2_5_set_ras_funcs(struct amdgpu_device *adev); static int amdgpu_ih_clientid_jpeg[] = { SOC15_IH_CLIENTID_VCN, @@ -70,6 +72,7 @@ static int jpeg_v2_5_early_init(void *handle) jpeg_v2_5_set_dec_ring_funcs(adev); jpeg_v2_5_set_irq_funcs(adev); + jpeg_v2_5_set_ras_funcs(adev); return 0; } @@ -730,3 +733,74 @@ const struct amdgpu_ip_block_version jpeg_v2_6_ip_block = .rev = 0, .funcs = &jpeg_v2_6_ip_funcs, }; + +static uint32_t jpeg_v2_6_query_poison_by_instance(struct amdgpu_device *adev, + uint32_t instance, uint32_t sub_block) +{ + uint32_t poison_stat = 0, reg_value = 0; + + switch (sub_block) { + case AMDGPU_JPEG_V2_6_JPEG0: + reg_value = RREG32_SOC15(JPEG, instance, mmUVD_RAS_JPEG0_STATUS); + poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_JPEG0_STATUS, POISONED_PF); + break; + case AMDGPU_JPEG_V2_6_JPEG1: + reg_value = RREG32_SOC15(JPEG, instance, mmUVD_RAS_JPEG1_STATUS); + poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_JPEG1_STATUS, POISONED_PF); + break; + default: + break; + } + + if (poison_stat) + dev_info(adev->dev, "Poison detected in JPEG%d sub_block%d\n", + instance, sub_block); + + return poison_stat; +} + +static bool jpeg_v2_6_query_ras_poison_status(struct amdgpu_device *adev) +{ + uint32_t inst = 0, sub = 0, poison_stat = 0; + + for (inst = 0; inst < adev->jpeg.num_jpeg_inst; inst++) + for (sub = 0; sub < AMDGPU_JPEG_V2_6_MAX_SUB_BLOCK; sub++) + poison_stat += + jpeg_v2_6_query_poison_by_instance(adev, inst, sub); + + return !!poison_stat; +} + +const struct amdgpu_ras_block_hw_ops jpeg_v2_6_ras_hw_ops = { + .query_poison_status = jpeg_v2_6_query_ras_poison_status, +}; + +static struct amdgpu_jpeg_ras jpeg_v2_6_ras = { + .ras_block = { + .hw_ops = &jpeg_v2_6_ras_hw_ops, + }, +}; + +static void jpeg_v2_5_set_ras_funcs(struct amdgpu_device *adev) +{ + switch (adev->ip_versions[JPEG_HWIP][0]) { + case IP_VERSION(2, 6, 0): + adev->jpeg.ras = &jpeg_v2_6_ras; + break; + default: + break; + } + + if (adev->jpeg.ras) { + amdgpu_ras_register_ras_block(adev, &adev->jpeg.ras->ras_block); + + strcpy(adev->jpeg.ras->ras_block.ras_comm.name, "jpeg"); + adev->jpeg.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__JPEG; + adev->jpeg.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__POISON; + adev->jpeg.ras_if = &adev->jpeg.ras->ras_block.ras_comm; + + /* If don't define special ras_late_init function, use default ras_late_init */ + if (!adev->jpeg.ras->ras_block.ras_late_init) + adev->jpeg.ras->ras_block.ras_late_init = amdgpu_ras_block_late_init; + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h index 3b0aa29b9879..1e858c6cdf13 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h @@ -24,6 +24,13 @@ #ifndef __JPEG_V2_5_H__ #define __JPEG_V2_5_H__ +enum amdgpu_jpeg_v2_6_sub_block { + AMDGPU_JPEG_V2_6_JPEG0 = 0, + AMDGPU_JPEG_V2_6_JPEG1, + + AMDGPU_JPEG_V2_6_MAX_SUB_BLOCK, +}; + extern const struct amdgpu_ip_block_version jpeg_v2_5_ip_block; extern const struct amdgpu_ip_block_version jpeg_v2_6_ip_block; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index 4c9f0c0f3116..3f44a099c52a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -546,7 +546,7 @@ static int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev, return 0; } -static void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags) +static void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u64 *flags) { int data, data1; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c index 3b901f941627..6fa7090bc6cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c @@ -542,7 +542,7 @@ static int mmhub_v1_7_set_clockgating(struct amdgpu_device *adev, return 0; } -static void mmhub_v1_7_get_clockgating(struct amdgpu_device *adev, u32 *flags) +static void mmhub_v1_7_get_clockgating(struct amdgpu_device *adev, u64 *flags) { int data, data1; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index 3718ff610ab2..636abd855686 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -682,7 +682,7 @@ static int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev, return 0; } -static void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags) +static void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u64 *flags) { int data, data1; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c index 1957fb098c4d..ff44c5364a8c 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c @@ -577,7 +577,7 @@ static int mmhub_v2_3_set_clockgating(struct amdgpu_device *adev, return 0; } -static void mmhub_v2_3_get_clockgating(struct amdgpu_device *adev, u32 *flags) +static void mmhub_v2_3_get_clockgating(struct amdgpu_device *adev, u64 *flags) { int data, data1, data2, data3; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index 619106f7d23d..6e0145b2b408 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -647,7 +647,7 @@ static int mmhub_v9_4_set_clockgating(struct amdgpu_device *adev, return 0; } -static void mmhub_v9_4_get_clockgating(struct amdgpu_device *adev, u32 *flags) +static void mmhub_v9_4_get_clockgating(struct amdgpu_device *adev, u64 *flags) { int data, data1; diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index 8ce5b8ca1fd7..97201ab0965e 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -685,7 +685,7 @@ static int navi10_ih_set_powergating_state(void *handle, return 0; } -static void navi10_ih_get_clockgating_state(void *handle, u32 *flags) +static void navi10_ih_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index ee7cab37dfd5..6cd1fb2eb913 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -278,7 +278,7 @@ static void nbio_v2_3_update_medium_grain_light_sleep(struct amdgpu_device *adev } static void nbio_v2_3_get_clockgating_state(struct amdgpu_device *adev, - u32 *flags) + u64 *flags) { int data; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c index 4bbacf1be25a..f7f6ddebd3e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c @@ -210,7 +210,7 @@ static void nbio_v6_1_update_medium_grain_light_sleep(struct amdgpu_device *adev } static void nbio_v6_1_get_clockgating_state(struct amdgpu_device *adev, - u32 *flags) + u64 *flags) { int data; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c index 37a4039fdfc5..aa0326d00c72 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c @@ -205,7 +205,7 @@ static void nbio_v7_0_update_medium_grain_light_sleep(struct amdgpu_device *adev } static void nbio_v7_0_get_clockgating_state(struct amdgpu_device *adev, - u32 *flags) + u64 *flags) { int data; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c index 6f81de6f3cc4..31776b12e4c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c @@ -306,7 +306,7 @@ static void nbio_v7_2_update_medium_grain_light_sleep(struct amdgpu_device *adev } static void nbio_v7_2_get_clockgating_state(struct amdgpu_device *adev, - u32 *flags) + u64 *flags) { int data; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index c2357e83a8c4..4531761dcf77 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -273,7 +273,7 @@ static void nbio_v7_4_update_medium_grain_light_sleep(struct amdgpu_device *adev } static void nbio_v7_4_get_clockgating_state(struct amdgpu_device *adev, - u32 *flags) + u64 *flags) { int data; diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index e19f14c3ef59..0a7946c59a42 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -1115,7 +1115,7 @@ static int nv_common_set_powergating_state(void *handle, return 0; } -static void nv_common_get_clockgating_state(void *handle, u32 *flags) +static void nv_common_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 4ef4feff5649..3695374896ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -1535,7 +1535,7 @@ static int sdma_v3_0_set_powergating_state(void *handle, return 0; } -static void sdma_v3_0_get_clockgating_state(void *handle, u32 *flags) +static void sdma_v3_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index d7e8f7232364..8589ab1c9800 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -2372,7 +2372,7 @@ static int sdma_v4_0_set_powergating_state(void *handle, return 0; } -static void sdma_v4_0_get_clockgating_state(void *handle, u32 *flags) +static void sdma_v4_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index a8d49c005f73..775aabde1ae2 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -1648,7 +1648,7 @@ static int sdma_v5_0_set_powergating_state(void *handle, return 0; } -static void sdma_v5_0_get_clockgating_state(void *handle, u32 *flags) +static void sdma_v5_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 824eace69884..ca50857b982d 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -1645,7 +1645,7 @@ static int sdma_v5_2_set_powergating_state(void *handle, return 0; } -static void sdma_v5_2_get_clockgating_state(void *handle, u32 *flags) +static void sdma_v5_2_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c index b6f1322f908c..acdc40f99ab3 100644 --- a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c @@ -59,7 +59,7 @@ static void smuio_v11_0_update_rom_clock_gating(struct amdgpu_device *adev, bool WREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0, data); } -static void smuio_v11_0_get_clock_gating_state(struct amdgpu_device *adev, u32 *flags) +static void smuio_v11_0_get_clock_gating_state(struct amdgpu_device *adev, u64 *flags) { u32 data; diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0_6.c b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0_6.c index 3a18dbb55c32..2afeb8b37f62 100644 --- a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0_6.c +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0_6.c @@ -56,7 +56,7 @@ static void smuio_v11_0_6_update_rom_clock_gating(struct amdgpu_device *adev, bo WREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0, data); } -static void smuio_v11_0_6_get_clock_gating_state(struct amdgpu_device *adev, u32 *flags) +static void smuio_v11_0_6_get_clock_gating_state(struct amdgpu_device *adev, u64 *flags) { u32 data; diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c index 39b7c206770f..13e905c22592 100644 --- a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c @@ -58,7 +58,7 @@ static void smuio_v13_0_update_rom_clock_gating(struct amdgpu_device *adev, bool WREG32_SOC15(SMUIO, 0, regCGTT_ROM_CLK_CTRL0, data); } -static void smuio_v13_0_get_clock_gating_state(struct amdgpu_device *adev, u32 *flags) +static void smuio_v13_0_get_clock_gating_state(struct amdgpu_device *adev, u64 *flags) { u32 data; diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c index 8417890af227..e4e30b9d481b 100644 --- a/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c @@ -56,7 +56,7 @@ static void smuio_v9_0_update_rom_clock_gating(struct amdgpu_device *adev, bool WREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0, data); } -static void smuio_v9_0_get_clock_gating_state(struct amdgpu_device *adev, u32 *flags) +static void smuio_v9_0_get_clock_gating_state(struct amdgpu_device *adev, u64 *flags) { u32 data; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 3d0251ef8d79..3ee7322081d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1419,7 +1419,7 @@ static int soc15_common_set_clockgating_state(void *handle, return 0; } -static void soc15_common_get_clockgating_state(void *handle, u32 *flags) +static void soc15_common_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c index c45d9c14ecbc..606892dbea1c 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c @@ -64,21 +64,62 @@ static inline uint32_t get_umc_v6_7_channel_index(struct amdgpu_device *adev, return adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; } +static void umc_v6_7_query_error_status_helper(struct amdgpu_device *adev, + uint64_t mc_umc_status, uint32_t umc_reg_offset) +{ + uint32_t mc_umc_addr; + uint64_t reg_value; + + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1) + dev_info(adev->dev, "Deferred error, no user action is needed.\n"); + + if (mc_umc_status) + dev_info(adev->dev, "MCA STATUS 0x%llx, umc_reg_offset 0x%x\n", mc_umc_status, umc_reg_offset); + + /* print IPID registers value */ + mc_umc_addr = + SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_IPIDT0); + reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4); + if (reg_value) + dev_info(adev->dev, "MCA IPID 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset); + + /* print SYND registers value */ + mc_umc_addr = + SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_SYNDT0); + reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4); + if (reg_value) + dev_info(adev->dev, "MCA SYND 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset); + + /* print MISC0 registers value */ + mc_umc_addr = + SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_MISC0T0); + reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4); + if (reg_value) + dev_info(adev->dev, "MCA MISC0 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset); +} + static void umc_v6_7_ecc_info_query_correctable_error_count(struct amdgpu_device *adev, uint32_t umc_inst, uint32_t ch_inst, unsigned long *error_count) { uint64_t mc_umc_status; uint32_t eccinfo_table_idx; + uint32_t umc_reg_offset; struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + umc_reg_offset = get_umc_v6_7_reg_offset(adev, + umc_inst, ch_inst); + eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst; /* check for SRAM correctable error MCUMC_STATUS is a 64 bit register */ mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status; if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) { *error_count += 1; + + umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset); + } } static void umc_v6_7_ecc_info_querry_uncorrectable_error_count(struct amdgpu_device *adev, @@ -88,8 +129,6 @@ static void umc_v6_7_ecc_info_querry_uncorrectable_error_count(struct amdgpu_dev uint64_t mc_umc_status; uint32_t eccinfo_table_idx; uint32_t umc_reg_offset; - uint32_t mc_umc_addr; - uint64_t reg_value; struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); umc_reg_offset = get_umc_v6_7_reg_offset(adev, @@ -106,32 +145,7 @@ static void umc_v6_7_ecc_info_querry_uncorrectable_error_count(struct amdgpu_dev REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) { *error_count += 1; - if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1) - dev_info(adev->dev, "Deferred error, no user action is needed.\n"); - - if (mc_umc_status) - dev_info(adev->dev, "MCA STATUS 0x%llx, umc_reg_offset 0x%x\n", mc_umc_status, umc_reg_offset); - - /* print IPID registers value */ - mc_umc_addr = - SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_IPIDT0); - reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4); - if (reg_value) - dev_info(adev->dev, "MCA IPID 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset); - - /* print SYND registers value */ - mc_umc_addr = - SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_SYNDT0); - reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4); - if (reg_value) - dev_info(adev->dev, "MCA SYND 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset); - - /* print MISC0 registers value */ - mc_umc_addr = - SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_MISC0T0); - reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4); - if (reg_value) - dev_info(adev->dev, "MCA MISC0 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset); + umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset); } } @@ -277,8 +291,11 @@ static void umc_v6_7_query_correctable_error_count(struct amdgpu_device *adev, MCUMC_STATUS is a 64 bit register */ mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) { *error_count += 1; + + umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset); + } } static void umc_v6_7_querry_uncorrectable_error_count(struct amdgpu_device *adev, @@ -287,8 +304,6 @@ static void umc_v6_7_querry_uncorrectable_error_count(struct amdgpu_device *adev { uint64_t mc_umc_status; uint32_t mc_umc_status_addr; - uint32_t mc_umc_addr; - uint64_t reg_value; mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); @@ -303,32 +318,7 @@ static void umc_v6_7_querry_uncorrectable_error_count(struct amdgpu_device *adev REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) { *error_count += 1; - if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1) - dev_info(adev->dev, "Deferred error, no user action is needed.\n"); - - if (mc_umc_status) - dev_info(adev->dev, "MCA STATUS 0x%llx, umc_reg_offset 0x%x\n", mc_umc_status, umc_reg_offset); - - /* print IPID registers value */ - mc_umc_addr = - SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_IPIDT0); - reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4); - if (reg_value) - dev_info(adev->dev, "MCA IPID 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset); - - /* print SYND registers value */ - mc_umc_addr = - SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_SYNDT0); - reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4); - if (reg_value) - dev_info(adev->dev, "MCA SYND 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset); - - /* print MISC0 registers value */ - mc_umc_addr = - SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_MISC0T0); - reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4); - if (reg_value) - dev_info(adev->dev, "MCA MISC0 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset); + umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset); } } diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index 563493d1f830..d7e31e48a2b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -833,7 +833,7 @@ out: return ret; } -static void uvd_v5_0_get_clockgating_state(void *handle, u32 *flags) +static void uvd_v5_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 2d558c2f417d..375c440957dc 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -1494,7 +1494,7 @@ out: return ret; } -static void uvd_v6_0_get_clockgating_state(void *handle, u32 *flags) +static void uvd_v6_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 142e291983b4..8def62c83ffd 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -831,7 +831,7 @@ out: return ret; } -static void vce_v3_0_get_clockgating_state(void *handle, u32 *flags) +static void vce_v3_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 1bf672966a62..17d44be58877 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -31,6 +31,7 @@ #include "soc15d.h" #include "vcn_v2_0.h" #include "mmsch_v1_0.h" +#include "vcn_v2_5.h" #include "vcn/vcn_2_5_offset.h" #include "vcn/vcn_2_5_sh_mask.h" @@ -59,6 +60,7 @@ static int vcn_v2_5_set_powergating_state(void *handle, static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx, struct dpg_pause_state *new_state); static int vcn_v2_5_sriov_start(struct amdgpu_device *adev); +static void vcn_v2_5_set_ras_funcs(struct amdgpu_device *adev); static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, @@ -100,6 +102,7 @@ static int vcn_v2_5_early_init(void *handle) vcn_v2_5_set_dec_ring_funcs(adev); vcn_v2_5_set_enc_ring_funcs(adev); vcn_v2_5_set_irq_funcs(adev); + vcn_v2_5_set_ras_funcs(adev); return 0; } @@ -1932,3 +1935,71 @@ const struct amdgpu_ip_block_version vcn_v2_6_ip_block = .rev = 0, .funcs = &vcn_v2_6_ip_funcs, }; + +static uint32_t vcn_v2_6_query_poison_by_instance(struct amdgpu_device *adev, + uint32_t instance, uint32_t sub_block) +{ + uint32_t poison_stat = 0, reg_value = 0; + + switch (sub_block) { + case AMDGPU_VCN_V2_6_VCPU_VCODEC: + reg_value = RREG32_SOC15(VCN, instance, mmUVD_RAS_VCPU_VCODEC_STATUS); + poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_VCPU_VCODEC_STATUS, POISONED_PF); + break; + default: + break; + } + + if (poison_stat) + dev_info(adev->dev, "Poison detected in VCN%d, sub_block%d\n", + instance, sub_block); + + return poison_stat; +} + +static bool vcn_v2_6_query_poison_status(struct amdgpu_device *adev) +{ + uint32_t inst, sub; + uint32_t poison_stat = 0; + + for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++) + for (sub = 0; sub < AMDGPU_VCN_V2_6_MAX_SUB_BLOCK; sub++) + poison_stat += + vcn_v2_6_query_poison_by_instance(adev, inst, sub); + + return !!poison_stat; +} + +const struct amdgpu_ras_block_hw_ops vcn_v2_6_ras_hw_ops = { + .query_poison_status = vcn_v2_6_query_poison_status, +}; + +static struct amdgpu_vcn_ras vcn_v2_6_ras = { + .ras_block = { + .hw_ops = &vcn_v2_6_ras_hw_ops, + }, +}; + +static void vcn_v2_5_set_ras_funcs(struct amdgpu_device *adev) +{ + switch (adev->ip_versions[VCN_HWIP][0]) { + case IP_VERSION(2, 6, 0): + adev->vcn.ras = &vcn_v2_6_ras; + break; + default: + break; + } + + if (adev->vcn.ras) { + amdgpu_ras_register_ras_block(adev, &adev->vcn.ras->ras_block); + + strcpy(adev->vcn.ras->ras_block.ras_comm.name, "vcn"); + adev->vcn.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__VCN; + adev->vcn.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__POISON; + adev->vcn.ras_if = &adev->vcn.ras->ras_block.ras_comm; + + /* If don't define special ras_late_init function, use default ras_late_init */ + if (!adev->vcn.ras->ras_block.ras_late_init) + adev->vcn.ras->ras_block.ras_late_init = amdgpu_ras_block_late_init; + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h index e72f799ed0fd..1c19af74e4fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h @@ -24,6 +24,12 @@ #ifndef __VCN_V2_5_H__ #define __VCN_V2_5_H__ +enum amdgpu_vcn_v2_6_sub_block { + AMDGPU_VCN_V2_6_VCPU_VCODEC = 0, + + AMDGPU_VCN_V2_6_MAX_SUB_BLOCK, +}; + extern const struct amdgpu_ip_block_version vcn_v2_5_ip_block; extern const struct amdgpu_ip_block_version vcn_v2_6_ip_block; diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 039b90cdc3bc..c5b88d15a6df 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -2033,7 +2033,7 @@ static int vi_common_set_powergating_state(void *handle, return 0; } -static void vi_common_get_clockgating_state(void *handle, u32 *flags) +static void vi_common_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 607f65ab39ac..ee8b288dd8cc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1128,14 +1128,6 @@ err_pdd: return ret; } -static bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev) -{ - return KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) || - (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 1) && - dev->adev->sdma.instance[0].fw_version >= 18) || - KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 0); -} - static int kfd_ioctl_map_memory_to_gpu(struct file *filep, struct kfd_process *p, void *data) { @@ -1146,7 +1138,6 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, long err = 0; int i; uint32_t *devices_arr = NULL; - bool table_freed = false; if (!args->n_devices) { pr_debug("Device IDs array empty\n"); @@ -1208,7 +1199,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu( peer_pdd->dev->adev, (struct kgd_mem *)mem, - peer_pdd->drm_priv, &table_freed); + peer_pdd->drm_priv); if (err) { struct pci_dev *pdev = peer_pdd->dev->adev->pdev; @@ -1233,13 +1224,11 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, } /* Flush TLBs after waiting for the page table updates to complete */ - if (table_freed || !kfd_flush_tlb_after_unmap(dev)) { - for (i = 0; i < args->n_devices; i++) { - peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]); - if (WARN_ON_ONCE(!peer_pdd)) - continue; - kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY); - } + for (i = 0; i < args->n_devices; i++) { + peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]); + if (WARN_ON_ONCE(!peer_pdd)) + continue; + kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY); } kfree(devices_arr); @@ -2206,8 +2195,8 @@ static int criu_restore_bo(struct kfd_process *p, if (IS_ERR(peer_pdd)) return PTR_ERR(peer_pdd); - ret = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(peer->adev, kgd_mem, peer_pdd->drm_priv, - NULL); + ret = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(peer->adev, kgd_mem, + peer_pdd->drm_priv); if (ret) { pr_err("Failed to map to gpu %d/%d\n", j, p->n_pdds); return ret; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 1eaabd2cb41b..afc8a7fcdad8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -1056,7 +1056,7 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink, * table, add corresponded reversed direction link now. */ if (props && (iolink->flags & CRAT_IOLINK_FLAGS_BI_DIRECTIONAL)) { - to_dev = kfd_topology_device_by_proximity_domain(id_to); + to_dev = kfd_topology_device_by_proximity_domain_no_lock(id_to); if (!to_dev) return -ENODEV; /* same everything but the other direction */ @@ -2225,7 +2225,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, */ if (kdev->hive_id) { for (nid = 0; nid < proximity_domain; ++nid) { - peer_dev = kfd_topology_device_by_proximity_domain(nid); + peer_dev = kfd_topology_device_by_proximity_domain_no_lock(nid); if (!peer_dev->gpu) continue; if (peer_dev->gpu->hive_id != kdev->hive_id) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index acf4f7975850..198672264492 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -130,19 +130,33 @@ void program_sh_mem_settings(struct device_queue_manager *dqm, } static void increment_queue_count(struct device_queue_manager *dqm, - enum kfd_queue_type type) + struct qcm_process_device *qpd, + struct queue *q) { dqm->active_queue_count++; - if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ) + if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || + q->properties.type == KFD_QUEUE_TYPE_DIQ) dqm->active_cp_queue_count++; + + if (q->properties.is_gws) { + dqm->gws_queue_count++; + qpd->mapped_gws_queue = true; + } } static void decrement_queue_count(struct device_queue_manager *dqm, - enum kfd_queue_type type) + struct qcm_process_device *qpd, + struct queue *q) { dqm->active_queue_count--; - if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ) + if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || + q->properties.type == KFD_QUEUE_TYPE_DIQ) dqm->active_cp_queue_count--; + + if (q->properties.is_gws) { + dqm->gws_queue_count--; + qpd->mapped_gws_queue = false; + } } /* @@ -412,7 +426,7 @@ add_queue_to_list: list_add(&q->list, &qpd->queues_list); qpd->queue_count++; if (q->properties.is_active) - increment_queue_count(dqm, q->properties.type); + increment_queue_count(dqm, qpd, q); /* * Unconditionally increment this counter, regardless of the queue's @@ -601,13 +615,8 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, deallocate_vmid(dqm, qpd, q); } qpd->queue_count--; - if (q->properties.is_active) { - decrement_queue_count(dqm, q->properties.type); - if (q->properties.is_gws) { - dqm->gws_queue_count--; - qpd->mapped_gws_queue = false; - } - } + if (q->properties.is_active) + decrement_queue_count(dqm, qpd, q); return retval; } @@ -700,12 +709,11 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q, * dqm->active_queue_count to determine whether a new runlist must be * uploaded. */ - if (q->properties.is_active && !prev_active) - increment_queue_count(dqm, q->properties.type); - else if (!q->properties.is_active && prev_active) - decrement_queue_count(dqm, q->properties.type); - - if (q->gws && !q->properties.is_gws) { + if (q->properties.is_active && !prev_active) { + increment_queue_count(dqm, &pdd->qpd, q); + } else if (!q->properties.is_active && prev_active) { + decrement_queue_count(dqm, &pdd->qpd, q); + } else if (q->gws && !q->properties.is_gws) { if (q->properties.is_active) { dqm->gws_queue_count++; pdd->qpd.mapped_gws_queue = true; @@ -767,11 +775,7 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( q->properties.type)]; q->properties.is_active = false; - decrement_queue_count(dqm, q->properties.type); - if (q->properties.is_gws) { - dqm->gws_queue_count--; - qpd->mapped_gws_queue = false; - } + decrement_queue_count(dqm, qpd, q); if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n")) continue; @@ -817,7 +821,7 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm, continue; q->properties.is_active = false; - decrement_queue_count(dqm, q->properties.type); + decrement_queue_count(dqm, qpd, q); } pdd->last_evict_timestamp = get_jiffies_64(); retval = execute_queues_cpsch(dqm, @@ -888,11 +892,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( q->properties.type)]; q->properties.is_active = true; - increment_queue_count(dqm, q->properties.type); - if (q->properties.is_gws) { - dqm->gws_queue_count++; - qpd->mapped_gws_queue = true; - } + increment_queue_count(dqm, qpd, q); if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n")) continue; @@ -950,7 +950,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm, continue; q->properties.is_active = true; - increment_queue_count(dqm, q->properties.type); + increment_queue_count(dqm, &pdd->qpd, q); } retval = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); @@ -1378,7 +1378,7 @@ static int create_kernel_queue_cpsch(struct device_queue_manager *dqm, dqm->total_queue_count); list_add(&kq->list, &qpd->priv_queue_list); - increment_queue_count(dqm, kq->queue->properties.type); + increment_queue_count(dqm, qpd, kq->queue); qpd->is_debug = true; execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); dqm_unlock(dqm); @@ -1392,7 +1392,7 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm, { dqm_lock(dqm); list_del(&kq->list); - decrement_queue_count(dqm, kq->queue->properties.type); + decrement_queue_count(dqm, qpd, kq->queue); qpd->is_debug = false; execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); /* @@ -1467,7 +1467,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, qpd->queue_count++; if (q->properties.is_active) { - increment_queue_count(dqm, q->properties.type); + increment_queue_count(dqm, qpd, q); execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); @@ -1683,15 +1683,11 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, list_del(&q->list); qpd->queue_count--; if (q->properties.is_active) { - decrement_queue_count(dqm, q->properties.type); + decrement_queue_count(dqm, qpd, q); retval = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); if (retval == -ETIME) qpd->reset_wavefronts = true; - if (q->properties.is_gws) { - dqm->gws_queue_count--; - qpd->mapped_gws_queue = false; - } } /* @@ -1932,7 +1928,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, /* Clean all kernel queues */ list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) { list_del(&kq->list); - decrement_queue_count(dqm, kq->queue->properties.type); + decrement_queue_count(dqm, qpd, kq->queue); qpd->is_debug = false; dqm->total_queue_count--; filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES; @@ -1945,13 +1941,8 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) deallocate_sdma_queue(dqm, q); - if (q->properties.is_active) { - decrement_queue_count(dqm, q->properties.type); - if (q->properties.is_gws) { - dqm->gws_queue_count--; - qpd->mapped_gws_queue = false; - } - } + if (q->properties.is_active) + decrement_queue_count(dqm, qpd, q); dqm->total_queue_count--; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 64f4a51cc880..6e5e8d637f48 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -128,8 +128,8 @@ static int allocate_event_notification_slot(struct kfd_process *p, } /* - * Assumes that p->event_mutex is held and of course that p is not going - * away (current or locked). + * Assumes that p->event_mutex or rcu_readlock is held and of course that p is + * not going away. */ static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id) { @@ -251,16 +251,18 @@ static void destroy_event(struct kfd_process *p, struct kfd_event *ev) struct kfd_event_waiter *waiter; /* Wake up pending waiters. They will return failure */ + spin_lock(&ev->lock); list_for_each_entry(waiter, &ev->wq.head, wait.entry) - waiter->event = NULL; + WRITE_ONCE(waiter->event, NULL); wake_up_all(&ev->wq); + spin_unlock(&ev->lock); if (ev->type == KFD_EVENT_TYPE_SIGNAL || ev->type == KFD_EVENT_TYPE_DEBUG) p->signal_event_count--; idr_remove(&p->event_idr, ev->event_id); - kfree(ev); + kfree_rcu(ev, rcu); } static void destroy_events(struct kfd_process *p) @@ -392,6 +394,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, ev->auto_reset = auto_reset; ev->signaled = false; + spin_lock_init(&ev->lock); init_waitqueue_head(&ev->wq); *event_page_offset = 0; @@ -466,6 +469,7 @@ int kfd_criu_restore_event(struct file *devkfd, ev->auto_reset = ev_priv->auto_reset; ev->signaled = ev_priv->signaled; + spin_lock_init(&ev->lock); init_waitqueue_head(&ev->wq); mutex_lock(&p->event_mutex); @@ -609,13 +613,13 @@ static void set_event(struct kfd_event *ev) /* Auto reset if the list is non-empty and we're waking * someone. waitqueue_active is safe here because we're - * protected by the p->event_mutex, which is also held when + * protected by the ev->lock, which is also held when * updating the wait queues in kfd_wait_on_events. */ ev->signaled = !ev->auto_reset || !waitqueue_active(&ev->wq); list_for_each_entry(waiter, &ev->wq.head, wait.entry) - waiter->activated = true; + WRITE_ONCE(waiter->activated, true); wake_up_all(&ev->wq); } @@ -626,16 +630,23 @@ int kfd_set_event(struct kfd_process *p, uint32_t event_id) int ret = 0; struct kfd_event *ev; - mutex_lock(&p->event_mutex); + rcu_read_lock(); ev = lookup_event_by_id(p, event_id); + if (!ev) { + ret = -EINVAL; + goto unlock_rcu; + } + spin_lock(&ev->lock); - if (ev && event_can_be_cpu_signaled(ev)) + if (event_can_be_cpu_signaled(ev)) set_event(ev); else ret = -EINVAL; - mutex_unlock(&p->event_mutex); + spin_unlock(&ev->lock); +unlock_rcu: + rcu_read_unlock(); return ret; } @@ -650,23 +661,30 @@ int kfd_reset_event(struct kfd_process *p, uint32_t event_id) int ret = 0; struct kfd_event *ev; - mutex_lock(&p->event_mutex); + rcu_read_lock(); ev = lookup_event_by_id(p, event_id); + if (!ev) { + ret = -EINVAL; + goto unlock_rcu; + } + spin_lock(&ev->lock); - if (ev && event_can_be_cpu_signaled(ev)) + if (event_can_be_cpu_signaled(ev)) reset_event(ev); else ret = -EINVAL; - mutex_unlock(&p->event_mutex); + spin_unlock(&ev->lock); +unlock_rcu: + rcu_read_unlock(); return ret; } static void acknowledge_signal(struct kfd_process *p, struct kfd_event *ev) { - page_slots(p->signal_page)[ev->event_id] = UNSIGNALED_EVENT_SLOT; + WRITE_ONCE(page_slots(p->signal_page)[ev->event_id], UNSIGNALED_EVENT_SLOT); } static void set_event_from_interrupt(struct kfd_process *p, @@ -674,7 +692,9 @@ static void set_event_from_interrupt(struct kfd_process *p, { if (ev && event_can_be_gpu_signaled(ev)) { acknowledge_signal(p, ev); + spin_lock(&ev->lock); set_event(ev); + spin_unlock(&ev->lock); } } @@ -693,7 +713,7 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id, if (!p) return; /* Presumably process exited. */ - mutex_lock(&p->event_mutex); + rcu_read_lock(); if (valid_id_bits) ev = lookup_signaled_event_by_partial_id(p, partial_id, @@ -721,7 +741,7 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id, if (id >= KFD_SIGNAL_EVENT_LIMIT) break; - if (slots[id] != UNSIGNALED_EVENT_SLOT) + if (READ_ONCE(slots[id]) != UNSIGNALED_EVENT_SLOT) set_event_from_interrupt(p, ev); } } else { @@ -730,14 +750,14 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id, * only signaled events from the IDR. */ for (id = 0; id < KFD_SIGNAL_EVENT_LIMIT; id++) - if (slots[id] != UNSIGNALED_EVENT_SLOT) { + if (READ_ONCE(slots[id]) != UNSIGNALED_EVENT_SLOT) { ev = lookup_event_by_id(p, id); set_event_from_interrupt(p, ev); } } } - mutex_unlock(&p->event_mutex); + rcu_read_unlock(); kfd_unref_process(p); } @@ -760,7 +780,7 @@ static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events) return event_waiters; } -static int init_event_waiter_get_status(struct kfd_process *p, +static int init_event_waiter(struct kfd_process *p, struct kfd_event_waiter *waiter, uint32_t event_id) { @@ -769,22 +789,15 @@ static int init_event_waiter_get_status(struct kfd_process *p, if (!ev) return -EINVAL; + spin_lock(&ev->lock); waiter->event = ev; waiter->activated = ev->signaled; ev->signaled = ev->signaled && !ev->auto_reset; - - return 0; -} - -static void init_event_waiter_add_to_waitlist(struct kfd_event_waiter *waiter) -{ - struct kfd_event *ev = waiter->event; - - /* Only add to the wait list if we actually need to - * wait on this event. - */ if (!waiter->activated) add_wait_queue(&ev->wq, &waiter->wait); + spin_unlock(&ev->lock); + + return 0; } /* test_event_condition - Test condition of events being waited for @@ -804,10 +817,10 @@ static uint32_t test_event_condition(bool all, uint32_t num_events, uint32_t activated_count = 0; for (i = 0; i < num_events; i++) { - if (!event_waiters[i].event) + if (!READ_ONCE(event_waiters[i].event)) return KFD_IOC_WAIT_RESULT_FAIL; - if (event_waiters[i].activated) { + if (READ_ONCE(event_waiters[i].activated)) { if (!all) return KFD_IOC_WAIT_RESULT_COMPLETE; @@ -836,6 +849,8 @@ static int copy_signaled_event_data(uint32_t num_events, for (i = 0; i < num_events; i++) { waiter = &event_waiters[i]; event = waiter->event; + if (!event) + return -EINVAL; /* event was destroyed */ if (waiter->activated && event->type == KFD_EVENT_TYPE_MEMORY) { dst = &data[i].memory_exception_data; src = &event->memory_exception_data; @@ -846,11 +861,8 @@ static int copy_signaled_event_data(uint32_t num_events, } return 0; - } - - static long user_timeout_to_jiffies(uint32_t user_timeout_ms) { if (user_timeout_ms == KFD_EVENT_TIMEOUT_IMMEDIATE) @@ -874,9 +886,12 @@ static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters) uint32_t i; for (i = 0; i < num_events; i++) - if (waiters[i].event) + if (waiters[i].event) { + spin_lock(&waiters[i].event->lock); remove_wait_queue(&waiters[i].event->wq, &waiters[i].wait); + spin_unlock(&waiters[i].event->lock); + } kfree(waiters); } @@ -900,6 +915,9 @@ int kfd_wait_on_events(struct kfd_process *p, goto out; } + /* Use p->event_mutex here to protect against concurrent creation and + * destruction of events while we initialize event_waiters. + */ mutex_lock(&p->event_mutex); for (i = 0; i < num_events; i++) { @@ -911,8 +929,8 @@ int kfd_wait_on_events(struct kfd_process *p, goto out_unlock; } - ret = init_event_waiter_get_status(p, &event_waiters[i], - event_data.event_id); + ret = init_event_waiter(p, &event_waiters[i], + event_data.event_id); if (ret) goto out_unlock; } @@ -930,10 +948,6 @@ int kfd_wait_on_events(struct kfd_process *p, goto out_unlock; } - /* Add to wait lists if we need to wait. */ - for (i = 0; i < num_events; i++) - init_event_waiter_add_to_waitlist(&event_waiters[i]); - mutex_unlock(&p->event_mutex); while (true) { @@ -978,14 +992,19 @@ int kfd_wait_on_events(struct kfd_process *p, } __set_current_state(TASK_RUNNING); + mutex_lock(&p->event_mutex); /* copy_signaled_event_data may sleep. So this has to happen * after the task state is set back to RUNNING. + * + * The event may also have been destroyed after signaling. So + * copy_signaled_event_data also must confirm that the event + * still exists. Therefore this must be under the p->event_mutex + * which is also held when events are destroyed. */ if (!ret && *wait_result == KFD_IOC_WAIT_RESULT_COMPLETE) ret = copy_signaled_event_data(num_events, event_waiters, events); - mutex_lock(&p->event_mutex); out_unlock: free_waiters(num_events, event_waiters); mutex_unlock(&p->event_mutex); @@ -1044,8 +1063,7 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma) } /* - * Assumes that p->event_mutex is held and of course - * that p is not going away (current or locked). + * Assumes that p is not going away. */ static void lookup_events_by_type_and_signal(struct kfd_process *p, int type, void *event_data) @@ -1057,6 +1075,8 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p, ev_data = (struct kfd_hsa_memory_exception_data *) event_data; + rcu_read_lock(); + id = KFD_FIRST_NONSIGNAL_EVENT_ID; idr_for_each_entry_continue(&p->event_idr, ev, id) if (ev->type == type) { @@ -1064,9 +1084,11 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p, dev_dbg(kfd_device, "Event found: id %X type %d", ev->event_id, ev->type); + spin_lock(&ev->lock); set_event(ev); if (ev->type == KFD_EVENT_TYPE_MEMORY && ev_data) ev->memory_exception_data = *ev_data; + spin_unlock(&ev->lock); } if (type == KFD_EVENT_TYPE_MEMORY) { @@ -1089,6 +1111,8 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p, p->lead_thread->pid, p->pasid); } } + + rcu_read_unlock(); } #ifdef KFD_SUPPORT_IOMMU_V2 @@ -1164,16 +1188,10 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, u32 pasid, if (KFD_GC_VERSION(dev) != IP_VERSION(9, 1, 0) && KFD_GC_VERSION(dev) != IP_VERSION(9, 2, 2) && - KFD_GC_VERSION(dev) != IP_VERSION(9, 3, 0)) { - mutex_lock(&p->event_mutex); - - /* Lookup events by type and signal them */ + KFD_GC_VERSION(dev) != IP_VERSION(9, 3, 0)) lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_MEMORY, &memory_exception_data); - mutex_unlock(&p->event_mutex); - } - kfd_unref_process(p); } #endif /* KFD_SUPPORT_IOMMU_V2 */ @@ -1190,12 +1208,7 @@ void kfd_signal_hw_exception_event(u32 pasid) if (!p) return; /* Presumably process exited. */ - mutex_lock(&p->event_mutex); - - /* Lookup events by type and signal them */ lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_HW_EXCEPTION, NULL); - - mutex_unlock(&p->event_mutex); kfd_unref_process(p); } @@ -1231,16 +1244,19 @@ void kfd_signal_vm_fault_event(struct kfd_dev *dev, u32 pasid, info->prot_write ? 1 : 0; memory_exception_data.failure.imprecise = 0; } - mutex_lock(&p->event_mutex); + + rcu_read_lock(); id = KFD_FIRST_NONSIGNAL_EVENT_ID; idr_for_each_entry_continue(&p->event_idr, ev, id) if (ev->type == KFD_EVENT_TYPE_MEMORY) { + spin_lock(&ev->lock); ev->memory_exception_data = memory_exception_data; set_event(ev); + spin_unlock(&ev->lock); } - mutex_unlock(&p->event_mutex); + rcu_read_unlock(); kfd_unref_process(p); } @@ -1274,22 +1290,28 @@ void kfd_signal_reset_event(struct kfd_dev *dev) continue; } - mutex_lock(&p->event_mutex); + rcu_read_lock(); + id = KFD_FIRST_NONSIGNAL_EVENT_ID; idr_for_each_entry_continue(&p->event_idr, ev, id) { if (ev->type == KFD_EVENT_TYPE_HW_EXCEPTION) { + spin_lock(&ev->lock); ev->hw_exception_data = hw_exception_data; ev->hw_exception_data.gpu_id = user_gpu_id; set_event(ev); + spin_unlock(&ev->lock); } if (ev->type == KFD_EVENT_TYPE_MEMORY && reset_cause == KFD_HW_EXCEPTION_ECC) { + spin_lock(&ev->lock); ev->memory_exception_data = memory_exception_data; ev->memory_exception_data.gpu_id = user_gpu_id; set_event(ev); + spin_unlock(&ev->lock); } } - mutex_unlock(&p->event_mutex); + + rcu_read_unlock(); } srcu_read_unlock(&kfd_processes_srcu, idx); } @@ -1322,19 +1344,25 @@ void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid) memory_exception_data.gpu_id = user_gpu_id; memory_exception_data.failure.imprecise = true; - mutex_lock(&p->event_mutex); + rcu_read_lock(); + idr_for_each_entry_continue(&p->event_idr, ev, id) { if (ev->type == KFD_EVENT_TYPE_HW_EXCEPTION) { + spin_lock(&ev->lock); ev->hw_exception_data = hw_exception_data; set_event(ev); + spin_unlock(&ev->lock); } if (ev->type == KFD_EVENT_TYPE_MEMORY) { + spin_lock(&ev->lock); ev->memory_exception_data = memory_exception_data; set_event(ev); + spin_unlock(&ev->lock); } } - mutex_unlock(&p->event_mutex); + + rcu_read_unlock(); /* user application will handle SIGBUS signal */ send_sig(SIGBUS, p->lead_thread, 0); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_events.h index 1238af11916e..1c62c8dd6460 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.h @@ -59,6 +59,7 @@ struct kfd_event { int type; + spinlock_t lock; wait_queue_head_t wq; /* List of event waiters. */ /* Only for signal events. */ @@ -69,6 +70,8 @@ struct kfd_event { struct kfd_hsa_memory_exception_data memory_exception_data; struct kfd_hsa_hw_exception_data hw_exception_data; }; + + struct rcu_head rcu; /* for asynchronous kfree_rcu */ }; #define KFD_EVENT_TIMEOUT_IMMEDIATE 0 diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index 7eedbcd14828..03c29bdd89a1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -91,28 +91,34 @@ enum SQ_INTERRUPT_ERROR_TYPE { #define KFD_SQ_INT_DATA__ERR_TYPE__SHIFT 20 static void event_interrupt_poison_consumption(struct kfd_dev *dev, - uint16_t pasid, uint16_t source_id) + uint16_t pasid, uint16_t client_id) { - int ret = -EINVAL; + int old_poison, ret = -EINVAL; struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); if (!p) return; /* all queues of a process will be unmapped in one time */ - if (atomic_read(&p->poison)) { - kfd_unref_process(p); - return; - } - - atomic_set(&p->poison, 1); + old_poison = atomic_cmpxchg(&p->poison, 0, 1); kfd_unref_process(p); + if (old_poison) + return; - switch (source_id) { - case SOC15_INTSRC_SQ_INTERRUPT_MSG: + switch (client_id) { + case SOC15_IH_CLIENTID_SE0SH: + case SOC15_IH_CLIENTID_SE1SH: + case SOC15_IH_CLIENTID_SE2SH: + case SOC15_IH_CLIENTID_SE3SH: + case SOC15_IH_CLIENTID_UTCL2: ret = kfd_dqm_evict_pasid(dev->dqm, pasid); break; - case SOC15_INTSRC_SDMA_ECC: + case SOC15_IH_CLIENTID_SDMA0: + case SOC15_IH_CLIENTID_SDMA1: + case SOC15_IH_CLIENTID_SDMA2: + case SOC15_IH_CLIENTID_SDMA3: + case SOC15_IH_CLIENTID_SDMA4: + break; default: break; } @@ -122,10 +128,17 @@ static void event_interrupt_poison_consumption(struct kfd_dev *dev, /* resetting queue passes, do page retirement without gpu reset * resetting queue fails, fallback to gpu reset solution */ - if (!ret) + if (!ret) { + dev_warn(dev->adev->dev, + "RAS poison consumption, unmap queue flow succeeded: client id %d\n", + client_id); amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, false); - else + } else { + dev_warn(dev->adev->dev, + "RAS poison consumption, fall back to gpu reset flow: client id %d\n", + client_id); amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, true); + } } static bool event_interrupt_isr_v9(struct kfd_dev *dev, @@ -270,7 +283,7 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev, sq_intr_err); if (sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST && sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_MEMVIOL) { - event_interrupt_poison_consumption(dev, pasid, source_id); + event_interrupt_poison_consumption(dev, pasid, client_id); return; } break; @@ -291,7 +304,7 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev, if (source_id == SOC15_INTSRC_SDMA_TRAP) { kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28); } else if (source_id == SOC15_INTSRC_SDMA_ECC) { - event_interrupt_poison_consumption(dev, pasid, source_id); + event_interrupt_poison_consumption(dev, pasid, client_id); return; } } else if (client_id == SOC15_IH_CLIENTID_VMC || @@ -300,6 +313,12 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev, struct kfd_vm_fault_info info = {0}; uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry); + if (client_id == SOC15_IH_CLIENTID_UTCL2 && + amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev)) { + event_interrupt_poison_consumption(dev, pasid, client_id); + return; + } + info.vmid = vmid; info.mc_id = client_id; info.page_addr = ih_ring_entry[4] | diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c index 9178cfe34f20..a9466d154395 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c @@ -146,15 +146,24 @@ static void interrupt_wq(struct work_struct *work) struct kfd_dev *dev = container_of(work, struct kfd_dev, interrupt_work); uint32_t ih_ring_entry[KFD_MAX_RING_ENTRY_SIZE]; + long start_jiffies = jiffies; if (dev->device_info.ih_ring_entry_size > sizeof(ih_ring_entry)) { dev_err_once(dev->adev->dev, "Ring entry too small\n"); return; } - while (dequeue_ih_ring_entry(dev, ih_ring_entry)) + while (dequeue_ih_ring_entry(dev, ih_ring_entry)) { dev->device_info.event_interrupt_class->interrupt_wq(dev, ih_ring_entry); + if (jiffies - start_jiffies > HZ) { + /* If we spent more than a second processing signals, + * reschedule the worker to avoid soft-lockup warnings + */ + queue_work(dev->ih_wq, &dev->interrupt_work); + break; + } + } } bool interrupt_is_wanted(struct kfd_dev *dev, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 9967a73d5b0f..bfb3b5c288cb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -706,6 +706,7 @@ struct kfd_process_device { /* VM context for GPUVM allocations */ struct file *drm_file; void *drm_priv; + atomic64_t tlb_seq; /* GPUVM allocations storage */ struct idr alloc_idr; @@ -1016,6 +1017,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu); int kfd_topology_remove_device(struct kfd_dev *gpu); struct kfd_topology_device *kfd_topology_device_by_proximity_domain( uint32_t proximity_domain); +struct kfd_topology_device *kfd_topology_device_by_proximity_domain_no_lock( + uint32_t proximity_domain); struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id); struct kfd_dev *kfd_device_by_id(uint32_t gpu_id); struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev); @@ -1103,7 +1106,7 @@ struct kfd_criu_queue_priv_data { uint32_t priority; uint32_t q_percent; uint32_t doorbell_id; - uint32_t is_gws; + uint32_t gws; uint32_t sdma_id; uint32_t eop_ring_buffer_size; uint32_t ctx_save_restore_area_size; @@ -1326,6 +1329,14 @@ void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid); void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type); +static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev) +{ + return KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) || + (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 1) && + dev->adev->sdma.instance[0].fw_version >= 18) || + KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 0); +} + bool kfd_is_locked(void); /* Compute profile */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 59c04b2d383b..9e82d7aa67fa 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -722,7 +722,7 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd, goto err_alloc_mem; err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->adev, *mem, - pdd->drm_priv, NULL); + pdd->drm_priv); if (err) goto err_map_mem; @@ -1560,6 +1560,7 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd, return ret; } pdd->drm_priv = drm_file->private_data; + atomic64_set(&pdd->tlb_seq, 0); ret = kfd_process_device_reserve_ib_mem(pdd); if (ret) @@ -1949,8 +1950,18 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process, void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type) { + struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv); + uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm); struct kfd_dev *dev = pdd->dev; + /* + * It can be that we race and lose here, but that is extremely unlikely + * and the worst thing which could happen is that we flush the changes + * into the TLB once more which is harmless. + */ + if (atomic64_xchg(&pdd->tlb_seq, tlb_seq) == tlb_seq) + return; + if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { /* Nothing to flush until a VMID is assigned, which * only happens when the first queue is created. diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 6eca9509f2e3..4f58e671d39b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -636,6 +636,8 @@ static int criu_checkpoint_queue(struct kfd_process_device *pdd, q_data->ctx_save_restore_area_size = q->properties.ctx_save_restore_area_size; + q_data->gws = !!q->gws; + ret = pqm_checkpoint_mqd(&pdd->process->pqm, q->properties.queue_id, mqd, ctl_stack); if (ret) { pr_err("Failed checkpoint queue_mqd (%d)\n", ret); @@ -743,7 +745,6 @@ static void set_queue_properties_from_criu(struct queue_properties *qp, struct kfd_criu_queue_priv_data *q_data) { qp->is_interop = false; - qp->is_gws = q_data->is_gws; qp->queue_percent = q_data->q_percent; qp->priority = q_data->priority; qp->queue_address = q_data->q_address; @@ -826,12 +827,15 @@ int kfd_criu_restore_queue(struct kfd_process *p, NULL); if (ret) { pr_err("Failed to create new queue err:%d\n", ret); - ret = -EINVAL; + goto exit; } + if (q_data->gws) + ret = pqm_set_gws(&p->pqm, q_data->q_id, pdd->dev->gws); + exit: if (ret) - pr_err("Failed to create queue (%d)\n", ret); + pr_err("Failed to restore queue (%d)\n", ret); else pr_debug("Queue id %d was restored successfully\n", queue_id); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index b3fc3e958227..11b395b90a3d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1188,9 +1188,9 @@ svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm, pr_debug("[0x%llx 0x%llx]\n", start, last); - return amdgpu_vm_bo_update_mapping(adev, adev, vm, false, true, NULL, - start, last, init_pte_value, 0, - NULL, NULL, fence, NULL); + return amdgpu_vm_update_range(adev, vm, false, true, true, NULL, start, + last, init_pte_value, 0, 0, NULL, NULL, + fence); } static int @@ -1243,7 +1243,6 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange, { struct amdgpu_device *adev = pdd->dev->adev; struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv); - bool table_freed = false; uint64_t pte_flags; unsigned long last_start; int last_domain; @@ -1278,13 +1277,12 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange, (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0, pte_flags); - r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, - NULL, last_start, - prange->start + i, pte_flags, - last_start - prange->start, - NULL, dma_addr, - &vm->last_update, - &table_freed); + r = amdgpu_vm_update_range(adev, vm, false, false, false, NULL, + last_start, prange->start + i, + pte_flags, + last_start - prange->start, + bo_adev ? bo_adev->vm_manager.vram_base_offset : 0, + NULL, dma_addr, &vm->last_update); for (j = last_start - prange->start; j <= i; j++) dma_addr[j] |= last_domain; @@ -1306,8 +1304,6 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange, if (fence) *fence = dma_fence_get(vm->last_update); - if (table_freed) - kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY); out: return r; } @@ -1363,6 +1359,8 @@ svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset, break; } } + + kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY); } return r; @@ -1372,7 +1370,7 @@ struct svm_validate_context { struct kfd_process *process; struct svm_range *prange; bool intr; - unsigned long bitmap[MAX_GPU_INSTANCE]; + DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE); struct ttm_validate_buffer tv[MAX_GPU_INSTANCE]; struct list_head validate_list; struct ww_acquire_ctx ticket; @@ -2687,11 +2685,6 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, pr_debug("kfd process not founded pasid 0x%x\n", pasid); return 0; } - if (!p->xnack_enabled) { - pr_debug("XNACK not enabled for pasid 0x%x\n", pasid); - r = -EFAULT; - goto out; - } svms = &p->svms; pr_debug("restoring svms 0x%p fault address 0x%llx\n", svms, addr); @@ -2702,6 +2695,12 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, goto out; } + if (!p->xnack_enabled) { + pr_debug("XNACK not enabled for pasid 0x%x\n", pasid); + r = -EFAULT; + goto out; + } + /* p->lead_thread is available as kfd_process_wq_release flush the work * before releasing task ref. */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 3bdcae239bc0..8b7710b4d3ed 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -46,22 +46,32 @@ static struct list_head topology_device_list; static struct kfd_system_properties sys_props; static DECLARE_RWSEM(topology_lock); -static atomic_t topology_crat_proximity_domain; +static uint32_t topology_crat_proximity_domain; -struct kfd_topology_device *kfd_topology_device_by_proximity_domain( +struct kfd_topology_device *kfd_topology_device_by_proximity_domain_no_lock( uint32_t proximity_domain) { struct kfd_topology_device *top_dev; struct kfd_topology_device *device = NULL; - down_read(&topology_lock); - list_for_each_entry(top_dev, &topology_device_list, list) if (top_dev->proximity_domain == proximity_domain) { device = top_dev; break; } + return device; +} + +struct kfd_topology_device *kfd_topology_device_by_proximity_domain( + uint32_t proximity_domain) +{ + struct kfd_topology_device *device = NULL; + + down_read(&topology_lock); + + device = kfd_topology_device_by_proximity_domain_no_lock( + proximity_domain); up_read(&topology_lock); return device; @@ -1060,7 +1070,7 @@ int kfd_topology_init(void) down_write(&topology_lock); kfd_topology_update_device_list(&temp_topology_device_list, &topology_device_list); - atomic_set(&topology_crat_proximity_domain, sys_props.num_devices-1); + topology_crat_proximity_domain = sys_props.num_devices-1; ret = kfd_topology_update_sysfs(); up_write(&topology_lock); @@ -1295,8 +1305,6 @@ int kfd_topology_add_device(struct kfd_dev *gpu) pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id); - proximity_domain = atomic_inc_return(&topology_crat_proximity_domain); - /* Include the CPU in xGMI hive if xGMI connected by assigning it the hive ID. */ if (gpu->hive_id && gpu->adev->gmc.xgmi.connected_to_cpu) { struct kfd_topology_device *top_dev; @@ -1321,12 +1329,16 @@ int kfd_topology_add_device(struct kfd_dev *gpu) */ dev = kfd_assign_gpu(gpu); if (!dev) { + down_write(&topology_lock); + proximity_domain = ++topology_crat_proximity_domain; + res = kfd_create_crat_image_virtual(&crat_image, &image_size, COMPUTE_UNIT_GPU, gpu, proximity_domain); if (res) { pr_err("Error creating VCRAT for GPU (ID: 0x%x)\n", gpu_id); + topology_crat_proximity_domain--; return res; } res = kfd_parse_crat_table(crat_image, @@ -1335,10 +1347,10 @@ int kfd_topology_add_device(struct kfd_dev *gpu) if (res) { pr_err("Error parsing VCRAT for GPU (ID: 0x%x)\n", gpu_id); + topology_crat_proximity_domain--; goto err; } - down_write(&topology_lock); kfd_topology_update_device_list(&temp_topology_device_list, &topology_device_list); @@ -1485,25 +1497,78 @@ err: return res; } +/** + * kfd_topology_update_io_links() - Update IO links after device removal. + * @proximity_domain: Proximity domain value of the dev being removed. + * + * The topology list currently is arranged in increasing order of + * proximity domain. + * + * Two things need to be done when a device is removed: + * 1. All the IO links to this device need to be removed. + * 2. All nodes after the current device node need to move + * up once this device node is removed from the topology + * list. As a result, the proximity domain values for + * all nodes after the node being deleted reduce by 1. + * This would also cause the proximity domain values for + * io links to be updated based on new proximity domain + * values. + * + * Context: The caller must hold write topology_lock. + */ +static void kfd_topology_update_io_links(int proximity_domain) +{ + struct kfd_topology_device *dev; + struct kfd_iolink_properties *iolink, *tmp; + + list_for_each_entry(dev, &topology_device_list, list) { + if (dev->proximity_domain > proximity_domain) + dev->proximity_domain--; + + list_for_each_entry_safe(iolink, tmp, &dev->io_link_props, list) { + /* + * If there is an io link to the dev being deleted + * then remove that IO link also. + */ + if (iolink->node_to == proximity_domain) { + list_del(&iolink->list); + dev->io_link_count--; + dev->node_props.io_links_count--; + } else if (iolink->node_from > proximity_domain) { + iolink->node_from--; + } else if (iolink->node_to > proximity_domain) { + iolink->node_to--; + } + } + + } +} + int kfd_topology_remove_device(struct kfd_dev *gpu) { struct kfd_topology_device *dev, *tmp; uint32_t gpu_id; int res = -ENODEV; + int i = 0; down_write(&topology_lock); - list_for_each_entry_safe(dev, tmp, &topology_device_list, list) + list_for_each_entry_safe(dev, tmp, &topology_device_list, list) { if (dev->gpu == gpu) { gpu_id = dev->gpu_id; kfd_remove_sysfs_node_entry(dev); kfd_release_topology_device(dev); sys_props.num_devices--; + kfd_topology_update_io_links(i); + topology_crat_proximity_domain = sys_props.num_devices-1; + sys_props.generation_count++; res = 0; if (kfd_topology_update_sysfs() < 0) kfd_topology_release_sysfs(); break; } + i++; + } up_write(&topology_lock); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 2ade82cfb1ac..a6880dd9c0bb 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -9254,7 +9254,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, &bundle->flip_addrs[planes_count].address, afb->tmz_surface, false); - DRM_DEBUG_ATOMIC("plane: id=%d dcc_en=%d\n", + drm_dbg_state(state->dev, "plane: id=%d dcc_en=%d\n", new_plane_state->plane->index, bundle->plane_infos[planes_count].dcc.enable); @@ -9288,7 +9288,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, dc_plane, bundle->flip_addrs[planes_count].flip_timestamp_in_us); - DRM_DEBUG_ATOMIC("%s Flipping to hi: 0x%x, low: 0x%x\n", + drm_dbg_state(state->dev, "%s Flipping to hi: 0x%x, low: 0x%x\n", __func__, bundle->flip_addrs[planes_count].address.grph.addr.high_part, bundle->flip_addrs[planes_count].address.grph.addr.low_part); @@ -9630,7 +9630,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); dm_old_crtc_state = to_dm_crtc_state(old_crtc_state); - DRM_DEBUG_ATOMIC( + drm_dbg_state(state->dev, "amdgpu_crtc id:%d crtc_state_flags: enable:%d, active:%d, " "planes_changed:%d, mode_changed:%d,active_changed:%d," "connectors_changed:%d\n", @@ -10334,7 +10334,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm, if (!drm_atomic_crtc_needs_modeset(new_crtc_state)) goto skip_modeset; - DRM_DEBUG_ATOMIC( + drm_dbg_state(state->dev, "amdgpu_crtc id:%d crtc_state_flags: enable:%d, active:%d, " "planes_changed:%d, mode_changed:%d,active_changed:%d," "connectors_changed:%d\n", diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index da17ece1a2c5..188039f14544 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -3491,6 +3491,40 @@ DEFINE_SHOW_ATTRIBUTE(mst_topo); DEFINE_DEBUGFS_ATTRIBUTE(visual_confirm_fops, visual_confirm_get, visual_confirm_set, "%llu\n"); + +/* + * Sets the DC skip_detection_link_training debug option from the given string. + * Example usage: echo 1 > /sys/kernel/debug/dri/0/amdgpu_skip_detection_link_training + */ +static int skip_detection_link_training_set(void *data, u64 val) +{ + struct amdgpu_device *adev = data; + + if (val == 0) + adev->dm.dc->debug.skip_detection_link_training = false; + else + adev->dm.dc->debug.skip_detection_link_training = true; + + return 0; +} + +/* + * Reads the DC skip_detection_link_training debug option value into the given buffer. + * Example usage: cat /sys/kernel/debug/dri/0/amdgpu_dm_skip_detection_link_training + */ +static int skip_detection_link_training_get(void *data, u64 *val) +{ + struct amdgpu_device *adev = data; + + *val = adev->dm.dc->debug.skip_detection_link_training; + + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(skip_detection_link_training_fops, + skip_detection_link_training_get, + skip_detection_link_training_set, "%llu\n"); + /* * Dumps the DCC_EN bit for each pipe. * Example usage: cat /sys/kernel/debug/dri/0/amdgpu_dm_dcc_en @@ -3584,6 +3618,9 @@ void dtn_debugfs_init(struct amdgpu_device *adev) debugfs_create_file_unsafe("amdgpu_dm_visual_confirm", 0644, root, adev, &visual_confirm_fops); + debugfs_create_file_unsafe("amdgpu_dm_skip_detection_link_training", 0644, root, adev, + &skip_detection_link_training_fops); + debugfs_create_file_unsafe("amdgpu_dm_dmub_tracebuffer", 0644, root, adev, &dmub_tracebuffer_fops); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index f5f39984702f..28cf24f6ab32 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -90,7 +90,7 @@ enum dc_edid_status dm_helpers_parse_edid_caps( { struct amdgpu_dm_connector *aconnector = link->priv; struct drm_connector *connector = &aconnector->base; - struct edid *edid_buf = (struct edid *) edid->raw_edid; + struct edid *edid_buf = edid ? (struct edid *) edid->raw_edid : NULL; struct cea_sad *sads; int sad_count = -1; int sadb_count = -1; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c index 0c923a90615c..13b1751e69bf 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c @@ -27,6 +27,7 @@ #include "dc.h" #include "dm_helpers.h" #include "amdgpu_dm.h" +#include "modules/power/power_helpers.h" #ifdef CONFIG_DRM_AMD_DC_DCN static bool link_supports_psrsu(struct dc_link *link) @@ -39,6 +40,9 @@ static bool link_supports_psrsu(struct dc_link *link) if (dc->ctx->dce_version < DCN_VERSION_3_1) return false; + if (!is_psr_su_specific_panel(link)) + return false; + if (!link->dpcd_caps.alpm_caps.bits.AUX_WAKE_ALPM_CAP || !link->dpcd_caps.psr_info.psr_dpcd_caps.bits.Y_COORDINATE_REQUIRED) return false; @@ -79,7 +83,10 @@ void amdgpu_dm_set_psr_caps(struct dc_link *link) link->psr_settings.psr_feature_enabled = true; } - DRM_INFO("PSR support:%d\n", link->psr_settings.psr_feature_enabled); + DRM_INFO("PSR support %d, DC PSR ver %d, sink PSR ver %d\n", + link->psr_settings.psr_feature_enabled, + link->psr_settings.psr_version, + link->dpcd_caps.psr_info.psr_version); } diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser_common.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser_common.c index a8cb039d2572..34e3a64f556e 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser_common.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser_common.c @@ -213,6 +213,9 @@ static enum connector_id connector_id_from_bios_object_id( case CONNECTOR_OBJECT_ID_MXM: id = CONNECTOR_ID_MXM; break; + case CONNECTOR_OBJECT_ID_USBC: + id = CONNECTOR_ID_USBC; + break; default: id = CONNECTOR_ID_UNKNOWN; break; diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table.c b/drivers/gpu/drm/amd/display/dc/bios/command_table.c index 0e36cd800fc9..32efa92422e8 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table.c +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table.c @@ -522,7 +522,8 @@ static enum bp_result transmitter_control_v2( */ params.acConfig.ucEncoderSel = 1; - if (CONNECTOR_ID_DISPLAY_PORT == connector_id) + if (CONNECTOR_ID_DISPLAY_PORT == connector_id + || CONNECTOR_ID_USBC == connector_id) /* Bit4: DP connector flag * =0 connector is none-DP connector * =1 connector is DP connector diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c index dfba6138f538..26feefbb8990 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c @@ -374,7 +374,7 @@ void dce_clock_read_ss_info(struct clk_mgr_internal *clk_mgr_dce) clk_mgr_dce->dprefclk_ss_percentage = info.spread_spectrum_percentage; } - if (clk_mgr_dce->base.ctx->dc->debug.ignore_dpref_ss) + if (clk_mgr_dce->base.ctx->dc->config.ignore_dpref_ss) clk_mgr_dce->dprefclk_ss_percentage = 0; } } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn201/dcn201_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn201/dcn201_clk_mgr.c index fbdd0a92d146..451e8d6cd8bd 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn201/dcn201_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn201/dcn201_clk_mgr.c @@ -157,8 +157,7 @@ static void dcn201_update_clocks(struct clk_mgr *clk_mgr_base, } else { if (update_dppclk || update_dispclk) dcn20_update_clocks_update_dentist(clk_mgr, context); - if (new_clocks->dppclk_khz >= dc->current_state->bw_ctx.bw.dcn.clk.dppclk_khz) - dcn20_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower); + dcn20_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower); } } } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c index f4dee0e48a67..02943ca65807 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c @@ -88,11 +88,22 @@ static int rn_get_active_display_cnt_wa(struct dc *dc, struct dc_state *context) static void rn_set_low_power_state(struct clk_mgr *clk_mgr_base) { + int display_count; struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + struct dc *dc = clk_mgr_base->ctx->dc; + struct dc_state *context = dc->current_state; + + if (clk_mgr_base->clks.pwr_state != DCN_PWR_STATE_LOW_POWER) { + + display_count = rn_get_active_display_cnt_wa(dc, context); - rn_vbios_smu_set_dcn_low_power_state(clk_mgr, DCN_PWR_STATE_LOW_POWER); - /* update power state */ - clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_LOW_POWER; + /* if we can go lower, go lower */ + if (display_count == 0) { + rn_vbios_smu_set_dcn_low_power_state(clk_mgr, DCN_PWR_STATE_LOW_POWER); + /* update power state */ + clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_LOW_POWER; + } + } } static void rn_update_clocks_update_dpp_dto(struct clk_mgr_internal *clk_mgr, diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c index 8161a6ae410d..30c6f9cd717f 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c @@ -94,6 +94,9 @@ static int rn_vbios_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, { uint32_t result; + result = rn_smu_wait_for_response(clk_mgr, 10, 200000); + ASSERT(result == VBIOSSMC_Result_OK); + /* First clear response register */ REG_WRITE(MP1_SMN_C2PMSG_91, VBIOSSMC_Status_BUSY); diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c index bc4ddc36fe58..f310b0d25a07 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c @@ -176,8 +176,7 @@ static void vg_update_clocks(struct clk_mgr *clk_mgr_base, if (update_dppclk || update_dispclk) dcn301_smu_set_dppclk(clk_mgr, clk_mgr_base->clks.dppclk_khz); // always update dtos unless clock is lowered and not safe to lower - if (new_clocks->dppclk_khz >= dc->current_state->bw_ctx.bw.dcn.clk.dppclk_khz) - dcn20_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower); + dcn20_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower); } } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c index 59fdd7f0d609..969b40250434 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c @@ -615,13 +615,37 @@ static void dcn31_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk } } +void dcn31_set_low_power_state(struct clk_mgr *clk_mgr_base) +{ + int display_count; + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + struct dc *dc = clk_mgr_base->ctx->dc; + struct dc_state *context = dc->current_state; + + if (clk_mgr_base->clks.pwr_state != DCN_PWR_STATE_LOW_POWER) { + display_count = dcn31_get_active_display_cnt_wa(dc, context); + /* if we can go lower, go lower */ + if (display_count == 0) { + union display_idle_optimization_u idle_info = { 0 }; + + idle_info.idle_info.df_request_disabled = 1; + idle_info.idle_info.phy_ref_clk_off = 1; + idle_info.idle_info.s0i2_rdy = 1; + dcn31_smu_set_display_idle_optimization(clk_mgr, idle_info.data); + /* update power state */ + clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_LOW_POWER; + } + } +} + static struct clk_mgr_funcs dcn31_funcs = { .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz, .update_clocks = dcn31_update_clocks, .init_clocks = dcn31_init_clocks, .enable_pme_wa = dcn31_enable_pme_wa, .are_clock_states_equal = dcn31_are_clock_states_equal, - .notify_wm_ranges = dcn31_notify_wm_ranges + .notify_wm_ranges = dcn31_notify_wm_ranges, + .set_low_power_state = dcn31_set_low_power_state }; extern struct clk_mgr_funcs dcn3_fpga_funcs; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c index 702d00ce7da4..3121dd2d2a91 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c @@ -686,8 +686,8 @@ void dcn316_clk_mgr_construct( clk_mgr->base.base.dprefclk_khz = dcn316_smu_get_dpref_clk(&clk_mgr->base); clk_mgr->base.dccg->ref_dtbclk_khz = clk_mgr->base.base.dprefclk_khz; dce_clock_read_ss_info(&clk_mgr->base); - clk_mgr->base.dccg->ref_dtbclk_khz = - dce_adjust_dp_ref_freq_for_ss(&clk_mgr->base, clk_mgr->base.base.dprefclk_khz); + /*clk_mgr->base.dccg->ref_dtbclk_khz = + dce_adjust_dp_ref_freq_for_ss(&clk_mgr->base, clk_mgr->base.base.dprefclk_khz);*/ clk_mgr->base.base.bw_params = &dcn316_bw_params; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index c436db416708..c2fcd67bcc4d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1569,11 +1569,24 @@ bool dc_validate_boot_timing(const struct dc *dc, if (dc_is_dp_signal(link->connector_signal)) { unsigned int pix_clk_100hz; + uint32_t numOdmPipes = 1; + uint32_t id_src[4] = {0}; dc->res_pool->dp_clock_source->funcs->get_pixel_clk_frequency_100hz( dc->res_pool->dp_clock_source, tg_inst, &pix_clk_100hz); + if (tg->funcs->get_optc_source) + tg->funcs->get_optc_source(tg, + &numOdmPipes, &id_src[0], &id_src[1]); + + if (numOdmPipes == 2) + pix_clk_100hz *= 2; + if (numOdmPipes == 4) + pix_clk_100hz *= 4; + + // Note: In rare cases, HW pixclk may differ from crtc's pixclk + // slightly due to rounding issues in 10 kHz units. if (crtc_timing->pix_clk_100hz != pix_clk_100hz) return false; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index bbaa5abdf888..faab1460d0b5 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -345,6 +345,7 @@ static enum signal_type get_basic_signal_type(struct graphics_object_id encoder, case CONNECTOR_ID_LVDS: return SIGNAL_TYPE_LVDS; case CONNECTOR_ID_DISPLAY_PORT: + case CONNECTOR_ID_USBC: return SIGNAL_TYPE_DISPLAY_PORT; case CONNECTOR_ID_EDP: return SIGNAL_TYPE_EDP; @@ -380,7 +381,8 @@ bool dc_link_is_dp_sink_present(struct dc_link *link) bool present = ((connector_id == CONNECTOR_ID_DISPLAY_PORT) || - (connector_id == CONNECTOR_ID_EDP)); + (connector_id == CONNECTOR_ID_EDP) || + (connector_id == CONNECTOR_ID_USBC)); ddc = dal_ddc_service_get_ddc_pin(link->ddc); @@ -476,7 +478,8 @@ static enum signal_type link_detect_sink(struct dc_link *link, result = SIGNAL_TYPE_DVI_SINGLE_LINK; } break; - case CONNECTOR_ID_DISPLAY_PORT: { + case CONNECTOR_ID_DISPLAY_PORT: + case CONNECTOR_ID_USBC: { /* DP HPD short pulse. Passive DP dongle will not * have short pulse */ @@ -1591,6 +1594,7 @@ static bool dc_link_construct_legacy(struct dc_link *link, link->connector_signal = SIGNAL_TYPE_DVI_DUAL_LINK; break; case CONNECTOR_ID_DISPLAY_PORT: + case CONNECTOR_ID_USBC: link->connector_signal = SIGNAL_TYPE_DISPLAY_PORT; if (link->hpd_gpio) @@ -3075,6 +3079,11 @@ bool dc_link_set_psr_allow_active(struct dc_link *link, const bool *allow_active if (!dc_get_edp_link_panel_inst(dc, link, &panel_inst)) return false; + if (allow_active && link->type == dc_connection_none) { + // Don't enter PSR if panel is not connected + return false; + } + /* Set power optimization flag */ if (power_opts && link->psr_settings.psr_power_opt != *power_opts) { link->psr_settings.psr_power_opt = *power_opts; @@ -3083,6 +3092,10 @@ bool dc_link_set_psr_allow_active(struct dc_link *link, const bool *allow_active psr->funcs->psr_set_power_opt(psr, link->psr_settings.psr_power_opt, panel_inst); } + if (psr != NULL && link->psr_settings.psr_feature_enabled && + force_static && psr->funcs->psr_force_static) + psr->funcs->psr_force_static(psr, panel_inst); + /* Enable or Disable PSR */ if (allow_active && link->psr_settings.psr_allow_active != *allow_active) { link->psr_settings.psr_allow_active = *allow_active; @@ -3093,8 +3106,6 @@ bool dc_link_set_psr_allow_active(struct dc_link *link, const bool *allow_active #endif if (psr != NULL && link->psr_settings.psr_feature_enabled) { - if (force_static && psr->funcs->psr_force_static) - psr->funcs->psr_force_static(psr, panel_inst); psr->funcs->psr_enable(psr, link->psr_settings.psr_allow_active, wait, panel_inst); } else if ((dmcu != NULL && dmcu->funcs->is_dmcu_initialized(dmcu)) && link->psr_settings.psr_feature_enabled) @@ -3476,8 +3487,6 @@ static enum dc_status dc_link_update_sst_payload(struct pipe_ctx *pipe_ctx, { struct dc_stream_state *stream = pipe_ctx->stream; struct dc_link *link = stream->link; - struct hpo_dp_link_encoder *hpo_dp_link_encoder = pipe_ctx->link_res.hpo_dp_link_enc; - struct hpo_dp_stream_encoder *hpo_dp_stream_encoder = pipe_ctx->stream_res.hpo_dp_stream_enc; struct link_mst_stream_allocation_table proposed_table = {0}; struct fixed31_32 avg_time_slots_per_mtp; const struct dc_link_settings empty_link_settings = {0}; @@ -3511,7 +3520,7 @@ static enum dc_status dc_link_update_sst_payload(struct pipe_ctx *pipe_ctx, pipe_ctx->pipe_idx); } - proposed_table.stream_allocations[0].hpo_dp_stream_enc = hpo_dp_stream_encoder; + proposed_table.stream_allocations[0].hpo_dp_stream_enc = pipe_ctx->stream_res.hpo_dp_stream_enc; ASSERT(proposed_table.stream_count == 1); @@ -3524,8 +3533,7 @@ static enum dc_status dc_link_update_sst_payload(struct pipe_ctx *pipe_ctx, proposed_table.stream_allocations[0].slot_count); /* program DP source TX for payload */ - hpo_dp_link_encoder->funcs->update_stream_allocation_table( - hpo_dp_link_encoder, + link_hwss->ext.update_stream_allocation_table(link, &pipe_ctx->link_res, &proposed_table); /* poll for ACT handled */ @@ -3562,8 +3570,6 @@ enum dc_status dc_link_allocate_mst_payload(struct pipe_ctx *pipe_ctx) { struct dc_stream_state *stream = pipe_ctx->stream; struct dc_link *link = stream->link; - struct link_encoder *link_encoder = NULL; - struct hpo_dp_link_encoder *hpo_dp_link_encoder = pipe_ctx->link_res.hpo_dp_link_enc; struct dp_mst_stream_allocation_table proposed_table = {0}; struct fixed31_32 avg_time_slots_per_mtp; struct fixed31_32 pbn; @@ -3573,9 +3579,6 @@ enum dc_status dc_link_allocate_mst_payload(struct pipe_ctx *pipe_ctx) const struct link_hwss *link_hwss = get_link_hwss(link, &pipe_ctx->link_res); DC_LOGGER_INIT(link->ctx->logger); - link_encoder = link_enc_cfg_get_link_enc(link); - ASSERT(link_encoder); - /* enable_link_dp_mst already check link->enabled_stream_count * and stream is in link->stream[]. This is called during set mode, * stream_enc is available. @@ -3620,37 +3623,17 @@ enum dc_status dc_link_allocate_mst_payload(struct pipe_ctx *pipe_ctx) ASSERT(proposed_table.stream_count > 0); - if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) { - static enum dc_status status; - uint8_t mst_alloc_slots = 0, prev_mst_slots_in_use = 0xFF; - - for (i = 0; i < link->mst_stream_alloc_table.stream_count; i++) - mst_alloc_slots += link->mst_stream_alloc_table.stream_allocations[i].slot_count; - - status = dc_process_dmub_set_mst_slots(link->dc, link->link_index, - mst_alloc_slots, &prev_mst_slots_in_use); - ASSERT(status == DC_OK); - DC_LOG_MST("dpia : status[%d]: alloc_slots[%d]: used_slots[%d]\n", - status, mst_alloc_slots, prev_mst_slots_in_use); - } - /* program DP source TX for payload */ - switch (dp_get_link_encoding_format(&link->cur_link_settings)) { - case DP_8b_10b_ENCODING: - link_encoder->funcs->update_mst_stream_allocation_table( - link_encoder, - &link->mst_stream_alloc_table); - break; - case DP_128b_132b_ENCODING: - hpo_dp_link_encoder->funcs->update_stream_allocation_table( - hpo_dp_link_encoder, - &link->mst_stream_alloc_table); - break; - case DP_UNKNOWN_ENCODING: + if (link_hwss->ext.update_stream_allocation_table == NULL || + dp_get_link_encoding_format(&link->cur_link_settings) == DP_UNKNOWN_ENCODING) { DC_LOG_ERROR("Failure: unknown encoding format\n"); return DC_ERROR_UNEXPECTED; } + link_hwss->ext.update_stream_allocation_table(link, + &pipe_ctx->link_res, + &link->mst_stream_alloc_table); + /* send down message */ ret = dm_helpers_dp_mst_poll_for_allocation_change_trigger( stream->ctx, @@ -3692,7 +3675,6 @@ enum dc_status dc_link_reduce_mst_payload(struct pipe_ctx *pipe_ctx, uint32_t bw struct fixed31_32 avg_time_slots_per_mtp; struct fixed31_32 pbn; struct fixed31_32 pbn_per_slot; - struct link_encoder *link_encoder = link->link_enc; struct dp_mst_stream_allocation_table proposed_table = {0}; uint8_t i; enum act_return_status ret; @@ -3756,8 +3738,13 @@ enum dc_status dc_link_reduce_mst_payload(struct pipe_ctx *pipe_ctx, uint32_t bw ASSERT(proposed_table.stream_count > 0); /* update mst stream allocation table hardware state */ - link_encoder->funcs->update_mst_stream_allocation_table( - link_encoder, + if (link_hwss->ext.update_stream_allocation_table == NULL || + dp_get_link_encoding_format(&link->cur_link_settings) == DP_UNKNOWN_ENCODING) { + DC_LOG_ERROR("Failure: unknown encoding format\n"); + return DC_ERROR_UNEXPECTED; + } + + link_hwss->ext.update_stream_allocation_table(link, &pipe_ctx->link_res, &link->mst_stream_alloc_table); /* poll for immediate branch device ACT handled */ @@ -3852,8 +3839,6 @@ static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx) { struct dc_stream_state *stream = pipe_ctx->stream; struct dc_link *link = stream->link; - struct link_encoder *link_encoder = NULL; - struct hpo_dp_link_encoder *hpo_dp_link_encoder = pipe_ctx->link_res.hpo_dp_link_enc; struct dp_mst_stream_allocation_table proposed_table = {0}; struct fixed31_32 avg_time_slots_per_mtp = dc_fixpt_from_int(0); int i; @@ -3862,9 +3847,6 @@ static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx) const struct dc_link_settings empty_link_settings = {0}; DC_LOGGER_INIT(link->ctx->logger); - link_encoder = link_enc_cfg_get_link_enc(link); - ASSERT(link_encoder); - /* deallocate_mst_payload is called before disable link. When mode or * disable/enable monitor, new stream is created which is not in link * stream[] yet. For this, payload is not allocated yet, so de-alloc @@ -3922,36 +3904,16 @@ static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx) link->mst_stream_alloc_table.stream_allocations[i].slot_count); } - if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) { - enum dc_status status; - uint8_t mst_alloc_slots = 0, prev_mst_slots_in_use = 0xFF; - - for (i = 0; i < link->mst_stream_alloc_table.stream_count; i++) - mst_alloc_slots += link->mst_stream_alloc_table.stream_allocations[i].slot_count; - - status = dc_process_dmub_set_mst_slots(link->dc, link->link_index, - mst_alloc_slots, &prev_mst_slots_in_use); - ASSERT(status != DC_NOT_SUPPORTED); - DC_LOG_MST("dpia : status[%d]: alloc_slots[%d]: used_slots[%d]\n", - status, mst_alloc_slots, prev_mst_slots_in_use); - } - - switch (dp_get_link_encoding_format(&link->cur_link_settings)) { - case DP_8b_10b_ENCODING: - link_encoder->funcs->update_mst_stream_allocation_table( - link_encoder, - &link->mst_stream_alloc_table); - break; - case DP_128b_132b_ENCODING: - hpo_dp_link_encoder->funcs->update_stream_allocation_table( - hpo_dp_link_encoder, - &link->mst_stream_alloc_table); - break; - case DP_UNKNOWN_ENCODING: + /* update mst stream allocation table hardware state */ + if (link_hwss->ext.update_stream_allocation_table == NULL || + dp_get_link_encoding_format(&link->cur_link_settings) == DP_UNKNOWN_ENCODING) { DC_LOG_DEBUG("Unknown encoding format\n"); return DC_ERROR_UNEXPECTED; } + link_hwss->ext.update_stream_allocation_table(link, &pipe_ctx->link_res, + &link->mst_stream_alloc_table); + if (mst_mode) { dm_helpers_dp_mst_poll_for_allocation_change_trigger( stream->ctx, @@ -4098,8 +4060,8 @@ static void fpga_dp_hpo_enable_link_and_stream(struct dc_state *state, struct pi proposed_table.stream_allocations[0].hpo_dp_stream_enc = pipe_ctx->stream_res.hpo_dp_stream_enc; } - pipe_ctx->link_res.hpo_dp_link_enc->funcs->update_stream_allocation_table( - pipe_ctx->link_res.hpo_dp_link_enc, + link_hwss->ext.update_stream_allocation_table(stream->link, + &pipe_ctx->link_res, &proposed_table); if (link_hwss->ext.set_throttled_vcp_size) @@ -4119,6 +4081,7 @@ void core_link_enable_stream( struct link_encoder *link_enc; enum otg_out_mux_dest otg_out_dest = OUT_MUX_DIO; struct vpg *vpg = pipe_ctx->stream_res.stream_enc->vpg; + const struct link_hwss *link_hwss = get_link_hwss(link, &pipe_ctx->link_res); if (is_dp_128b_132b_signal(pipe_ctx)) vpg = pipe_ctx->stream_res.hpo_dp_stream_enc->vpg; @@ -4147,56 +4110,19 @@ void core_link_enable_stream( link_enc->funcs->setup( link_enc, pipe_ctx->stream->signal); - pipe_ctx->stream_res.stream_enc->funcs->setup_stereo_sync( - pipe_ctx->stream_res.stream_enc, - pipe_ctx->stream_res.tg->inst, - stream->timing.timing_3d_format != TIMING_3D_FORMAT_NONE); - } - - if (is_dp_128b_132b_signal(pipe_ctx)) { - pipe_ctx->stream_res.hpo_dp_stream_enc->funcs->set_stream_attribute( - pipe_ctx->stream_res.hpo_dp_stream_enc, - &stream->timing, - stream->output_color_space, - stream->use_vsc_sdp_for_colorimetry, - stream->timing.flags.DSC, - false); - otg_out_dest = OUT_MUX_HPO_DP; - } else if (dc_is_dp_signal(pipe_ctx->stream->signal)) { - pipe_ctx->stream_res.stream_enc->funcs->dp_set_stream_attribute( - pipe_ctx->stream_res.stream_enc, - &stream->timing, - stream->output_color_space, - stream->use_vsc_sdp_for_colorimetry, - stream->link->dpcd_caps.dprx_feature.bits.SST_SPLIT_SDP_CAP); } - if (dc_is_dp_signal(pipe_ctx->stream->signal)) - dp_source_sequence_trace(link, DPCD_SOURCE_SEQ_AFTER_DP_STREAM_ATTR); - - if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal)) - pipe_ctx->stream_res.stream_enc->funcs->hdmi_set_stream_attribute( - pipe_ctx->stream_res.stream_enc, - &stream->timing, - stream->phy_pix_clk, - pipe_ctx->stream_res.audio != NULL); - pipe_ctx->stream->link->link_state_valid = true; - if (pipe_ctx->stream_res.tg->funcs->set_out_mux) + if (pipe_ctx->stream_res.tg->funcs->set_out_mux) { + if (is_dp_128b_132b_signal(pipe_ctx)) + otg_out_dest = OUT_MUX_HPO_DP; + else + otg_out_dest = OUT_MUX_DIO; pipe_ctx->stream_res.tg->funcs->set_out_mux(pipe_ctx->stream_res.tg, otg_out_dest); + } - if (dc_is_dvi_signal(pipe_ctx->stream->signal)) - pipe_ctx->stream_res.stream_enc->funcs->dvi_set_stream_attribute( - pipe_ctx->stream_res.stream_enc, - &stream->timing, - (pipe_ctx->stream->signal == SIGNAL_TYPE_DVI_DUAL_LINK) ? - true : false); - - if (dc_is_lvds_signal(pipe_ctx->stream->signal)) - pipe_ctx->stream_res.stream_enc->funcs->lvds_set_stream_attribute( - pipe_ctx->stream_res.stream_enc, - &stream->timing); + link_hwss->setup_stream_attribute(pipe_ctx); if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) { bool apply_edp_fast_boot_optimization = @@ -4331,13 +4257,11 @@ void core_link_enable_stream( dc->hwss.enable_audio_stream(pipe_ctx); } else { // if (IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) - if (is_dp_128b_132b_signal(pipe_ctx)) { + if (is_dp_128b_132b_signal(pipe_ctx)) fpga_dp_hpo_enable_link_and_stream(state, pipe_ctx); - } if (dc_is_dp_signal(pipe_ctx->stream->signal) || dc_is_virtual_signal(pipe_ctx->stream->signal)) dp_set_dsc_enable(pipe_ctx, true); - } if (pipe_ctx->stream->signal == SIGNAL_TYPE_HDMI_TYPE_A) { @@ -4683,22 +4607,22 @@ bool dc_link_is_fec_supported(const struct dc_link *link) bool dc_link_should_enable_fec(const struct dc_link *link) { - bool is_fec_disable = false; - bool ret = false; + bool force_disable = false; - if ((link->connector_signal != SIGNAL_TYPE_DISPLAY_PORT_MST && + if (link->fec_state == dc_link_fec_enabled) + force_disable = false; + else if (link->connector_signal != SIGNAL_TYPE_DISPLAY_PORT_MST && link->local_sink && - link->local_sink->edid_caps.panel_patch.disable_fec) || - (link->connector_signal == SIGNAL_TYPE_EDP - // enable FEC for EDP if DSC is supported - && link->dpcd_caps.dsc_caps.dsc_basic_caps.fields.dsc_support.DSC_SUPPORT == false - )) - is_fec_disable = true; - - if (dc_link_is_fec_supported(link) && !link->dc->debug.disable_fec && !is_fec_disable) - ret = true; - - return ret; + link->local_sink->edid_caps.panel_patch.disable_fec) + force_disable = true; + else if (link->connector_signal == SIGNAL_TYPE_EDP + && (link->dpcd_caps.dsc_caps.dsc_basic_caps.fields. + dsc_support.DSC_SUPPORT == false + || link->dc->debug.disable_dsc_edp + || !link->dc->caps.edp_dsc_support)) + force_disable = true; + + return !force_disable && dc_link_is_fec_supported(link); } uint32_t dc_bandwidth_in_kbps_from_timing( diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 22dabe596dfc..c5f5d25035d2 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -4085,9 +4085,32 @@ static bool handle_hpd_irq_psr_sink(struct dc_link *link) return false; } +static enum dc_link_rate get_link_rate_from_test_link_rate(uint8_t test_rate) +{ + switch (test_rate) { + case DP_TEST_LINK_RATE_RBR: + return LINK_RATE_LOW; + case DP_TEST_LINK_RATE_HBR: + return LINK_RATE_HIGH; + case DP_TEST_LINK_RATE_HBR2: + return LINK_RATE_HIGH2; + case DP_TEST_LINK_RATE_HBR3: + return LINK_RATE_HIGH3; + case DP_TEST_LINK_RATE_UHBR10: + return LINK_RATE_UHBR10; + case DP_TEST_LINK_RATE_UHBR20: + return LINK_RATE_UHBR20; + case DP_TEST_LINK_RATE_UHBR13_5: + return LINK_RATE_UHBR13_5; + default: + return LINK_RATE_UNKNOWN; + } +} + static void dp_test_send_link_training(struct dc_link *link) { struct dc_link_settings link_settings = {0}; + uint8_t test_rate = 0; core_link_read_dpcd( link, @@ -4097,8 +4120,9 @@ static void dp_test_send_link_training(struct dc_link *link) core_link_read_dpcd( link, DP_TEST_LINK_RATE, - (unsigned char *)(&link_settings.link_rate), + &test_rate, 1); + link_settings.link_rate = get_link_rate_from_test_link_rate(test_rate); /* Set preferred link settings */ link->verified_link_cap.lane_count = link_settings.lane_count; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index d251c3f3a714..f292303b75a5 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1076,6 +1076,15 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) bool res = false; DC_LOGGER_INIT(pipe_ctx->stream->ctx->logger); + /* Invalid input */ + if (!plane_state->dst_rect.width || + !plane_state->dst_rect.height || + !plane_state->src_rect.width || + !plane_state->src_rect.height) { + ASSERT(0); + return false; + } + pipe_ctx->plane_res.scl_data.format = convert_pixel_format_to_dalsurface( pipe_ctx->plane_state->format); @@ -2111,6 +2120,8 @@ static int acquire_resource_from_hw_enabled_state( { struct dc_link *link = stream->link; unsigned int i, inst, tg_inst = 0; + uint32_t numPipes = 1; + uint32_t id_src[4] = {0}; /* Check for enabled DIG to identify enabled display */ if (!link->link_enc->funcs->is_dig_enabled(link->link_enc)) @@ -2139,38 +2150,62 @@ static int acquire_resource_from_hw_enabled_state( if (!res_ctx->pipe_ctx[tg_inst].stream) { struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[tg_inst]; - pipe_ctx->stream_res.tg = pool->timing_generators[tg_inst]; - pipe_ctx->plane_res.mi = pool->mis[tg_inst]; - pipe_ctx->plane_res.hubp = pool->hubps[tg_inst]; - pipe_ctx->plane_res.ipp = pool->ipps[tg_inst]; - pipe_ctx->plane_res.xfm = pool->transforms[tg_inst]; - pipe_ctx->plane_res.dpp = pool->dpps[tg_inst]; - pipe_ctx->stream_res.opp = pool->opps[tg_inst]; - - if (pool->dpps[tg_inst]) { - pipe_ctx->plane_res.mpcc_inst = pool->dpps[tg_inst]->inst; - - // Read DPP->MPCC->OPP Pipe from HW State - if (pool->mpc->funcs->read_mpcc_state) { - struct mpcc_state s = {0}; - - pool->mpc->funcs->read_mpcc_state(pool->mpc, pipe_ctx->plane_res.mpcc_inst, &s); - - if (s.dpp_id < MAX_MPCC) - pool->mpc->mpcc_array[pipe_ctx->plane_res.mpcc_inst].dpp_id = s.dpp_id; - - if (s.bot_mpcc_id < MAX_MPCC) - pool->mpc->mpcc_array[pipe_ctx->plane_res.mpcc_inst].mpcc_bot = - &pool->mpc->mpcc_array[s.bot_mpcc_id]; + id_src[0] = tg_inst; + + if (pipe_ctx->stream_res.tg->funcs->get_optc_source) + pipe_ctx->stream_res.tg->funcs->get_optc_source(pipe_ctx->stream_res.tg, + &numPipes, &id_src[0], &id_src[1]); + + for (i = 0; i < numPipes; i++) { + //Check if src id invalid + if (id_src[i] == 0xf) + return -1; + + pipe_ctx->stream_res.tg = pool->timing_generators[tg_inst]; + pipe_ctx->plane_res.mi = pool->mis[id_src[i]]; + pipe_ctx->plane_res.hubp = pool->hubps[id_src[i]]; + pipe_ctx->plane_res.ipp = pool->ipps[id_src[i]]; + pipe_ctx->plane_res.xfm = pool->transforms[id_src[i]]; + pipe_ctx->plane_res.dpp = pool->dpps[id_src[i]]; + pipe_ctx->stream_res.opp = pool->opps[id_src[i]]; + + if (pool->dpps[id_src[i]]) { + pipe_ctx->plane_res.mpcc_inst = pool->dpps[id_src[i]]->inst; + + if (pool->mpc->funcs->read_mpcc_state) { + struct mpcc_state s = {0}; + pool->mpc->funcs->read_mpcc_state(pool->mpc, pipe_ctx->plane_res.mpcc_inst, &s); + if (s.dpp_id < MAX_MPCC) + pool->mpc->mpcc_array[pipe_ctx->plane_res.mpcc_inst].dpp_id = + s.dpp_id; + if (s.bot_mpcc_id < MAX_MPCC) + pool->mpc->mpcc_array[pipe_ctx->plane_res.mpcc_inst].mpcc_bot = + &pool->mpc->mpcc_array[s.bot_mpcc_id]; + if (s.opp_id < MAX_OPP) + pipe_ctx->stream_res.opp->mpc_tree_params.opp_id = s.opp_id; + } + } + pipe_ctx->pipe_idx = id_src[i]; - if (s.opp_id < MAX_OPP) - pipe_ctx->stream_res.opp->mpc_tree_params.opp_id = s.opp_id; + if (id_src[i] >= pool->timing_generator_count) { + id_src[i] = pool->timing_generator_count - 1; + pipe_ctx->stream_res.tg = pool->timing_generators[id_src[i]]; + pipe_ctx->stream_res.opp = pool->opps[id_src[i]]; } + + pipe_ctx->stream = stream; } - pipe_ctx->pipe_idx = tg_inst; - pipe_ctx->stream = stream; - return tg_inst; + if (numPipes == 2) { + stream->apply_boot_odm_mode = dm_odm_combine_policy_2to1; + res_ctx->pipe_ctx[id_src[0]].next_odm_pipe = &res_ctx->pipe_ctx[id_src[1]]; + res_ctx->pipe_ctx[id_src[0]].prev_odm_pipe = NULL; + res_ctx->pipe_ctx[id_src[1]].next_odm_pipe = NULL; + res_ctx->pipe_ctx[id_src[1]].prev_odm_pipe = &res_ctx->pipe_ctx[id_src[0]]; + } else + stream->apply_boot_odm_mode = dm_odm_combine_mode_disabled; + + return id_src[0]; } return -1; diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 77ef9d1f9ea8..2f0c436dae4c 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -47,7 +47,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.177" +#define DC_VER "3.2.181" #define MAX_SURFACES 3 #define MAX_PLANES 6 @@ -340,6 +340,7 @@ struct dc_config { bool is_asymmetric_memory; bool is_single_rank_dimm; bool use_pipe_ctx_sync_logic; + bool ignore_dpref_ss; }; enum visual_confirm { @@ -416,6 +417,7 @@ struct dc_clocks { #if defined(CONFIG_DRM_AMD_DC_DCN) enum dcn_zstate_support_state zstate_support; bool dtbclk_en; + int dtbclk_khz; #endif enum dcn_pwr_state pwr_state; /* @@ -665,6 +667,7 @@ struct dc_debug_options { uint32_t edid_read_retry_times; bool remove_disconnect_edp; unsigned int force_odm_combine; //bit vector based on otg inst + unsigned int seamless_boot_odm_combine; #if defined(CONFIG_DRM_AMD_DC_DCN) unsigned int force_odm_combine_4to1; //bit vector based on otg inst bool disable_z9_mpc; @@ -729,7 +732,6 @@ struct dc_debug_options { bool apply_vendor_specific_lttpr_wa; bool extended_blank_optimization; union aux_wake_wa_options aux_wake_wa; - bool ignore_dpref_ss; uint8_t psr_power_use_phy_fsm; }; diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h index 36ac2a8746bd..2c54b6e0498b 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h @@ -133,6 +133,16 @@ enum dp_link_encoding { DP_128b_132b_ENCODING = 2, }; +enum dp_test_link_rate { + DP_TEST_LINK_RATE_RBR = 0x06, + DP_TEST_LINK_RATE_HBR = 0x0A, + DP_TEST_LINK_RATE_HBR2 = 0x14, + DP_TEST_LINK_RATE_HBR3 = 0x1E, + DP_TEST_LINK_RATE_UHBR10 = 0x01, + DP_TEST_LINK_RATE_UHBR20 = 0x02, + DP_TEST_LINK_RATE_UHBR13_5 = 0x03, +}; + struct dc_link_settings { enum dc_lane_count lane_count; enum dc_link_rate link_rate; @@ -620,7 +630,7 @@ union test_request { uint8_t LINK_TEST_PATTRN :1; uint8_t EDID_READ :1; uint8_t PHY_TEST_PATTERN :1; - uint8_t RESERVED :1; + uint8_t PHY_TEST_CHANNEL_CODING_TYPE :2; uint8_t AUDIO_TEST_PATTERN :1; uint8_t TEST_AUDIO_DISABLED_VIDEO :1; } bits; @@ -993,8 +1003,8 @@ union dp_128b_132b_supported_link_rates { union dp_128b_132b_supported_lttpr_link_rates { struct { uint8_t UHBR10 :1; - uint8_t UHBR13_5:1; uint8_t UHBR20 :1; + uint8_t UHBR13_5:1; uint8_t RESERVED:5; } bits; uint8_t raw; diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h index aa818bf840eb..a3c37ee3f849 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_link.h +++ b/drivers/gpu/drm/amd/display/dc/dc_link.h @@ -67,13 +67,9 @@ struct link_mst_stream_allocation_table { struct link_mst_stream_allocation stream_allocations[MAX_CONTROLLER_NUM]; }; -struct time_stamp { - uint64_t edp_poweroff; - uint64_t edp_poweron; -}; - -struct link_trace { - struct time_stamp time_stamp; +struct edp_trace_power_timestamps { + uint64_t poweroff; + uint64_t poweron; }; struct dp_trace_lt_counts { @@ -96,6 +92,7 @@ struct dp_trace { struct dp_trace_lt commit_lt_trace; unsigned int link_loss_count; bool is_initialized; + struct edp_trace_power_timestamps edp_trace_power_timestamps; }; /* PSR feature flags */ @@ -231,7 +228,6 @@ struct dc_link { struct dc_link_status link_status; struct dprx_states dprx_states; - struct link_trace link_trace; struct gpio *hpd_gpio; enum dc_link_fec_state fec_state; }; diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index c4168c11257c..580420c3eedc 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -246,6 +246,7 @@ struct dc_stream_state { bool apply_edp_fast_boot_optimization; bool apply_seamless_boot_optimization; + uint32_t apply_boot_odm_mode; uint32_t stream_id; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c index 8e814000db62..29e20d92b0bb 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c @@ -565,13 +565,11 @@ int dce_aux_transfer_raw(struct ddc_service *ddc, struct ddc *ddc_pin = ddc->ddc_pin; struct dce_aux *aux_engine; struct aux_request_transaction_data aux_req; - struct aux_reply_transaction_data aux_rep; uint8_t returned_bytes = 0; int res = -1; uint32_t status; memset(&aux_req, 0, sizeof(aux_req)); - memset(&aux_rep, 0, sizeof(aux_rep)); aux_engine = ddc->ctx->dc->res_pool->engines[ddc_pin->pin_data->en]; if (!acquire(aux_engine, ddc_pin)) { diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c index cc5128e67daf..760653e2b607 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c @@ -985,7 +985,7 @@ static bool dcn31_program_pix_clk( struct bp_pixel_clock_parameters bp_pc_params = {0}; enum transmitter_color_depth bp_pc_colour_depth = TRANSMITTER_COLOR_DEPTH_24; // For these signal types Driver to program DP_DTO without calling VBIOS Command table - if (dc_is_dp_signal(pix_clk_params->signal_type)) { + if (dc_is_dp_signal(pix_clk_params->signal_type) || dc_is_virtual_signal(pix_clk_params->signal_type)) { if (e) { /* Set DTO values: phase = target clock, modulo = reference clock*/ REG_WRITE(PHASE[inst], e->target_pixel_rate_khz * e->mult_factor); @@ -1254,7 +1254,7 @@ static uint32_t dcn3_get_pix_clk_dividers( struct pixel_clk_params *pix_clk_params, struct pll_settings *pll_settings) { - unsigned long long actual_pix_clk_100Hz = pix_clk_params->requested_pix_clk_100hz; + unsigned long long actual_pix_clk_100Hz = pix_clk_params ? pix_clk_params->requested_pix_clk_100hz : 0; struct dce110_clk_src *clk_src; clk_src = TO_DCE110_CLK_SRC(cs); diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 248602c15f3a..9fc1ba12ec19 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -67,6 +67,7 @@ #include "dcn10/dcn10_hw_sequencer.h" +#include "link/link_dp_trace.h" #include "dce110_hw_sequencer.h" #define GAMMA_HW_POINTS_NUM 256 @@ -819,19 +820,19 @@ void dce110_edp_power_control( div64_u64(dm_get_elapse_time_in_ns( ctx, current_ts, - link->link_trace.time_stamp.edp_poweroff), 1000000); + dp_trace_get_edp_poweroff_timestamp(link)), 1000000); unsigned long long time_since_edp_poweron_ms = div64_u64(dm_get_elapse_time_in_ns( ctx, current_ts, - link->link_trace.time_stamp.edp_poweron), 1000000); + dp_trace_get_edp_poweron_timestamp(link)), 1000000); DC_LOG_HW_RESUME_S3( "%s: transition: power_up=%d current_ts=%llu edp_poweroff=%llu edp_poweron=%llu time_since_edp_poweroff_ms=%llu time_since_edp_poweron_ms=%llu", __func__, power_up, current_ts, - link->link_trace.time_stamp.edp_poweroff, - link->link_trace.time_stamp.edp_poweron, + dp_trace_get_edp_poweroff_timestamp(link), + dp_trace_get_edp_poweron_timestamp(link), time_since_edp_poweroff_ms, time_since_edp_poweron_ms); @@ -846,7 +847,7 @@ void dce110_edp_power_control( link->local_sink->edid_caps.panel_patch.extra_t12_ms; /* Adjust remaining_min_edp_poweroff_time_ms if this is not the first time. */ - if (link->link_trace.time_stamp.edp_poweroff != 0) { + if (dp_trace_get_edp_poweroff_timestamp(link) != 0) { if (time_since_edp_poweroff_ms < remaining_min_edp_poweroff_time_ms) remaining_min_edp_poweroff_time_ms = remaining_min_edp_poweroff_time_ms - time_since_edp_poweroff_ms; @@ -904,17 +905,13 @@ void dce110_edp_power_control( __func__, (power_up ? "On":"Off"), bp_result); - if (!power_up) - /*save driver power off time stamp*/ - link->link_trace.time_stamp.edp_poweroff = dm_get_timestamp(ctx); - else - link->link_trace.time_stamp.edp_poweron = dm_get_timestamp(ctx); + dp_trace_set_edp_power_timestamp(link, power_up); DC_LOG_HW_RESUME_S3( "%s: updated values: edp_poweroff=%llu edp_poweron=%llu\n", __func__, - link->link_trace.time_stamp.edp_poweroff, - link->link_trace.time_stamp.edp_poweron); + dp_trace_get_edp_poweroff_timestamp(link), + dp_trace_get_edp_poweron_timestamp(link)); if (bp_result != BP_RESULT_OK) DC_LOG_ERROR( @@ -942,14 +939,14 @@ void dce110_edp_wait_for_T12( return; if (!link->panel_cntl->funcs->is_panel_powered_on(link->panel_cntl) && - link->link_trace.time_stamp.edp_poweroff != 0) { + dp_trace_get_edp_poweroff_timestamp(link) != 0) { unsigned int t12_duration = 500; // Default T12 as per spec unsigned long long current_ts = dm_get_timestamp(ctx); unsigned long long time_since_edp_poweroff_ms = div64_u64(dm_get_elapse_time_in_ns( ctx, current_ts, - link->link_trace.time_stamp.edp_poweroff), 1000000); + dp_trace_get_edp_poweroff_timestamp(link)), 1000000); t12_duration += link->local_sink->edid_caps.panel_patch.extra_t12_ms; // Add extra T12 diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c index fbff6beb78be..3a7f76e2c598 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c @@ -1316,7 +1316,7 @@ void hubp1_set_flip_int(struct hubp *hubp) * * @hubp: hubp struct reference. */ -void hubp1_wait_pipe_read_start(struct hubp *hubp) +static void hubp1_wait_pipe_read_start(struct hubp *hubp) { struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 781334b395ba..e02ac75afbf7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -1259,6 +1259,7 @@ void dcn10_init_pipes(struct dc *dc, struct dc_state *context) { int i; struct dce_hwseq *hws = dc->hwseq; + struct hubbub *hubbub = dc->res_pool->hubbub; bool can_apply_seamless_boot = false; for (i = 0; i < context->stream_count; i++) { @@ -1294,6 +1295,21 @@ void dcn10_init_pipes(struct dc *dc, struct dc_state *context) } } + /* Reset det size */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + struct hubp *hubp = dc->res_pool->hubps[i]; + + /* Do not need to reset for seamless boot */ + if (pipe_ctx->stream != NULL && can_apply_seamless_boot) + continue; + + if (hubbub && hubp) { + if (hubbub->funcs->program_det_size) + hubbub->funcs->program_det_size(hubbub, hubp->inst, 0); + } + } + /* num_opp will be equal to number of mpcc */ for (i = 0; i < dc->res_pool->res_cap->num_opp; i++) { struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; @@ -1359,6 +1375,11 @@ void dcn10_init_pipes(struct dc *dc, struct dc_state *context) pipe_ctx->stream_res.tg = NULL; pipe_ctx->plane_res.hubp = NULL; + if (tg->funcs->is_tg_enabled(tg)) { + if (tg->funcs->init_odm) + tg->funcs->init_odm(tg); + } + tg->funcs->tg_init(tg); } @@ -1493,8 +1514,12 @@ void dcn10_init_hw(struct dc *dc) /* Check for enabled DIG to identify enabled display */ if (link->link_enc->funcs->is_dig_enabled && - link->link_enc->funcs->is_dig_enabled(link->link_enc)) + link->link_enc->funcs->is_dig_enabled(link->link_enc)) { link->link_status.link_active = true; + if (link->link_enc->funcs->fec_is_active && + link->link_enc->funcs->fec_is_active(link->link_enc)) + link->fec_state = dc_link_fec_enabled; + } } /* we want to turn off all dp displays before doing detection */ @@ -2522,14 +2547,18 @@ void dcn10_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx) struct mpc *mpc = dc->res_pool->mpc; struct mpc_tree *mpc_tree_params = &(pipe_ctx->stream_res.opp->mpc_tree_params); - if (per_pixel_alpha) - blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA; - else - blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA; - blnd_cfg.overlap_only = false; blnd_cfg.global_gain = 0xff; + if (per_pixel_alpha && pipe_ctx->plane_state->global_alpha) { + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN; + blnd_cfg.global_gain = pipe_ctx->plane_state->global_alpha_value; + } else if (per_pixel_alpha) { + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA; + } else { + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA; + } + if (pipe_ctx->plane_state->global_alpha) blnd_cfg.global_alpha = pipe_ctx->plane_state->global_alpha_value; else @@ -2979,8 +3008,11 @@ void dcn10_prepare_bandwidth( true); dcn10_stereo_hw_frame_pack_wa(dc, context); - if (dc->debug.pplib_wm_report_mode == WM_REPORT_OVERRIDE) + if (dc->debug.pplib_wm_report_mode == WM_REPORT_OVERRIDE) { + DC_FP_START(); dcn_bw_notify_pplib_of_wm_ranges(dc); + DC_FP_END(); + } if (dc->debug.sanity_checks) hws->funcs.verify_allow_pstate_change_high(dc); @@ -3013,8 +3045,11 @@ void dcn10_optimize_bandwidth( dcn10_stereo_hw_frame_pack_wa(dc, context); - if (dc->debug.pplib_wm_report_mode == WM_REPORT_OVERRIDE) + if (dc->debug.pplib_wm_report_mode == WM_REPORT_OVERRIDE) { + DC_FP_START(); dcn_bw_notify_pplib_of_wm_ranges(dc); + DC_FP_END(); + } if (dc->debug.sanity_checks) hws->funcs.verify_allow_pstate_change_high(dc); @@ -3039,12 +3074,16 @@ void dcn10_set_drr(struct pipe_ctx **pipe_ctx, * as well. */ for (i = 0; i < num_pipes; i++) { - pipe_ctx[i]->stream_res.tg->funcs->set_drr( - pipe_ctx[i]->stream_res.tg, ¶ms); - if (adjust.v_total_max != 0 && adjust.v_total_min != 0) - pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control( - pipe_ctx[i]->stream_res.tg, - event_triggers, num_frames); + if ((pipe_ctx[i]->stream_res.tg != NULL) && pipe_ctx[i]->stream_res.tg->funcs) { + if (pipe_ctx[i]->stream_res.tg->funcs->set_drr) + pipe_ctx[i]->stream_res.tg->funcs->set_drr( + pipe_ctx[i]->stream_res.tg, ¶ms); + if (adjust.v_total_max != 0 && adjust.v_total_min != 0) + if (pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control) + pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control( + pipe_ctx[i]->stream_res.tg, + event_triggers, num_frames); + } } } @@ -3175,7 +3214,8 @@ void dcn10_wait_for_mpcc_disconnect( if (pipe_ctx->stream_res.opp->mpcc_disconnect_pending[mpcc_inst]) { struct hubp *hubp = get_hubp_by_inst(res_pool, mpcc_inst); - res_pool->mpc->funcs->wait_for_idle(res_pool->mpc, mpcc_inst); + if (pipe_ctx->stream_res.tg->funcs->is_tg_enabled(pipe_ctx->stream_res.tg)) + res_pool->mpc->funcs->wait_for_idle(res_pool->mpc, mpcc_inst); pipe_ctx->stream_res.opp->mpcc_disconnect_pending[mpcc_inst] = false; hubp->funcs->set_blank(hubp, true); } diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index 4048908dd265..bca049b2f867 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -1141,6 +1141,20 @@ static void dcn10_destroy_resource_pool(struct resource_pool **pool) *pool = NULL; } +static bool dcn10_validate_bandwidth( + struct dc *dc, + struct dc_state *context, + bool fast_validate) +{ + bool voltage_supported; + + DC_FP_START(); + voltage_supported = dcn_validate_bandwidth(dc, context, fast_validate); + DC_FP_END(); + + return voltage_supported; +} + static enum dc_status dcn10_validate_plane(const struct dc_plane_state *plane_state, struct dc_caps *caps) { if (plane_state->format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN @@ -1492,6 +1506,7 @@ static bool dcn10_resource_construct( && pool->base.pp_smu->rv_funcs.set_pme_wa_enable != NULL) dc->debug.az_endpoint_mute_only = false; + DC_FP_START(); if (!dc->debug.disable_pplib_clock_request) dcn_bw_update_from_pplib(dc); dcn_bw_sync_calcs_and_dml(dc); @@ -1499,6 +1514,7 @@ static bool dcn10_resource_construct( dc->res_pool = &pool->base; dcn_bw_notify_pplib_of_wm_ranges(dc); } + DC_FP_END(); { struct irq_service_init_data init_data; diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 4290eaf11a04..b627c41713cc 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -2344,14 +2344,18 @@ void dcn20_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx) struct mpc *mpc = dc->res_pool->mpc; struct mpc_tree *mpc_tree_params = &(pipe_ctx->stream_res.opp->mpc_tree_params); - if (per_pixel_alpha) - blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA; - else - blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA; - blnd_cfg.overlap_only = false; blnd_cfg.global_gain = 0xff; + if (per_pixel_alpha && pipe_ctx->plane_state->global_alpha) { + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN; + blnd_cfg.global_gain = pipe_ctx->plane_state->global_alpha_value; + } else if (per_pixel_alpha) { + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA; + } else { + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA; + } + if (pipe_ctx->plane_state->global_alpha) blnd_cfg.global_alpha = pipe_ctx->plane_state->global_alpha_value; else diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index 3fe4bfbb98a0..faab59508d82 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -997,6 +997,7 @@ static struct clock_source *dcn21_clock_source_create( return &clk_src->base; } + kfree(clk_src); BREAK_TO_DEBUGGER(); return NULL; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c index f61ec8763844..782b8db451b4 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c @@ -535,8 +535,12 @@ void dcn30_init_hw(struct dc *dc) /* Check for enabled DIG to identify enabled display */ if (link->link_enc->funcs->is_dig_enabled && - link->link_enc->funcs->is_dig_enabled(link->link_enc)) + link->link_enc->funcs->is_dig_enabled(link->link_enc)) { link->link_status.link_active = true; + if (link->link_enc->funcs->fec_is_active && + link->link_enc->funcs->fec_is_active(link->link_enc)) + link->fec_state = dc_link_fec_enabled; + } } /* Power gate DSCs */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c index e6a62cc75139..336b2ce6a636 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c @@ -2602,9 +2602,9 @@ static bool dcn30_resource_construct( dc->caps.mall_size_total = dc->caps.mall_size_per_mem_channel * dc->ctx->dc_bios->vram_info.num_chans * 1048576; dc->caps.cursor_cache_size = dc->caps.max_cursor_size * dc->caps.max_cursor_size * 8; - dc->caps.max_slave_planes = 1; - dc->caps.max_slave_yuv_planes = 1; - dc->caps.max_slave_rgb_planes = 1; + dc->caps.max_slave_planes = 2; + dc->caps.max_slave_yuv_planes = 2; + dc->caps.max_slave_rgb_planes = 2; dc->caps.post_blend_color_processing = true; dc->caps.force_dp_tps4_for_cp2520 = true; dc->caps.extended_aux_timeout_support = true; diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c index 88318e8ffca8..f0938653bb88 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c @@ -1219,9 +1219,9 @@ static bool dcn302_resource_construct( /* total size = mall per channel * num channels * 1024 * 1024 */ dc->caps.mall_size_total = dc->caps.mall_size_per_mem_channel * dc->ctx->dc_bios->vram_info.num_chans * 1048576; dc->caps.cursor_cache_size = dc->caps.max_cursor_size * dc->caps.max_cursor_size * 8; - dc->caps.max_slave_planes = 1; - dc->caps.max_slave_yuv_planes = 1; - dc->caps.max_slave_rgb_planes = 1; + dc->caps.max_slave_planes = 2; + dc->caps.max_slave_yuv_planes = 2; + dc->caps.max_slave_rgb_planes = 2; dc->caps.post_blend_color_processing = true; dc->caps.force_dp_tps4_for_cp2520 = true; dc->caps.extended_aux_timeout_support = true; diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile index d20e3b8ccc30..ec041e3cda30 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile @@ -15,32 +15,6 @@ DCN31 = dcn31_resource.o dcn31_hubbub.o dcn31_hwseq.o dcn31_init.o dcn31_hubp.o dcn31_apg.o dcn31_hpo_dp_stream_encoder.o dcn31_hpo_dp_link_encoder.o \ dcn31_afmt.o dcn31_vpg.o -ifdef CONFIG_X86 -CFLAGS_$(AMDDALPATH)/dc/dcn31/dcn31_resource.o := -msse -endif - -ifdef CONFIG_PPC64 -CFLAGS_$(AMDDALPATH)/dc/dcn31/dcn31_resource.o := -mhard-float -maltivec -endif - -ifdef CONFIG_CC_IS_GCC -ifeq ($(call cc-ifversion, -lt, 0701, y), y) -IS_OLD_GCC = 1 -endif -CFLAGS_$(AMDDALPATH)/dc/dcn31/dcn31_resource.o += -mhard-float -endif - -ifdef CONFIG_X86 -ifdef IS_OLD_GCC -# Stack alignment mismatch, proceed with caution. -# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 -# (8B stack alignment). -CFLAGS_$(AMDDALPATH)/dc/dcn31/dcn31_resource.o += -mpreferred-stack-boundary=4 -else -CFLAGS_$(AMDDALPATH)/dc/dcn31/dcn31_resource.o += -msse2 -endif -endif - AMD_DAL_DCN31 = $(addprefix $(AMDDALPATH)/dc/dcn31/,$(DCN31)) AMD_DISPLAY_FILES += $(AMD_DAL_DCN31) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c index 8ae6117953ca..197a5cae068b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c @@ -54,7 +54,8 @@ void hubp31_soft_reset(struct hubp *hubp, bool reset) REG_UPDATE(DCHUBP_CNTL, HUBP_SOFT_RESET, reset); } -void hubp31_program_extended_blank(struct hubp *hubp, unsigned int min_dst_y_next_start_optimized) +static void hubp31_program_extended_blank(struct hubp *hubp, + unsigned int min_dst_y_next_start_optimized) { struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c index 631d8ac63aa4..531dd2c65007 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c @@ -188,8 +188,12 @@ void dcn31_init_hw(struct dc *dc) /* Check for enabled DIG to identify enabled display */ if (link->link_enc->funcs->is_dig_enabled && - link->link_enc->funcs->is_dig_enabled(link->link_enc)) + link->link_enc->funcs->is_dig_enabled(link->link_enc)) { link->link_status.link_active = true; + if (link->link_enc->funcs->fec_is_active && + link->link_enc->funcs->fec_is_active(link->link_enc)) + link->fec_state = dc_link_fec_enabled; + } } /* Enables outbox notifications for usb4 dpia */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c index d7559e5a99ce..e708f07fe75a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c @@ -153,9 +153,4 @@ void dcn31_hw_sequencer_construct(struct dc *dc) dc->hwss.init_hw = dcn20_fpga_init_hw; dc->hwseq->funcs.init_pipes = NULL; } - if (dc->debug.disable_z10) { - /*hw not support z10 or sw disable it*/ - dc->hwss.z10_restore = NULL; - dc->hwss.z10_save_init = NULL; - } } diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c index e05527a3a8ba..c51f7dca94f8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c @@ -91,8 +91,7 @@ static void optc31_set_odm_combine(struct timing_generator *optc, int *opp_id, i optc1->opp_count = opp_cnt; } -/** - * Enable CRTC +/* * Enable CRTC - call ASIC Control Object to enable Timing generator. */ static bool optc31_enable_crtc(struct timing_generator *optc) @@ -214,6 +213,26 @@ void optc31_set_drr( } } +void optc3_init_odm(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_SET_5(OPTC_DATA_SOURCE_SELECT, 0, + OPTC_NUM_OF_INPUT_SEGMENT, 0, + OPTC_SEG0_SRC_SEL, optc->inst, + OPTC_SEG1_SRC_SEL, 0xf, + OPTC_SEG2_SRC_SEL, 0xf, + OPTC_SEG3_SRC_SEL, 0xf + ); + + REG_SET(OTG_H_TIMING_CNTL, 0, + OTG_H_TIMING_DIV_MODE, 0); + + REG_SET(OPTC_MEMORY_CONFIG, 0, + OPTC_MEM_SEL, 0); + optc1->opp_count = 1; +} + static struct timing_generator_funcs dcn31_tg_funcs = { .validate_timing = optc1_validate_timing, .program_timing = optc1_program_timing, @@ -273,6 +292,7 @@ static struct timing_generator_funcs dcn31_tg_funcs = { .program_manual_trigger = optc2_program_manual_trigger, .setup_manual_trigger = optc2_setup_manual_trigger, .get_hw_timing = optc1_get_hw_timing, + .init_odm = optc3_init_odm, }; void dcn31_timing_generator_init(struct optc *optc1) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h index a37b16040c1d..9e881f2ce74b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h @@ -258,4 +258,6 @@ void dcn31_timing_generator_init(struct optc *optc1); void optc31_set_drr(struct timing_generator *optc, const struct drr_params *params); +void optc3_init_odm(struct timing_generator *optc); + #endif /* __DC_OPTC_DCN31_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index 63934ecf6be8..5b3f0c2dfb55 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -65,6 +65,7 @@ #include "virtual/virtual_stream_encoder.h" #include "dce110/dce110_resource.h" #include "dml/display_mode_vba.h" +#include "dml/dcn31/dcn31_fpu.h" #include "dcn31/dcn31_dccg.h" #include "dcn10/dcn10_resource.h" #include "dcn31_panel_cntl.h" @@ -102,152 +103,6 @@ #define DC_LOGGER_INIT(logger) -#define DCN3_1_DEFAULT_DET_SIZE 384 - -struct _vcs_dpi_ip_params_st dcn3_1_ip = { - .gpuvm_enable = 1, - .gpuvm_max_page_table_levels = 1, - .hostvm_enable = 1, - .hostvm_max_page_table_levels = 2, - .rob_buffer_size_kbytes = 64, - .det_buffer_size_kbytes = DCN3_1_DEFAULT_DET_SIZE, - .config_return_buffer_size_in_kbytes = 1792, - .compressed_buffer_segment_size_in_kbytes = 64, - .meta_fifo_size_in_kentries = 32, - .zero_size_buffer_entries = 512, - .compbuf_reserved_space_64b = 256, - .compbuf_reserved_space_zs = 64, - .dpp_output_buffer_pixels = 2560, - .opp_output_buffer_lines = 1, - .pixel_chunk_size_kbytes = 8, - .meta_chunk_size_kbytes = 2, - .min_meta_chunk_size_bytes = 256, - .writeback_chunk_size_kbytes = 8, - .ptoi_supported = false, - .num_dsc = 3, - .maximum_dsc_bits_per_component = 10, - .dsc422_native_support = false, - .is_line_buffer_bpp_fixed = true, - .line_buffer_fixed_bpp = 48, - .line_buffer_size_bits = 789504, - .max_line_buffer_lines = 12, - .writeback_interface_buffer_size_kbytes = 90, - .max_num_dpp = 4, - .max_num_otg = 4, - .max_num_hdmi_frl_outputs = 1, - .max_num_wb = 1, - .max_dchub_pscl_bw_pix_per_clk = 4, - .max_pscl_lb_bw_pix_per_clk = 2, - .max_lb_vscl_bw_pix_per_clk = 4, - .max_vscl_hscl_bw_pix_per_clk = 4, - .max_hscl_ratio = 6, - .max_vscl_ratio = 6, - .max_hscl_taps = 8, - .max_vscl_taps = 8, - .dpte_buffer_size_in_pte_reqs_luma = 64, - .dpte_buffer_size_in_pte_reqs_chroma = 34, - .dispclk_ramp_margin_percent = 1, - .max_inter_dcn_tile_repeaters = 8, - .cursor_buffer_size = 16, - .cursor_chunk_size = 2, - .writeback_line_buffer_buffer_size = 0, - .writeback_min_hscl_ratio = 1, - .writeback_min_vscl_ratio = 1, - .writeback_max_hscl_ratio = 1, - .writeback_max_vscl_ratio = 1, - .writeback_max_hscl_taps = 1, - .writeback_max_vscl_taps = 1, - .dppclk_delay_subtotal = 46, - .dppclk_delay_scl = 50, - .dppclk_delay_scl_lb_only = 16, - .dppclk_delay_cnvc_formatter = 27, - .dppclk_delay_cnvc_cursor = 6, - .dispclk_delay_subtotal = 119, - .dynamic_metadata_vm_enabled = false, - .odm_combine_4to1_supported = false, - .dcc_supported = true, -}; - -struct _vcs_dpi_soc_bounding_box_st dcn3_1_soc = { - /*TODO: correct dispclk/dppclk voltage level determination*/ - .clock_limits = { - { - .state = 0, - .dispclk_mhz = 1200.0, - .dppclk_mhz = 1200.0, - .phyclk_mhz = 600.0, - .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 186.0, - .dtbclk_mhz = 625.0, - }, - { - .state = 1, - .dispclk_mhz = 1200.0, - .dppclk_mhz = 1200.0, - .phyclk_mhz = 810.0, - .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 209.0, - .dtbclk_mhz = 625.0, - }, - { - .state = 2, - .dispclk_mhz = 1200.0, - .dppclk_mhz = 1200.0, - .phyclk_mhz = 810.0, - .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 209.0, - .dtbclk_mhz = 625.0, - }, - { - .state = 3, - .dispclk_mhz = 1200.0, - .dppclk_mhz = 1200.0, - .phyclk_mhz = 810.0, - .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 371.0, - .dtbclk_mhz = 625.0, - }, - { - .state = 4, - .dispclk_mhz = 1200.0, - .dppclk_mhz = 1200.0, - .phyclk_mhz = 810.0, - .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 417.0, - .dtbclk_mhz = 625.0, - }, - }, - .num_states = 5, - .sr_exit_time_us = 9.0, - .sr_enter_plus_exit_time_us = 11.0, - .sr_exit_z8_time_us = 442.0, - .sr_enter_plus_exit_z8_time_us = 560.0, - .writeback_latency_us = 12.0, - .dram_channel_width_bytes = 4, - .round_trip_ping_latency_dcfclk_cycles = 106, - .urgent_latency_pixel_data_only_us = 4.0, - .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, - .urgent_latency_vm_data_only_us = 4.0, - .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, - .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, - .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, - .pct_ideal_sdp_bw_after_urgent = 80.0, - .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0, - .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, - .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, - .max_avg_sdp_bw_use_normal_percent = 60.0, - .max_avg_dram_bw_use_normal_percent = 60.0, - .fabric_datapath_to_dcn_data_return_bytes = 32, - .return_bus_width_bytes = 64, - .downspread_percent = 0.38, - .dcn_downspread_percent = 0.5, - .gpuvm_min_page_size_bytes = 4096, - .hostvm_min_page_size_bytes = 4096, - .do_urgent_latency_adjustment = false, - .urgent_latency_adjustment_fabric_clock_component_us = 0, - .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, -}; - enum dcn31_clk_src_array_id { DCN31_CLK_SRC_PLL0, DCN31_CLK_SRC_PLL1, @@ -1030,6 +885,7 @@ static const struct dc_debug_options debug_defaults_drv = { .afmt = true, } }, + .disable_z10 = true, .optimize_edp_link_rate = true, .enable_sw_cntl_psr = true, .apply_vendor_specific_lttpr_wa = true, @@ -1812,7 +1668,6 @@ int dcn31_populate_dml_pipes_from_context( pipes[pipe_cnt].pipe.src.immediate_flip = true; pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; - pipes[pipe_cnt].pipe.src.hostvm = dc->res_pool->hubbub->riommu_active; pipes[pipe_cnt].pipe.src.gpuvm = true; pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0; pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0; @@ -1869,143 +1724,6 @@ void dcn31_update_soc_for_wm_a(struct dc *dc, struct dc_state *context) } } -static void dcn31_calculate_wm_and_dlg_fp( - struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int pipe_cnt, - int vlevel) -{ - int i, pipe_idx; - double dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; - - if (context->bw_ctx.dml.soc.min_dcfclk > dcfclk) - dcfclk = context->bw_ctx.dml.soc.min_dcfclk; - - /* We don't recalculate clocks for 0 pipe configs, which can block - * S0i3 as high clocks will block low power states - * Override any clocks that can block S0i3 to min here - */ - if (pipe_cnt == 0) { - context->bw_ctx.bw.dcn.clk.dcfclk_khz = dcfclk; // always should be vlevel 0 - return; - } - - pipes[0].clks_cfg.voltage = vlevel; - pipes[0].clks_cfg.dcfclk_mhz = dcfclk; - pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz; - -#if 0 // TODO - /* Set B: - * TODO - */ - if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid) { - if (vlevel == 0) { - pipes[0].clks_cfg.voltage = 1; - pipes[0].clks_cfg.dcfclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dcfclk_mhz; - } - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us; - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us; - context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us; - } - context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_z8_ns = get_wm_z8_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_z8_ns = get_wm_z8_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - - pipes[0].clks_cfg.voltage = vlevel; - pipes[0].clks_cfg.dcfclk_mhz = dcfclk; - - /* Set C: - * TODO - */ - if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) { - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us; - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us; - context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us; - } - context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_z8_ns = get_wm_z8_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_z8_ns = get_wm_z8_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - - /* Set D: - * TODO - */ - if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].valid) { - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us; - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us; - context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us; - } - context->bw_ctx.bw.dcn.watermarks.d.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_z8_ns = get_wm_z8_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_z8_ns = get_wm_z8_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; -#endif - - /* Set A: - * All clocks min required - * - * Set A calculated last so that following calculations are based on Set A - */ - dc->res_pool->funcs->update_soc_for_wm_a(dc, context); - context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_z8_ns = get_wm_z8_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_z8_ns = get_wm_z8_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - /* TODO: remove: */ - context->bw_ctx.bw.dcn.watermarks.b = context->bw_ctx.bw.dcn.watermarks.a; - context->bw_ctx.bw.dcn.watermarks.c = context->bw_ctx.bw.dcn.watermarks.a; - context->bw_ctx.bw.dcn.watermarks.d = context->bw_ctx.bw.dcn.watermarks.a; - /* end remove*/ - - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - if (!context->res_ctx.pipe_ctx[i].stream) - continue; - - pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt); - pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); - - if (dc->config.forced_clocks || dc->debug.max_disp_clk) { - pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz; - pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz; - } - if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000) - pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0; - if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) - pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0; - - pipe_idx++; - } - - DC_FP_START(); - dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel); - DC_FP_END(); -} - void dcn31_calculate_wm_and_dlg( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, @@ -2075,77 +1793,6 @@ static struct dc_cap_funcs cap_funcs = { .get_dcc_compression_cap = dcn20_get_dcc_compression_cap }; -void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) -{ - struct clk_limit_table *clk_table = &bw_params->clk_table; - struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES]; - unsigned int i, closest_clk_lvl; - int j; - - // Default clock levels are used for diags, which may lead to overclocking. - if (!IS_DIAG_DC(dc->ctx->dce_environment)) { - int max_dispclk_mhz = 0, max_dppclk_mhz = 0; - - dcn3_1_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator; - dcn3_1_ip.max_num_dpp = dc->res_pool->pipe_count; - dcn3_1_soc.num_chans = bw_params->num_channels; - - ASSERT(clk_table->num_entries); - - /* Prepass to find max clocks independent of voltage level. */ - for (i = 0; i < clk_table->num_entries; ++i) { - if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz) - max_dispclk_mhz = clk_table->entries[i].dispclk_mhz; - if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz) - max_dppclk_mhz = clk_table->entries[i].dppclk_mhz; - } - - for (i = 0; i < clk_table->num_entries; i++) { - /* loop backwards*/ - for (closest_clk_lvl = 0, j = dcn3_1_soc.num_states - 1; j >= 0; j--) { - if ((unsigned int) dcn3_1_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) { - closest_clk_lvl = j; - break; - } - } - - clock_limits[i].state = i; - - /* Clocks dependent on voltage level. */ - clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; - clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; - clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz; - clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio; - - /* Clocks independent of voltage level. */ - clock_limits[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz : - dcn3_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz; - - clock_limits[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz : - dcn3_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz; - - clock_limits[i].dram_bw_per_chan_gbps = dcn3_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; - clock_limits[i].dscclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz; - clock_limits[i].dtbclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; - clock_limits[i].phyclk_d18_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; - clock_limits[i].phyclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz; - } - for (i = 0; i < clk_table->num_entries; i++) - dcn3_1_soc.clock_limits[i] = clock_limits[i]; - if (clk_table->num_entries) { - dcn3_1_soc.num_states = clk_table->num_entries; - } - } - - dcn3_1_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; - dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; - - if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) - dml_init_instance(&dc->dml, &dcn3_1_soc, &dcn3_1_ip, DML_PROJECT_DCN31); - else - dml_init_instance(&dc->dml, &dcn3_1_soc, &dcn3_1_ip, DML_PROJECT_DCN31_FPGA); -} - static struct resource_funcs dcn31_res_pool_funcs = { .destroy = dcn31_destroy_resource_pool, .link_enc_create = dcn31_link_encoder_create, @@ -2223,9 +1870,9 @@ static bool dcn31_resource_construct( dc->caps.min_horizontal_blanking_period = 80; dc->caps.dmdata_alloc_size = 2048; - dc->caps.max_slave_planes = 1; - dc->caps.max_slave_yuv_planes = 1; - dc->caps.max_slave_rgb_planes = 1; + dc->caps.max_slave_planes = 2; + dc->caps.max_slave_yuv_planes = 2; + dc->caps.max_slave_rgb_planes = 2; dc->caps.post_blend_color_processing = true; dc->caps.force_dp_tps4_for_cp2520 = true; dc->caps.dp_hpo = true; diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h index 4b7ab21ea15b..1ce6509c1ed1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h @@ -31,6 +31,9 @@ #define TO_DCN31_RES_POOL(pool)\ container_of(pool, struct dcn31_resource_pool, base) +extern struct _vcs_dpi_ip_params_st dcn3_1_ip; +extern struct _vcs_dpi_soc_bounding_box_st dcn3_1_soc; + struct dcn31_resource_pool { struct resource_pool base; }; @@ -47,7 +50,6 @@ int dcn31_populate_dml_pipes_from_context( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, bool fast_validate); -void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params); void dcn31_update_soc_for_wm_a(struct dc *dc, struct dc_state *context); struct resource_pool *dcn31_create_resource_pool( diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/Makefile b/drivers/gpu/drm/amd/display/dc/dcn315/Makefile index c831ad46e81c..59381d24800b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn315/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn315/Makefile @@ -25,32 +25,6 @@ DCN315 = dcn315_resource.o -ifdef CONFIG_X86 -CFLAGS_$(AMDDALPATH)/dc/dcn315/dcn315_resource.o := -msse -endif - -ifdef CONFIG_PPC64 -CFLAGS_$(AMDDALPATH)/dc/dcn315/dcn315_resource.o := -mhard-float -maltivec -endif - -ifdef CONFIG_CC_IS_GCC -ifeq ($(call cc-ifversion, -lt, 0701, y), y) -IS_OLD_GCC = 1 -endif -CFLAGS_$(AMDDALPATH)/dc/dcn315/dcn315_resource.o += -mhard-float -endif - -ifdef CONFIG_X86 -ifdef IS_OLD_GCC -# Stack alignment mismatch, proceed with caution. -# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 -# (8B stack alignment). -CFLAGS_$(AMDDALPATH)/dc/dcn315/dcn315_resource.o += -mpreferred-stack-boundary=4 -else -CFLAGS_$(AMDDALPATH)/dc/dcn315/dcn315_resource.o += -msse2 -endif -endif - AMD_DAL_DCN315 = $(addprefix $(AMDDALPATH)/dc/dcn315/,$(DCN315)) AMD_DISPLAY_FILES += $(AMD_DAL_DCN315) diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c index 06adb77c206b..e6f9312e3a48 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c @@ -66,6 +66,7 @@ #include "virtual/virtual_stream_encoder.h" #include "dce110/dce110_resource.h" #include "dml/display_mode_vba.h" +#include "dml/dcn31/dcn31_fpu.h" #include "dcn31/dcn31_dccg.h" #include "dcn10/dcn10_resource.h" #include "dcn31/dcn31_panel_cntl.h" @@ -133,158 +134,9 @@ #include "link_enc_cfg.h" -#define DC_LOGGER_INIT(logger) - -#define DCN3_15_DEFAULT_DET_SIZE 192 #define DCN3_15_MAX_DET_SIZE 384 -#define DCN3_15_MIN_COMPBUF_SIZE_KB 128 #define DCN3_15_CRB_SEGMENT_SIZE_KB 64 -struct _vcs_dpi_ip_params_st dcn3_15_ip = { - .gpuvm_enable = 1, - .gpuvm_max_page_table_levels = 1, - .hostvm_enable = 1, - .hostvm_max_page_table_levels = 2, - .rob_buffer_size_kbytes = 64, - .det_buffer_size_kbytes = DCN3_15_DEFAULT_DET_SIZE, - .min_comp_buffer_size_kbytes = DCN3_15_MIN_COMPBUF_SIZE_KB, - .config_return_buffer_size_in_kbytes = 1024, - .compressed_buffer_segment_size_in_kbytes = 64, - .meta_fifo_size_in_kentries = 32, - .zero_size_buffer_entries = 512, - .compbuf_reserved_space_64b = 256, - .compbuf_reserved_space_zs = 64, - .dpp_output_buffer_pixels = 2560, - .opp_output_buffer_lines = 1, - .pixel_chunk_size_kbytes = 8, - .meta_chunk_size_kbytes = 2, - .min_meta_chunk_size_bytes = 256, - .writeback_chunk_size_kbytes = 8, - .ptoi_supported = false, - .num_dsc = 3, - .maximum_dsc_bits_per_component = 10, - .dsc422_native_support = false, - .is_line_buffer_bpp_fixed = true, - .line_buffer_fixed_bpp = 49, - .line_buffer_size_bits = 789504, - .max_line_buffer_lines = 12, - .writeback_interface_buffer_size_kbytes = 90, - .max_num_dpp = 4, - .max_num_otg = 4, - .max_num_hdmi_frl_outputs = 1, - .max_num_wb = 1, - .max_dchub_pscl_bw_pix_per_clk = 4, - .max_pscl_lb_bw_pix_per_clk = 2, - .max_lb_vscl_bw_pix_per_clk = 4, - .max_vscl_hscl_bw_pix_per_clk = 4, - .max_hscl_ratio = 6, - .max_vscl_ratio = 6, - .max_hscl_taps = 8, - .max_vscl_taps = 8, - .dpte_buffer_size_in_pte_reqs_luma = 64, - .dpte_buffer_size_in_pte_reqs_chroma = 34, - .dispclk_ramp_margin_percent = 1, - .max_inter_dcn_tile_repeaters = 9, - .cursor_buffer_size = 16, - .cursor_chunk_size = 2, - .writeback_line_buffer_buffer_size = 0, - .writeback_min_hscl_ratio = 1, - .writeback_min_vscl_ratio = 1, - .writeback_max_hscl_ratio = 1, - .writeback_max_vscl_ratio = 1, - .writeback_max_hscl_taps = 1, - .writeback_max_vscl_taps = 1, - .dppclk_delay_subtotal = 46, - .dppclk_delay_scl = 50, - .dppclk_delay_scl_lb_only = 16, - .dppclk_delay_cnvc_formatter = 27, - .dppclk_delay_cnvc_cursor = 6, - .dispclk_delay_subtotal = 119, - .dynamic_metadata_vm_enabled = false, - .odm_combine_4to1_supported = false, - .dcc_supported = true, -}; - -struct _vcs_dpi_soc_bounding_box_st dcn3_15_soc = { - /*TODO: correct dispclk/dppclk voltage level determination*/ - .clock_limits = { - { - .state = 0, - .dispclk_mhz = 1372.0, - .dppclk_mhz = 1372.0, - .phyclk_mhz = 810.0, - .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 417.0, - .dtbclk_mhz = 600.0, - }, - { - .state = 1, - .dispclk_mhz = 1372.0, - .dppclk_mhz = 1372.0, - .phyclk_mhz = 810.0, - .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 417.0, - .dtbclk_mhz = 600.0, - }, - { - .state = 2, - .dispclk_mhz = 1372.0, - .dppclk_mhz = 1372.0, - .phyclk_mhz = 810.0, - .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 417.0, - .dtbclk_mhz = 600.0, - }, - { - .state = 3, - .dispclk_mhz = 1372.0, - .dppclk_mhz = 1372.0, - .phyclk_mhz = 810.0, - .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 417.0, - .dtbclk_mhz = 600.0, - }, - { - .state = 4, - .dispclk_mhz = 1372.0, - .dppclk_mhz = 1372.0, - .phyclk_mhz = 810.0, - .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 417.0, - .dtbclk_mhz = 600.0, - }, - }, - .num_states = 5, - .sr_exit_time_us = 9.0, - .sr_enter_plus_exit_time_us = 11.0, - .sr_exit_z8_time_us = 50.0, - .sr_enter_plus_exit_z8_time_us = 50.0, - .writeback_latency_us = 12.0, - .dram_channel_width_bytes = 4, - .round_trip_ping_latency_dcfclk_cycles = 106, - .urgent_latency_pixel_data_only_us = 4.0, - .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, - .urgent_latency_vm_data_only_us = 4.0, - .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, - .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, - .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, - .pct_ideal_sdp_bw_after_urgent = 80.0, - .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0, - .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, - .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, - .max_avg_sdp_bw_use_normal_percent = 60.0, - .max_avg_dram_bw_use_normal_percent = 60.0, - .fabric_datapath_to_dcn_data_return_bytes = 32, - .return_bus_width_bytes = 64, - .downspread_percent = 0.38, - .dcn_downspread_percent = 0.38, - .gpuvm_min_page_size_bytes = 4096, - .hostvm_min_page_size_bytes = 4096, - .do_urgent_latency_adjustment = false, - .urgent_latency_adjustment_fabric_clock_component_us = 0, - .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, -}; - enum dcn31_clk_src_array_id { DCN31_CLK_SRC_PLL0, DCN31_CLK_SRC_PLL1, @@ -1859,88 +1711,6 @@ static struct dc_cap_funcs cap_funcs = { .get_dcc_compression_cap = dcn20_get_dcc_compression_cap }; -static void dcn315_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) -{ - struct clk_limit_table *clk_table = &bw_params->clk_table; - struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES]; - unsigned int i, closest_clk_lvl; - int max_dispclk_mhz = 0, max_dppclk_mhz = 0; - int j; - - // Default clock levels are used for diags, which may lead to overclocking. - if (!IS_DIAG_DC(dc->ctx->dce_environment)) { - - dcn3_15_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator; - dcn3_15_ip.max_num_dpp = dc->res_pool->pipe_count; - dcn3_15_soc.num_chans = bw_params->num_channels; - - ASSERT(clk_table->num_entries); - - /* Prepass to find max clocks independent of voltage level. */ - for (i = 0; i < clk_table->num_entries; ++i) { - if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz) - max_dispclk_mhz = clk_table->entries[i].dispclk_mhz; - if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz) - max_dppclk_mhz = clk_table->entries[i].dppclk_mhz; - } - - for (i = 0; i < clk_table->num_entries; i++) { - /* loop backwards*/ - for (closest_clk_lvl = 0, j = dcn3_15_soc.num_states - 1; j >= 0; j--) { - if ((unsigned int) dcn3_15_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) { - closest_clk_lvl = j; - break; - } - } - if (clk_table->num_entries == 1) { - /*smu gives one DPM level, let's take the highest one*/ - closest_clk_lvl = dcn3_15_soc.num_states - 1; - } - - clock_limits[i].state = i; - - /* Clocks dependent on voltage level. */ - clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; - if (clk_table->num_entries == 1 && - clock_limits[i].dcfclk_mhz < dcn3_15_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) { - /*SMU fix not released yet*/ - clock_limits[i].dcfclk_mhz = dcn3_15_soc.clock_limits[closest_clk_lvl].dcfclk_mhz; - } - clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; - clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz; - clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio; - - /* Clocks independent of voltage level. */ - clock_limits[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz : - dcn3_15_soc.clock_limits[closest_clk_lvl].dispclk_mhz; - - clock_limits[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz : - dcn3_15_soc.clock_limits[closest_clk_lvl].dppclk_mhz; - - clock_limits[i].dram_bw_per_chan_gbps = dcn3_15_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; - clock_limits[i].dscclk_mhz = dcn3_15_soc.clock_limits[closest_clk_lvl].dscclk_mhz; - clock_limits[i].dtbclk_mhz = dcn3_15_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; - clock_limits[i].phyclk_d18_mhz = dcn3_15_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; - clock_limits[i].phyclk_mhz = dcn3_15_soc.clock_limits[closest_clk_lvl].phyclk_mhz; - } - for (i = 0; i < clk_table->num_entries; i++) - dcn3_15_soc.clock_limits[i] = clock_limits[i]; - if (clk_table->num_entries) { - dcn3_15_soc.num_states = clk_table->num_entries; - } - } - - if (max_dispclk_mhz) { - dcn3_15_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2; - dc->dml.soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2; - } - - if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) - dml_init_instance(&dc->dml, &dcn3_15_soc, &dcn3_15_ip, DML_PROJECT_DCN31); - else - dml_init_instance(&dc->dml, &dcn3_15_soc, &dcn3_15_ip, DML_PROJECT_DCN31_FPGA); -} - static struct resource_funcs dcn315_res_pool_funcs = { .destroy = dcn315_destroy_resource_pool, .link_enc_create = dcn31_link_encoder_create, @@ -1988,11 +1758,10 @@ static bool dcn315_resource_construct( pool->base.mpcc_count = pool->base.res_cap->num_timing_generator; dc->caps.max_downscale_ratio = 600; dc->caps.i2c_speed_in_khz = 100; - dc->caps.i2c_speed_in_khz_hdcp = 5; /*1.4 w/a applied by default*/ + dc->caps.i2c_speed_in_khz_hdcp = 100; dc->caps.max_cursor_size = 256; dc->caps.min_horizontal_blanking_period = 80; dc->caps.dmdata_alloc_size = 2048; - dc->caps.max_slave_planes = 1; dc->caps.max_slave_yuv_planes = 1; dc->caps.max_slave_rgb_planes = 1; diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h index f3a36820a31f..39929fa67a51 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h @@ -31,6 +31,9 @@ #define TO_DCN315_RES_POOL(pool)\ container_of(pool, struct dcn315_resource_pool, base) +extern struct _vcs_dpi_ip_params_st dcn3_15_ip; +extern struct _vcs_dpi_ip_params_st dcn3_15_soc; + struct dcn315_resource_pool { struct resource_pool base; }; diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/Makefile b/drivers/gpu/drm/amd/display/dc/dcn316/Makefile index cd87b687c5e2..819d44a9439b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn316/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn316/Makefile @@ -25,32 +25,6 @@ DCN316 = dcn316_resource.o -ifdef CONFIG_X86 -CFLAGS_$(AMDDALPATH)/dc/dcn316/dcn316_resource.o := -msse -endif - -ifdef CONFIG_PPC64 -CFLAGS_$(AMDDALPATH)/dc/dcn316/dcn316_resource.o := -mhard-float -maltivec -endif - -ifdef CONFIG_CC_IS_GCC -ifeq ($(call cc-ifversion, -lt, 0701, y), y) -IS_OLD_GCC = 1 -endif -CFLAGS_$(AMDDALPATH)/dc/dcn316/dcn316_resource.o += -mhard-float -endif - -ifdef CONFIG_X86 -ifdef IS_OLD_GCC -# Stack alignment mismatch, proceed with caution. -# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 -# (8B stack alignment). -CFLAGS_$(AMDDALPATH)/dc/dcn316/dcn316_resource.o += -mpreferred-stack-boundary=4 -else -CFLAGS_$(AMDDALPATH)/dc/dcn316/dcn316_resource.o += -msse2 -endif -endif - AMD_DAL_DCN316 = $(addprefix $(AMDDALPATH)/dc/dcn316/,$(DCN316)) AMD_DISPLAY_FILES += $(AMD_DAL_DCN316) diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c index 8decc3ccf8ca..d5c195749a81 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c @@ -66,6 +66,7 @@ #include "virtual/virtual_stream_encoder.h" #include "dce110/dce110_resource.h" #include "dml/display_mode_vba.h" +#include "dml/dcn31/dcn31_fpu.h" #include "dcn31/dcn31_dccg.h" #include "dcn10/dcn10_resource.h" #include "dcn31/dcn31_panel_cntl.h" @@ -123,157 +124,10 @@ #include "link_enc_cfg.h" -#define DC_LOGGER_INIT(logger) - -#define DCN3_16_DEFAULT_DET_SIZE 192 #define DCN3_16_MAX_DET_SIZE 384 #define DCN3_16_MIN_COMPBUF_SIZE_KB 128 #define DCN3_16_CRB_SEGMENT_SIZE_KB 64 -struct _vcs_dpi_ip_params_st dcn3_16_ip = { - .gpuvm_enable = 1, - .gpuvm_max_page_table_levels = 1, - .hostvm_enable = 1, - .hostvm_max_page_table_levels = 2, - .rob_buffer_size_kbytes = 64, - .det_buffer_size_kbytes = DCN3_16_DEFAULT_DET_SIZE, - .config_return_buffer_size_in_kbytes = 1024, - .compressed_buffer_segment_size_in_kbytes = 64, - .meta_fifo_size_in_kentries = 32, - .zero_size_buffer_entries = 512, - .compbuf_reserved_space_64b = 256, - .compbuf_reserved_space_zs = 64, - .dpp_output_buffer_pixels = 2560, - .opp_output_buffer_lines = 1, - .pixel_chunk_size_kbytes = 8, - .meta_chunk_size_kbytes = 2, - .min_meta_chunk_size_bytes = 256, - .writeback_chunk_size_kbytes = 8, - .ptoi_supported = false, - .num_dsc = 3, - .maximum_dsc_bits_per_component = 10, - .dsc422_native_support = false, - .is_line_buffer_bpp_fixed = true, - .line_buffer_fixed_bpp = 48, - .line_buffer_size_bits = 789504, - .max_line_buffer_lines = 12, - .writeback_interface_buffer_size_kbytes = 90, - .max_num_dpp = 4, - .max_num_otg = 4, - .max_num_hdmi_frl_outputs = 1, - .max_num_wb = 1, - .max_dchub_pscl_bw_pix_per_clk = 4, - .max_pscl_lb_bw_pix_per_clk = 2, - .max_lb_vscl_bw_pix_per_clk = 4, - .max_vscl_hscl_bw_pix_per_clk = 4, - .max_hscl_ratio = 6, - .max_vscl_ratio = 6, - .max_hscl_taps = 8, - .max_vscl_taps = 8, - .dpte_buffer_size_in_pte_reqs_luma = 64, - .dpte_buffer_size_in_pte_reqs_chroma = 34, - .dispclk_ramp_margin_percent = 1, - .max_inter_dcn_tile_repeaters = 8, - .cursor_buffer_size = 16, - .cursor_chunk_size = 2, - .writeback_line_buffer_buffer_size = 0, - .writeback_min_hscl_ratio = 1, - .writeback_min_vscl_ratio = 1, - .writeback_max_hscl_ratio = 1, - .writeback_max_vscl_ratio = 1, - .writeback_max_hscl_taps = 1, - .writeback_max_vscl_taps = 1, - .dppclk_delay_subtotal = 46, - .dppclk_delay_scl = 50, - .dppclk_delay_scl_lb_only = 16, - .dppclk_delay_cnvc_formatter = 27, - .dppclk_delay_cnvc_cursor = 6, - .dispclk_delay_subtotal = 119, - .dynamic_metadata_vm_enabled = false, - .odm_combine_4to1_supported = false, - .dcc_supported = true, -}; - -struct _vcs_dpi_soc_bounding_box_st dcn3_16_soc = { - /*TODO: correct dispclk/dppclk voltage level determination*/ - .clock_limits = { - { - .state = 0, - .dispclk_mhz = 556.0, - .dppclk_mhz = 556.0, - .phyclk_mhz = 600.0, - .phyclk_d18_mhz = 445.0, - .dscclk_mhz = 186.0, - .dtbclk_mhz = 625.0, - }, - { - .state = 1, - .dispclk_mhz = 625.0, - .dppclk_mhz = 625.0, - .phyclk_mhz = 810.0, - .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 209.0, - .dtbclk_mhz = 625.0, - }, - { - .state = 2, - .dispclk_mhz = 625.0, - .dppclk_mhz = 625.0, - .phyclk_mhz = 810.0, - .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 209.0, - .dtbclk_mhz = 625.0, - }, - { - .state = 3, - .dispclk_mhz = 1112.0, - .dppclk_mhz = 1112.0, - .phyclk_mhz = 810.0, - .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 371.0, - .dtbclk_mhz = 625.0, - }, - { - .state = 4, - .dispclk_mhz = 1250.0, - .dppclk_mhz = 1250.0, - .phyclk_mhz = 810.0, - .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 417.0, - .dtbclk_mhz = 625.0, - }, - }, - .num_states = 5, - .sr_exit_time_us = 9.0, - .sr_enter_plus_exit_time_us = 11.0, - .sr_exit_z8_time_us = 442.0, - .sr_enter_plus_exit_z8_time_us = 560.0, - .writeback_latency_us = 12.0, - .dram_channel_width_bytes = 4, - .round_trip_ping_latency_dcfclk_cycles = 106, - .urgent_latency_pixel_data_only_us = 4.0, - .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, - .urgent_latency_vm_data_only_us = 4.0, - .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, - .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, - .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, - .pct_ideal_sdp_bw_after_urgent = 80.0, - .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0, - .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, - .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, - .max_avg_sdp_bw_use_normal_percent = 60.0, - .max_avg_dram_bw_use_normal_percent = 60.0, - .fabric_datapath_to_dcn_data_return_bytes = 32, - .return_bus_width_bytes = 64, - .downspread_percent = 0.38, - .dcn_downspread_percent = 0.5, - .gpuvm_min_page_size_bytes = 4096, - .hostvm_min_page_size_bytes = 4096, - .do_urgent_latency_adjustment = false, - .urgent_latency_adjustment_fabric_clock_component_us = 0, - .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, -}; - enum dcn31_clk_src_array_id { DCN31_CLK_SRC_PLL0, DCN31_CLK_SRC_PLL1, @@ -1859,89 +1713,6 @@ static struct dc_cap_funcs cap_funcs = { .get_dcc_compression_cap = dcn20_get_dcc_compression_cap }; -static void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) -{ - struct clk_limit_table *clk_table = &bw_params->clk_table; - struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES]; - unsigned int i, closest_clk_lvl; - int max_dispclk_mhz = 0, max_dppclk_mhz = 0; - int j; - - // Default clock levels are used for diags, which may lead to overclocking. - if (!IS_DIAG_DC(dc->ctx->dce_environment)) { - - dcn3_16_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator; - dcn3_16_ip.max_num_dpp = dc->res_pool->pipe_count; - dcn3_16_soc.num_chans = bw_params->num_channels; - - ASSERT(clk_table->num_entries); - - /* Prepass to find max clocks independent of voltage level. */ - for (i = 0; i < clk_table->num_entries; ++i) { - if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz) - max_dispclk_mhz = clk_table->entries[i].dispclk_mhz; - if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz) - max_dppclk_mhz = clk_table->entries[i].dppclk_mhz; - } - - for (i = 0; i < clk_table->num_entries; i++) { - /* loop backwards*/ - for (closest_clk_lvl = 0, j = dcn3_16_soc.num_states - 1; j >= 0; j--) { - if ((unsigned int) dcn3_16_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) { - closest_clk_lvl = j; - break; - } - } - // Ported from DCN315 - if (clk_table->num_entries == 1) { - /*smu gives one DPM level, let's take the highest one*/ - closest_clk_lvl = dcn3_16_soc.num_states - 1; - } - - clock_limits[i].state = i; - - /* Clocks dependent on voltage level. */ - clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; - if (clk_table->num_entries == 1 && - clock_limits[i].dcfclk_mhz < dcn3_16_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) { - /*SMU fix not released yet*/ - clock_limits[i].dcfclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dcfclk_mhz; - } - clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; - clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz; - clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio; - - /* Clocks independent of voltage level. */ - clock_limits[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz : - dcn3_16_soc.clock_limits[closest_clk_lvl].dispclk_mhz; - - clock_limits[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz : - dcn3_16_soc.clock_limits[closest_clk_lvl].dppclk_mhz; - - clock_limits[i].dram_bw_per_chan_gbps = dcn3_16_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; - clock_limits[i].dscclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dscclk_mhz; - clock_limits[i].dtbclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; - clock_limits[i].phyclk_d18_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; - clock_limits[i].phyclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].phyclk_mhz; - } - for (i = 0; i < clk_table->num_entries; i++) - dcn3_16_soc.clock_limits[i] = clock_limits[i]; - if (clk_table->num_entries) { - dcn3_16_soc.num_states = clk_table->num_entries; - } - } - - if (max_dispclk_mhz) { - dcn3_16_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2; - dc->dml.soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2; - } - - if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) - dml_init_instance(&dc->dml, &dcn3_16_soc, &dcn3_16_ip, DML_PROJECT_DCN31); - else - dml_init_instance(&dc->dml, &dcn3_16_soc, &dcn3_16_ip, DML_PROJECT_DCN31_FPGA); -} - static struct resource_funcs dcn316_res_pool_funcs = { .destroy = dcn316_destroy_resource_pool, .link_enc_create = dcn31_link_encoder_create, @@ -1989,11 +1760,10 @@ static bool dcn316_resource_construct( pool->base.mpcc_count = pool->base.res_cap->num_timing_generator; dc->caps.max_downscale_ratio = 600; dc->caps.i2c_speed_in_khz = 100; - dc->caps.i2c_speed_in_khz_hdcp = 5; /*1.4 w/a applied by default*/ + dc->caps.i2c_speed_in_khz_hdcp = 100; dc->caps.max_cursor_size = 256; dc->caps.min_horizontal_blanking_period = 80; dc->caps.dmdata_alloc_size = 2048; - dc->caps.max_slave_planes = 1; dc->caps.max_slave_yuv_planes = 1; dc->caps.max_slave_rgb_planes = 1; diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h index 9d0d60cb9482..0dc5a6c13ae7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h @@ -31,6 +31,9 @@ #define TO_DCN316_RES_POOL(pool)\ container_of(pool, struct dcn316_resource_pool, base) +extern struct _vcs_dpi_ip_params_st dcn3_16_ip; +extern struct _vcs_dpi_ip_params_st dcn3_16_soc; + struct dcn316_resource_pool { struct resource_pool base; }; diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index 28978ce62f87..ee911452c048 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -71,6 +71,7 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_ccflags) $(fram CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/dcn31_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn302/dcn302_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn303/dcn303_fpu.o := $(dml_ccflags) @@ -114,6 +115,7 @@ DML += dcn20/display_rq_dlg_calc_20v2.o dcn20/display_mode_vba_20v2.o DML += dcn21/display_rq_dlg_calc_21.o dcn21/display_mode_vba_21.o DML += dcn30/display_mode_vba_30.o dcn30/display_rq_dlg_calc_30.o DML += dcn31/display_mode_vba_31.o dcn31/display_rq_dlg_calc_31.o +DML += dcn31/dcn31_fpu.o DML += dcn301/dcn301_fpu.o DML += dcn302/dcn302_fpu.o DML += dcn303/dcn303_fpu.o diff --git a/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c index e447c74be713..db3b16b77034 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c @@ -639,7 +639,6 @@ static bool dcn_bw_apply_registry_override(struct dc *dc) { bool updated = false; - DC_FP_START(); if ((int)(dc->dcn_soc->sr_exit_time * 1000) != dc->debug.sr_exit_time_ns && dc->debug.sr_exit_time_ns) { updated = true; @@ -675,7 +674,6 @@ static bool dcn_bw_apply_registry_override(struct dc *dc) dc->dcn_soc->dram_clock_change_latency = dc->debug.dram_clock_change_latency_ns / 1000.0; } - DC_FP_END(); return updated; } @@ -764,7 +762,7 @@ static unsigned int get_highest_allowed_voltage_level(uint32_t chip_family, return 4; } -bool dcn10_validate_bandwidth( +bool dcn_validate_bandwidth( struct dc *dc, struct dc_state *context, bool fast_validate) @@ -790,7 +788,6 @@ bool dcn10_validate_bandwidth( dcn_bw_sync_calcs_and_dml(dc); memset(v, 0, sizeof(*v)); - DC_FP_START(); v->sr_exit_time = dc->dcn_soc->sr_exit_time; v->sr_enter_plus_exit_time = dc->dcn_soc->sr_enter_plus_exit_time; @@ -1323,8 +1320,6 @@ bool dcn10_validate_bandwidth( bw_limit = dc->dcn_soc->percent_disp_bw_limit * v->fabric_and_dram_bandwidth_vmax0p9; bw_limit_pass = (v->total_data_read_bandwidth / 1000.0) < bw_limit; - DC_FP_END(); - PERFORMANCE_TRACE_END(); BW_VAL_TRACE_FINISH(); @@ -1495,8 +1490,6 @@ void dcn_bw_update_from_pplib(struct dc *dc) res = dm_pp_get_clock_levels_by_type_with_voltage( ctx, DM_PP_CLOCK_TYPE_FCLK, &fclks); - DC_FP_START(); - if (res) res = verify_clock_values(&fclks); @@ -1526,13 +1519,9 @@ void dcn_bw_update_from_pplib(struct dc *dc) } else BREAK_TO_DEBUGGER(); - DC_FP_END(); - res = dm_pp_get_clock_levels_by_type_with_voltage( ctx, DM_PP_CLOCK_TYPE_DCFCLK, &dcfclks); - DC_FP_START(); - if (res) res = verify_clock_values(&dcfclks); @@ -1543,8 +1532,6 @@ void dcn_bw_update_from_pplib(struct dc *dc) dc->dcn_soc->dcfclkv_max0p9 = dcfclks.data[dcfclks.num_levels - 1].clocks_in_khz / 1000.0; } else BREAK_TO_DEBUGGER(); - - DC_FP_END(); } void dcn_bw_notify_pplib_of_wm_ranges(struct dc *dc) @@ -1559,11 +1546,9 @@ void dcn_bw_notify_pplib_of_wm_ranges(struct dc *dc) if (!pp || !pp->set_wm_ranges) return; - DC_FP_START(); min_fclk_khz = dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 * 1000000 / 32; min_dcfclk_khz = dc->dcn_soc->dcfclkv_min0p65 * 1000; socclk_khz = dc->dcn_soc->socclk * 1000; - DC_FP_END(); /* Now notify PPLib/SMU about which Watermarks sets they should select * depending on DPM state they are in. And update BW MGR GFX Engine and @@ -1614,7 +1599,6 @@ void dcn_bw_notify_pplib_of_wm_ranges(struct dc *dc) void dcn_bw_sync_calcs_and_dml(struct dc *dc) { - DC_FP_START(); DC_LOG_BANDWIDTH_CALCS("sr_exit_time: %f ns\n" "sr_enter_plus_exit_time: %f ns\n" "urgent_latency: %f ns\n" @@ -1803,5 +1787,4 @@ void dcn_bw_sync_calcs_and_dml(struct dc *dc) dc->dml.ip.bug_forcing_LC_req_same_size_fixed = dc->dcn_ip->bug_forcing_luma_and_chroma_request_to_same_size_fixed == dcn_bw_yes; dc->dml.ip.dcfclk_cstate_latency = dc->dcn_ip->dcfclk_cstate_latency; - DC_FP_END(); } diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c index f93af45aeab4..f79dd40f8d81 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c @@ -1302,9 +1302,7 @@ int dcn20_populate_dml_pipes_from_context( } /* populate writeback information */ - DC_FP_START(); dc->res_pool->funcs->populate_dml_writeback_from_context(dc, res_ctx, pipes); - DC_FP_END(); return pipe_cnt; } diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c new file mode 100644 index 000000000000..a0a2e125c9c8 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c @@ -0,0 +1,863 @@ +/* + * Copyright 2019-2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "resource.h" +#include "clk_mgr.h" + +#include "dml/dcn20/dcn20_fpu.h" +#include "dcn31_fpu.h" + +/** + * DOC: DCN31x FPU manipulation Overview + * + * The DCN architecture relies on FPU operations, which require special + * compilation flags and the use of kernel_fpu_begin/end functions; ideally, we + * want to avoid spreading FPU access across multiple files. With this idea in + * mind, this file aims to centralize all DCN3.1.x functions that require FPU + * access in a single place. Code in this file follows the following code + * pattern: + * + * 1. Functions that use FPU operations should be isolated in static functions. + * 2. The FPU functions should have the noinline attribute to ensure anything + * that deals with FP register is contained within this call. + * 3. All function that needs to be accessed outside this file requires a + * public interface that not uses any FPU reference. + * 4. Developers **must not** use DC_FP_START/END in this file, but they need + * to ensure that the caller invokes it before access any function available + * in this file. For this reason, public functions in this file must invoke + * dc_assert_fp_enabled(); + */ + +struct _vcs_dpi_ip_params_st dcn3_1_ip = { + .gpuvm_enable = 1, + .gpuvm_max_page_table_levels = 1, + .hostvm_enable = 1, + .hostvm_max_page_table_levels = 2, + .rob_buffer_size_kbytes = 64, + .det_buffer_size_kbytes = DCN3_1_DEFAULT_DET_SIZE, + .config_return_buffer_size_in_kbytes = 1792, + .compressed_buffer_segment_size_in_kbytes = 64, + .meta_fifo_size_in_kentries = 32, + .zero_size_buffer_entries = 512, + .compbuf_reserved_space_64b = 256, + .compbuf_reserved_space_zs = 64, + .dpp_output_buffer_pixels = 2560, + .opp_output_buffer_lines = 1, + .pixel_chunk_size_kbytes = 8, + .meta_chunk_size_kbytes = 2, + .min_meta_chunk_size_bytes = 256, + .writeback_chunk_size_kbytes = 8, + .ptoi_supported = false, + .num_dsc = 3, + .maximum_dsc_bits_per_component = 10, + .dsc422_native_support = false, + .is_line_buffer_bpp_fixed = true, + .line_buffer_fixed_bpp = 48, + .line_buffer_size_bits = 789504, + .max_line_buffer_lines = 12, + .writeback_interface_buffer_size_kbytes = 90, + .max_num_dpp = 4, + .max_num_otg = 4, + .max_num_hdmi_frl_outputs = 1, + .max_num_wb = 1, + .max_dchub_pscl_bw_pix_per_clk = 4, + .max_pscl_lb_bw_pix_per_clk = 2, + .max_lb_vscl_bw_pix_per_clk = 4, + .max_vscl_hscl_bw_pix_per_clk = 4, + .max_hscl_ratio = 6, + .max_vscl_ratio = 6, + .max_hscl_taps = 8, + .max_vscl_taps = 8, + .dpte_buffer_size_in_pte_reqs_luma = 64, + .dpte_buffer_size_in_pte_reqs_chroma = 34, + .dispclk_ramp_margin_percent = 1, + .max_inter_dcn_tile_repeaters = 8, + .cursor_buffer_size = 16, + .cursor_chunk_size = 2, + .writeback_line_buffer_buffer_size = 0, + .writeback_min_hscl_ratio = 1, + .writeback_min_vscl_ratio = 1, + .writeback_max_hscl_ratio = 1, + .writeback_max_vscl_ratio = 1, + .writeback_max_hscl_taps = 1, + .writeback_max_vscl_taps = 1, + .dppclk_delay_subtotal = 46, + .dppclk_delay_scl = 50, + .dppclk_delay_scl_lb_only = 16, + .dppclk_delay_cnvc_formatter = 27, + .dppclk_delay_cnvc_cursor = 6, + .dispclk_delay_subtotal = 119, + .dynamic_metadata_vm_enabled = false, + .odm_combine_4to1_supported = false, + .dcc_supported = true, +}; + +struct _vcs_dpi_soc_bounding_box_st dcn3_1_soc = { + /*TODO: correct dispclk/dppclk voltage level determination*/ + .clock_limits = { + { + .state = 0, + .dispclk_mhz = 1200.0, + .dppclk_mhz = 1200.0, + .phyclk_mhz = 600.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 186.0, + .dtbclk_mhz = 625.0, + }, + { + .state = 1, + .dispclk_mhz = 1200.0, + .dppclk_mhz = 1200.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 209.0, + .dtbclk_mhz = 625.0, + }, + { + .state = 2, + .dispclk_mhz = 1200.0, + .dppclk_mhz = 1200.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 209.0, + .dtbclk_mhz = 625.0, + }, + { + .state = 3, + .dispclk_mhz = 1200.0, + .dppclk_mhz = 1200.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 371.0, + .dtbclk_mhz = 625.0, + }, + { + .state = 4, + .dispclk_mhz = 1200.0, + .dppclk_mhz = 1200.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 417.0, + .dtbclk_mhz = 625.0, + }, + }, + .num_states = 5, + .sr_exit_time_us = 9.0, + .sr_enter_plus_exit_time_us = 11.0, + .sr_exit_z8_time_us = 442.0, + .sr_enter_plus_exit_z8_time_us = 560.0, + .writeback_latency_us = 12.0, + .dram_channel_width_bytes = 4, + .round_trip_ping_latency_dcfclk_cycles = 106, + .urgent_latency_pixel_data_only_us = 4.0, + .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, + .urgent_latency_vm_data_only_us = 4.0, + .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, + .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, + .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, + .pct_ideal_sdp_bw_after_urgent = 80.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, + .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, + .max_avg_sdp_bw_use_normal_percent = 60.0, + .max_avg_dram_bw_use_normal_percent = 60.0, + .fabric_datapath_to_dcn_data_return_bytes = 32, + .return_bus_width_bytes = 64, + .downspread_percent = 0.38, + .dcn_downspread_percent = 0.5, + .gpuvm_min_page_size_bytes = 4096, + .hostvm_min_page_size_bytes = 4096, + .do_urgent_latency_adjustment = false, + .urgent_latency_adjustment_fabric_clock_component_us = 0, + .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, +}; + +struct _vcs_dpi_ip_params_st dcn3_15_ip = { + .gpuvm_enable = 1, + .gpuvm_max_page_table_levels = 1, + .hostvm_enable = 1, + .hostvm_max_page_table_levels = 2, + .rob_buffer_size_kbytes = 64, + .det_buffer_size_kbytes = DCN3_15_DEFAULT_DET_SIZE, + .min_comp_buffer_size_kbytes = DCN3_15_MIN_COMPBUF_SIZE_KB, + .config_return_buffer_size_in_kbytes = 1024, + .compressed_buffer_segment_size_in_kbytes = 64, + .meta_fifo_size_in_kentries = 32, + .zero_size_buffer_entries = 512, + .compbuf_reserved_space_64b = 256, + .compbuf_reserved_space_zs = 64, + .dpp_output_buffer_pixels = 2560, + .opp_output_buffer_lines = 1, + .pixel_chunk_size_kbytes = 8, + .meta_chunk_size_kbytes = 2, + .min_meta_chunk_size_bytes = 256, + .writeback_chunk_size_kbytes = 8, + .ptoi_supported = false, + .num_dsc = 3, + .maximum_dsc_bits_per_component = 10, + .dsc422_native_support = false, + .is_line_buffer_bpp_fixed = true, + .line_buffer_fixed_bpp = 49, + .line_buffer_size_bits = 789504, + .max_line_buffer_lines = 12, + .writeback_interface_buffer_size_kbytes = 90, + .max_num_dpp = 4, + .max_num_otg = 4, + .max_num_hdmi_frl_outputs = 1, + .max_num_wb = 1, + .max_dchub_pscl_bw_pix_per_clk = 4, + .max_pscl_lb_bw_pix_per_clk = 2, + .max_lb_vscl_bw_pix_per_clk = 4, + .max_vscl_hscl_bw_pix_per_clk = 4, + .max_hscl_ratio = 6, + .max_vscl_ratio = 6, + .max_hscl_taps = 8, + .max_vscl_taps = 8, + .dpte_buffer_size_in_pte_reqs_luma = 64, + .dpte_buffer_size_in_pte_reqs_chroma = 34, + .dispclk_ramp_margin_percent = 1, + .max_inter_dcn_tile_repeaters = 9, + .cursor_buffer_size = 16, + .cursor_chunk_size = 2, + .writeback_line_buffer_buffer_size = 0, + .writeback_min_hscl_ratio = 1, + .writeback_min_vscl_ratio = 1, + .writeback_max_hscl_ratio = 1, + .writeback_max_vscl_ratio = 1, + .writeback_max_hscl_taps = 1, + .writeback_max_vscl_taps = 1, + .dppclk_delay_subtotal = 46, + .dppclk_delay_scl = 50, + .dppclk_delay_scl_lb_only = 16, + .dppclk_delay_cnvc_formatter = 27, + .dppclk_delay_cnvc_cursor = 6, + .dispclk_delay_subtotal = 119, + .dynamic_metadata_vm_enabled = false, + .odm_combine_4to1_supported = false, + .dcc_supported = true, +}; + +struct _vcs_dpi_soc_bounding_box_st dcn3_15_soc = { + /*TODO: correct dispclk/dppclk voltage level determination*/ + .clock_limits = { + { + .state = 0, + .dispclk_mhz = 1372.0, + .dppclk_mhz = 1372.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 417.0, + .dtbclk_mhz = 600.0, + }, + { + .state = 1, + .dispclk_mhz = 1372.0, + .dppclk_mhz = 1372.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 417.0, + .dtbclk_mhz = 600.0, + }, + { + .state = 2, + .dispclk_mhz = 1372.0, + .dppclk_mhz = 1372.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 417.0, + .dtbclk_mhz = 600.0, + }, + { + .state = 3, + .dispclk_mhz = 1372.0, + .dppclk_mhz = 1372.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 417.0, + .dtbclk_mhz = 600.0, + }, + { + .state = 4, + .dispclk_mhz = 1372.0, + .dppclk_mhz = 1372.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 417.0, + .dtbclk_mhz = 600.0, + }, + }, + .num_states = 5, + .sr_exit_time_us = 9.0, + .sr_enter_plus_exit_time_us = 11.0, + .sr_exit_z8_time_us = 50.0, + .sr_enter_plus_exit_z8_time_us = 50.0, + .writeback_latency_us = 12.0, + .dram_channel_width_bytes = 4, + .round_trip_ping_latency_dcfclk_cycles = 106, + .urgent_latency_pixel_data_only_us = 4.0, + .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, + .urgent_latency_vm_data_only_us = 4.0, + .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, + .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, + .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, + .pct_ideal_sdp_bw_after_urgent = 80.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, + .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, + .max_avg_sdp_bw_use_normal_percent = 60.0, + .max_avg_dram_bw_use_normal_percent = 60.0, + .fabric_datapath_to_dcn_data_return_bytes = 32, + .return_bus_width_bytes = 64, + .downspread_percent = 0.38, + .dcn_downspread_percent = 0.38, + .gpuvm_min_page_size_bytes = 4096, + .hostvm_min_page_size_bytes = 4096, + .do_urgent_latency_adjustment = false, + .urgent_latency_adjustment_fabric_clock_component_us = 0, + .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, +}; + +struct _vcs_dpi_ip_params_st dcn3_16_ip = { + .gpuvm_enable = 1, + .gpuvm_max_page_table_levels = 1, + .hostvm_enable = 1, + .hostvm_max_page_table_levels = 2, + .rob_buffer_size_kbytes = 64, + .det_buffer_size_kbytes = DCN3_16_DEFAULT_DET_SIZE, + .config_return_buffer_size_in_kbytes = 1024, + .compressed_buffer_segment_size_in_kbytes = 64, + .meta_fifo_size_in_kentries = 32, + .zero_size_buffer_entries = 512, + .compbuf_reserved_space_64b = 256, + .compbuf_reserved_space_zs = 64, + .dpp_output_buffer_pixels = 2560, + .opp_output_buffer_lines = 1, + .pixel_chunk_size_kbytes = 8, + .meta_chunk_size_kbytes = 2, + .min_meta_chunk_size_bytes = 256, + .writeback_chunk_size_kbytes = 8, + .ptoi_supported = false, + .num_dsc = 3, + .maximum_dsc_bits_per_component = 10, + .dsc422_native_support = false, + .is_line_buffer_bpp_fixed = true, + .line_buffer_fixed_bpp = 48, + .line_buffer_size_bits = 789504, + .max_line_buffer_lines = 12, + .writeback_interface_buffer_size_kbytes = 90, + .max_num_dpp = 4, + .max_num_otg = 4, + .max_num_hdmi_frl_outputs = 1, + .max_num_wb = 1, + .max_dchub_pscl_bw_pix_per_clk = 4, + .max_pscl_lb_bw_pix_per_clk = 2, + .max_lb_vscl_bw_pix_per_clk = 4, + .max_vscl_hscl_bw_pix_per_clk = 4, + .max_hscl_ratio = 6, + .max_vscl_ratio = 6, + .max_hscl_taps = 8, + .max_vscl_taps = 8, + .dpte_buffer_size_in_pte_reqs_luma = 64, + .dpte_buffer_size_in_pte_reqs_chroma = 34, + .dispclk_ramp_margin_percent = 1, + .max_inter_dcn_tile_repeaters = 8, + .cursor_buffer_size = 16, + .cursor_chunk_size = 2, + .writeback_line_buffer_buffer_size = 0, + .writeback_min_hscl_ratio = 1, + .writeback_min_vscl_ratio = 1, + .writeback_max_hscl_ratio = 1, + .writeback_max_vscl_ratio = 1, + .writeback_max_hscl_taps = 1, + .writeback_max_vscl_taps = 1, + .dppclk_delay_subtotal = 46, + .dppclk_delay_scl = 50, + .dppclk_delay_scl_lb_only = 16, + .dppclk_delay_cnvc_formatter = 27, + .dppclk_delay_cnvc_cursor = 6, + .dispclk_delay_subtotal = 119, + .dynamic_metadata_vm_enabled = false, + .odm_combine_4to1_supported = false, + .dcc_supported = true, +}; + +struct _vcs_dpi_soc_bounding_box_st dcn3_16_soc = { + /*TODO: correct dispclk/dppclk voltage level determination*/ + .clock_limits = { + { + .state = 0, + .dispclk_mhz = 556.0, + .dppclk_mhz = 556.0, + .phyclk_mhz = 600.0, + .phyclk_d18_mhz = 445.0, + .dscclk_mhz = 186.0, + .dtbclk_mhz = 625.0, + }, + { + .state = 1, + .dispclk_mhz = 625.0, + .dppclk_mhz = 625.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 209.0, + .dtbclk_mhz = 625.0, + }, + { + .state = 2, + .dispclk_mhz = 625.0, + .dppclk_mhz = 625.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 209.0, + .dtbclk_mhz = 625.0, + }, + { + .state = 3, + .dispclk_mhz = 1112.0, + .dppclk_mhz = 1112.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 371.0, + .dtbclk_mhz = 625.0, + }, + { + .state = 4, + .dispclk_mhz = 1250.0, + .dppclk_mhz = 1250.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 417.0, + .dtbclk_mhz = 625.0, + }, + }, + .num_states = 5, + .sr_exit_time_us = 9.0, + .sr_enter_plus_exit_time_us = 11.0, + .sr_exit_z8_time_us = 442.0, + .sr_enter_plus_exit_z8_time_us = 560.0, + .writeback_latency_us = 12.0, + .dram_channel_width_bytes = 4, + .round_trip_ping_latency_dcfclk_cycles = 106, + .urgent_latency_pixel_data_only_us = 4.0, + .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, + .urgent_latency_vm_data_only_us = 4.0, + .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, + .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, + .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, + .pct_ideal_sdp_bw_after_urgent = 80.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, + .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, + .max_avg_sdp_bw_use_normal_percent = 60.0, + .max_avg_dram_bw_use_normal_percent = 60.0, + .fabric_datapath_to_dcn_data_return_bytes = 32, + .return_bus_width_bytes = 64, + .downspread_percent = 0.38, + .dcn_downspread_percent = 0.5, + .gpuvm_min_page_size_bytes = 4096, + .hostvm_min_page_size_bytes = 4096, + .do_urgent_latency_adjustment = false, + .urgent_latency_adjustment_fabric_clock_component_us = 0, + .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, +}; + +void dcn31_calculate_wm_and_dlg_fp( + struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel) +{ + int i, pipe_idx; + double dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; + + dc_assert_fp_enabled(); + + if (context->bw_ctx.dml.soc.min_dcfclk > dcfclk) + dcfclk = context->bw_ctx.dml.soc.min_dcfclk; + + /* We don't recalculate clocks for 0 pipe configs, which can block + * S0i3 as high clocks will block low power states + * Override any clocks that can block S0i3 to min here + */ + if (pipe_cnt == 0) { + context->bw_ctx.bw.dcn.clk.dcfclk_khz = dcfclk; // always should be vlevel 0 + return; + } + + pipes[0].clks_cfg.voltage = vlevel; + pipes[0].clks_cfg.dcfclk_mhz = dcfclk; + pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz; + +#if 0 // TODO + /* Set B: + * TODO + */ + if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid) { + if (vlevel == 0) { + pipes[0].clks_cfg.voltage = 1; + pipes[0].clks_cfg.dcfclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dcfclk_mhz; + } + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us; + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us; + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us; + } + context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_z8_ns = get_wm_z8_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_z8_ns = get_wm_z8_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + + pipes[0].clks_cfg.voltage = vlevel; + pipes[0].clks_cfg.dcfclk_mhz = dcfclk; + + /* Set C: + * TODO + */ + if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) { + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us; + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us; + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us; + } + context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_z8_ns = get_wm_z8_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_z8_ns = get_wm_z8_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + + /* Set D: + * TODO + */ + if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].valid) { + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us; + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us; + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us; + } + context->bw_ctx.bw.dcn.watermarks.d.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_z8_ns = get_wm_z8_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_z8_ns = get_wm_z8_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; +#endif + + /* Set A: + * All clocks min required + * + * Set A calculated last so that following calculations are based on Set A + */ + dc->res_pool->funcs->update_soc_for_wm_a(dc, context); + context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_z8_ns = get_wm_z8_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_z8_ns = get_wm_z8_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + /* TODO: remove: */ + context->bw_ctx.bw.dcn.watermarks.b = context->bw_ctx.bw.dcn.watermarks.a; + context->bw_ctx.bw.dcn.watermarks.c = context->bw_ctx.bw.dcn.watermarks.a; + context->bw_ctx.bw.dcn.watermarks.d = context->bw_ctx.bw.dcn.watermarks.a; + /* end remove*/ + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + if (!context->res_ctx.pipe_ctx[i].stream) + continue; + + pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt); + pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + + if (dc->config.forced_clocks || dc->debug.max_disp_clk) { + pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz; + pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz; + } + if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000) + pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0; + if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) + pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0; + + pipe_idx++; + } + + dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel); +} + +void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) +{ + struct clk_limit_table *clk_table = &bw_params->clk_table; + struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES]; + unsigned int i, closest_clk_lvl; + int j; + + dc_assert_fp_enabled(); + + // Default clock levels are used for diags, which may lead to overclocking. + if (!IS_DIAG_DC(dc->ctx->dce_environment)) { + int max_dispclk_mhz = 0, max_dppclk_mhz = 0; + + dcn3_1_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator; + dcn3_1_ip.max_num_dpp = dc->res_pool->pipe_count; + dcn3_1_soc.num_chans = bw_params->num_channels; + + ASSERT(clk_table->num_entries); + + /* Prepass to find max clocks independent of voltage level. */ + for (i = 0; i < clk_table->num_entries; ++i) { + if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz) + max_dispclk_mhz = clk_table->entries[i].dispclk_mhz; + if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz) + max_dppclk_mhz = clk_table->entries[i].dppclk_mhz; + } + + for (i = 0; i < clk_table->num_entries; i++) { + /* loop backwards*/ + for (closest_clk_lvl = 0, j = dcn3_1_soc.num_states - 1; j >= 0; j--) { + if ((unsigned int) dcn3_1_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) { + closest_clk_lvl = j; + break; + } + } + + clock_limits[i].state = i; + + /* Clocks dependent on voltage level. */ + clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; + clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; + clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz; + clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio; + + /* Clocks independent of voltage level. */ + clock_limits[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz : + dcn3_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz; + + clock_limits[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz : + dcn3_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz; + + clock_limits[i].dram_bw_per_chan_gbps = dcn3_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; + clock_limits[i].dscclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz; + clock_limits[i].dtbclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; + clock_limits[i].phyclk_d18_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; + clock_limits[i].phyclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz; + } + for (i = 0; i < clk_table->num_entries; i++) + dcn3_1_soc.clock_limits[i] = clock_limits[i]; + if (clk_table->num_entries) { + dcn3_1_soc.num_states = clk_table->num_entries; + } + } + + dcn3_1_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; + dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; + + if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) + dml_init_instance(&dc->dml, &dcn3_1_soc, &dcn3_1_ip, DML_PROJECT_DCN31); + else + dml_init_instance(&dc->dml, &dcn3_1_soc, &dcn3_1_ip, DML_PROJECT_DCN31_FPGA); +} + +void dcn315_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) +{ + struct clk_limit_table *clk_table = &bw_params->clk_table; + struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES]; + unsigned int i, closest_clk_lvl; + int max_dispclk_mhz = 0, max_dppclk_mhz = 0; + int j; + + dc_assert_fp_enabled(); + + // Default clock levels are used for diags, which may lead to overclocking. + if (!IS_DIAG_DC(dc->ctx->dce_environment)) { + + dcn3_15_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator; + dcn3_15_ip.max_num_dpp = dc->res_pool->pipe_count; + dcn3_15_soc.num_chans = bw_params->num_channels; + + ASSERT(clk_table->num_entries); + + /* Prepass to find max clocks independent of voltage level. */ + for (i = 0; i < clk_table->num_entries; ++i) { + if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz) + max_dispclk_mhz = clk_table->entries[i].dispclk_mhz; + if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz) + max_dppclk_mhz = clk_table->entries[i].dppclk_mhz; + } + + for (i = 0; i < clk_table->num_entries; i++) { + /* loop backwards*/ + for (closest_clk_lvl = 0, j = dcn3_15_soc.num_states - 1; j >= 0; j--) { + if ((unsigned int) dcn3_15_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) { + closest_clk_lvl = j; + break; + } + } + if (clk_table->num_entries == 1) { + /*smu gives one DPM level, let's take the highest one*/ + closest_clk_lvl = dcn3_15_soc.num_states - 1; + } + + clock_limits[i].state = i; + + /* Clocks dependent on voltage level. */ + clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; + if (clk_table->num_entries == 1 && + clock_limits[i].dcfclk_mhz < dcn3_15_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) { + /*SMU fix not released yet*/ + clock_limits[i].dcfclk_mhz = dcn3_15_soc.clock_limits[closest_clk_lvl].dcfclk_mhz; + } + clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; + clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz; + clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio; + + /* Clocks independent of voltage level. */ + clock_limits[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz : + dcn3_15_soc.clock_limits[closest_clk_lvl].dispclk_mhz; + + clock_limits[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz : + dcn3_15_soc.clock_limits[closest_clk_lvl].dppclk_mhz; + + clock_limits[i].dram_bw_per_chan_gbps = dcn3_15_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; + clock_limits[i].dscclk_mhz = dcn3_15_soc.clock_limits[closest_clk_lvl].dscclk_mhz; + clock_limits[i].dtbclk_mhz = dcn3_15_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; + clock_limits[i].phyclk_d18_mhz = dcn3_15_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; + clock_limits[i].phyclk_mhz = dcn3_15_soc.clock_limits[closest_clk_lvl].phyclk_mhz; + } + for (i = 0; i < clk_table->num_entries; i++) + dcn3_15_soc.clock_limits[i] = clock_limits[i]; + if (clk_table->num_entries) { + dcn3_15_soc.num_states = clk_table->num_entries; + } + } + + if (max_dispclk_mhz) { + dcn3_15_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2; + dc->dml.soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2; + } + + if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) + dml_init_instance(&dc->dml, &dcn3_15_soc, &dcn3_15_ip, DML_PROJECT_DCN31); + else + dml_init_instance(&dc->dml, &dcn3_15_soc, &dcn3_15_ip, DML_PROJECT_DCN31_FPGA); +} + +void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) +{ + struct clk_limit_table *clk_table = &bw_params->clk_table; + struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES]; + unsigned int i, closest_clk_lvl; + int max_dispclk_mhz = 0, max_dppclk_mhz = 0; + int j; + + dc_assert_fp_enabled(); + + // Default clock levels are used for diags, which may lead to overclocking. + if (!IS_DIAG_DC(dc->ctx->dce_environment)) { + + dcn3_16_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator; + dcn3_16_ip.max_num_dpp = dc->res_pool->pipe_count; + dcn3_16_soc.num_chans = bw_params->num_channels; + + ASSERT(clk_table->num_entries); + + /* Prepass to find max clocks independent of voltage level. */ + for (i = 0; i < clk_table->num_entries; ++i) { + if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz) + max_dispclk_mhz = clk_table->entries[i].dispclk_mhz; + if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz) + max_dppclk_mhz = clk_table->entries[i].dppclk_mhz; + } + + for (i = 0; i < clk_table->num_entries; i++) { + /* loop backwards*/ + for (closest_clk_lvl = 0, j = dcn3_16_soc.num_states - 1; j >= 0; j--) { + if ((unsigned int) dcn3_16_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) { + closest_clk_lvl = j; + break; + } + } + // Ported from DCN315 + if (clk_table->num_entries == 1) { + /*smu gives one DPM level, let's take the highest one*/ + closest_clk_lvl = dcn3_16_soc.num_states - 1; + } + + clock_limits[i].state = i; + + /* Clocks dependent on voltage level. */ + clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; + if (clk_table->num_entries == 1 && + clock_limits[i].dcfclk_mhz < dcn3_16_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) { + /*SMU fix not released yet*/ + clock_limits[i].dcfclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dcfclk_mhz; + } + clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; + clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz; + clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio; + + /* Clocks independent of voltage level. */ + clock_limits[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz : + dcn3_16_soc.clock_limits[closest_clk_lvl].dispclk_mhz; + + clock_limits[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz : + dcn3_16_soc.clock_limits[closest_clk_lvl].dppclk_mhz; + + clock_limits[i].dram_bw_per_chan_gbps = dcn3_16_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; + clock_limits[i].dscclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dscclk_mhz; + clock_limits[i].dtbclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; + clock_limits[i].phyclk_d18_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; + clock_limits[i].phyclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].phyclk_mhz; + } + for (i = 0; i < clk_table->num_entries; i++) + dcn3_16_soc.clock_limits[i] = clock_limits[i]; + if (clk_table->num_entries) { + dcn3_16_soc.num_states = clk_table->num_entries; + } + } + + if (max_dispclk_mhz) { + dcn3_16_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2; + dc->dml.soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2; + } + + if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) + dml_init_instance(&dc->dml, &dcn3_16_soc, &dcn3_16_ip, DML_PROJECT_DCN31); + else + dml_init_instance(&dc->dml, &dcn3_16_soc, &dcn3_16_ip, DML_PROJECT_DCN31_FPGA); +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h new file mode 100644 index 000000000000..24ac19c83687 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h @@ -0,0 +1,44 @@ +/* + * Copyright 2019-2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DCN31_FPU_H__ +#define __DCN31_FPU_H__ + +#define DCN3_1_DEFAULT_DET_SIZE 384 +#define DCN3_15_DEFAULT_DET_SIZE 192 +#define DCN3_15_MIN_COMPBUF_SIZE_KB 128 +#define DCN3_16_DEFAULT_DET_SIZE 192 + +void dcn31_calculate_wm_and_dlg_fp( + struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel); + +void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params); +void dcn315_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params); +void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params); + +#endif /* __DCN31_FPU_H__*/ diff --git a/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h b/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h index 337c0161e72d..806f3041db14 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h +++ b/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h @@ -619,7 +619,7 @@ struct dcn_ip_params { }; extern const struct dcn_ip_params dcn10_ip_defaults; -bool dcn10_validate_bandwidth( +bool dcn_validate_bandwidth( struct dc *dc, struct dc_state *context, bool fast_validate); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h index 59a704781e34..554d2e33bd7f 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h @@ -310,6 +310,8 @@ struct timing_generator_funcs { uint32_t slave_pixel_clock_100Hz, uint8_t master_clock_divider, uint8_t slave_clock_divider); + + void (*init_odm)(struct timing_generator *tg); }; #endif diff --git a/drivers/gpu/drm/amd/display/dc/inc/link_hwss.h b/drivers/gpu/drm/amd/display/dc/inc/link_hwss.h index 3b3090e3d327..e6c49ef8b584 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/link_hwss.h +++ b/drivers/gpu/drm/amd/display/dc/inc/link_hwss.h @@ -37,9 +37,12 @@ struct dc_link; struct link_resource; struct pipe_ctx; struct encoder_set_dp_phy_pattern_param; +struct link_mst_stream_allocation_table; struct link_hwss_ext { - /* function pointers below require check for NULL at all time + /* function pointers below may require to check for NULL if caller + * considers missing implementation as expected in some cases or none + * critical to be investigated immediately * ********************************************************************* */ void (*set_hblank_min_symbol_width)(struct pipe_ctx *pipe_ctx, @@ -62,6 +65,9 @@ struct link_hwss_ext { const struct link_resource *link_res, const struct dc_link_settings *link_settings, const struct dc_lane_settings lane_settings[LANE_COUNT_DP_MAX]); + void (*update_stream_allocation_table)(struct dc_link *link, + const struct link_resource *link_res, + const struct link_mst_stream_allocation_table *table); }; struct link_hwss { @@ -72,6 +78,7 @@ struct link_hwss { */ void (*setup_stream_encoder)(struct pipe_ctx *pipe_ctx); void (*reset_stream_encoder)(struct pipe_ctx *pipe_ctx); + void (*setup_stream_attribute)(struct pipe_ctx *pipe_ctx); }; #endif /* __DC_LINK_HWSS_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dp_trace.c b/drivers/gpu/drm/amd/display/dc/link/link_dp_trace.c index e7047391934b..2c1a3bfcdb50 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dp_trace.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dp_trace.c @@ -144,3 +144,23 @@ unsigned int dc_dp_trace_get_link_loss_count(struct dc_link *link) { return link->dp_trace.link_loss_count; } + +void dp_trace_set_edp_power_timestamp(struct dc_link *link, + bool power_up) +{ + if (!power_up) + /*save driver power off time stamp*/ + link->dp_trace.edp_trace_power_timestamps.poweroff = dm_get_timestamp(link->dc->ctx); + else + link->dp_trace.edp_trace_power_timestamps.poweron = dm_get_timestamp(link->dc->ctx); +} + +uint64_t dp_trace_get_edp_poweron_timestamp(struct dc_link *link) +{ + return link->dp_trace.edp_trace_power_timestamps.poweron; +} + +uint64_t dp_trace_get_edp_poweroff_timestamp(struct dc_link *link) +{ + return link->dp_trace.edp_trace_power_timestamps.poweroff; +}
\ No newline at end of file diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dp_trace.h b/drivers/gpu/drm/amd/display/dc/link/link_dp_trace.h index 702f97c6ead0..26700e3cd65e 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dp_trace.h +++ b/drivers/gpu/drm/amd/display/dc/link/link_dp_trace.h @@ -54,4 +54,9 @@ struct dp_trace_lt_counts *dc_dp_trace_get_lt_counts(struct dc_link *link, bool in_detection); unsigned int dc_dp_trace_get_link_loss_count(struct dc_link *link); +void dp_trace_set_edp_power_timestamp(struct dc_link *link, + bool power_up); +uint64_t dp_trace_get_edp_poweron_timestamp(struct dc_link *link); +uint64_t dp_trace_get_edp_poweroff_timestamp(struct dc_link *link); + #endif /* __LINK_DP_TRACE_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/link/link_hwss_dio.c b/drivers/gpu/drm/amd/display/dc/link/link_hwss_dio.c index 0f845113a6aa..776e822abcbb 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_hwss_dio.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_hwss_dio.c @@ -62,6 +62,46 @@ void reset_dio_stream_encoder(struct pipe_ctx *pipe_ctx) } +void setup_dio_stream_attribute(struct pipe_ctx *pipe_ctx) +{ + struct stream_encoder *stream_encoder = pipe_ctx->stream_res.stream_enc; + struct dc_stream_state *stream = pipe_ctx->stream; + struct dc_link *link = stream->link; + + if (!dc_is_virtual_signal(stream->signal)) + stream_encoder->funcs->setup_stereo_sync( + stream_encoder, + pipe_ctx->stream_res.tg->inst, + stream->timing.timing_3d_format != TIMING_3D_FORMAT_NONE); + + if (dc_is_dp_signal(stream->signal)) + stream_encoder->funcs->dp_set_stream_attribute( + stream_encoder, + &stream->timing, + stream->output_color_space, + stream->use_vsc_sdp_for_colorimetry, + link->dpcd_caps.dprx_feature.bits.SST_SPLIT_SDP_CAP); + else if (dc_is_hdmi_tmds_signal(stream->signal)) + stream_encoder->funcs->hdmi_set_stream_attribute( + stream_encoder, + &stream->timing, + stream->phy_pix_clk, + pipe_ctx->stream_res.audio != NULL); + else if (dc_is_dvi_signal(stream->signal)) + stream_encoder->funcs->dvi_set_stream_attribute( + stream_encoder, + &stream->timing, + (stream->signal == SIGNAL_TYPE_DVI_DUAL_LINK) ? + true : false); + else if (dc_is_lvds_signal(stream->signal)) + stream_encoder->funcs->lvds_set_stream_attribute( + stream_encoder, + &stream->timing); + + if (dc_is_dp_signal(stream->signal)) + dp_source_sequence_trace(link, DPCD_SOURCE_SEQ_AFTER_DP_STREAM_ATTR); +} + void enable_dio_dp_link_output(struct dc_link *link, const struct link_resource *link_res, enum signal_type signal, @@ -113,15 +153,27 @@ void set_dio_dp_lane_settings(struct dc_link *link, link_enc->funcs->dp_set_lane_settings(link_enc, link_settings, lane_settings); } +static void update_dio_stream_allocation_table(struct dc_link *link, + const struct link_resource *link_res, + const struct link_mst_stream_allocation_table *table) +{ + struct link_encoder *link_enc = link_enc_cfg_get_link_enc(link); + + ASSERT(link_enc); + link_enc->funcs->update_mst_stream_allocation_table(link_enc, table); +} + static const struct link_hwss dio_link_hwss = { .setup_stream_encoder = setup_dio_stream_encoder, .reset_stream_encoder = reset_dio_stream_encoder, + .setup_stream_attribute = setup_dio_stream_attribute, .ext = { .set_throttled_vcp_size = set_dio_throttled_vcp_size, .enable_dp_link_output = enable_dio_dp_link_output, .disable_dp_link_output = disable_dio_dp_link_output, .set_dp_link_test_pattern = set_dio_dp_link_test_pattern, .set_dp_lane_settings = set_dio_dp_lane_settings, + .update_stream_allocation_table = update_dio_stream_allocation_table, }, }; diff --git a/drivers/gpu/drm/amd/display/dc/link/link_hwss_dio.h b/drivers/gpu/drm/amd/display/dc/link/link_hwss_dio.h index 680df20b1fa3..08f22b32df48 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_hwss_dio.h +++ b/drivers/gpu/drm/amd/display/dc/link/link_hwss_dio.h @@ -34,6 +34,7 @@ void set_dio_throttled_vcp_size(struct pipe_ctx *pipe_ctx, struct fixed31_32 throttled_vcp_size); void setup_dio_stream_encoder(struct pipe_ctx *pipe_ctx); void reset_dio_stream_encoder(struct pipe_ctx *pipe_ctx); +void setup_dio_stream_attribute(struct pipe_ctx *pipe_ctx); void enable_dio_dp_link_output(struct dc_link *link, const struct link_resource *link_res, enum signal_type signal, diff --git a/drivers/gpu/drm/amd/display/dc/link/link_hwss_dpia.c b/drivers/gpu/drm/amd/display/dc/link/link_hwss_dpia.c index 35b206225201..89d4e8159138 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_hwss_dpia.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_hwss_dpia.c @@ -25,16 +25,44 @@ #include "link_hwss_dpia.h" #include "core_types.h" #include "link_hwss_dio.h" +#include "link_enc_cfg.h" + +#define DC_LOGGER_INIT(logger) + +static void update_dpia_stream_allocation_table(struct dc_link *link, + const struct link_resource *link_res, + const struct link_mst_stream_allocation_table *table) +{ + struct link_encoder *link_enc = link_enc_cfg_get_link_enc(link); + static enum dc_status status; + uint8_t mst_alloc_slots = 0, prev_mst_slots_in_use = 0xFF; + int i; + DC_LOGGER_INIT(link->ctx->logger); + + for (i = 0; i < table->stream_count; i++) + mst_alloc_slots += table->stream_allocations[i].slot_count; + + status = dc_process_dmub_set_mst_slots(link->dc, link->link_index, + mst_alloc_slots, &prev_mst_slots_in_use); + ASSERT(status == DC_OK); + DC_LOG_MST("dpia : status[%d]: alloc_slots[%d]: used_slots[%d]\n", + status, mst_alloc_slots, prev_mst_slots_in_use); + + ASSERT(link_enc); + link_enc->funcs->update_mst_stream_allocation_table(link_enc, table); +} static const struct link_hwss dpia_link_hwss = { .setup_stream_encoder = setup_dio_stream_encoder, .reset_stream_encoder = reset_dio_stream_encoder, + .setup_stream_attribute = setup_dio_stream_attribute, .ext = { .set_throttled_vcp_size = set_dio_throttled_vcp_size, .enable_dp_link_output = enable_dio_dp_link_output, .disable_dp_link_output = disable_dio_dp_link_output, .set_dp_link_test_pattern = set_dio_dp_link_test_pattern, .set_dp_lane_settings = set_dio_dp_lane_settings, + .update_stream_allocation_table = update_dpia_stream_allocation_table, }, }; diff --git a/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_dp.c b/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_dp.c index 74919491675f..87972dc8443d 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_dp.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_dp.c @@ -131,6 +131,22 @@ static void reset_hpo_dp_stream_encoder(struct pipe_ctx *pipe_ctx) dccg->funcs->set_dpstreamclk(dccg, REFCLK, tg->inst); } +static void setup_hpo_dp_stream_attribute(struct pipe_ctx *pipe_ctx) +{ + struct hpo_dp_stream_encoder *stream_enc = pipe_ctx->stream_res.hpo_dp_stream_enc; + struct dc_stream_state *stream = pipe_ctx->stream; + struct dc_link *link = stream->link; + + stream_enc->funcs->set_stream_attribute( + stream_enc, + &stream->timing, + stream->output_color_space, + stream->use_vsc_sdp_for_colorimetry, + stream->timing.flags.DSC, + false); + dp_source_sequence_trace(link, DPCD_SOURCE_SEQ_AFTER_DP_STREAM_ATTR); +} + static void enable_hpo_dp_fpga_link_output(struct dc_link *link, const struct link_resource *link_res, enum signal_type signal, @@ -228,9 +244,19 @@ static void set_hpo_dp_lane_settings(struct dc_link *link, lane_settings[0].FFE_PRESET.raw); } +static void update_hpo_dp_stream_allocation_table(struct dc_link *link, + const struct link_resource *link_res, + const struct link_mst_stream_allocation_table *table) +{ + link_res->hpo_dp_link_enc->funcs->update_stream_allocation_table( + link_res->hpo_dp_link_enc, + table); +} + static const struct link_hwss hpo_dp_link_hwss = { .setup_stream_encoder = setup_hpo_dp_stream_encoder, .reset_stream_encoder = reset_hpo_dp_stream_encoder, + .setup_stream_attribute = setup_hpo_dp_stream_attribute, .ext = { .set_throttled_vcp_size = set_hpo_dp_throttled_vcp_size, .set_hblank_min_symbol_width = set_hpo_dp_hblank_min_symbol_width, @@ -238,6 +264,7 @@ static const struct link_hwss hpo_dp_link_hwss = { .disable_dp_link_output = disable_hpo_dp_link_output, .set_dp_link_test_pattern = set_hpo_dp_link_test_pattern, .set_dp_lane_settings = set_hpo_dp_lane_settings, + .update_stream_allocation_table = update_hpo_dp_stream_allocation_table, }, }; diff --git a/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_frl.c b/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_frl.c index 9df273ca699b..4b5eccd994c4 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_frl.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_frl.c @@ -26,9 +26,28 @@ #include "core_types.h" #include "virtual/virtual_link_hwss.h" +static void setup_hpo_frl_stream_attribute(struct pipe_ctx *pipe_ctx) +{ + struct hpo_frl_stream_encoder *stream_enc = pipe_ctx->stream_res.hpo_frl_stream_enc; + struct dc_stream_state *stream = pipe_ctx->stream; + struct pipe_ctx *odm_pipe; + int odm_combine_num_segments = 1; + + /* get number of ODM combine input segments */ + for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) + odm_combine_num_segments++; + + stream_enc->funcs->hdmi_frl_set_stream_attribute( + stream_enc, + &stream->timing, + &stream->link->frl_link_settings.borrow_params, + odm_combine_num_segments); +} + static const struct link_hwss hpo_frl_link_hwss = { .setup_stream_encoder = virtual_setup_stream_encoder, .reset_stream_encoder = virtual_reset_stream_encoder, + .setup_stream_attribute = setup_hpo_frl_stream_attribute, }; bool can_use_hpo_frl_link_hwss(const struct dc_link *link, diff --git a/drivers/gpu/drm/amd/display/dc/virtual/virtual_link_hwss.c b/drivers/gpu/drm/amd/display/dc/virtual/virtual_link_hwss.c index 525eba2a3354..501173ce270e 100644 --- a/drivers/gpu/drm/amd/display/dc/virtual/virtual_link_hwss.c +++ b/drivers/gpu/drm/amd/display/dc/virtual/virtual_link_hwss.c @@ -29,12 +29,17 @@ void virtual_setup_stream_encoder(struct pipe_ctx *pipe_ctx) { } +void virtual_setup_stream_attribute(struct pipe_ctx *pipe_ctx) +{ +} + void virtual_reset_stream_encoder(struct pipe_ctx *pipe_ctx) { } static const struct link_hwss virtual_link_hwss = { .setup_stream_encoder = virtual_setup_stream_encoder, .reset_stream_encoder = virtual_reset_stream_encoder, + .setup_stream_attribute = virtual_setup_stream_attribute, }; const struct link_hwss *get_virtual_link_hwss(void) diff --git a/drivers/gpu/drm/amd/display/dc/virtual/virtual_link_hwss.h b/drivers/gpu/drm/amd/display/dc/virtual/virtual_link_hwss.h index e6bcb4bb0f3a..fbcbc5afb47d 100644 --- a/drivers/gpu/drm/amd/display/dc/virtual/virtual_link_hwss.h +++ b/drivers/gpu/drm/amd/display/dc/virtual/virtual_link_hwss.h @@ -28,6 +28,7 @@ #include "core_types.h" void virtual_setup_stream_encoder(struct pipe_ctx *pipe_ctx); +void virtual_setup_stream_attribute(struct pipe_ctx *pipe_ctx); void virtual_reset_stream_encoder(struct pipe_ctx *pipe_ctx); const struct link_hwss *get_virtual_link_hwss(void); diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 71214c7a60fc..05c8d91ad4ab 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -44,24 +44,6 @@ #endif // defined(_TEST_HARNESS) || defined(FPGA_USB4) -/* Firmware versioning. */ -#ifdef DMUB_EXPOSE_VERSION -#define DMUB_FW_VERSION_GIT_HASH 0x929554ba -#define DMUB_FW_VERSION_MAJOR 0 -#define DMUB_FW_VERSION_MINOR 0 -#define DMUB_FW_VERSION_REVISION 108 -#define DMUB_FW_VERSION_TEST 0 -#define DMUB_FW_VERSION_VBIOS 0 -#define DMUB_FW_VERSION_HOTFIX 0 -#define DMUB_FW_VERSION_UCODE (((DMUB_FW_VERSION_MAJOR & 0xFF) << 24) | \ - ((DMUB_FW_VERSION_MINOR & 0xFF) << 16) | \ - ((DMUB_FW_VERSION_REVISION & 0xFF) << 8) | \ - ((DMUB_FW_VERSION_TEST & 0x1) << 7) | \ - ((DMUB_FW_VERSION_VBIOS & 0x1) << 6) | \ - (DMUB_FW_VERSION_HOTFIX & 0x3F)) - -#endif - //<DMUB_TYPES>================================================================== /* Basic type definitions. */ @@ -1523,8 +1505,6 @@ enum dmub_phy_fsm_state { DMUB_PHY_FSM_FAST_LP, }; - - /** * Data passed from driver to FW in a DMUB_CMD__PSR_COPY_SETTINGS command. */ @@ -1704,9 +1684,16 @@ struct dmub_rb_cmd_psr_enable_data { */ uint8_t panel_inst; /** - * Explicit padding to 4 byte boundary. + * Phy state to enter. + * Values to use are defined in dmub_phy_fsm_state */ - uint8_t pad[2]; + uint8_t phy_fsm_state; + /** + * Phy rate for DP - RBR/HBR/HBR2/HBR3. + * Set this using enum phy_link_rate. + * This does not support HDMI/DP2 for now. + */ + uint8_t phy_rate; }; /** @@ -1772,16 +1759,9 @@ struct dmub_cmd_psr_force_static_data { */ uint8_t panel_inst; /** - * Phy state to enter. - * Values to use are defined in dmub_phy_fsm_state - */ - uint8_t phy_fsm_state; - /** - * Phy rate for DP - RBR/HBR/HBR2/HBR3. - * Set this using enum phy_link_rate. - * This does not support HDMI/DP2 for now. + * Explicit padding to 4 byte boundary. */ - uint8_t phy_rate; + uint8_t pad[2]; }; /** @@ -3044,9 +3024,7 @@ static inline void dmub_rb_flush_pending(const struct dmub_rb *rb) uint32_t wptr = rb->wrpt; while (rptr != wptr) { - uint64_t volatile *data = (uint64_t volatile *)((uint8_t *)(rb->base_address) + rptr); - //uint64_t volatile *p = (uint64_t volatile *)data; - uint64_t temp; + uint64_t *data = (uint64_t *)((uint8_t *)(rb->base_address) + rptr); uint8_t i; /* Don't remove this. @@ -3054,7 +3032,7 @@ static inline void dmub_rb_flush_pending(const struct dmub_rb *rb) * for this function to be effective. */ for (i = 0; i < DMUB_RB_CMD_SIZE / sizeof(uint64_t); i++) - temp = *data++; + (void)READ_ONCE(*data++); rptr += DMUB_RB_CMD_SIZE; if (rptr >= rb->capacity) diff --git a/drivers/gpu/drm/amd/display/include/grph_object_id.h b/drivers/gpu/drm/amd/display/include/grph_object_id.h index fed1edc038d8..c6bbd262f1ac 100644 --- a/drivers/gpu/drm/amd/display/include/grph_object_id.h +++ b/drivers/gpu/drm/amd/display/include/grph_object_id.h @@ -162,6 +162,7 @@ enum connector_id { CONNECTOR_ID_MXM = 21, CONNECTOR_ID_WIRELESS = 22, CONNECTOR_ID_MIRACAST = 23, + CONNECTOR_ID_USBC = 24, CONNECTOR_ID_VIRTUAL = 100 }; diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index bd1d1dc93629..03fa63d56fa6 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -46,6 +46,10 @@ /* Number of consecutive frames to check before entering/exiting fixed refresh */ #define FIXED_REFRESH_ENTER_FRAME_COUNT 5 #define FIXED_REFRESH_EXIT_FRAME_COUNT 10 +/* Flip interval workaround constants */ +#define VSYNCS_BETWEEN_FLIP_THRESHOLD 2 +#define FREESYNC_CONSEC_FLIP_AFTER_VSYNC 5 +#define FREESYNC_VSYNC_TO_FLIP_DELTA_IN_US 500 struct core_freesync { struct mod_freesync public; @@ -466,6 +470,41 @@ static void apply_fixed_refresh(struct core_freesync *core_freesync, } } +static void determine_flip_interval_workaround_req(struct mod_vrr_params *in_vrr, + unsigned int curr_time_stamp_in_us) +{ + in_vrr->flip_interval.vsync_to_flip_in_us = curr_time_stamp_in_us - + in_vrr->flip_interval.v_update_timestamp_in_us; + + /* Determine conditions for stopping workaround */ + if (in_vrr->flip_interval.flip_interval_workaround_active && + in_vrr->flip_interval.vsyncs_between_flip < VSYNCS_BETWEEN_FLIP_THRESHOLD && + in_vrr->flip_interval.vsync_to_flip_in_us > FREESYNC_VSYNC_TO_FLIP_DELTA_IN_US) { + in_vrr->flip_interval.flip_interval_detect_counter = 0; + in_vrr->flip_interval.program_flip_interval_workaround = true; + in_vrr->flip_interval.flip_interval_workaround_active = false; + } else { + /* Determine conditions for starting workaround */ + if (in_vrr->flip_interval.vsyncs_between_flip >= VSYNCS_BETWEEN_FLIP_THRESHOLD && + in_vrr->flip_interval.vsync_to_flip_in_us < FREESYNC_VSYNC_TO_FLIP_DELTA_IN_US) { + /* Increase flip interval counter we have 2 vsyncs between flips and + * vsync to flip interval is less than 500us + */ + in_vrr->flip_interval.flip_interval_detect_counter++; + if (in_vrr->flip_interval.flip_interval_detect_counter > FREESYNC_CONSEC_FLIP_AFTER_VSYNC) { + /* Start workaround if we detect 5 consecutive instances of the above case */ + in_vrr->flip_interval.program_flip_interval_workaround = true; + in_vrr->flip_interval.flip_interval_workaround_active = true; + } + } else { + /* Reset the flip interval counter if we condition is no longer met */ + in_vrr->flip_interval.flip_interval_detect_counter = 0; + } + } + + in_vrr->flip_interval.vsyncs_between_flip = 0; +} + static bool vrr_settings_require_update(struct core_freesync *core_freesync, struct mod_freesync_config *in_config, unsigned int min_refresh_in_uhz, @@ -1179,6 +1218,9 @@ void mod_freesync_handle_preflip(struct mod_freesync *mod_freesync, in_out_vrr); } + determine_flip_interval_workaround_req(in_out_vrr, + curr_time_stamp_in_us); + } } @@ -1187,6 +1229,8 @@ void mod_freesync_handle_v_update(struct mod_freesync *mod_freesync, struct mod_vrr_params *in_out_vrr) { struct core_freesync *core_freesync = NULL; + unsigned int cur_timestamp_in_us; + unsigned long long cur_tick; if ((mod_freesync == NULL) || (stream == NULL) || (in_out_vrr == NULL)) return; @@ -1196,6 +1240,36 @@ void mod_freesync_handle_v_update(struct mod_freesync *mod_freesync, if (in_out_vrr->supported == false) return; + cur_tick = dm_get_timestamp(core_freesync->dc->ctx); + cur_timestamp_in_us = (unsigned int) + div_u64(dm_get_elapse_time_in_ns(core_freesync->dc->ctx, cur_tick, 0), 1000); + + in_out_vrr->flip_interval.vsyncs_between_flip++; + in_out_vrr->flip_interval.v_update_timestamp_in_us = cur_timestamp_in_us; + + if (in_out_vrr->state == VRR_STATE_ACTIVE_VARIABLE && + (in_out_vrr->flip_interval.flip_interval_workaround_active || + (!in_out_vrr->flip_interval.flip_interval_workaround_active && + in_out_vrr->flip_interval.program_flip_interval_workaround))) { + // set freesync vmin vmax to nominal for workaround + in_out_vrr->adjust.v_total_min = + mod_freesync_calc_v_total_from_refresh( + stream, in_out_vrr->max_refresh_in_uhz); + in_out_vrr->adjust.v_total_max = + in_out_vrr->adjust.v_total_min; + in_out_vrr->flip_interval.program_flip_interval_workaround = false; + in_out_vrr->flip_interval.do_flip_interval_workaround_cleanup = true; + return; + } + + if (in_out_vrr->state != VRR_STATE_ACTIVE_VARIABLE && + in_out_vrr->flip_interval.do_flip_interval_workaround_cleanup) { + in_out_vrr->flip_interval.do_flip_interval_workaround_cleanup = false; + in_out_vrr->flip_interval.flip_interval_detect_counter = 0; + in_out_vrr->flip_interval.vsyncs_between_flip = 0; + in_out_vrr->flip_interval.vsync_to_flip_in_us = 0; + } + /* Below the Range Logic */ /* Only execute if in fullscreen mode */ @@ -1302,7 +1376,7 @@ unsigned long long mod_freesync_calc_field_rate_from_timing( bool mod_freesync_is_valid_range(uint32_t min_refresh_cap_in_uhz, uint32_t max_refresh_cap_in_uhz, - uint32_t nominal_field_rate_in_uhz) + uint32_t nominal_field_rate_in_uhz) { /* Typically nominal refresh calculated can have some fractional part. diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c index 3e81850a7ffe..5e01c6e24cbc 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c @@ -251,6 +251,33 @@ out: return status; } +static enum mod_hdcp_status update_display_adjustments(struct mod_hdcp *hdcp, + struct mod_hdcp_display *display, + struct mod_hdcp_display_adjustment *adj) +{ + enum mod_hdcp_status status = MOD_HDCP_STATUS_NOT_IMPLEMENTED; + + if (is_in_authenticated_states(hdcp) && + is_dp_mst_hdcp(hdcp) && + display->adjust.disable == true && + adj->disable == false) { + display->adjust.disable = false; + if (is_hdcp1(hdcp)) + status = mod_hdcp_hdcp1_enable_dp_stream_encryption(hdcp); + else if (is_hdcp2(hdcp)) + status = mod_hdcp_hdcp2_enable_dp_stream_encryption(hdcp); + + if (status != MOD_HDCP_STATUS_SUCCESS) + display->adjust.disable = true; + } + + if (status == MOD_HDCP_STATUS_SUCCESS && + memcmp(adj, &display->adjust, + sizeof(struct mod_hdcp_display_adjustment)) != 0) + status = MOD_HDCP_STATUS_NOT_IMPLEMENTED; + + return status; +} /* * Implementation of functions in mod_hdcp.h */ @@ -391,7 +418,7 @@ out: return status; } -enum mod_hdcp_status mod_hdcp_update_authentication(struct mod_hdcp *hdcp, +enum mod_hdcp_status mod_hdcp_update_display(struct mod_hdcp *hdcp, uint8_t index, struct mod_hdcp_link_adjustment *link_adjust, struct mod_hdcp_display_adjustment *display_adjust, @@ -419,6 +446,15 @@ enum mod_hdcp_status mod_hdcp_update_authentication(struct mod_hdcp *hdcp, goto out; } + if (memcmp(link_adjust, &hdcp->connection.link.adjust, + sizeof(struct mod_hdcp_link_adjustment)) == 0 && + memcmp(display_adjust, &display->adjust, + sizeof(struct mod_hdcp_display_adjustment)) != 0) { + status = update_display_adjustments(hdcp, display, display_adjust); + if (status != MOD_HDCP_STATUS_NOT_IMPLEMENTED) + goto out; + } + /* stop current authentication */ status = reset_authentication(hdcp, output); if (status != MOD_HDCP_STATUS_SUCCESS) diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h index 392c0c03365a..55c7d873175f 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h @@ -445,6 +445,14 @@ static inline uint8_t is_in_hdcp2_dp_states(struct mod_hdcp *hdcp) current_state(hdcp) <= HDCP2_DP_STATE_END); } +static inline uint8_t is_in_authenticated_states(struct mod_hdcp *hdcp) +{ + return (current_state(hdcp) == D1_A4_AUTHENTICATED || + current_state(hdcp) == H1_A45_AUTHENTICATED || + current_state(hdcp) == D2_A5_AUTHENTICATED || + current_state(hdcp) == H2_A5_AUTHENTICATED); +} + static inline uint8_t is_hdcp1(struct mod_hdcp *hdcp) { return (is_in_hdcp1_states(hdcp) || is_in_hdcp1_dp_states(hdcp)); diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c index 6ec918af3bff..1ddb4f5eac8e 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c @@ -497,9 +497,9 @@ enum mod_hdcp_status mod_hdcp_hdcp1_execution(struct mod_hdcp *hdcp, return status; } -extern enum mod_hdcp_status mod_hdcp_hdcp1_dp_execution(struct mod_hdcp *hdcp, - struct mod_hdcp_event_context *event_ctx, - struct mod_hdcp_transition_input_hdcp1 *input) +enum mod_hdcp_status mod_hdcp_hdcp1_dp_execution(struct mod_hdcp *hdcp, + struct mod_hdcp_event_context *event_ctx, + struct mod_hdcp_transition_input_hdcp1 *input) { enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS; diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h b/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h index 75a158a2514c..cf6bc9446244 100644 --- a/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h +++ b/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h @@ -105,6 +105,16 @@ struct mod_vrr_params_fixed_refresh { uint32_t frame_counter; }; +struct mod_vrr_params_flip_interval { + bool flip_interval_workaround_active; + bool program_flip_interval_workaround; + bool do_flip_interval_workaround_cleanup; + uint32_t flip_interval_detect_counter; + uint32_t vsyncs_between_flip; + uint32_t vsync_to_flip_in_us; + uint32_t v_update_timestamp_in_us; +}; + struct mod_vrr_params { bool supported; bool send_info_frame; @@ -121,6 +131,8 @@ struct mod_vrr_params { struct mod_vrr_params_fixed_refresh fixed; struct mod_vrr_params_btr btr; + + struct mod_vrr_params_flip_interval flip_interval; }; struct mod_freesync *mod_freesync_create(struct dc *dc); diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h b/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h index f7420c3f5672..3348bb97ef81 100644 --- a/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h +++ b/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h @@ -294,7 +294,7 @@ enum mod_hdcp_status mod_hdcp_remove_display(struct mod_hdcp *hdcp, uint8_t index, struct mod_hdcp_output *output); /* called per display to apply new authentication adjustment */ -enum mod_hdcp_status mod_hdcp_update_authentication(struct mod_hdcp *hdcp, +enum mod_hdcp_status mod_hdcp_update_display(struct mod_hdcp *hdcp, uint8_t index, struct mod_hdcp_link_adjustment *link_adjust, struct mod_hdcp_display_adjustment *display_adjust, diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c index 2b00f334e93d..97928d4c3b9a 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c @@ -784,3 +784,41 @@ bool dmcu_load_iram(struct dmcu *dmcu, return result; } +/* + * is_psr_su_specific_panel() - check if sink is AMD vendor-specific PSR-SU + * supported eDP device. + * + * @link: dc link pointer + * + * Return: true if AMDGPU vendor specific PSR-SU eDP panel + */ +bool is_psr_su_specific_panel(struct dc_link *link) +{ + if (link->dpcd_caps.edp_rev >= DP_EDP_14) { + if (link->dpcd_caps.psr_info.psr_version >= DP_PSR2_WITH_Y_COORD_ET_SUPPORTED) + return true; + /* + * Some panels will report PSR capabilities over additional DPCD bits. + * Such panels are approved despite reporting only PSR v3, as long as + * the additional bits are reported. + */ + if (link->dpcd_caps.psr_info.psr_version < DP_PSR2_WITH_Y_COORD_IS_SUPPORTED) + return false; + + if (link->dpcd_caps.sink_dev_id == DP_BRANCH_DEVICE_ID_001CF8) { + /* + * FIXME: + * This is the temporary workaround to disable PSRSU when system turned on + * DSC function on the sepcific sink. Once the PSRSU + DSC is fixed, this + * condition should be removed. + */ + if (link->dpcd_caps.dsc_caps.dsc_basic_caps.fields.dsc_support.DSC_SUPPORT) + return false; + + if (link->dpcd_caps.psr_info.force_psrsu_cap == 0x1) + return true; + } + } + + return false; +} diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h index 2a9f8e2d8080..1a634d8c78c5 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h @@ -52,4 +52,5 @@ bool dmub_init_abm_config(struct resource_pool *res_pool, struct dmcu_iram_parameters params, unsigned int inst); +bool is_psr_su_specific_panel(struct dc_link *link); #endif /* MODULES_POWER_POWER_HELPERS_H_ */ diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index fe4e585781bb..741dae17562a 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -116,38 +116,38 @@ enum amd_powergating_state { /* CG flags */ -#define AMD_CG_SUPPORT_GFX_MGCG (1 << 0) -#define AMD_CG_SUPPORT_GFX_MGLS (1 << 1) -#define AMD_CG_SUPPORT_GFX_CGCG (1 << 2) -#define AMD_CG_SUPPORT_GFX_CGLS (1 << 3) -#define AMD_CG_SUPPORT_GFX_CGTS (1 << 4) -#define AMD_CG_SUPPORT_GFX_CGTS_LS (1 << 5) -#define AMD_CG_SUPPORT_GFX_CP_LS (1 << 6) -#define AMD_CG_SUPPORT_GFX_RLC_LS (1 << 7) -#define AMD_CG_SUPPORT_MC_LS (1 << 8) -#define AMD_CG_SUPPORT_MC_MGCG (1 << 9) -#define AMD_CG_SUPPORT_SDMA_LS (1 << 10) -#define AMD_CG_SUPPORT_SDMA_MGCG (1 << 11) -#define AMD_CG_SUPPORT_BIF_LS (1 << 12) -#define AMD_CG_SUPPORT_UVD_MGCG (1 << 13) -#define AMD_CG_SUPPORT_VCE_MGCG (1 << 14) -#define AMD_CG_SUPPORT_HDP_LS (1 << 15) -#define AMD_CG_SUPPORT_HDP_MGCG (1 << 16) -#define AMD_CG_SUPPORT_ROM_MGCG (1 << 17) -#define AMD_CG_SUPPORT_DRM_LS (1 << 18) -#define AMD_CG_SUPPORT_BIF_MGCG (1 << 19) -#define AMD_CG_SUPPORT_GFX_3D_CGCG (1 << 20) -#define AMD_CG_SUPPORT_GFX_3D_CGLS (1 << 21) -#define AMD_CG_SUPPORT_DRM_MGCG (1 << 22) -#define AMD_CG_SUPPORT_DF_MGCG (1 << 23) -#define AMD_CG_SUPPORT_VCN_MGCG (1 << 24) -#define AMD_CG_SUPPORT_HDP_DS (1 << 25) -#define AMD_CG_SUPPORT_HDP_SD (1 << 26) -#define AMD_CG_SUPPORT_IH_CG (1 << 27) -#define AMD_CG_SUPPORT_ATHUB_LS (1 << 28) -#define AMD_CG_SUPPORT_ATHUB_MGCG (1 << 29) -#define AMD_CG_SUPPORT_JPEG_MGCG (1 << 30) -#define AMD_CG_SUPPORT_GFX_FGCG (1 << 31) +#define AMD_CG_SUPPORT_GFX_MGCG (1ULL << 0) +#define AMD_CG_SUPPORT_GFX_MGLS (1ULL << 1) +#define AMD_CG_SUPPORT_GFX_CGCG (1ULL << 2) +#define AMD_CG_SUPPORT_GFX_CGLS (1ULL << 3) +#define AMD_CG_SUPPORT_GFX_CGTS (1ULL << 4) +#define AMD_CG_SUPPORT_GFX_CGTS_LS (1ULL << 5) +#define AMD_CG_SUPPORT_GFX_CP_LS (1ULL << 6) +#define AMD_CG_SUPPORT_GFX_RLC_LS (1ULL << 7) +#define AMD_CG_SUPPORT_MC_LS (1ULL << 8) +#define AMD_CG_SUPPORT_MC_MGCG (1ULL << 9) +#define AMD_CG_SUPPORT_SDMA_LS (1ULL << 10) +#define AMD_CG_SUPPORT_SDMA_MGCG (1ULL << 11) +#define AMD_CG_SUPPORT_BIF_LS (1ULL << 12) +#define AMD_CG_SUPPORT_UVD_MGCG (1ULL << 13) +#define AMD_CG_SUPPORT_VCE_MGCG (1ULL << 14) +#define AMD_CG_SUPPORT_HDP_LS (1ULL << 15) +#define AMD_CG_SUPPORT_HDP_MGCG (1ULL << 16) +#define AMD_CG_SUPPORT_ROM_MGCG (1ULL << 17) +#define AMD_CG_SUPPORT_DRM_LS (1ULL << 18) +#define AMD_CG_SUPPORT_BIF_MGCG (1ULL << 19) +#define AMD_CG_SUPPORT_GFX_3D_CGCG (1ULL << 20) +#define AMD_CG_SUPPORT_GFX_3D_CGLS (1ULL << 21) +#define AMD_CG_SUPPORT_DRM_MGCG (1ULL << 22) +#define AMD_CG_SUPPORT_DF_MGCG (1ULL << 23) +#define AMD_CG_SUPPORT_VCN_MGCG (1ULL << 24) +#define AMD_CG_SUPPORT_HDP_DS (1ULL << 25) +#define AMD_CG_SUPPORT_HDP_SD (1ULL << 26) +#define AMD_CG_SUPPORT_IH_CG (1ULL << 27) +#define AMD_CG_SUPPORT_ATHUB_LS (1ULL << 28) +#define AMD_CG_SUPPORT_ATHUB_MGCG (1ULL << 29) +#define AMD_CG_SUPPORT_JPEG_MGCG (1ULL << 30) +#define AMD_CG_SUPPORT_GFX_FGCG (1ULL << 31) /* PG flags */ #define AMD_PG_SUPPORT_GFX_PG (1 << 0) #define AMD_PG_SUPPORT_GFX_SMG (1 << 1) @@ -298,7 +298,7 @@ struct amd_ip_funcs { enum amd_clockgating_state state); int (*set_powergating_state)(void *handle, enum amd_powergating_state state); - void (*get_clockgating_state)(void *handle, u32 *flags); + void (*get_clockgating_state)(void *handle, u64 *flags); }; diff --git a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_offset.h b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_offset.h index 90350f46a0c4..363d2139cea2 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_offset.h @@ -988,4 +988,17 @@ #define mmMDM_WIG_PIPE_BUSY_BASE_IDX 1 +/* VCN 2_6_0 regs */ +#define mmUVD_RAS_VCPU_VCODEC_STATUS 0x0057 +#define mmUVD_RAS_VCPU_VCODEC_STATUS_BASE_IDX 1 +#define mmUVD_RAS_MMSCH_FATAL_ERROR 0x0058 +#define mmUVD_RAS_MMSCH_FATAL_ERROR_BASE_IDX 1 + + +/* JPEG 2_6_0 regs */ +#define mmUVD_RAS_JPEG0_STATUS 0x0059 +#define mmUVD_RAS_JPEG0_STATUS_BASE_IDX 1 +#define mmUVD_RAS_JPEG1_STATUS 0x005a +#define mmUVD_RAS_JPEG1_STATUS_BASE_IDX 1 + #endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_sh_mask.h index c41c59c30006..8de883b76d90 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_sh_mask.h @@ -3606,4 +3606,28 @@ #define UVD_LMI_CRC3__CRC32_MASK 0xFFFFFFFFL +/* VCN 2_6_0 UVD_RAS_VCPU_VCODEC_STATUS */ +#define UVD_RAS_VCPU_VCODEC_STATUS__POISONED_VF__SHIFT 0x0 +#define UVD_RAS_VCPU_VCODEC_STATUS__POISONED_PF__SHIFT 0x1f +#define UVD_RAS_VCPU_VCODEC_STATUS__POISONED_VF_MASK 0x7FFFFFFFL +#define UVD_RAS_VCPU_VCODEC_STATUS__POISONED_PF_MASK 0x80000000L + +/* VCN 2_6_0 UVD_RAS_MMSCH_FATAL_ERROR */ +#define UVD_RAS_MMSCH_FATAL_ERROR__POISONED_VF__SHIFT 0x0 +#define UVD_RAS_MMSCH_FATAL_ERROR__POISONED_PF__SHIFT 0x1f +#define UVD_RAS_MMSCH_FATAL_ERROR__POISONED_VF_MASK 0x7FFFFFFFL +#define UVD_RAS_MMSCH_FATAL_ERROR__POISONED_PF_MASK 0x80000000L + +/* JPEG 2_6_0 UVD_RAS_JPEG0_STATUS */ +#define UVD_RAS_JPEG0_STATUS__POISONED_VF__SHIFT 0x0 +#define UVD_RAS_JPEG0_STATUS__POISONED_PF__SHIFT 0x1f +#define UVD_RAS_JPEG0_STATUS__POISONED_VF_MASK 0x7FFFFFFFL +#define UVD_RAS_JPEG0_STATUS__POISONED_PF_MASK 0x80000000L + +/* JPEG 2_6_0 UVD_RAS_JPEG1_STATUS */ +#define UVD_RAS_JPEG1_STATUS__POISONED_VF__SHIFT 0x0 +#define UVD_RAS_JPEG1_STATUS__POISONED_PF__SHIFT 0x1f +#define UVD_RAS_JPEG1_STATUS__POISONED_VF_MASK 0x7FFFFFFFL +#define UVD_RAS_JPEG1_STATUS__POISONED_PF_MASK 0x80000000L + #endif diff --git a/drivers/gpu/drm/amd/include/discovery.h b/drivers/gpu/drm/amd/include/discovery.h index a486769b66c6..b25026c3ec96 100644 --- a/drivers/gpu/drm/amd/include/discovery.h +++ b/drivers/gpu/drm/amd/include/discovery.h @@ -93,7 +93,7 @@ typedef struct ip uint8_t harvest : 4; /* Harvest */ uint8_t reserved : 4; /* Placeholder field */ #endif - uint32_t base_address[1]; /* variable number of Addresses */ + uint32_t base_address[]; /* variable number of Addresses */ } ip; typedef struct die_header diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 5504d81c77b7..5472f9936feb 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -427,6 +427,7 @@ int amdgpu_dpm_read_sensor(struct amdgpu_device *adev, enum amd_pp_sensors senso void amdgpu_dpm_compute_clocks(struct amdgpu_device *adev) { const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + int i; if (!adev->pm.dpm_enabled) return; @@ -434,6 +435,15 @@ void amdgpu_dpm_compute_clocks(struct amdgpu_device *adev) if (!pp_funcs->pm_compute_clocks) return; + if (adev->mode_info.num_crtc) + amdgpu_display_bandwidth_update(adev); + + for (i = 0; i < AMDGPU_MAX_RINGS; i++) { + struct amdgpu_ring *ring = adev->rings[i]; + if (ring && ring->sched.ready) + amdgpu_fence_wait_empty(ring); + } + mutex_lock(&adev->pm.mutex); pp_funcs->pm_compute_clocks(adev->powerplay.pp_handle); mutex_unlock(&adev->pm.mutex); @@ -443,6 +453,20 @@ void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable) { int ret = 0; + if (adev->family == AMDGPU_FAMILY_SI) { + mutex_lock(&adev->pm.mutex); + if (enable) { + adev->pm.dpm.uvd_active = true; + adev->pm.dpm.state = POWER_STATE_TYPE_INTERNAL_UVD; + } else { + adev->pm.dpm.uvd_active = false; + } + mutex_unlock(&adev->pm.mutex); + + amdgpu_dpm_compute_clocks(adev); + return; + } + ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable); if (ret) DRM_ERROR("Dpm %s uvd failed, ret = %d. \n", @@ -453,6 +477,21 @@ void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable) { int ret = 0; + if (adev->family == AMDGPU_FAMILY_SI) { + mutex_lock(&adev->pm.mutex); + if (enable) { + adev->pm.dpm.vce_active = true; + /* XXX select vce level based on ring/task */ + adev->pm.dpm.vce_level = AMD_VCE_LEVEL_AC_ALL; + } else { + adev->pm.dpm.vce_active = false; + } + mutex_unlock(&adev->pm.mutex); + + amdgpu_dpm_compute_clocks(adev); + return; + } + ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable); if (ret) DRM_ERROR("Dpm %s vce failed, ret = %d. \n", @@ -751,7 +790,7 @@ int amdgpu_dpm_force_performance_level(struct amdgpu_device *adev, AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK | AMD_DPM_FORCED_LEVEL_PROFILE_PEAK; - if (!pp_funcs->force_performance_level) + if (!pp_funcs || !pp_funcs->force_performance_level) return 0; if (adev->pm.dpm.thermal_active) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 5cd67ddf8495..78ec9b71197d 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -1954,8 +1954,9 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ uint32_t mask, enum amdgpu_device_attr_states *states) { struct device_attribute *dev_attr = &attr->dev_attr; + uint32_t mp1_ver = adev->ip_versions[MP1_HWIP][0]; + uint32_t gc_ver = adev->ip_versions[GC_HWIP][0]; const char *attr_name = dev_attr->attr.name; - enum amd_asic_type asic_type = adev->asic_type; if (!(attr->flags & mask)) { *states = ATTR_STATE_UNSUPPORTED; @@ -1965,53 +1966,63 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ #define DEVICE_ATTR_IS(_name) (!strcmp(attr_name, #_name)) if (DEVICE_ATTR_IS(pp_dpm_socclk)) { - if (asic_type < CHIP_VEGA10) + if (gc_ver < IP_VERSION(9, 0, 0)) *states = ATTR_STATE_UNSUPPORTED; } else if (DEVICE_ATTR_IS(pp_dpm_dcefclk)) { - if (asic_type < CHIP_VEGA10 || - asic_type == CHIP_ARCTURUS || - asic_type == CHIP_ALDEBARAN) + if (gc_ver < IP_VERSION(9, 0, 0) || + gc_ver == IP_VERSION(9, 4, 1) || + gc_ver == IP_VERSION(9, 4, 2)) *states = ATTR_STATE_UNSUPPORTED; } else if (DEVICE_ATTR_IS(pp_dpm_fclk)) { - if (asic_type < CHIP_VEGA20) + if (mp1_ver < IP_VERSION(10, 0, 0)) *states = ATTR_STATE_UNSUPPORTED; } else if (DEVICE_ATTR_IS(pp_od_clk_voltage)) { *states = ATTR_STATE_UNSUPPORTED; if (amdgpu_dpm_is_overdrive_supported(adev)) *states = ATTR_STATE_SUPPORTED; } else if (DEVICE_ATTR_IS(mem_busy_percent)) { - if (adev->flags & AMD_IS_APU || asic_type == CHIP_VEGA10) + if (adev->flags & AMD_IS_APU || gc_ver == IP_VERSION(9, 0, 1)) *states = ATTR_STATE_UNSUPPORTED; } else if (DEVICE_ATTR_IS(pcie_bw)) { /* PCIe Perf counters won't work on APU nodes */ if (adev->flags & AMD_IS_APU) *states = ATTR_STATE_UNSUPPORTED; } else if (DEVICE_ATTR_IS(unique_id)) { - if (asic_type != CHIP_VEGA10 && - asic_type != CHIP_VEGA20 && - asic_type != CHIP_ARCTURUS && - asic_type != CHIP_ALDEBARAN) + switch (gc_ver) { + case IP_VERSION(9, 0, 1): + case IP_VERSION(9, 4, 0): + case IP_VERSION(9, 4, 1): + case IP_VERSION(9, 4, 2): + case IP_VERSION(10, 3, 0): + *states = ATTR_STATE_SUPPORTED; + break; + default: *states = ATTR_STATE_UNSUPPORTED; + } } else if (DEVICE_ATTR_IS(pp_features)) { - if (adev->flags & AMD_IS_APU || asic_type < CHIP_VEGA10) + if (adev->flags & AMD_IS_APU || gc_ver < IP_VERSION(9, 0, 0)) *states = ATTR_STATE_UNSUPPORTED; } else if (DEVICE_ATTR_IS(gpu_metrics)) { - if (asic_type < CHIP_VEGA12) + if (gc_ver < IP_VERSION(9, 1, 0)) *states = ATTR_STATE_UNSUPPORTED; } else if (DEVICE_ATTR_IS(pp_dpm_vclk)) { - if (!(asic_type == CHIP_VANGOGH || asic_type == CHIP_SIENNA_CICHLID)) + if (!(gc_ver == IP_VERSION(10, 3, 1) || + gc_ver == IP_VERSION(10, 3, 0) || + gc_ver == IP_VERSION(10, 1, 2))) *states = ATTR_STATE_UNSUPPORTED; } else if (DEVICE_ATTR_IS(pp_dpm_dclk)) { - if (!(asic_type == CHIP_VANGOGH || asic_type == CHIP_SIENNA_CICHLID)) + if (!(gc_ver == IP_VERSION(10, 3, 1) || + gc_ver == IP_VERSION(10, 3, 0) || + gc_ver == IP_VERSION(10, 1, 2))) *states = ATTR_STATE_UNSUPPORTED; } else if (DEVICE_ATTR_IS(pp_power_profile_mode)) { if (amdgpu_dpm_get_power_profile_mode(adev, NULL) == -EOPNOTSUPP) *states = ATTR_STATE_UNSUPPORTED; } - switch (asic_type) { - case CHIP_ARCTURUS: - case CHIP_ALDEBARAN: + switch (gc_ver) { + case IP_VERSION(9, 4, 1): + case IP_VERSION(9, 4, 2): /* the Mi series card does not support standalone mclk/socclk/fclk level setting */ if (DEVICE_ATTR_IS(pp_dpm_mclk) || DEVICE_ATTR_IS(pp_dpm_socclk) || @@ -2026,7 +2037,7 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ if (DEVICE_ATTR_IS(pp_dpm_dcefclk)) { /* SMU MP1 does not support dcefclk level setting */ - if (asic_type >= CHIP_NAVI10) { + if (gc_ver >= IP_VERSION(10, 0, 0)) { dev_attr->attr.mode &= ~S_IWUGO; dev_attr->store = NULL; } @@ -2864,8 +2875,9 @@ static ssize_t amdgpu_hwmon_show_power_label(struct device *dev, char *buf) { struct amdgpu_device *adev = dev_get_drvdata(dev); + uint32_t gc_ver = adev->ip_versions[GC_HWIP][0]; - if (adev->asic_type == CHIP_VANGOGH) + if (gc_ver == IP_VERSION(10, 3, 1)) return sysfs_emit(buf, "%s\n", to_sensor_dev_attr(attr)->index == PP_PWR_TYPE_FAST ? "fastPPT" : "slowPPT"); @@ -3177,6 +3189,7 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, struct device *dev = kobj_to_dev(kobj); struct amdgpu_device *adev = dev_get_drvdata(dev); umode_t effective_mode = attr->mode; + uint32_t gc_ver = adev->ip_versions[GC_HWIP][0]; /* under multi-vf mode, the hwmon attributes are all not supported */ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) @@ -3245,18 +3258,18 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr)) /* can't manage state */ effective_mode &= ~S_IWUSR; + /* not implemented yet for GC 10.3.1 APUs */ if (((adev->family == AMDGPU_FAMILY_SI) || - ((adev->flags & AMD_IS_APU) && - (adev->asic_type != CHIP_VANGOGH))) && /* not implemented yet */ + ((adev->flags & AMD_IS_APU) && (gc_ver != IP_VERSION(10, 3, 1)))) && (attr == &sensor_dev_attr_power1_cap_max.dev_attr.attr || - attr == &sensor_dev_attr_power1_cap_min.dev_attr.attr|| + attr == &sensor_dev_attr_power1_cap_min.dev_attr.attr || attr == &sensor_dev_attr_power1_cap.dev_attr.attr || attr == &sensor_dev_attr_power1_cap_default.dev_attr.attr)) return 0; + /* not implemented yet for APUs having <= GC 9.3.0 */ if (((adev->family == AMDGPU_FAMILY_SI) || - ((adev->flags & AMD_IS_APU) && - (adev->asic_type < CHIP_RENOIR))) && /* not implemented yet */ + ((adev->flags & AMD_IS_APU) && (gc_ver < IP_VERSION(9, 3, 0)))) && (attr == &sensor_dev_attr_power1_average.dev_attr.attr)) return 0; @@ -3294,8 +3307,7 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, return 0; /* only SOC15 dGPUs support hotspot and mem temperatures */ - if (((adev->flags & AMD_IS_APU) || - adev->asic_type < CHIP_VEGA10) && + if (((adev->flags & AMD_IS_APU) || gc_ver < IP_VERSION(9, 0, 0)) && (attr == &sensor_dev_attr_temp2_crit.dev_attr.attr || attr == &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr || attr == &sensor_dev_attr_temp3_crit.dev_attr.attr || @@ -3310,13 +3322,13 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, return 0; /* only Vangogh has fast PPT limit and power labels */ - if (!(adev->asic_type == CHIP_VANGOGH) && + if (!(gc_ver == IP_VERSION(10, 3, 1)) && (attr == &sensor_dev_attr_power2_average.dev_attr.attr || - attr == &sensor_dev_attr_power2_cap_max.dev_attr.attr || + attr == &sensor_dev_attr_power2_cap_max.dev_attr.attr || attr == &sensor_dev_attr_power2_cap_min.dev_attr.attr || - attr == &sensor_dev_attr_power2_cap.dev_attr.attr || - attr == &sensor_dev_attr_power2_cap_default.dev_attr.attr || - attr == &sensor_dev_attr_power2_label.dev_attr.attr)) + attr == &sensor_dev_attr_power2_cap.dev_attr.attr || + attr == &sensor_dev_attr_power2_cap_default.dev_attr.attr || + attr == &sensor_dev_attr_power2_label.dev_attr.attr)) return 0; return effective_mode; @@ -3421,6 +3433,8 @@ static void amdgpu_debugfs_prints_cpu_info(struct seq_file *m, static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *adev) { + uint32_t mp1_ver = adev->ip_versions[MP1_HWIP][0]; + uint32_t gc_ver = adev->ip_versions[GC_HWIP][0]; uint32_t value; uint64_t value64 = 0; uint32_t query = 0; @@ -3467,7 +3481,8 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_ENABLED_SMC_FEATURES_MASK, (void *)&value64, &size)) seq_printf(m, "SMC Feature Mask: 0x%016llx\n", value64); - if (adev->asic_type > CHIP_VEGA20) { + /* ASICs greater than CHIP_VEGA20 supports these sensors */ + if (gc_ver != IP_VERSION(9, 4, 0) && mp1_ver > IP_VERSION(9, 0, 0)) { /* VCN clocks */ if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCN_POWER_STATE, (void *)&value, &size)) { if (!value) { @@ -3511,7 +3526,7 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a return 0; } -static void amdgpu_parse_cg_state(struct seq_file *m, u32 flags) +static void amdgpu_parse_cg_state(struct seq_file *m, u64 flags) { int i; @@ -3524,7 +3539,7 @@ static int amdgpu_debugfs_pm_info_show(struct seq_file *m, void *unused) { struct amdgpu_device *adev = (struct amdgpu_device *)m->private; struct drm_device *dev = adev_to_drm(adev); - u32 flags = 0; + u64 flags = 0; int r; if (amdgpu_in_reset(adev)) @@ -3546,7 +3561,7 @@ static int amdgpu_debugfs_pm_info_show(struct seq_file *m, void *unused) amdgpu_device_ip_get_clockgating_state(adev, &flags); - seq_printf(m, "Clock Gating Flags Mask: 0x%x\n", flags); + seq_printf(m, "Clock Gating Flags Mask: 0x%llx\n", flags); amdgpu_parse_cg_state(m, flags); seq_printf(m, "\n"); diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_pm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_pm.h index a920515e2274..52045ad59bed 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_pm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_pm.h @@ -26,7 +26,7 @@ struct cg_flag_name { - u32 flag; + u64 flag; const char *name; }; diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c index 9613c6181c17..d3fe149d8476 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c @@ -1028,16 +1028,6 @@ static int amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev) void amdgpu_legacy_dpm_compute_clocks(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int i = 0; - - if (adev->mode_info.num_crtc) - amdgpu_display_bandwidth_update(adev); - - for (i = 0; i < AMDGPU_MAX_RINGS; i++) { - struct amdgpu_ring *ring = adev->rings[i]; - if (ring && ring->sched.ready) - amdgpu_fence_wait_empty(ring); - } amdgpu_dpm_get_active_displays(adev); diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c index caae54487f9c..49c398ec0aaf 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c @@ -3892,40 +3892,6 @@ static int si_set_boot_state(struct amdgpu_device *adev) } #endif -static int si_set_powergating_by_smu(void *handle, - uint32_t block_type, - bool gate) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - switch (block_type) { - case AMD_IP_BLOCK_TYPE_UVD: - if (!gate) { - adev->pm.dpm.uvd_active = true; - adev->pm.dpm.state = POWER_STATE_TYPE_INTERNAL_UVD; - } else { - adev->pm.dpm.uvd_active = false; - } - - amdgpu_legacy_dpm_compute_clocks(handle); - break; - case AMD_IP_BLOCK_TYPE_VCE: - if (!gate) { - adev->pm.dpm.vce_active = true; - /* XXX select vce level based on ring/task */ - adev->pm.dpm.vce_level = AMD_VCE_LEVEL_AC_ALL; - } else { - adev->pm.dpm.vce_active = false; - } - - amdgpu_legacy_dpm_compute_clocks(handle); - break; - default: - break; - } - return 0; -} - static int si_set_sw_state(struct amdgpu_device *adev) { return (amdgpu_si_send_msg_to_smc(adev, PPSMC_MSG_SwitchToSwState) == PPSMC_Result_OK) ? @@ -7331,17 +7297,15 @@ static int si_parse_power_table(struct amdgpu_device *adev) if (!adev->pm.dpm.ps) return -ENOMEM; power_state_offset = (u8 *)state_array->states; - for (i = 0; i < state_array->ucNumEntries; i++) { + for (adev->pm.dpm.num_ps = 0, i = 0; i < state_array->ucNumEntries; i++) { u8 *idx; power_state = (union pplib_power_state *)power_state_offset; non_clock_array_index = power_state->v2.nonClockInfoIndex; non_clock_info = (struct _ATOM_PPLIB_NONCLOCK_INFO *) &non_clock_info_array->nonClockInfo[non_clock_array_index]; ps = kzalloc(sizeof(struct si_ps), GFP_KERNEL); - if (ps == NULL) { - kfree(adev->pm.dpm.ps); + if (ps == NULL) return -ENOMEM; - } adev->pm.dpm.ps[i].ps_priv = ps; si_parse_pplib_non_clock_info(adev, &adev->pm.dpm.ps[i], non_clock_info, @@ -7363,8 +7327,8 @@ static int si_parse_power_table(struct amdgpu_device *adev) k++; } power_state_offset += 2 + power_state->v2.ucNumDPMLevels; + adev->pm.dpm.num_ps++; } - adev->pm.dpm.num_ps = state_array->ucNumEntries; /* fill in the vce power states */ for (i = 0; i < adev->pm.dpm.num_of_vce_states; i++) { @@ -8125,7 +8089,6 @@ static const struct amd_pm_funcs si_dpm_funcs = { .print_power_state = &si_dpm_print_power_state, .debugfs_print_current_performance_level = &si_dpm_debugfs_print_current_performance_level, .force_performance_level = &si_dpm_force_performance_level, - .set_powergating_by_smu = &si_set_powergating_by_smu, .vblank_too_short = &si_dpm_vblank_too_short, .set_fan_control_mode = &si_dpm_set_fan_control_mode, .get_fan_control_mode = &si_dpm_get_fan_control_mode, diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c index a2da46bf3985..1eb4e613b27a 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c @@ -671,6 +671,22 @@ static int pp_dpm_force_clock_level(void *handle, return hwmgr->hwmgr_func->force_clock_level(hwmgr, type, mask); } +static int pp_dpm_emit_clock_levels(void *handle, + enum pp_clock_type type, + char *buf, + int *offset) +{ + struct pp_hwmgr *hwmgr = handle; + + if (!hwmgr || !hwmgr->pm_en) + return -EOPNOTSUPP; + + if (!hwmgr->hwmgr_func->emit_clock_levels) + return -ENOENT; + + return hwmgr->hwmgr_func->emit_clock_levels(hwmgr, type, buf, offset); +} + static int pp_dpm_print_clock_levels(void *handle, enum pp_clock_type type, char *buf) { @@ -1487,16 +1503,6 @@ static void pp_pm_compute_clocks(void *handle) { struct pp_hwmgr *hwmgr = handle; struct amdgpu_device *adev = hwmgr->adev; - int i = 0; - - if (adev->mode_info.num_crtc) - amdgpu_display_bandwidth_update(adev); - - for (i = 0; i < AMDGPU_MAX_RINGS; i++) { - struct amdgpu_ring *ring = adev->rings[i]; - if (ring && ring->sched.ready) - amdgpu_fence_wait_empty(ring); - } if (!amdgpu_device_has_dc_support(adev)) { amdgpu_dpm_get_active_displays(adev); @@ -1535,6 +1541,7 @@ static const struct amd_pm_funcs pp_dpm_funcs = { .get_pp_table = pp_dpm_get_pp_table, .set_pp_table = pp_dpm_set_pp_table, .force_clock_level = pp_dpm_force_clock_level, + .emit_clock_levels = pp_dpm_emit_clock_levels, .print_clock_levels = pp_dpm_print_clock_levels, .get_sclk_od = pp_dpm_get_sclk_od, .set_sclk_od = pp_dpm_set_sclk_od, diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c index 37324f2009ca..99bfe5efe171 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c @@ -4625,6 +4625,152 @@ static int vega10_get_current_pcie_link_speed_level(struct pp_hwmgr *hwmgr) >> PSWUSP0_PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT; } +static int vega10_emit_clock_levels(struct pp_hwmgr *hwmgr, + enum pp_clock_type type, char *buf, int *offset) +{ + struct vega10_hwmgr *data = hwmgr->backend; + struct vega10_single_dpm_table *sclk_table = &(data->dpm_table.gfx_table); + struct vega10_single_dpm_table *mclk_table = &(data->dpm_table.mem_table); + struct vega10_single_dpm_table *soc_table = &(data->dpm_table.soc_table); + struct vega10_single_dpm_table *dcef_table = &(data->dpm_table.dcef_table); + struct vega10_odn_clock_voltage_dependency_table *podn_vdd_dep = NULL; + uint32_t gen_speed, lane_width, current_gen_speed, current_lane_width; + PPTable_t *pptable = &(data->smc_state_table.pp_table); + + uint32_t i, now, count = 0; + int ret = 0; + + switch (type) { + case PP_SCLK: + if (data->registry_data.sclk_dpm_key_disabled) + return -EOPNOTSUPP; + + ret = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrentGfxclkIndex, &now); + if (unlikely(ret != 0)) + return ret; + + if (hwmgr->pp_one_vf && + (hwmgr->dpm_level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK)) + count = 5; + else + count = sclk_table->count; + for (i = 0; i < count; i++) + *offset += sysfs_emit_at(buf, *offset, "%d: %uMhz %s\n", + i, sclk_table->dpm_levels[i].value / 100, + (i == now) ? "*" : ""); + break; + case PP_MCLK: + if (data->registry_data.mclk_dpm_key_disabled) + return -EOPNOTSUPP; + + ret = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrentUclkIndex, &now); + if (unlikely(ret != 0)) + return ret; + + for (i = 0; i < mclk_table->count; i++) + *offset += sysfs_emit_at(buf, *offset, "%d: %uMhz %s\n", + i, mclk_table->dpm_levels[i].value / 100, + (i == now) ? "*" : ""); + break; + case PP_SOCCLK: + if (data->registry_data.socclk_dpm_key_disabled) + return -EOPNOTSUPP; + + ret = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrentSocclkIndex, &now); + if (unlikely(ret != 0)) + return ret; + + for (i = 0; i < soc_table->count; i++) + *offset += sysfs_emit_at(buf, *offset, "%d: %uMhz %s\n", + i, soc_table->dpm_levels[i].value / 100, + (i == now) ? "*" : ""); + break; + case PP_DCEFCLK: + if (data->registry_data.dcefclk_dpm_key_disabled) + return -EOPNOTSUPP; + + ret = smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_GetClockFreqMHz, + CLK_DCEFCLK, &now); + if (unlikely(ret != 0)) + return ret; + + for (i = 0; i < dcef_table->count; i++) + *offset += sysfs_emit_at(buf, *offset, "%d: %uMhz %s\n", + i, dcef_table->dpm_levels[i].value / 100, + (dcef_table->dpm_levels[i].value / 100 == now) ? + "*" : ""); + break; + case PP_PCIE: + current_gen_speed = + vega10_get_current_pcie_link_speed_level(hwmgr); + current_lane_width = + vega10_get_current_pcie_link_width_level(hwmgr); + for (i = 0; i < NUM_LINK_LEVELS; i++) { + gen_speed = pptable->PcieGenSpeed[i]; + lane_width = pptable->PcieLaneCount[i]; + + *offset += sysfs_emit_at(buf, *offset, "%d: %s %s %s\n", i, + (gen_speed == 0) ? "2.5GT/s," : + (gen_speed == 1) ? "5.0GT/s," : + (gen_speed == 2) ? "8.0GT/s," : + (gen_speed == 3) ? "16.0GT/s," : "", + (lane_width == 1) ? "x1" : + (lane_width == 2) ? "x2" : + (lane_width == 3) ? "x4" : + (lane_width == 4) ? "x8" : + (lane_width == 5) ? "x12" : + (lane_width == 6) ? "x16" : "", + (current_gen_speed == gen_speed) && + (current_lane_width == lane_width) ? + "*" : ""); + } + break; + + case OD_SCLK: + if (!hwmgr->od_enabled) + return -EOPNOTSUPP; + + *offset += sysfs_emit_at(buf, *offset, "%s:\n", "OD_SCLK"); + podn_vdd_dep = &data->odn_dpm_table.vdd_dep_on_sclk; + for (i = 0; i < podn_vdd_dep->count; i++) + *offset += sysfs_emit_at(buf, *offset, "%d: %10uMhz %10umV\n", + i, podn_vdd_dep->entries[i].clk / 100, + podn_vdd_dep->entries[i].vddc); + break; + case OD_MCLK: + if (!hwmgr->od_enabled) + return -EOPNOTSUPP; + + *offset += sysfs_emit_at(buf, *offset, "%s:\n", "OD_MCLK"); + podn_vdd_dep = &data->odn_dpm_table.vdd_dep_on_mclk; + for (i = 0; i < podn_vdd_dep->count; i++) + *offset += sysfs_emit_at(buf, *offset, "%d: %10uMhz %10umV\n", + i, podn_vdd_dep->entries[i].clk/100, + podn_vdd_dep->entries[i].vddc); + break; + case OD_RANGE: + if (!hwmgr->od_enabled) + return -EOPNOTSUPP; + + *offset += sysfs_emit_at(buf, *offset, "%s:\n", "OD_RANGE"); + *offset += sysfs_emit_at(buf, *offset, "SCLK: %7uMHz %10uMHz\n", + data->golden_dpm_table.gfx_table.dpm_levels[0].value/100, + hwmgr->platform_descriptor.overdriveLimit.engineClock/100); + *offset += sysfs_emit_at(buf, *offset, "MCLK: %7uMHz %10uMHz\n", + data->golden_dpm_table.mem_table.dpm_levels[0].value/100, + hwmgr->platform_descriptor.overdriveLimit.memoryClock/100); + *offset += sysfs_emit_at(buf, *offset, "VDDC: %7umV %11umV\n", + data->odn_dpm_table.min_vddc, + data->odn_dpm_table.max_vddc); + break; + default: + ret = -ENOENT; + break; + } + return ret; +} + static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr, enum pp_clock_type type, char *buf) { @@ -5559,6 +5705,7 @@ static const struct pp_hwmgr_func vega10_hwmgr_funcs = { .set_watermarks_for_clocks_ranges = vega10_set_watermarks_for_clocks_ranges, .display_clock_voltage_request = vega10_display_clock_voltage_request, .force_clock_level = vega10_force_clock_level, + .emit_clock_levels = vega10_emit_clock_levels, .print_clock_levels = vega10_print_clock_levels, .display_config_changed = vega10_display_configuration_changed_task, .powergate_uvd = vega10_power_gate_uvd, diff --git a/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h index 4f7f2f455301..27f8d0e0e6a8 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h +++ b/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h @@ -313,6 +313,8 @@ struct pp_hwmgr_func { int (*get_max_high_clocks)(struct pp_hwmgr *hwmgr, struct amd_pp_simple_clock_info *clocks); int (*power_off_asic)(struct pp_hwmgr *hwmgr); int (*force_clock_level)(struct pp_hwmgr *hwmgr, enum pp_clock_type type, uint32_t mask); + int (*emit_clock_levels)(struct pp_hwmgr *hwmgr, + enum pp_clock_type type, char *buf, int *offset); int (*print_clock_levels)(struct pp_hwmgr *hwmgr, enum pp_clock_type type, char *buf); int (*powergate_gfx)(struct pp_hwmgr *hwmgr, bool enable); int (*get_sclk_od)(struct pp_hwmgr *hwmgr); diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index ef57b6089c69..46e34ed8a3c8 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -1333,6 +1333,8 @@ typedef enum { METRICS_VOLTAGE_VDDGFX, METRICS_SS_APU_SHARE, METRICS_SS_DGPU_SHARE, + METRICS_UNIQUE_ID_UPPER32, + METRICS_UNIQUE_ID_LOWER32, } MetricsMember_t; enum smu_cmn2asic_mapping_type { diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_sienna_cichlid.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_sienna_cichlid.h index 3e4a314ef925..08f0bb2af5d2 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_sienna_cichlid.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_sienna_cichlid.h @@ -1419,8 +1419,8 @@ typedef struct { uint8_t PcieRate ; uint8_t PcieWidth ; uint16_t AverageGfxclkFrequencyTarget; - uint16_t Padding16_2; + uint16_t Padding16_2; } SmuMetrics_t; typedef struct { @@ -1476,8 +1476,8 @@ typedef struct { uint8_t PcieRate ; uint8_t PcieWidth ; uint16_t AverageGfxclkFrequencyTarget; - uint16_t Padding16_2; + uint16_t Padding16_2; } SmuMetrics_V2_t; typedef struct { @@ -1535,6 +1535,9 @@ typedef struct { uint8_t PcieWidth; uint16_t AverageGfxclkFrequencyTarget; + uint32_t PublicSerialNumLower32; + uint32_t PublicSerialNumUpper32; + } SmuMetrics_V3_t; typedef struct { diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 38f04836c82f..ab3e9d8b831e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -715,6 +715,14 @@ static int sienna_cichlid_get_smu_metrics_data(struct smu_context *smu, *value = use_metrics_v3 ? metrics_v3->CurrFanSpeed : use_metrics_v2 ? metrics_v2->CurrFanSpeed : metrics->CurrFanSpeed; break; + case METRICS_UNIQUE_ID_UPPER32: + /* Only supported in 0x3A5300+, metrics_v3 requires 0x3A4900+ */ + *value = use_metrics_v3 ? metrics_v3->PublicSerialNumUpper32 : 0; + break; + case METRICS_UNIQUE_ID_LOWER32: + /* Only supported in 0x3A5300+, metrics_v3 requires 0x3A4900+ */ + *value = use_metrics_v3 ? metrics_v3->PublicSerialNumLower32 : 0; + break; default: *value = UINT_MAX; break; @@ -1773,6 +1781,28 @@ static int sienna_cichlid_read_sensor(struct smu_context *smu, return ret; } +static void sienna_cichlid_get_unique_id(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + uint32_t upper32 = 0, lower32 = 0; + + /* Only supported as of version 0.58.83.0 and only on Sienna Cichlid */ + if (smu->smc_fw_version < 0x3A5300 || + smu->adev->ip_versions[MP1_HWIP][0] != IP_VERSION(11, 0, 7)) + return; + + if (sienna_cichlid_get_smu_metrics_data(smu, METRICS_UNIQUE_ID_UPPER32, &upper32)) + goto out; + if (sienna_cichlid_get_smu_metrics_data(smu, METRICS_UNIQUE_ID_LOWER32, &lower32)) + goto out; + +out: + + adev->unique_id = ((uint64_t)upper32 << 32) | lower32; + if (adev->serial[0] == '\0') + sprintf(adev->serial, "%016llx", adev->unique_id); +} + static int sienna_cichlid_get_uclk_dpm_states(struct smu_context *smu, uint32_t *clocks_in_khz, uint32_t *num_states) { uint32_t num_discrete_levels = 0; @@ -4182,6 +4212,7 @@ static const struct pptable_funcs sienna_cichlid_ppt_funcs = { .get_ecc_info = sienna_cichlid_get_ecc_info, .get_default_config_table_settings = sienna_cichlid_get_default_config_table_settings, .set_config_table = sienna_cichlid_set_config_table, + .get_unique_id = sienna_cichlid_get_unique_id, }; void sienna_cichlid_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index cd81f848d45a..38af648cb857 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -650,6 +650,12 @@ static int aldebaran_get_smu_metrics_data(struct smu_context *smu, case METRICS_THROTTLER_STATUS: *value = metrics->ThrottlerStatus; break; + case METRICS_UNIQUE_ID_UPPER32: + *value = metrics->PublicSerialNumUpper32; + break; + case METRICS_UNIQUE_ID_LOWER32: + *value = metrics->PublicSerialNumLower32; + break; default: *value = UINT_MAX; break; @@ -1614,16 +1620,12 @@ static void aldebaran_i2c_control_fini(struct smu_context *smu) static void aldebaran_get_unique_id(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; - SmuMetrics_t *metrics = smu->smu_table.metrics_table; uint32_t upper32 = 0, lower32 = 0; - int ret; - ret = smu_cmn_get_metrics_table(smu, NULL, false); - if (ret) + if (aldebaran_get_smu_metrics_data(smu, METRICS_UNIQUE_ID_UPPER32, &upper32)) + goto out; + if (aldebaran_get_smu_metrics_data(smu, METRICS_UNIQUE_ID_LOWER32, &lower32)) goto out; - - upper32 = metrics->PublicSerialNumUpper32; - lower32 = metrics->PublicSerialNumLower32; out: adev->unique_id = ((uint64_t)upper32 << 32) | lower32; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index b8d0c70ff668..f12319883a80 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -356,9 +356,11 @@ int smu_cmn_wait_for_response(struct smu_context *smu) * completion of the command, and return back a value from the SMU in * @read_arg pointer. * - * Return 0 on success, -errno on error, if we weren't able to send - * the message or if the message completed with some kind of - * error. See __smu_cmn_reg2errno() for details of the -errno. + * Return 0 on success, -errno when a problem is encountered sending + * message or receiving reply. If there is a PCI bus recovery or + * the destination is a virtual GPU which does not allow this message + * type, the message is simply dropped and success is also returned. + * See __smu_cmn_reg2errno() for details of the -errno. * * If we weren't able to send the message to the SMU, we also print * the error to the standard log. diff --git a/drivers/gpu/drm/drm_of.c b/drivers/gpu/drm/drm_of.c index f4df344509a8..9a2cfab3a177 100644 --- a/drivers/gpu/drm/drm_of.c +++ b/drivers/gpu/drm/drm_of.c @@ -214,29 +214,6 @@ int drm_of_encoder_active_endpoint(struct device_node *node, } EXPORT_SYMBOL_GPL(drm_of_encoder_active_endpoint); -static int find_panel_or_bridge(struct device_node *node, - struct drm_panel **panel, - struct drm_bridge **bridge) -{ - if (panel) { - *panel = of_drm_find_panel(node); - if (!IS_ERR(*panel)) - return 0; - - /* Clear the panel pointer in case of error. */ - *panel = NULL; - } - - /* No panel found yet, check for a bridge next. */ - if (bridge) { - *bridge = of_drm_find_bridge(node); - if (*bridge) - return 0; - } - - return -EPROBE_DEFER; -} - /** * drm_of_find_panel_or_bridge - return connected panel or bridge device * @np: device tree node containing encoder output ports @@ -259,44 +236,49 @@ int drm_of_find_panel_or_bridge(const struct device_node *np, struct drm_panel **panel, struct drm_bridge **bridge) { - struct device_node *node; - int ret; + int ret = -EPROBE_DEFER; + struct device_node *remote; if (!panel && !bridge) return -EINVAL; - if (panel) *panel = NULL; - if (bridge) - *bridge = NULL; - - /* Check for a graph on the device node first. */ - if (of_graph_is_present(np)) { - node = of_graph_get_remote_node(np, port, endpoint); - if (node) { - ret = find_panel_or_bridge(node, panel, bridge); - of_node_put(node); - - if (!ret) - return 0; - } - } - /* Otherwise check for any child node other than port/ports. */ - for_each_available_child_of_node(np, node) { - if (of_node_name_eq(node, "port") || - of_node_name_eq(node, "ports")) - continue; + /* + * of_graph_get_remote_node() produces a noisy error message if port + * node isn't found and the absence of the port is a legit case here, + * so at first we silently check whether graph presents in the + * device-tree node. + */ + if (!of_graph_is_present(np)) + return -ENODEV; - ret = find_panel_or_bridge(node, panel, bridge); - of_node_put(node); + remote = of_graph_get_remote_node(np, port, endpoint); + if (!remote) + return -ENODEV; + + if (panel) { + *panel = of_drm_find_panel(remote); + if (!IS_ERR(*panel)) + ret = 0; + else + *panel = NULL; + } + + /* No panel found yet, check for a bridge next. */ + if (bridge) { + if (ret) { + *bridge = of_drm_find_bridge(remote); + if (*bridge) + ret = 0; + } else { + *bridge = NULL; + } - /* Stop at the first found occurrence. */ - if (!ret) - return 0; } - return -EPROBE_DEFER; + of_node_put(remote); + return ret; } EXPORT_SYMBOL_GPL(drm_of_find_panel_or_bridge); diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index 06911f783f56..0f2837f07741 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -4,7 +4,7 @@ config DRM_I915 depends on DRM depends on X86 && PCI depends on !PREEMPT_RT - select INTEL_GTT + select INTEL_GTT if X86 select INTERVAL_TREE # we need shmfs for the swappable backing store, and in particular # the shmem_readpage() which depends upon tmpfs @@ -33,6 +33,7 @@ config DRM_I915 select VMAP_PFN select DRM_TTM select DRM_BUDDY + select AUXILIARY_BUS help Choose this option if you have a system that has "Intel Graphics Media Accelerator" or "HD Graphics" integrated graphics, diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 815589f8658c..cd0bf6806228 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -33,6 +33,7 @@ subdir-ccflags-y += -I$(srctree)/$(src) # core driver code i915-y += i915_driver.o \ + i915_drm_client.o \ i915_config.o \ i915_getparam.o \ i915_ioctl.o \ @@ -106,6 +107,8 @@ gt-y += \ gt/intel_gt_pm_debugfs.o \ gt/intel_gt_pm_irq.o \ gt/intel_gt_requests.o \ + gt/intel_gt_sysfs.o \ + gt/intel_gt_sysfs_pm.o \ gt/intel_gtt.o \ gt/intel_llc.o \ gt/intel_lrc.o \ @@ -125,6 +128,8 @@ gt-y += \ gt/intel_workarounds.o \ gt/shmem_utils.o \ gt/sysfs_engines.o +# x86 intel-gtt module support +gt-$(CONFIG_X86) += gt/intel_gt_gmch.o # autogenerated null render state gt-y += \ gt/gen6_renderstate.o \ @@ -185,9 +190,11 @@ i915-y += gt/uc/intel_uc.o \ gt/uc/intel_uc_fw.o \ gt/uc/intel_guc.o \ gt/uc/intel_guc_ads.o \ + gt/uc/intel_guc_capture.o \ gt/uc/intel_guc_ct.o \ gt/uc/intel_guc_debugfs.o \ gt/uc/intel_guc_fw.o \ + gt/uc/intel_guc_hwconfig.o \ gt/uc/intel_guc_log.o \ gt/uc/intel_guc_log_debugfs.o \ gt/uc/intel_guc_rc.o \ @@ -197,6 +204,9 @@ i915-y += gt/uc/intel_uc.o \ gt/uc/intel_huc_debugfs.o \ gt/uc/intel_huc_fw.o +# graphics system controller (GSC) support +i915-y += gt/intel_gsc.o + # modesetting core code i915-y += \ display/hsw_ips.o \ diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c index 97cf3cac0105..fb6cf30ee628 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c @@ -97,6 +97,14 @@ #define INTEL_EDP_BRIGHTNESS_OPTIMIZATION_1 0x359 +enum intel_dp_aux_backlight_modparam { + INTEL_DP_AUX_BACKLIGHT_AUTO = -1, + INTEL_DP_AUX_BACKLIGHT_OFF = 0, + INTEL_DP_AUX_BACKLIGHT_ON = 1, + INTEL_DP_AUX_BACKLIGHT_FORCE_VESA = 2, + INTEL_DP_AUX_BACKLIGHT_FORCE_INTEL = 3, +}; + /* Intel EDP backlight callbacks */ static bool intel_dp_aux_supports_hdr_backlight(struct intel_connector *connector) @@ -126,6 +134,24 @@ intel_dp_aux_supports_hdr_backlight(struct intel_connector *connector) return false; } + /* + * If we don't have HDR static metadata there is no way to + * runtime detect used range for nits based control. For now + * do not use Intel proprietary eDP backlight control if we + * don't have this data in panel EDID. In case we find panel + * which supports only nits based control, but doesn't provide + * HDR static metadata we need to start maintaining table of + * ranges for such panels. + */ + if (i915->params.enable_dpcd_backlight != INTEL_DP_AUX_BACKLIGHT_FORCE_INTEL && + !(connector->base.hdr_sink_metadata.hdmi_type1.metadata_type & + BIT(HDMI_STATIC_METADATA_TYPE1))) { + drm_info(&i915->drm, + "Panel is missing HDR static metadata. Possible support for Intel HDR backlight interface is not used. If your backlight controls don't work try booting with i915.enable_dpcd_backlight=%d. needs this, please file a _new_ bug report on drm/i915, see " FDO_BUG_URL " for details.\n", + INTEL_DP_AUX_BACKLIGHT_FORCE_INTEL); + return false; + } + panel->backlight.edp.intel.sdr_uses_aux = tcon_cap[2] & INTEL_EDP_SDR_TCON_BRIGHTNESS_AUX_CAP; @@ -413,14 +439,6 @@ static const struct intel_panel_bl_funcs intel_dp_vesa_bl_funcs = { .get = intel_dp_aux_vesa_get_backlight, }; -enum intel_dp_aux_backlight_modparam { - INTEL_DP_AUX_BACKLIGHT_AUTO = -1, - INTEL_DP_AUX_BACKLIGHT_OFF = 0, - INTEL_DP_AUX_BACKLIGHT_ON = 1, - INTEL_DP_AUX_BACKLIGHT_FORCE_VESA = 2, - INTEL_DP_AUX_BACKLIGHT_FORCE_INTEL = 3, -}; - int intel_dp_aux_init_backlight_funcs(struct intel_connector *connector) { struct drm_device *dev = connector->base.dev; diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c index 15b2716172f7..fb0e7e79e0cd 100644 --- a/drivers/gpu/drm/i915/display/intel_dpt.c +++ b/drivers/gpu/drm/i915/display/intel_dpt.c @@ -300,5 +300,5 @@ void intel_dpt_destroy(struct i915_address_space *vm) { struct i915_dpt *dpt = i915_vm_to_dpt(vm); - i915_vm_close(&dpt->vm); + i915_vm_put(&dpt->vm); } diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index e94923e9dbb1..9f5a6b79e95b 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -2029,7 +2029,7 @@ intel_user_framebuffer_create(struct drm_device *dev, /* object is backed with LMEM for discrete */ i915 = to_i915(obj->base.dev); - if (HAS_LMEM(i915) && !i915_gem_object_can_migrate(obj, INTEL_REGION_LMEM)) { + if (HAS_LMEM(i915) && !i915_gem_object_can_migrate(obj, INTEL_REGION_LMEM_0)) { /* object is "remote", not in local memory */ i915_gem_object_put(obj); return ERR_PTR(-EREMOTE); diff --git a/drivers/gpu/drm/i915/display/intel_fb_pin.c b/drivers/gpu/drm/i915/display/intel_fb_pin.c index a307b4993bcf..bd6e7c98e751 100644 --- a/drivers/gpu/drm/i915/display/intel_fb_pin.c +++ b/drivers/gpu/drm/i915/display/intel_fb_pin.c @@ -140,7 +140,7 @@ retry: if (!ret && phys_cursor) ret = i915_gem_object_attach_phys(obj, alignment); else if (!ret && HAS_LMEM(dev_priv)) - ret = i915_gem_object_migrate(obj, &ww, INTEL_REGION_LMEM); + ret = i915_gem_object_migrate(obj, &ww, INTEL_REGION_LMEM_0); /* TODO: Do we need to sync when migration becomes async? */ if (!ret) ret = i915_gem_object_pin_pages(obj); diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 670835318a1f..ff303c7d3a57 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -1045,7 +1045,7 @@ static int intel_fbc_check_plane(struct intel_atomic_state *state, struct intel_plane_state *plane_state = intel_atomic_get_new_plane_state(state, plane); const struct drm_framebuffer *fb = plane_state->hw.fb; - struct intel_crtc *crtc = to_intel_crtc(plane_state->uapi.crtc); + struct intel_crtc *crtc = to_intel_crtc(plane_state->hw.crtc); const struct intel_crtc_state *crtc_state; struct intel_fbc *fbc = plane->fbc; diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c index 2cd62a187df3..221336178991 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c @@ -279,7 +279,7 @@ static int intelfb_create(struct drm_fb_helper *helper, /* Our framebuffer is the entirety of fbdev's system memory */ info->fix.smem_start = (unsigned long)(ggtt->gmadr.start + vma->node.start); - info->fix.smem_len = vma->node.size; + info->fix.smem_len = vma->size; } vaddr = i915_vma_pin_iomap(vma); @@ -290,7 +290,7 @@ static int intelfb_create(struct drm_fb_helper *helper, goto out_unpin; } info->screen_base = vaddr; - info->screen_size = vma->node.size; + info->screen_size = vma->size; drm_fb_helper_fill_info(info, &ifbdev->helper, sizes); diff --git a/drivers/gpu/drm/i915/display/intel_plane_initial.c b/drivers/gpu/drm/i915/display/intel_plane_initial.c index e207d12286b5..d10f27d0b7b0 100644 --- a/drivers/gpu/drm/i915/display/intel_plane_initial.c +++ b/drivers/gpu/drm/i915/display/intel_plane_initial.c @@ -3,6 +3,7 @@ * Copyright © 2021 Intel Corporation */ +#include "gem/i915_gem_region.h" #include "i915_drv.h" #include "intel_atomic_plane.h" #include "intel_display.h" @@ -46,16 +47,55 @@ static struct i915_vma * initial_plane_vma(struct drm_i915_private *i915, struct intel_initial_plane_config *plane_config) { - struct intel_memory_region *mem = i915->mm.stolen_region; + struct intel_memory_region *mem; struct drm_i915_gem_object *obj; struct i915_vma *vma; + resource_size_t phys_base; u32 base, size; + u64 pinctl; - if (!mem || plane_config->size == 0) + if (plane_config->size == 0) + return NULL; + + base = round_down(plane_config->base, I915_GTT_MIN_ALIGNMENT); + if (IS_DGFX(i915)) { + gen8_pte_t __iomem *gte = to_gt(i915)->ggtt->gsm; + gen8_pte_t pte; + + gte += base / I915_GTT_PAGE_SIZE; + + pte = ioread64(gte); + if (!(pte & GEN12_GGTT_PTE_LM)) { + drm_err(&i915->drm, + "Initial plane programming missing PTE_LM bit\n"); + return NULL; + } + + phys_base = pte & I915_GTT_PAGE_MASK; + mem = i915->mm.regions[INTEL_REGION_LMEM_0]; + + /* + * We don't currently expect this to ever be placed in the + * stolen portion. + */ + if (phys_base >= resource_size(&mem->region)) { + drm_err(&i915->drm, + "Initial plane programming using invalid range, phys_base=%pa\n", + &phys_base); + return NULL; + } + + drm_dbg(&i915->drm, + "Using phys_base=%pa, based on initial plane programming\n", + &phys_base); + } else { + phys_base = base; + mem = i915->mm.stolen_region; + } + + if (!mem) return NULL; - base = round_down(plane_config->base, - I915_GTT_MIN_ALIGNMENT); size = round_up(plane_config->base + plane_config->size, mem->min_page_size); size -= base; @@ -66,10 +106,11 @@ initial_plane_vma(struct drm_i915_private *i915, * features. */ if (IS_ENABLED(CONFIG_FRAMEBUFFER_CONSOLE) && + mem == i915->mm.stolen_region && size * 2 > i915->stolen_usable_size) return NULL; - obj = i915_gem_object_create_stolen_for_preallocated(i915, base, size); + obj = i915_gem_object_create_region_at(mem, phys_base, size, 0); if (IS_ERR(obj)) return NULL; @@ -99,7 +140,10 @@ initial_plane_vma(struct drm_i915_private *i915, if (IS_ERR(vma)) goto err_obj; - if (i915_ggtt_pin(vma, NULL, 0, PIN_MAPPABLE | PIN_OFFSET_FIXED | base)) + pinctl = PIN_GLOBAL | PIN_OFFSET_FIXED | base; + if (HAS_GMCH(i915)) + pinctl |= PIN_MAPPABLE; + if (i915_vma_pin(vma, 0, 0, pinctl)) goto err_obj; if (i915_gem_object_is_tiled(obj) && diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 5a55010a9b2f..8ec7c161284b 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -891,6 +891,20 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp, return false; } + /* Wa_16011303918:adl-p */ + if (crtc_state->vrr.enable && + IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B0)) { + drm_dbg_kms(&dev_priv->drm, + "PSR2 not enabled, not compatible with HW stepping + VRR\n"); + return false; + } + + if (!_compute_psr2_sdp_prior_scanline_indication(intel_dp, crtc_state)) { + drm_dbg_kms(&dev_priv->drm, + "PSR2 not enabled, PSR2 SDP indication do not fit in hblank\n"); + return false; + } + if (HAS_PSR2_SEL_FETCH(dev_priv)) { if (!intel_psr2_sel_fetch_config_valid(intel_dp, crtc_state) && !HAS_PSR_HW_TRACKING(dev_priv)) { @@ -904,12 +918,12 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp, if (!crtc_state->enable_psr2_sel_fetch && IS_TGL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_C0)) { drm_dbg_kms(&dev_priv->drm, "PSR2 HW tracking is not supported this Display stepping\n"); - return false; + goto unsupported; } if (!psr2_granularity_check(intel_dp, crtc_state)) { drm_dbg_kms(&dev_priv->drm, "PSR2 not enabled, SU granularity not compatible\n"); - return false; + goto unsupported; } if (!crtc_state->enable_psr2_sel_fetch && @@ -918,25 +932,15 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp, "PSR2 not enabled, resolution %dx%d > max supported %dx%d\n", crtc_hdisplay, crtc_vdisplay, psr_max_h, psr_max_v); - return false; - } - - if (!_compute_psr2_sdp_prior_scanline_indication(intel_dp, crtc_state)) { - drm_dbg_kms(&dev_priv->drm, - "PSR2 not enabled, PSR2 SDP indication do not fit in hblank\n"); - return false; - } - - /* Wa_16011303918:adl-p */ - if (crtc_state->vrr.enable && - IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B0)) { - drm_dbg_kms(&dev_priv->drm, - "PSR2 not enabled, not compatible with HW stepping + VRR\n"); - return false; + goto unsupported; } tgl_dc3co_exitline_compute_config(intel_dp, crtc_state); return true; + +unsupported: + crtc_state->enable_psr2_sel_fetch = false; + return false; } void intel_psr_compute_config(struct intel_dp *intel_dp, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 5db83aaf93ee..ab4c5ab28e4d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1031,23 +1031,44 @@ static void free_engines_rcu(struct rcu_head *rcu) free_engines(engines); } +static void accumulate_runtime(struct i915_drm_client *client, + struct i915_gem_engines *engines) +{ + struct i915_gem_engines_iter it; + struct intel_context *ce; + + if (!client) + return; + + /* Transfer accumulated runtime to the parent GEM context. */ + for_each_gem_engine(ce, engines, it) { + unsigned int class = ce->engine->uabi_class; + + GEM_BUG_ON(class >= ARRAY_SIZE(client->past_runtime)); + atomic64_add(intel_context_get_total_runtime_ns(ce), + &client->past_runtime[class]); + } +} + static int engines_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) { struct i915_gem_engines *engines = container_of(fence, typeof(*engines), fence); + struct i915_gem_context *ctx = engines->ctx; switch (state) { case FENCE_COMPLETE: if (!list_empty(&engines->link)) { - struct i915_gem_context *ctx = engines->ctx; unsigned long flags; spin_lock_irqsave(&ctx->stale.lock, flags); list_del(&engines->link); spin_unlock_irqrestore(&ctx->stale.lock, flags); } - i915_gem_context_put(engines->ctx); + accumulate_runtime(ctx->client, engines); + i915_gem_context_put(ctx); + break; case FENCE_FREE: @@ -1257,6 +1278,9 @@ static void i915_gem_context_release_work(struct work_struct *work) if (ctx->pxp_wakeref) intel_runtime_pm_put(&ctx->i915->runtime_pm, ctx->pxp_wakeref); + if (ctx->client) + i915_drm_client_put(ctx->client); + mutex_destroy(&ctx->engines_mutex); mutex_destroy(&ctx->lut_mutex); @@ -1467,7 +1491,7 @@ static void set_closed_name(struct i915_gem_context *ctx) static void context_close(struct i915_gem_context *ctx) { - struct i915_address_space *vm; + struct i915_drm_client *client; /* Flush any concurrent set_engines() */ mutex_lock(&ctx->engines_mutex); @@ -1480,19 +1504,6 @@ static void context_close(struct i915_gem_context *ctx) set_closed_name(ctx); - vm = ctx->vm; - if (vm) { - /* i915_vm_close drops the final reference, which is a bit too - * early and could result in surprises with concurrent - * operations racing with thist ctx close. Keep a full reference - * until the end. - */ - i915_vm_get(vm); - i915_vm_close(vm); - } - - ctx->file_priv = ERR_PTR(-EBADF); - /* * The LUT uses the VMA as a backpointer to unref the object, * so we need to clear the LUT before we close all the VMA (inside @@ -1500,10 +1511,19 @@ static void context_close(struct i915_gem_context *ctx) */ lut_close(ctx); + ctx->file_priv = ERR_PTR(-EBADF); + spin_lock(&ctx->i915->gem.contexts.lock); list_del(&ctx->link); spin_unlock(&ctx->i915->gem.contexts.lock); + client = ctx->client; + if (client) { + spin_lock(&client->ctx_lock); + list_del_rcu(&ctx->client_link); + spin_unlock(&client->ctx_lock); + } + mutex_unlock(&ctx->mutex); /* @@ -1598,12 +1618,8 @@ i915_gem_create_context(struct drm_i915_private *i915, } vm = &ppgtt->vm; } - if (vm) { - ctx->vm = i915_vm_open(vm); - - /* i915_vm_open() takes a reference */ - i915_vm_put(vm); - } + if (vm) + ctx->vm = vm; mutex_init(&ctx->engines_mutex); if (pc->num_user_engines >= 0) { @@ -1653,7 +1669,7 @@ err_engines: free_engines(e); err_vm: if (ctx->vm) - i915_vm_close(ctx->vm); + i915_vm_put(ctx->vm); err_ctx: kfree(ctx); return ERR_PTR(err); @@ -1680,6 +1696,8 @@ static void gem_context_register(struct i915_gem_context *ctx, ctx->file_priv = fpriv; ctx->pid = get_task_pid(current, PIDTYPE_PID); + ctx->client = i915_drm_client_get(fpriv->client); + snprintf(ctx->name, sizeof(ctx->name), "%s[%d]", current->comm, pid_nr(ctx->pid)); @@ -1687,6 +1705,10 @@ static void gem_context_register(struct i915_gem_context *ctx, old = xa_store(&fpriv->context_xa, id, ctx, GFP_KERNEL); WARN_ON(old); + spin_lock(&ctx->client->ctx_lock); + list_add_tail_rcu(&ctx->client_link, &ctx->client->ctx_list); + spin_unlock(&ctx->client->ctx_lock); + spin_lock(&i915->gem.contexts.lock); list_add_tail(&ctx->link, &i915->gem.contexts.list); spin_unlock(&i915->gem.contexts.lock); @@ -1837,7 +1859,7 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv, if (err) return err; - i915_vm_open(vm); + i915_vm_get(vm); GEM_BUG_ON(id == 0); /* reserved for invalid/unassigned ppgtt */ args->value = id; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 282cdb8a5c5a..cb78214a7dcd 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -293,6 +293,12 @@ struct i915_gem_context { /** @link: place with &drm_i915_private.context_list */ struct list_head link; + /** @client: struct i915_drm_client */ + struct i915_drm_client *client; + + /** @client_link: for linking onto &i915_drm_client.ctx_list */ + struct list_head client_link; + /** * @ref: reference count * diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c index c6eb023d3d86..5802692ea604 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c @@ -123,7 +123,7 @@ __i915_gem_object_create_user_ext(struct drm_i915_private *i915, u64 size, */ flags = I915_BO_ALLOC_USER; - ret = mr->ops->init_object(mr, obj, size, 0, flags); + ret = mr->ops->init_object(mr, obj, I915_BO_INVALID_OFFSET, size, 0, flags); if (ret) goto object_free; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index 13917231ae81..f5062d0c6333 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -66,15 +66,6 @@ err: return ERR_PTR(ret); } -static void i915_gem_unmap_dma_buf(struct dma_buf_attachment *attachment, - struct sg_table *sg, - enum dma_data_direction dir) -{ - dma_unmap_sgtable(attachment->dev, sg, dir, DMA_ATTR_SKIP_CPU_SYNC); - sg_free_table(sg); - kfree(sg); -} - static int i915_gem_dmabuf_vmap(struct dma_buf *dma_buf, struct iosys_map *map) { @@ -102,11 +93,15 @@ static void i915_gem_dmabuf_vunmap(struct dma_buf *dma_buf, static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma) { struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); + struct drm_i915_private *i915 = to_i915(obj->base.dev); int ret; if (obj->base.size < vma->vm_end - vma->vm_start) return -EINVAL; + if (HAS_LMEM(i915)) + return drm_gem_prime_mmap(&obj->base, vma); + if (!obj->base.filp) return -ENODEV; @@ -209,7 +204,7 @@ static const struct dma_buf_ops i915_dmabuf_ops = { .attach = i915_gem_dmabuf_attach, .detach = i915_gem_dmabuf_detach, .map_dma_buf = i915_gem_map_dma_buf, - .unmap_dma_buf = i915_gem_unmap_dma_buf, + .unmap_dma_buf = drm_gem_unmap_dma_buf, .release = drm_gem_dmabuf_release, .mmap = i915_gem_dmabuf_mmap, .vmap = i915_gem_dmabuf_vmap, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index fd0e15d9573c..b3383e047505 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -1321,10 +1321,8 @@ static void *reloc_vaddr(struct i915_vma *vma, static void clflush_write32(u32 *addr, u32 value, unsigned int flushes) { if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) { - if (flushes & CLFLUSH_BEFORE) { - clflushopt(addr); - mb(); - } + if (flushes & CLFLUSH_BEFORE) + drm_clflush_virt_range(addr, sizeof(*addr)); *addr = value; @@ -1336,7 +1334,7 @@ static void clflush_write32(u32 *addr, u32 value, unsigned int flushes) * to ensure ordering of clflush wrt to the system. */ if (flushes & CLFLUSH_AFTER) - clflushopt(addr); + drm_clflush_virt_range(addr, sizeof(*addr)); } else *addr = value; } @@ -2690,6 +2688,11 @@ eb_select_engine(struct i915_execbuffer *eb) if (err) goto err; + if (!i915_vm_tryget(ce->vm)) { + err = -ENOENT; + goto err; + } + eb->context = ce; eb->gt = ce->engine->gt; @@ -2713,6 +2716,7 @@ eb_put_engine(struct i915_execbuffer *eb) { struct intel_context *child; + i915_vm_put(eb->context->vm); intel_gt_pm_put(eb->gt); for_each_child(eb->context, child) intel_context_put(child); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c index 02ed3b269326..8949fb0a944f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c @@ -102,7 +102,7 @@ __i915_gem_object_create_lmem_with_ps(struct drm_i915_private *i915, resource_size_t page_size, unsigned int flags) { - return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_LMEM], + return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_LMEM_0], size, page_size, flags); } @@ -137,6 +137,6 @@ i915_gem_object_create_lmem(struct drm_i915_private *i915, resource_size_t size, unsigned int flags) { - return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_LMEM], + return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_LMEM_0], size, 0, flags); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index c3ea243d414d..0c5c43852e24 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -70,7 +70,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, * mmap ioctl is disallowed for all discrete platforms, * and for all platforms with GRAPHICS_VER > 12. */ - if (IS_DGFX(i915) || GRAPHICS_VER(i915) > 12) + if (IS_DGFX(i915) || GRAPHICS_VER_FULL(i915) > IP_VER(12, 0)) return -EOPNOTSUPP; if (args->flags & ~(I915_MMAP_WC)) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 747ac65e060f..06b1b188ce5a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -606,6 +606,9 @@ bool i915_gem_object_can_migrate(struct drm_i915_gem_object *obj, if (!mr) return false; + if (!IS_ALIGNED(obj->base.size, mr->min_page_size)) + return false; + if (obj->mm.region == mr) return true; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index fd54eb8f4826..2c88bdb8ff7c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -631,6 +631,8 @@ struct drm_i915_gem_object { struct drm_mm_node *stolen; + resource_size_t bo_offset; + unsigned long scratch; u64 encode; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c index 5866c88e4d02..f46ee16a323a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_region.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c @@ -29,11 +29,12 @@ void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj) mutex_unlock(&mem->objects.lock); } -struct drm_i915_gem_object * -i915_gem_object_create_region(struct intel_memory_region *mem, - resource_size_t size, - resource_size_t page_size, - unsigned int flags) +static struct drm_i915_gem_object * +__i915_gem_object_create_region(struct intel_memory_region *mem, + resource_size_t offset, + resource_size_t size, + resource_size_t page_size, + unsigned int flags) { struct drm_i915_gem_object *obj; resource_size_t default_page_size; @@ -64,6 +65,9 @@ i915_gem_object_create_region(struct intel_memory_region *mem, size = round_up(size, default_page_size); + if (default_page_size == size) + flags |= I915_BO_ALLOC_CONTIGUOUS; + GEM_BUG_ON(!size); GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_MIN_ALIGNMENT)); @@ -85,7 +89,7 @@ i915_gem_object_create_region(struct intel_memory_region *mem, if (default_page_size < mem->min_page_size) flags |= I915_BO_ALLOC_PM_EARLY; - err = mem->ops->init_object(mem, obj, size, page_size, flags); + err = mem->ops->init_object(mem, obj, offset, size, page_size, flags); if (err) goto err_object_free; @@ -97,6 +101,40 @@ err_object_free: return ERR_PTR(err); } +struct drm_i915_gem_object * +i915_gem_object_create_region(struct intel_memory_region *mem, + resource_size_t size, + resource_size_t page_size, + unsigned int flags) +{ + return __i915_gem_object_create_region(mem, I915_BO_INVALID_OFFSET, + size, page_size, flags); +} + +struct drm_i915_gem_object * +i915_gem_object_create_region_at(struct intel_memory_region *mem, + resource_size_t offset, + resource_size_t size, + unsigned int flags) +{ + GEM_BUG_ON(offset == I915_BO_INVALID_OFFSET); + + if (GEM_WARN_ON(!IS_ALIGNED(size, mem->min_page_size)) || + GEM_WARN_ON(!IS_ALIGNED(offset, mem->min_page_size))) + return ERR_PTR(-EINVAL); + + if (range_overflows(offset, size, resource_size(&mem->region))) + return ERR_PTR(-EINVAL); + + if (!(flags & I915_BO_ALLOC_GPU_ONLY) && + offset + size > mem->io_size && + !i915_ggtt_has_aperture(to_gt(mem->i915)->ggtt)) + return ERR_PTR(-ENOSPC); + + return __i915_gem_object_create_region(mem, offset, size, 0, + flags | I915_BO_ALLOC_CONTIGUOUS); +} + /** * i915_gem_process_region - Iterate over all objects of a region using ops * to process and optionally skip objects diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.h b/drivers/gpu/drm/i915/gem/i915_gem_region.h index fcaa12d657d4..2dfcc41c0170 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_region.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.h @@ -14,6 +14,8 @@ struct sg_table; struct i915_gem_apply_to_region; +#define I915_BO_INVALID_OFFSET ((resource_size_t)-1) + /** * struct i915_gem_apply_to_region_ops - ops to use when iterating over all * region objects. @@ -56,6 +58,11 @@ i915_gem_object_create_region(struct intel_memory_region *mem, resource_size_t size, resource_size_t page_size, unsigned int flags); +struct drm_i915_gem_object * +i915_gem_object_create_region_at(struct intel_memory_region *mem, + resource_size_t offset, + resource_size_t size, + unsigned int flags); int i915_gem_process_region(struct intel_memory_region *mr, struct i915_gem_apply_to_region *apply); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index c7541dc687c1..c2a3e388fcb4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -553,6 +553,7 @@ static int __create_shmem(struct drm_i915_private *i915, static int shmem_object_init(struct intel_memory_region *mem, struct drm_i915_gem_object *obj, + resource_size_t offset, resource_size_t size, resource_size_t page_size, unsigned int flags) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index 2a225daa2b12..47b5e0e342ab 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -12,6 +12,8 @@ #include "gem/i915_gem_lmem.h" #include "gem/i915_gem_region.h" +#include "gt/intel_gt.h" +#include "gt/intel_region_lmem.h" #include "i915_drv.h" #include "i915_gem_stolen.h" #include "i915_reg.h" @@ -493,7 +495,7 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem) /* Exclude the reserved region from driver use */ mem->region.end = reserved_base - 1; - mem->io_size = resource_size(&mem->region); + mem->io_size = min(mem->io_size, resource_size(&mem->region)); /* It is possible for the reserved area to end before the end of stolen * memory, so just consider the start. */ @@ -680,6 +682,7 @@ static int __i915_gem_object_create_stolen(struct intel_memory_region *mem, static int _i915_gem_object_stolen_init(struct intel_memory_region *mem, struct drm_i915_gem_object *obj, + resource_size_t offset, resource_size_t size, resource_size_t page_size, unsigned int flags) @@ -694,12 +697,32 @@ static int _i915_gem_object_stolen_init(struct intel_memory_region *mem, if (size == 0) return -EINVAL; + /* + * With discrete devices, where we lack a mappable aperture there is no + * possible way to ever access this memory on the CPU side. + */ + if (mem->type == INTEL_MEMORY_STOLEN_LOCAL && !mem->io_size && + !(flags & I915_BO_ALLOC_GPU_ONLY)) + return -ENOSPC; + stolen = kzalloc(sizeof(*stolen), GFP_KERNEL); if (!stolen) return -ENOMEM; - ret = i915_gem_stolen_insert_node(i915, stolen, size, - mem->min_page_size); + if (offset != I915_BO_INVALID_OFFSET) { + drm_dbg(&i915->drm, + "creating preallocated stolen object: stolen_offset=%pa, size=%pa\n", + &offset, &size); + + stolen->start = offset; + stolen->size = size; + mutex_lock(&i915->mm.stolen_lock); + ret = drm_mm_reserve_node(&i915->mm.stolen, stolen); + mutex_unlock(&i915->mm.stolen_lock); + } else { + ret = i915_gem_stolen_insert_node(i915, stolen, size, + mem->min_page_size); + } if (ret) goto err_free; @@ -751,11 +774,6 @@ static int init_stolen_lmem(struct intel_memory_region *mem) if (GEM_WARN_ON(resource_size(&mem->region) == 0)) return -ENODEV; - if (!io_mapping_init_wc(&mem->iomap, - mem->io_start, - mem->io_size)) - return -EIO; - /* * TODO: For stolen lmem we mostly just care about populating the dsm * related bits and setting up the drm_mm allocator for the range. @@ -763,18 +781,26 @@ static int init_stolen_lmem(struct intel_memory_region *mem) */ err = i915_gem_init_stolen(mem); if (err) - goto err_fini; + return err; + + if (mem->io_size && !io_mapping_init_wc(&mem->iomap, + mem->io_start, + mem->io_size)) { + err = -EIO; + goto err_cleanup; + } return 0; -err_fini: - io_mapping_fini(&mem->iomap); +err_cleanup: + i915_gem_cleanup_stolen(mem->i915); return err; } static int release_stolen_lmem(struct intel_memory_region *mem) { - io_mapping_fini(&mem->iomap); + if (mem->io_size) + io_mapping_fini(&mem->iomap); i915_gem_cleanup_stolen(mem->i915); return 0; } @@ -791,25 +817,43 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type, { struct intel_uncore *uncore = &i915->uncore; struct pci_dev *pdev = to_pci_dev(i915->drm.dev); + resource_size_t dsm_size, dsm_base, lmem_size; struct intel_memory_region *mem; + resource_size_t io_start, io_size; resource_size_t min_page_size; - resource_size_t io_start; - resource_size_t lmem_size; - u64 lmem_base; - lmem_base = intel_uncore_read64(uncore, GEN12_DSMBASE); - if (GEM_WARN_ON(lmem_base >= pci_resource_len(pdev, 2))) + if (WARN_ON_ONCE(instance)) return ERR_PTR(-ENODEV); - lmem_size = pci_resource_len(pdev, 2) - lmem_base; - io_start = pci_resource_start(pdev, 2) + lmem_base; + /* Use DSM base address instead for stolen memory */ + dsm_base = intel_uncore_read64(uncore, GEN12_DSMBASE); + if (IS_DG1(uncore->i915)) { + lmem_size = pci_resource_len(pdev, 2); + if (WARN_ON(lmem_size < dsm_base)) + return ERR_PTR(-ENODEV); + } else { + resource_size_t lmem_range; + + lmem_range = intel_gt_read_register(&i915->gt0, XEHPSDV_TILE0_ADDR_RANGE) & 0xFFFF; + lmem_size = lmem_range >> XEHPSDV_TILE_LMEM_RANGE_SHIFT; + lmem_size *= SZ_1G; + } + + dsm_size = lmem_size - dsm_base; + if (pci_resource_len(pdev, 2) < lmem_size) { + io_start = 0; + io_size = 0; + } else { + io_start = pci_resource_start(pdev, 2) + dsm_base; + io_size = dsm_size; + } min_page_size = HAS_64K_PAGES(i915) ? I915_GTT_PAGE_SIZE_64K : I915_GTT_PAGE_SIZE_4K; - mem = intel_memory_region_create(i915, lmem_base, lmem_size, + mem = intel_memory_region_create(i915, dsm_base, dsm_size, min_page_size, - io_start, lmem_size, + io_start, io_size, type, instance, &i915_region_stolen_lmem_ops); if (IS_ERR(mem)) @@ -823,6 +867,7 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type, drm_dbg(&i915->drm, "Stolen Local memory IO start: %pa\n", &mem->io_start); + drm_dbg(&i915->drm, "Stolen Local DSM base: %pa\n", &dsm_base); intel_memory_region_set_name(mem, "stolen-local"); @@ -851,63 +896,6 @@ i915_gem_stolen_smem_setup(struct drm_i915_private *i915, u16 type, return mem; } -struct drm_i915_gem_object * -i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *i915, - resource_size_t stolen_offset, - resource_size_t size) -{ - struct intel_memory_region *mem = i915->mm.stolen_region; - struct drm_i915_gem_object *obj; - struct drm_mm_node *stolen; - int ret; - - if (!drm_mm_initialized(&i915->mm.stolen)) - return ERR_PTR(-ENODEV); - - drm_dbg(&i915->drm, - "creating preallocated stolen object: stolen_offset=%pa, size=%pa\n", - &stolen_offset, &size); - - /* KISS and expect everything to be page-aligned */ - if (GEM_WARN_ON(size == 0) || - GEM_WARN_ON(!IS_ALIGNED(size, mem->min_page_size)) || - GEM_WARN_ON(!IS_ALIGNED(stolen_offset, mem->min_page_size))) - return ERR_PTR(-EINVAL); - - stolen = kzalloc(sizeof(*stolen), GFP_KERNEL); - if (!stolen) - return ERR_PTR(-ENOMEM); - - stolen->start = stolen_offset; - stolen->size = size; - mutex_lock(&i915->mm.stolen_lock); - ret = drm_mm_reserve_node(&i915->mm.stolen, stolen); - mutex_unlock(&i915->mm.stolen_lock); - if (ret) - goto err_free; - - obj = i915_gem_object_alloc(); - if (!obj) { - ret = -ENOMEM; - goto err_stolen; - } - - ret = __i915_gem_object_create_stolen(mem, obj, stolen); - if (ret) - goto err_object_free; - - i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE); - return obj; - -err_object_free: - i915_gem_object_free(obj); -err_stolen: - i915_gem_stolen_remove_node(i915, stolen); -err_free: - kfree(stolen); - return ERR_PTR(ret); -} - bool i915_gem_object_is_stolen(const struct drm_i915_gem_object *obj) { return obj->ops == &i915_gem_object_stolen_ops; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h index ccdf7befc571..d5005a39d130 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h @@ -31,10 +31,6 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type, struct drm_i915_gem_object * i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, resource_size_t size); -struct drm_i915_gem_object * -i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv, - resource_size_t stolen_offset, - resource_size_t size); bool i915_gem_object_is_stolen(const struct drm_i915_gem_object *obj); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index b9ae6b02f304..4c25d9b2f138 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -20,6 +20,7 @@ #include "gem/i915_gem_ttm.h" #include "gem/i915_gem_ttm_move.h" #include "gem/i915_gem_ttm_pm.h" +#include "gt/intel_gpu_commands.h" #define I915_TTM_PRIO_PURGE 0 #define I915_TTM_PRIO_NO_PAGES 1 @@ -126,14 +127,22 @@ i915_ttm_select_tt_caching(const struct drm_i915_gem_object *obj) static void i915_ttm_place_from_region(const struct intel_memory_region *mr, struct ttm_place *place, + resource_size_t offset, + resource_size_t size, unsigned int flags) { memset(place, 0, sizeof(*place)); place->mem_type = intel_region_to_ttm_type(mr); + if (mr->type == INTEL_MEMORY_SYSTEM) + return; + if (flags & I915_BO_ALLOC_CONTIGUOUS) place->flags |= TTM_PL_FLAG_CONTIGUOUS; - if (mr->io_size && mr->io_size < mr->total) { + if (offset != I915_BO_INVALID_OFFSET) { + place->fpfn = offset >> PAGE_SHIFT; + place->lpfn = place->fpfn + (size >> PAGE_SHIFT); + } else if (mr->io_size && mr->io_size < mr->total) { if (flags & I915_BO_ALLOC_GPU_ONLY) { place->flags |= TTM_PL_FLAG_TOPDOWN; } else { @@ -155,12 +164,14 @@ i915_ttm_placement_from_obj(const struct drm_i915_gem_object *obj, placement->num_placement = 1; i915_ttm_place_from_region(num_allowed ? obj->mm.placements[0] : - obj->mm.region, requested, flags); + obj->mm.region, requested, obj->bo_offset, + obj->base.size, flags); /* Cache this on object? */ placement->num_busy_placement = num_allowed; for (i = 0; i < placement->num_busy_placement; ++i) - i915_ttm_place_from_region(obj->mm.placements[i], busy + i, flags); + i915_ttm_place_from_region(obj->mm.placements[i], busy + i, + obj->bo_offset, obj->base.size, flags); if (num_allowed == 0) { *busy = *requested; @@ -255,12 +266,33 @@ static const struct i915_refct_sgt_ops tt_rsgt_ops = { .release = i915_ttm_tt_release }; +static inline bool +i915_gem_object_needs_ccs_pages(struct drm_i915_gem_object *obj) +{ + bool lmem_placement = false; + int i; + + for (i = 0; i < obj->mm.n_placements; i++) { + /* Compression is not allowed for the objects with smem placement */ + if (obj->mm.placements[i]->type == INTEL_MEMORY_SYSTEM) + return false; + if (!lmem_placement && + obj->mm.placements[i]->type == INTEL_MEMORY_LOCAL) + lmem_placement = true; + } + + return lmem_placement; +} + static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo, uint32_t page_flags) { + struct drm_i915_private *i915 = container_of(bo->bdev, typeof(*i915), + bdev); struct ttm_resource_manager *man = ttm_manager_type(bo->bdev, bo->resource->mem_type); struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + unsigned long ccs_pages = 0; enum ttm_caching caching; struct i915_ttm_tt *i915_tt; int ret; @@ -283,7 +315,12 @@ static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo, i915_tt->is_shmem = true; } - ret = ttm_tt_init(&i915_tt->ttm, bo, page_flags, caching, 0); + if (HAS_FLAT_CCS(i915) && i915_gem_object_needs_ccs_pages(obj)) + ccs_pages = DIV_ROUND_UP(DIV_ROUND_UP(bo->base.size, + NUM_BYTES_PER_CCS_BYTE), + PAGE_SIZE); + + ret = ttm_tt_init(&i915_tt->ttm, bo, page_flags, caching, ccs_pages); if (ret) goto err_free; @@ -763,6 +800,7 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, i915_sg_dma_sizes(rsgt->table.sgl)); } + GEM_BUG_ON(bo->ttm && ((obj->base.size >> PAGE_SHIFT) < bo->ttm->num_pages)); i915_ttm_adjust_lru(obj); return ret; } @@ -802,7 +840,8 @@ static int __i915_ttm_migrate(struct drm_i915_gem_object *obj, struct ttm_placement placement; int ret; - i915_ttm_place_from_region(mr, &requested, flags); + i915_ttm_place_from_region(mr, &requested, obj->bo_offset, + obj->base.size, flags); placement.num_placement = 1; placement.num_busy_placement = 1; placement.placement = &requested; @@ -1142,6 +1181,7 @@ void i915_ttm_bo_destroy(struct ttm_buffer_object *bo) */ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, struct drm_i915_gem_object *obj, + resource_size_t offset, resource_size_t size, resource_size_t page_size, unsigned int flags) @@ -1158,6 +1198,8 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, drm_gem_private_object_init(&i915->drm, &obj->base, size); i915_gem_object_init(obj, &i915_gem_ttm_obj_ops, &lock_class, flags); + obj->bo_offset = offset; + /* Don't put on a region list until we're either locked or fully initialized. */ obj->mm.region = mem; INIT_LIST_HEAD(&obj->mm.region_link); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h index 9d698ad00853..73e371aa3850 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h @@ -45,6 +45,7 @@ i915_ttm_to_gem(struct ttm_buffer_object *bo) int __i915_gem_ttm_object_init(struct intel_memory_region *mem, struct drm_i915_gem_object *obj, + resource_size_t offset, resource_size_t size, resource_size_t page_size, unsigned int flags); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c index b4275b55e5b8..62c61af77a42 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c @@ -88,7 +88,7 @@ out: static int igt_dmabuf_import_same_driver_lmem(void *arg) { struct drm_i915_private *i915 = arg; - struct intel_memory_region *lmem = i915->mm.regions[INTEL_REGION_LMEM]; + struct intel_memory_region *lmem = i915->mm.regions[INTEL_REGION_LMEM_0]; struct drm_i915_gem_object *obj; struct drm_gem_object *import; struct dma_buf *dmabuf; @@ -253,10 +253,10 @@ static int igt_dmabuf_import_same_driver_lmem_smem(void *arg) struct drm_i915_private *i915 = arg; struct intel_memory_region *regions[2]; - if (!i915->mm.regions[INTEL_REGION_LMEM]) + if (!i915->mm.regions[INTEL_REGION_LMEM_0]) return 0; - regions[0] = i915->mm.regions[INTEL_REGION_LMEM]; + regions[0] = i915->mm.regions[INTEL_REGION_LMEM_0]; regions[1] = i915->mm.regions[INTEL_REGION_SMEM]; return igt_dmabuf_import_same_driver(i915, regions, 2); } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c index 0ad443a90c8b..801af51aff62 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c @@ -47,14 +47,16 @@ static int igt_create_migrate(struct intel_gt *gt, enum intel_region_id src, { struct drm_i915_private *i915 = gt->i915; struct intel_memory_region *src_mr = i915->mm.regions[src]; + struct intel_memory_region *dst_mr = i915->mm.regions[dst]; struct drm_i915_gem_object *obj; struct i915_gem_ww_ctx ww; int err = 0; GEM_BUG_ON(!src_mr); + GEM_BUG_ON(!dst_mr); /* Switch object backing-store on create */ - obj = i915_gem_object_create_region(src_mr, PAGE_SIZE, 0, 0); + obj = i915_gem_object_create_region(src_mr, dst_mr->min_page_size, 0, 0); if (IS_ERR(obj)) return PTR_ERR(obj); @@ -92,17 +94,17 @@ static int igt_create_migrate(struct intel_gt *gt, enum intel_region_id src, static int igt_smem_create_migrate(void *arg) { - return igt_create_migrate(arg, INTEL_REGION_LMEM, INTEL_REGION_SMEM); + return igt_create_migrate(arg, INTEL_REGION_LMEM_0, INTEL_REGION_SMEM); } static int igt_lmem_create_migrate(void *arg) { - return igt_create_migrate(arg, INTEL_REGION_SMEM, INTEL_REGION_LMEM); + return igt_create_migrate(arg, INTEL_REGION_SMEM, INTEL_REGION_LMEM_0); } static int igt_same_create_migrate(void *arg) { - return igt_create_migrate(arg, INTEL_REGION_LMEM, INTEL_REGION_LMEM); + return igt_create_migrate(arg, INTEL_REGION_LMEM_0, INTEL_REGION_LMEM_0); } static int lmem_pages_migrate_one(struct i915_gem_ww_ctx *ww, @@ -152,7 +154,7 @@ static int lmem_pages_migrate_one(struct i915_gem_ww_ctx *ww, } } else { - err = i915_gem_object_migrate(obj, ww, INTEL_REGION_LMEM); + err = i915_gem_object_migrate(obj, ww, INTEL_REGION_LMEM_0); if (err) { pr_err("Object failed migration to lmem\n"); if (err) diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c index 6d6082b5f31f..8ac6726ec16b 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c @@ -42,8 +42,7 @@ mock_context(struct drm_i915_private *i915, if (!ppgtt) goto err_free; - ctx->vm = i915_vm_open(&ppgtt->vm); - i915_vm_put(&ppgtt->vm); + ctx->vm = &ppgtt->vm; } mutex_init(&ctx->engines_mutex); @@ -59,7 +58,7 @@ mock_context(struct drm_i915_private *i915, err_vm: if (ctx->vm) - i915_vm_close(ctx->vm); + i915_vm_put(ctx->vm); err_free: kfree(ctx); return NULL; diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c index 871fe7bda0e0..1bb766c79dcb 100644 --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c @@ -322,7 +322,7 @@ int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww) struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); int err; - GEM_BUG_ON(!atomic_read(&ppgtt->base.vm.open)); + GEM_BUG_ON(!kref_read(&ppgtt->base.vm.ref)); /* * Workaround the limited maximum vma->pin_count and the aliasing_ppgtt diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index b1b9c3fd7bf9..9529c5455bc3 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -6,7 +6,6 @@ #include "gen8_engine_cs.h" #include "i915_drv.h" #include "intel_gpu_commands.h" -#include "intel_gt_regs.h" #include "intel_lrc.h" #include "intel_ring.h" @@ -165,33 +164,9 @@ static u32 preparser_disable(bool state) return MI_ARB_CHECK | 1 << 8 | state; } -static i915_reg_t aux_inv_reg(const struct intel_engine_cs *engine) +u32 *gen12_emit_aux_table_inv(u32 *cs, const i915_reg_t inv_reg) { - static const i915_reg_t vd[] = { - GEN12_VD0_AUX_NV, - GEN12_VD1_AUX_NV, - GEN12_VD2_AUX_NV, - GEN12_VD3_AUX_NV, - }; - - static const i915_reg_t ve[] = { - GEN12_VE0_AUX_NV, - GEN12_VE1_AUX_NV, - }; - - if (engine->class == VIDEO_DECODE_CLASS) - return vd[engine->instance]; - - if (engine->class == VIDEO_ENHANCEMENT_CLASS) - return ve[engine->instance]; - - GEM_BUG_ON("unknown aux_inv reg\n"); - return INVALID_MMIO_REG; -} - -static u32 *gen12_emit_aux_table_inv(const i915_reg_t inv_reg, u32 *cs) -{ - *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = MI_LOAD_REGISTER_IMM(1) | MI_LRI_MMIO_REMAP_EN; *cs++ = i915_mmio_reg_offset(inv_reg); *cs++ = AUX_INV; *cs++ = MI_NOOP; @@ -236,7 +211,7 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) if (mode & EMIT_INVALIDATE) { u32 flags = 0; - u32 *cs; + u32 *cs, count; flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE; flags |= PIPE_CONTROL_TLB_INVALIDATE; @@ -254,7 +229,12 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) if (engine->class == COMPUTE_CLASS) flags &= ~PIPE_CONTROL_3D_FLAGS; - cs = intel_ring_begin(rq, 8 + 4); + if (!HAS_FLAT_CCS(rq->engine->i915)) + count = 8 + 4; + else + count = 8; + + cs = intel_ring_begin(rq, count); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -267,8 +247,10 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); - /* hsdes: 1809175790 */ - cs = gen12_emit_aux_table_inv(GEN12_GFX_CCS_AUX_NV, cs); + if (!HAS_FLAT_CCS(rq->engine->i915)) { + /* hsdes: 1809175790 */ + cs = gen12_emit_aux_table_inv(cs, GEN12_GFX_CCS_AUX_NV); + } *cs++ = preparser_disable(false); intel_ring_advance(rq, cs); @@ -283,12 +265,17 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode) u32 cmd, *cs; cmd = 4; - if (mode & EMIT_INVALIDATE) + if (mode & EMIT_INVALIDATE) { cmd += 2; - if (mode & EMIT_INVALIDATE) - aux_inv = rq->engine->mask & ~BIT(BCS0); - if (aux_inv) - cmd += 2 * hweight32(aux_inv) + 2; + + if (!HAS_FLAT_CCS(rq->engine->i915) && + (rq->engine->class == VIDEO_DECODE_CLASS || + rq->engine->class == VIDEO_ENHANCEMENT_CLASS)) { + aux_inv = rq->engine->mask & ~BIT(BCS0); + if (aux_inv) + cmd += 4; + } + } cs = intel_ring_begin(rq, cmd); if (IS_ERR(cs)) @@ -319,15 +306,10 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode) *cs++ = 0; /* value */ if (aux_inv) { /* hsdes: 1809175790 */ - struct intel_engine_cs *engine; - unsigned int tmp; - - *cs++ = MI_LOAD_REGISTER_IMM(hweight32(aux_inv)); - for_each_engine_masked(engine, rq->engine->gt, aux_inv, tmp) { - *cs++ = i915_mmio_reg_offset(aux_inv_reg(engine)); - *cs++ = AUX_INV; - } - *cs++ = MI_NOOP; + if (rq->engine->class == VIDEO_DECODE_CLASS) + cs = gen12_emit_aux_table_inv(cs, GEN12_VD0_AUX_NV); + else + cs = gen12_emit_aux_table_inv(cs, GEN12_VE0_AUX_NV); } if (mode & EMIT_INVALIDATE) @@ -601,6 +583,43 @@ static u32 *gen12_emit_preempt_busywait(struct i915_request *rq, u32 *cs) return cs; } +/* Wa_14014475959:dg2 */ +#define CCS_SEMAPHORE_PPHWSP_OFFSET 0x540 +static u32 ccs_semaphore_offset(struct i915_request *rq) +{ + return i915_ggtt_offset(rq->context->state) + + (LRC_PPHWSP_PN * PAGE_SIZE) + CCS_SEMAPHORE_PPHWSP_OFFSET; +} + +/* Wa_14014475959:dg2 */ +static u32 *ccs_emit_wa_busywait(struct i915_request *rq, u32 *cs) +{ + int i; + + *cs++ = MI_ATOMIC_INLINE | MI_ATOMIC_GLOBAL_GTT | MI_ATOMIC_CS_STALL | + MI_ATOMIC_MOVE; + *cs++ = ccs_semaphore_offset(rq); + *cs++ = 0; + *cs++ = 1; + + /* + * When MI_ATOMIC_INLINE_DATA set this command must be 11 DW + (1 NOP) + * to align. 4 DWs above + 8 filler DWs here. + */ + for (i = 0; i < 8; ++i) + *cs++ = 0; + + *cs++ = MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_EQ_SDD; + *cs++ = 0; + *cs++ = ccs_semaphore_offset(rq); + *cs++ = 0; + + return cs; +} + static __always_inline u32* gen12_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs) { @@ -611,6 +630,10 @@ gen12_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs) !intel_uc_uses_guc_submission(&rq->engine->gt->uc)) cs = gen12_emit_preempt_busywait(rq, cs); + /* Wa_14014475959:dg2 */ + if (intel_engine_uses_wa_hold_ccs_switchout(rq->engine)) + cs = ccs_emit_wa_busywait(rq, cs); + rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h index cc6e21d3662a..107ab42539ab 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h @@ -10,7 +10,7 @@ #include <linux/types.h> #include "i915_gem.h" /* GEM_BUG_ON */ - +#include "intel_gt_regs.h" #include "intel_gpu_commands.h" struct i915_request; @@ -38,6 +38,8 @@ u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs); u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs); u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs); +u32 *gen12_emit_aux_table_inv(u32 *cs, const i915_reg_t inv_reg); + static inline u32 * __gen8_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset) { diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index f574da00eff1..c7bd5d71b03e 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -454,11 +454,11 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt, pd = pdp->entry[gen8_pd_index(idx, 2)]; } - clflush_cache_range(vaddr, PAGE_SIZE); + drm_clflush_virt_range(vaddr, PAGE_SIZE); vaddr = px_vaddr(i915_pt_entry(pd, gen8_pd_index(idx, 1))); } } while (1); - clflush_cache_range(vaddr, PAGE_SIZE); + drm_clflush_virt_range(vaddr, PAGE_SIZE); return idx; } @@ -631,7 +631,7 @@ static void gen8_ppgtt_insert_huge(struct i915_address_space *vm, } } while (rem >= page_size && index < I915_PDES); - clflush_cache_range(vaddr, PAGE_SIZE); + drm_clflush_virt_range(vaddr, PAGE_SIZE); /* * Is it safe to mark the 2M block as 64K? -- Either we have @@ -647,7 +647,7 @@ static void gen8_ppgtt_insert_huge(struct i915_address_space *vm, I915_GTT_PAGE_SIZE_2M)))) { vaddr = px_vaddr(pd); vaddr[maybe_64K] |= GEN8_PDE_IPS_64K; - clflush_cache_range(vaddr, PAGE_SIZE); + drm_clflush_virt_range(vaddr, PAGE_SIZE); page_size = I915_GTT_PAGE_SIZE_64K; /* @@ -668,7 +668,7 @@ static void gen8_ppgtt_insert_huge(struct i915_address_space *vm, for (i = 1; i < index; i += 16) memset64(vaddr + i, encode, 15); - clflush_cache_range(vaddr, PAGE_SIZE); + drm_clflush_virt_range(vaddr, PAGE_SIZE); } } @@ -722,7 +722,7 @@ static void gen8_ppgtt_insert_entry(struct i915_address_space *vm, vaddr = px_vaddr(pt); vaddr[gen8_pd_index(idx, 0)] = gen8_pte_encode(addr, level, flags); - clflush_cache_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr)); + drm_clflush_virt_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr)); } static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm, diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 5d0ec7c49b6a..4070cb5711d8 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -386,7 +386,7 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) ce->ring = NULL; ce->ring_size = SZ_4K; - ewma_runtime_init(&ce->runtime.avg); + ewma_runtime_init(&ce->stats.runtime.avg); ce->vm = i915_vm_get(engine->gt->vm); @@ -400,7 +400,7 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) INIT_LIST_HEAD(&ce->guc_state.fences); INIT_LIST_HEAD(&ce->guc_state.requests); - ce->guc_id.id = GUC_INVALID_LRC_ID; + ce->guc_id.id = GUC_INVALID_CONTEXT_ID; INIT_LIST_HEAD(&ce->guc_id.link); INIT_LIST_HEAD(&ce->destroyed_link); @@ -576,6 +576,31 @@ void intel_context_bind_parent_child(struct intel_context *parent, child->parallel.parent = parent; } +u64 intel_context_get_total_runtime_ns(const struct intel_context *ce) +{ + u64 total, active; + + total = ce->stats.runtime.total; + if (ce->ops->flags & COPS_RUNTIME_CYCLES) + total *= ce->engine->gt->clock_period_ns; + + active = READ_ONCE(ce->stats.active); + if (active) + active = intel_context_clock() - active; + + return total + active; +} + +u64 intel_context_get_avg_runtime_ns(struct intel_context *ce) +{ + u64 avg = ewma_runtime_read(&ce->stats.runtime.avg); + + if (ce->ops->flags & COPS_RUNTIME_CYCLES) + avg *= ce->engine->gt->clock_period_ns; + + return avg; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftest_context.c" #endif diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index d8c74bbf9aae..b7d3214d2cdd 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -351,18 +351,13 @@ intel_context_clear_nopreempt(struct intel_context *ce) clear_bit(CONTEXT_NOPREEMPT, &ce->flags); } -static inline u64 intel_context_get_total_runtime_ns(struct intel_context *ce) -{ - const u32 period = ce->engine->gt->clock_period_ns; - - return READ_ONCE(ce->runtime.total) * period; -} +u64 intel_context_get_total_runtime_ns(const struct intel_context *ce); +u64 intel_context_get_avg_runtime_ns(struct intel_context *ce); -static inline u64 intel_context_get_avg_runtime_ns(struct intel_context *ce) +static inline u64 intel_context_clock(void) { - const u32 period = ce->engine->gt->clock_period_ns; - - return mul_u32_u32(ewma_runtime_read(&ce->runtime.avg), period); + /* As we mix CS cycles with CPU clocks, use the raw monotonic clock. */ + return ktime_get_raw_fast_ns(); } #endif /* __INTEL_CONTEXT_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 30cd81ad8911..09f82545789f 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -35,6 +35,9 @@ struct intel_context_ops { #define COPS_HAS_INFLIGHT_BIT 0 #define COPS_HAS_INFLIGHT BIT(COPS_HAS_INFLIGHT_BIT) +#define COPS_RUNTIME_CYCLES_BIT 1 +#define COPS_RUNTIME_CYCLES BIT(COPS_RUNTIME_CYCLES_BIT) + int (*alloc)(struct intel_context *ce); void (*ban)(struct intel_context *ce, struct i915_request *rq); @@ -134,14 +137,19 @@ struct intel_context { } lrc; u32 tag; /* cookie passed to HW to track this context on submission */ - /* Time on GPU as tracked by the hw. */ - struct { - struct ewma_runtime avg; - u64 total; - u32 last; - I915_SELFTEST_DECLARE(u32 num_underflow); - I915_SELFTEST_DECLARE(u32 max_underflow); - } runtime; + /** stats: Context GPU engine busyness tracking. */ + struct intel_context_stats { + u64 active; + + /* Time on GPU as tracked by the hw. */ + struct { + struct ewma_runtime avg; + u64 total; + u32 last; + I915_SELFTEST_DECLARE(u32 num_underflow); + I915_SELFTEST_DECLARE(u32 max_underflow); + } runtime; + } stats; unsigned int active_count; /* protected by timeline->mutex */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 1c0ab05c3c40..1431f1e9dbee 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -4,6 +4,7 @@ #include <asm/cacheflush.h> #include <drm/drm_util.h> +#include <drm/drm_cache.h> #include <linux/hashtable.h> #include <linux/irq_work.h> @@ -143,15 +144,9 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) * of extra paranoia to try and ensure that the HWS takes the value * we give and that it doesn't end up trapped inside the CPU! */ - if (static_cpu_has(X86_FEATURE_CLFLUSH)) { - mb(); - clflush(&engine->status_page.addr[reg]); - engine->status_page.addr[reg] = value; - clflush(&engine->status_page.addr[reg]); - mb(); - } else { - WRITE_ONCE(engine->status_page.addr[reg], value); - } + drm_clflush_virt_range(&engine->status_page.addr[reg], sizeof(value)); + WRITE_ONCE(engine->status_page.addr[reg], value); + drm_clflush_virt_range(&engine->status_page.addr[reg], sizeof(value)); } /* diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 7447411a5b26..14c6ddbbfde8 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -436,6 +436,11 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id, if (GRAPHICS_VER(i915) == 12 && engine->class == RENDER_CLASS) engine->props.preempt_timeout_ms = 0; + if ((engine->class == COMPUTE_CLASS && !RCS_MASK(engine->gt) && + __ffs(CCS_MASK(engine->gt)) == engine->instance) || + engine->class == RENDER_CLASS) + engine->flags |= I915_ENGINE_FIRST_RENDER_COMPUTE; + /* features common between engines sharing EUs */ if (engine->class == RENDER_CLASS || engine->class == COMPUTE_CLASS) { engine->flags |= I915_ENGINE_HAS_RCS_REG_STATE; @@ -726,12 +731,24 @@ static void populate_logical_ids(struct intel_gt *gt, u8 *logical_ids, static void setup_logical_ids(struct intel_gt *gt, u8 *logical_ids, u8 class) { - int i; - u8 map[MAX_ENGINE_INSTANCE + 1]; + /* + * Logical to physical mapping is needed for proper support + * to split-frame feature. + */ + if (MEDIA_VER(gt->i915) >= 11 && class == VIDEO_DECODE_CLASS) { + const u8 map[] = { 0, 2, 4, 6, 1, 3, 5, 7 }; + + populate_logical_ids(gt, logical_ids, class, + map, ARRAY_SIZE(map)); + } else { + int i; + u8 map[MAX_ENGINE_INSTANCE + 1]; - for (i = 0; i < MAX_ENGINE_INSTANCE + 1; ++i) - map[i] = i; - populate_logical_ids(gt, logical_ids, class, map, ARRAY_SIZE(map)); + for (i = 0; i < MAX_ENGINE_INSTANCE + 1; ++i) + map[i] = i; + populate_logical_ids(gt, logical_ids, class, + map, ARRAY_SIZE(map)); + } } /** @@ -1263,6 +1280,15 @@ static int __intel_engine_stop_cs(struct intel_engine_cs *engine, int err; intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING)); + + /* + * Wa_22011802037 : gen12, Prior to doing a reset, ensure CS is + * stopped, set ring stop bit and prefetch disable bit to halt CS + */ + if (GRAPHICS_VER(engine->i915) == 12) + intel_uncore_write_fw(uncore, RING_MODE_GEN7(engine->mmio_base), + _MASKED_BIT_ENABLE(GEN12_GFX_PREFETCH_DISABLE)); + err = __intel_wait_for_register_fw(engine->uncore, mode, MODE_IDLE, MODE_IDLE, fast_timeout_us, @@ -1697,9 +1723,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, drm_printf(m, "\tIPEHR: 0x%08x\n", ENGINE_READ(engine, IPEHR)); } - if (intel_engine_uses_guc(engine)) { - /* nothing to print yet */ - } else if (HAS_EXECLISTS(dev_priv)) { + if (HAS_EXECLISTS(dev_priv) && !intel_engine_uses_guc(engine)) { struct i915_request * const *port, *rq; const u32 *hws = &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX]; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h b/drivers/gpu/drm/i915/gt/intel_engine_regs.h index 0bf8b45c9319..594a629cb28f 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h @@ -181,6 +181,7 @@ #define GFX_SURFACE_FAULT_ENABLE (1 << 12) #define GFX_REPLAY_MODE (1 << 11) #define GFX_PSMI_GRANULARITY (1 << 10) +#define GEN12_GFX_PREFETCH_DISABLE REG_BIT(10) #define GFX_PPGTT_ENABLE (1 << 9) #define GEN8_GFX_PPGTT_48B (1 << 7) #define GFX_FORWARD_VBLANK_MASK (3 << 5) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 19ff8758e34d..298f2cc7a879 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -96,7 +96,9 @@ struct i915_ctx_workarounds { #define I915_MAX_VCS 8 #define I915_MAX_VECS 4 +#define I915_MAX_SFC (I915_MAX_VCS / 2) #define I915_MAX_CCS 4 +#define I915_MAX_RCS 1 /* * Engine IDs definitions. @@ -526,6 +528,8 @@ struct intel_engine_cs { #define I915_ENGINE_WANT_FORCED_PREEMPTION BIT(8) #define I915_ENGINE_HAS_RCS_REG_STATE BIT(9) #define I915_ENGINE_HAS_EU_PRIORITY BIT(10) +#define I915_ENGINE_FIRST_RENDER_COMPUTE BIT(11) +#define I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT BIT(12) unsigned int flags; /* @@ -626,6 +630,13 @@ intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine) return engine->flags & I915_ENGINE_HAS_RELATIVE_MMIO; } +/* Wa_14014475959:dg2 */ +static inline bool +intel_engine_uses_wa_hold_ccs_switchout(struct intel_engine_cs *engine) +{ + return engine->flags & I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT; +} + #define instdone_has_slice(dev_priv___, sseu___, slice___) \ ((GRAPHICS_VER(dev_priv___) == 7 ? 1 : ((sseu___)->slice_mask)) & BIT(slice___)) @@ -643,7 +654,7 @@ intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine) #define for_each_instdone_gslice_dss_xehp(dev_priv_, sseu_, iter_, gslice_, dss_) \ for ((iter_) = 0, (gslice_) = 0, (dss_) = 0; \ - (iter_) < GEN_MAX_SUBSLICES; \ + (iter_) < GEN_SS_MASK_SIZE; \ (iter_)++, (gslice_) = (iter_) / GEN_DSS_PER_GSLICE, \ (dss_) = (iter_) % GEN_DSS_PER_GSLICE) \ for_each_if(intel_sseu_has_subslice((sseu_), 0, (iter_))) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c index b8c9b6b89003..0f6cd96b459f 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c @@ -193,7 +193,6 @@ static void add_legacy_ring(struct legacy_ring *ring, void intel_engines_driver_register(struct drm_i915_private *i915) { struct legacy_ring ring = {}; - u8 uabi_instances[5] = {}; struct list_head *it, *next; struct rb_node **p, *prev; LIST_HEAD(engines); @@ -214,8 +213,10 @@ void intel_engines_driver_register(struct drm_i915_private *i915) GEM_BUG_ON(engine->class >= ARRAY_SIZE(uabi_classes)); engine->uabi_class = uabi_classes[engine->class]; - GEM_BUG_ON(engine->uabi_class >= ARRAY_SIZE(uabi_instances)); - engine->uabi_instance = uabi_instances[engine->uabi_class]++; + GEM_BUG_ON(engine->uabi_class >= + ARRAY_SIZE(i915->engine_uabi_class_count)); + engine->uabi_instance = + i915->engine_uabi_class_count[engine->uabi_class]++; /* Replace the internal name with the final user facing name */ memcpy(old, engine->name, sizeof(engine->name)); @@ -245,8 +246,8 @@ void intel_engines_driver_register(struct drm_i915_private *i915) int class, inst; int errors = 0; - for (class = 0; class < ARRAY_SIZE(uabi_instances); class++) { - for (inst = 0; inst < uabi_instances[class]; inst++) { + for (class = 0; class < ARRAY_SIZE(i915->engine_uabi_class_count); class++) { + for (inst = 0; inst < i915->engine_uabi_class_count[class]; inst++) { engine = intel_engine_lookup_user(i915, class, inst); if (!engine) { diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 1c602d4ae297..f8749c433b7c 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -625,8 +625,6 @@ static void __execlists_schedule_out(struct i915_request * const rq, GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag)); __set_bit(ccid - 1, &engine->context_tag); } - - lrc_update_runtime(ce); intel_engine_context_out(engine); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); if (engine->fw_domain && !--engine->fw_active) @@ -1651,12 +1649,6 @@ cancel_port_requests(struct intel_engine_execlists * const execlists, return inactive; } -static void invalidate_csb_entries(const u64 *first, const u64 *last) -{ - clflush((void *)first); - clflush((void *)last); -} - /* * Starting with Gen12, the status has a new format: * @@ -2004,15 +1996,30 @@ process_csb(struct intel_engine_cs *engine, struct i915_request **inactive) * the wash as hardware, working or not, will need to do the * invalidation before. */ - invalidate_csb_entries(&buf[0], &buf[num_entries - 1]); + drm_clflush_virt_range(&buf[0], num_entries * sizeof(buf[0])); /* * We assume that any event reflects a change in context flow * and merits a fresh timeslice. We reinstall the timer after * inspecting the queue to see if we need to resumbit. */ - if (*prev != *execlists->active) /* elide lite-restores */ + if (*prev != *execlists->active) { /* elide lite-restores */ + /* + * Note the inherent discrepancy between the HW runtime, + * recorded as part of the context switch, and the CPU + * adjustment for active contexts. We have to hope that + * the delay in processing the CS event is very small + * and consistent. It works to our advantage to have + * the CPU adjustment _undershoot_ (i.e. start later than) + * the CS timestamp so we never overreport the runtime + * and correct overselves later when updating from HW. + */ + if (*prev) + lrc_runtime_stop((*prev)->context); + if (*execlists->active) + lrc_runtime_start((*execlists->active)->context); new_timeslice(execlists); + } return inactive; } @@ -2236,11 +2243,11 @@ static struct execlists_capture *capture_regs(struct intel_engine_cs *engine) if (!cap->error) goto err_cap; - cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp); + cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp, CORE_DUMP_FLAG_NONE); if (!cap->error->gt) goto err_gpu; - cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp); + cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp, CORE_DUMP_FLAG_NONE); if (!cap->error->gt->engine) goto err_gt; @@ -2644,7 +2651,7 @@ unwind: } static const struct intel_context_ops execlists_context_ops = { - .flags = COPS_HAS_INFLIGHT, + .flags = COPS_HAS_INFLIGHT | COPS_RUNTIME_CYCLES, .alloc = execlists_context_alloc, @@ -2788,8 +2795,9 @@ static void reset_csb_pointers(struct intel_engine_cs *engine) /* Check that the GPU does indeed update the CSB entries! */ memset(execlists->csb_status, -1, (reset_value + 1) * sizeof(u64)); - invalidate_csb_entries(&execlists->csb_status[0], - &execlists->csb_status[reset_value]); + drm_clflush_virt_range(execlists->csb_status, + execlists->csb_size * + sizeof(execlists->csb_status)); /* Once more for luck and our trusty paranoia */ ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR, @@ -2833,7 +2841,7 @@ static void execlists_sanitize(struct intel_engine_cs *engine) sanitize_hwsp(engine); /* And scrub the dirty cachelines for the HWSP */ - clflush_cache_range(engine->status_page.addr, PAGE_SIZE); + drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE); intel_engine_reset_pinned_contexts(engine); } @@ -2912,7 +2920,7 @@ static int execlists_resume(struct intel_engine_cs *engine) enable_execlists(engine); - if (engine->class == RENDER_CLASS) + if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) xehp_enable_ccs_engines(engine); return 0; @@ -2958,9 +2966,8 @@ reset_csb(struct intel_engine_cs *engine, struct i915_request **inactive) { struct intel_engine_execlists * const execlists = &engine->execlists; - mb(); /* paranoia: read the CSB pointers from after the reset */ - clflush(execlists->csb_write); - mb(); + drm_clflush_virt_range(execlists->csb_write, + sizeof(execlists->csb_write[0])); inactive = process_csb(engine, inactive); /* drain preemption events */ @@ -3702,7 +3709,7 @@ virtual_get_sibling(struct intel_engine_cs *engine, unsigned int sibling) } static const struct intel_context_ops virtual_context_ops = { - .flags = COPS_HAS_INFLIGHT, + .flags = COPS_HAS_INFLIGHT | COPS_RUNTIME_CYCLES, .alloc = virtual_context_alloc, diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index e164f30a900d..e6b2eb122ad7 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -3,18 +3,16 @@ * Copyright © 2020 Intel Corporation */ -#include <linux/agp_backend.h> -#include <linux/stop_machine.h> - +#include <linux/types.h> #include <asm/set_memory.h> #include <asm/smp.h> #include <drm/i915_drm.h> -#include <drm/intel-gtt.h> #include "gem/i915_gem_lmem.h" #include "intel_gt.h" +#include "intel_gt_gmch.h" #include "intel_gt_regs.h" #include "i915_drv.h" #include "i915_scatterlist.h" @@ -95,28 +93,6 @@ int i915_ggtt_init_hw(struct drm_i915_private *i915) return 0; } -/* - * Certain Gen5 chipsets require idling the GPU before - * unmapping anything from the GTT when VT-d is enabled. - */ -static bool needs_idle_maps(struct drm_i915_private *i915) -{ - /* - * Query intel_iommu to see if we need the workaround. Presumably that - * was loaded first. - */ - if (!i915_vtd_active(i915)) - return false; - - if (GRAPHICS_VER(i915) == 5 && IS_MOBILE(i915)) - return true; - - if (GRAPHICS_VER(i915) == 12) - return true; /* XXX DMAR fault reason 7 */ - - return false; -} - /** * i915_ggtt_suspend_vm - Suspend the memory mappings for a GGTT or DPT VM * @vm: The VM to suspend the mappings for @@ -127,7 +103,7 @@ static bool needs_idle_maps(struct drm_i915_private *i915) void i915_ggtt_suspend_vm(struct i915_address_space *vm) { struct i915_vma *vma, *vn; - int open; + int save_skip_rewrite; drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt); @@ -136,8 +112,12 @@ retry: mutex_lock(&vm->mutex); - /* Skip rewriting PTE on VMA unbind. */ - open = atomic_xchg(&vm->open, 0); + /* + * Skip rewriting PTE on VMA unbind. + * FIXME: Use an argument to i915_vma_unbind() instead? + */ + save_skip_rewrite = vm->skip_pte_rewrite; + vm->skip_pte_rewrite = true; list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) { struct drm_i915_gem_object *obj = vma->obj; @@ -155,16 +135,14 @@ retry: */ i915_gem_object_get(obj); - atomic_set(&vm->open, open); mutex_unlock(&vm->mutex); i915_gem_object_lock(obj, NULL); - open = i915_vma_unbind(vma); + GEM_WARN_ON(i915_vma_unbind(vma)); i915_gem_object_unlock(obj); - - GEM_WARN_ON(open); - i915_gem_object_put(obj); + + vm->skip_pte_rewrite = save_skip_rewrite; goto retry; } @@ -180,7 +158,7 @@ retry: vm->clear_range(vm, 0, vm->total); - atomic_set(&vm->open, open); + vm->skip_pte_rewrite = save_skip_rewrite; mutex_unlock(&vm->mutex); } @@ -203,7 +181,7 @@ void gen6_ggtt_invalidate(struct i915_ggtt *ggtt) spin_unlock_irq(&uncore->lock); } -static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt) +void gen8_ggtt_invalidate(struct i915_ggtt *ggtt) { struct intel_uncore *uncore = ggtt->vm.gt->uncore; @@ -228,11 +206,6 @@ static void guc_ggtt_invalidate(struct i915_ggtt *ggtt) intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE); } -static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt) -{ - intel_gtt_chipset_flush(); -} - u64 gen8_ggtt_pte_encode(dma_addr_t addr, enum i915_cache_level level, u32 flags) @@ -245,258 +218,7 @@ u64 gen8_ggtt_pte_encode(dma_addr_t addr, return pte; } -static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) -{ - writeq(pte, addr); -} - -static void gen8_ggtt_insert_page(struct i915_address_space *vm, - dma_addr_t addr, - u64 offset, - enum i915_cache_level level, - u32 flags) -{ - struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - gen8_pte_t __iomem *pte = - (gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE; - - gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, flags)); - - ggtt->invalidate(ggtt); -} - -static void gen8_ggtt_insert_entries(struct i915_address_space *vm, - struct i915_vma_resource *vma_res, - enum i915_cache_level level, - u32 flags) -{ - const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, flags); - struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - gen8_pte_t __iomem *gte; - gen8_pte_t __iomem *end; - struct sgt_iter iter; - dma_addr_t addr; - - /* - * Note that we ignore PTE_READ_ONLY here. The caller must be careful - * not to allow the user to override access to a read only page. - */ - - gte = (gen8_pte_t __iomem *)ggtt->gsm; - gte += vma_res->start / I915_GTT_PAGE_SIZE; - end = gte + vma_res->node_size / I915_GTT_PAGE_SIZE; - - for_each_sgt_daddr(addr, iter, vma_res->bi.pages) - gen8_set_pte(gte++, pte_encode | addr); - GEM_BUG_ON(gte > end); - - /* Fill the allocated but "unused" space beyond the end of the buffer */ - while (gte < end) - gen8_set_pte(gte++, vm->scratch[0]->encode); - - /* - * We want to flush the TLBs only after we're certain all the PTE - * updates have finished. - */ - ggtt->invalidate(ggtt); -} - -static void gen6_ggtt_insert_page(struct i915_address_space *vm, - dma_addr_t addr, - u64 offset, - enum i915_cache_level level, - u32 flags) -{ - struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - gen6_pte_t __iomem *pte = - (gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE; - - iowrite32(vm->pte_encode(addr, level, flags), pte); - - ggtt->invalidate(ggtt); -} - -/* - * Binds an object into the global gtt with the specified cache level. - * The object will be accessible to the GPU via commands whose operands - * reference offsets within the global GTT as well as accessible by the GPU - * through the GMADR mapped BAR (i915->mm.gtt->gtt). - */ -static void gen6_ggtt_insert_entries(struct i915_address_space *vm, - struct i915_vma_resource *vma_res, - enum i915_cache_level level, - u32 flags) -{ - struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - gen6_pte_t __iomem *gte; - gen6_pte_t __iomem *end; - struct sgt_iter iter; - dma_addr_t addr; - - gte = (gen6_pte_t __iomem *)ggtt->gsm; - gte += vma_res->start / I915_GTT_PAGE_SIZE; - end = gte + vma_res->node_size / I915_GTT_PAGE_SIZE; - - for_each_sgt_daddr(addr, iter, vma_res->bi.pages) - iowrite32(vm->pte_encode(addr, level, flags), gte++); - GEM_BUG_ON(gte > end); - - /* Fill the allocated but "unused" space beyond the end of the buffer */ - while (gte < end) - iowrite32(vm->scratch[0]->encode, gte++); - - /* - * We want to flush the TLBs only after we're certain all the PTE - * updates have finished. - */ - ggtt->invalidate(ggtt); -} - -static void nop_clear_range(struct i915_address_space *vm, - u64 start, u64 length) -{ -} - -static void gen8_ggtt_clear_range(struct i915_address_space *vm, - u64 start, u64 length) -{ - struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - unsigned int first_entry = start / I915_GTT_PAGE_SIZE; - unsigned int num_entries = length / I915_GTT_PAGE_SIZE; - const gen8_pte_t scratch_pte = vm->scratch[0]->encode; - gen8_pte_t __iomem *gtt_base = - (gen8_pte_t __iomem *)ggtt->gsm + first_entry; - const int max_entries = ggtt_total_entries(ggtt) - first_entry; - int i; - - if (WARN(num_entries > max_entries, - "First entry = %d; Num entries = %d (max=%d)\n", - first_entry, num_entries, max_entries)) - num_entries = max_entries; - - for (i = 0; i < num_entries; i++) - gen8_set_pte(>t_base[i], scratch_pte); -} - -static void bxt_vtd_ggtt_wa(struct i915_address_space *vm) -{ - /* - * Make sure the internal GAM fifo has been cleared of all GTT - * writes before exiting stop_machine(). This guarantees that - * any aperture accesses waiting to start in another process - * cannot back up behind the GTT writes causing a hang. - * The register can be any arbitrary GAM register. - */ - intel_uncore_posting_read_fw(vm->gt->uncore, GFX_FLSH_CNTL_GEN6); -} - -struct insert_page { - struct i915_address_space *vm; - dma_addr_t addr; - u64 offset; - enum i915_cache_level level; -}; - -static int bxt_vtd_ggtt_insert_page__cb(void *_arg) -{ - struct insert_page *arg = _arg; - - gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0); - bxt_vtd_ggtt_wa(arg->vm); - - return 0; -} - -static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm, - dma_addr_t addr, - u64 offset, - enum i915_cache_level level, - u32 unused) -{ - struct insert_page arg = { vm, addr, offset, level }; - - stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL); -} - -struct insert_entries { - struct i915_address_space *vm; - struct i915_vma_resource *vma_res; - enum i915_cache_level level; - u32 flags; -}; - -static int bxt_vtd_ggtt_insert_entries__cb(void *_arg) -{ - struct insert_entries *arg = _arg; - - gen8_ggtt_insert_entries(arg->vm, arg->vma_res, arg->level, arg->flags); - bxt_vtd_ggtt_wa(arg->vm); - - return 0; -} - -static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm, - struct i915_vma_resource *vma_res, - enum i915_cache_level level, - u32 flags) -{ - struct insert_entries arg = { vm, vma_res, level, flags }; - - stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL); -} - -static void gen6_ggtt_clear_range(struct i915_address_space *vm, - u64 start, u64 length) -{ - struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - unsigned int first_entry = start / I915_GTT_PAGE_SIZE; - unsigned int num_entries = length / I915_GTT_PAGE_SIZE; - gen6_pte_t scratch_pte, __iomem *gtt_base = - (gen6_pte_t __iomem *)ggtt->gsm + first_entry; - const int max_entries = ggtt_total_entries(ggtt) - first_entry; - int i; - - if (WARN(num_entries > max_entries, - "First entry = %d; Num entries = %d (max=%d)\n", - first_entry, num_entries, max_entries)) - num_entries = max_entries; - - scratch_pte = vm->scratch[0]->encode; - for (i = 0; i < num_entries; i++) - iowrite32(scratch_pte, >t_base[i]); -} - -static void i915_ggtt_insert_page(struct i915_address_space *vm, - dma_addr_t addr, - u64 offset, - enum i915_cache_level cache_level, - u32 unused) -{ - unsigned int flags = (cache_level == I915_CACHE_NONE) ? - AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; - - intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags); -} - -static void i915_ggtt_insert_entries(struct i915_address_space *vm, - struct i915_vma_resource *vma_res, - enum i915_cache_level cache_level, - u32 unused) -{ - unsigned int flags = (cache_level == I915_CACHE_NONE) ? - AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; - - intel_gtt_insert_sg_entries(vma_res->bi.pages, vma_res->start >> PAGE_SHIFT, - flags); -} - -static void i915_ggtt_clear_range(struct i915_address_space *vm, - u64 start, u64 length) -{ - intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT); -} - -static void ggtt_bind_vma(struct i915_address_space *vm, +void intel_ggtt_bind_vma(struct i915_address_space *vm, struct i915_vm_pt_stash *stash, struct i915_vma_resource *vma_res, enum i915_cache_level cache_level, @@ -520,7 +242,7 @@ static void ggtt_bind_vma(struct i915_address_space *vm, vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE; } -static void ggtt_unbind_vma(struct i915_address_space *vm, +void intel_ggtt_unbind_vma(struct i915_address_space *vm, struct i915_vma_resource *vma_res) { vm->clear_range(vm, vma_res->start, vma_res->vma_size); @@ -723,10 +445,10 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt) ggtt->alias = ppgtt; ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags; - GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma); + GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != intel_ggtt_bind_vma); ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma; - GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma); + GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != intel_ggtt_unbind_vma); ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma; i915_vm_free_pt_stash(&ppgtt->vm, &stash); @@ -749,8 +471,8 @@ static void fini_aliasing_ppgtt(struct i915_ggtt *ggtt) i915_vm_put(&ppgtt->vm); - ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma; - ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma; + ggtt->vm.vma_ops.bind_vma = intel_ggtt_bind_vma; + ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma; } int i915_init_ggtt(struct drm_i915_private *i915) @@ -774,13 +496,13 @@ static void ggtt_cleanup_hw(struct i915_ggtt *ggtt) { struct i915_vma *vma, *vn; - atomic_set(&ggtt->vm.open, 0); - flush_workqueue(ggtt->vm.i915->wq); i915_gem_drain_freed_objects(ggtt->vm.i915); mutex_lock(&ggtt->vm.mutex); + ggtt->vm.skip_pte_rewrite = true; + list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) { struct drm_i915_gem_object *obj = vma->obj; bool trylock; @@ -838,364 +560,12 @@ void i915_ggtt_driver_late_release(struct drm_i915_private *i915) dma_resv_fini(&ggtt->vm._resv); } -static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) -{ - snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT; - snb_gmch_ctl &= SNB_GMCH_GGMS_MASK; - return snb_gmch_ctl << 20; -} - -static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl) -{ - bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT; - bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK; - if (bdw_gmch_ctl) - bdw_gmch_ctl = 1 << bdw_gmch_ctl; - -#ifdef CONFIG_X86_32 - /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE */ - if (bdw_gmch_ctl > 4) - bdw_gmch_ctl = 4; -#endif - - return bdw_gmch_ctl << 20; -} - -static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl) -{ - gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT; - gmch_ctrl &= SNB_GMCH_GGMS_MASK; - - if (gmch_ctrl) - return 1 << (20 + gmch_ctrl); - - return 0; -} - -static unsigned int gen6_gttmmadr_size(struct drm_i915_private *i915) -{ - /* - * GEN6: GTTMMADR size is 4MB and GTTADR starts at 2MB offset - * GEN8: GTTMMADR size is 16MB and GTTADR starts at 8MB offset - */ - GEM_BUG_ON(GRAPHICS_VER(i915) < 6); - return (GRAPHICS_VER(i915) < 8) ? SZ_4M : SZ_16M; -} - -static unsigned int gen6_gttadr_offset(struct drm_i915_private *i915) -{ - return gen6_gttmmadr_size(i915) / 2; -} - -static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) -{ - struct drm_i915_private *i915 = ggtt->vm.i915; - struct pci_dev *pdev = to_pci_dev(i915->drm.dev); - phys_addr_t phys_addr; - u32 pte_flags; - int ret; - - GEM_WARN_ON(pci_resource_len(pdev, 0) != gen6_gttmmadr_size(i915)); - phys_addr = pci_resource_start(pdev, 0) + gen6_gttadr_offset(i915); - - /* - * On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range - * will be dropped. For WC mappings in general we have 64 byte burst - * writes when the WC buffer is flushed, so we can't use it, but have to - * resort to an uncached mapping. The WC issue is easily caught by the - * readback check when writing GTT PTE entries. - */ - if (IS_GEN9_LP(i915) || GRAPHICS_VER(i915) >= 11) - ggtt->gsm = ioremap(phys_addr, size); - else - ggtt->gsm = ioremap_wc(phys_addr, size); - if (!ggtt->gsm) { - drm_err(&i915->drm, "Failed to map the ggtt page table\n"); - return -ENOMEM; - } - - kref_init(&ggtt->vm.resv_ref); - ret = setup_scratch_page(&ggtt->vm); - if (ret) { - drm_err(&i915->drm, "Scratch setup failed\n"); - /* iounmap will also get called at remove, but meh */ - iounmap(ggtt->gsm); - return ret; - } - - pte_flags = 0; - if (i915_gem_object_is_lmem(ggtt->vm.scratch[0])) - pte_flags |= PTE_LM; - - ggtt->vm.scratch[0]->encode = - ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[0]), - I915_CACHE_NONE, pte_flags); - - return 0; -} - -static void gen6_gmch_remove(struct i915_address_space *vm) -{ - struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - - iounmap(ggtt->gsm); - free_scratch(vm); -} - -static struct resource pci_resource(struct pci_dev *pdev, int bar) +struct resource intel_pci_resource(struct pci_dev *pdev, int bar) { return (struct resource)DEFINE_RES_MEM(pci_resource_start(pdev, bar), pci_resource_len(pdev, bar)); } -static int gen8_gmch_probe(struct i915_ggtt *ggtt) -{ - struct drm_i915_private *i915 = ggtt->vm.i915; - struct pci_dev *pdev = to_pci_dev(i915->drm.dev); - unsigned int size; - u16 snb_gmch_ctl; - - /* TODO: We're not aware of mappable constraints on gen8 yet */ - if (!HAS_LMEM(i915)) { - ggtt->gmadr = pci_resource(pdev, 2); - ggtt->mappable_end = resource_size(&ggtt->gmadr); - } - - pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); - if (IS_CHERRYVIEW(i915)) - size = chv_get_total_gtt_size(snb_gmch_ctl); - else - size = gen8_get_total_gtt_size(snb_gmch_ctl); - - ggtt->vm.alloc_pt_dma = alloc_pt_dma; - ggtt->vm.alloc_scratch_dma = alloc_pt_dma; - ggtt->vm.lmem_pt_obj_flags = I915_BO_ALLOC_PM_EARLY; - - ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE; - ggtt->vm.cleanup = gen6_gmch_remove; - ggtt->vm.insert_page = gen8_ggtt_insert_page; - ggtt->vm.clear_range = nop_clear_range; - if (intel_scanout_needs_vtd_wa(i915)) - ggtt->vm.clear_range = gen8_ggtt_clear_range; - - ggtt->vm.insert_entries = gen8_ggtt_insert_entries; - - /* - * Serialize GTT updates with aperture access on BXT if VT-d is on, - * and always on CHV. - */ - if (intel_vm_no_concurrent_access_wa(i915)) { - ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL; - ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL; - ggtt->vm.bind_async_flags = - I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; - } - - ggtt->invalidate = gen8_ggtt_invalidate; - - ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma; - ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma; - - ggtt->vm.pte_encode = gen8_ggtt_pte_encode; - - setup_private_pat(ggtt->vm.gt->uncore); - - return ggtt_probe_common(ggtt, size); -} - -static u64 snb_pte_encode(dma_addr_t addr, - enum i915_cache_level level, - u32 flags) -{ - gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; - - switch (level) { - case I915_CACHE_L3_LLC: - case I915_CACHE_LLC: - pte |= GEN6_PTE_CACHE_LLC; - break; - case I915_CACHE_NONE: - pte |= GEN6_PTE_UNCACHED; - break; - default: - MISSING_CASE(level); - } - - return pte; -} - -static u64 ivb_pte_encode(dma_addr_t addr, - enum i915_cache_level level, - u32 flags) -{ - gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; - - switch (level) { - case I915_CACHE_L3_LLC: - pte |= GEN7_PTE_CACHE_L3_LLC; - break; - case I915_CACHE_LLC: - pte |= GEN6_PTE_CACHE_LLC; - break; - case I915_CACHE_NONE: - pte |= GEN6_PTE_UNCACHED; - break; - default: - MISSING_CASE(level); - } - - return pte; -} - -static u64 byt_pte_encode(dma_addr_t addr, - enum i915_cache_level level, - u32 flags) -{ - gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; - - if (!(flags & PTE_READ_ONLY)) - pte |= BYT_PTE_WRITEABLE; - - if (level != I915_CACHE_NONE) - pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES; - - return pte; -} - -static u64 hsw_pte_encode(dma_addr_t addr, - enum i915_cache_level level, - u32 flags) -{ - gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; - - if (level != I915_CACHE_NONE) - pte |= HSW_WB_LLC_AGE3; - - return pte; -} - -static u64 iris_pte_encode(dma_addr_t addr, - enum i915_cache_level level, - u32 flags) -{ - gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; - - switch (level) { - case I915_CACHE_NONE: - break; - case I915_CACHE_WT: - pte |= HSW_WT_ELLC_LLC_AGE3; - break; - default: - pte |= HSW_WB_ELLC_LLC_AGE3; - break; - } - - return pte; -} - -static int gen6_gmch_probe(struct i915_ggtt *ggtt) -{ - struct drm_i915_private *i915 = ggtt->vm.i915; - struct pci_dev *pdev = to_pci_dev(i915->drm.dev); - unsigned int size; - u16 snb_gmch_ctl; - - ggtt->gmadr = pci_resource(pdev, 2); - ggtt->mappable_end = resource_size(&ggtt->gmadr); - - /* - * 64/512MB is the current min/max we actually know of, but this is - * just a coarse sanity check. - */ - if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) { - drm_err(&i915->drm, "Unknown GMADR size (%pa)\n", - &ggtt->mappable_end); - return -ENXIO; - } - - pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); - - size = gen6_get_total_gtt_size(snb_gmch_ctl); - ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE; - - ggtt->vm.alloc_pt_dma = alloc_pt_dma; - ggtt->vm.alloc_scratch_dma = alloc_pt_dma; - - ggtt->vm.clear_range = nop_clear_range; - if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915)) - ggtt->vm.clear_range = gen6_ggtt_clear_range; - ggtt->vm.insert_page = gen6_ggtt_insert_page; - ggtt->vm.insert_entries = gen6_ggtt_insert_entries; - ggtt->vm.cleanup = gen6_gmch_remove; - - ggtt->invalidate = gen6_ggtt_invalidate; - - if (HAS_EDRAM(i915)) - ggtt->vm.pte_encode = iris_pte_encode; - else if (IS_HASWELL(i915)) - ggtt->vm.pte_encode = hsw_pte_encode; - else if (IS_VALLEYVIEW(i915)) - ggtt->vm.pte_encode = byt_pte_encode; - else if (GRAPHICS_VER(i915) >= 7) - ggtt->vm.pte_encode = ivb_pte_encode; - else - ggtt->vm.pte_encode = snb_pte_encode; - - ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma; - ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma; - - return ggtt_probe_common(ggtt, size); -} - -static void i915_gmch_remove(struct i915_address_space *vm) -{ - intel_gmch_remove(); -} - -static int i915_gmch_probe(struct i915_ggtt *ggtt) -{ - struct drm_i915_private *i915 = ggtt->vm.i915; - phys_addr_t gmadr_base; - int ret; - - ret = intel_gmch_probe(i915->bridge_dev, to_pci_dev(i915->drm.dev), NULL); - if (!ret) { - drm_err(&i915->drm, "failed to set up gmch\n"); - return -EIO; - } - - intel_gtt_get(&ggtt->vm.total, &gmadr_base, &ggtt->mappable_end); - - ggtt->gmadr = - (struct resource)DEFINE_RES_MEM(gmadr_base, ggtt->mappable_end); - - ggtt->vm.alloc_pt_dma = alloc_pt_dma; - ggtt->vm.alloc_scratch_dma = alloc_pt_dma; - - if (needs_idle_maps(i915)) { - drm_notice(&i915->drm, - "Flushing DMA requests before IOMMU unmaps; performance may be degraded\n"); - ggtt->do_idle_maps = true; - } - - ggtt->vm.insert_page = i915_ggtt_insert_page; - ggtt->vm.insert_entries = i915_ggtt_insert_entries; - ggtt->vm.clear_range = i915_ggtt_clear_range; - ggtt->vm.cleanup = i915_gmch_remove; - - ggtt->invalidate = gmch_ggtt_invalidate; - - ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma; - ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma; - - if (unlikely(ggtt->do_idle_maps)) - drm_notice(&i915->drm, - "Applying Ironlake quirks for intel_iommu\n"); - - return 0; -} - static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt) { struct drm_i915_private *i915 = gt->i915; @@ -1207,11 +577,11 @@ static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt) dma_resv_init(&ggtt->vm._resv); if (GRAPHICS_VER(i915) <= 5) - ret = i915_gmch_probe(ggtt); + ret = intel_gt_gmch_gen5_probe(ggtt); else if (GRAPHICS_VER(i915) < 8) - ret = gen6_gmch_probe(ggtt); + ret = intel_gt_gmch_gen6_probe(ggtt); else - ret = gen8_gmch_probe(ggtt); + ret = intel_gt_gmch_gen8_probe(ggtt); if (ret) { dma_resv_fini(&ggtt->vm._resv); return ret; @@ -1265,10 +635,7 @@ int i915_ggtt_probe_hw(struct drm_i915_private *i915) int i915_ggtt_enable_hw(struct drm_i915_private *i915) { - if (GRAPHICS_VER(i915) < 6 && !intel_enable_gtt()) - return -EIO; - - return 0; + return intel_gt_gmch_gen5_enable_hw(i915); } void i915_ggtt_enable_guc(struct i915_ggtt *ggtt) @@ -1308,16 +675,12 @@ bool i915_ggtt_resume_vm(struct i915_address_space *vm) { struct i915_vma *vma; bool write_domain_objs = false; - int open; drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt); /* First fill our portion of the GTT with scratch pages */ vm->clear_range(vm, 0, vm->total); - /* Skip rewriting PTE on VMA unbind. */ - open = atomic_xchg(&vm->open, 0); - /* clflush objects bound into the GGTT and rebind them. */ list_for_each_entry(vma, &vm->bound_list, vm_link) { struct drm_i915_gem_object *obj = vma->obj; @@ -1334,8 +697,6 @@ bool i915_ggtt_resume_vm(struct i915_address_space *vm) } } - atomic_set(&vm->open, open); - return write_domain_objs; } diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index d112ffd56418..e52718a87f14 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -134,6 +134,13 @@ #define MI_MEM_VIRTUAL (1 << 22) /* 945,g33,965 */ #define MI_USE_GGTT (1 << 22) /* g4x+ */ #define MI_STORE_DWORD_INDEX MI_INSTR(0x21, 1) +#define MI_ATOMIC MI_INSTR(0x2f, 1) +#define MI_ATOMIC_INLINE (MI_INSTR(0x2f, 9) | MI_ATOMIC_INLINE_DATA) +#define MI_ATOMIC_GLOBAL_GTT (1 << 22) +#define MI_ATOMIC_INLINE_DATA (1 << 18) +#define MI_ATOMIC_CS_STALL (1 << 17) +#define MI_ATOMIC_MOVE (0x4 << 8) + /* * Official intel docs are somewhat sloppy concerning MI_LOAD_REGISTER_IMM: * - Always issue a MI_NOOP _before_ the MI_LOAD_REGISTER_IMM - otherwise hw @@ -144,6 +151,7 @@ #define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1) /* Gen11+. addr = base + (ctx_restore ? offset & GENMASK(12,2) : offset) */ #define MI_LRI_LRM_CS_MMIO REG_BIT(19) +#define MI_LRI_MMIO_REMAP_EN REG_BIT(17) #define MI_LRI_FORCE_POSTED (1<<12) #define MI_LOAD_REGISTER_IMM_MAX_REGS (126) #define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1) @@ -153,8 +161,10 @@ #define MI_FLUSH_DW_PROTECTED_MEM_EN (1 << 22) #define MI_FLUSH_DW_STORE_INDEX (1<<21) #define MI_INVALIDATE_TLB (1<<18) +#define MI_FLUSH_DW_CCS (1<<16) #define MI_FLUSH_DW_OP_STOREDW (1<<14) #define MI_FLUSH_DW_OP_MASK (3<<14) +#define MI_FLUSH_DW_LLC (1<<9) #define MI_FLUSH_DW_NOTIFY (1<<8) #define MI_INVALIDATE_BSD (1<<7) #define MI_FLUSH_DW_USE_GTT (1<<2) @@ -203,8 +213,27 @@ #define GFX_OP_DRAWRECT_INFO ((0x3<<29)|(0x1d<<24)|(0x80<<16)|(0x3)) #define GFX_OP_DRAWRECT_INFO_I965 ((0x7900<<16)|0x2) +#define XY_CTRL_SURF_INSTR_SIZE 5 +#define MI_FLUSH_DW_SIZE 3 +#define XY_CTRL_SURF_COPY_BLT ((2 << 29) | (0x48 << 22) | 3) +#define SRC_ACCESS_TYPE_SHIFT 21 +#define DST_ACCESS_TYPE_SHIFT 20 +#define CCS_SIZE_MASK 0x3FF +#define CCS_SIZE_SHIFT 8 +#define XY_CTRL_SURF_MOCS_MASK GENMASK(31, 25) +#define NUM_CCS_BYTES_PER_BLOCK 256 +#define NUM_BYTES_PER_CCS_BYTE 256 +#define NUM_CCS_BLKS_PER_XFER 1024 +#define INDIRECT_ACCESS 0 +#define DIRECT_ACCESS 1 + #define COLOR_BLT_CMD (2 << 29 | 0x40 << 22 | (5 - 2)) #define XY_COLOR_BLT_CMD (2 << 29 | 0x50 << 22) +#define XY_FAST_COLOR_BLT_CMD (2 << 29 | 0x44 << 22) +#define XY_FAST_COLOR_BLT_DEPTH_32 (2 << 19) +#define XY_FAST_COLOR_BLT_DW 16 +#define XY_FAST_COLOR_BLT_MOCS_MASK GENMASK(27, 21) +#define XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT 31 #define SRC_COPY_BLT_CMD (2 << 29 | 0x43 << 22) #define GEN9_XY_FAST_COPY_BLT_CMD (2 << 29 | 0x42 << 22) #define XY_SRC_COPY_BLT_CMD (2 << 29 | 0x53 << 22) diff --git a/drivers/gpu/drm/i915/gt/intel_gsc.c b/drivers/gpu/drm/i915/gt/intel_gsc.c new file mode 100644 index 000000000000..0e494028b81d --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gsc.c @@ -0,0 +1,224 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright(c) 2019-2022, Intel Corporation. All rights reserved. + */ + +#include <linux/irq.h> +#include <linux/mei_aux.h> +#include "i915_drv.h" +#include "i915_reg.h" +#include "gt/intel_gsc.h" +#include "gt/intel_gt.h" + +#define GSC_BAR_LENGTH 0x00000FFC + +static void gsc_irq_mask(struct irq_data *d) +{ + /* generic irq handling */ +} + +static void gsc_irq_unmask(struct irq_data *d) +{ + /* generic irq handling */ +} + +static struct irq_chip gsc_irq_chip = { + .name = "gsc_irq_chip", + .irq_mask = gsc_irq_mask, + .irq_unmask = gsc_irq_unmask, +}; + +static int gsc_irq_init(int irq) +{ + irq_set_chip_and_handler_name(irq, &gsc_irq_chip, + handle_simple_irq, "gsc_irq_handler"); + + return irq_set_chip_data(irq, NULL); +} + +struct gsc_def { + const char *name; + unsigned long bar; + size_t bar_size; +}; + +/* gsc resources and definitions (HECI1 and HECI2) */ +static const struct gsc_def gsc_def_dg1[] = { + { + /* HECI1 not yet implemented. */ + }, + { + .name = "mei-gscfi", + .bar = DG1_GSC_HECI2_BASE, + .bar_size = GSC_BAR_LENGTH, + } +}; + +static const struct gsc_def gsc_def_dg2[] = { + { + .name = "mei-gsc", + .bar = DG2_GSC_HECI1_BASE, + .bar_size = GSC_BAR_LENGTH, + }, + { + .name = "mei-gscfi", + .bar = DG2_GSC_HECI2_BASE, + .bar_size = GSC_BAR_LENGTH, + } +}; + +static void gsc_release_dev(struct device *dev) +{ + struct auxiliary_device *aux_dev = to_auxiliary_dev(dev); + struct mei_aux_device *adev = auxiliary_dev_to_mei_aux_dev(aux_dev); + + kfree(adev); +} + +static void gsc_destroy_one(struct intel_gsc_intf *intf) +{ + if (intf->adev) { + auxiliary_device_delete(&intf->adev->aux_dev); + auxiliary_device_uninit(&intf->adev->aux_dev); + intf->adev = NULL; + } + if (intf->irq >= 0) + irq_free_desc(intf->irq); + intf->irq = -1; +} + +static void gsc_init_one(struct drm_i915_private *i915, + struct intel_gsc_intf *intf, + unsigned int intf_id) +{ + struct pci_dev *pdev = to_pci_dev(i915->drm.dev); + struct mei_aux_device *adev; + struct auxiliary_device *aux_dev; + const struct gsc_def *def; + int ret; + + intf->irq = -1; + intf->id = intf_id; + + if (intf_id == 0 && !HAS_HECI_PXP(i915)) + return; + + if (IS_DG1(i915)) { + def = &gsc_def_dg1[intf_id]; + } else if (IS_DG2(i915)) { + def = &gsc_def_dg2[intf_id]; + } else { + drm_warn_once(&i915->drm, "Unknown platform\n"); + return; + } + + if (!def->name) { + drm_warn_once(&i915->drm, "HECI%d is not implemented!\n", intf_id + 1); + return; + } + + intf->irq = irq_alloc_desc(0); + if (intf->irq < 0) { + drm_err(&i915->drm, "gsc irq error %d\n", intf->irq); + return; + } + + ret = gsc_irq_init(intf->irq); + if (ret < 0) { + drm_err(&i915->drm, "gsc irq init failed %d\n", ret); + goto fail; + } + + adev = kzalloc(sizeof(*adev), GFP_KERNEL); + if (!adev) + goto fail; + + adev->irq = intf->irq; + adev->bar.parent = &pdev->resource[0]; + adev->bar.start = def->bar + pdev->resource[0].start; + adev->bar.end = adev->bar.start + def->bar_size - 1; + adev->bar.flags = IORESOURCE_MEM; + adev->bar.desc = IORES_DESC_NONE; + + aux_dev = &adev->aux_dev; + aux_dev->name = def->name; + aux_dev->id = (pci_domain_nr(pdev->bus) << 16) | + PCI_DEVID(pdev->bus->number, pdev->devfn); + aux_dev->dev.parent = &pdev->dev; + aux_dev->dev.release = gsc_release_dev; + + ret = auxiliary_device_init(aux_dev); + if (ret < 0) { + drm_err(&i915->drm, "gsc aux init failed %d\n", ret); + kfree(adev); + goto fail; + } + + ret = auxiliary_device_add(aux_dev); + if (ret < 0) { + drm_err(&i915->drm, "gsc aux add failed %d\n", ret); + /* adev will be freed with the put_device() and .release sequence */ + auxiliary_device_uninit(aux_dev); + goto fail; + } + intf->adev = adev; + + return; +fail: + gsc_destroy_one(intf); +} + +static void gsc_irq_handler(struct intel_gt *gt, unsigned int intf_id) +{ + int ret; + + if (intf_id >= INTEL_GSC_NUM_INTERFACES) { + drm_warn_once(>->i915->drm, "GSC irq: intf_id %d is out of range", intf_id); + return; + } + + if (!HAS_HECI_GSC(gt->i915)) { + drm_warn_once(>->i915->drm, "GSC irq: not supported"); + return; + } + + if (gt->gsc.intf[intf_id].irq < 0) { + drm_err_ratelimited(>->i915->drm, "GSC irq: irq not set"); + return; + } + + ret = generic_handle_irq(gt->gsc.intf[intf_id].irq); + if (ret) + drm_err_ratelimited(>->i915->drm, "error handling GSC irq: %d\n", ret); +} + +void intel_gsc_irq_handler(struct intel_gt *gt, u32 iir) +{ + if (iir & GSC_IRQ_INTF(0)) + gsc_irq_handler(gt, 0); + if (iir & GSC_IRQ_INTF(1)) + gsc_irq_handler(gt, 1); +} + +void intel_gsc_init(struct intel_gsc *gsc, struct drm_i915_private *i915) +{ + unsigned int i; + + if (!HAS_HECI_GSC(i915)) + return; + + for (i = 0; i < INTEL_GSC_NUM_INTERFACES; i++) + gsc_init_one(i915, &gsc->intf[i], i); +} + +void intel_gsc_fini(struct intel_gsc *gsc) +{ + struct intel_gt *gt = gsc_to_gt(gsc); + unsigned int i; + + if (!HAS_HECI_GSC(gt->i915)) + return; + + for (i = 0; i < INTEL_GSC_NUM_INTERFACES; i++) + gsc_destroy_one(&gsc->intf[i]); +} diff --git a/drivers/gpu/drm/i915/gt/intel_gsc.h b/drivers/gpu/drm/i915/gt/intel_gsc.h new file mode 100644 index 000000000000..68582f912b21 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gsc.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright(c) 2019-2022, Intel Corporation. All rights reserved. + */ +#ifndef __INTEL_GSC_DEV_H__ +#define __INTEL_GSC_DEV_H__ + +#include <linux/types.h> + +struct drm_i915_private; +struct intel_gt; +struct mei_aux_device; + +#define INTEL_GSC_NUM_INTERFACES 2 +/* + * The HECI1 bit corresponds to bit15 and HECI2 to bit14. + * The reason for this is to allow growth for more interfaces in the future. + */ +#define GSC_IRQ_INTF(_x) BIT(15 - (_x)) + +/** + * struct intel_gsc - graphics security controller + * @intf : gsc interface + */ +struct intel_gsc { + struct intel_gsc_intf { + struct mei_aux_device *adev; + int irq; + unsigned int id; + } intf[INTEL_GSC_NUM_INTERFACES]; +}; + +void intel_gsc_init(struct intel_gsc *gsc, struct drm_i915_private *dev_priv); +void intel_gsc_fini(struct intel_gsc *gsc); +void intel_gsc_irq_handler(struct intel_gt *gt, u32 iir); + +#endif /* __INTEL_GSC_DEV_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 8a2483ccbfb9..92394f13b42f 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -4,7 +4,6 @@ */ #include <drm/drm_managed.h> -#include <drm/intel-gtt.h> #include "gem/i915_gem_internal.h" #include "gem/i915_gem_lmem.h" @@ -17,6 +16,7 @@ #include "intel_gt_buffer_pool.h" #include "intel_gt_clock_utils.h" #include "intel_gt_debugfs.h" +#include "intel_gt_gmch.h" #include "intel_gt_pm.h" #include "intel_gt_regs.h" #include "intel_gt_requests.h" @@ -26,10 +26,11 @@ #include "intel_rc6.h" #include "intel_renderstate.h" #include "intel_rps.h" +#include "intel_gt_sysfs.h" #include "intel_uncore.h" #include "shmem_utils.h" -void __intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) +static void __intel_gt_init_early(struct intel_gt *gt) { spin_lock_init(>->irq_lock); @@ -51,17 +52,23 @@ void __intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) intel_rps_init_early(>->rps); } -void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) +/* Preliminary initialization of Tile 0 */ +void intel_root_gt_init_early(struct drm_i915_private *i915) { + struct intel_gt *gt = to_gt(i915); + gt->i915 = i915; gt->uncore = &i915->uncore; + + __intel_gt_init_early(gt); } -int intel_gt_probe_lmem(struct intel_gt *gt) +static int intel_gt_probe_lmem(struct intel_gt *gt) { struct drm_i915_private *i915 = gt->i915; + unsigned int instance = gt->info.id; + int id = INTEL_REGION_LMEM_0 + instance; struct intel_memory_region *mem; - int id; int err; mem = intel_gt_setup_lmem(gt); @@ -76,9 +83,8 @@ int intel_gt_probe_lmem(struct intel_gt *gt) return err; } - id = INTEL_REGION_LMEM; - mem->id = id; + mem->instance = instance; intel_memory_region_set_name(mem, "local%u", mem->instance); @@ -96,6 +102,12 @@ int intel_gt_assign_ggtt(struct intel_gt *gt) return gt->ggtt ? 0 : -ENOMEM; } +static const char * const intel_steering_types[] = { + "L3BANK", + "MSLICE", + "LNCF", +}; + static const struct intel_mmio_range icl_l3bank_steering_table[] = { { 0x00B100, 0x00B3FF }, {}, @@ -439,14 +451,17 @@ void intel_gt_chipset_flush(struct intel_gt *gt) { wmb(); if (GRAPHICS_VER(gt->i915) < 6) - intel_gtt_chipset_flush(); + intel_gt_gmch_gen5_chipset_flush(gt); } void intel_gt_driver_register(struct intel_gt *gt) { + intel_gsc_init(>->gsc, gt->i915); + intel_rps_driver_register(>->rps); intel_gt_debugfs_register(gt); + intel_gt_sysfs_register(gt); } static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size) @@ -712,6 +727,11 @@ int intel_gt_init(struct intel_gt *gt) if (err) goto err_uc_init; + err = intel_gt_init_hwconfig(gt); + if (err) + drm_err(>->i915->drm, "Failed to retrieve hwconfig table: %pe\n", + ERR_PTR(err)); + err = __engines_record_defaults(gt); if (err) goto err_gt; @@ -766,6 +786,7 @@ void intel_gt_driver_unregister(struct intel_gt *gt) intel_wakeref_t wakeref; intel_rps_driver_unregister(>->rps); + intel_gsc_fini(>->gsc); intel_pxp_fini(>->pxp); @@ -793,18 +814,24 @@ void intel_gt_driver_release(struct intel_gt *gt) intel_gt_pm_fini(gt); intel_gt_fini_scratch(gt); intel_gt_fini_buffer_pool(gt); + intel_gt_fini_hwconfig(gt); } -void intel_gt_driver_late_release(struct intel_gt *gt) +void intel_gt_driver_late_release_all(struct drm_i915_private *i915) { + struct intel_gt *gt; + unsigned int id; + /* We need to wait for inflight RCU frees to release their grip */ rcu_barrier(); - intel_uc_driver_late_release(>->uc); - intel_gt_fini_requests(gt); - intel_gt_fini_reset(gt); - intel_gt_fini_timelines(gt); - intel_engines_free(gt); + for_each_gt(gt, i915, id) { + intel_uc_driver_late_release(>->uc); + intel_gt_fini_requests(gt); + intel_gt_fini_reset(gt); + intel_gt_fini_timelines(gt); + intel_engines_free(gt); + } } /** @@ -913,6 +940,35 @@ u32 intel_gt_read_register_fw(struct intel_gt *gt, i915_reg_t reg) return intel_uncore_read_fw(gt->uncore, reg); } +/** + * intel_gt_get_valid_steering_for_reg - get a valid steering for a register + * @gt: GT structure + * @reg: register for which the steering is required + * @sliceid: return variable for slice steering + * @subsliceid: return variable for subslice steering + * + * This function returns a slice/subslice pair that is guaranteed to work for + * read steering of the given register. Note that a value will be returned even + * if the register is not replicated and therefore does not actually require + * steering. + */ +void intel_gt_get_valid_steering_for_reg(struct intel_gt *gt, i915_reg_t reg, + u8 *sliceid, u8 *subsliceid) +{ + int type; + + for (type = 0; type < NUM_STEERING_TYPES; type++) { + if (intel_gt_reg_needs_read_steering(gt, reg, type)) { + intel_gt_get_valid_steering(gt, type, sliceid, + subsliceid); + return; + } + } + + *sliceid = gt->default_steering.groupid; + *subsliceid = gt->default_steering.instanceid; +} + u32 intel_gt_read_register(struct intel_gt *gt, i915_reg_t reg) { int type; @@ -932,6 +988,145 @@ u32 intel_gt_read_register(struct intel_gt *gt, i915_reg_t reg) return intel_uncore_read(gt->uncore, reg); } +static void report_steering_type(struct drm_printer *p, + struct intel_gt *gt, + enum intel_steering_type type, + bool dump_table) +{ + const struct intel_mmio_range *entry; + u8 slice, subslice; + + BUILD_BUG_ON(ARRAY_SIZE(intel_steering_types) != NUM_STEERING_TYPES); + + if (!gt->steering_table[type]) { + drm_printf(p, "%s steering: uses default steering\n", + intel_steering_types[type]); + return; + } + + intel_gt_get_valid_steering(gt, type, &slice, &subslice); + drm_printf(p, "%s steering: sliceid=0x%x, subsliceid=0x%x\n", + intel_steering_types[type], slice, subslice); + + if (!dump_table) + return; + + for (entry = gt->steering_table[type]; entry->end; entry++) + drm_printf(p, "\t0x%06x - 0x%06x\n", entry->start, entry->end); +} + +void intel_gt_report_steering(struct drm_printer *p, struct intel_gt *gt, + bool dump_table) +{ + drm_printf(p, "Default steering: sliceid=0x%x, subsliceid=0x%x\n", + gt->default_steering.groupid, + gt->default_steering.instanceid); + + if (HAS_MSLICES(gt->i915)) { + report_steering_type(p, gt, MSLICE, dump_table); + report_steering_type(p, gt, LNCF, dump_table); + } +} + +static int intel_gt_tile_setup(struct intel_gt *gt, phys_addr_t phys_addr) +{ + int ret; + + if (!gt_is_root(gt)) { + struct intel_uncore_mmio_debug *mmio_debug; + struct intel_uncore *uncore; + + uncore = kzalloc(sizeof(*uncore), GFP_KERNEL); + if (!uncore) + return -ENOMEM; + + mmio_debug = kzalloc(sizeof(*mmio_debug), GFP_KERNEL); + if (!mmio_debug) { + kfree(uncore); + return -ENOMEM; + } + + gt->uncore = uncore; + gt->uncore->debug = mmio_debug; + + __intel_gt_init_early(gt); + } + + intel_uncore_init_early(gt->uncore, gt); + + ret = intel_uncore_setup_mmio(gt->uncore, phys_addr); + if (ret) + return ret; + + gt->phys_addr = phys_addr; + + return 0; +} + +static void +intel_gt_tile_cleanup(struct intel_gt *gt) +{ + intel_uncore_cleanup_mmio(gt->uncore); + + if (!gt_is_root(gt)) { + kfree(gt->uncore->debug); + kfree(gt->uncore); + kfree(gt); + } +} + +int intel_gt_probe_all(struct drm_i915_private *i915) +{ + struct pci_dev *pdev = to_pci_dev(i915->drm.dev); + struct intel_gt *gt = &i915->gt0; + phys_addr_t phys_addr; + unsigned int mmio_bar; + int ret; + + mmio_bar = GRAPHICS_VER(i915) == 2 ? 1 : 0; + phys_addr = pci_resource_start(pdev, mmio_bar); + + /* + * We always have at least one primary GT on any device + * and it has been already initialized early during probe + * in i915_driver_probe() + */ + ret = intel_gt_tile_setup(gt, phys_addr); + if (ret) + return ret; + + i915->gt[0] = gt; + + /* TODO: add more tiles */ + return 0; +} + +int intel_gt_tiles_init(struct drm_i915_private *i915) +{ + struct intel_gt *gt; + unsigned int id; + int ret; + + for_each_gt(gt, i915, id) { + ret = intel_gt_probe_lmem(gt); + if (ret) + return ret; + } + + return 0; +} + +void intel_gt_release_all(struct drm_i915_private *i915) +{ + struct intel_gt *gt; + unsigned int id; + + for_each_gt(gt, i915, id) { + intel_gt_tile_cleanup(gt); + i915->gt[id] = NULL; + } +} + void intel_gt_info_print(const struct intel_gt_info *info, struct drm_printer *p) { diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 0f571c8ee22b..44c6cb63ccbc 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -13,12 +13,24 @@ struct drm_i915_private; struct drm_printer; +struct insert_entries { + struct i915_address_space *vm; + struct i915_vma_resource *vma_res; + enum i915_cache_level level; + u32 flags; +}; + #define GT_TRACE(gt, fmt, ...) do { \ const struct intel_gt *gt__ __maybe_unused = (gt); \ GEM_TRACE("%s " fmt, dev_name(gt__->i915->drm.dev), \ ##__VA_ARGS__); \ } while (0) +static inline bool gt_is_root(struct intel_gt *gt) +{ + return !gt->info.id; +} + static inline struct intel_gt *uc_to_gt(struct intel_uc *uc) { return container_of(uc, struct intel_gt, uc); @@ -34,10 +46,13 @@ static inline struct intel_gt *huc_to_gt(struct intel_huc *huc) return container_of(huc, struct intel_gt, uc.huc); } -void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915); -void __intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915); +static inline struct intel_gt *gsc_to_gt(struct intel_gsc *gsc) +{ + return container_of(gsc, struct intel_gt, gsc); +} + +void intel_root_gt_init_early(struct drm_i915_private *i915); int intel_gt_assign_ggtt(struct intel_gt *gt); -int intel_gt_probe_lmem(struct intel_gt *gt); int intel_gt_init_mmio(struct intel_gt *gt); int __must_check intel_gt_init_hw(struct intel_gt *gt); int intel_gt_init(struct intel_gt *gt); @@ -47,7 +62,7 @@ void intel_gt_driver_unregister(struct intel_gt *gt); void intel_gt_driver_remove(struct intel_gt *gt); void intel_gt_driver_release(struct intel_gt *gt); -void intel_gt_driver_late_release(struct intel_gt *gt); +void intel_gt_driver_late_release_all(struct drm_i915_private *i915); int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout); @@ -84,9 +99,25 @@ static inline bool intel_gt_needs_read_steering(struct intel_gt *gt, return gt->steering_table[type]; } +void intel_gt_get_valid_steering_for_reg(struct intel_gt *gt, i915_reg_t reg, + u8 *sliceid, u8 *subsliceid); + u32 intel_gt_read_register_fw(struct intel_gt *gt, i915_reg_t reg); u32 intel_gt_read_register(struct intel_gt *gt, i915_reg_t reg); +void intel_gt_report_steering(struct drm_printer *p, struct intel_gt *gt, + bool dump_table); + +int intel_gt_probe_all(struct drm_i915_private *i915); +int intel_gt_tiles_init(struct drm_i915_private *i915); +void intel_gt_release_all(struct drm_i915_private *i915); + +#define for_each_gt(gt__, i915__, id__) \ + for ((id__) = 0; \ + (id__) < I915_MAX_GT; \ + (id__)++) \ + for_each_if(((gt__) = (i915__)->gt[(id__)])) + void intel_gt_info_print(const struct intel_gt_info *info, struct drm_printer *p); @@ -94,4 +125,6 @@ void intel_gt_watchdog_work(struct work_struct *work); void intel_gt_invalidate_tlbs(struct intel_gt *gt); +struct resource intel_pci_resource(struct pci_dev *pdev, int bar); + #endif /* __INTEL_GT_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c index 0db822c3b7e5..d5d1b04dbcad 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c @@ -161,6 +161,10 @@ void intel_gt_init_clock_frequency(struct intel_gt *gt) if (gt->clock_frequency) gt->clock_period_ns = intel_gt_clock_interval_to_ns(gt, 1); + /* Icelake appears to use another fixed frequency for CTX_TIMESTAMP */ + if (GRAPHICS_VER(gt->i915) == 11) + gt->clock_period_ns = NSEC_PER_SEC / 13750000; + GT_TRACE(gt, "Using clock frequency: %dkHz, period: %dns, wrap: %lldms\n", gt->clock_frequency / 1000, diff --git a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c index f103664b71d4..d886fdc2c694 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c @@ -6,6 +6,7 @@ #include <linux/debugfs.h> #include "i915_drv.h" +#include "intel_gt.h" #include "intel_gt_debugfs.h" #include "intel_gt_engines_debugfs.h" #include "intel_gt_pm_debugfs.h" @@ -29,7 +30,7 @@ int intel_gt_debugfs_reset_show(struct intel_gt *gt, u64 *val) } } -int intel_gt_debugfs_reset_store(struct intel_gt *gt, u64 val) +void intel_gt_debugfs_reset_store(struct intel_gt *gt, u64 val) { /* Flush any previous reset before applying for a new one */ wait_event(gt->reset.queue, @@ -37,7 +38,6 @@ int intel_gt_debugfs_reset_store(struct intel_gt *gt, u64 val) intel_gt_handle_error(gt, val, I915_ERROR_CAPTURE, "Manually reset engine mask to %llx", val); - return 0; } /* @@ -51,16 +51,30 @@ static int __intel_gt_debugfs_reset_show(void *data, u64 *val) static int __intel_gt_debugfs_reset_store(void *data, u64 val) { - return intel_gt_debugfs_reset_store(data, val); + intel_gt_debugfs_reset_store(data, val); + + return 0; } DEFINE_SIMPLE_ATTRIBUTE(reset_fops, __intel_gt_debugfs_reset_show, __intel_gt_debugfs_reset_store, "%llu\n"); +static int steering_show(struct seq_file *m, void *data) +{ + struct drm_printer p = drm_seq_file_printer(m); + struct intel_gt *gt = m->private; + + intel_gt_report_steering(&p, gt, true); + + return 0; +} +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(steering); + static void gt_debugfs_register(struct intel_gt *gt, struct dentry *root) { static const struct intel_gt_debugfs_file files[] = { { "reset", &reset_fops, NULL }, + { "steering", &steering_fops }, }; intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), gt); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.h b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.h index 17e79b735cfe..e4110eebf093 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.h @@ -48,6 +48,6 @@ void intel_gt_debugfs_register_files(struct dentry *root, /* functions that need to be accessed by the upper level non-gt interfaces */ int intel_gt_debugfs_reset_show(struct intel_gt *gt, u64 *val); -int intel_gt_debugfs_reset_store(struct intel_gt *gt, u64 val); +void intel_gt_debugfs_reset_store(struct intel_gt *gt, u64 val); #endif /* INTEL_GT_DEBUGFS_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_gmch.c b/drivers/gpu/drm/i915/gt/intel_gt_gmch.c new file mode 100644 index 000000000000..18e488672d1b --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_gmch.c @@ -0,0 +1,654 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include <drm/intel-gtt.h> +#include <drm/i915_drm.h> + +#include <linux/agp_backend.h> +#include <linux/stop_machine.h> + +#include "i915_drv.h" +#include "intel_gt_gmch.h" +#include "intel_gt_regs.h" +#include "intel_gt.h" +#include "i915_utils.h" + +#include "gen8_ppgtt.h" + +struct insert_page { + struct i915_address_space *vm; + dma_addr_t addr; + u64 offset; + enum i915_cache_level level; +}; + +static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) +{ + writeq(pte, addr); +} + +static void nop_clear_range(struct i915_address_space *vm, + u64 start, u64 length) +{ +} + +static u64 snb_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + u32 flags) +{ + gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; + + switch (level) { + case I915_CACHE_L3_LLC: + case I915_CACHE_LLC: + pte |= GEN6_PTE_CACHE_LLC; + break; + case I915_CACHE_NONE: + pte |= GEN6_PTE_UNCACHED; + break; + default: + MISSING_CASE(level); + } + + return pte; +} + +static u64 ivb_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + u32 flags) +{ + gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; + + switch (level) { + case I915_CACHE_L3_LLC: + pte |= GEN7_PTE_CACHE_L3_LLC; + break; + case I915_CACHE_LLC: + pte |= GEN6_PTE_CACHE_LLC; + break; + case I915_CACHE_NONE: + pte |= GEN6_PTE_UNCACHED; + break; + default: + MISSING_CASE(level); + } + + return pte; +} + +static u64 byt_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + u32 flags) +{ + gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; + + if (!(flags & PTE_READ_ONLY)) + pte |= BYT_PTE_WRITEABLE; + + if (level != I915_CACHE_NONE) + pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES; + + return pte; +} + +static u64 hsw_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + u32 flags) +{ + gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; + + if (level != I915_CACHE_NONE) + pte |= HSW_WB_LLC_AGE3; + + return pte; +} + +static u64 iris_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + u32 flags) +{ + gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; + + switch (level) { + case I915_CACHE_NONE: + break; + case I915_CACHE_WT: + pte |= HSW_WT_ELLC_LLC_AGE3; + break; + default: + pte |= HSW_WB_ELLC_LLC_AGE3; + break; + } + + return pte; +} + +static void gen5_ggtt_insert_page(struct i915_address_space *vm, + dma_addr_t addr, + u64 offset, + enum i915_cache_level cache_level, + u32 unused) +{ + unsigned int flags = (cache_level == I915_CACHE_NONE) ? + AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; + + intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags); +} + +static void gen6_ggtt_insert_page(struct i915_address_space *vm, + dma_addr_t addr, + u64 offset, + enum i915_cache_level level, + u32 flags) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + gen6_pte_t __iomem *pte = + (gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE; + + iowrite32(vm->pte_encode(addr, level, flags), pte); + + ggtt->invalidate(ggtt); +} + +static void gen8_ggtt_insert_page(struct i915_address_space *vm, + dma_addr_t addr, + u64 offset, + enum i915_cache_level level, + u32 flags) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + gen8_pte_t __iomem *pte = + (gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE; + + gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, flags)); + + ggtt->invalidate(ggtt); +} + +static void gen5_ggtt_insert_entries(struct i915_address_space *vm, + struct i915_vma_resource *vma_res, + enum i915_cache_level cache_level, + u32 unused) +{ + unsigned int flags = (cache_level == I915_CACHE_NONE) ? + AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; + + intel_gtt_insert_sg_entries(vma_res->bi.pages, vma_res->start >> PAGE_SHIFT, + flags); +} + +/* + * Binds an object into the global gtt with the specified cache level. + * The object will be accessible to the GPU via commands whose operands + * reference offsets within the global GTT as well as accessible by the GPU + * through the GMADR mapped BAR (i915->mm.gtt->gtt). + */ +static void gen6_ggtt_insert_entries(struct i915_address_space *vm, + struct i915_vma_resource *vma_res, + enum i915_cache_level level, + u32 flags) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + gen6_pte_t __iomem *gte; + gen6_pte_t __iomem *end; + struct sgt_iter iter; + dma_addr_t addr; + + gte = (gen6_pte_t __iomem *)ggtt->gsm; + gte += vma_res->start / I915_GTT_PAGE_SIZE; + end = gte + vma_res->node_size / I915_GTT_PAGE_SIZE; + + for_each_sgt_daddr(addr, iter, vma_res->bi.pages) + iowrite32(vm->pte_encode(addr, level, flags), gte++); + GEM_BUG_ON(gte > end); + + /* Fill the allocated but "unused" space beyond the end of the buffer */ + while (gte < end) + iowrite32(vm->scratch[0]->encode, gte++); + + /* + * We want to flush the TLBs only after we're certain all the PTE + * updates have finished. + */ + ggtt->invalidate(ggtt); +} + +static void gen8_ggtt_insert_entries(struct i915_address_space *vm, + struct i915_vma_resource *vma_res, + enum i915_cache_level level, + u32 flags) +{ + const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, flags); + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + gen8_pte_t __iomem *gte; + gen8_pte_t __iomem *end; + struct sgt_iter iter; + dma_addr_t addr; + + /* + * Note that we ignore PTE_READ_ONLY here. The caller must be careful + * not to allow the user to override access to a read only page. + */ + + gte = (gen8_pte_t __iomem *)ggtt->gsm; + gte += vma_res->start / I915_GTT_PAGE_SIZE; + end = gte + vma_res->node_size / I915_GTT_PAGE_SIZE; + + for_each_sgt_daddr(addr, iter, vma_res->bi.pages) + gen8_set_pte(gte++, pte_encode | addr); + GEM_BUG_ON(gte > end); + + /* Fill the allocated but "unused" space beyond the end of the buffer */ + while (gte < end) + gen8_set_pte(gte++, vm->scratch[0]->encode); + + /* + * We want to flush the TLBs only after we're certain all the PTE + * updates have finished. + */ + ggtt->invalidate(ggtt); +} + +static void bxt_vtd_ggtt_wa(struct i915_address_space *vm) +{ + /* + * Make sure the internal GAM fifo has been cleared of all GTT + * writes before exiting stop_machine(). This guarantees that + * any aperture accesses waiting to start in another process + * cannot back up behind the GTT writes causing a hang. + * The register can be any arbitrary GAM register. + */ + intel_uncore_posting_read_fw(vm->gt->uncore, GFX_FLSH_CNTL_GEN6); +} + +static int bxt_vtd_ggtt_insert_page__cb(void *_arg) +{ + struct insert_page *arg = _arg; + + gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0); + bxt_vtd_ggtt_wa(arg->vm); + + return 0; +} + +static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm, + dma_addr_t addr, + u64 offset, + enum i915_cache_level level, + u32 unused) +{ + struct insert_page arg = { vm, addr, offset, level }; + + stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL); +} + +static int bxt_vtd_ggtt_insert_entries__cb(void *_arg) +{ + struct insert_entries *arg = _arg; + + gen8_ggtt_insert_entries(arg->vm, arg->vma_res, arg->level, arg->flags); + bxt_vtd_ggtt_wa(arg->vm); + + return 0; +} + +static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm, + struct i915_vma_resource *vma_res, + enum i915_cache_level level, + u32 flags) +{ + struct insert_entries arg = { vm, vma_res, level, flags }; + + stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL); +} + +void intel_gt_gmch_gen5_chipset_flush(struct intel_gt *gt) +{ + intel_gtt_chipset_flush(); +} + +static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt) +{ + intel_gtt_chipset_flush(); +} + +static void gen5_ggtt_clear_range(struct i915_address_space *vm, + u64 start, u64 length) +{ + intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT); +} + +static void gen6_ggtt_clear_range(struct i915_address_space *vm, + u64 start, u64 length) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + unsigned int first_entry = start / I915_GTT_PAGE_SIZE; + unsigned int num_entries = length / I915_GTT_PAGE_SIZE; + gen6_pte_t scratch_pte, __iomem *gtt_base = + (gen6_pte_t __iomem *)ggtt->gsm + first_entry; + const int max_entries = ggtt_total_entries(ggtt) - first_entry; + int i; + + if (WARN(num_entries > max_entries, + "First entry = %d; Num entries = %d (max=%d)\n", + first_entry, num_entries, max_entries)) + num_entries = max_entries; + + scratch_pte = vm->scratch[0]->encode; + for (i = 0; i < num_entries; i++) + iowrite32(scratch_pte, >t_base[i]); +} + +static void gen8_ggtt_clear_range(struct i915_address_space *vm, + u64 start, u64 length) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + unsigned int first_entry = start / I915_GTT_PAGE_SIZE; + unsigned int num_entries = length / I915_GTT_PAGE_SIZE; + const gen8_pte_t scratch_pte = vm->scratch[0]->encode; + gen8_pte_t __iomem *gtt_base = + (gen8_pte_t __iomem *)ggtt->gsm + first_entry; + const int max_entries = ggtt_total_entries(ggtt) - first_entry; + int i; + + if (WARN(num_entries > max_entries, + "First entry = %d; Num entries = %d (max=%d)\n", + first_entry, num_entries, max_entries)) + num_entries = max_entries; + + for (i = 0; i < num_entries; i++) + gen8_set_pte(>t_base[i], scratch_pte); +} + +static void gen5_gmch_remove(struct i915_address_space *vm) +{ + intel_gmch_remove(); +} + +static void gen6_gmch_remove(struct i915_address_space *vm) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + + iounmap(ggtt->gsm); + free_scratch(vm); +} + +/* + * Certain Gen5 chipsets require idling the GPU before + * unmapping anything from the GTT when VT-d is enabled. + */ +static bool needs_idle_maps(struct drm_i915_private *i915) +{ + /* + * Query intel_iommu to see if we need the workaround. Presumably that + * was loaded first. + */ + if (!i915_vtd_active(i915)) + return false; + + if (GRAPHICS_VER(i915) == 5 && IS_MOBILE(i915)) + return true; + + if (GRAPHICS_VER(i915) == 12) + return true; /* XXX DMAR fault reason 7 */ + + return false; +} + +static unsigned int gen6_gttmmadr_size(struct drm_i915_private *i915) +{ + /* + * GEN6: GTTMMADR size is 4MB and GTTADR starts at 2MB offset + * GEN8: GTTMMADR size is 16MB and GTTADR starts at 8MB offset + */ + GEM_BUG_ON(GRAPHICS_VER(i915) < 6); + return (GRAPHICS_VER(i915) < 8) ? SZ_4M : SZ_16M; +} + +static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) +{ + snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT; + snb_gmch_ctl &= SNB_GMCH_GGMS_MASK; + return snb_gmch_ctl << 20; +} + +static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl) +{ + bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT; + bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK; + if (bdw_gmch_ctl) + bdw_gmch_ctl = 1 << bdw_gmch_ctl; + +#ifdef CONFIG_X86_32 + /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE */ + if (bdw_gmch_ctl > 4) + bdw_gmch_ctl = 4; +#endif + + return bdw_gmch_ctl << 20; +} + +static unsigned int gen6_gttadr_offset(struct drm_i915_private *i915) +{ + return gen6_gttmmadr_size(i915) / 2; +} + +static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) +{ + struct drm_i915_private *i915 = ggtt->vm.i915; + struct pci_dev *pdev = to_pci_dev(i915->drm.dev); + phys_addr_t phys_addr; + u32 pte_flags; + int ret; + + GEM_WARN_ON(pci_resource_len(pdev, 0) != gen6_gttmmadr_size(i915)); + phys_addr = pci_resource_start(pdev, 0) + gen6_gttadr_offset(i915); + + /* + * On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range + * will be dropped. For WC mappings in general we have 64 byte burst + * writes when the WC buffer is flushed, so we can't use it, but have to + * resort to an uncached mapping. The WC issue is easily caught by the + * readback check when writing GTT PTE entries. + */ + if (IS_GEN9_LP(i915) || GRAPHICS_VER(i915) >= 11) + ggtt->gsm = ioremap(phys_addr, size); + else + ggtt->gsm = ioremap_wc(phys_addr, size); + if (!ggtt->gsm) { + drm_err(&i915->drm, "Failed to map the ggtt page table\n"); + return -ENOMEM; + } + + kref_init(&ggtt->vm.resv_ref); + ret = setup_scratch_page(&ggtt->vm); + if (ret) { + drm_err(&i915->drm, "Scratch setup failed\n"); + /* iounmap will also get called at remove, but meh */ + iounmap(ggtt->gsm); + return ret; + } + + pte_flags = 0; + if (i915_gem_object_is_lmem(ggtt->vm.scratch[0])) + pte_flags |= PTE_LM; + + ggtt->vm.scratch[0]->encode = + ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[0]), + I915_CACHE_NONE, pte_flags); + + return 0; +} + +int intel_gt_gmch_gen5_probe(struct i915_ggtt *ggtt) +{ + struct drm_i915_private *i915 = ggtt->vm.i915; + phys_addr_t gmadr_base; + int ret; + + ret = intel_gmch_probe(i915->bridge_dev, to_pci_dev(i915->drm.dev), NULL); + if (!ret) { + drm_err(&i915->drm, "failed to set up gmch\n"); + return -EIO; + } + + intel_gtt_get(&ggtt->vm.total, &gmadr_base, &ggtt->mappable_end); + + ggtt->gmadr = + (struct resource)DEFINE_RES_MEM(gmadr_base, ggtt->mappable_end); + + ggtt->vm.alloc_pt_dma = alloc_pt_dma; + ggtt->vm.alloc_scratch_dma = alloc_pt_dma; + + if (needs_idle_maps(i915)) { + drm_notice(&i915->drm, + "Flushing DMA requests before IOMMU unmaps; performance may be degraded\n"); + ggtt->do_idle_maps = true; + } + + ggtt->vm.insert_page = gen5_ggtt_insert_page; + ggtt->vm.insert_entries = gen5_ggtt_insert_entries; + ggtt->vm.clear_range = gen5_ggtt_clear_range; + ggtt->vm.cleanup = gen5_gmch_remove; + + ggtt->invalidate = gmch_ggtt_invalidate; + + ggtt->vm.vma_ops.bind_vma = intel_ggtt_bind_vma; + ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma; + + if (unlikely(ggtt->do_idle_maps)) + drm_notice(&i915->drm, + "Applying Ironlake quirks for intel_iommu\n"); + + return 0; +} + +int intel_gt_gmch_gen6_probe(struct i915_ggtt *ggtt) +{ + struct drm_i915_private *i915 = ggtt->vm.i915; + struct pci_dev *pdev = to_pci_dev(i915->drm.dev); + unsigned int size; + u16 snb_gmch_ctl; + + ggtt->gmadr = intel_pci_resource(pdev, 2); + ggtt->mappable_end = resource_size(&ggtt->gmadr); + + /* + * 64/512MB is the current min/max we actually know of, but this is + * just a coarse sanity check. + */ + if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) { + drm_err(&i915->drm, "Unknown GMADR size (%pa)\n", + &ggtt->mappable_end); + return -ENXIO; + } + + pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); + + size = gen6_get_total_gtt_size(snb_gmch_ctl); + ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE; + + ggtt->vm.alloc_pt_dma = alloc_pt_dma; + ggtt->vm.alloc_scratch_dma = alloc_pt_dma; + + ggtt->vm.clear_range = nop_clear_range; + if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915)) + ggtt->vm.clear_range = gen6_ggtt_clear_range; + ggtt->vm.insert_page = gen6_ggtt_insert_page; + ggtt->vm.insert_entries = gen6_ggtt_insert_entries; + ggtt->vm.cleanup = gen6_gmch_remove; + + ggtt->invalidate = gen6_ggtt_invalidate; + + if (HAS_EDRAM(i915)) + ggtt->vm.pte_encode = iris_pte_encode; + else if (IS_HASWELL(i915)) + ggtt->vm.pte_encode = hsw_pte_encode; + else if (IS_VALLEYVIEW(i915)) + ggtt->vm.pte_encode = byt_pte_encode; + else if (GRAPHICS_VER(i915) >= 7) + ggtt->vm.pte_encode = ivb_pte_encode; + else + ggtt->vm.pte_encode = snb_pte_encode; + + ggtt->vm.vma_ops.bind_vma = intel_ggtt_bind_vma; + ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma; + + return ggtt_probe_common(ggtt, size); +} + +static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl) +{ + gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT; + gmch_ctrl &= SNB_GMCH_GGMS_MASK; + + if (gmch_ctrl) + return 1 << (20 + gmch_ctrl); + + return 0; +} + +int intel_gt_gmch_gen8_probe(struct i915_ggtt *ggtt) +{ + struct drm_i915_private *i915 = ggtt->vm.i915; + struct pci_dev *pdev = to_pci_dev(i915->drm.dev); + unsigned int size; + u16 snb_gmch_ctl; + + /* TODO: We're not aware of mappable constraints on gen8 yet */ + if (!HAS_LMEM(i915)) { + ggtt->gmadr = intel_pci_resource(pdev, 2); + ggtt->mappable_end = resource_size(&ggtt->gmadr); + } + + pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); + if (IS_CHERRYVIEW(i915)) + size = chv_get_total_gtt_size(snb_gmch_ctl); + else + size = gen8_get_total_gtt_size(snb_gmch_ctl); + + ggtt->vm.alloc_pt_dma = alloc_pt_dma; + ggtt->vm.alloc_scratch_dma = alloc_pt_dma; + ggtt->vm.lmem_pt_obj_flags = I915_BO_ALLOC_PM_EARLY; + + ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE; + ggtt->vm.cleanup = gen6_gmch_remove; + ggtt->vm.insert_page = gen8_ggtt_insert_page; + ggtt->vm.clear_range = nop_clear_range; + if (intel_scanout_needs_vtd_wa(i915)) + ggtt->vm.clear_range = gen8_ggtt_clear_range; + + ggtt->vm.insert_entries = gen8_ggtt_insert_entries; + + /* + * Serialize GTT updates with aperture access on BXT if VT-d is on, + * and always on CHV. + */ + if (intel_vm_no_concurrent_access_wa(i915)) { + ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL; + ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL; + ggtt->vm.bind_async_flags = + I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; + } + + ggtt->invalidate = gen8_ggtt_invalidate; + + ggtt->vm.vma_ops.bind_vma = intel_ggtt_bind_vma; + ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma; + + ggtt->vm.pte_encode = gen8_ggtt_pte_encode; + + setup_private_pat(ggtt->vm.gt->uncore); + + return ggtt_probe_common(ggtt, size); +} + +int intel_gt_gmch_gen5_enable_hw(struct drm_i915_private *i915) +{ + if (GRAPHICS_VER(i915) < 6 && !intel_enable_gtt()) + return -EIO; + + return 0; +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt_gmch.h b/drivers/gpu/drm/i915/gt/intel_gt_gmch.h new file mode 100644 index 000000000000..75ed55c1f30a --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_gmch.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef __INTEL_GT_GMCH_H__ +#define __INTEL_GT_GMCH_H__ + +#include "intel_gtt.h" + +/* For x86 platforms */ +#if IS_ENABLED(CONFIG_X86) +void intel_gt_gmch_gen5_chipset_flush(struct intel_gt *gt); +int intel_gt_gmch_gen6_probe(struct i915_ggtt *ggtt); +int intel_gt_gmch_gen8_probe(struct i915_ggtt *ggtt); +int intel_gt_gmch_gen5_probe(struct i915_ggtt *ggtt); +int intel_gt_gmch_gen5_enable_hw(struct drm_i915_private *i915); + +/* Stubs for non-x86 platforms */ +#else +static inline void intel_gt_gmch_gen5_chipset_flush(struct intel_gt *gt) +{ +} +static inline int intel_gt_gmch_gen5_probe(struct i915_ggtt *ggtt) +{ + /* No HW should be probed for this case yet, return fail */ + return -ENODEV; +} +static inline int intel_gt_gmch_gen6_probe(struct i915_ggtt *ggtt) +{ + /* No HW should be probed for this case yet, return fail */ + return -ENODEV; +} +static inline int intel_gt_gmch_gen8_probe(struct i915_ggtt *ggtt) +{ + /* No HW should be probed for this case yet, return fail */ + return -ENODEV; +} +static inline int intel_gt_gmch_gen5_enable_hw(struct drm_i915_private *i915) +{ + /* No HW should be enabled for this case yet, return fail */ + return -ENODEV; +} +#endif + +#endif /* __INTEL_GT_GMCH_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c index e443ac4c8059..88b4becfcb17 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c @@ -68,6 +68,9 @@ gen11_other_irq_handler(struct intel_gt *gt, const u8 instance, if (instance == OTHER_KCR_INSTANCE) return intel_pxp_irq_handler(>->pxp, iir); + if (instance == OTHER_GSC_INSTANCE) + return intel_gsc_irq_handler(gt, iir); + WARN_ONCE(1, "unhandled other interrupt instance=0x%x, iir=0x%x\n", instance, iir); } @@ -184,6 +187,8 @@ void gen11_gt_irq_reset(struct intel_gt *gt) intel_uncore_write(uncore, GEN11_VCS_VECS_INTR_ENABLE, 0); if (CCS_MASK(gt)) intel_uncore_write(uncore, GEN12_CCS_RSVD_INTR_ENABLE, 0); + if (HAS_HECI_GSC(gt->i915)) + intel_uncore_write(uncore, GEN11_GUNIT_CSME_INTR_ENABLE, 0); /* Restore masks irqs on RCS, BCS, VCS and VECS engines. */ intel_uncore_write(uncore, GEN11_RCS0_RSVD_INTR_MASK, ~0); @@ -201,6 +206,8 @@ void gen11_gt_irq_reset(struct intel_gt *gt) intel_uncore_write(uncore, GEN12_CCS0_CCS1_INTR_MASK, ~0); if (HAS_ENGINE(gt, CCS2) || HAS_ENGINE(gt, CCS3)) intel_uncore_write(uncore, GEN12_CCS2_CCS3_INTR_MASK, ~0); + if (HAS_HECI_GSC(gt->i915)) + intel_uncore_write(uncore, GEN11_GUNIT_CSME_INTR_MASK, ~0); intel_uncore_write(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE, 0); intel_uncore_write(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK, ~0); @@ -215,6 +222,7 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt) { struct intel_uncore *uncore = gt->uncore; u32 irqs = GT_RENDER_USER_INTERRUPT; + const u32 gsc_mask = GSC_IRQ_INTF(0) | GSC_IRQ_INTF(1); u32 dmask; u32 smask; @@ -233,6 +241,9 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt) intel_uncore_write(uncore, GEN11_VCS_VECS_INTR_ENABLE, dmask); if (CCS_MASK(gt)) intel_uncore_write(uncore, GEN12_CCS_RSVD_INTR_ENABLE, smask); + if (HAS_HECI_GSC(gt->i915)) + intel_uncore_write(uncore, GEN11_GUNIT_CSME_INTR_ENABLE, + gsc_mask); /* Unmask irqs on RCS, BCS, VCS and VECS engines. */ intel_uncore_write(uncore, GEN11_RCS0_RSVD_INTR_MASK, ~smask); @@ -250,6 +261,8 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt) intel_uncore_write(uncore, GEN12_CCS0_CCS1_INTR_MASK, ~dmask); if (HAS_ENGINE(gt, CCS2) || HAS_ENGINE(gt, CCS3)) intel_uncore_write(uncore, GEN12_CCS2_CCS3_INTR_MASK, ~dmask); + if (HAS_HECI_GSC(gt->i915)) + intel_uncore_write(uncore, GEN11_GUNIT_CSME_INTR_MASK, ~gsc_mask); /* * RPS interrupts will get enabled/disabled on demand when RPS itself diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index e4ecc17889d3..f553e2173bda 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -129,7 +129,14 @@ static const struct intel_wakeref_ops wf_ops = { void intel_gt_pm_init_early(struct intel_gt *gt) { - intel_wakeref_init(>->wakeref, gt->uncore->rpm, &wf_ops); + /* + * We access the runtime_pm structure via gt->i915 here rather than + * gt->uncore as we do elsewhere in the file because gt->uncore is not + * yet initialized for all tiles at this point in the driver startup. + * runtime_pm is per-device rather than per-tile, so this is still the + * correct structure. + */ + intel_wakeref_init(>->wakeref, >->i915->runtime_pm, &wf_ops); seqcount_mutex_init(>->stats.lock, >->wakeref.mutex); } @@ -175,15 +182,16 @@ static void gt_sanitize(struct intel_gt *gt, bool force) if (intel_gt_is_wedged(gt)) intel_gt_unset_wedged(gt); - for_each_engine(engine, gt, id) + /* For GuC mode, ensure submission is disabled before stopping ring */ + intel_uc_reset_prepare(>->uc); + + for_each_engine(engine, gt, id) { if (engine->reset.prepare) engine->reset.prepare(engine); - intel_uc_reset_prepare(>->uc); - - for_each_engine(engine, gt, id) if (engine->sanitize) engine->sanitize(engine); + } if (reset_engines(gt) || force) { for_each_engine(engine, gt, id) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c index 437e96bb3b93..0c6b9eb724ae 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c @@ -24,38 +24,38 @@ #include "intel_uncore.h" #include "vlv_sideband.h" -int intel_gt_pm_debugfs_forcewake_user_open(struct intel_gt *gt) +void intel_gt_pm_debugfs_forcewake_user_open(struct intel_gt *gt) { atomic_inc(>->user_wakeref); intel_gt_pm_get(gt); if (GRAPHICS_VER(gt->i915) >= 6) intel_uncore_forcewake_user_get(gt->uncore); - - return 0; } -int intel_gt_pm_debugfs_forcewake_user_release(struct intel_gt *gt) +void intel_gt_pm_debugfs_forcewake_user_release(struct intel_gt *gt) { if (GRAPHICS_VER(gt->i915) >= 6) intel_uncore_forcewake_user_put(gt->uncore); intel_gt_pm_put(gt); atomic_dec(>->user_wakeref); - - return 0; } static int forcewake_user_open(struct inode *inode, struct file *file) { struct intel_gt *gt = inode->i_private; - return intel_gt_pm_debugfs_forcewake_user_open(gt); + intel_gt_pm_debugfs_forcewake_user_open(gt); + + return 0; } static int forcewake_user_release(struct inode *inode, struct file *file) { struct intel_gt *gt = inode->i_private; - return intel_gt_pm_debugfs_forcewake_user_release(gt); + intel_gt_pm_debugfs_forcewake_user_release(gt); + + return 0; } static const struct file_operations forcewake_user_fops = { @@ -342,17 +342,16 @@ void intel_gt_pm_frequency_dump(struct intel_gt *gt, struct drm_printer *p) } else if (GRAPHICS_VER(i915) >= 6) { u32 rp_state_limits; u32 gt_perf_status; - u32 rp_state_cap; + struct intel_rps_freq_caps caps; u32 rpmodectl, rpinclimit, rpdeclimit; u32 rpstat, cagf, reqf; u32 rpcurupei, rpcurup, rpprevup; u32 rpcurdownei, rpcurdown, rpprevdown; u32 rpupei, rpupt, rpdownei, rpdownt; u32 pm_ier, pm_imr, pm_isr, pm_iir, pm_mask; - int max_freq; rp_state_limits = intel_uncore_read(uncore, GEN6_RP_STATE_LIMITS); - rp_state_cap = intel_rps_read_state_cap(rps); + gen6_rps_get_freq_caps(rps, &caps); if (IS_GEN9_LP(i915)) gt_perf_status = intel_uncore_read(uncore, BXT_GT_PERF_STATUS); else @@ -474,25 +473,12 @@ void intel_gt_pm_frequency_dump(struct intel_gt *gt, struct drm_printer *p) drm_printf(p, "RP DOWN THRESHOLD: %d (%lldns)\n", rpdownt, intel_gt_pm_interval_to_ns(gt, rpdownt)); - max_freq = (IS_GEN9_LP(i915) ? rp_state_cap >> 0 : - rp_state_cap >> 16) & 0xff; - max_freq *= (IS_GEN9_BC(i915) || - GRAPHICS_VER(i915) >= 11 ? GEN9_FREQ_SCALER : 1); drm_printf(p, "Lowest (RPN) frequency: %dMHz\n", - intel_gpu_freq(rps, max_freq)); - - max_freq = (rp_state_cap & 0xff00) >> 8; - max_freq *= (IS_GEN9_BC(i915) || - GRAPHICS_VER(i915) >= 11 ? GEN9_FREQ_SCALER : 1); + intel_gpu_freq(rps, caps.min_freq)); drm_printf(p, "Nominal (RP1) frequency: %dMHz\n", - intel_gpu_freq(rps, max_freq)); - - max_freq = (IS_GEN9_LP(i915) ? rp_state_cap >> 16 : - rp_state_cap >> 0) & 0xff; - max_freq *= (IS_GEN9_BC(i915) || - GRAPHICS_VER(i915) >= 11 ? GEN9_FREQ_SCALER : 1); + intel_gpu_freq(rps, caps.rp1_freq)); drm_printf(p, "Max non-overclocked (RP0) frequency: %dMHz\n", - intel_gpu_freq(rps, max_freq)); + intel_gpu_freq(rps, caps.rp0_freq)); drm_printf(p, "Max overclocked frequency: %dMHz\n", intel_gpu_freq(rps, rps->max_freq)); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.h b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.h index a8457887ec65..0ace8c2da0ac 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.h @@ -14,7 +14,7 @@ void intel_gt_pm_debugfs_register(struct intel_gt *gt, struct dentry *root); void intel_gt_pm_frequency_dump(struct intel_gt *gt, struct drm_printer *m); /* functions that need to be accessed by the upper level non-gt interfaces */ -int intel_gt_pm_debugfs_forcewake_user_open(struct intel_gt *gt); -int intel_gt_pm_debugfs_forcewake_user_release(struct intel_gt *gt); +void intel_gt_pm_debugfs_forcewake_user_open(struct intel_gt *gt); +void intel_gt_pm_debugfs_forcewake_user_release(struct intel_gt *gt); #endif /* INTEL_GT_PM_DEBUGFS_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 201b507c9dde..a39718a40cc3 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -46,6 +46,7 @@ #define GEN8_MCR_SLICE_MASK GEN8_MCR_SLICE(3) #define GEN8_MCR_SUBSLICE(subslice) (((subslice) & 3) << 24) #define GEN8_MCR_SUBSLICE_MASK GEN8_MCR_SUBSLICE(3) +#define GEN11_MCR_MULTICAST REG_BIT(31) #define GEN11_MCR_SLICE(slice) (((slice) & 0xf) << 27) #define GEN11_MCR_SLICE_MASK GEN11_MCR_SLICE(0xf) #define GEN11_MCR_SUBSLICE(subslice) (((subslice) & 0x7) << 24) @@ -840,6 +841,24 @@ #define CTC_SHIFT_PARAMETER_SHIFT 1 #define CTC_SHIFT_PARAMETER_MASK (0x3 << CTC_SHIFT_PARAMETER_SHIFT) +/* GPM MSG_IDLE */ +#define MSG_IDLE_CS _MMIO(0x8000) +#define MSG_IDLE_VCS0 _MMIO(0x8004) +#define MSG_IDLE_VCS1 _MMIO(0x8008) +#define MSG_IDLE_BCS _MMIO(0x800C) +#define MSG_IDLE_VECS0 _MMIO(0x8010) +#define MSG_IDLE_VCS2 _MMIO(0x80C0) +#define MSG_IDLE_VCS3 _MMIO(0x80C4) +#define MSG_IDLE_VCS4 _MMIO(0x80C8) +#define MSG_IDLE_VCS5 _MMIO(0x80CC) +#define MSG_IDLE_VCS6 _MMIO(0x80D0) +#define MSG_IDLE_VCS7 _MMIO(0x80D4) +#define MSG_IDLE_VECS1 _MMIO(0x80D8) +#define MSG_IDLE_VECS2 _MMIO(0x80DC) +#define MSG_IDLE_VECS3 _MMIO(0x80E0) +#define MSG_IDLE_FW_MASK REG_GENMASK(13, 9) +#define MSG_IDLE_FW_SHIFT 9 + #define FORCEWAKE_MEDIA_GEN9 _MMIO(0xa270) #define FORCEWAKE_RENDER_GEN9 _MMIO(0xa278) @@ -1087,6 +1106,7 @@ #define EU_PERF_CNTL3 _MMIO(0xe758) #define LSC_CHICKEN_BIT_0 _MMIO(0xe7c8) +#define DISABLE_D8_D16_COASLESCE REG_BIT(30) #define FORCE_1_SUB_MESSAGE_PER_FRAGMENT REG_BIT(15) #define LSC_CHICKEN_BIT_0_UDW _MMIO(0xe7c8 + 4) #define DIS_CHAIN_2XSIMD8 REG_BIT(55 - 32) @@ -1482,6 +1502,7 @@ #define OTHER_GUC_INSTANCE 0 #define OTHER_GTPM_INSTANCE 1 #define OTHER_KCR_INSTANCE 4 +#define OTHER_GSC_INSTANCE 6 #define GEN11_IIR_REG_SELECTOR(x) _MMIO(0x190070 + ((x) * 4)) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c new file mode 100644 index 000000000000..8ec8bc660c8c --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include <drm/drm_device.h> +#include <linux/device.h> +#include <linux/kobject.h> +#include <linux/printk.h> +#include <linux/sysfs.h> + +#include "i915_drv.h" +#include "i915_sysfs.h" +#include "intel_gt.h" +#include "intel_gt_sysfs.h" +#include "intel_gt_sysfs_pm.h" +#include "intel_gt_types.h" +#include "intel_rc6.h" + +bool is_object_gt(struct kobject *kobj) +{ + return !strncmp(kobj->name, "gt", 2); +} + +static struct intel_gt *kobj_to_gt(struct kobject *kobj) +{ + return container_of(kobj, struct kobj_gt, base)->gt; +} + +struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev, + const char *name) +{ + struct kobject *kobj = &dev->kobj; + + /* + * We are interested at knowing from where the interface + * has been called, whether it's called from gt/ or from + * the parent directory. + * From the interface position it depends also the value of + * the private data. + * If the interface is called from gt/ then private data is + * of the "struct intel_gt *" type, otherwise it's * a + * "struct drm_i915_private *" type. + */ + if (!is_object_gt(kobj)) { + struct drm_i915_private *i915 = kdev_minor_to_i915(dev); + + return to_gt(i915); + } + + return kobj_to_gt(kobj); +} + +static struct kobject *gt_get_parent_obj(struct intel_gt *gt) +{ + return >->i915->drm.primary->kdev->kobj; +} + +static ssize_t id_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + + return sysfs_emit(buf, "%u\n", gt->info.id); +} +static DEVICE_ATTR_RO(id); + +static struct attribute *id_attrs[] = { + &dev_attr_id.attr, + NULL, +}; +ATTRIBUTE_GROUPS(id); + +static void kobj_gt_release(struct kobject *kobj) +{ + kfree(kobj); +} + +static struct kobj_type kobj_gt_type = { + .release = kobj_gt_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = id_groups, +}; + +void intel_gt_sysfs_register(struct intel_gt *gt) +{ + struct kobj_gt *kg; + + /* + * We need to make things right with the + * ABI compatibility. The files were originally + * generated under the parent directory. + * + * We generate the files only for gt 0 + * to avoid duplicates. + */ + if (gt_is_root(gt)) + intel_gt_sysfs_pm_init(gt, gt_get_parent_obj(gt)); + + kg = kzalloc(sizeof(*kg), GFP_KERNEL); + if (!kg) + goto exit_fail; + + kobject_init(&kg->base, &kobj_gt_type); + kg->gt = gt; + + /* xfer ownership to sysfs tree */ + if (kobject_add(&kg->base, gt->i915->sysfs_gt, "gt%d", gt->info.id)) + goto exit_kobj_put; + + intel_gt_sysfs_pm_init(gt, &kg->base); + + return; + +exit_kobj_put: + kobject_put(&kg->base); + +exit_fail: + drm_warn(>->i915->drm, + "failed to initialize gt%d sysfs root\n", gt->info.id); +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h new file mode 100644 index 000000000000..9471b26752cf --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef __SYSFS_GT_H__ +#define __SYSFS_GT_H__ + +#include <linux/ctype.h> +#include <linux/kobject.h> + +#include "i915_gem.h" /* GEM_BUG_ON() */ + +struct intel_gt; + +struct kobj_gt { + struct kobject base; + struct intel_gt *gt; +}; + +bool is_object_gt(struct kobject *kobj); + +struct drm_i915_private *kobj_to_i915(struct kobject *kobj); + +struct kobject * +intel_gt_create_kobj(struct intel_gt *gt, + struct kobject *dir, + const char *name); + +void intel_gt_sysfs_register(struct intel_gt *gt); +struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev, + const char *name); + +#endif /* SYSFS_GT_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c new file mode 100644 index 000000000000..26cbfa6477d1 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c @@ -0,0 +1,601 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include <drm/drm_device.h> +#include <linux/sysfs.h> +#include <linux/printk.h> + +#include "i915_drv.h" +#include "i915_reg.h" +#include "i915_sysfs.h" +#include "intel_gt.h" +#include "intel_gt_regs.h" +#include "intel_gt_sysfs.h" +#include "intel_gt_sysfs_pm.h" +#include "intel_rc6.h" +#include "intel_rps.h" + +#ifdef CONFIG_PM +enum intel_gt_sysfs_op { + INTEL_GT_SYSFS_MIN = 0, + INTEL_GT_SYSFS_MAX, +}; + +static int +sysfs_gt_attribute_w_func(struct device *dev, struct device_attribute *attr, + int (func)(struct intel_gt *gt, u32 val), u32 val) +{ + struct intel_gt *gt; + int ret; + + if (!is_object_gt(&dev->kobj)) { + int i; + struct drm_i915_private *i915 = kdev_minor_to_i915(dev); + + for_each_gt(gt, i915, i) { + ret = func(gt, val); + if (ret) + break; + } + } else { + gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + ret = func(gt, val); + } + + return ret; +} + +static u32 +sysfs_gt_attribute_r_func(struct device *dev, struct device_attribute *attr, + u32 (func)(struct intel_gt *gt), + enum intel_gt_sysfs_op op) +{ + struct intel_gt *gt; + u32 ret; + + ret = (op == INTEL_GT_SYSFS_MAX) ? 0 : (u32) -1; + + if (!is_object_gt(&dev->kobj)) { + int i; + struct drm_i915_private *i915 = kdev_minor_to_i915(dev); + + for_each_gt(gt, i915, i) { + u32 val = func(gt); + + switch (op) { + case INTEL_GT_SYSFS_MIN: + if (val < ret) + ret = val; + break; + + case INTEL_GT_SYSFS_MAX: + if (val > ret) + ret = val; + break; + } + } + } else { + gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + ret = func(gt); + } + + return ret; +} + +/* RC6 interfaces will show the minimum RC6 residency value */ +#define sysfs_gt_attribute_r_min_func(d, a, f) \ + sysfs_gt_attribute_r_func(d, a, f, INTEL_GT_SYSFS_MIN) + +/* Frequency interfaces will show the maximum frequency value */ +#define sysfs_gt_attribute_r_max_func(d, a, f) \ + sysfs_gt_attribute_r_func(d, a, f, INTEL_GT_SYSFS_MAX) + +static u32 get_residency(struct intel_gt *gt, i915_reg_t reg) +{ + intel_wakeref_t wakeref; + u64 res = 0; + + with_intel_runtime_pm(gt->uncore->rpm, wakeref) + res = intel_rc6_residency_us(>->rc6, reg); + + return DIV_ROUND_CLOSEST_ULL(res, 1000); +} + +static ssize_t rc6_enable_show(struct device *dev, + struct device_attribute *attr, + char *buff) +{ + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + u8 mask = 0; + + if (HAS_RC6(gt->i915)) + mask |= BIT(0); + if (HAS_RC6p(gt->i915)) + mask |= BIT(1); + if (HAS_RC6pp(gt->i915)) + mask |= BIT(2); + + return sysfs_emit(buff, "%x\n", mask); +} + +static u32 __rc6_residency_ms_show(struct intel_gt *gt) +{ + return get_residency(gt, GEN6_GT_GFX_RC6); +} + +static ssize_t rc6_residency_ms_show(struct device *dev, + struct device_attribute *attr, + char *buff) +{ + u32 rc6_residency = sysfs_gt_attribute_r_min_func(dev, attr, + __rc6_residency_ms_show); + + return sysfs_emit(buff, "%u\n", rc6_residency); +} + +static u32 __rc6p_residency_ms_show(struct intel_gt *gt) +{ + return get_residency(gt, GEN6_GT_GFX_RC6p); +} + +static ssize_t rc6p_residency_ms_show(struct device *dev, + struct device_attribute *attr, + char *buff) +{ + u32 rc6p_residency = sysfs_gt_attribute_r_min_func(dev, attr, + __rc6p_residency_ms_show); + + return sysfs_emit(buff, "%u\n", rc6p_residency); +} + +static u32 __rc6pp_residency_ms_show(struct intel_gt *gt) +{ + return get_residency(gt, GEN6_GT_GFX_RC6pp); +} + +static ssize_t rc6pp_residency_ms_show(struct device *dev, + struct device_attribute *attr, + char *buff) +{ + u32 rc6pp_residency = sysfs_gt_attribute_r_min_func(dev, attr, + __rc6pp_residency_ms_show); + + return sysfs_emit(buff, "%u\n", rc6pp_residency); +} + +static u32 __media_rc6_residency_ms_show(struct intel_gt *gt) +{ + return get_residency(gt, VLV_GT_MEDIA_RC6); +} + +static ssize_t media_rc6_residency_ms_show(struct device *dev, + struct device_attribute *attr, + char *buff) +{ + u32 rc6_residency = sysfs_gt_attribute_r_min_func(dev, attr, + __media_rc6_residency_ms_show); + + return sysfs_emit(buff, "%u\n", rc6_residency); +} + +static DEVICE_ATTR_RO(rc6_enable); +static DEVICE_ATTR_RO(rc6_residency_ms); +static DEVICE_ATTR_RO(rc6p_residency_ms); +static DEVICE_ATTR_RO(rc6pp_residency_ms); +static DEVICE_ATTR_RO(media_rc6_residency_ms); + +static struct attribute *rc6_attrs[] = { + &dev_attr_rc6_enable.attr, + &dev_attr_rc6_residency_ms.attr, + NULL +}; + +static struct attribute *rc6p_attrs[] = { + &dev_attr_rc6p_residency_ms.attr, + &dev_attr_rc6pp_residency_ms.attr, + NULL +}; + +static struct attribute *media_rc6_attrs[] = { + &dev_attr_media_rc6_residency_ms.attr, + NULL +}; + +static const struct attribute_group rc6_attr_group[] = { + { .attrs = rc6_attrs, }, + { .name = power_group_name, .attrs = rc6_attrs, }, +}; + +static const struct attribute_group rc6p_attr_group[] = { + { .attrs = rc6p_attrs, }, + { .name = power_group_name, .attrs = rc6p_attrs, }, +}; + +static const struct attribute_group media_rc6_attr_group[] = { + { .attrs = media_rc6_attrs, }, + { .name = power_group_name, .attrs = media_rc6_attrs, }, +}; + +static int __intel_gt_sysfs_create_group(struct kobject *kobj, + const struct attribute_group *grp) +{ + return is_object_gt(kobj) ? + sysfs_create_group(kobj, &grp[0]) : + sysfs_merge_group(kobj, &grp[1]); +} + +static void intel_sysfs_rc6_init(struct intel_gt *gt, struct kobject *kobj) +{ + int ret; + + if (!HAS_RC6(gt->i915)) + return; + + ret = __intel_gt_sysfs_create_group(kobj, rc6_attr_group); + if (ret) + drm_warn(>->i915->drm, + "failed to create gt%u RC6 sysfs files (%pe)\n", + gt->info.id, ERR_PTR(ret)); + + /* + * cannot use the is_visible() attribute because + * the upper object inherits from the parent group. + */ + if (HAS_RC6p(gt->i915)) { + ret = __intel_gt_sysfs_create_group(kobj, rc6p_attr_group); + if (ret) + drm_warn(>->i915->drm, + "failed to create gt%u RC6p sysfs files (%pe)\n", + gt->info.id, ERR_PTR(ret)); + } + + if (IS_VALLEYVIEW(gt->i915) || IS_CHERRYVIEW(gt->i915)) { + ret = __intel_gt_sysfs_create_group(kobj, media_rc6_attr_group); + if (ret) + drm_warn(>->i915->drm, + "failed to create media %u RC6 sysfs files (%pe)\n", + gt->info.id, ERR_PTR(ret)); + } +} +#else +static void intel_sysfs_rc6_init(struct intel_gt *gt, struct kobject *kobj) +{ +} +#endif /* CONFIG_PM */ + +static u32 __act_freq_mhz_show(struct intel_gt *gt) +{ + return intel_rps_read_actual_frequency(>->rps); +} + +static ssize_t act_freq_mhz_show(struct device *dev, + struct device_attribute *attr, char *buff) +{ + u32 actual_freq = sysfs_gt_attribute_r_max_func(dev, attr, + __act_freq_mhz_show); + + return sysfs_emit(buff, "%u\n", actual_freq); +} + +static u32 __cur_freq_mhz_show(struct intel_gt *gt) +{ + return intel_rps_get_requested_frequency(>->rps); +} + +static ssize_t cur_freq_mhz_show(struct device *dev, + struct device_attribute *attr, char *buff) +{ + u32 cur_freq = sysfs_gt_attribute_r_max_func(dev, attr, + __cur_freq_mhz_show); + + return sysfs_emit(buff, "%u\n", cur_freq); +} + +static u32 __boost_freq_mhz_show(struct intel_gt *gt) +{ + return intel_rps_get_boost_frequency(>->rps); +} + +static ssize_t boost_freq_mhz_show(struct device *dev, + struct device_attribute *attr, + char *buff) +{ + u32 boost_freq = sysfs_gt_attribute_r_max_func(dev, attr, + __boost_freq_mhz_show); + + return sysfs_emit(buff, "%u\n", boost_freq); +} + +static int __boost_freq_mhz_store(struct intel_gt *gt, u32 val) +{ + return intel_rps_set_boost_frequency(>->rps, val); +} + +static ssize_t boost_freq_mhz_store(struct device *dev, + struct device_attribute *attr, + const char *buff, size_t count) +{ + ssize_t ret; + u32 val; + + ret = kstrtou32(buff, 0, &val); + if (ret) + return ret; + + return sysfs_gt_attribute_w_func(dev, attr, + __boost_freq_mhz_store, val) ?: count; +} + +static u32 __rp0_freq_mhz_show(struct intel_gt *gt) +{ + return intel_rps_get_rp0_frequency(>->rps); +} + +static ssize_t RP0_freq_mhz_show(struct device *dev, + struct device_attribute *attr, char *buff) +{ + u32 rp0_freq = sysfs_gt_attribute_r_max_func(dev, attr, + __rp0_freq_mhz_show); + + return sysfs_emit(buff, "%u\n", rp0_freq); +} + +static u32 __rp1_freq_mhz_show(struct intel_gt *gt) +{ + return intel_rps_get_rp1_frequency(>->rps); +} + +static ssize_t RP1_freq_mhz_show(struct device *dev, + struct device_attribute *attr, char *buff) +{ + u32 rp1_freq = sysfs_gt_attribute_r_max_func(dev, attr, + __rp1_freq_mhz_show); + + return sysfs_emit(buff, "%u\n", rp1_freq); +} + +static u32 __rpn_freq_mhz_show(struct intel_gt *gt) +{ + return intel_rps_get_rpn_frequency(>->rps); +} + +static ssize_t RPn_freq_mhz_show(struct device *dev, + struct device_attribute *attr, char *buff) +{ + u32 rpn_freq = sysfs_gt_attribute_r_max_func(dev, attr, + __rpn_freq_mhz_show); + + return sysfs_emit(buff, "%u\n", rpn_freq); +} + +static u32 __max_freq_mhz_show(struct intel_gt *gt) +{ + return intel_rps_get_max_frequency(>->rps); +} + +static ssize_t max_freq_mhz_show(struct device *dev, + struct device_attribute *attr, char *buff) +{ + u32 max_freq = sysfs_gt_attribute_r_max_func(dev, attr, + __max_freq_mhz_show); + + return sysfs_emit(buff, "%u\n", max_freq); +} + +static int __set_max_freq(struct intel_gt *gt, u32 val) +{ + return intel_rps_set_max_frequency(>->rps, val); +} + +static ssize_t max_freq_mhz_store(struct device *dev, + struct device_attribute *attr, + const char *buff, size_t count) +{ + int ret; + u32 val; + + ret = kstrtou32(buff, 0, &val); + if (ret) + return ret; + + ret = sysfs_gt_attribute_w_func(dev, attr, __set_max_freq, val); + + return ret ?: count; +} + +static u32 __min_freq_mhz_show(struct intel_gt *gt) +{ + return intel_rps_get_min_frequency(>->rps); +} + +static ssize_t min_freq_mhz_show(struct device *dev, + struct device_attribute *attr, char *buff) +{ + u32 min_freq = sysfs_gt_attribute_r_min_func(dev, attr, + __min_freq_mhz_show); + + return sysfs_emit(buff, "%u\n", min_freq); +} + +static int __set_min_freq(struct intel_gt *gt, u32 val) +{ + return intel_rps_set_min_frequency(>->rps, val); +} + +static ssize_t min_freq_mhz_store(struct device *dev, + struct device_attribute *attr, + const char *buff, size_t count) +{ + int ret; + u32 val; + + ret = kstrtou32(buff, 0, &val); + if (ret) + return ret; + + ret = sysfs_gt_attribute_w_func(dev, attr, __set_min_freq, val); + + return ret ?: count; +} + +static u32 __vlv_rpe_freq_mhz_show(struct intel_gt *gt) +{ + struct intel_rps *rps = >->rps; + + return intel_gpu_freq(rps, rps->efficient_freq); +} + +static ssize_t vlv_rpe_freq_mhz_show(struct device *dev, + struct device_attribute *attr, char *buff) +{ + u32 rpe_freq = sysfs_gt_attribute_r_max_func(dev, attr, + __vlv_rpe_freq_mhz_show); + + return sysfs_emit(buff, "%u\n", rpe_freq); +} + +#define INTEL_GT_RPS_SYSFS_ATTR(_name, _mode, _show, _store) \ + struct device_attribute dev_attr_gt_##_name = __ATTR(gt_##_name, _mode, _show, _store); \ + struct device_attribute dev_attr_rps_##_name = __ATTR(rps_##_name, _mode, _show, _store) + +#define INTEL_GT_RPS_SYSFS_ATTR_RO(_name) \ + INTEL_GT_RPS_SYSFS_ATTR(_name, 0444, _name##_show, NULL) +#define INTEL_GT_RPS_SYSFS_ATTR_RW(_name) \ + INTEL_GT_RPS_SYSFS_ATTR(_name, 0644, _name##_show, _name##_store) + +static INTEL_GT_RPS_SYSFS_ATTR_RO(act_freq_mhz); +static INTEL_GT_RPS_SYSFS_ATTR_RO(cur_freq_mhz); +static INTEL_GT_RPS_SYSFS_ATTR_RW(boost_freq_mhz); +static INTEL_GT_RPS_SYSFS_ATTR_RO(RP0_freq_mhz); +static INTEL_GT_RPS_SYSFS_ATTR_RO(RP1_freq_mhz); +static INTEL_GT_RPS_SYSFS_ATTR_RO(RPn_freq_mhz); +static INTEL_GT_RPS_SYSFS_ATTR_RW(max_freq_mhz); +static INTEL_GT_RPS_SYSFS_ATTR_RW(min_freq_mhz); + +static DEVICE_ATTR_RO(vlv_rpe_freq_mhz); + +#define GEN6_ATTR(s) { \ + &dev_attr_##s##_act_freq_mhz.attr, \ + &dev_attr_##s##_cur_freq_mhz.attr, \ + &dev_attr_##s##_boost_freq_mhz.attr, \ + &dev_attr_##s##_max_freq_mhz.attr, \ + &dev_attr_##s##_min_freq_mhz.attr, \ + &dev_attr_##s##_RP0_freq_mhz.attr, \ + &dev_attr_##s##_RP1_freq_mhz.attr, \ + &dev_attr_##s##_RPn_freq_mhz.attr, \ + NULL, \ + } + +#define GEN6_RPS_ATTR GEN6_ATTR(rps) +#define GEN6_GT_ATTR GEN6_ATTR(gt) + +static const struct attribute * const gen6_rps_attrs[] = GEN6_RPS_ATTR; +static const struct attribute * const gen6_gt_attrs[] = GEN6_GT_ATTR; + +static ssize_t punit_req_freq_mhz_show(struct device *dev, + struct device_attribute *attr, + char *buff) +{ + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + u32 preq = intel_rps_read_punit_req_frequency(>->rps); + + return sysfs_emit(buff, "%u\n", preq); +} + +struct intel_gt_bool_throttle_attr { + struct attribute attr; + ssize_t (*show)(struct device *dev, struct device_attribute *attr, + char *buf); + i915_reg_t reg32; + u32 mask; +}; + +static ssize_t throttle_reason_bool_show(struct device *dev, + struct device_attribute *attr, + char *buff) +{ + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + struct intel_gt_bool_throttle_attr *t_attr = + (struct intel_gt_bool_throttle_attr *) attr; + bool val = rps_read_mask_mmio(>->rps, t_attr->reg32, t_attr->mask); + + return sysfs_emit(buff, "%u\n", val); +} + +#define INTEL_GT_RPS_BOOL_ATTR_RO(sysfs_func__, mask__) \ +struct intel_gt_bool_throttle_attr attr_##sysfs_func__ = { \ + .attr = { .name = __stringify(sysfs_func__), .mode = 0444 }, \ + .show = throttle_reason_bool_show, \ + .reg32 = GT0_PERF_LIMIT_REASONS, \ + .mask = mask__, \ +} + +static DEVICE_ATTR_RO(punit_req_freq_mhz); +static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_status, GT0_PERF_LIMIT_REASONS_MASK); +static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_pl1, POWER_LIMIT_1_MASK); +static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_pl2, POWER_LIMIT_2_MASK); +static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_pl4, POWER_LIMIT_4_MASK); +static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_thermal, THERMAL_LIMIT_MASK); +static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_prochot, PROCHOT_MASK); +static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_ratl, RATL_MASK); +static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_vr_thermalert, VR_THERMALERT_MASK); +static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_vr_tdc, VR_TDC_MASK); + +static const struct attribute *freq_attrs[] = { + &dev_attr_punit_req_freq_mhz.attr, + &attr_throttle_reason_status.attr, + &attr_throttle_reason_pl1.attr, + &attr_throttle_reason_pl2.attr, + &attr_throttle_reason_pl4.attr, + &attr_throttle_reason_thermal.attr, + &attr_throttle_reason_prochot.attr, + &attr_throttle_reason_ratl.attr, + &attr_throttle_reason_vr_thermalert.attr, + &attr_throttle_reason_vr_tdc.attr, + NULL +}; + +static int intel_sysfs_rps_init(struct intel_gt *gt, struct kobject *kobj, + const struct attribute * const *attrs) +{ + int ret; + + if (GRAPHICS_VER(gt->i915) < 6) + return 0; + + ret = sysfs_create_files(kobj, attrs); + if (ret) + return ret; + + if (IS_VALLEYVIEW(gt->i915) || IS_CHERRYVIEW(gt->i915)) + ret = sysfs_create_file(kobj, &dev_attr_vlv_rpe_freq_mhz.attr); + + return ret; +} + +void intel_gt_sysfs_pm_init(struct intel_gt *gt, struct kobject *kobj) +{ + int ret; + + intel_sysfs_rc6_init(gt, kobj); + + ret = is_object_gt(kobj) ? + intel_sysfs_rps_init(gt, kobj, gen6_rps_attrs) : + intel_sysfs_rps_init(gt, kobj, gen6_gt_attrs); + if (ret) + drm_warn(>->i915->drm, + "failed to create gt%u RPS sysfs files (%pe)", + gt->info.id, ERR_PTR(ret)); + + /* end of the legacy interfaces */ + if (!is_object_gt(kobj)) + return; + + ret = sysfs_create_files(kobj, freq_attrs); + if (ret) + drm_warn(>->i915->drm, + "failed to create gt%u throttle sysfs files (%pe)", + gt->info.id, ERR_PTR(ret)); +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.h new file mode 100644 index 000000000000..f567105a4a89 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef __SYSFS_GT_PM_H__ +#define __SYSFS_GT_PM_H__ + +#include <linux/kobject.h> + +#include "intel_gt_types.h" + +void intel_gt_sysfs_pm_init(struct intel_gt *gt, struct kobject *kobj); + +#endif /* SYSFS_RC6_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index f20687796490..b06611c1d4ad 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -16,10 +16,12 @@ #include <linux/workqueue.h> #include "uc/intel_uc.h" +#include "intel_gsc.h" #include "i915_vma.h" #include "intel_engine_types.h" #include "intel_gt_buffer_pool_types.h" +#include "intel_hwconfig.h" #include "intel_llc_types.h" #include "intel_reset_types.h" #include "intel_rc6_types.h" @@ -72,6 +74,7 @@ struct intel_gt { struct i915_ggtt *ggtt; struct intel_uc uc; + struct intel_gsc gsc; struct mutex tlb_invalidate_lock; @@ -182,7 +185,19 @@ struct intel_gt { const struct intel_mmio_range *steering_table[NUM_STEERING_TYPES]; + struct { + u8 groupid; + u8 instanceid; + } default_steering; + + /* + * Base of per-tile GTTMMADR where we can derive the MMIO and the GGTT. + */ + phys_addr_t phys_addr; + struct intel_gt_info { + unsigned int id; + intel_engine_mask_t engine_mask; u32 l3bank_mask; @@ -199,6 +214,9 @@ struct intel_gt { struct sseu_dev_info sseu; unsigned long mslice_mask; + + /** @hwconfig: hardware configuration data */ + struct intel_hwconfig hwconfig; } info; struct { diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index cc53b481a1c5..b67831833c9a 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -109,32 +109,52 @@ int map_pt_dma_locked(struct i915_address_space *vm, struct drm_i915_gem_object return 0; } -void __i915_vm_close(struct i915_address_space *vm) +static void clear_vm_list(struct list_head *list) { struct i915_vma *vma, *vn; - if (!atomic_dec_and_mutex_lock(&vm->open, &vm->mutex)) - return; - - list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) { + list_for_each_entry_safe(vma, vn, list, vm_link) { struct drm_i915_gem_object *obj = vma->obj; - if (!kref_get_unless_zero(&obj->base.refcount)) { + if (!i915_gem_object_get_rcu(obj)) { /* - * Unbind the dying vma to ensure the bound_list + * Object is dying, but has not yet cleared its + * vma list. + * Unbind the dying vma to ensure our list * is completely drained. We leave the destruction to - * the object destructor. + * the object destructor to avoid the vma + * disappearing under it. */ atomic_and(~I915_VMA_PIN_MASK, &vma->flags); WARN_ON(__i915_vma_unbind(vma)); - continue; + + /* Remove from the unbound list */ + list_del_init(&vma->vm_link); + + /* + * Delay the vm and vm mutex freeing until the + * object is done with destruction. + */ + i915_vm_resv_get(vma->vm); + vma->vm_ddestroy = true; + } else { + i915_vma_destroy_locked(vma); + i915_gem_object_put(obj); } - /* Keep the obj (and hence the vma) alive as _we_ destroy it */ - i915_vma_destroy_locked(vma); - i915_gem_object_put(obj); } +} + +static void __i915_vm_close(struct i915_address_space *vm) +{ + mutex_lock(&vm->mutex); + + clear_vm_list(&vm->bound_list); + clear_vm_list(&vm->unbound_list); + + /* Check for must-fix unanticipated side-effects */ GEM_BUG_ON(!list_empty(&vm->bound_list)); + GEM_BUG_ON(!list_empty(&vm->unbound_list)); mutex_unlock(&vm->mutex); } @@ -156,7 +176,6 @@ int i915_vm_lock_objects(struct i915_address_space *vm, void i915_address_space_fini(struct i915_address_space *vm) { drm_mm_takedown(&vm->mm); - mutex_destroy(&vm->mutex); } /** @@ -164,7 +183,8 @@ void i915_address_space_fini(struct i915_address_space *vm) * @kref: Pointer to the &i915_address_space.resv_ref member. * * This function is called when the last lock sharer no longer shares the - * &i915_address_space._resv lock. + * &i915_address_space._resv lock, and also if we raced when + * destroying a vma by the vma destruction */ void i915_vm_resv_release(struct kref *kref) { @@ -172,6 +192,8 @@ void i915_vm_resv_release(struct kref *kref) container_of(kref, typeof(*vm), resv_ref); dma_resv_fini(&vm->_resv); + mutex_destroy(&vm->mutex); + kfree(vm); } @@ -180,6 +202,8 @@ static void __i915_vm_release(struct work_struct *work) struct i915_address_space *vm = container_of(work, struct i915_address_space, release_work); + __i915_vm_close(vm); + /* Synchronize async unbinds. */ i915_vma_resource_bind_dep_sync_all(vm); @@ -213,7 +237,6 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass) vm->pending_unbind = RB_ROOT_CACHED; INIT_WORK(&vm->release_work, __i915_vm_release); - atomic_set(&vm->open, 1); /* * The vm->mutex must be reclaim safe (for use in the shrinker). @@ -258,6 +281,7 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass) vm->mm.head_node.color = I915_COLOR_UNEVICTABLE; INIT_LIST_HEAD(&vm->bound_list); + INIT_LIST_HEAD(&vm->unbound_list); } void *__px_vaddr(struct drm_i915_gem_object *p) @@ -286,7 +310,7 @@ fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count) void *vaddr = __px_vaddr(p); memset64(vaddr, val, count); - clflush_cache_range(vaddr, PAGE_SIZE); + drm_clflush_virt_range(vaddr, PAGE_SIZE); } static void poison_scratch_page(struct drm_i915_gem_object *scratch) diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index ba7025388a5e..a40d928b3888 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -240,15 +240,6 @@ struct i915_address_space { unsigned int bind_async_flags; - /* - * Each active user context has its own address space (in full-ppgtt). - * Since the vm may be shared between multiple contexts, we count how - * many contexts keep us "open". Once open hits zero, we are closed - * and do not allow any new attachments, and proceed to shutdown our - * vma and page directories. - */ - atomic_t open; - struct mutex mutex; /* protects vma and our lists */ struct kref resv_ref; /* kref to keep the reservation lock alive. */ @@ -263,6 +254,11 @@ struct i915_address_space { */ struct list_head bound_list; + /** + * List of vmas not yet bound or evicted. + */ + struct list_head unbound_list; + /* Global GTT */ bool is_ggtt:1; @@ -272,6 +268,9 @@ struct i915_address_space { /* Some systems support read-only mappings for GGTT and/or PPGTT */ bool has_read_only:1; + /* Skip pte rewrite on unbind for suspend. Protected by @mutex */ + bool skip_pte_rewrite:1; + u8 top; u8 pd_shift; u8 scratch_order; @@ -448,6 +447,17 @@ i915_vm_get(struct i915_address_space *vm) return vm; } +static inline struct i915_address_space * +i915_vm_tryget(struct i915_address_space *vm) +{ + return kref_get_unless_zero(&vm->ref) ? vm : NULL; +} + +static inline void assert_vm_alive(struct i915_address_space *vm) +{ + GEM_BUG_ON(!kref_read(&vm->ref)); +} + /** * i915_vm_resv_get - Obtain a reference on the vm's reservation lock * @vm: The vm whose reservation lock we want to share. @@ -478,34 +488,6 @@ static inline void i915_vm_resv_put(struct i915_address_space *vm) kref_put(&vm->resv_ref, i915_vm_resv_release); } -static inline struct i915_address_space * -i915_vm_open(struct i915_address_space *vm) -{ - GEM_BUG_ON(!atomic_read(&vm->open)); - atomic_inc(&vm->open); - return i915_vm_get(vm); -} - -static inline bool -i915_vm_tryopen(struct i915_address_space *vm) -{ - if (atomic_add_unless(&vm->open, 1, 0)) - return i915_vm_get(vm); - - return false; -} - -void __i915_vm_close(struct i915_address_space *vm); - -static inline void -i915_vm_close(struct i915_address_space *vm) -{ - GEM_BUG_ON(!atomic_read(&vm->open)); - __i915_vm_close(vm); - - i915_vm_put(vm); -} - void i915_address_space_init(struct i915_address_space *vm, int subclass); void i915_address_space_fini(struct i915_address_space *vm); @@ -567,6 +549,14 @@ i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n) void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt, unsigned long lmem_pt_obj_flags); +void intel_ggtt_bind_vma(struct i915_address_space *vm, + struct i915_vm_pt_stash *stash, + struct i915_vma_resource *vma_res, + enum i915_cache_level cache_level, + u32 flags); +void intel_ggtt_unbind_vma(struct i915_address_space *vm, + struct i915_vma_resource *vma_res); + int i915_ggtt_probe_hw(struct drm_i915_private *i915); int i915_ggtt_init_hw(struct drm_i915_private *i915); int i915_ggtt_enable_hw(struct drm_i915_private *i915); @@ -637,6 +627,7 @@ release_pd_entry(struct i915_page_directory * const pd, struct i915_page_table * const pt, const struct drm_i915_gem_object * const scratch); void gen6_ggtt_invalidate(struct i915_ggtt *ggtt); +void gen8_ggtt_invalidate(struct i915_ggtt *ggtt); void ppgtt_bind_vma(struct i915_address_space *vm, struct i915_vm_pt_stash *stash, diff --git a/drivers/gpu/drm/i915/gt/intel_hwconfig.h b/drivers/gpu/drm/i915/gt/intel_hwconfig.h new file mode 100644 index 000000000000..322290780b67 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_hwconfig.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef _INTEL_HWCONFIG_H_ +#define _INTEL_HWCONFIG_H_ + +#include <linux/types.h> + +struct intel_gt; + +struct intel_hwconfig { + u32 size; + void *ptr; +}; + +int intel_gt_init_hwconfig(struct intel_gt *gt); +void intel_gt_fini_hwconfig(struct intel_gt *gt); + +#endif /* _INTEL_HWCONFIG_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 07bef7128fdb..3f83a9038e13 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -778,7 +778,7 @@ static void init_common_regs(u32 * const regs, CTX_CTRL_RS_CTX_ENABLE); regs[CTX_CONTEXT_CONTROL] = ctl; - regs[CTX_TIMESTAMP] = ce->runtime.last; + regs[CTX_TIMESTAMP] = ce->stats.runtime.last; } static void init_wa_bb_regs(u32 * const regs, @@ -1208,6 +1208,10 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs) IS_DG2_G11(ce->engine->i915)) cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0); + /* hsdes: 1809175790 */ + if (!HAS_FLAT_CCS(ce->engine->i915)) + cs = gen12_emit_aux_table_inv(cs, GEN12_GFX_CCS_AUX_NV); + return cs; } @@ -1225,6 +1229,14 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs) PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0); + /* hsdes: 1809175790 */ + if (!HAS_FLAT_CCS(ce->engine->i915)) { + if (ce->engine->class == VIDEO_DECODE_CLASS) + cs = gen12_emit_aux_table_inv(cs, GEN12_VD0_AUX_NV); + else if (ce->engine->class == VIDEO_ENHANCEMENT_CLASS) + cs = gen12_emit_aux_table_inv(cs, GEN12_VE0_AUX_NV); + } + return cs; } @@ -1722,11 +1734,12 @@ err: } } -static void st_update_runtime_underflow(struct intel_context *ce, s32 dt) +static void st_runtime_underflow(struct intel_context_stats *stats, s32 dt) { #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) - ce->runtime.num_underflow++; - ce->runtime.max_underflow = max_t(u32, ce->runtime.max_underflow, -dt); + stats->runtime.num_underflow++; + stats->runtime.max_underflow = + max_t(u32, stats->runtime.max_underflow, -dt); #endif } @@ -1743,25 +1756,25 @@ static u32 lrc_get_runtime(const struct intel_context *ce) void lrc_update_runtime(struct intel_context *ce) { + struct intel_context_stats *stats = &ce->stats; u32 old; s32 dt; - if (intel_context_is_barrier(ce)) + old = stats->runtime.last; + stats->runtime.last = lrc_get_runtime(ce); + dt = stats->runtime.last - old; + if (!dt) return; - old = ce->runtime.last; - ce->runtime.last = lrc_get_runtime(ce); - dt = ce->runtime.last - old; - if (unlikely(dt < 0)) { CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n", - old, ce->runtime.last, dt); - st_update_runtime_underflow(ce, dt); + old, stats->runtime.last, dt); + st_runtime_underflow(stats, dt); return; } - ewma_runtime_add(&ce->runtime.avg, dt); - ce->runtime.total += dt; + ewma_runtime_add(&stats->runtime.avg, dt); + stats->runtime.total += dt; } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h index 6e4f9f58fca5..7371bb5c8129 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h @@ -11,9 +11,10 @@ #include <linux/bitfield.h> #include <linux/types.h> +#include "intel_context.h" + struct drm_i915_gem_object; struct i915_gem_ww_ctx; -struct intel_context; struct intel_engine_cs; struct intel_ring; struct kref; @@ -120,4 +121,28 @@ static inline u32 lrc_desc_priority(int prio) return GEN12_CTX_PRIORITY_NORMAL; } +static inline void lrc_runtime_start(struct intel_context *ce) +{ + struct intel_context_stats *stats = &ce->stats; + + if (intel_context_is_barrier(ce)) + return; + + if (stats->active) + return; + + WRITE_ONCE(stats->active, intel_context_clock()); +} + +static inline void lrc_runtime_stop(struct intel_context *ce) +{ + struct intel_context_stats *stats = &ce->stats; + + if (!stats->active) + return; + + lrc_update_runtime(ce); + WRITE_ONCE(stats->active, 0); +} + #endif /* __INTEL_LRC_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c index 20444d6ceb3c..9d552f30b627 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.c +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c @@ -17,6 +17,8 @@ struct insert_pte_data { #define CHUNK_SZ SZ_8M /* ~1ms at 8GiB/s preemption delay */ +#define GET_CCS_BYTES(i915, size) (HAS_FLAT_CCS(i915) ? \ + DIV_ROUND_UP(size, NUM_BYTES_PER_CCS_BYTE) : 0) static bool engine_supports_migration(struct intel_engine_cs *engine) { if (!engine) @@ -467,6 +469,123 @@ static bool wa_1209644611_applies(int ver, u32 size) return height % 4 == 3 && height <= 8; } +/** + * DOC: Flat-CCS - Memory compression for Local memory + * + * On Xe-HP and later devices, we use dedicated compression control state (CCS) + * stored in local memory for each surface, to support the 3D and media + * compression formats. + * + * The memory required for the CCS of the entire local memory is 1/256 of the + * local memory size. So before the kernel boot, the required memory is reserved + * for the CCS data and a secure register will be programmed with the CCS base + * address. + * + * Flat CCS data needs to be cleared when a lmem object is allocated. + * And CCS data can be copied in and out of CCS region through + * XY_CTRL_SURF_COPY_BLT. CPU can't access the CCS data directly. + * + * When we exhaust the lmem, if the object's placements support smem, then we can + * directly decompress the compressed lmem object into smem and start using it + * from smem itself. + * + * But when we need to swapout the compressed lmem object into a smem region + * though objects' placement doesn't support smem, then we copy the lmem content + * as it is into smem region along with ccs data (using XY_CTRL_SURF_COPY_BLT). + * When the object is referred, lmem content will be swaped in along with + * restoration of the CCS data (using XY_CTRL_SURF_COPY_BLT) at corresponding + * location. + */ + +static inline u32 *i915_flush_dw(u32 *cmd, u32 flags) +{ + *cmd++ = MI_FLUSH_DW | flags; + *cmd++ = 0; + *cmd++ = 0; + + return cmd; +} + +static u32 calc_ctrl_surf_instr_size(struct drm_i915_private *i915, int size) +{ + u32 num_cmds, num_blks, total_size; + + if (!GET_CCS_BYTES(i915, size)) + return 0; + + /* + * XY_CTRL_SURF_COPY_BLT transfers CCS in 256 byte + * blocks. one XY_CTRL_SURF_COPY_BLT command can + * transfer upto 1024 blocks. + */ + num_blks = DIV_ROUND_UP(GET_CCS_BYTES(i915, size), + NUM_CCS_BYTES_PER_BLOCK); + num_cmds = DIV_ROUND_UP(num_blks, NUM_CCS_BLKS_PER_XFER); + total_size = XY_CTRL_SURF_INSTR_SIZE * num_cmds; + + /* + * Adding a flush before and after XY_CTRL_SURF_COPY_BLT + */ + total_size += 2 * MI_FLUSH_DW_SIZE; + + return total_size; +} + +static int emit_copy_ccs(struct i915_request *rq, + u32 dst_offset, u8 dst_access, + u32 src_offset, u8 src_access, int size) +{ + struct drm_i915_private *i915 = rq->engine->i915; + int mocs = rq->engine->gt->mocs.uc_index << 1; + u32 num_ccs_blks, ccs_ring_size; + u32 *cs; + + ccs_ring_size = calc_ctrl_surf_instr_size(i915, size); + WARN_ON(!ccs_ring_size); + + cs = intel_ring_begin(rq, round_up(ccs_ring_size, 2)); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + num_ccs_blks = DIV_ROUND_UP(GET_CCS_BYTES(i915, size), + NUM_CCS_BYTES_PER_BLOCK); + GEM_BUG_ON(num_ccs_blks > NUM_CCS_BLKS_PER_XFER); + cs = i915_flush_dw(cs, MI_FLUSH_DW_LLC | MI_FLUSH_DW_CCS); + + /* + * The XY_CTRL_SURF_COPY_BLT instruction is used to copy the CCS + * data in and out of the CCS region. + * + * We can copy at most 1024 blocks of 256 bytes using one + * XY_CTRL_SURF_COPY_BLT instruction. + * + * In case we need to copy more than 1024 blocks, we need to add + * another instruction to the same batch buffer. + * + * 1024 blocks of 256 bytes of CCS represent a total 256KB of CCS. + * + * 256 KB of CCS represents 256 * 256 KB = 64 MB of LMEM. + */ + *cs++ = XY_CTRL_SURF_COPY_BLT | + src_access << SRC_ACCESS_TYPE_SHIFT | + dst_access << DST_ACCESS_TYPE_SHIFT | + ((num_ccs_blks - 1) & CCS_SIZE_MASK) << CCS_SIZE_SHIFT; + *cs++ = src_offset; + *cs++ = rq->engine->instance | + FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, mocs); + *cs++ = dst_offset; + *cs++ = rq->engine->instance | + FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, mocs); + + cs = i915_flush_dw(cs, MI_FLUSH_DW_LLC | MI_FLUSH_DW_CCS); + if (ccs_ring_size & 1) + *cs++ = MI_NOOP; + + intel_ring_advance(rq, cs); + + return 0; +} + static int emit_copy(struct i915_request *rq, u32 dst_offset, u32 src_offset, int size) { @@ -514,6 +633,65 @@ static int emit_copy(struct i915_request *rq, return 0; } +static int scatter_list_length(struct scatterlist *sg) +{ + int len = 0; + + while (sg && sg_dma_len(sg)) { + len += sg_dma_len(sg); + sg = sg_next(sg); + }; + + return len; +} + +static void +calculate_chunk_sz(struct drm_i915_private *i915, bool src_is_lmem, + int *src_sz, int *ccs_sz, u32 bytes_to_cpy, + u32 ccs_bytes_to_cpy) +{ + if (ccs_bytes_to_cpy) { + /* + * We can only copy the ccs data corresponding to + * the CHUNK_SZ of lmem which is + * GET_CCS_BYTES(i915, CHUNK_SZ)) + */ + *ccs_sz = min_t(int, ccs_bytes_to_cpy, GET_CCS_BYTES(i915, CHUNK_SZ)); + + if (!src_is_lmem) + /* + * When CHUNK_SZ is passed all the pages upto CHUNK_SZ + * will be taken for the blt. in Flat-ccs supported + * platform Smem obj will have more pages than required + * for main meory hence limit it to the required size + * for main memory + */ + *src_sz = min_t(int, bytes_to_cpy, CHUNK_SZ); + } else { /* ccs handling is not required */ + *src_sz = CHUNK_SZ; + } +} + +static void get_ccs_sg_sgt(struct sgt_dma *it, u32 bytes_to_cpy) +{ + u32 len; + + do { + GEM_BUG_ON(!it->sg || !sg_dma_len(it->sg)); + len = it->max - it->dma; + if (len > bytes_to_cpy) { + it->dma += bytes_to_cpy; + break; + } + + bytes_to_cpy -= len; + + it->sg = __sg_next(it->sg); + it->dma = sg_dma_address(it->sg); + it->max = it->dma + sg_dma_len(it->sg); + } while (bytes_to_cpy); +} + int intel_context_migrate_copy(struct intel_context *ce, const struct i915_deps *deps, @@ -525,17 +703,67 @@ intel_context_migrate_copy(struct intel_context *ce, bool dst_is_lmem, struct i915_request **out) { - struct sgt_dma it_src = sg_sgt(src), it_dst = sg_sgt(dst); + struct sgt_dma it_src = sg_sgt(src), it_dst = sg_sgt(dst), it_ccs; + struct drm_i915_private *i915 = ce->engine->i915; + u32 ccs_bytes_to_cpy = 0, bytes_to_cpy; + enum i915_cache_level ccs_cache_level; + int src_sz, dst_sz, ccs_sz; + u32 src_offset, dst_offset; + u8 src_access, dst_access; struct i915_request *rq; + bool ccs_is_src; int err; GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm); + GEM_BUG_ON(IS_DGFX(ce->engine->i915) && (!src_is_lmem && !dst_is_lmem)); *out = NULL; GEM_BUG_ON(ce->ring->size < SZ_64K); + src_sz = scatter_list_length(src); + bytes_to_cpy = src_sz; + + if (HAS_FLAT_CCS(i915) && src_is_lmem ^ dst_is_lmem) { + src_access = !src_is_lmem && dst_is_lmem; + dst_access = !src_access; + + dst_sz = scatter_list_length(dst); + if (src_is_lmem) { + it_ccs = it_dst; + ccs_cache_level = dst_cache_level; + ccs_is_src = false; + } else if (dst_is_lmem) { + bytes_to_cpy = dst_sz; + it_ccs = it_src; + ccs_cache_level = src_cache_level; + ccs_is_src = true; + } + + /* + * When there is a eviction of ccs needed smem will have the + * extra pages for the ccs data + * + * TO-DO: Want to move the size mismatch check to a WARN_ON, + * but still we have some requests of smem->lmem with same size. + * Need to fix it. + */ + ccs_bytes_to_cpy = src_sz != dst_sz ? GET_CCS_BYTES(i915, bytes_to_cpy) : 0; + if (ccs_bytes_to_cpy) + get_ccs_sg_sgt(&it_ccs, bytes_to_cpy); + } + + src_offset = 0; + dst_offset = CHUNK_SZ; + if (HAS_64K_PAGES(ce->engine->i915)) { + src_offset = 0; + dst_offset = 0; + if (src_is_lmem) + src_offset = CHUNK_SZ; + if (dst_is_lmem) + dst_offset = 2 * CHUNK_SZ; + } + do { - u32 src_offset, dst_offset; int len; rq = i915_request_create(ce); @@ -563,22 +791,16 @@ intel_context_migrate_copy(struct intel_context *ce, if (err) goto out_rq; - src_offset = 0; - dst_offset = CHUNK_SZ; - if (HAS_64K_PAGES(ce->engine->i915)) { - GEM_BUG_ON(!src_is_lmem && !dst_is_lmem); - - src_offset = 0; - dst_offset = 0; - if (src_is_lmem) - src_offset = CHUNK_SZ; - if (dst_is_lmem) - dst_offset = 2 * CHUNK_SZ; - } + calculate_chunk_sz(i915, src_is_lmem, &src_sz, &ccs_sz, + bytes_to_cpy, ccs_bytes_to_cpy); len = emit_pte(rq, &it_src, src_cache_level, src_is_lmem, - src_offset, CHUNK_SZ); - if (len <= 0) { + src_offset, src_sz); + if (!len) { + err = -EINVAL; + goto out_rq; + } + if (len < 0) { err = len; goto out_rq; } @@ -596,7 +818,46 @@ intel_context_migrate_copy(struct intel_context *ce, if (err) goto out_rq; - err = emit_copy(rq, dst_offset, src_offset, len); + err = emit_copy(rq, dst_offset, src_offset, len); + if (err) + goto out_rq; + + bytes_to_cpy -= len; + + if (ccs_bytes_to_cpy) { + err = rq->engine->emit_flush(rq, EMIT_INVALIDATE); + if (err) + goto out_rq; + + err = emit_pte(rq, &it_ccs, ccs_cache_level, false, + ccs_is_src ? src_offset : dst_offset, + ccs_sz); + + err = rq->engine->emit_flush(rq, EMIT_INVALIDATE); + if (err) + goto out_rq; + + /* + * Using max of src_sz and dst_sz, as we need to + * pass the lmem size corresponding to the ccs + * blocks we need to handle. + */ + ccs_sz = max_t(int, ccs_is_src ? ccs_sz : src_sz, + ccs_is_src ? dst_sz : ccs_sz); + + err = emit_copy_ccs(rq, dst_offset, dst_access, + src_offset, src_access, ccs_sz); + if (err) + goto out_rq; + + err = rq->engine->emit_flush(rq, EMIT_INVALIDATE); + if (err) + goto out_rq; + + /* Converting back to ccs bytes */ + ccs_sz = GET_CCS_BYTES(rq->engine->i915, ccs_sz); + ccs_bytes_to_cpy -= ccs_sz; + } /* Arbitration is re-enabled between requests. */ out_rq: @@ -604,9 +865,26 @@ out_rq: i915_request_put(*out); *out = i915_request_get(rq); i915_request_add(rq); - if (err || !it_src.sg || !sg_dma_len(it_src.sg)) + + if (err) break; + if (!bytes_to_cpy && !ccs_bytes_to_cpy) { + if (src_is_lmem) + WARN_ON(it_src.sg && sg_dma_len(it_src.sg)); + else + WARN_ON(it_dst.sg && sg_dma_len(it_dst.sg)); + break; + } + + if (WARN_ON(!it_src.sg || !sg_dma_len(it_src.sg) || + !it_dst.sg || !sg_dma_len(it_dst.sg) || + (ccs_bytes_to_cpy && (!it_ccs.sg || + !sg_dma_len(it_ccs.sg))))) { + err = -EINVAL; + break; + } + cond_resched(); } while (1); @@ -614,35 +892,65 @@ out_ce: return err; } -static int emit_clear(struct i915_request *rq, u64 offset, int size, u32 value) +static int emit_clear(struct i915_request *rq, u32 offset, int size, + u32 value, bool is_lmem) { - const int ver = GRAPHICS_VER(rq->engine->i915); + struct drm_i915_private *i915 = rq->engine->i915; + int mocs = rq->engine->gt->mocs.uc_index << 1; + const int ver = GRAPHICS_VER(i915); + int ring_sz; u32 *cs; GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX); - offset += (u64)rq->engine->instance << 32; + if (HAS_FLAT_CCS(i915) && ver >= 12) + ring_sz = XY_FAST_COLOR_BLT_DW; + else if (ver >= 8) + ring_sz = 8; + else + ring_sz = 6; - cs = intel_ring_begin(rq, ver >= 8 ? 8 : 6); + cs = intel_ring_begin(rq, ring_sz); if (IS_ERR(cs)) return PTR_ERR(cs); - if (ver >= 8) { + if (HAS_FLAT_CCS(i915) && ver >= 12) { + *cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 | + (XY_FAST_COLOR_BLT_DW - 2); + *cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs) | + (PAGE_SIZE - 1); + *cs++ = 0; + *cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; + *cs++ = offset; + *cs++ = rq->engine->instance; + *cs++ = !is_lmem << XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT; + /* BG7 */ + *cs++ = value; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + /* BG11 */ + *cs++ = 0; + *cs++ = 0; + /* BG13 */ + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + } else if (ver >= 8) { *cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2); *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE; *cs++ = 0; *cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; - *cs++ = lower_32_bits(offset); - *cs++ = upper_32_bits(offset); + *cs++ = offset; + *cs++ = rq->engine->instance; *cs++ = value; *cs++ = MI_NOOP; } else { - GEM_BUG_ON(upper_32_bits(offset)); *cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2); *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE; *cs++ = 0; *cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; - *cs++ = lower_32_bits(offset); + *cs++ = offset; *cs++ = value; } @@ -659,8 +967,10 @@ intel_context_migrate_clear(struct intel_context *ce, u32 value, struct i915_request **out) { + struct drm_i915_private *i915 = ce->engine->i915; struct sgt_dma it = sg_sgt(sg); struct i915_request *rq; + u32 offset; int err; GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm); @@ -668,8 +978,11 @@ intel_context_migrate_clear(struct intel_context *ce, GEM_BUG_ON(ce->ring->size < SZ_64K); + offset = 0; + if (HAS_64K_PAGES(i915) && is_lmem) + offset = CHUNK_SZ; + do { - u32 offset; int len; rq = i915_request_create(ce); @@ -697,10 +1010,6 @@ intel_context_migrate_clear(struct intel_context *ce, if (err) goto out_rq; - offset = 0; - if (HAS_64K_PAGES(ce->engine->i915) && is_lmem) - offset = CHUNK_SZ; - len = emit_pte(rq, &it, cache_level, is_lmem, offset, CHUNK_SZ); if (len <= 0) { err = len; @@ -711,7 +1020,22 @@ intel_context_migrate_clear(struct intel_context *ce, if (err) goto out_rq; - err = emit_clear(rq, offset, len, value); + err = emit_clear(rq, offset, len, value, is_lmem); + if (err) + goto out_rq; + + if (HAS_FLAT_CCS(i915) && is_lmem && !value) { + /* + * copy the content of memory into corresponding + * ccs surface + */ + err = emit_copy_ccs(rq, offset, INDIRECT_ACCESS, offset, + DIRECT_ACCESS, len); + if (err) + goto out_rq; + } + + err = rq->engine->emit_flush(rq, EMIT_INVALIDATE); /* Arbitration is re-enabled between requests. */ out_rq: diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c index d91e2beb7517..d8b94d638559 100644 --- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c @@ -91,7 +91,7 @@ write_dma_entry(struct drm_i915_gem_object * const pdma, u64 * const vaddr = __px_vaddr(pdma); vaddr[idx] = encoded_entry; - clflush_cache_range(&vaddr[idx], sizeof(u64)); + drm_clflush_virt_range(&vaddr[idx], sizeof(u64)); } void diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 63db136cbc27..b4770690e794 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -6,6 +6,7 @@ #include <linux/pm_runtime.h> #include <linux/string_helpers.h> +#include "gem/i915_gem_region.h" #include "i915_drv.h" #include "i915_reg.h" #include "i915_vgpu.h" @@ -325,9 +326,10 @@ static int vlv_rc6_init(struct intel_rc6 *rc6) resource_size_t pcbr_offset; pcbr_offset = (pcbr & ~4095) - i915->dsm.start; - pctx = i915_gem_object_create_stolen_for_preallocated(i915, - pcbr_offset, - pctx_size); + pctx = i915_gem_object_create_region_at(i915->mm.stolen_region, + pcbr_offset, + pctx_size, + 0); if (IS_ERR(pctx)) return PTR_ERR(pctx); diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c index 6cecfdae07ad..f5111c0a0060 100644 --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c @@ -93,6 +93,7 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt) struct intel_memory_region *mem; resource_size_t min_page_size; resource_size_t io_start; + resource_size_t io_size; resource_size_t lmem_size; int err; @@ -122,9 +123,14 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt) lmem_size = intel_uncore_read64(&i915->uncore, GEN12_GSMBASE); } + if (i915->params.lmem_size > 0) { + lmem_size = min_t(resource_size_t, lmem_size, + mul_u32_u32(i915->params.lmem_size, SZ_1M)); + } io_start = pci_resource_start(pdev, 2); - if (GEM_WARN_ON(lmem_size > pci_resource_len(pdev, 2))) + io_size = min(pci_resource_len(pdev, 2), lmem_size); + if (!io_size) return ERR_PTR(-ENODEV); min_page_size = HAS_64K_PAGES(i915) ? I915_GTT_PAGE_SIZE_64K : @@ -134,7 +140,7 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt) lmem_size, min_page_size, io_start, - lmem_size, + io_size, INTEL_MEMORY_LOCAL, 0, &intel_region_lmem_ops); diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index a6ae213c7d89..5422a3b84bd4 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -772,14 +772,15 @@ static intel_engine_mask_t reset_prepare(struct intel_gt *gt) intel_engine_mask_t awake = 0; enum intel_engine_id id; + /* For GuC mode, ensure submission is disabled before stopping ring */ + intel_uc_reset_prepare(>->uc); + for_each_engine(engine, gt, id) { if (intel_engine_pm_get_if_awake(engine)) awake |= engine->mask; reset_prepare_engine(engine); } - intel_uc_reset_prepare(>->uc); - return awake; } @@ -1319,7 +1320,7 @@ void intel_gt_handle_error(struct intel_gt *gt, engine_mask &= gt->info.engine_mask; if (flags & I915_ERROR_CAPTURE) { - i915_capture_error_state(gt, engine_mask); + i915_capture_error_state(gt, engine_mask, CORE_DUMP_FLAG_NONE); intel_gt_clear_error_registers(gt, engine_mask); } diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 6d7ec3bf1f32..5423bfd301ad 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -767,7 +767,7 @@ static int mi_set_context(struct i915_request *rq, if (GRAPHICS_VER(i915) == 7) { if (num_engines) { struct intel_engine_cs *signaller; - i915_reg_t last_reg = {}; /* keep gcc quiet */ + i915_reg_t last_reg = INVALID_MMIO_REG; /* keep gcc quiet */ *cs++ = MI_LOAD_REGISTER_IMM(num_engines); for_each_engine(signaller, engine->gt, id) { diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index a9c13b1a3018..3476a11f294c 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -1070,24 +1070,67 @@ int intel_rps_set(struct intel_rps *rps, u8 val) return 0; } -static void gen6_rps_init(struct intel_rps *rps) +static u32 intel_rps_read_state_cap(struct intel_rps *rps) { struct drm_i915_private *i915 = rps_to_i915(rps); - u32 rp_state_cap = intel_rps_read_state_cap(rps); + struct intel_uncore *uncore = rps_to_uncore(rps); - /* All of these values are in units of 50MHz */ + if (IS_XEHPSDV(i915)) + return intel_uncore_read(uncore, XEHPSDV_RP_STATE_CAP); + else if (IS_GEN9_LP(i915)) + return intel_uncore_read(uncore, BXT_RP_STATE_CAP); + else + return intel_uncore_read(uncore, GEN6_RP_STATE_CAP); +} + +/** + * gen6_rps_get_freq_caps - Get freq caps exposed by HW + * @rps: the intel_rps structure + * @caps: returned freq caps + * + * Returned "caps" frequencies should be converted to MHz using + * intel_gpu_freq() + */ +void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + u32 rp_state_cap; + + rp_state_cap = intel_rps_read_state_cap(rps); /* static values from HW: RP0 > RP1 > RPn (min_freq) */ if (IS_GEN9_LP(i915)) { - rps->rp0_freq = (rp_state_cap >> 16) & 0xff; - rps->rp1_freq = (rp_state_cap >> 8) & 0xff; - rps->min_freq = (rp_state_cap >> 0) & 0xff; + caps->rp0_freq = (rp_state_cap >> 16) & 0xff; + caps->rp1_freq = (rp_state_cap >> 8) & 0xff; + caps->min_freq = (rp_state_cap >> 0) & 0xff; } else { - rps->rp0_freq = (rp_state_cap >> 0) & 0xff; - rps->rp1_freq = (rp_state_cap >> 8) & 0xff; - rps->min_freq = (rp_state_cap >> 16) & 0xff; + caps->rp0_freq = (rp_state_cap >> 0) & 0xff; + caps->rp1_freq = (rp_state_cap >> 8) & 0xff; + caps->min_freq = (rp_state_cap >> 16) & 0xff; } + if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) { + /* + * In this case rp_state_cap register reports frequencies in + * units of 50 MHz. Convert these to the actual "hw unit", i.e. + * units of 16.67 MHz + */ + caps->rp0_freq *= GEN9_FREQ_SCALER; + caps->rp1_freq *= GEN9_FREQ_SCALER; + caps->min_freq *= GEN9_FREQ_SCALER; + } +} + +static void gen6_rps_init(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + struct intel_rps_freq_caps caps; + + gen6_rps_get_freq_caps(rps, &caps); + rps->rp0_freq = caps.rp0_freq; + rps->rp1_freq = caps.rp1_freq; + rps->min_freq = caps.min_freq; + /* hw_max = RP0 until we check for overclocking */ rps->max_freq = rps->rp0_freq; @@ -1095,26 +1138,18 @@ static void gen6_rps_init(struct intel_rps *rps) if (IS_HASWELL(i915) || IS_BROADWELL(i915) || IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) { u32 ddcc_status = 0; + u32 mult = 1; + if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) + mult = GEN9_FREQ_SCALER; if (snb_pcode_read(i915, HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL, &ddcc_status, NULL) == 0) rps->efficient_freq = - clamp_t(u8, - (ddcc_status >> 8) & 0xff, + clamp_t(u32, + ((ddcc_status >> 8) & 0xff) * mult, rps->min_freq, rps->max_freq); } - - if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) { - /* Store the frequency values in 16.66 MHZ units, which is - * the natural hardware unit for SKL - */ - rps->rp0_freq *= GEN9_FREQ_SCALER; - rps->rp1_freq *= GEN9_FREQ_SCALER; - rps->min_freq *= GEN9_FREQ_SCALER; - rps->max_freq *= GEN9_FREQ_SCALER; - rps->efficient_freq *= GEN9_FREQ_SCALER; - } } static bool rps_reset(struct intel_rps *rps) @@ -2219,19 +2254,6 @@ int intel_rps_set_min_frequency(struct intel_rps *rps, u32 val) return set_min_freq(rps, val); } -u32 intel_rps_read_state_cap(struct intel_rps *rps) -{ - struct drm_i915_private *i915 = rps_to_i915(rps); - struct intel_uncore *uncore = rps_to_uncore(rps); - - if (IS_XEHPSDV(i915)) - return intel_uncore_read(uncore, XEHPSDV_RP_STATE_CAP); - else if (IS_GEN9_LP(i915)) - return intel_uncore_read(uncore, BXT_RP_STATE_CAP); - else - return intel_uncore_read(uncore, GEN6_RP_STATE_CAP); -} - static void intel_rps_set_manual(struct intel_rps *rps, bool enable) { struct intel_uncore *uncore = rps_to_uncore(rps); @@ -2244,18 +2266,18 @@ static void intel_rps_set_manual(struct intel_rps *rps, bool enable) void intel_rps_raise_unslice(struct intel_rps *rps) { struct intel_uncore *uncore = rps_to_uncore(rps); - u32 rp0_unslice_req; mutex_lock(&rps->lock); if (rps_uses_slpc(rps)) { /* RP limits have not been initialized yet for SLPC path */ - rp0_unslice_req = ((intel_rps_read_state_cap(rps) >> 0) - & 0xff) * GEN9_FREQ_SCALER; + struct intel_rps_freq_caps caps; + + gen6_rps_get_freq_caps(rps, &caps); intel_rps_set_manual(rps, true); intel_uncore_write(uncore, GEN6_RPNSWREQ, - ((rp0_unslice_req << + ((caps.rp0_freq << GEN9_SW_REQ_UNSLICE_RATIO_SHIFT) | GEN9_IGNORE_SLICE_RATIO)); intel_rps_set_manual(rps, false); @@ -2269,18 +2291,18 @@ void intel_rps_raise_unslice(struct intel_rps *rps) void intel_rps_lower_unslice(struct intel_rps *rps) { struct intel_uncore *uncore = rps_to_uncore(rps); - u32 rpn_unslice_req; mutex_lock(&rps->lock); if (rps_uses_slpc(rps)) { /* RP limits have not been initialized yet for SLPC path */ - rpn_unslice_req = ((intel_rps_read_state_cap(rps) >> 16) - & 0xff) * GEN9_FREQ_SCALER; + struct intel_rps_freq_caps caps; + + gen6_rps_get_freq_caps(rps, &caps); intel_rps_set_manual(rps, true); intel_uncore_write(uncore, GEN6_RPNSWREQ, - ((rpn_unslice_req << + ((caps.min_freq << GEN9_SW_REQ_UNSLICE_RATIO_SHIFT) | GEN9_IGNORE_SLICE_RATIO)); intel_rps_set_manual(rps, false); @@ -2291,6 +2313,24 @@ void intel_rps_lower_unslice(struct intel_rps *rps) mutex_unlock(&rps->lock); } +static u32 rps_read_mmio(struct intel_rps *rps, i915_reg_t reg32) +{ + struct intel_gt *gt = rps_to_gt(rps); + intel_wakeref_t wakeref; + u32 val; + + with_intel_runtime_pm(gt->uncore->rpm, wakeref) + val = intel_uncore_read(gt->uncore, reg32); + + return val; +} + +bool rps_read_mask_mmio(struct intel_rps *rps, + i915_reg_t reg32, u32 mask) +{ + return rps_read_mmio(rps, reg32) & mask; +} + /* External interface for intel_ips.ko */ static struct drm_i915_private __rcu *ips_mchdev; diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h index c6d76a3d1331..1e8d56491308 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.h +++ b/drivers/gpu/drm/i915/gt/intel_rps.h @@ -7,6 +7,7 @@ #define INTEL_RPS_H #include "intel_rps_types.h" +#include "i915_reg_defs.h" struct i915_request; @@ -44,10 +45,13 @@ u32 intel_rps_get_rp1_frequency(struct intel_rps *rps); u32 intel_rps_get_rpn_frequency(struct intel_rps *rps); u32 intel_rps_read_punit_req(struct intel_rps *rps); u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps); -u32 intel_rps_read_state_cap(struct intel_rps *rps); +void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps); void intel_rps_raise_unslice(struct intel_rps *rps); void intel_rps_lower_unslice(struct intel_rps *rps); +u32 intel_rps_read_throttle_reason(struct intel_rps *rps); +bool rps_read_mask_mmio(struct intel_rps *rps, i915_reg_t reg32, u32 mask); + void gen5_rps_irq_handler(struct intel_rps *rps); void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir); void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir); diff --git a/drivers/gpu/drm/i915/gt/intel_rps_types.h b/drivers/gpu/drm/i915/gt/intel_rps_types.h index 3941d8551f52..9173ec75f2b8 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps_types.h +++ b/drivers/gpu/drm/i915/gt/intel_rps_types.h @@ -37,6 +37,21 @@ enum { INTEL_RPS_TIMER, }; +/** + * struct intel_rps_freq_caps - rps freq capabilities + * @rp0_freq: non-overclocked max frequency + * @rp1_freq: "less than" RP0 power/freqency + * @min_freq: aka RPn, minimum frequency + * + * Freq caps exposed by HW, values are in "hw units" and intel_gpu_freq() + * should be used to convert to MHz + */ +struct intel_rps_freq_caps { + u8 rp0_freq; + u8 rp1_freq; + u8 min_freq; +}; + struct intel_rps { struct mutex lock; /* protects enabling and the worker */ diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index 614915ffbd37..9881a6790574 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -10,6 +10,8 @@ #include "intel_gt_regs.h" #include "intel_sseu.h" +#include "linux/string_helpers.h" + void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices, u8 max_subslices, u8 max_eus_per_subslice) { @@ -35,8 +37,8 @@ intel_sseu_subslice_total(const struct sseu_dev_info *sseu) } static u32 -_intel_sseu_get_subslices(const struct sseu_dev_info *sseu, - const u8 *subslice_mask, u8 slice) +sseu_get_subslices(const struct sseu_dev_info *sseu, + const u8 *subslice_mask, u8 slice) { int i, offset = slice * sseu->ss_stride; u32 mask = 0; @@ -51,12 +53,17 @@ _intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice) { - return _intel_sseu_get_subslices(sseu, sseu->subslice_mask, slice); + return sseu_get_subslices(sseu, sseu->subslice_mask, slice); +} + +static u32 sseu_get_geometry_subslices(const struct sseu_dev_info *sseu) +{ + return sseu_get_subslices(sseu, sseu->geometry_subslice_mask, 0); } u32 intel_sseu_get_compute_subslices(const struct sseu_dev_info *sseu) { - return _intel_sseu_get_subslices(sseu, sseu->compute_subslice_mask, 0); + return sseu_get_subslices(sseu, sseu->compute_subslice_mask, 0); } void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice, @@ -720,16 +727,11 @@ void intel_sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p) str_yes_no(sseu->has_eu_pg)); } -void intel_sseu_print_topology(const struct sseu_dev_info *sseu, - struct drm_printer *p) +static void sseu_print_hsw_topology(const struct sseu_dev_info *sseu, + struct drm_printer *p) { int s, ss; - if (sseu->max_slices == 0) { - drm_printf(p, "Unavailable\n"); - return; - } - for (s = 0; s < sseu->max_slices; s++) { drm_printf(p, "slice%d: %u subslice(s) (0x%08x):\n", s, intel_sseu_subslices_per_slice(sseu, s), @@ -744,6 +746,36 @@ void intel_sseu_print_topology(const struct sseu_dev_info *sseu, } } +static void sseu_print_xehp_topology(const struct sseu_dev_info *sseu, + struct drm_printer *p) +{ + u32 g_dss_mask = sseu_get_geometry_subslices(sseu); + u32 c_dss_mask = intel_sseu_get_compute_subslices(sseu); + int dss; + + for (dss = 0; dss < sseu->max_subslices; dss++) { + u16 enabled_eus = sseu_get_eus(sseu, 0, dss); + + drm_printf(p, "DSS_%02d: G:%3s C:%3s, %2u EUs (0x%04hx)\n", dss, + str_yes_no(g_dss_mask & BIT(dss)), + str_yes_no(c_dss_mask & BIT(dss)), + hweight16(enabled_eus), enabled_eus); + } +} + +void intel_sseu_print_topology(struct drm_i915_private *i915, + const struct sseu_dev_info *sseu, + struct drm_printer *p) +{ + if (sseu->max_slices == 0) { + drm_printf(p, "Unavailable\n"); + } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { + sseu_print_xehp_topology(sseu, p); + } else { + sseu_print_hsw_topology(sseu, p); + } +} + u16 intel_slicemask_from_dssmask(u64 dss_mask, int dss_per_slice) { u16 slice_mask = 0; diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h index 8a79cd8eaab4..5c078df4729c 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.h +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h @@ -15,26 +15,49 @@ struct drm_i915_private; struct intel_gt; struct drm_printer; -#define GEN_MAX_SLICES (3) /* SKL upper bound */ -#define GEN_MAX_SUBSLICES (32) /* XEHPSDV upper bound */ -#define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE) -#define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES) -#define GEN_MAX_EUS (16) /* TGL upper bound */ -#define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS) +/* + * Maximum number of slices on older platforms. Slices no longer exist + * starting on Xe_HP ("gslices," "cslices," etc. are a different concept and + * are not expressed through fusing). + */ +#define GEN_MAX_HSW_SLICES 3 + +/* + * Maximum number of subslices that can exist within a HSW-style slice. This + * is only relevant to pre-Xe_HP platforms (Xe_HP and beyond use the + * GEN_MAX_DSS value below). + */ +#define GEN_MAX_SS_PER_HSW_SLICE 6 + +/* Maximum number of DSS on newer platforms (Xe_HP and beyond). */ +#define GEN_MAX_DSS 32 + +/* Maximum number of EUs that can exist within a subslice or DSS. */ +#define GEN_MAX_EUS_PER_SS 16 + +#define SSEU_MAX(a, b) ((a) > (b) ? (a) : (b)) + +/* The maximum number of bits needed to express each subslice/DSS independently */ +#define GEN_SS_MASK_SIZE SSEU_MAX(GEN_MAX_DSS, \ + GEN_MAX_HSW_SLICES * GEN_MAX_SS_PER_HSW_SLICE) + +#define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE) +#define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_SS_MASK_SIZE) +#define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS_PER_SS) #define GEN_DSS_PER_GSLICE 4 #define GEN_DSS_PER_CSLICE 8 #define GEN_DSS_PER_MSLICE 8 -#define GEN_MAX_GSLICES (GEN_MAX_SUBSLICES / GEN_DSS_PER_GSLICE) -#define GEN_MAX_CSLICES (GEN_MAX_SUBSLICES / GEN_DSS_PER_CSLICE) +#define GEN_MAX_GSLICES (GEN_MAX_DSS / GEN_DSS_PER_GSLICE) +#define GEN_MAX_CSLICES (GEN_MAX_DSS / GEN_DSS_PER_CSLICE) struct sseu_dev_info { u8 slice_mask; - u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE]; - u8 geometry_subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE]; - u8 compute_subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE]; - u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES * GEN_MAX_EU_STRIDE]; + u8 subslice_mask[GEN_SS_MASK_SIZE]; + u8 geometry_subslice_mask[GEN_SS_MASK_SIZE]; + u8 compute_subslice_mask[GEN_SS_MASK_SIZE]; + u8 eu_mask[GEN_SS_MASK_SIZE * GEN_MAX_EU_STRIDE]; u16 eu_total; u8 eu_per_subslice; u8 min_eu_in_pool; @@ -116,7 +139,8 @@ u32 intel_sseu_make_rpcs(struct intel_gt *gt, const struct intel_sseu *req_sseu); void intel_sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p); -void intel_sseu_print_topology(const struct sseu_dev_info *sseu, +void intel_sseu_print_topology(struct drm_i915_private *i915, + const struct sseu_dev_info *sseu, struct drm_printer *p); u16 intel_slicemask_from_dssmask(u64 dss_mask, int dss_per_slice); diff --git a/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c b/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c index a9d5bc49f361..2d5d011e01db 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c @@ -248,7 +248,7 @@ int intel_sseu_status(struct seq_file *m, struct intel_gt *gt) { struct drm_i915_private *i915 = gt->i915; const struct intel_gt_info *info = >->info; - struct sseu_dev_info sseu; + struct sseu_dev_info *sseu; intel_wakeref_t wakeref; if (GRAPHICS_VER(i915) < 8) @@ -258,23 +258,29 @@ int intel_sseu_status(struct seq_file *m, struct intel_gt *gt) i915_print_sseu_info(m, true, HAS_POOLED_EU(i915), &info->sseu); seq_puts(m, "SSEU Device Status\n"); - memset(&sseu, 0, sizeof(sseu)); - intel_sseu_set_info(&sseu, info->sseu.max_slices, + + sseu = kzalloc(sizeof(*sseu), GFP_KERNEL); + if (!sseu) + return -ENOMEM; + + intel_sseu_set_info(sseu, info->sseu.max_slices, info->sseu.max_subslices, info->sseu.max_eus_per_subslice); with_intel_runtime_pm(&i915->runtime_pm, wakeref) { if (IS_CHERRYVIEW(i915)) - cherryview_sseu_device_status(gt, &sseu); + cherryview_sseu_device_status(gt, sseu); else if (IS_BROADWELL(i915)) - bdw_sseu_device_status(gt, &sseu); + bdw_sseu_device_status(gt, sseu); else if (GRAPHICS_VER(i915) == 9) - gen9_sseu_device_status(gt, &sseu); + gen9_sseu_device_status(gt, sseu); else if (GRAPHICS_VER(i915) >= 11) - gen11_sseu_device_status(gt, &sseu); + gen11_sseu_device_status(gt, sseu); } - i915_print_sseu_info(m, false, HAS_POOLED_EU(i915), &sseu); + i915_print_sseu_info(m, false, HAS_POOLED_EU(i915), sseu); + + kfree(sseu); return 0; } @@ -287,22 +293,22 @@ static int sseu_status_show(struct seq_file *m, void *unused) } DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(sseu_status); -static int rcs_topology_show(struct seq_file *m, void *unused) +static int sseu_topology_show(struct seq_file *m, void *unused) { struct intel_gt *gt = m->private; struct drm_printer p = drm_seq_file_printer(m); - intel_sseu_print_topology(>->info.sseu, &p); + intel_sseu_print_topology(gt->i915, >->info.sseu, &p); return 0; } -DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(rcs_topology); +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(sseu_topology); void intel_sseu_debugfs_register(struct intel_gt *gt, struct dentry *root) { static const struct intel_gt_debugfs_file files[] = { { "sseu_status", &sseu_status_fops, NULL }, - { "rcs_topology", &rcs_topology_fops, NULL }, + { "sseu_topology", &sseu_topology_fops, NULL }, }; intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), gt); diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index c014b40d2e9f..a05c4b99b3fb 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1072,9 +1072,15 @@ static void __set_mcr_steering(struct i915_wa_list *wal, static void __add_mcr_wa(struct intel_gt *gt, struct i915_wa_list *wal, unsigned int slice, unsigned int subslice) { - drm_dbg(>->i915->drm, "MCR slice=0x%x, subslice=0x%x\n", slice, subslice); + struct drm_printer p = drm_debug_printer("MCR Steering:"); __set_mcr_steering(wal, GEN8_MCR_SELECTOR, slice, subslice); + + gt->default_steering.groupid = slice; + gt->default_steering.instanceid = subslice; + + if (drm_debug_enabled(DRM_UT_DRIVER)) + intel_gt_report_steering(&p, gt, false); } static void @@ -2188,11 +2194,15 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) */ wa_write_or(wal, GEN7_FF_THREAD_MODE, GEN12_FF_TESSELATION_DOP_GATE_DISABLE); + } + if (IS_ALDERLAKE_P(i915) || IS_DG2(i915) || IS_ALDERLAKE_S(i915) || + IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { /* * Wa_1606700617:tgl,dg1,adl-p * Wa_22010271021:tgl,rkl,dg1,adl-s,adl-p * Wa_14010826681:tgl,dg1,rkl,adl-p + * Wa_18019627453:dg2 */ wa_masked_en(wal, GEN9_CS_DEBUG_MODE1, @@ -2310,7 +2320,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) FF_DOP_CLOCK_GATE_DISABLE); } - if (IS_GRAPHICS_VER(i915, 9, 12)) { + if (HAS_PERCTX_PREEMPT_CTRL(i915)) { /* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl,tgl */ wa_masked_en(wal, GEN7_FF_SLICE_CS_CHICKEN1, @@ -2618,6 +2628,11 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS | GLOBAL_INVALIDATION_MODE); } + + if (IS_DG2(i915)) { + /* Wa_22014226127:dg2 */ + wa_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE); + } } static void @@ -2633,7 +2648,7 @@ engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal * to a single RCS/CCS engine's workaround list since * they're reset as part of the general render domain reset. */ - if (engine->class == RENDER_CLASS) + if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) general_render_compute_wa_init(engine, wal); if (engine->class == RENDER_CLASS) diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index 72d5faab8f9a..09f8cd2d0e2c 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -1736,15 +1736,9 @@ static int live_preempt(void *arg) enum intel_engine_id id; int err = -ENOMEM; - if (igt_spinner_init(&spin_hi, gt)) - return -ENOMEM; - - if (igt_spinner_init(&spin_lo, gt)) - goto err_spin_hi; - ctx_hi = kernel_context(gt->i915, NULL); if (!ctx_hi) - goto err_spin_lo; + return -ENOMEM; ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY; ctx_lo = kernel_context(gt->i915, NULL); @@ -1752,6 +1746,12 @@ static int live_preempt(void *arg) goto err_ctx_hi; ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY; + if (igt_spinner_init(&spin_hi, gt)) + goto err_ctx_lo; + + if (igt_spinner_init(&spin_lo, gt)) + goto err_spin_hi; + for_each_engine(engine, gt, id) { struct igt_live_test t; struct i915_request *rq; @@ -1761,14 +1761,14 @@ static int live_preempt(void *arg) if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { err = -EIO; - goto err_ctx_lo; + goto err_spin_lo; } rq = spinner_create_request(&spin_lo, ctx_lo, engine, MI_ARB_CHECK); if (IS_ERR(rq)) { err = PTR_ERR(rq); - goto err_ctx_lo; + goto err_spin_lo; } i915_request_add(rq); @@ -1777,7 +1777,7 @@ static int live_preempt(void *arg) GEM_TRACE_DUMP(); intel_gt_set_wedged(gt); err = -EIO; - goto err_ctx_lo; + goto err_spin_lo; } rq = spinner_create_request(&spin_hi, ctx_hi, engine, @@ -1785,7 +1785,7 @@ static int live_preempt(void *arg) if (IS_ERR(rq)) { igt_spinner_end(&spin_lo); err = PTR_ERR(rq); - goto err_ctx_lo; + goto err_spin_lo; } i915_request_add(rq); @@ -1794,7 +1794,7 @@ static int live_preempt(void *arg) GEM_TRACE_DUMP(); intel_gt_set_wedged(gt); err = -EIO; - goto err_ctx_lo; + goto err_spin_lo; } igt_spinner_end(&spin_hi); @@ -1802,19 +1802,19 @@ static int live_preempt(void *arg) if (igt_live_test_end(&t)) { err = -EIO; - goto err_ctx_lo; + goto err_spin_lo; } } err = 0; -err_ctx_lo: - kernel_context_close(ctx_lo); -err_ctx_hi: - kernel_context_close(ctx_hi); err_spin_lo: igt_spinner_fini(&spin_lo); err_spin_hi: igt_spinner_fini(&spin_hi); +err_ctx_lo: + kernel_context_close(ctx_lo); +err_ctx_hi: + kernel_context_close(ctx_hi); return err; } @@ -1828,20 +1828,20 @@ static int live_late_preempt(void *arg) enum intel_engine_id id; int err = -ENOMEM; - if (igt_spinner_init(&spin_hi, gt)) - return -ENOMEM; - - if (igt_spinner_init(&spin_lo, gt)) - goto err_spin_hi; - ctx_hi = kernel_context(gt->i915, NULL); if (!ctx_hi) - goto err_spin_lo; + return -ENOMEM; ctx_lo = kernel_context(gt->i915, NULL); if (!ctx_lo) goto err_ctx_hi; + if (igt_spinner_init(&spin_hi, gt)) + goto err_ctx_lo; + + if (igt_spinner_init(&spin_lo, gt)) + goto err_spin_hi; + /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */ ctx_lo->sched.priority = 1; @@ -1854,14 +1854,14 @@ static int live_late_preempt(void *arg) if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { err = -EIO; - goto err_ctx_lo; + goto err_spin_lo; } rq = spinner_create_request(&spin_lo, ctx_lo, engine, MI_ARB_CHECK); if (IS_ERR(rq)) { err = PTR_ERR(rq); - goto err_ctx_lo; + goto err_spin_lo; } i915_request_add(rq); @@ -1875,7 +1875,7 @@ static int live_late_preempt(void *arg) if (IS_ERR(rq)) { igt_spinner_end(&spin_lo); err = PTR_ERR(rq); - goto err_ctx_lo; + goto err_spin_lo; } i915_request_add(rq); @@ -1898,19 +1898,19 @@ static int live_late_preempt(void *arg) if (igt_live_test_end(&t)) { err = -EIO; - goto err_ctx_lo; + goto err_spin_lo; } } err = 0; -err_ctx_lo: - kernel_context_close(ctx_lo); -err_ctx_hi: - kernel_context_close(ctx_hi); err_spin_lo: igt_spinner_fini(&spin_lo); err_spin_hi: igt_spinner_fini(&spin_hi); +err_ctx_lo: + kernel_context_close(ctx_lo); +err_ctx_hi: + kernel_context_close(ctx_hi); return err; err_wedged: @@ -1918,7 +1918,7 @@ err_wedged: igt_spinner_end(&spin_lo); intel_gt_set_wedged(gt); err = -EIO; - goto err_ctx_lo; + goto err_spin_lo; } struct preempt_client { @@ -3382,12 +3382,9 @@ static int live_preempt_timeout(void *arg) if (!intel_has_reset_engine(gt)) return 0; - if (igt_spinner_init(&spin_lo, gt)) - return -ENOMEM; - ctx_hi = kernel_context(gt->i915, NULL); if (!ctx_hi) - goto err_spin_lo; + return -ENOMEM; ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY; ctx_lo = kernel_context(gt->i915, NULL); @@ -3395,6 +3392,9 @@ static int live_preempt_timeout(void *arg) goto err_ctx_hi; ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY; + if (igt_spinner_init(&spin_lo, gt)) + goto err_ctx_lo; + for_each_engine(engine, gt, id) { unsigned long saved_timeout; struct i915_request *rq; @@ -3406,21 +3406,21 @@ static int live_preempt_timeout(void *arg) MI_NOOP); /* preemption disabled */ if (IS_ERR(rq)) { err = PTR_ERR(rq); - goto err_ctx_lo; + goto err_spin_lo; } i915_request_add(rq); if (!igt_wait_for_spinner(&spin_lo, rq)) { intel_gt_set_wedged(gt); err = -EIO; - goto err_ctx_lo; + goto err_spin_lo; } rq = igt_request_alloc(ctx_hi, engine); if (IS_ERR(rq)) { igt_spinner_end(&spin_lo); err = PTR_ERR(rq); - goto err_ctx_lo; + goto err_spin_lo; } /* Flush the previous CS ack before changing timeouts */ @@ -3440,7 +3440,7 @@ static int live_preempt_timeout(void *arg) intel_gt_set_wedged(gt); i915_request_put(rq); err = -ETIME; - goto err_ctx_lo; + goto err_spin_lo; } igt_spinner_end(&spin_lo); @@ -3448,12 +3448,12 @@ static int live_preempt_timeout(void *arg) } err = 0; +err_spin_lo: + igt_spinner_fini(&spin_lo); err_ctx_lo: kernel_context_close(ctx_lo); err_ctx_hi: kernel_context_close(ctx_hi); -err_spin_lo: - igt_spinner_fini(&spin_lo); return err; } diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 21c29d315cc0..6ba52ef1acb8 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -1753,8 +1753,8 @@ static int __live_pphwsp_runtime(struct intel_engine_cs *engine) if (IS_ERR(ce)) return PTR_ERR(ce); - ce->runtime.num_underflow = 0; - ce->runtime.max_underflow = 0; + ce->stats.runtime.num_underflow = 0; + ce->stats.runtime.max_underflow = 0; do { unsigned int loop = 1024; @@ -1792,11 +1792,11 @@ static int __live_pphwsp_runtime(struct intel_engine_cs *engine) intel_context_get_avg_runtime_ns(ce)); err = 0; - if (ce->runtime.num_underflow) { + if (ce->stats.runtime.num_underflow) { pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n", engine->name, - ce->runtime.num_underflow, - ce->runtime.max_underflow); + ce->stats.runtime.num_underflow, + ce->stats.runtime.max_underflow); GEM_TRACE_DUMP(); err = -EOVERFLOW; } diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c index c9c4f391c5cc..2b0c87999949 100644 --- a/drivers/gpu/drm/i915/gt/selftest_migrate.c +++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c @@ -132,6 +132,124 @@ err_free_src: return err; } +static int intel_context_copy_ccs(struct intel_context *ce, + const struct i915_deps *deps, + struct scatterlist *sg, + enum i915_cache_level cache_level, + bool write_to_ccs, + struct i915_request **out) +{ + u8 src_access = write_to_ccs ? DIRECT_ACCESS : INDIRECT_ACCESS; + u8 dst_access = write_to_ccs ? INDIRECT_ACCESS : DIRECT_ACCESS; + struct sgt_dma it = sg_sgt(sg); + struct i915_request *rq; + u32 offset; + int err; + + GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm); + *out = NULL; + + GEM_BUG_ON(ce->ring->size < SZ_64K); + + offset = 0; + if (HAS_64K_PAGES(ce->engine->i915)) + offset = CHUNK_SZ; + + do { + int len; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_ce; + } + + if (deps) { + err = i915_request_await_deps(rq, deps); + if (err) + goto out_rq; + + if (rq->engine->emit_init_breadcrumb) { + err = rq->engine->emit_init_breadcrumb(rq); + if (err) + goto out_rq; + } + + deps = NULL; + } + + /* The PTE updates + clear must not be interrupted. */ + err = emit_no_arbitration(rq); + if (err) + goto out_rq; + + len = emit_pte(rq, &it, cache_level, true, offset, CHUNK_SZ); + if (len <= 0) { + err = len; + goto out_rq; + } + + err = rq->engine->emit_flush(rq, EMIT_INVALIDATE); + if (err) + goto out_rq; + + err = emit_copy_ccs(rq, offset, dst_access, + offset, src_access, len); + if (err) + goto out_rq; + + err = rq->engine->emit_flush(rq, EMIT_INVALIDATE); + + /* Arbitration is re-enabled between requests. */ +out_rq: + if (*out) + i915_request_put(*out); + *out = i915_request_get(rq); + i915_request_add(rq); + if (err || !it.sg || !sg_dma_len(it.sg)) + break; + + cond_resched(); + } while (1); + +out_ce: + return err; +} + +static int +intel_migrate_ccs_copy(struct intel_migrate *m, + struct i915_gem_ww_ctx *ww, + const struct i915_deps *deps, + struct scatterlist *sg, + enum i915_cache_level cache_level, + bool write_to_ccs, + struct i915_request **out) +{ + struct intel_context *ce; + int err; + + *out = NULL; + if (!m->context) + return -ENODEV; + + ce = intel_migrate_create_context(m); + if (IS_ERR(ce)) + ce = intel_context_get(m->context); + GEM_BUG_ON(IS_ERR(ce)); + + err = intel_context_pin_ww(ce, ww); + if (err) + goto out; + + err = intel_context_copy_ccs(ce, deps, sg, cache_level, + write_to_ccs, out); + + intel_context_unpin(ce); +out: + intel_context_put(ce); + return err; +} + static int clear(struct intel_migrate *migrate, int (*fn)(struct intel_migrate *migrate, struct i915_gem_ww_ctx *ww, @@ -144,7 +262,8 @@ static int clear(struct intel_migrate *migrate, struct drm_i915_gem_object *obj; struct i915_request *rq; struct i915_gem_ww_ctx ww; - u32 *vaddr; + u32 *vaddr, val = 0; + bool ccs_cap = false; int err = 0; int i; @@ -152,7 +271,15 @@ static int clear(struct intel_migrate *migrate, if (IS_ERR(obj)) return 0; + /* Consider the rounded up memory too */ + sz = obj->base.size; + + if (HAS_FLAT_CCS(i915) && i915_gem_object_is_lmem(obj)) + ccs_cap = true; + for_i915_gem_ww(&ww, err, true) { + int ccs_bytes, ccs_bytes_per_chunk; + err = i915_gem_object_lock(obj, &ww); if (err) continue; @@ -167,44 +294,114 @@ static int clear(struct intel_migrate *migrate, vaddr[i] = ~i; i915_gem_object_flush_map(obj); - err = fn(migrate, &ww, obj, sz, &rq); - if (!err) - continue; + if (ccs_cap && !val) { + /* Write the obj data into ccs surface */ + err = intel_migrate_ccs_copy(migrate, &ww, NULL, + obj->mm.pages->sgl, + obj->cache_level, + true, &rq); + if (rq && !err) { + if (i915_request_wait(rq, 0, HZ) < 0) { + pr_err("%ps timed out, size: %u\n", + fn, sz); + err = -ETIME; + } + i915_request_put(rq); + rq = NULL; + } + if (err) + continue; + } - if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS) - pr_err("%ps failed, size: %u\n", fn, sz); - if (rq) { - i915_request_wait(rq, 0, HZ); + err = fn(migrate, &ww, obj, val, &rq); + if (rq && !err) { + if (i915_request_wait(rq, 0, HZ) < 0) { + pr_err("%ps timed out, size: %u\n", fn, sz); + err = -ETIME; + } i915_request_put(rq); + rq = NULL; } - i915_gem_object_unpin_map(obj); - } - if (err) - goto err_out; + if (err) + continue; - if (rq) { - if (i915_request_wait(rq, 0, HZ) < 0) { - pr_err("%ps timed out, size: %u\n", fn, sz); - err = -ETIME; + i915_gem_object_flush_map(obj); + + /* Verify the set/clear of the obj mem */ + for (i = 0; !err && i < sz / PAGE_SIZE; i++) { + int x = i * 1024 + + i915_prandom_u32_max_state(1024, prng); + + if (vaddr[x] != val) { + pr_err("%ps failed, (%u != %u), offset: %zu\n", + fn, vaddr[x], val, x * sizeof(u32)); + igt_hexdump(vaddr + i * 1024, 4096); + err = -EINVAL; + } } - i915_request_put(rq); - } + if (err) + continue; - for (i = 0; !err && i < sz / PAGE_SIZE; i++) { - int x = i * 1024 + i915_prandom_u32_max_state(1024, prng); + if (ccs_cap && !val) { + for (i = 0; i < sz / sizeof(u32); i++) + vaddr[i] = ~i; + i915_gem_object_flush_map(obj); + + err = intel_migrate_ccs_copy(migrate, &ww, NULL, + obj->mm.pages->sgl, + obj->cache_level, + false, &rq); + if (rq && !err) { + if (i915_request_wait(rq, 0, HZ) < 0) { + pr_err("%ps timed out, size: %u\n", + fn, sz); + err = -ETIME; + } + i915_request_put(rq); + rq = NULL; + } + if (err) + continue; + + ccs_bytes = GET_CCS_BYTES(i915, sz); + ccs_bytes_per_chunk = GET_CCS_BYTES(i915, CHUNK_SZ); + i915_gem_object_flush_map(obj); + + for (i = 0; !err && i < DIV_ROUND_UP(ccs_bytes, PAGE_SIZE); i++) { + int offset = ((i * PAGE_SIZE) / + ccs_bytes_per_chunk) * CHUNK_SZ / sizeof(u32); + int ccs_bytes_left = (ccs_bytes - i * PAGE_SIZE) / sizeof(u32); + int x = i915_prandom_u32_max_state(min_t(int, 1024, + ccs_bytes_left), prng); + + if (vaddr[offset + x]) { + pr_err("%ps ccs clearing failed, offset: %ld/%d\n", + fn, i * PAGE_SIZE + x * sizeof(u32), ccs_bytes); + igt_hexdump(vaddr + offset, + min_t(int, 4096, + ccs_bytes_left * sizeof(u32))); + err = -EINVAL; + } + } + + if (err) + continue; + } + i915_gem_object_unpin_map(obj); + } - if (vaddr[x] != sz) { - pr_err("%ps failed, size: %u, offset: %zu\n", - fn, sz, x * sizeof(u32)); - igt_hexdump(vaddr + i * 1024, 4096); - err = -EINVAL; + if (err) { + if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS) + pr_err("%ps failed, size: %u\n", fn, sz); + if (rq && err != -EINVAL) { + i915_request_wait(rq, 0, HZ); + i915_request_put(rq); } + + i915_gem_object_unpin_map(obj); } - i915_gem_object_unpin_map(obj); -err_out: i915_gem_object_put(obj); - return err; } @@ -621,13 +818,15 @@ static int perf_copy_blt(void *arg) for (i = 0; i < ARRAY_SIZE(sizes); i++) { struct drm_i915_gem_object *src, *dst; + size_t sz; int err; src = create_init_lmem_internal(gt, sizes[i], true); if (IS_ERR(src)) return PTR_ERR(src); - dst = create_init_lmem_internal(gt, sizes[i], false); + sz = src->base.size; + dst = create_init_lmem_internal(gt, sz, false); if (IS_ERR(dst)) { err = PTR_ERR(dst); goto err_src; @@ -640,7 +839,7 @@ static int perf_copy_blt(void *arg) dst->mm.pages->sgl, I915_CACHE_NONE, i915_gem_object_is_lmem(dst), - sizes[i]); + sz); i915_gem_object_unlock(dst); i915_gem_object_put(dst); diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h index 7afdadc7656f..be9ac47fa9d0 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h @@ -122,17 +122,14 @@ enum intel_guc_action { INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_DONE = 0x1002, INTEL_GUC_ACTION_SCHED_ENGINE_MODE_SET = 0x1003, INTEL_GUC_ACTION_SCHED_ENGINE_MODE_DONE = 0x1004, - INTEL_GUC_ACTION_SET_CONTEXT_PRIORITY = 0x1005, - INTEL_GUC_ACTION_SET_CONTEXT_EXECUTION_QUANTUM = 0x1006, - INTEL_GUC_ACTION_SET_CONTEXT_PREEMPTION_TIMEOUT = 0x1007, INTEL_GUC_ACTION_CONTEXT_RESET_NOTIFICATION = 0x1008, INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION = 0x1009, + INTEL_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES = 0x100B, INTEL_GUC_ACTION_SETUP_PC_GUCRC = 0x3004, INTEL_GUC_ACTION_AUTHENTICATE_HUC = 0x4000, + INTEL_GUC_ACTION_GET_HWCONFIG = 0x4100, INTEL_GUC_ACTION_REGISTER_CONTEXT = 0x4502, INTEL_GUC_ACTION_DEREGISTER_CONTEXT = 0x4503, - INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505, - INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506, INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600, INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601, INTEL_GUC_ACTION_CLIENT_SOFT_RESET = 0x5507, @@ -173,4 +170,11 @@ enum intel_guc_sleep_state_status { #define GUC_LOG_CONTROL_VERBOSITY_MASK (0xF << GUC_LOG_CONTROL_VERBOSITY_SHIFT) #define GUC_LOG_CONTROL_DEFAULT_LOGGING (1 << 8) +enum intel_guc_state_capture_event_status { + INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_SUCCESS = 0x0, + INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE = 0x1, +}; + +#define INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_MASK 0x000000FF + #endif /* _ABI_GUC_ACTIONS_ABI_H */ diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h index c20658ee85a5..8085fb181274 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h @@ -8,6 +8,10 @@ enum intel_guc_response_status { INTEL_GUC_RESPONSE_STATUS_SUCCESS = 0x0, + INTEL_GUC_RESPONSE_NOT_SUPPORTED = 0x20, + INTEL_GUC_RESPONSE_NO_ATTRIBUTE_TABLE = 0x201, + INTEL_GUC_RESPONSE_NO_DECRYPTION_KEY = 0x202, + INTEL_GUC_RESPONSE_DECRYPTION_FAILED = 0x204, INTEL_GUC_RESPONSE_STATUS_GENERIC_FAIL = 0xF000, }; diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h index f0814a57c191..4a59478c3b5c 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h @@ -6,6 +6,8 @@ #ifndef _ABI_GUC_KLVS_ABI_H #define _ABI_GUC_KLVS_ABI_H +#include <linux/types.h> + /** * DOC: GuC KLV * @@ -79,4 +81,17 @@ #define GUC_KLV_SELF_CFG_G2H_CTB_SIZE_KEY 0x0907 #define GUC_KLV_SELF_CFG_G2H_CTB_SIZE_LEN 1u +/* + * Per context scheduling policy update keys. + */ +enum { + GUC_CONTEXT_POLICIES_KLV_ID_EXECUTION_QUANTUM = 0x2001, + GUC_CONTEXT_POLICIES_KLV_ID_PREEMPTION_TIMEOUT = 0x2002, + GUC_CONTEXT_POLICIES_KLV_ID_SCHEDULING_PRIORITY = 0x2003, + GUC_CONTEXT_POLICIES_KLV_ID_PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY = 0x2004, + GUC_CONTEXT_POLICIES_KLV_ID_SLPM_GT_FREQUENCY = 0x2005, + + GUC_CONTEXT_POLICIES_KLV_NUM_IDS = 5, +}; + #endif /* _ABI_GUC_KLVS_ABI_H */ diff --git a/drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h b/drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h new file mode 100644 index 000000000000..3624abfd22d1 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h @@ -0,0 +1,218 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021-2022 Intel Corporation + */ + +#ifndef _INTEL_GUC_CAPTURE_FWIF_H +#define _INTEL_GUC_CAPTURE_FWIF_H + +#include <linux/types.h> +#include "intel_guc_fwif.h" + +struct intel_guc; +struct file; + +/** + * struct __guc_capture_bufstate + * + * Book-keeping structure used to track read and write pointers + * as we extract error capture data from the GuC-log-buffer's + * error-capture region as a stream of dwords. + */ +struct __guc_capture_bufstate { + u32 size; + void *data; + u32 rd; + u32 wr; +}; + +/** + * struct __guc_capture_parsed_output - extracted error capture node + * + * A single unit of extracted error-capture output data grouped together + * at an engine-instance level. We keep these nodes in a linked list. + * See cachelist and outlist below. + */ +struct __guc_capture_parsed_output { + /* + * A single set of 3 capture lists: a global-list + * an engine-class-list and an engine-instance list. + * outlist in __guc_capture_parsed_output will keep + * a linked list of these nodes that will eventually + * be detached from outlist and attached into to + * i915_gpu_codedump in response to a context reset + */ + struct list_head link; + bool is_partial; + u32 eng_class; + u32 eng_inst; + u32 guc_id; + u32 lrca; + struct gcap_reg_list_info { + u32 vfid; + u32 num_regs; + struct guc_mmio_reg *regs; + } reginfo[GUC_CAPTURE_LIST_TYPE_MAX]; +#define GCAP_PARSED_REGLIST_INDEX_GLOBAL BIT(GUC_CAPTURE_LIST_TYPE_GLOBAL) +#define GCAP_PARSED_REGLIST_INDEX_ENGCLASS BIT(GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS) +#define GCAP_PARSED_REGLIST_INDEX_ENGINST BIT(GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE) +}; + +/** + * struct guc_debug_capture_list_header / struct guc_debug_capture_list + * + * As part of ADS registration, these header structures (followed by + * an array of 'struct guc_mmio_reg' entries) are used to register with + * GuC microkernel the list of registers we want it to dump out prior + * to a engine reset. + */ +struct guc_debug_capture_list_header { + u32 info; +#define GUC_CAPTURELISTHDR_NUMDESCR GENMASK(15, 0) +} __packed; + +struct guc_debug_capture_list { + struct guc_debug_capture_list_header header; + struct guc_mmio_reg regs[0]; +} __packed; + +/** + * struct __guc_mmio_reg_descr / struct __guc_mmio_reg_descr_group + * + * intel_guc_capture module uses these structures to maintain static + * tables (per unique platform) that consists of lists of registers + * (offsets, names, flags,...) that are used at the ADS regisration + * time as well as during runtime processing and reporting of error- + * capture states generated by GuC just prior to engine reset events. + */ +struct __guc_mmio_reg_descr { + i915_reg_t reg; + u32 flags; + u32 mask; + const char *regname; +}; + +struct __guc_mmio_reg_descr_group { + const struct __guc_mmio_reg_descr *list; + u32 num_regs; + u32 owner; /* see enum guc_capture_owner */ + u32 type; /* see enum guc_capture_type */ + u32 engine; /* as per MAX_ENGINE_CLASS */ + struct __guc_mmio_reg_descr *extlist; /* only used for steered registers */ +}; + +/** + * struct guc_state_capture_header_t / struct guc_state_capture_t / + * guc_state_capture_group_header_t / guc_state_capture_group_t + * + * Prior to resetting engines that have hung or faulted, GuC microkernel + * reports the engine error-state (register values that was read) by + * logging them into the shared GuC log buffer using these hierarchy + * of structures. + */ +struct guc_state_capture_header_t { + u32 owner; +#define CAP_HDR_CAPTURE_VFID GENMASK(7, 0) + u32 info; +#define CAP_HDR_CAPTURE_TYPE GENMASK(3, 0) /* see enum guc_capture_type */ +#define CAP_HDR_ENGINE_CLASS GENMASK(7, 4) /* see GUC_MAX_ENGINE_CLASSES */ +#define CAP_HDR_ENGINE_INSTANCE GENMASK(11, 8) + u32 lrca; /* if type-instance, LRCA (address) that hung, else set to ~0 */ + u32 guc_id; /* if type-instance, context index of hung context, else set to ~0 */ + u32 num_mmios; +#define CAP_HDR_NUM_MMIOS GENMASK(9, 0) +} __packed; + +struct guc_state_capture_t { + struct guc_state_capture_header_t header; + struct guc_mmio_reg mmio_entries[0]; +} __packed; + +enum guc_capture_group_types { + GUC_STATE_CAPTURE_GROUP_TYPE_FULL, + GUC_STATE_CAPTURE_GROUP_TYPE_PARTIAL, + GUC_STATE_CAPTURE_GROUP_TYPE_MAX, +}; + +struct guc_state_capture_group_header_t { + u32 owner; +#define CAP_GRP_HDR_CAPTURE_VFID GENMASK(7, 0) + u32 info; +#define CAP_GRP_HDR_NUM_CAPTURES GENMASK(7, 0) +#define CAP_GRP_HDR_CAPTURE_TYPE GENMASK(15, 8) /* guc_capture_group_types */ +} __packed; + +/* this is the top level structure where an error-capture dump starts */ +struct guc_state_capture_group_t { + struct guc_state_capture_group_header_t grp_header; + struct guc_state_capture_t capture_entries[0]; +} __packed; + +/** + * struct __guc_capture_ads_cache + * + * A structure to cache register lists that were populated and registered + * with GuC at startup during ADS registration. This allows much quicker + * GuC resets without re-parsing all the tables for the given gt. + */ +struct __guc_capture_ads_cache { + bool is_valid; + void *ptr; + size_t size; + int status; +}; + +/** + * struct intel_guc_state_capture + * + * Internal context of the intel_guc_capture module. + */ +struct intel_guc_state_capture { + /** + * @reglists: static table of register lists used for error-capture state. + */ + const struct __guc_mmio_reg_descr_group *reglists; + + /** + * @extlists: allocated table of steered register lists used for error-capture state. + * + * NOTE: steered registers have multiple instances depending on the HW configuration + * (slices or dual-sub-slices) and thus depends on HW fuses discovered at startup + */ + struct __guc_mmio_reg_descr_group *extlists; + + /** + * @ads_cache: cached register lists that is ADS format ready + */ + struct __guc_capture_ads_cache ads_cache[GUC_CAPTURE_LIST_INDEX_MAX] + [GUC_CAPTURE_LIST_TYPE_MAX] + [GUC_MAX_ENGINE_CLASSES]; + void *ads_null_cache; + + /** + * @cachelist: Pool of pre-allocated nodes for error capture output + * + * We need this pool of pre-allocated nodes because we cannot + * dynamically allocate new nodes when receiving the G2H notification + * because the event handlers for all G2H event-processing is called + * by the ct processing worker queue and when that queue is being + * processed, there is no absoluate guarantee that we are not in the + * midst of a GT reset operation (which doesn't allow allocations). + */ + struct list_head cachelist; +#define PREALLOC_NODES_MAX_COUNT (3 * GUC_MAX_ENGINE_CLASSES * GUC_MAX_INSTANCES_PER_CLASS) +#define PREALLOC_NODES_DEFAULT_NUMREGS 64 + int max_mmio_per_node; + + /** + * @outlist: Pool of pre-allocated nodes for error capture output + * + * A linked list of parsed GuC error-capture output data before + * reporting with formatting via i915_gpu_coredump. Each node in this linked list shall + * contain a single engine-capture including global, engine-class and + * engine-instance register dumps as per guc_capture_parsed_output_node + */ + struct list_head outlist; +}; + +#endif /* _INTEL_GUC_CAPTURE_FWIF_H */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 447a976c9f25..2c4ad4a65089 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -9,8 +9,9 @@ #include "gt/intel_gt_pm_irq.h" #include "gt/intel_gt_regs.h" #include "intel_guc.h" -#include "intel_guc_slpc.h" #include "intel_guc_ads.h" +#include "intel_guc_capture.h" +#include "intel_guc_slpc.h" #include "intel_guc_submission.h" #include "i915_drv.h" #include "i915_irq.h" @@ -291,6 +292,41 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc) GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 50)) flags |= GUC_WA_POLLCS; + /* Wa_16011759253:dg2_g10:a0 */ + if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) + flags |= GUC_WA_GAM_CREDITS; + + /* Wa_14014475959:dg2 */ + if (IS_DG2(gt->i915)) + flags |= GUC_WA_HOLD_CCS_SWITCHOUT; + + /* + * Wa_14012197797:dg2_g10:a0,dg2_g11:a0 + * Wa_22011391025:dg2_g10,dg2_g11,dg2_g12 + * + * The same WA bit is used for both and 22011391025 is applicable to + * all DG2. + */ + if (IS_DG2(gt->i915)) + flags |= GUC_WA_DUAL_QUEUE; + + /* Wa_22011802037: graphics version 12 */ + if (GRAPHICS_VER(gt->i915) == 12) + flags |= GUC_WA_PRE_PARSER; + + /* Wa_16011777198:dg2 */ + if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) || + IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) + flags |= GUC_WA_RCS_RESET_BEFORE_RC6; + + /* + * Wa_22012727170:dg2_g10[a0-c0), dg2_g11[a0..) + * Wa_22012727685:dg2_g11[a0..) + */ + if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) || + IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_FOREVER)) + flags |= GUC_WA_CONTEXT_ISOLATION; + return flags; } @@ -362,9 +398,14 @@ int intel_guc_init(struct intel_guc *guc) if (ret) goto err_fw; - ret = intel_guc_ads_create(guc); + ret = intel_guc_capture_init(guc); if (ret) goto err_log; + + ret = intel_guc_ads_create(guc); + if (ret) + goto err_capture; + GEM_BUG_ON(!guc->ads_vma); ret = intel_guc_ct_init(&guc->ct); @@ -403,6 +444,8 @@ err_ct: intel_guc_ct_fini(&guc->ct); err_ads: intel_guc_ads_destroy(guc); +err_capture: + intel_guc_capture_destroy(guc); err_log: intel_guc_log_destroy(&guc->log); err_fw: @@ -430,6 +473,7 @@ void intel_guc_fini(struct intel_guc *guc) intel_guc_ct_fini(&guc->ct); intel_guc_ads_destroy(guc); + intel_guc_capture_destroy(guc); intel_guc_log_destroy(&guc->log); intel_uc_fw_fini(&guc->fw); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index bf7079480d47..3f3373f68123 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -10,18 +10,19 @@ #include <linux/iosys-map.h> #include <linux/xarray.h> -#include "intel_uncore.h" +#include "intel_guc_ct.h" #include "intel_guc_fw.h" #include "intel_guc_fwif.h" -#include "intel_guc_ct.h" #include "intel_guc_log.h" #include "intel_guc_reg.h" #include "intel_guc_slpc_types.h" #include "intel_uc_fw.h" +#include "intel_uncore.h" #include "i915_utils.h" #include "i915_vma.h" struct __guc_ads_blob; +struct intel_guc_state_capture; /** * struct intel_guc - Top level structure of GuC. @@ -38,6 +39,8 @@ struct intel_guc { struct intel_guc_ct ct; /** @slpc: sub-structure containing SLPC related data and objects */ struct intel_guc_slpc slpc; + /** @capture: the error-state-capture module's data and objects */ + struct intel_guc_state_capture *capture; /** @sched_engine: Global engine used to submit requests to GuC */ struct i915_sched_engine *sched_engine; @@ -138,6 +141,8 @@ struct intel_guc { bool submission_supported; /** @submission_selected: tracks whether the user enabled GuC submission */ bool submission_selected; + /** @submission_initialized: tracks whether GuC submission has been initialised */ + bool submission_initialized; /** * @rc_supported: tracks whether we support GuC rc on the current platform */ @@ -160,14 +165,11 @@ struct intel_guc { struct guc_mmio_reg *ads_regset; /** @ads_golden_ctxt_size: size of the golden contexts in the ADS */ u32 ads_golden_ctxt_size; + /** @ads_capture_size: size of register lists in the ADS used for error capture */ + u32 ads_capture_size; /** @ads_engine_usage_size: size of engine usage in the ADS */ u32 ads_engine_usage_size; - /** @lrc_desc_pool: object allocated to hold the GuC LRC descriptor pool */ - struct i915_vma *lrc_desc_pool; - /** @lrc_desc_pool_vaddr: contents of the GuC LRC descriptor pool */ - void *lrc_desc_pool_vaddr; - /** * @context_lookup: used to resolve intel_context from guc_id, if a * context is present in this structure it is registered with the GuC @@ -431,6 +433,9 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc, int intel_guc_error_capture_process_msg(struct intel_guc *guc, const u32 *msg, u32 len); +struct intel_engine_cs * +intel_guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance); + void intel_guc_find_hung_context(struct intel_engine_cs *engine); int intel_guc_global_policies_update(struct intel_guc *guc); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 92cb88248391..3eabf4cf8eec 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -11,6 +11,7 @@ #include "gt/intel_lrc.h" #include "gt/shmem_utils.h" #include "intel_guc_ads.h" +#include "intel_guc_capture.h" #include "intel_guc_fwif.h" #include "intel_uc.h" #include "i915_drv.h" @@ -86,8 +87,7 @@ static u32 guc_ads_golden_ctxt_size(struct intel_guc *guc) static u32 guc_ads_capture_size(struct intel_guc *guc) { - /* FIXME: Allocate a proper capture list */ - return PAGE_ALIGN(PAGE_SIZE); + return PAGE_ALIGN(guc->ads_capture_size); } static u32 guc_ads_private_data_size(struct intel_guc *guc) @@ -276,15 +276,24 @@ __mmio_reg_add(struct temp_regset *regset, struct guc_mmio_reg *reg) return slot; } -static long __must_check guc_mmio_reg_add(struct temp_regset *regset, - u32 offset, u32 flags) +#define GUC_REGSET_STEERING(group, instance) ( \ + FIELD_PREP(GUC_REGSET_STEERING_GROUP, (group)) | \ + FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, (instance)) | \ + GUC_REGSET_NEEDS_STEERING \ +) + +static long __must_check guc_mmio_reg_add(struct intel_gt *gt, + struct temp_regset *regset, + i915_reg_t reg, u32 flags) { u32 count = regset->storage_used - (regset->registers - regset->storage); - struct guc_mmio_reg reg = { + u32 offset = i915_mmio_reg_offset(reg); + struct guc_mmio_reg entry = { .offset = offset, .flags = flags, }; struct guc_mmio_reg *slot; + u8 group, inst; /* * The mmio list is built using separate lists within the driver. @@ -292,11 +301,22 @@ static long __must_check guc_mmio_reg_add(struct temp_regset *regset, * register more than once. Do not consider this an error; silently * move on if the register is already in the list. */ - if (bsearch(®, regset->registers, count, - sizeof(reg), guc_mmio_reg_cmp)) + if (bsearch(&entry, regset->registers, count, + sizeof(entry), guc_mmio_reg_cmp)) return 0; - slot = __mmio_reg_add(regset, ®); + /* + * The GuC doesn't have a default steering, so we need to explicitly + * steer all registers that need steering. However, we do not keep track + * of all the steering ranges, only of those that have a chance of using + * a non-default steering from the i915 pov. Instead of adding such + * tracking, it is easier to just program the default steering for all + * regs that don't need a non-default one. + */ + intel_gt_get_valid_steering_for_reg(gt, reg, &group, &inst); + entry.flags |= GUC_REGSET_STEERING(group, inst); + + slot = __mmio_reg_add(regset, &entry); if (IS_ERR(slot)) return PTR_ERR(slot); @@ -311,14 +331,16 @@ static long __must_check guc_mmio_reg_add(struct temp_regset *regset, return 0; } -#define GUC_MMIO_REG_ADD(regset, reg, masked) \ - guc_mmio_reg_add(regset, \ - i915_mmio_reg_offset((reg)), \ +#define GUC_MMIO_REG_ADD(gt, regset, reg, masked) \ + guc_mmio_reg_add(gt, \ + regset, \ + (reg), \ (masked) ? GUC_REGSET_MASKED : 0) static int guc_mmio_regset_init(struct temp_regset *regset, struct intel_engine_cs *engine) { + struct intel_gt *gt = engine->gt; const u32 base = engine->mmio_base; struct i915_wa_list *wal = &engine->wa_list; struct i915_wa *wa; @@ -331,26 +353,26 @@ static int guc_mmio_regset_init(struct temp_regset *regset, */ regset->registers = regset->storage + regset->storage_used; - ret |= GUC_MMIO_REG_ADD(regset, RING_MODE_GEN7(base), true); - ret |= GUC_MMIO_REG_ADD(regset, RING_HWS_PGA(base), false); - ret |= GUC_MMIO_REG_ADD(regset, RING_IMR(base), false); + ret |= GUC_MMIO_REG_ADD(gt, regset, RING_MODE_GEN7(base), true); + ret |= GUC_MMIO_REG_ADD(gt, regset, RING_HWS_PGA(base), false); + ret |= GUC_MMIO_REG_ADD(gt, regset, RING_IMR(base), false); - if (engine->class == RENDER_CLASS && + if ((engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) && CCS_MASK(engine->gt)) - ret |= GUC_MMIO_REG_ADD(regset, GEN12_RCU_MODE, true); + ret |= GUC_MMIO_REG_ADD(gt, regset, GEN12_RCU_MODE, true); for (i = 0, wa = wal->list; i < wal->count; i++, wa++) - ret |= GUC_MMIO_REG_ADD(regset, wa->reg, wa->masked_reg); + ret |= GUC_MMIO_REG_ADD(gt, regset, wa->reg, wa->masked_reg); /* Be extra paranoid and include all whitelist registers. */ for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) - ret |= GUC_MMIO_REG_ADD(regset, + ret |= GUC_MMIO_REG_ADD(gt, regset, RING_FORCE_TO_NONPRIV(base, i), false); /* add in local MOCS registers */ for (i = 0; i < GEN9_LNCFCMOCS_REG_COUNT; i++) - ret |= GUC_MMIO_REG_ADD(regset, GEN9_LNCFCMOCS(i), false); + ret |= GUC_MMIO_REG_ADD(gt, regset, GEN9_LNCFCMOCS(i), false); return ret ? -1 : 0; } @@ -433,7 +455,7 @@ static void guc_mmio_reg_state_init(struct intel_guc *guc) static void fill_engine_enable_masks(struct intel_gt *gt, struct iosys_map *info_map) { - info_map_write(info_map, engine_enabled_masks[GUC_RENDER_CLASS], 1); + info_map_write(info_map, engine_enabled_masks[GUC_RENDER_CLASS], RCS_MASK(gt)); info_map_write(info_map, engine_enabled_masks[GUC_COMPUTE_CLASS], CCS_MASK(gt)); info_map_write(info_map, engine_enabled_masks[GUC_BLITTER_CLASS], 1); info_map_write(info_map, engine_enabled_masks[GUC_VIDEO_CLASS], VDBOX_MASK(gt)); @@ -589,24 +611,119 @@ static void guc_init_golden_context(struct intel_guc *guc) GEM_BUG_ON(guc->ads_golden_ctxt_size != total_size); } -static void guc_capture_list_init(struct intel_guc *guc) +static int +guc_capture_prep_lists(struct intel_guc *guc) { + struct intel_gt *gt = guc_to_gt(guc); + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + u32 ads_ggtt, capture_offset, null_ggtt, total_size = 0; + struct guc_gt_system_info local_info; + struct iosys_map info_map; + bool ads_is_mapped; + size_t size = 0; + void *ptr; int i, j; - u32 addr_ggtt, offset; - offset = guc_ads_capture_offset(guc); - addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset; + ads_is_mapped = !iosys_map_is_null(&guc->ads_map); + if (ads_is_mapped) { + capture_offset = guc_ads_capture_offset(guc); + ads_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma); + info_map = IOSYS_MAP_INIT_OFFSET(&guc->ads_map, + offsetof(struct __guc_ads_blob, system_info)); + } else { + memset(&local_info, 0, sizeof(local_info)); + iosys_map_set_vaddr(&info_map, &local_info); + fill_engine_enable_masks(gt, &info_map); + } - /* FIXME: Populate a proper capture list */ + /* first, set aside the first page for a capture_list with zero descriptors */ + total_size = PAGE_SIZE; + if (ads_is_mapped) { + if (!intel_guc_capture_getnullheader(guc, &ptr, &size)) + iosys_map_memcpy_to(&guc->ads_map, capture_offset, ptr, size); + null_ggtt = ads_ggtt + capture_offset; + capture_offset += PAGE_SIZE; + } for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; i++) { for (j = 0; j < GUC_MAX_ENGINE_CLASSES; j++) { - ads_blob_write(guc, ads.capture_instance[i][j], addr_ggtt); - ads_blob_write(guc, ads.capture_class[i][j], addr_ggtt); - } - ads_blob_write(guc, ads.capture_global[i], addr_ggtt); + /* null list if we dont have said engine or list */ + if (!info_map_read(&info_map, engine_enabled_masks[j])) { + if (ads_is_mapped) { + ads_blob_write(guc, ads.capture_class[i][j], null_ggtt); + ads_blob_write(guc, ads.capture_instance[i][j], null_ggtt); + } + continue; + } + if (intel_guc_capture_getlistsize(guc, i, + GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS, + j, &size)) { + if (ads_is_mapped) + ads_blob_write(guc, ads.capture_class[i][j], null_ggtt); + goto engine_instance_list; + } + total_size += size; + if (ads_is_mapped) { + if (total_size > guc->ads_capture_size || + intel_guc_capture_getlist(guc, i, + GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS, + j, &ptr)) { + ads_blob_write(guc, ads.capture_class[i][j], null_ggtt); + continue; + } + ads_blob_write(guc, ads.capture_class[i][j], ads_ggtt + + capture_offset); + iosys_map_memcpy_to(&guc->ads_map, capture_offset, ptr, size); + capture_offset += size; + } +engine_instance_list: + if (intel_guc_capture_getlistsize(guc, i, + GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE, + j, &size)) { + if (ads_is_mapped) + ads_blob_write(guc, ads.capture_instance[i][j], null_ggtt); + continue; + } + total_size += size; + if (ads_is_mapped) { + if (total_size > guc->ads_capture_size || + intel_guc_capture_getlist(guc, i, + GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE, + j, &ptr)) { + ads_blob_write(guc, ads.capture_instance[i][j], null_ggtt); + continue; + } + ads_blob_write(guc, ads.capture_instance[i][j], ads_ggtt + + capture_offset); + iosys_map_memcpy_to(&guc->ads_map, capture_offset, ptr, size); + capture_offset += size; + } + } + if (intel_guc_capture_getlistsize(guc, i, GUC_CAPTURE_LIST_TYPE_GLOBAL, 0, &size)) { + if (ads_is_mapped) + ads_blob_write(guc, ads.capture_global[i], null_ggtt); + continue; + } + total_size += size; + if (ads_is_mapped) { + if (total_size > guc->ads_capture_size || + intel_guc_capture_getlist(guc, i, GUC_CAPTURE_LIST_TYPE_GLOBAL, 0, + &ptr)) { + ads_blob_write(guc, ads.capture_global[i], null_ggtt); + continue; + } + ads_blob_write(guc, ads.capture_global[i], ads_ggtt + capture_offset); + iosys_map_memcpy_to(&guc->ads_map, capture_offset, ptr, size); + capture_offset += size; + } } + + if (guc->ads_capture_size && guc->ads_capture_size != PAGE_ALIGN(total_size)) + drm_warn(&i915->drm, "GuC->ADS->Capture alloc size changed from %d to %d\n", + guc->ads_capture_size, PAGE_ALIGN(total_size)); + + return PAGE_ALIGN(total_size); } static void __guc_ads_init(struct intel_guc *guc) @@ -644,8 +761,8 @@ static void __guc_ads_init(struct intel_guc *guc) base = intel_guc_ggtt_offset(guc, guc->ads_vma); - /* Capture list for hang debug */ - guc_capture_list_init(guc); + /* Lists for error capture debug */ + guc_capture_prep_lists(guc); /* ADS */ ads_blob_write(guc, ads.scheduler_policies, base + @@ -693,6 +810,12 @@ int intel_guc_ads_create(struct intel_guc *guc) return ret; guc->ads_golden_ctxt_size = ret; + /* Likewise the capture lists: */ + ret = guc_capture_prep_lists(guc); + if (ret < 0) + return ret; + guc->ads_capture_size = ret; + /* Now the total size can be determined: */ size = guc_ads_blob_size(guc); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c new file mode 100644 index 000000000000..c4e25966d3e9 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c @@ -0,0 +1,1657 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021-2022 Intel Corporation + */ + +#include <linux/types.h> + +#include <drm/drm_print.h> + +#include "gt/intel_engine_regs.h" +#include "gt/intel_gt.h" +#include "gt/intel_gt_regs.h" +#include "gt/intel_lrc.h" +#include "guc_capture_fwif.h" +#include "intel_guc_capture.h" +#include "intel_guc_fwif.h" +#include "i915_drv.h" +#include "i915_gpu_error.h" +#include "i915_irq.h" +#include "i915_memcpy.h" +#include "i915_reg.h" + +/* + * Define all device tables of GuC error capture register lists + * NOTE: For engine-registers, GuC only needs the register offsets + * from the engine-mmio-base + */ +#define COMMON_BASE_GLOBAL \ + { FORCEWAKE_MT, 0, 0, "FORCEWAKE" } + +#define COMMON_GEN9BASE_GLOBAL \ + { GEN8_FAULT_TLB_DATA0, 0, 0, "GEN8_FAULT_TLB_DATA0" }, \ + { GEN8_FAULT_TLB_DATA1, 0, 0, "GEN8_FAULT_TLB_DATA1" }, \ + { ERROR_GEN6, 0, 0, "ERROR_GEN6" }, \ + { DONE_REG, 0, 0, "DONE_REG" }, \ + { HSW_GTT_CACHE_EN, 0, 0, "HSW_GTT_CACHE_EN" } + +#define COMMON_GEN12BASE_GLOBAL \ + { GEN12_FAULT_TLB_DATA0, 0, 0, "GEN12_FAULT_TLB_DATA0" }, \ + { GEN12_FAULT_TLB_DATA1, 0, 0, "GEN12_FAULT_TLB_DATA1" }, \ + { GEN12_AUX_ERR_DBG, 0, 0, "AUX_ERR_DBG" }, \ + { GEN12_GAM_DONE, 0, 0, "GAM_DONE" }, \ + { GEN12_RING_FAULT_REG, 0, 0, "FAULT_REG" } + +#define COMMON_BASE_ENGINE_INSTANCE \ + { RING_PSMI_CTL(0), 0, 0, "RC PSMI" }, \ + { RING_ESR(0), 0, 0, "ESR" }, \ + { RING_DMA_FADD(0), 0, 0, "RING_DMA_FADD_LDW" }, \ + { RING_DMA_FADD_UDW(0), 0, 0, "RING_DMA_FADD_UDW" }, \ + { RING_IPEIR(0), 0, 0, "IPEIR" }, \ + { RING_IPEHR(0), 0, 0, "IPEHR" }, \ + { RING_INSTPS(0), 0, 0, "INSTPS" }, \ + { RING_BBADDR(0), 0, 0, "RING_BBADDR_LOW32" }, \ + { RING_BBADDR_UDW(0), 0, 0, "RING_BBADDR_UP32" }, \ + { RING_BBSTATE(0), 0, 0, "BB_STATE" }, \ + { CCID(0), 0, 0, "CCID" }, \ + { RING_ACTHD(0), 0, 0, "ACTHD_LDW" }, \ + { RING_ACTHD_UDW(0), 0, 0, "ACTHD_UDW" }, \ + { RING_INSTPM(0), 0, 0, "INSTPM" }, \ + { RING_INSTDONE(0), 0, 0, "INSTDONE" }, \ + { RING_NOPID(0), 0, 0, "RING_NOPID" }, \ + { RING_START(0), 0, 0, "START" }, \ + { RING_HEAD(0), 0, 0, "HEAD" }, \ + { RING_TAIL(0), 0, 0, "TAIL" }, \ + { RING_CTL(0), 0, 0, "CTL" }, \ + { RING_MI_MODE(0), 0, 0, "MODE" }, \ + { RING_CONTEXT_CONTROL(0), 0, 0, "RING_CONTEXT_CONTROL" }, \ + { RING_HWS_PGA(0), 0, 0, "HWS" }, \ + { RING_MODE_GEN7(0), 0, 0, "GFX_MODE" }, \ + { GEN8_RING_PDP_LDW(0, 0), 0, 0, "PDP0_LDW" }, \ + { GEN8_RING_PDP_UDW(0, 0), 0, 0, "PDP0_UDW" }, \ + { GEN8_RING_PDP_LDW(0, 1), 0, 0, "PDP1_LDW" }, \ + { GEN8_RING_PDP_UDW(0, 1), 0, 0, "PDP1_UDW" }, \ + { GEN8_RING_PDP_LDW(0, 2), 0, 0, "PDP2_LDW" }, \ + { GEN8_RING_PDP_UDW(0, 2), 0, 0, "PDP2_UDW" }, \ + { GEN8_RING_PDP_LDW(0, 3), 0, 0, "PDP3_LDW" }, \ + { GEN8_RING_PDP_UDW(0, 3), 0, 0, "PDP3_UDW" } + +#define COMMON_BASE_HAS_EU \ + { EIR, 0, 0, "EIR" } + +#define COMMON_BASE_RENDER \ + { GEN7_SC_INSTDONE, 0, 0, "GEN7_SC_INSTDONE" } + +#define COMMON_GEN12BASE_RENDER \ + { GEN12_SC_INSTDONE_EXTRA, 0, 0, "GEN12_SC_INSTDONE_EXTRA" }, \ + { GEN12_SC_INSTDONE_EXTRA2, 0, 0, "GEN12_SC_INSTDONE_EXTRA2" } + +#define COMMON_GEN12BASE_VEC \ + { GEN12_SFC_DONE(0), 0, 0, "SFC_DONE[0]" }, \ + { GEN12_SFC_DONE(1), 0, 0, "SFC_DONE[1]" }, \ + { GEN12_SFC_DONE(2), 0, 0, "SFC_DONE[2]" }, \ + { GEN12_SFC_DONE(3), 0, 0, "SFC_DONE[3]" } + +/* XE_LPD - Global */ +static const struct __guc_mmio_reg_descr xe_lpd_global_regs[] = { + COMMON_BASE_GLOBAL, + COMMON_GEN9BASE_GLOBAL, + COMMON_GEN12BASE_GLOBAL, +}; + +/* XE_LPD - Render / Compute Per-Class */ +static const struct __guc_mmio_reg_descr xe_lpd_rc_class_regs[] = { + COMMON_BASE_HAS_EU, + COMMON_BASE_RENDER, + COMMON_GEN12BASE_RENDER, +}; + +/* GEN9/XE_LPD - Render / Compute Per-Engine-Instance */ +static const struct __guc_mmio_reg_descr xe_lpd_rc_inst_regs[] = { + COMMON_BASE_ENGINE_INSTANCE, +}; + +/* GEN9/XE_LPD - Media Decode/Encode Per-Engine-Instance */ +static const struct __guc_mmio_reg_descr xe_lpd_vd_inst_regs[] = { + COMMON_BASE_ENGINE_INSTANCE, +}; + +/* XE_LPD - Video Enhancement Per-Class */ +static const struct __guc_mmio_reg_descr xe_lpd_vec_class_regs[] = { + COMMON_GEN12BASE_VEC, +}; + +/* GEN9/XE_LPD - Video Enhancement Per-Engine-Instance */ +static const struct __guc_mmio_reg_descr xe_lpd_vec_inst_regs[] = { + COMMON_BASE_ENGINE_INSTANCE, +}; + +/* GEN9/XE_LPD - Blitter Per-Engine-Instance */ +static const struct __guc_mmio_reg_descr xe_lpd_blt_inst_regs[] = { + COMMON_BASE_ENGINE_INSTANCE, +}; + +/* GEN9 - Global */ +static const struct __guc_mmio_reg_descr default_global_regs[] = { + COMMON_BASE_GLOBAL, + COMMON_GEN9BASE_GLOBAL, +}; + +static const struct __guc_mmio_reg_descr default_rc_class_regs[] = { + COMMON_BASE_HAS_EU, + COMMON_BASE_RENDER, +}; + +/* + * Empty lists: + * GEN9/XE_LPD - Blitter Per-Class + * GEN9/XE_LPD - Media Decode/Encode Per-Class + * GEN9 - VEC Class + */ +static const struct __guc_mmio_reg_descr empty_regs_list[] = { +}; + +#define TO_GCAP_DEF_OWNER(x) (GUC_CAPTURE_LIST_INDEX_##x) +#define TO_GCAP_DEF_TYPE(x) (GUC_CAPTURE_LIST_TYPE_##x) +#define MAKE_REGLIST(regslist, regsowner, regstype, class) \ + { \ + regslist, \ + ARRAY_SIZE(regslist), \ + TO_GCAP_DEF_OWNER(regsowner), \ + TO_GCAP_DEF_TYPE(regstype), \ + class, \ + NULL, \ + } + +/* List of lists */ +static struct __guc_mmio_reg_descr_group default_lists[] = { + MAKE_REGLIST(default_global_regs, PF, GLOBAL, 0), + MAKE_REGLIST(default_rc_class_regs, PF, ENGINE_CLASS, GUC_RENDER_CLASS), + MAKE_REGLIST(xe_lpd_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_RENDER_CLASS), + MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_VIDEO_CLASS), + MAKE_REGLIST(xe_lpd_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_VIDEO_CLASS), + MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_VIDEOENHANCE_CLASS), + MAKE_REGLIST(xe_lpd_vec_inst_regs, PF, ENGINE_INSTANCE, GUC_VIDEOENHANCE_CLASS), + MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_BLITTER_CLASS), + MAKE_REGLIST(xe_lpd_blt_inst_regs, PF, ENGINE_INSTANCE, GUC_BLITTER_CLASS), + {} +}; + +static const struct __guc_mmio_reg_descr_group xe_lpd_lists[] = { + MAKE_REGLIST(xe_lpd_global_regs, PF, GLOBAL, 0), + MAKE_REGLIST(xe_lpd_rc_class_regs, PF, ENGINE_CLASS, GUC_RENDER_CLASS), + MAKE_REGLIST(xe_lpd_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_RENDER_CLASS), + MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_VIDEO_CLASS), + MAKE_REGLIST(xe_lpd_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_VIDEO_CLASS), + MAKE_REGLIST(xe_lpd_vec_class_regs, PF, ENGINE_CLASS, GUC_VIDEOENHANCE_CLASS), + MAKE_REGLIST(xe_lpd_vec_inst_regs, PF, ENGINE_INSTANCE, GUC_VIDEOENHANCE_CLASS), + MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_BLITTER_CLASS), + MAKE_REGLIST(xe_lpd_blt_inst_regs, PF, ENGINE_INSTANCE, GUC_BLITTER_CLASS), + {} +}; + +static const struct __guc_mmio_reg_descr_group * +guc_capture_get_one_list(const struct __guc_mmio_reg_descr_group *reglists, + u32 owner, u32 type, u32 id) +{ + int i; + + if (!reglists) + return NULL; + + for (i = 0; reglists[i].list; ++i) { + if (reglists[i].owner == owner && reglists[i].type == type && + (reglists[i].engine == id || reglists[i].type == GUC_CAPTURE_LIST_TYPE_GLOBAL)) + return ®lists[i]; + } + + return NULL; +} + +static struct __guc_mmio_reg_descr_group * +guc_capture_get_one_ext_list(struct __guc_mmio_reg_descr_group *reglists, + u32 owner, u32 type, u32 id) +{ + int i; + + if (!reglists) + return NULL; + + for (i = 0; reglists[i].extlist; ++i) { + if (reglists[i].owner == owner && reglists[i].type == type && + (reglists[i].engine == id || reglists[i].type == GUC_CAPTURE_LIST_TYPE_GLOBAL)) + return ®lists[i]; + } + + return NULL; +} + +static void guc_capture_free_extlists(struct __guc_mmio_reg_descr_group *reglists) +{ + int i = 0; + + if (!reglists) + return; + + while (reglists[i].extlist) + kfree(reglists[i++].extlist); +} + +struct __ext_steer_reg { + const char *name; + i915_reg_t reg; +}; + +static const struct __ext_steer_reg xe_extregs[] = { + {"GEN7_SAMPLER_INSTDONE", GEN7_SAMPLER_INSTDONE}, + {"GEN7_ROW_INSTDONE", GEN7_ROW_INSTDONE} +}; + +static void __fill_ext_reg(struct __guc_mmio_reg_descr *ext, + const struct __ext_steer_reg *extlist, + int slice_id, int subslice_id) +{ + ext->reg = extlist->reg; + ext->flags = FIELD_PREP(GUC_REGSET_STEERING_GROUP, slice_id); + ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, subslice_id); + ext->regname = extlist->name; +} + +static int +__alloc_ext_regs(struct __guc_mmio_reg_descr_group *newlist, + const struct __guc_mmio_reg_descr_group *rootlist, int num_regs) +{ + struct __guc_mmio_reg_descr *list; + + list = kcalloc(num_regs, sizeof(struct __guc_mmio_reg_descr), GFP_KERNEL); + if (!list) + return -ENOMEM; + + newlist->extlist = list; + newlist->num_regs = num_regs; + newlist->owner = rootlist->owner; + newlist->engine = rootlist->engine; + newlist->type = rootlist->type; + + return 0; +} + +static void +guc_capture_alloc_steered_lists_xe_lpd(struct intel_guc *guc, + const struct __guc_mmio_reg_descr_group *lists) +{ + struct intel_gt *gt = guc_to_gt(guc); + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + int slice, subslice, i, num_steer_regs, num_tot_regs = 0; + const struct __guc_mmio_reg_descr_group *list; + struct __guc_mmio_reg_descr_group *extlists; + struct __guc_mmio_reg_descr *extarray; + struct sseu_dev_info *sseu; + + /* In XE_LPD we only have steered registers for the render-class */ + list = guc_capture_get_one_list(lists, GUC_CAPTURE_LIST_INDEX_PF, + GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS, GUC_RENDER_CLASS); + /* skip if extlists was previously allocated */ + if (!list || guc->capture->extlists) + return; + + num_steer_regs = ARRAY_SIZE(xe_extregs); + + sseu = >->info.sseu; + for_each_instdone_slice_subslice(i915, sseu, slice, subslice) + num_tot_regs += num_steer_regs; + + if (!num_tot_regs) + return; + + /* allocate an extra for an end marker */ + extlists = kcalloc(2, sizeof(struct __guc_mmio_reg_descr_group), GFP_KERNEL); + if (!extlists) + return; + + if (__alloc_ext_regs(&extlists[0], list, num_tot_regs)) { + kfree(extlists); + return; + } + + extarray = extlists[0].extlist; + for_each_instdone_slice_subslice(i915, sseu, slice, subslice) { + for (i = 0; i < num_steer_regs; ++i) { + __fill_ext_reg(extarray, &xe_extregs[i], slice, subslice); + ++extarray; + } + } + + guc->capture->extlists = extlists; +} + +static const struct __ext_steer_reg xehpg_extregs[] = { + {"XEHPG_INSTDONE_GEOM_SVG", XEHPG_INSTDONE_GEOM_SVG} +}; + +static bool __has_xehpg_extregs(u32 ipver) +{ + return (ipver >= IP_VER(12, 55)); +} + +static void +guc_capture_alloc_steered_lists_xe_hpg(struct intel_guc *guc, + const struct __guc_mmio_reg_descr_group *lists, + u32 ipver) +{ + struct intel_gt *gt = guc_to_gt(guc); + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + struct sseu_dev_info *sseu; + int slice, subslice, i, iter, num_steer_regs, num_tot_regs = 0; + const struct __guc_mmio_reg_descr_group *list; + struct __guc_mmio_reg_descr_group *extlists; + struct __guc_mmio_reg_descr *extarray; + + /* In XE_LP / HPG we only have render-class steering registers during error-capture */ + list = guc_capture_get_one_list(lists, GUC_CAPTURE_LIST_INDEX_PF, + GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS, GUC_RENDER_CLASS); + /* skip if extlists was previously allocated */ + if (!list || guc->capture->extlists) + return; + + num_steer_regs = ARRAY_SIZE(xe_extregs); + if (__has_xehpg_extregs(ipver)) + num_steer_regs += ARRAY_SIZE(xehpg_extregs); + + sseu = >->info.sseu; + for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) { + num_tot_regs += num_steer_regs; + } + + if (!num_tot_regs) + return; + + /* allocate an extra for an end marker */ + extlists = kcalloc(2, sizeof(struct __guc_mmio_reg_descr_group), GFP_KERNEL); + if (!extlists) + return; + + if (__alloc_ext_regs(&extlists[0], list, num_tot_regs)) { + kfree(extlists); + return; + } + + extarray = extlists[0].extlist; + for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) { + for (i = 0; i < ARRAY_SIZE(xe_extregs); ++i) { + __fill_ext_reg(extarray, &xe_extregs[i], slice, subslice); + ++extarray; + } + if (__has_xehpg_extregs(ipver)) { + for (i = 0; i < ARRAY_SIZE(xehpg_extregs); ++i) { + __fill_ext_reg(extarray, &xehpg_extregs[i], slice, subslice); + ++extarray; + } + } + } + + drm_dbg(&i915->drm, "GuC-capture found %d-ext-regs.\n", num_tot_regs); + guc->capture->extlists = extlists; +} + +static const struct __guc_mmio_reg_descr_group * +guc_capture_get_device_reglist(struct intel_guc *guc) +{ + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + + if (GRAPHICS_VER(i915) > 11) { + /* + * For certain engine classes, there are slice and subslice + * level registers requiring steering. We allocate and populate + * these at init time based on hw config add it as an extension + * list at the end of the pre-populated render list. + */ + if (IS_DG2(i915)) + guc_capture_alloc_steered_lists_xe_hpg(guc, xe_lpd_lists, IP_VER(12, 55)); + else if (IS_XEHPSDV(i915)) + guc_capture_alloc_steered_lists_xe_hpg(guc, xe_lpd_lists, IP_VER(12, 50)); + else + guc_capture_alloc_steered_lists_xe_lpd(guc, xe_lpd_lists); + + return xe_lpd_lists; + } + + /* if GuC submission is enabled on a non-POR platform, just use a common baseline */ + return default_lists; +} + +static const char * +__stringify_owner(u32 owner) +{ + switch (owner) { + case GUC_CAPTURE_LIST_INDEX_PF: + return "PF"; + case GUC_CAPTURE_LIST_INDEX_VF: + return "VF"; + default: + return "unknown"; + } + + return ""; +} + +static const char * +__stringify_type(u32 type) +{ + switch (type) { + case GUC_CAPTURE_LIST_TYPE_GLOBAL: + return "Global"; + case GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS: + return "Class"; + case GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE: + return "Instance"; + default: + return "unknown"; + } + + return ""; +} + +static const char * +__stringify_engclass(u32 class) +{ + switch (class) { + case GUC_RENDER_CLASS: + return "Render"; + case GUC_VIDEO_CLASS: + return "Video"; + case GUC_VIDEOENHANCE_CLASS: + return "VideoEnhance"; + case GUC_BLITTER_CLASS: + return "Blitter"; + case GUC_COMPUTE_CLASS: + return "Compute"; + default: + return "unknown"; + } + + return ""; +} + +static void +guc_capture_warn_with_list_info(struct drm_i915_private *i915, char *msg, + u32 owner, u32 type, u32 classid) +{ + if (type == GUC_CAPTURE_LIST_TYPE_GLOBAL) + drm_dbg(&i915->drm, "GuC-capture: %s for %s %s-Registers.\n", msg, + __stringify_owner(owner), __stringify_type(type)); + else + drm_dbg(&i915->drm, "GuC-capture: %s for %s %s-Registers on %s-Engine\n", msg, + __stringify_owner(owner), __stringify_type(type), + __stringify_engclass(classid)); +} + +static int +guc_capture_list_init(struct intel_guc *guc, u32 owner, u32 type, u32 classid, + struct guc_mmio_reg *ptr, u16 num_entries) +{ + u32 i = 0, j = 0; + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + const struct __guc_mmio_reg_descr_group *reglists = guc->capture->reglists; + struct __guc_mmio_reg_descr_group *extlists = guc->capture->extlists; + const struct __guc_mmio_reg_descr_group *match; + struct __guc_mmio_reg_descr_group *matchext; + + if (!reglists) + return -ENODEV; + + match = guc_capture_get_one_list(reglists, owner, type, classid); + if (!match) { + guc_capture_warn_with_list_info(i915, "Missing register list init", owner, type, + classid); + return -ENODATA; + } + + for (i = 0; i < num_entries && i < match->num_regs; ++i) { + ptr[i].offset = match->list[i].reg.reg; + ptr[i].value = 0xDEADF00D; + ptr[i].flags = match->list[i].flags; + ptr[i].mask = match->list[i].mask; + } + + matchext = guc_capture_get_one_ext_list(extlists, owner, type, classid); + if (matchext) { + for (i = match->num_regs, j = 0; i < num_entries && + i < (match->num_regs + matchext->num_regs) && + j < matchext->num_regs; ++i, ++j) { + ptr[i].offset = matchext->extlist[j].reg.reg; + ptr[i].value = 0xDEADF00D; + ptr[i].flags = matchext->extlist[j].flags; + ptr[i].mask = matchext->extlist[j].mask; + } + } + if (i < num_entries) + drm_dbg(&i915->drm, "GuC-capture: Init reglist short %d out %d.\n", + (int)i, (int)num_entries); + + return 0; +} + +static int +guc_cap_list_num_regs(struct intel_guc_state_capture *gc, u32 owner, u32 type, u32 classid) +{ + const struct __guc_mmio_reg_descr_group *match; + struct __guc_mmio_reg_descr_group *matchext; + int num_regs; + + match = guc_capture_get_one_list(gc->reglists, owner, type, classid); + if (!match) + return 0; + + num_regs = match->num_regs; + + matchext = guc_capture_get_one_ext_list(gc->extlists, owner, type, classid); + if (matchext) + num_regs += matchext->num_regs; + + return num_regs; +} + +int +intel_guc_capture_getlistsize(struct intel_guc *guc, u32 owner, u32 type, u32 classid, + size_t *size) +{ + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + struct intel_guc_state_capture *gc = guc->capture; + struct __guc_capture_ads_cache *cache = &gc->ads_cache[owner][type][classid]; + int num_regs; + + if (!gc->reglists) + return -ENODEV; + + if (cache->is_valid) { + *size = cache->size; + return cache->status; + } + + num_regs = guc_cap_list_num_regs(gc, owner, type, classid); + if (!num_regs) { + guc_capture_warn_with_list_info(i915, "Missing register list size", + owner, type, classid); + return -ENODATA; + } + + *size = PAGE_ALIGN((sizeof(struct guc_debug_capture_list)) + + (num_regs * sizeof(struct guc_mmio_reg))); + + return 0; +} + +static void guc_capture_create_prealloc_nodes(struct intel_guc *guc); + +int +intel_guc_capture_getlist(struct intel_guc *guc, u32 owner, u32 type, u32 classid, + void **outptr) +{ + struct intel_guc_state_capture *gc = guc->capture; + struct __guc_capture_ads_cache *cache = &gc->ads_cache[owner][type][classid]; + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + struct guc_debug_capture_list *listnode; + int ret, num_regs; + u8 *caplist, *tmp; + size_t size = 0; + + if (!gc->reglists) + return -ENODEV; + + if (cache->is_valid) { + *outptr = cache->ptr; + return cache->status; + } + + /* + * ADS population of input registers is a good + * time to pre-allocate cachelist output nodes + */ + guc_capture_create_prealloc_nodes(guc); + + ret = intel_guc_capture_getlistsize(guc, owner, type, classid, &size); + if (ret) { + cache->is_valid = true; + cache->ptr = NULL; + cache->size = 0; + cache->status = ret; + return ret; + } + + caplist = kzalloc(size, GFP_KERNEL); + if (!caplist) { + drm_dbg(&i915->drm, "GuC-capture: failed to alloc cached caplist"); + return -ENOMEM; + } + + /* populate capture list header */ + tmp = caplist; + num_regs = guc_cap_list_num_regs(guc->capture, owner, type, classid); + listnode = (struct guc_debug_capture_list *)tmp; + listnode->header.info = FIELD_PREP(GUC_CAPTURELISTHDR_NUMDESCR, (u32)num_regs); + + /* populate list of register descriptor */ + tmp += sizeof(struct guc_debug_capture_list); + guc_capture_list_init(guc, owner, type, classid, (struct guc_mmio_reg *)tmp, num_regs); + + /* cache this list */ + cache->is_valid = true; + cache->ptr = caplist; + cache->size = size; + cache->status = 0; + + *outptr = caplist; + + return 0; +} + +int +intel_guc_capture_getnullheader(struct intel_guc *guc, + void **outptr, size_t *size) +{ + struct intel_guc_state_capture *gc = guc->capture; + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + int tmp = sizeof(u32) * 4; + void *null_header; + + if (gc->ads_null_cache) { + *outptr = gc->ads_null_cache; + *size = tmp; + return 0; + } + + null_header = kzalloc(tmp, GFP_KERNEL); + if (!null_header) { + drm_dbg(&i915->drm, "GuC-capture: failed to alloc cached nulllist"); + return -ENOMEM; + } + + gc->ads_null_cache = null_header; + *outptr = null_header; + *size = tmp; + + return 0; +} + +#define GUC_CAPTURE_OVERBUFFER_MULTIPLIER 3 + +int +intel_guc_capture_output_min_size_est(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + struct intel_engine_cs *engine; + enum intel_engine_id id; + int worst_min_size = 0, num_regs = 0; + size_t tmp = 0; + + if (!guc->capture) + return -ENODEV; + + /* + * If every single engine-instance suffered a failure in quick succession but + * were all unrelated, then a burst of multiple error-capture events would dump + * registers for every one engine instance, one at a time. In this case, GuC + * would even dump the global-registers repeatedly. + * + * For each engine instance, there would be 1 x guc_state_capture_group_t output + * followed by 3 x guc_state_capture_t lists. The latter is how the register + * dumps are split across different register types (where the '3' are global vs class + * vs instance). Finally, let's multiply the whole thing by 3x (just so we are + * not limited to just 1 round of data in a worst case full register dump log) + * + * NOTE: intel_guc_log that allocates the log buffer would round this size up to + * a power of two. + */ + + for_each_engine(engine, gt, id) { + worst_min_size += sizeof(struct guc_state_capture_group_header_t) + + (3 * sizeof(struct guc_state_capture_header_t)); + + if (!intel_guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_GLOBAL, 0, &tmp)) + num_regs += tmp; + + if (!intel_guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS, + engine->class, &tmp)) { + num_regs += tmp; + } + if (!intel_guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE, + engine->class, &tmp)) { + num_regs += tmp; + } + } + + worst_min_size += (num_regs * sizeof(struct guc_mmio_reg)); + + return (worst_min_size * GUC_CAPTURE_OVERBUFFER_MULTIPLIER); +} + +/* + * KMD Init time flows: + * -------------------- + * --> alloc A: GuC input capture regs lists (registered to GuC via ADS). + * intel_guc_ads acquires the register lists by calling + * intel_guc_capture_list_size and intel_guc_capture_list_get 'n' times, + * where n = 1 for global-reg-list + + * num_engine_classes for class-reg-list + + * num_engine_classes for instance-reg-list + * (since all instances of the same engine-class type + * have an identical engine-instance register-list). + * ADS module also calls separately for PF vs VF. + * + * --> alloc B: GuC output capture buf (registered via guc_init_params(log_param)) + * Size = #define CAPTURE_BUFFER_SIZE (warns if on too-small) + * Note2: 'x 3' to hold multiple capture groups + * + * GUC Runtime notify capture: + * -------------------------- + * --> G2H STATE_CAPTURE_NOTIFICATION + * L--> intel_guc_capture_process + * L--> Loop through B (head..tail) and for each engine instance's + * err-state-captured register-list we find, we alloc 'C': + * --> alloc C: A capture-output-node structure that includes misc capture info along + * with 3 register list dumps (global, engine-class and engine-instance) + * This node is created from a pre-allocated list of blank nodes in + * guc->capture->cachelist and populated with the error-capture + * data from GuC and then it's added into guc->capture->outlist linked + * list. This list is used for matchup and printout by i915_gpu_coredump + * and err_print_gt, (when user invokes the error capture sysfs). + * + * GUC --> notify context reset: + * ----------------------------- + * --> G2H CONTEXT RESET + * L--> guc_handle_context_reset --> i915_capture_error_state + * L--> i915_gpu_coredump(..IS_GUC_CAPTURE) --> gt_record_engines + * --> capture_engine(..IS_GUC_CAPTURE) + * L--> intel_guc_capture_get_matching_node is where + * detach C from internal linked list and add it into + * intel_engine_coredump struct (if the context and + * engine of the event notification matches a node + * in the link list). + * + * User Sysfs / Debugfs + * -------------------- + * --> i915_gpu_coredump_copy_to_buffer-> + * L--> err_print_to_sgl --> err_print_gt + * L--> error_print_guc_captures + * L--> intel_guc_capture_print_node prints the + * register lists values of the attached node + * on the error-engine-dump being reported. + * L--> i915_reset_error_state ... -->__i915_gpu_coredump_free + * L--> ... cleanup_gt --> + * L--> intel_guc_capture_free_node returns the + * capture-output-node back to the internal + * cachelist for reuse. + * + */ + +static int guc_capture_buf_cnt(struct __guc_capture_bufstate *buf) +{ + if (buf->wr >= buf->rd) + return (buf->wr - buf->rd); + return (buf->size - buf->rd) + buf->wr; +} + +static int guc_capture_buf_cnt_to_end(struct __guc_capture_bufstate *buf) +{ + if (buf->rd > buf->wr) + return (buf->size - buf->rd); + return (buf->wr - buf->rd); +} + +/* + * GuC's error-capture output is a ring buffer populated in a byte-stream fashion: + * + * The GuC Log buffer region for error-capture is managed like a ring buffer. + * The GuC firmware dumps error capture logs into this ring in a byte-stream flow. + * Additionally, as per the current and foreseeable future, all packed error- + * capture output structures are dword aligned. + * + * That said, if the GuC firmware is in the midst of writing a structure that is larger + * than one dword but the tail end of the err-capture buffer-region has lesser space left, + * we would need to extract that structure one dword at a time straddled across the end, + * onto the start of the ring. + * + * Below function, guc_capture_log_remove_dw is a helper for that. All callers of this + * function would typically do a straight-up memcpy from the ring contents and will only + * call this helper if their structure-extraction is straddling across the end of the + * ring. GuC firmware does not add any padding. The reason for the no-padding is to ease + * scalability for future expansion of output data types without requiring a redesign + * of the flow controls. + */ +static int +guc_capture_log_remove_dw(struct intel_guc *guc, struct __guc_capture_bufstate *buf, + u32 *dw) +{ + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + int tries = 2; + int avail = 0; + u32 *src_data; + + if (!guc_capture_buf_cnt(buf)) + return 0; + + while (tries--) { + avail = guc_capture_buf_cnt_to_end(buf); + if (avail >= sizeof(u32)) { + src_data = (u32 *)(buf->data + buf->rd); + *dw = *src_data; + buf->rd += 4; + return 4; + } + if (avail) + drm_dbg(&i915->drm, "GuC-Cap-Logs not dword aligned, skipping.\n"); + buf->rd = 0; + } + + return 0; +} + +static bool +guc_capture_data_extracted(struct __guc_capture_bufstate *b, + int size, void *dest) +{ + if (guc_capture_buf_cnt_to_end(b) >= size) { + memcpy(dest, (b->data + b->rd), size); + b->rd += size; + return true; + } + return false; +} + +static int +guc_capture_log_get_group_hdr(struct intel_guc *guc, struct __guc_capture_bufstate *buf, + struct guc_state_capture_group_header_t *ghdr) +{ + int read = 0; + int fullsize = sizeof(struct guc_state_capture_group_header_t); + + if (fullsize > guc_capture_buf_cnt(buf)) + return -1; + + if (guc_capture_data_extracted(buf, fullsize, (void *)ghdr)) + return 0; + + read += guc_capture_log_remove_dw(guc, buf, &ghdr->owner); + read += guc_capture_log_remove_dw(guc, buf, &ghdr->info); + if (read != fullsize) + return -1; + + return 0; +} + +static int +guc_capture_log_get_data_hdr(struct intel_guc *guc, struct __guc_capture_bufstate *buf, + struct guc_state_capture_header_t *hdr) +{ + int read = 0; + int fullsize = sizeof(struct guc_state_capture_header_t); + + if (fullsize > guc_capture_buf_cnt(buf)) + return -1; + + if (guc_capture_data_extracted(buf, fullsize, (void *)hdr)) + return 0; + + read += guc_capture_log_remove_dw(guc, buf, &hdr->owner); + read += guc_capture_log_remove_dw(guc, buf, &hdr->info); + read += guc_capture_log_remove_dw(guc, buf, &hdr->lrca); + read += guc_capture_log_remove_dw(guc, buf, &hdr->guc_id); + read += guc_capture_log_remove_dw(guc, buf, &hdr->num_mmios); + if (read != fullsize) + return -1; + + return 0; +} + +static int +guc_capture_log_get_register(struct intel_guc *guc, struct __guc_capture_bufstate *buf, + struct guc_mmio_reg *reg) +{ + int read = 0; + int fullsize = sizeof(struct guc_mmio_reg); + + if (fullsize > guc_capture_buf_cnt(buf)) + return -1; + + if (guc_capture_data_extracted(buf, fullsize, (void *)reg)) + return 0; + + read += guc_capture_log_remove_dw(guc, buf, ®->offset); + read += guc_capture_log_remove_dw(guc, buf, ®->value); + read += guc_capture_log_remove_dw(guc, buf, ®->flags); + read += guc_capture_log_remove_dw(guc, buf, ®->mask); + if (read != fullsize) + return -1; + + return 0; +} + +static void +guc_capture_delete_one_node(struct intel_guc *guc, struct __guc_capture_parsed_output *node) +{ + int i; + + for (i = 0; i < GUC_CAPTURE_LIST_TYPE_MAX; ++i) + kfree(node->reginfo[i].regs); + list_del(&node->link); + kfree(node); +} + +static void +guc_capture_delete_prealloc_nodes(struct intel_guc *guc) +{ + struct __guc_capture_parsed_output *n, *ntmp; + + /* + * NOTE: At the end of driver operation, we must assume that we + * have prealloc nodes in both the cachelist as well as outlist + * if unclaimed error capture events occurred prior to shutdown. + */ + list_for_each_entry_safe(n, ntmp, &guc->capture->outlist, link) + guc_capture_delete_one_node(guc, n); + + list_for_each_entry_safe(n, ntmp, &guc->capture->cachelist, link) + guc_capture_delete_one_node(guc, n); +} + +static void +guc_capture_add_node_to_list(struct __guc_capture_parsed_output *node, + struct list_head *list) +{ + list_add_tail(&node->link, list); +} + +static void +guc_capture_add_node_to_outlist(struct intel_guc_state_capture *gc, + struct __guc_capture_parsed_output *node) +{ + guc_capture_add_node_to_list(node, &gc->outlist); +} + +static void +guc_capture_add_node_to_cachelist(struct intel_guc_state_capture *gc, + struct __guc_capture_parsed_output *node) +{ + guc_capture_add_node_to_list(node, &gc->cachelist); +} + +static void +guc_capture_init_node(struct intel_guc *guc, struct __guc_capture_parsed_output *node) +{ + struct guc_mmio_reg *tmp[GUC_CAPTURE_LIST_TYPE_MAX]; + int i; + + for (i = 0; i < GUC_CAPTURE_LIST_TYPE_MAX; ++i) { + tmp[i] = node->reginfo[i].regs; + memset(tmp[i], 0, sizeof(struct guc_mmio_reg) * + guc->capture->max_mmio_per_node); + } + memset(node, 0, sizeof(*node)); + for (i = 0; i < GUC_CAPTURE_LIST_TYPE_MAX; ++i) + node->reginfo[i].regs = tmp[i]; + + INIT_LIST_HEAD(&node->link); +} + +static struct __guc_capture_parsed_output * +guc_capture_get_prealloc_node(struct intel_guc *guc) +{ + struct __guc_capture_parsed_output *found = NULL; + + if (!list_empty(&guc->capture->cachelist)) { + struct __guc_capture_parsed_output *n, *ntmp; + + /* get first avail node from the cache list */ + list_for_each_entry_safe(n, ntmp, &guc->capture->cachelist, link) { + found = n; + list_del(&n->link); + break; + } + } else { + struct __guc_capture_parsed_output *n, *ntmp; + + /* traverse down and steal back the oldest node already allocated */ + list_for_each_entry_safe(n, ntmp, &guc->capture->outlist, link) { + found = n; + } + if (found) + list_del(&found->link); + } + if (found) + guc_capture_init_node(guc, found); + + return found; +} + +static struct __guc_capture_parsed_output * +guc_capture_alloc_one_node(struct intel_guc *guc) +{ + struct __guc_capture_parsed_output *new; + int i; + + new = kzalloc(sizeof(*new), GFP_KERNEL); + if (!new) + return NULL; + + for (i = 0; i < GUC_CAPTURE_LIST_TYPE_MAX; ++i) { + new->reginfo[i].regs = kcalloc(guc->capture->max_mmio_per_node, + sizeof(struct guc_mmio_reg), GFP_KERNEL); + if (!new->reginfo[i].regs) { + while (i) + kfree(new->reginfo[--i].regs); + kfree(new); + return NULL; + } + } + guc_capture_init_node(guc, new); + + return new; +} + +static struct __guc_capture_parsed_output * +guc_capture_clone_node(struct intel_guc *guc, struct __guc_capture_parsed_output *original, + u32 keep_reglist_mask) +{ + struct __guc_capture_parsed_output *new; + int i; + + new = guc_capture_get_prealloc_node(guc); + if (!new) + return NULL; + if (!original) + return new; + + new->is_partial = original->is_partial; + + /* copy reg-lists that we want to clone */ + for (i = 0; i < GUC_CAPTURE_LIST_TYPE_MAX; ++i) { + if (keep_reglist_mask & BIT(i)) { + GEM_BUG_ON(original->reginfo[i].num_regs > + guc->capture->max_mmio_per_node); + + memcpy(new->reginfo[i].regs, original->reginfo[i].regs, + original->reginfo[i].num_regs * sizeof(struct guc_mmio_reg)); + + new->reginfo[i].num_regs = original->reginfo[i].num_regs; + new->reginfo[i].vfid = original->reginfo[i].vfid; + + if (i == GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS) { + new->eng_class = original->eng_class; + } else if (i == GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE) { + new->eng_inst = original->eng_inst; + new->guc_id = original->guc_id; + new->lrca = original->lrca; + } + } + } + + return new; +} + +static void +__guc_capture_create_prealloc_nodes(struct intel_guc *guc) +{ + struct __guc_capture_parsed_output *node = NULL; + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + int i; + + for (i = 0; i < PREALLOC_NODES_MAX_COUNT; ++i) { + node = guc_capture_alloc_one_node(guc); + if (!node) { + drm_warn(&i915->drm, "GuC Capture pre-alloc-cache failure\n"); + /* dont free the priors, use what we got and cleanup at shutdown */ + return; + } + guc_capture_add_node_to_cachelist(guc->capture, node); + } +} + +static int +guc_get_max_reglist_count(struct intel_guc *guc) +{ + int i, j, k, tmp, maxregcount = 0; + + for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; ++i) { + for (j = 0; j < GUC_CAPTURE_LIST_TYPE_MAX; ++j) { + for (k = 0; k < GUC_MAX_ENGINE_CLASSES; ++k) { + if (j == GUC_CAPTURE_LIST_TYPE_GLOBAL && k > 0) + continue; + + tmp = guc_cap_list_num_regs(guc->capture, i, j, k); + if (tmp > maxregcount) + maxregcount = tmp; + } + } + } + if (!maxregcount) + maxregcount = PREALLOC_NODES_DEFAULT_NUMREGS; + + return maxregcount; +} + +static void +guc_capture_create_prealloc_nodes(struct intel_guc *guc) +{ + /* skip if we've already done the pre-alloc */ + if (guc->capture->max_mmio_per_node) + return; + + guc->capture->max_mmio_per_node = guc_get_max_reglist_count(guc); + __guc_capture_create_prealloc_nodes(guc); +} + +static int +guc_capture_extract_reglists(struct intel_guc *guc, struct __guc_capture_bufstate *buf) +{ + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + struct guc_state_capture_group_header_t ghdr = {0}; + struct guc_state_capture_header_t hdr = {0}; + struct __guc_capture_parsed_output *node = NULL; + struct guc_mmio_reg *regs = NULL; + int i, numlists, numregs, ret = 0; + enum guc_capture_type datatype; + struct guc_mmio_reg tmp; + bool is_partial = false; + + i = guc_capture_buf_cnt(buf); + if (!i) + return -ENODATA; + if (i % sizeof(u32)) { + drm_warn(&i915->drm, "GuC Capture new entries unaligned\n"); + ret = -EIO; + goto bailout; + } + + /* first get the capture group header */ + if (guc_capture_log_get_group_hdr(guc, buf, &ghdr)) { + ret = -EIO; + goto bailout; + } + /* + * we would typically expect a layout as below where n would be expected to be + * anywhere between 3 to n where n > 3 if we are seeing multiple dependent engine + * instances being reset together. + * ____________________________________________ + * | Capture Group | + * | ________________________________________ | + * | | Capture Group Header: | | + * | | - num_captures = 5 | | + * | |______________________________________| | + * | ________________________________________ | + * | | Capture1: | | + * | | Hdr: GLOBAL, numregs=a | | + * | | ____________________________________ | | + * | | | Reglist | | | + * | | | - reg1, reg2, ... rega | | | + * | | |__________________________________| | | + * | |______________________________________| | + * | ________________________________________ | + * | | Capture2: | | + * | | Hdr: CLASS=RENDER/COMPUTE, numregs=b| | + * | | ____________________________________ | | + * | | | Reglist | | | + * | | | - reg1, reg2, ... regb | | | + * | | |__________________________________| | | + * | |______________________________________| | + * | ________________________________________ | + * | | Capture3: | | + * | | Hdr: INSTANCE=RCS, numregs=c | | + * | | ____________________________________ | | + * | | | Reglist | | | + * | | | - reg1, reg2, ... regc | | | + * | | |__________________________________| | | + * | |______________________________________| | + * | ________________________________________ | + * | | Capture4: | | + * | | Hdr: CLASS=RENDER/COMPUTE, numregs=d| | + * | | ____________________________________ | | + * | | | Reglist | | | + * | | | - reg1, reg2, ... regd | | | + * | | |__________________________________| | | + * | |______________________________________| | + * | ________________________________________ | + * | | Capture5: | | + * | | Hdr: INSTANCE=CCS0, numregs=e | | + * | | ____________________________________ | | + * | | | Reglist | | | + * | | | - reg1, reg2, ... rege | | | + * | | |__________________________________| | | + * | |______________________________________| | + * |__________________________________________| + */ + is_partial = FIELD_GET(CAP_GRP_HDR_CAPTURE_TYPE, ghdr.info); + numlists = FIELD_GET(CAP_GRP_HDR_NUM_CAPTURES, ghdr.info); + + while (numlists--) { + if (guc_capture_log_get_data_hdr(guc, buf, &hdr)) { + ret = -EIO; + break; + } + + datatype = FIELD_GET(CAP_HDR_CAPTURE_TYPE, hdr.info); + if (datatype > GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE) { + /* unknown capture type - skip over to next capture set */ + numregs = FIELD_GET(CAP_HDR_NUM_MMIOS, hdr.num_mmios); + while (numregs--) { + if (guc_capture_log_get_register(guc, buf, &tmp)) { + ret = -EIO; + break; + } + } + continue; + } else if (node) { + /* + * Based on the current capture type and what we have so far, + * decide if we should add the current node into the internal + * linked list for match-up when i915_gpu_coredump calls later + * (and alloc a blank node for the next set of reglists) + * or continue with the same node or clone the current node + * but only retain the global or class registers (such as the + * case of dependent engine resets). + */ + if (datatype == GUC_CAPTURE_LIST_TYPE_GLOBAL) { + guc_capture_add_node_to_outlist(guc->capture, node); + node = NULL; + } else if (datatype == GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS && + node->reginfo[GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS].num_regs) { + /* Add to list, clone node and duplicate global list */ + guc_capture_add_node_to_outlist(guc->capture, node); + node = guc_capture_clone_node(guc, node, + GCAP_PARSED_REGLIST_INDEX_GLOBAL); + } else if (datatype == GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE && + node->reginfo[GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE].num_regs) { + /* Add to list, clone node and duplicate global + class lists */ + guc_capture_add_node_to_outlist(guc->capture, node); + node = guc_capture_clone_node(guc, node, + (GCAP_PARSED_REGLIST_INDEX_GLOBAL | + GCAP_PARSED_REGLIST_INDEX_ENGCLASS)); + } + } + + if (!node) { + node = guc_capture_get_prealloc_node(guc); + if (!node) { + ret = -ENOMEM; + break; + } + if (datatype != GUC_CAPTURE_LIST_TYPE_GLOBAL) + drm_dbg(&i915->drm, "GuC Capture missing global dump: %08x!\n", + datatype); + } + node->is_partial = is_partial; + node->reginfo[datatype].vfid = FIELD_GET(CAP_HDR_CAPTURE_VFID, hdr.owner); + switch (datatype) { + case GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE: + node->eng_class = FIELD_GET(CAP_HDR_ENGINE_CLASS, hdr.info); + node->eng_inst = FIELD_GET(CAP_HDR_ENGINE_INSTANCE, hdr.info); + node->lrca = hdr.lrca; + node->guc_id = hdr.guc_id; + break; + case GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS: + node->eng_class = FIELD_GET(CAP_HDR_ENGINE_CLASS, hdr.info); + break; + default: + break; + } + + numregs = FIELD_GET(CAP_HDR_NUM_MMIOS, hdr.num_mmios); + if (numregs > guc->capture->max_mmio_per_node) { + drm_dbg(&i915->drm, "GuC Capture list extraction clipped by prealloc!\n"); + numregs = guc->capture->max_mmio_per_node; + } + node->reginfo[datatype].num_regs = numregs; + regs = node->reginfo[datatype].regs; + i = 0; + while (numregs--) { + if (guc_capture_log_get_register(guc, buf, ®s[i++])) { + ret = -EIO; + break; + } + } + } + +bailout: + if (node) { + /* If we have data, add to linked list for match-up when i915_gpu_coredump calls */ + for (i = GUC_CAPTURE_LIST_TYPE_GLOBAL; i < GUC_CAPTURE_LIST_TYPE_MAX; ++i) { + if (node->reginfo[i].regs) { + guc_capture_add_node_to_outlist(guc->capture, node); + node = NULL; + break; + } + } + if (node) /* else return it back to cache list */ + guc_capture_add_node_to_cachelist(guc->capture, node); + } + return ret; +} + +static int __guc_capture_flushlog_complete(struct intel_guc *guc) +{ + u32 action[] = { + INTEL_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE, + GUC_CAPTURE_LOG_BUFFER + }; + + return intel_guc_send(guc, action, ARRAY_SIZE(action)); +} + +static void __guc_capture_process_output(struct intel_guc *guc) +{ + unsigned int buffer_size, read_offset, write_offset, full_count; + struct intel_uc *uc = container_of(guc, typeof(*uc), guc); + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + struct guc_log_buffer_state log_buf_state_local; + struct guc_log_buffer_state *log_buf_state; + struct __guc_capture_bufstate buf; + void *src_data = NULL; + bool new_overflow; + int ret; + + log_buf_state = guc->log.buf_addr + + (sizeof(struct guc_log_buffer_state) * GUC_CAPTURE_LOG_BUFFER); + src_data = guc->log.buf_addr + intel_guc_get_log_buffer_offset(GUC_CAPTURE_LOG_BUFFER); + + /* + * Make a copy of the state structure, inside GuC log buffer + * (which is uncached mapped), on the stack to avoid reading + * from it multiple times. + */ + memcpy(&log_buf_state_local, log_buf_state, sizeof(struct guc_log_buffer_state)); + buffer_size = intel_guc_get_log_buffer_size(GUC_CAPTURE_LOG_BUFFER); + read_offset = log_buf_state_local.read_ptr; + write_offset = log_buf_state_local.sampled_write_ptr; + full_count = log_buf_state_local.buffer_full_cnt; + + /* Bookkeeping stuff */ + guc->log.stats[GUC_CAPTURE_LOG_BUFFER].flush += log_buf_state_local.flush_to_file; + new_overflow = intel_guc_check_log_buf_overflow(&guc->log, GUC_CAPTURE_LOG_BUFFER, + full_count); + + /* Now copy the actual logs. */ + if (unlikely(new_overflow)) { + /* copy the whole buffer in case of overflow */ + read_offset = 0; + write_offset = buffer_size; + } else if (unlikely((read_offset > buffer_size) || + (write_offset > buffer_size))) { + drm_err(&i915->drm, "invalid GuC log capture buffer state!\n"); + /* copy whole buffer as offsets are unreliable */ + read_offset = 0; + write_offset = buffer_size; + } + + buf.size = buffer_size; + buf.rd = read_offset; + buf.wr = write_offset; + buf.data = src_data; + + if (!uc->reset_in_progress) { + do { + ret = guc_capture_extract_reglists(guc, &buf); + } while (ret >= 0); + } + + /* Update the state of log buffer err-cap state */ + log_buf_state->read_ptr = write_offset; + log_buf_state->flush_to_file = 0; + __guc_capture_flushlog_complete(guc); +} + +#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) + +static const char * +guc_capture_reg_to_str(const struct intel_guc *guc, u32 owner, u32 type, + u32 class, u32 id, u32 offset, u32 *is_ext) +{ + const struct __guc_mmio_reg_descr_group *reglists = guc->capture->reglists; + struct __guc_mmio_reg_descr_group *extlists = guc->capture->extlists; + const struct __guc_mmio_reg_descr_group *match; + struct __guc_mmio_reg_descr_group *matchext; + int j; + + *is_ext = 0; + if (!reglists) + return NULL; + + match = guc_capture_get_one_list(reglists, owner, type, id); + if (!match) + return NULL; + + for (j = 0; j < match->num_regs; ++j) { + if (offset == match->list[j].reg.reg) + return match->list[j].regname; + } + if (extlists) { + matchext = guc_capture_get_one_ext_list(extlists, owner, type, id); + if (!matchext) + return NULL; + for (j = 0; j < matchext->num_regs; ++j) { + if (offset == matchext->extlist[j].reg.reg) { + *is_ext = 1; + return matchext->extlist[j].regname; + } + } + } + + return NULL; +} + +#ifdef CONFIG_DRM_I915_DEBUG_GUC +#define __out(a, ...) \ + do { \ + drm_warn((&(a)->i915->drm), __VA_ARGS__); \ + i915_error_printf((a), __VA_ARGS__); \ + } while (0) +#else +#define __out(a, ...) \ + i915_error_printf(a, __VA_ARGS__) +#endif + +#define GCAP_PRINT_INTEL_ENG_INFO(ebuf, eng) \ + do { \ + __out(ebuf, " i915-Eng-Name: %s command stream\n", \ + (eng)->name); \ + __out(ebuf, " i915-Eng-Inst-Class: 0x%02x\n", (eng)->class); \ + __out(ebuf, " i915-Eng-Inst-Id: 0x%02x\n", (eng)->instance); \ + __out(ebuf, " i915-Eng-LogicalMask: 0x%08x\n", \ + (eng)->logical_mask); \ + } while (0) + +#define GCAP_PRINT_GUC_INST_INFO(ebuf, node) \ + do { \ + __out(ebuf, " GuC-Engine-Inst-Id: 0x%08x\n", \ + (node)->eng_inst); \ + __out(ebuf, " GuC-Context-Id: 0x%08x\n", (node)->guc_id); \ + __out(ebuf, " LRCA: 0x%08x\n", (node)->lrca); \ + } while (0) + +int intel_guc_capture_print_engine_node(struct drm_i915_error_state_buf *ebuf, + const struct intel_engine_coredump *ee) +{ + const char *grptype[GUC_STATE_CAPTURE_GROUP_TYPE_MAX] = { + "full-capture", + "partial-capture" + }; + const char *datatype[GUC_CAPTURE_LIST_TYPE_MAX] = { + "Global", + "Engine-Class", + "Engine-Instance" + }; + struct intel_guc_state_capture *cap; + struct __guc_capture_parsed_output *node; + struct intel_engine_cs *eng; + struct guc_mmio_reg *regs; + struct intel_guc *guc; + const char *str; + int numregs, i, j; + u32 is_ext; + + if (!ebuf || !ee) + return -EINVAL; + cap = ee->capture; + if (!cap || !ee->engine) + return -ENODEV; + + guc = &ee->engine->gt->uc.guc; + + __out(ebuf, "global --- GuC Error Capture on %s command stream:\n", + ee->engine->name); + + node = ee->guc_capture_node; + if (!node) { + __out(ebuf, " No matching ee-node\n"); + return 0; + } + + __out(ebuf, "Coverage: %s\n", grptype[node->is_partial]); + + for (i = GUC_CAPTURE_LIST_TYPE_GLOBAL; i < GUC_CAPTURE_LIST_TYPE_MAX; ++i) { + __out(ebuf, " RegListType: %s\n", + datatype[i % GUC_CAPTURE_LIST_TYPE_MAX]); + __out(ebuf, " Owner-Id: %d\n", node->reginfo[i].vfid); + + switch (i) { + case GUC_CAPTURE_LIST_TYPE_GLOBAL: + default: + break; + case GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS: + __out(ebuf, " GuC-Eng-Class: %d\n", node->eng_class); + __out(ebuf, " i915-Eng-Class: %d\n", + guc_class_to_engine_class(node->eng_class)); + break; + case GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE: + eng = intel_guc_lookup_engine(guc, node->eng_class, node->eng_inst); + if (eng) + GCAP_PRINT_INTEL_ENG_INFO(ebuf, eng); + else + __out(ebuf, " i915-Eng-Lookup Fail!\n"); + GCAP_PRINT_GUC_INST_INFO(ebuf, node); + break; + } + + numregs = node->reginfo[i].num_regs; + __out(ebuf, " NumRegs: %d\n", numregs); + j = 0; + while (numregs--) { + regs = node->reginfo[i].regs; + str = guc_capture_reg_to_str(guc, GUC_CAPTURE_LIST_INDEX_PF, i, + node->eng_class, 0, regs[j].offset, &is_ext); + if (!str) + __out(ebuf, " REG-0x%08x", regs[j].offset); + else + __out(ebuf, " %s", str); + if (is_ext) + __out(ebuf, "[%ld][%ld]", + FIELD_GET(GUC_REGSET_STEERING_GROUP, regs[j].flags), + FIELD_GET(GUC_REGSET_STEERING_INSTANCE, regs[j].flags)); + __out(ebuf, ": 0x%08x\n", regs[j].value); + ++j; + } + } + return 0; +} + +#endif //CONFIG_DRM_I915_CAPTURE_ERROR + +void intel_guc_capture_free_node(struct intel_engine_coredump *ee) +{ + if (!ee || !ee->guc_capture_node) + return; + + guc_capture_add_node_to_cachelist(ee->capture, ee->guc_capture_node); + ee->capture = NULL; + ee->guc_capture_node = NULL; +} + +void intel_guc_capture_get_matching_node(struct intel_gt *gt, + struct intel_engine_coredump *ee, + struct intel_context *ce) +{ + struct __guc_capture_parsed_output *n, *ntmp; + struct drm_i915_private *i915; + struct intel_guc *guc; + + if (!gt || !ee || !ce) + return; + + i915 = gt->i915; + guc = >->uc.guc; + if (!guc->capture) + return; + + GEM_BUG_ON(ee->guc_capture_node); + /* + * Look for a matching GuC reported error capture node from + * the internal output link-list based on lrca, guc-id and engine + * identification. + */ + list_for_each_entry_safe(n, ntmp, &guc->capture->outlist, link) { + if (n->eng_inst == GUC_ID_TO_ENGINE_INSTANCE(ee->engine->guc_id) && + n->eng_class == GUC_ID_TO_ENGINE_CLASS(ee->engine->guc_id) && + n->guc_id && n->guc_id == ce->guc_id.id && + (n->lrca & CTX_GTT_ADDRESS_MASK) && (n->lrca & CTX_GTT_ADDRESS_MASK) == + (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK)) { + list_del(&n->link); + ee->guc_capture_node = n; + ee->capture = guc->capture; + return; + } + } + drm_dbg(&i915->drm, "GuC capture can't match ee to node\n"); +} + +void intel_guc_capture_process(struct intel_guc *guc) +{ + if (guc->capture) + __guc_capture_process_output(guc); +} + +static void +guc_capture_free_ads_cache(struct intel_guc_state_capture *gc) +{ + int i, j, k; + struct __guc_capture_ads_cache *cache; + + for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; ++i) { + for (j = 0; j < GUC_CAPTURE_LIST_TYPE_MAX; ++j) { + for (k = 0; k < GUC_MAX_ENGINE_CLASSES; ++k) { + cache = &gc->ads_cache[i][j][k]; + if (cache->is_valid) + kfree(cache->ptr); + } + } + } + kfree(gc->ads_null_cache); +} + +void intel_guc_capture_destroy(struct intel_guc *guc) +{ + if (!guc->capture) + return; + + guc_capture_free_ads_cache(guc->capture); + + guc_capture_delete_prealloc_nodes(guc); + + guc_capture_free_extlists(guc->capture->extlists); + kfree(guc->capture->extlists); + + kfree(guc->capture); + guc->capture = NULL; +} + +int intel_guc_capture_init(struct intel_guc *guc) +{ + guc->capture = kzalloc(sizeof(*guc->capture), GFP_KERNEL); + if (!guc->capture) + return -ENOMEM; + + guc->capture->reglists = guc_capture_get_device_reglist(guc); + + INIT_LIST_HEAD(&guc->capture->outlist); + INIT_LIST_HEAD(&guc->capture->cachelist); + + return 0; +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h new file mode 100644 index 000000000000..d3d7bd0b6db6 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021-2021 Intel Corporation + */ + +#ifndef _INTEL_GUC_CAPTURE_H +#define _INTEL_GUC_CAPTURE_H + +#include <linux/types.h> + +struct drm_i915_error_state_buf; +struct guc_gt_system_info; +struct intel_engine_coredump; +struct intel_context; +struct intel_gt; +struct intel_guc; + +void intel_guc_capture_free_node(struct intel_engine_coredump *ee); +int intel_guc_capture_print_engine_node(struct drm_i915_error_state_buf *m, + const struct intel_engine_coredump *ee); +void intel_guc_capture_get_matching_node(struct intel_gt *gt, struct intel_engine_coredump *ee, + struct intel_context *ce); +void intel_guc_capture_process(struct intel_guc *guc); +int intel_guc_capture_output_min_size_est(struct intel_guc *guc); +int intel_guc_capture_getlist(struct intel_guc *guc, u32 owner, u32 type, u32 classid, + void **outptr); +int intel_guc_capture_getlistsize(struct intel_guc *guc, u32 owner, u32 type, u32 classid, + size_t *size); +int intel_guc_capture_getnullheader(struct intel_guc *guc, void **outptr, size_t *size); +void intel_guc_capture_destroy(struct intel_guc *guc); +int intel_guc_capture_init(struct intel_guc *guc); + +#endif /* _INTEL_GUC_CAPTURE_H */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index 4b300b6cc0f9..42cb7a9a6199 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -32,8 +32,8 @@ #define GUC_CLIENT_PRIORITY_NORMAL 3 #define GUC_CLIENT_PRIORITY_NUM 4 -#define GUC_MAX_LRC_DESCRIPTORS 65535 -#define GUC_INVALID_LRC_ID GUC_MAX_LRC_DESCRIPTORS +#define GUC_MAX_CONTEXT_ID 65535 +#define GUC_INVALID_CONTEXT_ID GUC_MAX_CONTEXT_ID #define GUC_RENDER_ENGINE 0 #define GUC_VIDEO_ENGINE 1 @@ -98,7 +98,13 @@ #define GUC_LOG_BUF_ADDR_SHIFT 12 #define GUC_CTL_WA 1 -#define GUC_WA_POLLCS BIT(18) +#define GUC_WA_GAM_CREDITS BIT(10) +#define GUC_WA_DUAL_QUEUE BIT(11) +#define GUC_WA_RCS_RESET_BEFORE_RC6 BIT(13) +#define GUC_WA_CONTEXT_ISOLATION BIT(15) +#define GUC_WA_PRE_PARSER BIT(14) +#define GUC_WA_HOLD_CCS_SWITCHOUT BIT(17) +#define GUC_WA_POLLCS BIT(18) #define GUC_CTL_FEATURE 2 #define GUC_CTL_ENABLE_SLPC BIT(2) @@ -197,54 +203,45 @@ struct guc_wq_item { u32 fence_id; } __packed; -struct guc_process_desc { - u32 stage_id; - u64 db_base_addr; +struct guc_sched_wq_desc { u32 head; u32 tail; u32 error_offset; - u64 wq_base_addr; - u32 wq_size_bytes; u32 wq_status; - u32 engine_presence; - u32 priority; - u32 reserved[36]; + u32 reserved[28]; } __packed; +/* Helper for context registration H2G */ +struct guc_ctxt_registration_info { + u32 flags; + u32 context_idx; + u32 engine_class; + u32 engine_submit_mask; + u32 wq_desc_lo; + u32 wq_desc_hi; + u32 wq_base_lo; + u32 wq_base_hi; + u32 wq_size; + u32 hwlrca_lo; + u32 hwlrca_hi; +}; #define CONTEXT_REGISTRATION_FLAG_KMD BIT(0) -#define CONTEXT_POLICY_DEFAULT_EXECUTION_QUANTUM_US 1000000 -#define CONTEXT_POLICY_DEFAULT_PREEMPTION_TIME_US 500000 +/* 32-bit KLV structure as used by policy updates and others */ +struct guc_klv_generic_dw_t { + u32 kl; + u32 value; +} __packed; -/* Preempt to idle on quantum expiry */ -#define CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE BIT(0) +/* Format of the UPDATE_CONTEXT_POLICIES H2G data packet */ +struct guc_update_context_policy_header { + u32 action; + u32 ctx_id; +} __packed; -/* - * GuC Context registration descriptor. - * FIXME: This is only required to exist during context registration. - * The current 1:1 between guc_lrc_desc and LRCs for the lifetime of the LRC - * is not required. - */ -struct guc_lrc_desc { - u32 hw_context_desc; - u32 slpm_perf_mode_hint; /* SPLC v1 only */ - u32 slpm_freq_hint; - u32 engine_submit_mask; /* In logical space */ - u8 engine_class; - u8 reserved0[3]; - u32 priority; - u32 process_desc; - u32 wq_addr; - u32 wq_size; - u32 context_flags; /* CONTEXT_REGISTRATION_* */ - /* Time for one workload to execute. (in micro seconds) */ - u32 execution_quantum; - /* Time to wait for a preemption request to complete before issuing a - * reset. (in micro seconds). - */ - u32 preemption_timeout; - u32 policy_flags; /* CONTEXT_POLICY_* */ - u32 reserved1[19]; +struct guc_update_context_policy { + struct guc_update_context_policy_header header; + struct guc_klv_generic_dw_t klv[GUC_CONTEXT_POLICIES_KLV_NUM_IDS]; } __packed; #define GUC_POWER_UNSPECIFIED 0 @@ -285,10 +282,13 @@ struct guc_mmio_reg { u32 offset; u32 value; u32 flags; - u32 mask; #define GUC_REGSET_MASKED BIT(0) +#define GUC_REGSET_NEEDS_STEERING BIT(1) #define GUC_REGSET_MASKED_WITH_VALUE BIT(2) #define GUC_REGSET_RESTORE_ONLY BIT(3) +#define GUC_REGSET_STEERING_GROUP GENMASK(15, 12) +#define GUC_REGSET_STEERING_INSTANCE GENMASK(23, 20) + u32 mask; } __packed; /* GuC register sets */ @@ -311,6 +311,14 @@ enum { GUC_CAPTURE_LIST_INDEX_MAX = 2, }; +/*Register-types of GuC capture register lists */ +enum guc_capture_type { + GUC_CAPTURE_LIST_TYPE_GLOBAL = 0, + GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS, + GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE, + GUC_CAPTURE_LIST_TYPE_MAX, +}; + /* GuC Additional Data Struct */ struct guc_ads { struct guc_mmio_reg_set reg_state_list[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS]; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c new file mode 100644 index 000000000000..79c66b6b51a3 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c @@ -0,0 +1,164 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "gt/intel_gt.h" +#include "gt/intel_hwconfig.h" +#include "i915_drv.h" +#include "i915_memcpy.h" + +/* + * GuC has a blob containing hardware configuration information (HWConfig). + * This is formatted as a simple and flexible KLV (Key/Length/Value) table. + * + * For example, a minimal version could be: + * enum device_attr { + * ATTR_SOME_VALUE = 0, + * ATTR_SOME_MASK = 1, + * }; + * + * static const u32 hwconfig[] = { + * ATTR_SOME_VALUE, + * 1, // Value Length in DWords + * 8, // Value + * + * ATTR_SOME_MASK, + * 3, + * 0x00FFFFFFFF, 0xFFFFFFFF, 0xFF000000, + * }; + * + * The attribute ids are defined in a hardware spec. + */ + +static int __guc_action_get_hwconfig(struct intel_guc *guc, + u32 ggtt_offset, u32 ggtt_size) +{ + u32 action[] = { + INTEL_GUC_ACTION_GET_HWCONFIG, + lower_32_bits(ggtt_offset), + upper_32_bits(ggtt_offset), + ggtt_size, + }; + int ret; + + ret = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0); + if (ret == -ENXIO) + return -ENOENT; + + return ret; +} + +static int guc_hwconfig_discover_size(struct intel_guc *guc, struct intel_hwconfig *hwconfig) +{ + int ret; + + /* + * Sending a query with zero offset and size will return the + * size of the blob. + */ + ret = __guc_action_get_hwconfig(guc, 0, 0); + if (ret < 0) + return ret; + + if (ret == 0) + return -EINVAL; + + hwconfig->size = ret; + return 0; +} + +static int guc_hwconfig_fill_buffer(struct intel_guc *guc, struct intel_hwconfig *hwconfig) +{ + struct i915_vma *vma; + u32 ggtt_offset; + void *vaddr; + int ret; + + GEM_BUG_ON(!hwconfig->size); + + ret = intel_guc_allocate_and_map_vma(guc, hwconfig->size, &vma, &vaddr); + if (ret) + return ret; + + ggtt_offset = intel_guc_ggtt_offset(guc, vma); + + ret = __guc_action_get_hwconfig(guc, ggtt_offset, hwconfig->size); + if (ret >= 0) + memcpy(hwconfig->ptr, vaddr, hwconfig->size); + + i915_vma_unpin_and_release(&vma, I915_VMA_RELEASE_MAP); + + return ret; +} + +static bool has_table(struct drm_i915_private *i915) +{ + if (IS_ALDERLAKE_P(i915)) + return true; + if (IS_DG2(i915)) + return true; + + return false; +} + +/** + * intel_guc_hwconfig_init - Initialize the HWConfig + * + * Retrieve the HWConfig table from the GuC and save it locally. + * It can then be queried on demand by other users later on. + */ +static int guc_hwconfig_init(struct intel_gt *gt) +{ + struct intel_hwconfig *hwconfig = >->info.hwconfig; + struct intel_guc *guc = >->uc.guc; + int ret; + + if (!has_table(gt->i915)) + return 0; + + ret = guc_hwconfig_discover_size(guc, hwconfig); + if (ret) + return ret; + + hwconfig->ptr = kmalloc(hwconfig->size, GFP_KERNEL); + if (!hwconfig->ptr) { + hwconfig->size = 0; + return -ENOMEM; + } + + ret = guc_hwconfig_fill_buffer(guc, hwconfig); + if (ret < 0) { + intel_gt_fini_hwconfig(gt); + return ret; + } + + return 0; +} + +/** + * intel_gt_init_hwconfig - Initialize the HWConfig if available + * + * Retrieve the HWConfig table if available on the current platform. + */ +int intel_gt_init_hwconfig(struct intel_gt *gt) +{ + if (!intel_uc_uses_guc(>->uc)) + return 0; + + return guc_hwconfig_init(gt); +} + +/** + * intel_gt_fini_hwconfig - Finalize the HWConfig + * + * Free up the memory allocation holding the table. + */ +void intel_gt_fini_hwconfig(struct intel_gt *gt) +{ + struct intel_hwconfig *hwconfig = >->info.hwconfig; + + kfree(hwconfig->ptr); + hwconfig->size = 0; + hwconfig->ptr = NULL; +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c index a24dc6441872..78d2989fe917 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -10,9 +10,10 @@ #include "i915_drv.h" #include "i915_irq.h" #include "i915_memcpy.h" +#include "intel_guc_capture.h" #include "intel_guc_log.h" -static void guc_log_capture_logs(struct intel_guc_log *log); +static void guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log); /** * DOC: GuC firmware log @@ -26,7 +27,8 @@ static void guc_log_capture_logs(struct intel_guc_log *log); static int guc_action_flush_log_complete(struct intel_guc *guc) { u32 action[] = { - INTEL_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE + INTEL_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE, + GUC_DEBUG_LOG_BUFFER }; return intel_guc_send(guc, action, ARRAY_SIZE(action)); @@ -137,7 +139,7 @@ static void guc_move_to_next_buf(struct intel_guc_log *log) smp_wmb(); /* All data has been written, so now move the offset of sub buffer. */ - relay_reserve(log->relay.channel, log->vma->obj->base.size); + relay_reserve(log->relay.channel, log->vma->obj->base.size - CAPTURE_BUFFER_SIZE); /* Switch to the next sub buffer */ relay_flush(log->relay.channel); @@ -157,9 +159,9 @@ static void *guc_get_write_buffer(struct intel_guc_log *log) return relay_reserve(log->relay.channel, 0); } -static bool guc_check_log_buf_overflow(struct intel_guc_log *log, - enum guc_log_buffer_type type, - unsigned int full_cnt) +bool intel_guc_check_log_buf_overflow(struct intel_guc_log *log, + enum guc_log_buffer_type type, + unsigned int full_cnt) { unsigned int prev_full_cnt = log->stats[type].sampled_overflow; bool overflow = false; @@ -182,7 +184,7 @@ static bool guc_check_log_buf_overflow(struct intel_guc_log *log, return overflow; } -static unsigned int guc_get_log_buffer_size(enum guc_log_buffer_type type) +unsigned int intel_guc_get_log_buffer_size(enum guc_log_buffer_type type) { switch (type) { case GUC_DEBUG_LOG_BUFFER: @@ -198,7 +200,21 @@ static unsigned int guc_get_log_buffer_size(enum guc_log_buffer_type type) return 0; } -static void guc_read_update_log_buffer(struct intel_guc_log *log) +size_t intel_guc_get_log_buffer_offset(enum guc_log_buffer_type type) +{ + enum guc_log_buffer_type i; + size_t offset = PAGE_SIZE;/* for the log_buffer_states */ + + for (i = GUC_DEBUG_LOG_BUFFER; i < GUC_MAX_LOG_BUFFER; ++i) { + if (i == type) + break; + offset += intel_guc_get_log_buffer_size(i); + } + + return offset; +} + +static void _guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log) { unsigned int buffer_size, read_offset, write_offset, bytes_to_copy, full_cnt; struct guc_log_buffer_state *log_buf_state, *log_buf_snapshot_state; @@ -213,7 +229,8 @@ static void guc_read_update_log_buffer(struct intel_guc_log *log) goto out_unlock; /* Get the pointer to shared GuC log buffer */ - log_buf_state = src_data = log->relay.buf_addr; + src_data = log->buf_addr; + log_buf_state = src_data; /* Get the pointer to local buffer to store the logs */ log_buf_snapshot_state = dst_data = guc_get_write_buffer(log); @@ -223,7 +240,7 @@ static void guc_read_update_log_buffer(struct intel_guc_log *log) * Used rate limited to avoid deluge of messages, logs might be * getting consumed by User at a slow rate. */ - DRM_ERROR_RATELIMITED("no sub-buffer to capture logs\n"); + DRM_ERROR_RATELIMITED("no sub-buffer to copy general logs\n"); log->relay.full_count++; goto out_unlock; @@ -233,7 +250,8 @@ static void guc_read_update_log_buffer(struct intel_guc_log *log) src_data += PAGE_SIZE; dst_data += PAGE_SIZE; - for (type = GUC_DEBUG_LOG_BUFFER; type < GUC_MAX_LOG_BUFFER; type++) { + /* For relay logging, we exclude error state capture */ + for (type = GUC_DEBUG_LOG_BUFFER; type <= GUC_CRASH_DUMP_LOG_BUFFER; type++) { /* * Make a copy of the state structure, inside GuC log buffer * (which is uncached mapped), on the stack to avoid reading @@ -241,14 +259,14 @@ static void guc_read_update_log_buffer(struct intel_guc_log *log) */ memcpy(&log_buf_state_local, log_buf_state, sizeof(struct guc_log_buffer_state)); - buffer_size = guc_get_log_buffer_size(type); + buffer_size = intel_guc_get_log_buffer_size(type); read_offset = log_buf_state_local.read_ptr; write_offset = log_buf_state_local.sampled_write_ptr; full_cnt = log_buf_state_local.buffer_full_cnt; /* Bookkeeping stuff */ log->stats[type].flush += log_buf_state_local.flush_to_file; - new_overflow = guc_check_log_buf_overflow(log, type, full_cnt); + new_overflow = intel_guc_check_log_buf_overflow(log, type, full_cnt); /* Update the state of shared log buffer */ log_buf_state->read_ptr = write_offset; @@ -301,49 +319,43 @@ out_unlock: mutex_unlock(&log->relay.lock); } -static void capture_logs_work(struct work_struct *work) +static void copy_debug_logs_work(struct work_struct *work) { struct intel_guc_log *log = container_of(work, struct intel_guc_log, relay.flush_work); - guc_log_capture_logs(log); + guc_log_copy_debuglogs_for_relay(log); } -static int guc_log_map(struct intel_guc_log *log) +static int guc_log_relay_map(struct intel_guc_log *log) { - void *vaddr; - lockdep_assert_held(&log->relay.lock); - if (!log->vma) + if (!log->vma || !log->buf_addr) return -ENODEV; /* - * Create a WC (Uncached for read) vmalloc mapping of log - * buffer pages, so that we can directly get the data - * (up-to-date) from memory. + * WC vmalloc mapping of log buffer pages was done at + * GuC Log Init time, but lets keep a ref for book-keeping */ - vaddr = i915_gem_object_pin_map_unlocked(log->vma->obj, I915_MAP_WC); - if (IS_ERR(vaddr)) - return PTR_ERR(vaddr); - - log->relay.buf_addr = vaddr; + i915_gem_object_get(log->vma->obj); + log->relay.buf_in_use = true; return 0; } -static void guc_log_unmap(struct intel_guc_log *log) +static void guc_log_relay_unmap(struct intel_guc_log *log) { lockdep_assert_held(&log->relay.lock); - i915_gem_object_unpin_map(log->vma->obj); - log->relay.buf_addr = NULL; + i915_gem_object_put(log->vma->obj); + log->relay.buf_in_use = false; } void intel_guc_log_init_early(struct intel_guc_log *log) { mutex_init(&log->relay.lock); - INIT_WORK(&log->relay.flush_work, capture_logs_work); + INIT_WORK(&log->relay.flush_work, copy_debug_logs_work); log->relay.started = false; } @@ -358,8 +370,11 @@ static int guc_log_relay_create(struct intel_guc_log *log) lockdep_assert_held(&log->relay.lock); GEM_BUG_ON(!log->vma); - /* Keep the size of sub buffers same as shared log buffer */ - subbuf_size = log->vma->size; + /* + * Keep the size of sub buffers same as shared log buffer + * but GuC log-events excludes the error-state-capture logs + */ + subbuf_size = log->vma->size - CAPTURE_BUFFER_SIZE; /* * Store up to 8 snapshots, which is large enough to buffer sufficient @@ -394,13 +409,13 @@ static void guc_log_relay_destroy(struct intel_guc_log *log) log->relay.channel = NULL; } -static void guc_log_capture_logs(struct intel_guc_log *log) +static void guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log) { struct intel_guc *guc = log_to_guc(log); struct drm_i915_private *dev_priv = guc_to_gt(guc)->i915; intel_wakeref_t wakeref; - guc_read_update_log_buffer(log); + _guc_log_copy_debuglogs_for_relay(log); /* * Generally device is expected to be active only at this @@ -440,6 +455,7 @@ int intel_guc_log_create(struct intel_guc_log *log) { struct intel_guc *guc = log_to_guc(log); struct i915_vma *vma; + void *vaddr; u32 guc_log_size; int ret; @@ -447,23 +463,28 @@ int intel_guc_log_create(struct intel_guc_log *log) /* * GuC Log buffer Layout + * (this ordering must follow "enum guc_log_buffer_type" definition) * * +===============================+ 00B - * | Crash dump state header | - * +-------------------------------+ 32B * | Debug state header | + * +-------------------------------+ 32B + * | Crash dump state header | * +-------------------------------+ 64B * | Capture state header | * +-------------------------------+ 96B * | | * +===============================+ PAGE_SIZE (4KB) - * | Crash Dump logs | - * +===============================+ + CRASH_SIZE * | Debug logs | * +===============================+ + DEBUG_SIZE + * | Crash Dump logs | + * +===============================+ + CRASH_SIZE * | Capture logs | * +===============================+ + CAPTURE_SIZE */ + if (intel_guc_capture_output_min_size_est(guc) > CAPTURE_BUFFER_SIZE) + DRM_WARN("GuC log buffer for state_capture maybe too small. %d < %d\n", + CAPTURE_BUFFER_SIZE, intel_guc_capture_output_min_size_est(guc)); + guc_log_size = PAGE_SIZE + CRASH_BUFFER_SIZE + DEBUG_BUFFER_SIZE + CAPTURE_BUFFER_SIZE; @@ -474,6 +495,17 @@ int intel_guc_log_create(struct intel_guc_log *log) } log->vma = vma; + /* + * Create a WC (Uncached for read) vmalloc mapping up front immediate access to + * data from memory during critical events such as error capture + */ + vaddr = i915_gem_object_pin_map_unlocked(log->vma->obj, I915_MAP_WC); + if (IS_ERR(vaddr)) { + ret = PTR_ERR(vaddr); + i915_vma_unpin_and_release(&log->vma, 0); + goto err; + } + log->buf_addr = vaddr; log->level = __get_default_log_level(log); DRM_DEBUG_DRIVER("guc_log_level=%d (%s, verbose:%s, verbosity:%d)\n", @@ -484,13 +516,14 @@ int intel_guc_log_create(struct intel_guc_log *log) return 0; err: - DRM_ERROR("Failed to allocate GuC log buffer. %d\n", ret); + DRM_ERROR("Failed to allocate or map GuC log buffer. %d\n", ret); return ret; } void intel_guc_log_destroy(struct intel_guc_log *log) { - i915_vma_unpin_and_release(&log->vma, 0); + log->buf_addr = NULL; + i915_vma_unpin_and_release(&log->vma, I915_VMA_RELEASE_MAP); } int intel_guc_log_set_level(struct intel_guc_log *log, u32 level) @@ -535,7 +568,7 @@ out_unlock: bool intel_guc_log_relay_created(const struct intel_guc_log *log) { - return log->relay.buf_addr; + return log->buf_addr; } int intel_guc_log_relay_open(struct intel_guc_log *log) @@ -566,7 +599,7 @@ int intel_guc_log_relay_open(struct intel_guc_log *log) if (ret) goto out_unlock; - ret = guc_log_map(log); + ret = guc_log_relay_map(log); if (ret) goto out_relay; @@ -616,8 +649,8 @@ void intel_guc_log_relay_flush(struct intel_guc_log *log) with_intel_runtime_pm(guc_to_gt(guc)->uncore->rpm, wakeref) guc_action_flush_log(guc); - /* GuC would have updated log buffer by now, so capture it */ - guc_log_capture_logs(log); + /* GuC would have updated log buffer by now, so copy it */ + guc_log_copy_debuglogs_for_relay(log); } /* @@ -646,7 +679,7 @@ void intel_guc_log_relay_close(struct intel_guc_log *log) mutex_lock(&log->relay.lock); GEM_BUG_ON(!intel_guc_log_relay_created(log)); - guc_log_unmap(log); + guc_log_relay_unmap(log); guc_log_relay_destroy(log); mutex_unlock(&log->relay.lock); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h index d7e1b6471fed..18007e639be9 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h @@ -49,8 +49,9 @@ struct intel_guc; struct intel_guc_log { u32 level; struct i915_vma *vma; + void *buf_addr; struct { - void *buf_addr; + bool buf_in_use; bool started; struct work_struct flush_work; struct rchan *channel; @@ -66,6 +67,10 @@ struct intel_guc_log { }; void intel_guc_log_init_early(struct intel_guc_log *log); +bool intel_guc_check_log_buf_overflow(struct intel_guc_log *log, enum guc_log_buffer_type type, + unsigned int full_cnt); +unsigned int intel_guc_get_log_buffer_size(enum guc_log_buffer_type type); +size_t intel_guc_get_log_buffer_offset(enum guc_log_buffer_type type); int intel_guc_log_create(struct intel_guc_log *log); void intel_guc_log_destroy(struct intel_guc_log *log); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index 9f032c65a488..1db833da42df 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -153,8 +153,8 @@ static int slpc_query_task_state(struct intel_guc_slpc *slpc) ret = guc_action_slpc_query(guc, offset); if (unlikely(ret)) - drm_err(&i915->drm, "Failed to query task state (%pe)\n", - ERR_PTR(ret)); + i915_probe_error(i915, "Failed to query task state (%pe)\n", + ERR_PTR(ret)); drm_clflush_virt_range(slpc->vaddr, SLPC_PAGE_SIZE_BYTES); @@ -171,8 +171,8 @@ static int slpc_set_param(struct intel_guc_slpc *slpc, u8 id, u32 value) ret = guc_action_slpc_set_param(guc, id, value); if (ret) - drm_err(&i915->drm, "Failed to set param %d to %u (%pe)\n", - id, value, ERR_PTR(ret)); + i915_probe_error(i915, "Failed to set param %d to %u (%pe)\n", + id, value, ERR_PTR(ret)); return ret; } @@ -212,8 +212,8 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq) SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, freq); if (ret) - drm_err(&i915->drm, "Unable to force min freq to %u: %d", - freq, ret); + i915_probe_error(i915, "Unable to force min freq to %u: %d", + freq, ret); } return ret; @@ -248,9 +248,9 @@ int intel_guc_slpc_init(struct intel_guc_slpc *slpc) err = intel_guc_allocate_and_map_vma(guc, size, &slpc->vma, (void **)&slpc->vaddr); if (unlikely(err)) { - drm_err(&i915->drm, - "Failed to allocate SLPC struct (err=%pe)\n", - ERR_PTR(err)); + i915_probe_error(i915, + "Failed to allocate SLPC struct (err=%pe)\n", + ERR_PTR(err)); return err; } @@ -317,15 +317,15 @@ static int slpc_reset(struct intel_guc_slpc *slpc) ret = guc_action_slpc_reset(guc, offset); if (unlikely(ret < 0)) { - drm_err(&i915->drm, "SLPC reset action failed (%pe)\n", - ERR_PTR(ret)); + i915_probe_error(i915, "SLPC reset action failed (%pe)\n", + ERR_PTR(ret)); return ret; } if (!ret) { if (wait_for(slpc_is_running(slpc), SLPC_RESET_TIMEOUT_MS)) { - drm_err(&i915->drm, "SLPC not enabled! State = %s\n", - slpc_get_state_string(slpc)); + i915_probe_error(i915, "SLPC not enabled! State = %s\n", + slpc_get_state_string(slpc)); return -EIO; } } @@ -582,16 +582,12 @@ static int slpc_use_fused_rp0(struct intel_guc_slpc *slpc) static void slpc_get_rp_values(struct intel_guc_slpc *slpc) { struct intel_rps *rps = &slpc_to_gt(slpc)->rps; - u32 rp_state_cap; + struct intel_rps_freq_caps caps; - rp_state_cap = intel_rps_read_state_cap(rps); - - slpc->rp0_freq = REG_FIELD_GET(RP0_CAP_MASK, rp_state_cap) * - GT_FREQUENCY_MULTIPLIER; - slpc->rp1_freq = REG_FIELD_GET(RP1_CAP_MASK, rp_state_cap) * - GT_FREQUENCY_MULTIPLIER; - slpc->min_freq = REG_FIELD_GET(RPN_CAP_MASK, rp_state_cap) * - GT_FREQUENCY_MULTIPLIER; + gen6_rps_get_freq_caps(rps, &caps); + slpc->rp0_freq = intel_gpu_freq(rps, caps.rp0_freq); + slpc->rp1_freq = intel_gpu_freq(rps, caps.rp1_freq); + slpc->min_freq = intel_gpu_freq(rps, caps.min_freq); if (!slpc->boost_freq) slpc->boost_freq = slpc->rp0_freq; @@ -621,8 +617,8 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) ret = slpc_reset(slpc); if (unlikely(ret < 0)) { - drm_err(&i915->drm, "SLPC Reset event returned (%pe)\n", - ERR_PTR(ret)); + i915_probe_error(i915, "SLPC Reset event returned (%pe)\n", + ERR_PTR(ret)); return ret; } @@ -637,24 +633,24 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) /* Ignore efficient freq and set min to platform min */ ret = slpc_ignore_eff_freq(slpc, true); if (unlikely(ret)) { - drm_err(&i915->drm, "Failed to set SLPC min to RPn (%pe)\n", - ERR_PTR(ret)); + i915_probe_error(i915, "Failed to set SLPC min to RPn (%pe)\n", + ERR_PTR(ret)); return ret; } /* Set SLPC max limit to RP0 */ ret = slpc_use_fused_rp0(slpc); if (unlikely(ret)) { - drm_err(&i915->drm, "Failed to set SLPC max to RP0 (%pe)\n", - ERR_PTR(ret)); + i915_probe_error(i915, "Failed to set SLPC max to RP0 (%pe)\n", + ERR_PTR(ret)); return ret; } /* Revert SLPC min/max to softlimits if necessary */ ret = slpc_set_softlimits(slpc); if (unlikely(ret)) { - drm_err(&i915->drm, "Failed to set SLPC softlimits (%pe)\n", - ERR_PTR(ret)); + i915_probe_error(i915, "Failed to set SLPC softlimits (%pe)\n", + ERR_PTR(ret)); return ret; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 1ce7e04aa837..61a6f2424e24 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -25,6 +25,7 @@ #include "gt/intel_ring.h" #include "intel_guc_ads.h" +#include "intel_guc_capture.h" #include "intel_guc_submission.h" #include "i915_drv.h" @@ -161,7 +162,8 @@ guc_create_parallel(struct intel_engine_cs **engines, #define SCHED_STATE_ENABLED BIT(4) #define SCHED_STATE_PENDING_ENABLE BIT(5) #define SCHED_STATE_REGISTERED BIT(6) -#define SCHED_STATE_BLOCKED_SHIFT 7 +#define SCHED_STATE_POLICY_REQUIRED BIT(7) +#define SCHED_STATE_BLOCKED_SHIFT 8 #define SCHED_STATE_BLOCKED BIT(SCHED_STATE_BLOCKED_SHIFT) #define SCHED_STATE_BLOCKED_MASK (0xfff << SCHED_STATE_BLOCKED_SHIFT) @@ -300,6 +302,23 @@ static inline void clr_context_registered(struct intel_context *ce) ce->guc_state.sched_state &= ~SCHED_STATE_REGISTERED; } +static inline bool context_policy_required(struct intel_context *ce) +{ + return ce->guc_state.sched_state & SCHED_STATE_POLICY_REQUIRED; +} + +static inline void set_context_policy_required(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state |= SCHED_STATE_POLICY_REQUIRED; +} + +static inline void clr_context_policy_required(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state &= ~SCHED_STATE_POLICY_REQUIRED; +} + static inline u32 context_blocked(struct intel_context *ce) { return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >> @@ -351,12 +370,12 @@ request_to_scheduling_context(struct i915_request *rq) static inline bool context_guc_id_invalid(struct intel_context *ce) { - return ce->guc_id.id == GUC_INVALID_LRC_ID; + return ce->guc_id.id == GUC_INVALID_CONTEXT_ID; } static inline void set_context_guc_id_invalid(struct intel_context *ce) { - ce->guc_id.id = GUC_INVALID_LRC_ID; + ce->guc_id.id = GUC_INVALID_CONTEXT_ID; } static inline struct intel_guc *ce_to_guc(struct intel_context *ce) @@ -395,12 +414,12 @@ struct sync_semaphore { }; struct parent_scratch { - struct guc_process_desc pdesc; + struct guc_sched_wq_desc wq_desc; struct sync_semaphore go; struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1]; - u8 unused[WQ_OFFSET - sizeof(struct guc_process_desc) - + u8 unused[WQ_OFFSET - sizeof(struct guc_sched_wq_desc) - sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)]; u32 wq[WQ_SIZE / sizeof(u32)]; @@ -437,15 +456,15 @@ __get_parent_scratch(struct intel_context *ce) LRC_STATE_OFFSET) / sizeof(u32))); } -static struct guc_process_desc * -__get_process_desc(struct intel_context *ce) +static struct guc_sched_wq_desc * +__get_wq_desc(struct intel_context *ce) { struct parent_scratch *ps = __get_parent_scratch(ce); - return &ps->pdesc; + return &ps->wq_desc; } -static u32 *get_wq_pointer(struct guc_process_desc *desc, +static u32 *get_wq_pointer(struct guc_sched_wq_desc *wq_desc, struct intel_context *ce, u32 wqi_size) { @@ -457,7 +476,7 @@ static u32 *get_wq_pointer(struct guc_process_desc *desc, #define AVAILABLE_SPACE \ CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE) if (wqi_size > AVAILABLE_SPACE) { - ce->parallel.guc.wqi_head = READ_ONCE(desc->head); + ce->parallel.guc.wqi_head = READ_ONCE(wq_desc->head); if (wqi_size > AVAILABLE_SPACE) return NULL; @@ -467,75 +486,27 @@ static u32 *get_wq_pointer(struct guc_process_desc *desc, return &__get_parent_scratch(ce)->wq[ce->parallel.guc.wqi_tail / sizeof(u32)]; } -static struct guc_lrc_desc *__get_lrc_desc(struct intel_guc *guc, u32 index) -{ - struct guc_lrc_desc *base = guc->lrc_desc_pool_vaddr; - - GEM_BUG_ON(index >= GUC_MAX_LRC_DESCRIPTORS); - - return &base[index]; -} - static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id) { struct intel_context *ce = xa_load(&guc->context_lookup, id); - GEM_BUG_ON(id >= GUC_MAX_LRC_DESCRIPTORS); + GEM_BUG_ON(id >= GUC_MAX_CONTEXT_ID); return ce; } -static int guc_lrc_desc_pool_create(struct intel_guc *guc) -{ - u32 size; - int ret; - - size = PAGE_ALIGN(sizeof(struct guc_lrc_desc) * - GUC_MAX_LRC_DESCRIPTORS); - ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool, - (void **)&guc->lrc_desc_pool_vaddr); - if (ret) - return ret; - - return 0; -} - -static void guc_lrc_desc_pool_destroy(struct intel_guc *guc) -{ - guc->lrc_desc_pool_vaddr = NULL; - i915_vma_unpin_and_release(&guc->lrc_desc_pool, I915_VMA_RELEASE_MAP); -} - static inline bool guc_submission_initialized(struct intel_guc *guc) { - return !!guc->lrc_desc_pool_vaddr; + return guc->submission_initialized; } -static inline void reset_lrc_desc(struct intel_guc *guc, u32 id) -{ - if (likely(guc_submission_initialized(guc))) { - struct guc_lrc_desc *desc = __get_lrc_desc(guc, id); - unsigned long flags; - - memset(desc, 0, sizeof(*desc)); - - /* - * xarray API doesn't have xa_erase_irqsave wrapper, so calling - * the lower level functions directly. - */ - xa_lock_irqsave(&guc->context_lookup, flags); - __xa_erase(&guc->context_lookup, id); - xa_unlock_irqrestore(&guc->context_lookup, flags); - } -} - -static inline bool lrc_desc_registered(struct intel_guc *guc, u32 id) +static inline bool ctx_id_mapped(struct intel_guc *guc, u32 id) { return __get_context(guc, id); } -static inline void set_lrc_desc_registered(struct intel_guc *guc, u32 id, - struct intel_context *ce) +static inline void set_ctx_id_mapping(struct intel_guc *guc, u32 id, + struct intel_context *ce) { unsigned long flags; @@ -548,6 +519,22 @@ static inline void set_lrc_desc_registered(struct intel_guc *guc, u32 id, xa_unlock_irqrestore(&guc->context_lookup, flags); } +static inline void clr_ctx_id_mapping(struct intel_guc *guc, u32 id) +{ + unsigned long flags; + + if (unlikely(!guc_submission_initialized(guc))) + return; + + /* + * xarray API doesn't have xa_erase_irqsave wrapper, so calling + * the lower level functions directly. + */ + xa_lock_irqsave(&guc->context_lookup, flags); + __xa_erase(&guc->context_lookup, id); + xa_unlock_irqrestore(&guc->context_lookup, flags); +} + static void decr_outstanding_submission_g2h(struct intel_guc *guc) { if (atomic_dec_and_test(&guc->outstanding_submission_g2h)) @@ -624,7 +611,8 @@ int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout) true, timeout); } -static int guc_lrc_desc_pin(struct intel_context *ce, bool loop); +static int guc_context_policy_init(struct intel_context *ce, bool loop); +static int try_context_registration(struct intel_context *ce, bool loop); static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq) { @@ -650,6 +638,12 @@ static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq) GEM_BUG_ON(!atomic_read(&ce->guc_id.ref)); GEM_BUG_ON(context_guc_id_invalid(ce)); + if (context_policy_required(ce)) { + err = guc_context_policy_init(ce, false); + if (err) + return err; + } + spin_lock(&ce->guc_state.lock); /* @@ -743,7 +737,7 @@ static u32 wq_space_until_wrap(struct intel_context *ce) return (WQ_SIZE - ce->parallel.guc.wqi_tail); } -static void write_wqi(struct guc_process_desc *desc, +static void write_wqi(struct guc_sched_wq_desc *wq_desc, struct intel_context *ce, u32 wqi_size) { @@ -756,13 +750,13 @@ static void write_wqi(struct guc_process_desc *desc, ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) & (WQ_SIZE - 1); - WRITE_ONCE(desc->tail, ce->parallel.guc.wqi_tail); + WRITE_ONCE(wq_desc->tail, ce->parallel.guc.wqi_tail); } static int guc_wq_noop_append(struct intel_context *ce) { - struct guc_process_desc *desc = __get_process_desc(ce); - u32 *wqi = get_wq_pointer(desc, ce, wq_space_until_wrap(ce)); + struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce); + u32 *wqi = get_wq_pointer(wq_desc, ce, wq_space_until_wrap(ce)); u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1; if (!wqi) @@ -781,7 +775,7 @@ static int __guc_wq_item_append(struct i915_request *rq) { struct intel_context *ce = request_to_scheduling_context(rq); struct intel_context *child; - struct guc_process_desc *desc = __get_process_desc(ce); + struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce); unsigned int wqi_size = (ce->parallel.number_children + 4) * sizeof(u32); u32 *wqi; @@ -792,7 +786,7 @@ static int __guc_wq_item_append(struct i915_request *rq) GEM_BUG_ON(!atomic_read(&ce->guc_id.ref)); GEM_BUG_ON(context_guc_id_invalid(ce)); GEM_BUG_ON(context_wait_for_deregister_to_register(ce)); - GEM_BUG_ON(!lrc_desc_registered(ce_to_guc(ce), ce->guc_id.id)); + GEM_BUG_ON(!ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id)); /* Insert NOOP if this work queue item will wrap the tail pointer. */ if (wqi_size > wq_space_until_wrap(ce)) { @@ -801,7 +795,7 @@ static int __guc_wq_item_append(struct i915_request *rq) return ret; } - wqi = get_wq_pointer(desc, ce, wqi_size); + wqi = get_wq_pointer(wq_desc, ce, wqi_size); if (!wqi) return -EBUSY; @@ -816,7 +810,7 @@ static int __guc_wq_item_append(struct i915_request *rq) for_each_child(ce, child) *wqi++ = child->ring->tail / sizeof(u64); - write_wqi(desc, ce, wqi_size); + write_wqi(wq_desc, ce, wqi_size); return 0; } @@ -920,9 +914,9 @@ register_context: if (submit) { struct intel_context *ce = request_to_scheduling_context(last); - if (unlikely(!lrc_desc_registered(guc, ce->guc_id.id) && + if (unlikely(!ctx_id_mapped(guc, ce->guc_id.id) && !intel_context_is_banned(ce))) { - ret = guc_lrc_desc_pin(ce, false); + ret = try_context_registration(ce, false); if (unlikely(ret == -EPIPE)) { goto deadlk; } else if (ret == -EBUSY) { @@ -1546,6 +1540,89 @@ static void guc_reset_state(struct intel_context *ce, u32 head, bool scrub) lrc_update_regs(ce, engine, head); } +static u32 __cs_pending_mi_force_wakes(struct intel_engine_cs *engine) +{ + static const i915_reg_t _reg[I915_NUM_ENGINES] = { + [RCS0] = MSG_IDLE_CS, + [BCS0] = MSG_IDLE_BCS, + [VCS0] = MSG_IDLE_VCS0, + [VCS1] = MSG_IDLE_VCS1, + [VCS2] = MSG_IDLE_VCS2, + [VCS3] = MSG_IDLE_VCS3, + [VCS4] = MSG_IDLE_VCS4, + [VCS5] = MSG_IDLE_VCS5, + [VCS6] = MSG_IDLE_VCS6, + [VCS7] = MSG_IDLE_VCS7, + [VECS0] = MSG_IDLE_VECS0, + [VECS1] = MSG_IDLE_VECS1, + [VECS2] = MSG_IDLE_VECS2, + [VECS3] = MSG_IDLE_VECS3, + [CCS0] = MSG_IDLE_CS, + [CCS1] = MSG_IDLE_CS, + [CCS2] = MSG_IDLE_CS, + [CCS3] = MSG_IDLE_CS, + }; + u32 val; + + if (!_reg[engine->id].reg) + return 0; + + val = intel_uncore_read(engine->uncore, _reg[engine->id]); + + /* bits[29:25] & bits[13:9] >> shift */ + return (val & (val >> 16) & MSG_IDLE_FW_MASK) >> MSG_IDLE_FW_SHIFT; +} + +static void __gpm_wait_for_fw_complete(struct intel_gt *gt, u32 fw_mask) +{ + int ret; + + /* Ensure GPM receives fw up/down after CS is stopped */ + udelay(1); + + /* Wait for forcewake request to complete in GPM */ + ret = __intel_wait_for_register_fw(gt->uncore, + GEN9_PWRGT_DOMAIN_STATUS, + fw_mask, fw_mask, 5000, 0, NULL); + + /* Ensure CS receives fw ack from GPM */ + udelay(1); + + if (ret) + GT_TRACE(gt, "Failed to complete pending forcewake %d\n", ret); +} + +/* + * Wa_22011802037:gen12: In addition to stopping the cs, we need to wait for any + * pending MI_FORCE_WAKEUP requests that the CS has initiated to complete. The + * pending status is indicated by bits[13:9] (masked by bits[ 29:25]) in the + * MSG_IDLE register. There's one MSG_IDLE register per reset domain. Since we + * are concerned only with the gt reset here, we use a logical OR of pending + * forcewakeups from all reset domains and then wait for them to complete by + * querying PWRGT_DOMAIN_STATUS. + */ +static void guc_engine_reset_prepare(struct intel_engine_cs *engine) +{ + u32 fw_pending; + + if (GRAPHICS_VER(engine->i915) != 12) + return; + + /* + * Wa_22011802037 + * TODO: Occasionally trying to stop the cs times out, but does not + * adversely affect functionality. The timeout is set as a config + * parameter that defaults to 100ms. Assuming that this timeout is + * sufficient for any pending MI_FORCEWAKEs to complete, ignore the + * timeout returned here until it is root caused. + */ + intel_engine_stop_cs(engine); + + fw_pending = __cs_pending_mi_force_wakes(engine); + if (fw_pending) + __gpm_wait_for_fw_complete(engine->gt, fw_pending); +} + static void guc_reset_nop(struct intel_engine_cs *engine) { } @@ -1804,20 +1881,10 @@ static void reset_fail_worker_func(struct work_struct *w); int intel_guc_submission_init(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); - int ret; - if (guc->lrc_desc_pool) + if (guc->submission_initialized) return 0; - ret = guc_lrc_desc_pool_create(guc); - if (ret) - return ret; - /* - * Keep static analysers happy, let them know that we allocated the - * vma after testing that it didn't exist earlier. - */ - GEM_BUG_ON(!guc->lrc_desc_pool); - guc->submission_state.guc_ids_bitmap = bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL); if (!guc->submission_state.guc_ids_bitmap) @@ -1825,19 +1892,20 @@ int intel_guc_submission_init(struct intel_guc *guc) guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ; guc->timestamp.shift = gpm_timestamp_shift(gt); + guc->submission_initialized = true; return 0; } void intel_guc_submission_fini(struct intel_guc *guc) { - if (!guc->lrc_desc_pool) + if (!guc->submission_initialized) return; guc_flush_destroyed_contexts(guc); - guc_lrc_desc_pool_destroy(guc); i915_sched_engine_put(guc->sched_engine); bitmap_free(guc->submission_state.guc_ids_bitmap); + guc->submission_initialized = false; } static inline void queue_request(struct i915_sched_engine *sched_engine, @@ -1884,7 +1952,7 @@ static bool need_tasklet(struct intel_guc *guc, struct i915_request *rq) return submission_disabled(guc) || guc->stalled_request || !i915_sched_engine_is_empty(sched_engine) || - !lrc_desc_registered(guc, ce->guc_id.id); + !ctx_id_mapped(guc, ce->guc_id.id); } static void guc_submit_request(struct i915_request *rq) @@ -1941,7 +2009,7 @@ static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce) else ida_simple_remove(&guc->submission_state.guc_ids, ce->guc_id.id); - reset_lrc_desc(guc, ce->guc_id.id); + clr_ctx_id_mapping(guc, ce->guc_id.id); set_context_guc_id_invalid(ce); } if (!list_empty(&ce->guc_id.link)) @@ -2094,65 +2162,96 @@ static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce) static int __guc_action_register_multi_lrc(struct intel_guc *guc, struct intel_context *ce, - u32 guc_id, - u32 offset, + struct guc_ctxt_registration_info *info, bool loop) { struct intel_context *child; - u32 action[4 + MAX_ENGINE_INSTANCE]; + u32 action[13 + (MAX_ENGINE_INSTANCE * 2)]; int len = 0; + u32 next_id; GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE); action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; - action[len++] = guc_id; + action[len++] = info->flags; + action[len++] = info->context_idx; + action[len++] = info->engine_class; + action[len++] = info->engine_submit_mask; + action[len++] = info->wq_desc_lo; + action[len++] = info->wq_desc_hi; + action[len++] = info->wq_base_lo; + action[len++] = info->wq_base_hi; + action[len++] = info->wq_size; action[len++] = ce->parallel.number_children + 1; - action[len++] = offset; + action[len++] = info->hwlrca_lo; + action[len++] = info->hwlrca_hi; + + next_id = info->context_idx + 1; for_each_child(ce, child) { - offset += sizeof(struct guc_lrc_desc); - action[len++] = offset; + GEM_BUG_ON(next_id++ != child->guc_id.id); + + /* + * NB: GuC interface supports 64 bit LRCA even though i915/HW + * only supports 32 bit currently. + */ + action[len++] = lower_32_bits(child->lrc.lrca); + action[len++] = upper_32_bits(child->lrc.lrca); } + GEM_BUG_ON(len > ARRAY_SIZE(action)); + return guc_submission_send_busy_loop(guc, action, len, 0, loop); } static int __guc_action_register_context(struct intel_guc *guc, - u32 guc_id, - u32 offset, + struct guc_ctxt_registration_info *info, bool loop) { u32 action[] = { INTEL_GUC_ACTION_REGISTER_CONTEXT, - guc_id, - offset, + info->flags, + info->context_idx, + info->engine_class, + info->engine_submit_mask, + info->wq_desc_lo, + info->wq_desc_hi, + info->wq_base_lo, + info->wq_base_hi, + info->wq_size, + info->hwlrca_lo, + info->hwlrca_hi, }; return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, loop); } +static void prepare_context_registration_info(struct intel_context *ce, + struct guc_ctxt_registration_info *info); + static int register_context(struct intel_context *ce, bool loop) { + struct guc_ctxt_registration_info info; struct intel_guc *guc = ce_to_guc(ce); - u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool) + - ce->guc_id.id * sizeof(struct guc_lrc_desc); int ret; GEM_BUG_ON(intel_context_is_child(ce)); trace_intel_context_register(ce); + prepare_context_registration_info(ce, &info); + if (intel_context_is_parent(ce)) - ret = __guc_action_register_multi_lrc(guc, ce, ce->guc_id.id, - offset, loop); + ret = __guc_action_register_multi_lrc(guc, ce, &info, loop); else - ret = __guc_action_register_context(guc, ce->guc_id.id, offset, - loop); + ret = __guc_action_register_context(guc, &info, loop); if (likely(!ret)) { unsigned long flags; spin_lock_irqsave(&ce->guc_state.lock, flags); set_context_registered(ce); spin_unlock_irqrestore(&ce->guc_state.lock, flags); + + guc_context_policy_init(ce, loop); } return ret; @@ -2202,33 +2301,120 @@ static inline u32 get_children_join_value(struct intel_context *ce, return __get_parent_scratch(ce)->join[child_index].semaphore; } -static void guc_context_policy_init(struct intel_engine_cs *engine, - struct guc_lrc_desc *desc) +struct context_policy { + u32 count; + struct guc_update_context_policy h2g; +}; + +static u32 __guc_context_policy_action_size(struct context_policy *policy) { - desc->policy_flags = 0; + size_t bytes = sizeof(policy->h2g.header) + + (sizeof(policy->h2g.klv[0]) * policy->count); - if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION) - desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE; + return bytes / sizeof(u32); +} + +static void __guc_context_policy_start_klv(struct context_policy *policy, u16 guc_id) +{ + policy->h2g.header.action = INTEL_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES; + policy->h2g.header.ctx_id = guc_id; + policy->count = 0; +} + +#define MAKE_CONTEXT_POLICY_ADD(func, id) \ +static void __guc_context_policy_add_##func(struct context_policy *policy, u32 data) \ +{ \ + GEM_BUG_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \ + policy->h2g.klv[policy->count].kl = \ + FIELD_PREP(GUC_KLV_0_KEY, GUC_CONTEXT_POLICIES_KLV_ID_##id) | \ + FIELD_PREP(GUC_KLV_0_LEN, 1); \ + policy->h2g.klv[policy->count].value = data; \ + policy->count++; \ +} + +MAKE_CONTEXT_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM) +MAKE_CONTEXT_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT) +MAKE_CONTEXT_POLICY_ADD(priority, SCHEDULING_PRIORITY) +MAKE_CONTEXT_POLICY_ADD(preempt_to_idle, PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY) + +#undef MAKE_CONTEXT_POLICY_ADD + +static int __guc_context_set_context_policies(struct intel_guc *guc, + struct context_policy *policy, + bool loop) +{ + return guc_submission_send_busy_loop(guc, (u32 *)&policy->h2g, + __guc_context_policy_action_size(policy), + 0, loop); +} + +static int guc_context_policy_init(struct intel_context *ce, bool loop) +{ + struct intel_engine_cs *engine = ce->engine; + struct intel_guc *guc = &engine->gt->uc.guc; + struct context_policy policy; + u32 execution_quantum; + u32 preemption_timeout; + bool missing = false; + unsigned long flags; + int ret; /* NB: For both of these, zero means disabled. */ - desc->execution_quantum = engine->props.timeslice_duration_ms * 1000; - desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000; + execution_quantum = engine->props.timeslice_duration_ms * 1000; + preemption_timeout = engine->props.preempt_timeout_ms * 1000; + + __guc_context_policy_start_klv(&policy, ce->guc_id.id); + + __guc_context_policy_add_priority(&policy, ce->guc_state.prio); + __guc_context_policy_add_execution_quantum(&policy, execution_quantum); + __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout); + + if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION) + __guc_context_policy_add_preempt_to_idle(&policy, 1); + + ret = __guc_context_set_context_policies(guc, &policy, loop); + missing = ret != 0; + + if (!missing && intel_context_is_parent(ce)) { + struct intel_context *child; + + for_each_child(ce, child) { + __guc_context_policy_start_klv(&policy, child->guc_id.id); + + if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION) + __guc_context_policy_add_preempt_to_idle(&policy, 1); + + child->guc_state.prio = ce->guc_state.prio; + __guc_context_policy_add_priority(&policy, ce->guc_state.prio); + __guc_context_policy_add_execution_quantum(&policy, execution_quantum); + __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout); + + ret = __guc_context_set_context_policies(guc, &policy, loop); + if (ret) { + missing = true; + break; + } + } + } + + spin_lock_irqsave(&ce->guc_state.lock, flags); + if (missing) + set_context_policy_required(ce); + else + clr_context_policy_required(ce); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + + return ret; } -static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) +static void prepare_context_registration_info(struct intel_context *ce, + struct guc_ctxt_registration_info *info) { struct intel_engine_cs *engine = ce->engine; - struct intel_runtime_pm *runtime_pm = engine->uncore->rpm; struct intel_guc *guc = &engine->gt->uc.guc; - u32 desc_idx = ce->guc_id.id; - struct guc_lrc_desc *desc; - bool context_registered; - intel_wakeref_t wakeref; - struct intel_context *child; - int ret = 0; + u32 ctx_id = ce->guc_id.id; GEM_BUG_ON(!engine->mask); - GEM_BUG_ON(!sched_state_is_init(ce)); /* * Ensure LRC + CT vmas are is same region as write barrier is done @@ -2237,55 +2423,63 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) != i915_gem_object_is_lmem(ce->ring->vma->obj)); - context_registered = lrc_desc_registered(guc, desc_idx); - - reset_lrc_desc(guc, desc_idx); - set_lrc_desc_registered(guc, desc_idx, ce); - - desc = __get_lrc_desc(guc, desc_idx); - desc->engine_class = engine_class_to_guc_class(engine->class); - desc->engine_submit_mask = engine->logical_mask; - desc->hw_context_desc = ce->lrc.lrca; - desc->priority = ce->guc_state.prio; - desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; - guc_context_policy_init(engine, desc); + memset(info, 0, sizeof(*info)); + info->context_idx = ctx_id; + info->engine_class = engine_class_to_guc_class(engine->class); + info->engine_submit_mask = engine->logical_mask; + /* + * NB: GuC interface supports 64 bit LRCA even though i915/HW + * only supports 32 bit currently. + */ + info->hwlrca_lo = lower_32_bits(ce->lrc.lrca); + info->hwlrca_hi = upper_32_bits(ce->lrc.lrca); + info->flags = CONTEXT_REGISTRATION_FLAG_KMD; /* * If context is a parent, we need to register a process descriptor * describing a work queue and register all child contexts. */ if (intel_context_is_parent(ce)) { - struct guc_process_desc *pdesc; + struct guc_sched_wq_desc *wq_desc; + u64 wq_desc_offset, wq_base_offset; ce->parallel.guc.wqi_tail = 0; ce->parallel.guc.wqi_head = 0; - desc->process_desc = i915_ggtt_offset(ce->state) + - __get_parent_scratch_offset(ce); - desc->wq_addr = i915_ggtt_offset(ce->state) + - __get_wq_offset(ce); - desc->wq_size = WQ_SIZE; + wq_desc_offset = i915_ggtt_offset(ce->state) + + __get_parent_scratch_offset(ce); + wq_base_offset = i915_ggtt_offset(ce->state) + + __get_wq_offset(ce); + info->wq_desc_lo = lower_32_bits(wq_desc_offset); + info->wq_desc_hi = upper_32_bits(wq_desc_offset); + info->wq_base_lo = lower_32_bits(wq_base_offset); + info->wq_base_hi = upper_32_bits(wq_base_offset); + info->wq_size = WQ_SIZE; - pdesc = __get_process_desc(ce); - memset(pdesc, 0, sizeof(*(pdesc))); - pdesc->stage_id = ce->guc_id.id; - pdesc->wq_base_addr = desc->wq_addr; - pdesc->wq_size_bytes = desc->wq_size; - pdesc->wq_status = WQ_STATUS_ACTIVE; - - for_each_child(ce, child) { - desc = __get_lrc_desc(guc, child->guc_id.id); - - desc->engine_class = - engine_class_to_guc_class(engine->class); - desc->hw_context_desc = child->lrc.lrca; - desc->priority = ce->guc_state.prio; - desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; - guc_context_policy_init(engine, desc); - } + wq_desc = __get_wq_desc(ce); + memset(wq_desc, 0, sizeof(*wq_desc)); + wq_desc->wq_status = WQ_STATUS_ACTIVE; clear_children_join_go_memory(ce); } +} + +static int try_context_registration(struct intel_context *ce, bool loop) +{ + struct intel_engine_cs *engine = ce->engine; + struct intel_runtime_pm *runtime_pm = engine->uncore->rpm; + struct intel_guc *guc = &engine->gt->uc.guc; + intel_wakeref_t wakeref; + u32 ctx_id = ce->guc_id.id; + bool context_registered; + int ret = 0; + + GEM_BUG_ON(!sched_state_is_init(ce)); + + context_registered = ctx_id_mapped(guc, ctx_id); + + clr_ctx_id_mapping(guc, ctx_id); + set_ctx_id_mapping(guc, ctx_id, ce); /* * The context_lookup xarray is used to determine if the hardware @@ -2311,7 +2505,7 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) } spin_unlock_irqrestore(&ce->guc_state.lock, flags); if (unlikely(disabled)) { - reset_lrc_desc(guc, desc_idx); + clr_ctx_id_mapping(guc, ctx_id); return 0; /* Will get registered later */ } @@ -2327,9 +2521,9 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) with_intel_runtime_pm(runtime_pm, wakeref) ret = register_context(ce, loop); if (unlikely(ret == -EBUSY)) { - reset_lrc_desc(guc, desc_idx); + clr_ctx_id_mapping(guc, ctx_id); } else if (unlikely(ret == -ENODEV)) { - reset_lrc_desc(guc, desc_idx); + clr_ctx_id_mapping(guc, ctx_id); ret = 0; /* Will get registered later */ } } @@ -2419,7 +2613,7 @@ static void __guc_context_sched_disable(struct intel_guc *guc, GUC_CONTEXT_DISABLE }; - GEM_BUG_ON(guc_id == GUC_INVALID_LRC_ID); + GEM_BUG_ON(guc_id == GUC_INVALID_CONTEXT_ID); GEM_BUG_ON(intel_context_is_child(ce)); trace_intel_context_sched_disable(ce); @@ -2516,7 +2710,7 @@ static bool context_cant_unblock(struct intel_context *ce) return (ce->guc_state.sched_state & SCHED_STATE_NO_UNBLOCK) || context_guc_id_invalid(ce) || - !lrc_desc_registered(ce_to_guc(ce), ce->guc_id.id) || + !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id) || !intel_context_is_pinned(ce); } @@ -2580,13 +2774,11 @@ static void __guc_context_set_preemption_timeout(struct intel_guc *guc, u16 guc_id, u32 preemption_timeout) { - u32 action[] = { - INTEL_GUC_ACTION_SET_CONTEXT_PREEMPTION_TIMEOUT, - guc_id, - preemption_timeout - }; + struct context_policy policy; - intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); + __guc_context_policy_start_klv(&policy, guc_id); + __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout); + __guc_context_set_context_policies(guc, &policy, true); } static void guc_context_ban(struct intel_context *ce, struct i915_request *rq) @@ -2686,7 +2878,7 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce) bool disabled; GEM_BUG_ON(!intel_gt_pm_is_awake(gt)); - GEM_BUG_ON(!lrc_desc_registered(guc, ce->guc_id.id)); + GEM_BUG_ON(!ctx_id_mapped(guc, ce->guc_id.id)); GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id)); GEM_BUG_ON(context_enabled(ce)); @@ -2803,7 +2995,7 @@ static void guc_context_destroy(struct kref *kref) */ spin_lock_irqsave(&guc->submission_state.lock, flags); destroy = submission_disabled(guc) || context_guc_id_invalid(ce) || - !lrc_desc_registered(guc, ce->guc_id.id); + !ctx_id_mapped(guc, ce->guc_id.id); if (likely(!destroy)) { if (!list_empty(&ce->guc_id.link)) list_del_init(&ce->guc_id.link); @@ -2831,16 +3023,20 @@ static int guc_context_alloc(struct intel_context *ce) return lrc_alloc(ce, ce->engine); } +static void __guc_context_set_prio(struct intel_guc *guc, + struct intel_context *ce) +{ + struct context_policy policy; + + __guc_context_policy_start_klv(&policy, ce->guc_id.id); + __guc_context_policy_add_priority(&policy, ce->guc_state.prio); + __guc_context_set_context_policies(guc, &policy, true); +} + static void guc_context_set_prio(struct intel_guc *guc, struct intel_context *ce, u8 prio) { - u32 action[] = { - INTEL_GUC_ACTION_SET_CONTEXT_PRIORITY, - ce->guc_id.id, - prio, - }; - GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH || prio > GUC_CLIENT_PRIORITY_NORMAL); lockdep_assert_held(&ce->guc_state.lock); @@ -2851,9 +3047,9 @@ static void guc_context_set_prio(struct intel_guc *guc, return; } - guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); - ce->guc_state.prio = prio; + __guc_context_set_prio(guc, ce); + trace_intel_context_set_prio(ce); } @@ -3046,7 +3242,7 @@ static void guc_signal_context_fence(struct intel_context *ce) static bool context_needs_register(struct intel_context *ce, bool new_guc_id) { return (new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) || - !lrc_desc_registered(ce_to_guc(ce), ce->guc_id.id)) && + !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id)) && !submission_disabled(ce_to_guc(ce)); } @@ -3123,7 +3319,7 @@ static int guc_request_alloc(struct i915_request *rq) if (unlikely(ret < 0)) return ret; if (context_needs_register(ce, !!ret)) { - ret = guc_lrc_desc_pin(ce, true); + ret = try_context_registration(ce, true); if (unlikely(ret)) { /* unwind */ if (ret == -EPIPE) { disable_submission(guc); @@ -3560,7 +3756,7 @@ static void guc_sanitize(struct intel_engine_cs *engine) sanitize_hwsp(engine); /* And scrub the dirty cachelines for the HWSP */ - clflush_cache_range(engine->status_page.addr, PAGE_SIZE); + drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE); intel_engine_reset_pinned_contexts(engine); } @@ -3595,7 +3791,7 @@ static int guc_resume(struct intel_engine_cs *engine) setup_hwsp(engine); start_engine(engine); - if (engine->class == RENDER_CLASS) + if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) xehp_enable_ccs_engines(engine); return 0; @@ -3614,9 +3810,17 @@ static void guc_set_default_submission(struct intel_engine_cs *engine) static inline void guc_kernel_context_pin(struct intel_guc *guc, struct intel_context *ce) { + /* + * Note: we purposefully do not check the returns below because + * the registration can only fail if a reset is just starting. + * This is called at the end of reset so presumably another reset + * isn't happening and even it did this code would be run again. + */ + if (context_guc_id_invalid(ce)) pin_guc_id(guc, ce); - guc_lrc_desc_pin(ce, true); + + try_context_registration(ce, true); } static inline void guc_init_lrc_mapping(struct intel_guc *guc) @@ -3634,13 +3838,7 @@ static inline void guc_init_lrc_mapping(struct intel_guc *guc) * Also, after a reset the of the GuC we want to make sure that the * information shared with GuC is properly reset. The kernel LRCs are * not attached to the gem_context, so they need to be added separately. - * - * Note: we purposefully do not check the return of guc_lrc_desc_pin, - * because that function can only fail if a reset is just starting. This - * is at the end of reset so presumably another reset isn't happening - * and even it did this code would be run again. */ - for_each_engine(engine, gt, id) { struct intel_context *ce; @@ -3680,7 +3878,7 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine) engine->sched_engine->schedule = i915_schedule; - engine->reset.prepare = guc_reset_nop; + engine->reset.prepare = guc_engine_reset_prepare; engine->reset.rewind = guc_rewind_nop; engine->reset.cancel = guc_reset_nop; engine->reset.finish = guc_reset_nop; @@ -3699,6 +3897,10 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine) engine->flags |= I915_ENGINE_HAS_PREEMPTION; engine->flags |= I915_ENGINE_HAS_TIMESLICES; + /* Wa_14014475959:dg2 */ + if (IS_DG2(engine->i915) && engine->class == COMPUTE_CLASS) + engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT; + /* * TODO: GuC supports timeslicing and semaphores as well, but they're * handled by the firmware so some minor tweaks are required before @@ -3835,32 +4037,32 @@ void intel_guc_submission_init_early(struct intel_guc *guc) spin_lock_init(&guc->timestamp.lock); INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping); - guc->submission_state.num_guc_ids = GUC_MAX_LRC_DESCRIPTORS; + guc->submission_state.num_guc_ids = GUC_MAX_CONTEXT_ID; guc->submission_supported = __guc_submission_supported(guc); guc->submission_selected = __guc_submission_selected(guc); } static inline struct intel_context * -g2h_context_lookup(struct intel_guc *guc, u32 desc_idx) +g2h_context_lookup(struct intel_guc *guc, u32 ctx_id) { struct intel_context *ce; - if (unlikely(desc_idx >= GUC_MAX_LRC_DESCRIPTORS)) { + if (unlikely(ctx_id >= GUC_MAX_CONTEXT_ID)) { drm_err(&guc_to_gt(guc)->i915->drm, - "Invalid desc_idx %u", desc_idx); + "Invalid ctx_id %u\n", ctx_id); return NULL; } - ce = __get_context(guc, desc_idx); + ce = __get_context(guc, ctx_id); if (unlikely(!ce)) { drm_err(&guc_to_gt(guc)->i915->drm, - "Context is NULL, desc_idx %u", desc_idx); + "Context is NULL, ctx_id %u\n", ctx_id); return NULL; } if (unlikely(intel_context_is_child(ce))) { drm_err(&guc_to_gt(guc)->i915->drm, - "Context is child, desc_idx %u", desc_idx); + "Context is child, ctx_id %u\n", ctx_id); return NULL; } @@ -3872,14 +4074,15 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc, u32 len) { struct intel_context *ce; - u32 desc_idx = msg[0]; + u32 ctx_id; if (unlikely(len < 1)) { - drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); + drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len); return -EPROTO; } + ctx_id = msg[0]; - ce = g2h_context_lookup(guc, desc_idx); + ce = g2h_context_lookup(guc, ctx_id); if (unlikely(!ce)) return -EPROTO; @@ -3923,14 +4126,15 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc, { struct intel_context *ce; unsigned long flags; - u32 desc_idx = msg[0]; + u32 ctx_id; if (unlikely(len < 2)) { - drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); + drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len); return -EPROTO; } + ctx_id = msg[0]; - ce = g2h_context_lookup(guc, desc_idx); + ce = g2h_context_lookup(guc, ctx_id); if (unlikely(!ce)) return -EPROTO; @@ -3938,8 +4142,8 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc, (!context_pending_enable(ce) && !context_pending_disable(ce)))) { drm_err(&guc_to_gt(guc)->i915->drm, - "Bad context sched_state 0x%x, desc_idx %u", - ce->guc_state.sched_state, desc_idx); + "Bad context sched_state 0x%x, ctx_id %u\n", + ce->guc_state.sched_state, ctx_id); return -EPROTO; } @@ -4005,7 +4209,7 @@ static void capture_error_state(struct intel_guc *guc, intel_engine_set_hung_context(engine, ce); with_intel_runtime_pm(&i915->runtime_pm, wakeref) - i915_capture_error_state(gt, engine->mask); + i915_capture_error_state(gt, engine->mask, CORE_DUMP_FLAG_IS_GUC_CAPTURE); atomic_inc(&i915->gpu_error.reset_engine_count[engine->uabi_class]); } @@ -4037,14 +4241,14 @@ int intel_guc_context_reset_process_msg(struct intel_guc *guc, { struct intel_context *ce; unsigned long flags; - int desc_idx; + int ctx_id; if (unlikely(len != 1)) { drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); return -EPROTO; } - desc_idx = msg[0]; + ctx_id = msg[0]; /* * The context lookup uses the xarray but lookups only require an RCU lock @@ -4053,7 +4257,7 @@ int intel_guc_context_reset_process_msg(struct intel_guc *guc, * asynchronously until the reset is done. */ xa_lock_irqsave(&guc->context_lookup, flags); - ce = g2h_context_lookup(guc, desc_idx); + ce = g2h_context_lookup(guc, ctx_id); if (ce) intel_context_get(ce); xa_unlock_irqrestore(&guc->context_lookup, flags); @@ -4070,23 +4274,24 @@ int intel_guc_context_reset_process_msg(struct intel_guc *guc, int intel_guc_error_capture_process_msg(struct intel_guc *guc, const u32 *msg, u32 len) { - int status; + u32 status; if (unlikely(len != 1)) { drm_dbg(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); return -EPROTO; } - status = msg[0]; - drm_info(&guc_to_gt(guc)->i915->drm, "Got error capture: status = %d", status); + status = msg[0] & INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_MASK; + if (status == INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE) + drm_warn(&guc_to_gt(guc)->i915->drm, "G2H-Error capture no space"); - /* FIXME: Do something with the capture */ + intel_guc_capture_process(guc); return 0; } -static struct intel_engine_cs * -guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance) +struct intel_engine_cs * +intel_guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance) { struct intel_gt *gt = guc_to_gt(guc); u8 engine_class = guc_class_to_engine_class(guc_class); @@ -4135,7 +4340,7 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc, instance = msg[1]; reason = msg[2]; - engine = guc_lookup_engine(guc, guc_class, instance); + engine = intel_guc_lookup_engine(guc, guc_class, instance); if (unlikely(!engine)) { drm_err(>->i915->drm, "Invalid engine %d:%d", guc_class, instance); @@ -4333,17 +4538,17 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc, guc_log_context_priority(p, ce); if (intel_context_is_parent(ce)) { - struct guc_process_desc *desc = __get_process_desc(ce); + struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce); struct intel_context *child; drm_printf(p, "\t\tNumber children: %u\n", ce->parallel.number_children); drm_printf(p, "\t\tWQI Head: %u\n", - READ_ONCE(desc->head)); + READ_ONCE(wq_desc->head)); drm_printf(p, "\t\tWQI Tail: %u\n", - READ_ONCE(desc->tail)); + READ_ONCE(wq_desc->tail)); drm_printf(p, "\t\tWQI Status: %u\n\n", - READ_ONCE(desc->wq_status)); + READ_ONCE(wq_desc->wq_status)); if (ce->engine->emit_bb_start == emit_bb_start_parent_no_preempt_mid_batch) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 541f180aaa29..a876d39e6bcf 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -53,21 +53,21 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, * firmware as TGL. */ #define INTEL_GUC_FIRMWARE_DEFS(fw_def, guc_def) \ - fw_def(ALDERLAKE_P, 0, guc_def(adlp, 69, 0, 3)) \ - fw_def(ALDERLAKE_S, 0, guc_def(tgl, 69, 0, 3)) \ - fw_def(DG1, 0, guc_def(dg1, 69, 0, 3)) \ - fw_def(ROCKETLAKE, 0, guc_def(tgl, 69, 0, 3)) \ - fw_def(TIGERLAKE, 0, guc_def(tgl, 69, 0, 3)) \ - fw_def(JASPERLAKE, 0, guc_def(ehl, 69, 0, 3)) \ - fw_def(ELKHARTLAKE, 0, guc_def(ehl, 69, 0, 3)) \ - fw_def(ICELAKE, 0, guc_def(icl, 69, 0, 3)) \ - fw_def(COMETLAKE, 5, guc_def(cml, 69, 0, 3)) \ - fw_def(COMETLAKE, 0, guc_def(kbl, 69, 0, 3)) \ - fw_def(COFFEELAKE, 0, guc_def(kbl, 69, 0, 3)) \ - fw_def(GEMINILAKE, 0, guc_def(glk, 69, 0, 3)) \ - fw_def(KABYLAKE, 0, guc_def(kbl, 69, 0, 3)) \ - fw_def(BROXTON, 0, guc_def(bxt, 69, 0, 3)) \ - fw_def(SKYLAKE, 0, guc_def(skl, 69, 0, 3)) + fw_def(ALDERLAKE_P, 0, guc_def(adlp, 70, 1, 1)) \ + fw_def(ALDERLAKE_S, 0, guc_def(tgl, 70, 1, 1)) \ + fw_def(DG1, 0, guc_def(dg1, 70, 1, 1)) \ + fw_def(ROCKETLAKE, 0, guc_def(tgl, 70, 1, 1)) \ + fw_def(TIGERLAKE, 0, guc_def(tgl, 70, 1, 1)) \ + fw_def(JASPERLAKE, 0, guc_def(ehl, 70, 1, 1)) \ + fw_def(ELKHARTLAKE, 0, guc_def(ehl, 70, 1, 1)) \ + fw_def(ICELAKE, 0, guc_def(icl, 70, 1, 1)) \ + fw_def(COMETLAKE, 5, guc_def(cml, 70, 1, 1)) \ + fw_def(COMETLAKE, 0, guc_def(kbl, 70, 1, 1)) \ + fw_def(COFFEELAKE, 0, guc_def(kbl, 70, 1, 1)) \ + fw_def(GEMINILAKE, 0, guc_def(glk, 70, 1, 1)) \ + fw_def(KABYLAKE, 0, guc_def(kbl, 70, 1, 1)) \ + fw_def(BROXTON, 0, guc_def(bxt, 70, 1, 1)) \ + fw_def(SKYLAKE, 0, guc_def(skl, 70, 1, 1)) #define INTEL_HUC_FIRMWARE_DEFS(fw_def, huc_def) \ fw_def(ALDERLAKE_P, 0, huc_def(tgl, 7, 9, 3)) \ diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c index a115894d5896..1df71d0796ae 100644 --- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c @@ -148,7 +148,7 @@ static int intel_guc_steal_guc_ids(void *arg) struct i915_request *spin_rq = NULL, *rq, *last = NULL; int number_guc_id_stolen = guc->number_guc_id_stolen; - ce = kzalloc(sizeof(*ce) * GUC_MAX_LRC_DESCRIPTORS, GFP_KERNEL); + ce = kcalloc(GUC_MAX_CONTEXT_ID, sizeof(*ce), GFP_KERNEL); if (!ce) { pr_err("Context array allocation failed\n"); return -ENOMEM; diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 02239395ce81..94e5c29d2ee3 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -309,7 +309,8 @@ static int i915_gpu_info_open(struct inode *inode, struct file *file) gpu = NULL; with_intel_runtime_pm(&i915->runtime_pm, wakeref) - gpu = i915_gpu_coredump(to_gt(i915), ALL_ENGINES); + gpu = i915_gpu_coredump(to_gt(i915), ALL_ENGINES, CORE_DUMP_FLAG_NONE); + if (IS_ERR(gpu)) return PTR_ERR(gpu); @@ -582,8 +583,9 @@ static int i915_wedged_get(void *data, u64 *val) static int i915_wedged_set(void *data, u64 val) { struct drm_i915_private *i915 = data; + intel_gt_debugfs_reset_store(to_gt(i915), val); - return intel_gt_debugfs_reset_store(to_gt(i915), val); + return 0; } DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops, @@ -731,15 +733,17 @@ static int i915_sseu_status(struct seq_file *m, void *unused) static int i915_forcewake_open(struct inode *inode, struct file *file) { struct drm_i915_private *i915 = inode->i_private; + intel_gt_pm_debugfs_forcewake_user_open(to_gt(i915)); - return intel_gt_pm_debugfs_forcewake_user_open(to_gt(i915)); + return 0; } static int i915_forcewake_release(struct inode *inode, struct file *file) { struct drm_i915_private *i915 = inode->i_private; + intel_gt_pm_debugfs_forcewake_user_release(to_gt(i915)); - return intel_gt_pm_debugfs_forcewake_user_release(to_gt(i915)); + return 0; } static const struct file_operations i915_forcewake_fops = { diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 09de45d3e274..3ffb617d75c9 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -77,6 +77,7 @@ #include "i915_file_private.h" #include "i915_debugfs.h" #include "i915_driver.h" +#include "i915_drm_client.h" #include "i915_drv.h" #include "i915_getparam.h" #include "i915_ioc32.h" @@ -322,9 +323,7 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv) intel_device_info_subplatform_init(dev_priv); intel_step_init(dev_priv); - intel_gt_init_early(to_gt(dev_priv), dev_priv); intel_uncore_mmio_debug_init_early(&dev_priv->mmio_debug); - intel_uncore_init_early(&dev_priv->uncore, to_gt(dev_priv)); spin_lock_init(&dev_priv->irq_lock); spin_lock_init(&dev_priv->gpu_error.lock); @@ -355,7 +354,9 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv) intel_wopcm_init_early(&dev_priv->wopcm); - __intel_gt_init_early(to_gt(dev_priv), dev_priv); + intel_root_gt_init_early(dev_priv); + + i915_drm_clients_init(&dev_priv->clients, dev_priv); i915_gem_init_early(dev_priv); @@ -376,7 +377,8 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv) err_gem: i915_gem_cleanup_early(dev_priv); - intel_gt_driver_late_release(to_gt(dev_priv)); + intel_gt_driver_late_release_all(dev_priv); + i915_drm_clients_fini(&dev_priv->clients); intel_region_ttm_device_fini(dev_priv); err_ttm: vlv_suspend_cleanup(dev_priv); @@ -395,7 +397,8 @@ static void i915_driver_late_release(struct drm_i915_private *dev_priv) intel_irq_fini(dev_priv); intel_power_domains_cleanup(dev_priv); i915_gem_cleanup_early(dev_priv); - intel_gt_driver_late_release(to_gt(dev_priv)); + intel_gt_driver_late_release_all(dev_priv); + i915_drm_clients_fini(&dev_priv->clients); intel_region_ttm_device_fini(dev_priv); vlv_suspend_cleanup(dev_priv); i915_workqueues_cleanup(dev_priv); @@ -426,13 +429,9 @@ static int i915_driver_mmio_probe(struct drm_i915_private *dev_priv) if (ret < 0) return ret; - ret = intel_uncore_setup_mmio(&dev_priv->uncore); - if (ret < 0) - goto err_bridge; - ret = intel_uncore_init_mmio(&dev_priv->uncore); if (ret) - goto err_mmio; + return ret; /* Try to make sure MCHBAR is enabled before poking at it */ intel_setup_mchbar(dev_priv); @@ -450,9 +449,6 @@ static int i915_driver_mmio_probe(struct drm_i915_private *dev_priv) err_uncore: intel_teardown_mchbar(dev_priv); intel_uncore_fini_mmio(&dev_priv->uncore); -err_mmio: - intel_uncore_cleanup_mmio(&dev_priv->uncore); -err_bridge: pci_dev_put(dev_priv->bridge_dev); return ret; @@ -466,7 +462,6 @@ static void i915_driver_mmio_release(struct drm_i915_private *dev_priv) { intel_teardown_mchbar(dev_priv); intel_uncore_fini_mmio(&dev_priv->uncore); - intel_uncore_cleanup_mmio(&dev_priv->uncore); pci_dev_put(dev_priv->bridge_dev); } @@ -599,7 +594,7 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) if (ret) goto err_ggtt; - ret = intel_gt_probe_lmem(to_gt(dev_priv)); + ret = intel_gt_tiles_init(dev_priv); if (ret) goto err_mem_regions; @@ -850,10 +845,14 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) intel_vgpu_detect(i915); - ret = i915_driver_mmio_probe(i915); + ret = intel_gt_probe_all(i915); if (ret < 0) goto out_runtime_pm_put; + ret = i915_driver_mmio_probe(i915); + if (ret < 0) + goto out_tiles_cleanup; + ret = i915_driver_hw_probe(i915); if (ret < 0) goto out_cleanup_mmio; @@ -910,6 +909,8 @@ out_cleanup_hw: i915_ggtt_driver_late_release(i915); out_cleanup_mmio: i915_driver_mmio_release(i915); +out_tiles_cleanup: + intel_gt_release_all(i915); out_runtime_pm_put: enable_rpm_wakeref_asserts(&i915->runtime_pm); i915_driver_late_release(i915); @@ -1013,6 +1014,7 @@ static void i915_driver_postclose(struct drm_device *dev, struct drm_file *file) struct drm_i915_file_private *file_priv = file->driver_priv; i915_gem_context_close(file); + i915_drm_client_put(file_priv->client); kfree_rcu(file_priv, rcu); @@ -1743,6 +1745,9 @@ static const struct file_operations i915_driver_fops = { .read = drm_read, .compat_ioctl = i915_ioc32_compat_ioctl, .llseek = noop_llseek, +#ifdef CONFIG_PROC_FS + .show_fdinfo = i915_drm_client_fdinfo, +#endif }; static int diff --git a/drivers/gpu/drm/i915/i915_drm_client.c b/drivers/gpu/drm/i915/i915_drm_client.c new file mode 100644 index 000000000000..475a6f824cad --- /dev/null +++ b/drivers/gpu/drm/i915/i915_drm_client.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/types.h> + +#include <uapi/drm/i915_drm.h> + +#include <drm/drm_print.h> + +#include "gem/i915_gem_context.h" +#include "i915_drm_client.h" +#include "i915_file_private.h" +#include "i915_gem.h" +#include "i915_utils.h" + +void i915_drm_clients_init(struct i915_drm_clients *clients, + struct drm_i915_private *i915) +{ + clients->i915 = i915; + clients->next_id = 0; + + xa_init_flags(&clients->xarray, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ); +} + +struct i915_drm_client *i915_drm_client_add(struct i915_drm_clients *clients) +{ + struct i915_drm_client *client; + struct xarray *xa = &clients->xarray; + int ret; + + client = kzalloc(sizeof(*client), GFP_KERNEL); + if (!client) + return ERR_PTR(-ENOMEM); + + xa_lock_irq(xa); + ret = __xa_alloc_cyclic(xa, &client->id, client, xa_limit_32b, + &clients->next_id, GFP_KERNEL); + xa_unlock_irq(xa); + if (ret < 0) + goto err; + + kref_init(&client->kref); + spin_lock_init(&client->ctx_lock); + INIT_LIST_HEAD(&client->ctx_list); + client->clients = clients; + + return client; + +err: + kfree(client); + + return ERR_PTR(ret); +} + +void __i915_drm_client_free(struct kref *kref) +{ + struct i915_drm_client *client = + container_of(kref, typeof(*client), kref); + struct xarray *xa = &client->clients->xarray; + unsigned long flags; + + xa_lock_irqsave(xa, flags); + __xa_erase(xa, client->id); + xa_unlock_irqrestore(xa, flags); + kfree(client); +} + +void i915_drm_clients_fini(struct i915_drm_clients *clients) +{ + GEM_BUG_ON(!xa_empty(&clients->xarray)); + xa_destroy(&clients->xarray); +} + +#ifdef CONFIG_PROC_FS +static const char * const uabi_class_names[] = { + [I915_ENGINE_CLASS_RENDER] = "render", + [I915_ENGINE_CLASS_COPY] = "copy", + [I915_ENGINE_CLASS_VIDEO] = "video", + [I915_ENGINE_CLASS_VIDEO_ENHANCE] = "video-enhance", +}; + +static u64 busy_add(struct i915_gem_context *ctx, unsigned int class) +{ + struct i915_gem_engines_iter it; + struct intel_context *ce; + u64 total = 0; + + for_each_gem_engine(ce, rcu_dereference(ctx->engines), it) { + if (ce->engine->uabi_class != class) + continue; + + total += intel_context_get_total_runtime_ns(ce); + } + + return total; +} + +static void +show_client_class(struct seq_file *m, + struct i915_drm_client *client, + unsigned int class) +{ + const struct list_head *list = &client->ctx_list; + u64 total = atomic64_read(&client->past_runtime[class]); + const unsigned int capacity = + client->clients->i915->engine_uabi_class_count[class]; + struct i915_gem_context *ctx; + + rcu_read_lock(); + list_for_each_entry_rcu(ctx, list, client_link) + total += busy_add(ctx, class); + rcu_read_unlock(); + + seq_printf(m, "drm-engine-%s:\t%llu ns\n", + uabi_class_names[class], total); + + if (capacity > 1) + seq_printf(m, "drm-engine-capacity-%s:\t%u\n", + uabi_class_names[class], + capacity); +} + +void i915_drm_client_fdinfo(struct seq_file *m, struct file *f) +{ + struct drm_file *file = f->private_data; + struct drm_i915_file_private *file_priv = file->driver_priv; + struct drm_i915_private *i915 = file_priv->dev_priv; + struct i915_drm_client *client = file_priv->client; + struct pci_dev *pdev = to_pci_dev(i915->drm.dev); + unsigned int i; + + /* + * ****************************************************************** + * For text output format description please see drm-usage-stats.rst! + * ****************************************************************** + */ + + seq_printf(m, "drm-driver:\t%s\n", i915->drm.driver->name); + seq_printf(m, "drm-pdev:\t%04x:%02x:%02x.%d\n", + pci_domain_nr(pdev->bus), pdev->bus->number, + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); + seq_printf(m, "drm-client-id:\t%u\n", client->id); + + /* + * Temporarily skip showing client engine information with GuC submission till + * fetching engine busyness is implemented in the GuC submission backend + */ + if (GRAPHICS_VER(i915) < 8 || intel_uc_uses_guc_submission(&i915->gt0.uc)) + return; + + for (i = 0; i < ARRAY_SIZE(uabi_class_names); i++) + show_client_class(m, client, i); +} +#endif diff --git a/drivers/gpu/drm/i915/i915_drm_client.h b/drivers/gpu/drm/i915/i915_drm_client.h new file mode 100644 index 000000000000..5f5b02b01ba0 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_drm_client.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2020 Intel Corporation + */ + +#ifndef __I915_DRM_CLIENT_H__ +#define __I915_DRM_CLIENT_H__ + +#include <linux/kref.h> +#include <linux/list.h> +#include <linux/spinlock.h> +#include <linux/xarray.h> + +#include "gt/intel_engine_types.h" + +#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_VIDEO_ENHANCE + +struct drm_i915_private; + +struct i915_drm_clients { + struct drm_i915_private *i915; + + struct xarray xarray; + u32 next_id; +}; + +struct i915_drm_client { + struct kref kref; + + unsigned int id; + + spinlock_t ctx_lock; /* For add/remove from ctx_list. */ + struct list_head ctx_list; /* List of contexts belonging to client. */ + + struct i915_drm_clients *clients; + + /** + * @past_runtime: Accumulation of pphwsp runtimes from closed contexts. + */ + atomic64_t past_runtime[I915_LAST_UABI_ENGINE_CLASS + 1]; +}; + +void i915_drm_clients_init(struct i915_drm_clients *clients, + struct drm_i915_private *i915); + +static inline struct i915_drm_client * +i915_drm_client_get(struct i915_drm_client *client) +{ + kref_get(&client->kref); + return client; +} + +void __i915_drm_client_free(struct kref *kref); + +static inline void i915_drm_client_put(struct i915_drm_client *client) +{ + kref_put(&client->kref, __i915_drm_client_free); +} + +struct i915_drm_client *i915_drm_client_add(struct i915_drm_clients *clients); + +#ifdef CONFIG_PROC_FS +void i915_drm_client_fdinfo(struct seq_file *m, struct file *f); +#endif + +void i915_drm_clients_fini(struct i915_drm_clients *clients); + +#endif /* !__I915_DRM_CLIENT_H__ */ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ce2cd6491d6d..a6cf9716d6aa 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -61,6 +61,7 @@ #include "gt/intel_workarounds.h" #include "gt/uc/intel_uc.h" +#include "i915_drm_client.h" #include "i915_gem.h" #include "i915_gpu_error.h" #include "i915_params.h" @@ -500,6 +501,7 @@ struct drm_i915_private { struct pci_dev *bridge_dev; struct rb_root uabi_engines; + unsigned int engine_uabi_class_count[I915_LAST_UABI_ENGINE_CLASS + 1]; struct resource mch_res; @@ -766,6 +768,14 @@ struct drm_i915_private { /* Abstract the submission mechanism (legacy ringbuffer or execlists) away */ struct intel_gt gt0; + /* + * i915->gt[0] == &i915->gt0 + */ +#define I915_MAX_GT 4 + struct intel_gt *gt[I915_MAX_GT]; + + struct kobject *sysfs_gt; + struct { struct i915_gem_contexts { spinlock_t lock; /* locks list */ @@ -810,6 +820,8 @@ struct drm_i915_private { struct i915_pmu pmu; + struct i915_drm_clients clients; + struct i915_hdcp_comp_master *hdcp_master; bool hdcp_comp_added; @@ -1201,6 +1213,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, ((gt)->info.engine_mask & \ GENMASK(first__ + count__ - 1, first__)) >> first__; \ }) +#define RCS_MASK(gt) \ + ENGINE_INSTANCES_MASK(gt, RCS0, I915_MAX_RCS) #define VDBOX_MASK(gt) \ ENGINE_INSTANCES_MASK(gt, VCS0, I915_MAX_VCS) #define VEBOX_MASK(gt) \ @@ -1294,6 +1308,14 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_DMC(dev_priv) (INTEL_INFO(dev_priv)->display.has_dmc) +#define HAS_HECI_PXP(dev_priv) \ + (INTEL_INFO(dev_priv)->has_heci_pxp) + +#define HAS_HECI_GSCFI(dev_priv) \ + (INTEL_INFO(dev_priv)->has_heci_gscfi) + +#define HAS_HECI_GSC(dev_priv) (HAS_HECI_PXP(dev_priv) || HAS_HECI_GSCFI(dev_priv)) + #define HAS_MSO(i915) (DISPLAY_VER(i915) >= 12) #define HAS_RUNTIME_PM(dev_priv) (INTEL_INFO(dev_priv)->has_runtime_pm) @@ -1363,6 +1385,9 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_GUC_DEPRIVILEGE(dev_priv) \ (INTEL_INFO(dev_priv)->has_guc_deprivilege) +#define HAS_PERCTX_PREEMPT_CTRL(i915) \ + ((GRAPHICS_VER(i915) >= 9) && GRAPHICS_VER_FULL(i915) < IP_VER(12, 55)) + #define HAS_D12_PLANE_MINIMIZATION(dev_priv) (IS_ROCKETLAKE(dev_priv) || \ IS_ALDERLAKE_S(dev_priv)) diff --git a/drivers/gpu/drm/i915/i915_file_private.h b/drivers/gpu/drm/i915/i915_file_private.h index fb16cc431b2a..f42877869692 100644 --- a/drivers/gpu/drm/i915/i915_file_private.h +++ b/drivers/gpu/drm/i915/i915_file_private.h @@ -12,6 +12,7 @@ struct drm_i915_private; struct drm_file; +struct i915_drm_client; struct drm_i915_file_private { struct drm_i915_private *dev_priv; @@ -103,6 +104,8 @@ struct drm_i915_file_private { /** ban_score: Accumulated score of all ctx bans and fast hangs. */ atomic_t ban_score; unsigned long hang_timestamp; + + struct i915_drm_client *client; }; #endif /* __I915_FILE_PRIVATE_H__ */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2e10187cd0a0..702e5b89be22 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -118,6 +118,7 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj, unsigned long flags) { struct intel_runtime_pm *rpm = &to_i915(obj->base.dev)->runtime_pm; + bool vm_trylock = !!(flags & I915_GEM_OBJECT_UNBIND_VM_TRYLOCK); LIST_HEAD(still_in_list); intel_wakeref_t wakeref; struct i915_vma *vma; @@ -142,8 +143,6 @@ try_again: while (!ret && (vma = list_first_entry_or_null(&obj->vma.list, struct i915_vma, obj_link))) { - struct i915_address_space *vm = vma->vm; - list_move_tail(&vma->obj_link, &still_in_list); if (!i915_vma_is_bound(vma, I915_VMA_BIND_MASK)) continue; @@ -153,40 +152,44 @@ try_again: break; } + /* + * Requiring the vm destructor to take the object lock + * before destroying a vma would help us eliminate the + * i915_vm_tryget() here, AND thus also the barrier stuff + * at the end. That's an easy fix, but sleeping locks in + * a kthread should generally be avoided. + */ ret = -EAGAIN; - if (!i915_vm_tryopen(vm)) + if (!i915_vm_tryget(vma->vm)) break; - /* Prevent vma being freed by i915_vma_parked as we unbind */ - vma = __i915_vma_get(vma); spin_unlock(&obj->vma.lock); - if (vma) { - bool vm_trylock = !!(flags & I915_GEM_OBJECT_UNBIND_VM_TRYLOCK); - ret = -EBUSY; - if (flags & I915_GEM_OBJECT_UNBIND_ASYNC) { - assert_object_held(vma->obj); - ret = i915_vma_unbind_async(vma, vm_trylock); - } + /* + * Since i915_vma_parked() takes the object lock + * before vma destruction, it won't race us here, + * and destroy the vma from under us. + */ - if (ret == -EBUSY && (flags & I915_GEM_OBJECT_UNBIND_ACTIVE || - !i915_vma_is_active(vma))) { - if (vm_trylock) { - if (mutex_trylock(&vma->vm->mutex)) { - ret = __i915_vma_unbind(vma); - mutex_unlock(&vma->vm->mutex); - } else { - ret = -EBUSY; - } - } else { - ret = i915_vma_unbind(vma); + ret = -EBUSY; + if (flags & I915_GEM_OBJECT_UNBIND_ASYNC) { + assert_object_held(vma->obj); + ret = i915_vma_unbind_async(vma, vm_trylock); + } + + if (ret == -EBUSY && (flags & I915_GEM_OBJECT_UNBIND_ACTIVE || + !i915_vma_is_active(vma))) { + if (vm_trylock) { + if (mutex_trylock(&vma->vm->mutex)) { + ret = __i915_vma_unbind(vma); + mutex_unlock(&vma->vm->mutex); } + } else { + ret = i915_vma_unbind(vma); } - - __i915_vma_put(vma); } - i915_vm_close(vm); + i915_vm_put(vma->vm); spin_lock(&obj->vma.lock); } list_splice_init(&still_in_list, &obj->vma.list); @@ -936,8 +939,19 @@ new_vma: if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)) return ERR_PTR(-ENOSPC); + /* + * If this misplaced vma is too big (i.e, at-least + * half the size of aperture) or hasn't been pinned + * mappable before, we ignore the misplacement when + * PIN_NONBLOCK is set in order to avoid the ping-pong + * issue described above. In other words, we try to + * avoid the costly operation of unbinding this vma + * from the GGTT and rebinding it back because there + * may not be enough space for this vma in the aperture. + */ if (flags & PIN_MAPPABLE && - vma->fence_size > ggtt->mappable_end / 2) + (vma->fence_size > ggtt->mappable_end / 2 || + !i915_vma_is_map_and_fenceable(vma))) return ERR_PTR(-ENOSPC); } @@ -1213,25 +1227,40 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv) int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file) { struct drm_i915_file_private *file_priv; - int ret; + struct i915_drm_client *client; + int ret = -ENOMEM; DRM_DEBUG("\n"); file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); if (!file_priv) - return -ENOMEM; + goto err_alloc; + + client = i915_drm_client_add(&i915->clients); + if (IS_ERR(client)) { + ret = PTR_ERR(client); + goto err_client; + } file->driver_priv = file_priv; file_priv->dev_priv = i915; file_priv->file = file; + file_priv->client = client; file_priv->bsd_engine = -1; file_priv->hang_timestamp = jiffies; ret = i915_gem_context_open(i915, file); if (ret) - kfree(file_priv); + goto err_context; + + return 0; +err_context: + i915_drm_client_put(client); +err_client: + kfree(file_priv); +err_alloc: return ret; } diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index d0e7ee7b07df..0512c66fa4f3 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -48,6 +48,7 @@ #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" #include "gt/intel_gt_regs.h" +#include "gt/uc/intel_guc_capture.h" #include "i915_driver.h" #include "i915_drv.h" @@ -511,13 +512,10 @@ static void error_print_context(struct drm_i915_error_state_buf *m, const char *header, const struct i915_gem_context_coredump *ctx) { - const u32 period = to_gt(m->i915)->clock_period_ns; - err_printf(m, "%s%s[%d] prio %d, guilty %d active %d, runtime total %lluns, avg %lluns\n", header, ctx->comm, ctx->pid, ctx->sched_attr.priority, ctx->guilty, ctx->active, - ctx->total_runtime * period, - mul_u32_u32(ctx->avg_runtime, period)); + ctx->total_runtime, ctx->avg_runtime); } static struct i915_vma_coredump * @@ -532,8 +530,8 @@ __find_vma(struct i915_vma_coredump *vma, const char *name) return NULL; } -static struct i915_vma_coredump * -find_batch(const struct intel_engine_coredump *ee) +struct i915_vma_coredump * +intel_gpu_error_find_batch(const struct intel_engine_coredump *ee) { return __find_vma(ee->vma, "batch"); } @@ -561,7 +559,7 @@ static void error_print_engine(struct drm_i915_error_state_buf *m, error_print_instdone(m, ee); - batch = find_batch(ee); + batch = intel_gpu_error_find_batch(ee); if (batch) { u64 start = batch->gtt_offset; u64 end = start + batch->gtt_size; @@ -596,15 +594,11 @@ static void error_print_engine(struct drm_i915_error_state_buf *m, ee->vm_info.pp_dir_base); } } - err_printf(m, " hung: %u\n", ee->hung); - err_printf(m, " engine reset count: %u\n", ee->reset_count); for (n = 0; n < ee->num_ports; n++) { err_printf(m, " ELSP[%d]:", n); error_print_request(m, " ", &ee->execlist[n]); } - - error_print_context(m, " Active context: ", &ee->context); } void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...) @@ -616,9 +610,9 @@ void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...) va_end(args); } -static void print_error_vma(struct drm_i915_error_state_buf *m, - const struct intel_engine_cs *engine, - const struct i915_vma_coredump *vma) +void intel_gpu_error_print_vma(struct drm_i915_error_state_buf *m, + const struct intel_engine_cs *engine, + const struct i915_vma_coredump *vma) { char out[ASCII85_BUFSZ]; struct page *page; @@ -687,7 +681,7 @@ static void err_print_uc(struct drm_i915_error_state_buf *m, intel_uc_fw_dump(&error_uc->guc_fw, &p); intel_uc_fw_dump(&error_uc->huc_fw, &p); - print_error_vma(m, NULL, error_uc->guc_log); + intel_gpu_error_print_vma(m, NULL, error_uc->guc_log); } static void err_free_sgl(struct scatterlist *sgl) @@ -713,26 +707,33 @@ static void err_print_gt_info(struct drm_i915_error_state_buf *m, struct drm_printer p = i915_error_printer(m); intel_gt_info_print(>->info, &p); - intel_sseu_print_topology(>->info.sseu, &p); + intel_sseu_print_topology(gt->_gt->i915, >->info.sseu, &p); } -static void err_print_gt(struct drm_i915_error_state_buf *m, - struct intel_gt_coredump *gt) +static void err_print_gt_display(struct drm_i915_error_state_buf *m, + struct intel_gt_coredump *gt) +{ + err_printf(m, "IER: 0x%08x\n", gt->ier); + err_printf(m, "DERRMR: 0x%08x\n", gt->derrmr); +} + +static void err_print_gt_global_nonguc(struct drm_i915_error_state_buf *m, + struct intel_gt_coredump *gt) { - const struct intel_engine_coredump *ee; int i; err_printf(m, "GT awake: %s\n", str_yes_no(gt->awake)); err_printf(m, "EIR: 0x%08x\n", gt->eir); - err_printf(m, "IER: 0x%08x\n", gt->ier); + err_printf(m, "PGTBL_ER: 0x%08x\n", gt->pgtbl_er); + for (i = 0; i < gt->ngtier; i++) err_printf(m, "GTIER[%d]: 0x%08x\n", i, gt->gtier[i]); - err_printf(m, "PGTBL_ER: 0x%08x\n", gt->pgtbl_er); - err_printf(m, "FORCEWAKE: 0x%08x\n", gt->forcewake); - err_printf(m, "DERRMR: 0x%08x\n", gt->derrmr); +} - for (i = 0; i < gt->nfence; i++) - err_printf(m, " fence[%d] = %08llx\n", i, gt->fence[i]); +static void err_print_gt_global(struct drm_i915_error_state_buf *m, + struct intel_gt_coredump *gt) +{ + err_printf(m, "FORCEWAKE: 0x%08x\n", gt->forcewake); if (IS_GRAPHICS_VER(m->i915, 6, 11)) { err_printf(m, "ERROR: 0x%08x\n", gt->error); @@ -755,7 +756,7 @@ static void err_print_gt(struct drm_i915_error_state_buf *m, if (GRAPHICS_VER(m->i915) >= 12) { int i; - for (i = 0; i < GEN12_SFC_DONE_MAX; i++) { + for (i = 0; i < I915_MAX_SFC; i++) { /* * SFC_DONE resides in the VD forcewake domain, so it * only exists if the corresponding VCS engine is @@ -771,19 +772,38 @@ static void err_print_gt(struct drm_i915_error_state_buf *m, err_printf(m, " GAM_DONE: 0x%08x\n", gt->gam_done); } +} + +static void err_print_gt_fences(struct drm_i915_error_state_buf *m, + struct intel_gt_coredump *gt) +{ + int i; + + for (i = 0; i < gt->nfence; i++) + err_printf(m, " fence[%d] = %08llx\n", i, gt->fence[i]); +} + +static void err_print_gt_engines(struct drm_i915_error_state_buf *m, + struct intel_gt_coredump *gt) +{ + const struct intel_engine_coredump *ee; for (ee = gt->engine; ee; ee = ee->next) { const struct i915_vma_coredump *vma; - error_print_engine(m, ee); + if (ee->guc_capture_node) + intel_guc_capture_print_engine_node(m, ee); + else + error_print_engine(m, ee); + + err_printf(m, " hung: %u\n", ee->hung); + err_printf(m, " engine reset count: %u\n", ee->reset_count); + error_print_context(m, " Active context: ", &ee->context); + for (vma = ee->vma; vma; vma = vma->next) - print_error_vma(m, ee->engine, vma); + intel_gpu_error_print_vma(m, ee->engine, vma); } - if (gt->uc) - err_print_uc(m, gt->uc); - - err_print_gt_info(m, gt); } static void __err_print_to_sgl(struct drm_i915_error_state_buf *m, @@ -831,8 +851,30 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m, err_printf(m, "RPM wakelock: %s\n", str_yes_no(error->wakelock)); err_printf(m, "PM suspended: %s\n", str_yes_no(error->suspended)); - if (error->gt) - err_print_gt(m, error->gt); + if (error->gt) { + bool print_guc_capture = false; + + if (error->gt->uc && error->gt->uc->is_guc_capture) + print_guc_capture = true; + + err_print_gt_display(m, error->gt); + err_print_gt_global_nonguc(m, error->gt); + err_print_gt_fences(m, error->gt); + + /* + * GuC dumped global, eng-class and eng-instance registers together + * as part of engine state dump so we print in err_print_gt_engines + */ + if (!print_guc_capture) + err_print_gt_global(m, error->gt); + + err_print_gt_engines(m, error->gt); + + if (error->gt->uc) + err_print_uc(m, error->gt->uc); + + err_print_gt_info(m, error->gt); + } if (error->overlay) intel_overlay_print_error_state(m, error->overlay); @@ -980,6 +1022,7 @@ static void cleanup_gt(struct intel_gt_coredump *gt) gt->engine = ee->next; i915_vma_coredump_free(ee->vma); + intel_guc_capture_free_node(ee); kfree(ee); } @@ -1313,8 +1356,8 @@ static bool record_context(struct i915_gem_context_coredump *e, e->guilty = atomic_read(&ctx->guilty_count); e->active = atomic_read(&ctx->active_count); - e->total_runtime = rq->context->runtime.total; - e->avg_runtime = ewma_runtime_read(&rq->context->runtime.avg); + e->total_runtime = intel_context_get_total_runtime_ns(rq->context); + e->avg_runtime = intel_context_get_avg_runtime_ns(rq->context); simulated = i915_gem_context_no_error_capture(ctx); @@ -1431,7 +1474,7 @@ static void add_vma_coredump(struct intel_engine_coredump *ee, } struct intel_engine_coredump * -intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp) +intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp, u32 dump_flags) { struct intel_engine_coredump *ee; @@ -1441,8 +1484,10 @@ intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp) ee->engine = engine; - engine_record_registers(ee); - engine_record_execlists(ee); + if (!(dump_flags & CORE_DUMP_FLAG_IS_GUC_CAPTURE)) { + engine_record_registers(ee); + engine_record_execlists(ee); + } return ee; } @@ -1506,7 +1551,8 @@ intel_engine_coredump_add_vma(struct intel_engine_coredump *ee, static struct intel_engine_coredump * capture_engine(struct intel_engine_cs *engine, - struct i915_vma_compress *compress) + struct i915_vma_compress *compress, + u32 dump_flags) { struct intel_engine_capture_vma *capture = NULL; struct intel_engine_coredump *ee; @@ -1514,7 +1560,7 @@ capture_engine(struct intel_engine_cs *engine, struct i915_request *rq = NULL; unsigned long flags; - ee = intel_engine_coredump_alloc(engine, ALLOW_FAIL); + ee = intel_engine_coredump_alloc(engine, ALLOW_FAIL, dump_flags); if (!ee) return NULL; @@ -1547,6 +1593,8 @@ capture_engine(struct intel_engine_cs *engine, i915_request_put(rq); goto no_request_capture; } + if (dump_flags & CORE_DUMP_FLAG_IS_GUC_CAPTURE) + intel_guc_capture_get_matching_node(engine->gt, ee, ce); intel_engine_coredump_add_vma(ee, capture, compress); i915_request_put(rq); @@ -1561,7 +1609,8 @@ no_request_capture: static void gt_record_engines(struct intel_gt_coredump *gt, intel_engine_mask_t engine_mask, - struct i915_vma_compress *compress) + struct i915_vma_compress *compress, + u32 dump_flags) { struct intel_engine_cs *engine; enum intel_engine_id id; @@ -1572,7 +1621,7 @@ gt_record_engines(struct intel_gt_coredump *gt, /* Refill our page pool before entering atomic section */ pool_refill(&compress->pool, ALLOW_FAIL); - ee = capture_engine(engine, compress); + ee = capture_engine(engine, compress, dump_flags); if (!ee) continue; @@ -1580,6 +1629,8 @@ gt_record_engines(struct intel_gt_coredump *gt, gt->simulated |= ee->simulated; if (ee->simulated) { + if (dump_flags & CORE_DUMP_FLAG_IS_GUC_CAPTURE) + intel_guc_capture_free_node(ee); kfree(ee); continue; } @@ -1615,8 +1666,74 @@ gt_record_uc(struct intel_gt_coredump *gt, return error_uc; } -/* Capture all registers which don't fit into another category. */ -static void gt_record_regs(struct intel_gt_coredump *gt) +/* Capture display registers. */ +static void gt_record_display_regs(struct intel_gt_coredump *gt) +{ + struct intel_uncore *uncore = gt->_gt->uncore; + struct drm_i915_private *i915 = uncore->i915; + + if (GRAPHICS_VER(i915) >= 6) + gt->derrmr = intel_uncore_read(uncore, DERRMR); + + if (GRAPHICS_VER(i915) >= 8) + gt->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER); + else if (IS_VALLEYVIEW(i915)) + gt->ier = intel_uncore_read(uncore, VLV_IER); + else if (HAS_PCH_SPLIT(i915)) + gt->ier = intel_uncore_read(uncore, DEIER); + else if (GRAPHICS_VER(i915) == 2) + gt->ier = intel_uncore_read16(uncore, GEN2_IER); + else + gt->ier = intel_uncore_read(uncore, GEN2_IER); +} + +/* Capture all other registers that GuC doesn't capture. */ +static void gt_record_global_nonguc_regs(struct intel_gt_coredump *gt) +{ + struct intel_uncore *uncore = gt->_gt->uncore; + struct drm_i915_private *i915 = uncore->i915; + int i; + + if (IS_VALLEYVIEW(i915)) { + gt->gtier[0] = intel_uncore_read(uncore, GTIER); + gt->ngtier = 1; + } else if (GRAPHICS_VER(i915) >= 11) { + gt->gtier[0] = + intel_uncore_read(uncore, + GEN11_RENDER_COPY_INTR_ENABLE); + gt->gtier[1] = + intel_uncore_read(uncore, GEN11_VCS_VECS_INTR_ENABLE); + gt->gtier[2] = + intel_uncore_read(uncore, GEN11_GUC_SG_INTR_ENABLE); + gt->gtier[3] = + intel_uncore_read(uncore, + GEN11_GPM_WGBOXPERF_INTR_ENABLE); + gt->gtier[4] = + intel_uncore_read(uncore, + GEN11_CRYPTO_RSVD_INTR_ENABLE); + gt->gtier[5] = + intel_uncore_read(uncore, + GEN11_GUNIT_CSME_INTR_ENABLE); + gt->ngtier = 6; + } else if (GRAPHICS_VER(i915) >= 8) { + for (i = 0; i < 4; i++) + gt->gtier[i] = + intel_uncore_read(uncore, GEN8_GT_IER(i)); + gt->ngtier = 4; + } else if (HAS_PCH_SPLIT(i915)) { + gt->gtier[0] = intel_uncore_read(uncore, GTIER); + gt->ngtier = 1; + } + + gt->eir = intel_uncore_read(uncore, EIR); + gt->pgtbl_er = intel_uncore_read(uncore, PGTBL_ER); +} + +/* + * Capture all registers that relate to workload submission. + * NOTE: In GuC submission, when GuC resets an engine, it can dump these for us + */ +static void gt_record_global_regs(struct intel_gt_coredump *gt) { struct intel_uncore *uncore = gt->_gt->uncore; struct drm_i915_private *i915 = uncore->i915; @@ -1632,11 +1749,8 @@ static void gt_record_regs(struct intel_gt_coredump *gt) */ /* 1: Registers specific to a single generation */ - if (IS_VALLEYVIEW(i915)) { - gt->gtier[0] = intel_uncore_read(uncore, GTIER); - gt->ier = intel_uncore_read(uncore, VLV_IER); + if (IS_VALLEYVIEW(i915)) gt->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE_VLV); - } if (GRAPHICS_VER(i915) == 7) gt->err_int = intel_uncore_read(uncore, GEN7_ERR_INT); @@ -1664,7 +1778,6 @@ static void gt_record_regs(struct intel_gt_coredump *gt) gt->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE_MT); if (GRAPHICS_VER(i915) >= 6) { - gt->derrmr = intel_uncore_read(uncore, DERRMR); if (GRAPHICS_VER(i915) < 12) { gt->error = intel_uncore_read(uncore, ERROR_GEN6); gt->done_reg = intel_uncore_read(uncore, DONE_REG); @@ -1684,7 +1797,7 @@ static void gt_record_regs(struct intel_gt_coredump *gt) gt->aux_err = intel_uncore_read(uncore, GEN12_AUX_ERR_DBG); if (GRAPHICS_VER(i915) >= 12) { - for (i = 0; i < GEN12_SFC_DONE_MAX; i++) { + for (i = 0; i < I915_MAX_SFC; i++) { /* * SFC_DONE resides in the VD forcewake domain, so it * only exists if the corresponding VCS engine is @@ -1700,44 +1813,6 @@ static void gt_record_regs(struct intel_gt_coredump *gt) gt->gam_done = intel_uncore_read(uncore, GEN12_GAM_DONE); } - - /* 4: Everything else */ - if (GRAPHICS_VER(i915) >= 11) { - gt->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER); - gt->gtier[0] = - intel_uncore_read(uncore, - GEN11_RENDER_COPY_INTR_ENABLE); - gt->gtier[1] = - intel_uncore_read(uncore, GEN11_VCS_VECS_INTR_ENABLE); - gt->gtier[2] = - intel_uncore_read(uncore, GEN11_GUC_SG_INTR_ENABLE); - gt->gtier[3] = - intel_uncore_read(uncore, - GEN11_GPM_WGBOXPERF_INTR_ENABLE); - gt->gtier[4] = - intel_uncore_read(uncore, - GEN11_CRYPTO_RSVD_INTR_ENABLE); - gt->gtier[5] = - intel_uncore_read(uncore, - GEN11_GUNIT_CSME_INTR_ENABLE); - gt->ngtier = 6; - } else if (GRAPHICS_VER(i915) >= 8) { - gt->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER); - for (i = 0; i < 4; i++) - gt->gtier[i] = - intel_uncore_read(uncore, GEN8_GT_IER(i)); - gt->ngtier = 4; - } else if (HAS_PCH_SPLIT(i915)) { - gt->ier = intel_uncore_read(uncore, DEIER); - gt->gtier[0] = intel_uncore_read(uncore, GTIER); - gt->ngtier = 1; - } else if (GRAPHICS_VER(i915) == 2) { - gt->ier = intel_uncore_read16(uncore, GEN2_IER); - } else if (!IS_VALLEYVIEW(i915)) { - gt->ier = intel_uncore_read(uncore, GEN2_IER); - } - gt->eir = intel_uncore_read(uncore, EIR); - gt->pgtbl_er = intel_uncore_read(uncore, PGTBL_ER); } static void gt_record_info(struct intel_gt_coredump *gt) @@ -1849,7 +1924,7 @@ i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp) #define DAY_AS_SECONDS(x) (24 * 60 * 60 * (x)) struct intel_gt_coredump * -intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp) +intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp, u32 dump_flags) { struct intel_gt_coredump *gc; @@ -1860,7 +1935,21 @@ intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp) gc->_gt = gt; gc->awake = intel_gt_pm_is_awake(gt); - gt_record_regs(gc); + gt_record_display_regs(gc); + gt_record_global_nonguc_regs(gc); + + /* + * GuC dumps global, eng-class and eng-instance registers + * (that can change as part of engine state during execution) + * before an engine is reset due to a hung context. + * GuC captures and reports all three groups of registers + * together as a single set before the engine is reset. + * Thus, if GuC triggered the context reset we retrieve + * the register values as part of gt_record_engines. + */ + if (!(dump_flags & CORE_DUMP_FLAG_IS_GUC_CAPTURE)) + gt_record_global_regs(gc); + gt_record_fences(gc); return gc; @@ -1894,7 +1983,7 @@ void i915_vma_capture_finish(struct intel_gt_coredump *gt, } static struct i915_gpu_coredump * -__i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask) +__i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask, u32 dump_flags) { struct drm_i915_private *i915 = gt->i915; struct i915_gpu_coredump *error; @@ -1908,7 +1997,7 @@ __i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask) if (!error) return ERR_PTR(-ENOMEM); - error->gt = intel_gt_coredump_alloc(gt, ALLOW_FAIL); + error->gt = intel_gt_coredump_alloc(gt, ALLOW_FAIL, dump_flags); if (error->gt) { struct i915_vma_compress *compress; @@ -1919,11 +2008,19 @@ __i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask) return ERR_PTR(-ENOMEM); } + if (INTEL_INFO(i915)->has_gt_uc) { + error->gt->uc = gt_record_uc(error->gt, compress); + if (error->gt->uc) { + if (dump_flags & CORE_DUMP_FLAG_IS_GUC_CAPTURE) + error->gt->uc->is_guc_capture = true; + else + GEM_BUG_ON(error->gt->uc->is_guc_capture); + } + } + gt_record_info(error->gt); - gt_record_engines(error->gt, engine_mask, compress); + gt_record_engines(error->gt, engine_mask, compress, dump_flags); - if (INTEL_INFO(i915)->has_gt_uc) - error->gt->uc = gt_record_uc(error->gt, compress); i915_vma_capture_finish(error->gt, compress); @@ -1936,7 +2033,7 @@ __i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask) } struct i915_gpu_coredump * -i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask) +i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask, u32 dump_flags) { static DEFINE_MUTEX(capture_mutex); int ret = mutex_lock_interruptible(&capture_mutex); @@ -1945,7 +2042,7 @@ i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask) if (ret) return ERR_PTR(ret); - dump = __i915_gpu_coredump(gt, engine_mask); + dump = __i915_gpu_coredump(gt, engine_mask, dump_flags); mutex_unlock(&capture_mutex); return dump; @@ -1992,11 +2089,11 @@ void i915_error_state_store(struct i915_gpu_coredump *error) * to pick up. */ void i915_capture_error_state(struct intel_gt *gt, - intel_engine_mask_t engine_mask) + intel_engine_mask_t engine_mask, u32 dump_flags) { struct i915_gpu_coredump *error; - error = i915_gpu_coredump(gt, engine_mask); + error = i915_gpu_coredump(gt, engine_mask, dump_flags); if (IS_ERR(error)) { cmpxchg(>->i915->gpu_error.first_error, NULL, error); return; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 88ce3a08f555..a611abacd9c2 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -53,6 +53,8 @@ struct i915_request_coredump { struct i915_sched_attr sched_attr; }; +struct __guc_capture_parsed_output; + struct intel_engine_coredump { const struct intel_engine_cs *engine; @@ -84,11 +86,15 @@ struct intel_engine_coredump { u32 rc_psmi; /* sleep state */ struct intel_instdone instdone; + /* GuC matched capture-lists info */ + struct intel_guc_state_capture *capture; + struct __guc_capture_parsed_output *guc_capture_node; + struct i915_gem_context_coredump { char comm[TASK_COMM_LEN]; u64 total_runtime; - u32 avg_runtime; + u64 avg_runtime; pid_t pid; int active; @@ -124,7 +130,6 @@ struct intel_gt_coredump { u32 pgtbl_er; u32 ier; u32 gtier[6], ngtier; - u32 derrmr; u32 forcewake; u32 error; /* gen6+ */ u32 err_int; /* gen7 */ @@ -137,9 +142,12 @@ struct intel_gt_coredump { u32 gfx_mode; u32 gtt_cache; u32 aux_err; /* gen12 */ - u32 sfc_done[GEN12_SFC_DONE_MAX]; /* gen12 */ u32 gam_done; /* gen12 */ + /* Display related */ + u32 derrmr; + u32 sfc_done[I915_MAX_SFC]; /* gen12 */ + u32 nfence; u64 fence[I915_MAX_NUM_FENCES]; @@ -149,6 +157,7 @@ struct intel_gt_coredump { struct intel_uc_fw guc_fw; struct intel_uc_fw huc_fw; struct i915_vma_coredump *guc_log; + bool is_guc_capture; } *uc; struct intel_gt_coredump *next; @@ -221,24 +230,32 @@ static inline u32 i915_reset_engine_count(struct i915_gpu_error *error, return atomic_read(&error->reset_engine_count[engine->uabi_class]); } +#define CORE_DUMP_FLAG_NONE 0x0 +#define CORE_DUMP_FLAG_IS_GUC_CAPTURE BIT(0) + #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) __printf(2, 3) void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...); +void intel_gpu_error_print_vma(struct drm_i915_error_state_buf *m, + const struct intel_engine_cs *engine, + const struct i915_vma_coredump *vma); +struct i915_vma_coredump * +intel_gpu_error_find_batch(const struct intel_engine_coredump *ee); struct i915_gpu_coredump *i915_gpu_coredump(struct intel_gt *gt, - intel_engine_mask_t engine_mask); + intel_engine_mask_t engine_mask, u32 dump_flags); void i915_capture_error_state(struct intel_gt *gt, - intel_engine_mask_t engine_mask); + intel_engine_mask_t engine_mask, u32 dump_flags); struct i915_gpu_coredump * i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp); struct intel_gt_coredump * -intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp); +intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp, u32 dump_flags); struct intel_engine_coredump * -intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp); +intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp, u32 dump_flags); struct intel_engine_capture_vma * intel_engine_coredump_add_request(struct intel_engine_coredump *ee, @@ -288,7 +305,7 @@ i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...) } static inline void -i915_capture_error_state(struct intel_gt *gt, intel_engine_mask_t engine_mask) +i915_capture_error_state(struct intel_gt *gt, intel_engine_mask_t engine_mask, u32 dump_flags) { } @@ -299,13 +316,13 @@ i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp) } static inline struct intel_gt_coredump * -intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp) +intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp, u32 dump_flags) { return NULL; } static inline struct intel_engine_coredump * -intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp) +intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp, u32 dump_flags) { return NULL; } diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 5ad071e09301..701fbc98afa0 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -202,6 +202,9 @@ i915_param_named_unsafe(request_timeout_ms, uint, 0600, "Default request/fence/batch buffer expiration timeout."); #endif +i915_param_named_unsafe(lmem_size, uint, 0400, + "Set the lmem size(in MiB) for each region. (default: 0, all memory)"); + static __always_inline void _print_param(struct drm_printer *p, const char *name, const char *type, diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index c779a6f85c7e..b5e7ea45d191 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -73,6 +73,7 @@ struct drm_printer; param(int, enable_dpcd_backlight, -1, 0600) \ param(char *, force_probe, CONFIG_DRM_I915_FORCE_PROBE, 0400) \ param(unsigned int, request_timeout_ms, CONFIG_DRM_I915_REQUEST_TIMEOUT, CONFIG_DRM_I915_REQUEST_TIMEOUT ? 0600 : 0) \ + param(unsigned int, lmem_size, 0, 0400) \ /* leave bools at the end to not create holes */ \ param(bool, enable_hangcheck, true, 0600) \ param(bool, load_detect_test, false, 0600) \ diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 9e077929ed67..38f7de778914 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -901,7 +901,8 @@ static const struct intel_device_info rkl_info = { .has_llc = 0, \ .has_pxp = 0, \ .has_snoop = 1, \ - .is_dgfx = 1 + .is_dgfx = 1, \ + .has_heci_gscfi = 1 static const struct intel_device_info dg1_info = { GEN12_FEATURES, @@ -1050,6 +1051,7 @@ static const struct intel_device_info xehpsdv_info = { .has_4tile = 1, \ .has_64k_pages = 1, \ .has_guc_deprivilege = 1, \ + .has_heci_pxp = 1, \ .needs_compact_pt = 1, \ .platform_engine_mask = \ BIT(RCS0) | BIT(BCS0) | \ diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c index 2dfbc22857a3..7584cec53d5d 100644 --- a/drivers/gpu/drm/i915/i915_query.c +++ b/drivers/gpu/drm/i915/i915_query.c @@ -9,6 +9,7 @@ #include "i915_drv.h" #include "i915_perf.h" #include "i915_query.h" +#include "gt/intel_engine_user.h" #include <uapi/drm/i915_drm.h> static int copy_query_item(void *query_hdr, size_t query_sz, @@ -28,36 +29,30 @@ static int copy_query_item(void *query_hdr, size_t query_sz, return 0; } -static int query_topology_info(struct drm_i915_private *dev_priv, - struct drm_i915_query_item *query_item) +static int fill_topology_info(const struct sseu_dev_info *sseu, + struct drm_i915_query_item *query_item, + const u8 *subslice_mask) { - const struct sseu_dev_info *sseu = &to_gt(dev_priv)->info.sseu; struct drm_i915_query_topology_info topo; u32 slice_length, subslice_length, eu_length, total_length; int ret; - if (query_item->flags != 0) - return -EINVAL; + BUILD_BUG_ON(sizeof(u8) != sizeof(sseu->slice_mask)); if (sseu->max_slices == 0) return -ENODEV; - BUILD_BUG_ON(sizeof(u8) != sizeof(sseu->slice_mask)); - slice_length = sizeof(sseu->slice_mask); subslice_length = sseu->max_slices * sseu->ss_stride; eu_length = sseu->max_slices * sseu->max_subslices * sseu->eu_stride; total_length = sizeof(topo) + slice_length + subslice_length + eu_length; - ret = copy_query_item(&topo, sizeof(topo), total_length, - query_item); + ret = copy_query_item(&topo, sizeof(topo), total_length, query_item); + if (ret != 0) return ret; - if (topo.flags != 0) - return -EINVAL; - memset(&topo, 0, sizeof(topo)); topo.max_slices = sseu->max_slices; topo.max_subslices = sseu->max_subslices; @@ -69,27 +64,64 @@ static int query_topology_info(struct drm_i915_private *dev_priv, topo.eu_stride = sseu->eu_stride; if (copy_to_user(u64_to_user_ptr(query_item->data_ptr), - &topo, sizeof(topo))) + &topo, sizeof(topo))) return -EFAULT; if (copy_to_user(u64_to_user_ptr(query_item->data_ptr + sizeof(topo)), - &sseu->slice_mask, slice_length)) + &sseu->slice_mask, slice_length)) return -EFAULT; if (copy_to_user(u64_to_user_ptr(query_item->data_ptr + - sizeof(topo) + slice_length), - sseu->subslice_mask, subslice_length)) + sizeof(topo) + slice_length), + subslice_mask, subslice_length)) return -EFAULT; if (copy_to_user(u64_to_user_ptr(query_item->data_ptr + - sizeof(topo) + - slice_length + subslice_length), - sseu->eu_mask, eu_length)) + sizeof(topo) + + slice_length + subslice_length), + sseu->eu_mask, eu_length)) return -EFAULT; return total_length; } +static int query_topology_info(struct drm_i915_private *dev_priv, + struct drm_i915_query_item *query_item) +{ + const struct sseu_dev_info *sseu = &to_gt(dev_priv)->info.sseu; + + if (query_item->flags != 0) + return -EINVAL; + + return fill_topology_info(sseu, query_item, sseu->subslice_mask); +} + +static int query_geometry_subslices(struct drm_i915_private *i915, + struct drm_i915_query_item *query_item) +{ + const struct sseu_dev_info *sseu; + struct intel_engine_cs *engine; + struct i915_engine_class_instance classinstance; + + if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) + return -ENODEV; + + classinstance = *((struct i915_engine_class_instance *)&query_item->flags); + + engine = intel_engine_lookup_user(i915, (u8)classinstance.engine_class, + (u8)classinstance.engine_instance); + + if (!engine) + return -EINVAL; + + if (engine->class != RENDER_CLASS) + return -EINVAL; + + sseu = &engine->gt->info.sseu; + + return fill_topology_info(sseu, query_item, sseu->geometry_subslice_mask); +} + static int query_engine_info(struct drm_i915_private *i915, struct drm_i915_query_item *query_item) @@ -479,12 +511,36 @@ static int query_memregion_info(struct drm_i915_private *i915, return total_length; } +static int query_hwconfig_blob(struct drm_i915_private *i915, + struct drm_i915_query_item *query_item) +{ + struct intel_gt *gt = to_gt(i915); + struct intel_hwconfig *hwconfig = >->info.hwconfig; + + if (!hwconfig->size || !hwconfig->ptr) + return -ENODEV; + + if (query_item->length == 0) + return hwconfig->size; + + if (query_item->length < hwconfig->size) + return -EINVAL; + + if (copy_to_user(u64_to_user_ptr(query_item->data_ptr), + hwconfig->ptr, hwconfig->size)) + return -EFAULT; + + return hwconfig->size; +} + static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv, struct drm_i915_query_item *query_item) = { query_topology_info, query_engine_info, query_perf_config, query_memregion_info, + query_hwconfig_blob, + query_geometry_subslices, }; int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 51f46fe45c72..98bb53226d6b 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -976,6 +976,10 @@ #define GEN12_COMPUTE2_RING_BASE 0x1e000 #define GEN12_COMPUTE3_RING_BASE 0x26000 #define BLT_RING_BASE 0x22000 +#define DG1_GSC_HECI1_BASE 0x00258000 +#define DG1_GSC_HECI2_BASE 0x00259000 +#define DG2_GSC_HECI1_BASE 0x00373000 +#define DG2_GSC_HECI2_BASE 0x00374000 @@ -1842,6 +1846,17 @@ #define GEN9_RP_STATE_LIMITS _MMIO(0x138148) #define XEHPSDV_RP_STATE_CAP _MMIO(0x250014) +#define GT0_PERF_LIMIT_REASONS _MMIO(0x1381a8) +#define GT0_PERF_LIMIT_REASONS_MASK 0xde3 +#define PROCHOT_MASK REG_BIT(1) +#define THERMAL_LIMIT_MASK REG_BIT(2) +#define RATL_MASK REG_BIT(6) +#define VR_THERMALERT_MASK REG_BIT(7) +#define VR_TDC_MASK REG_BIT(8) +#define POWER_LIMIT_4_MASK REG_BIT(9) +#define POWER_LIMIT_1_MASK REG_BIT(11) +#define POWER_LIMIT_2_MASK REG_BIT(12) + #define CHV_CLK_CTL1 _MMIO(0x101100) #define VLV_CLK_CTL2 _MMIO(0x101104) #define CLK_CTL2_CZCOUNT_30NS_SHIFT 28 @@ -4352,12 +4367,12 @@ #define _DSPAADDR 0x70184 #define _DSPASTRIDE 0x70188 #define _DSPAPOS 0x7018C /* reserved */ -#define DISP_POS_Y_MASK REG_GENMASK(31, 0) +#define DISP_POS_Y_MASK REG_GENMASK(31, 16) #define DISP_POS_Y(y) REG_FIELD_PREP(DISP_POS_Y_MASK, (y)) #define DISP_POS_X_MASK REG_GENMASK(15, 0) #define DISP_POS_X(x) REG_FIELD_PREP(DISP_POS_X_MASK, (x)) #define _DSPASIZE 0x70190 -#define DISP_HEIGHT_MASK REG_GENMASK(31, 0) +#define DISP_HEIGHT_MASK REG_GENMASK(31, 16) #define DISP_HEIGHT(h) REG_FIELD_PREP(DISP_HEIGHT_MASK, (h)) #define DISP_WIDTH_MASK REG_GENMASK(15, 0) #define DISP_WIDTH(w) REG_FIELD_PREP(DISP_WIDTH_MASK, (w)) @@ -5160,7 +5175,7 @@ #define _SEL_FETCH_PLANE_BASE_6_A 0x70940 #define _SEL_FETCH_PLANE_BASE_7_A 0x70960 #define _SEL_FETCH_PLANE_BASE_CUR_A 0x70880 -#define _SEL_FETCH_PLANE_BASE_1_B 0x70990 +#define _SEL_FETCH_PLANE_BASE_1_B 0x71890 #define _SEL_FETCH_PLANE_BASE_A(plane) _PICK(plane, \ _SEL_FETCH_PLANE_BASE_1_A, \ @@ -8460,6 +8475,9 @@ enum skl_power_gate { #define SGGI_DIS REG_BIT(15) #define SGR_DIS REG_BIT(13) +#define XEHPSDV_TILE0_ADDR_RANGE _MMIO(0x4900) +#define XEHPSDV_TILE_LMEM_RANGE_SHIFT 8 + #define XEHPSDV_FLAT_CCS_BASE_ADDR _MMIO(0x4910) #define XEHPSDV_CCS_BASE_SHIFT 8 diff --git a/drivers/gpu/drm/i915/i915_reg_defs.h b/drivers/gpu/drm/i915/i915_reg_defs.h index d78d78fce431..8f486f77609f 100644 --- a/drivers/gpu/drm/i915/i915_reg_defs.h +++ b/drivers/gpu/drm/i915/i915_reg_defs.h @@ -123,6 +123,4 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define VLV_DISPLAY_BASE 0x180000 -#define GEN12_SFC_DONE_MAX 4 - #endif /* __I915_REG_DEFS__ */ diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index a4d1759375b9..8521daba212a 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -39,113 +39,12 @@ #include "i915_sysfs.h" #include "intel_pm.h" -static inline struct drm_i915_private *kdev_minor_to_i915(struct device *kdev) +struct drm_i915_private *kdev_minor_to_i915(struct device *kdev) { struct drm_minor *minor = dev_get_drvdata(kdev); return to_i915(minor->dev); } -#ifdef CONFIG_PM -static u32 calc_residency(struct drm_i915_private *dev_priv, - i915_reg_t reg) -{ - intel_wakeref_t wakeref; - u64 res = 0; - - with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) - res = intel_rc6_residency_us(&to_gt(dev_priv)->rc6, reg); - - return DIV_ROUND_CLOSEST_ULL(res, 1000); -} - -static ssize_t rc6_enable_show(struct device *kdev, - struct device_attribute *attr, char *buf) -{ - struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - unsigned int mask; - - mask = 0; - if (HAS_RC6(dev_priv)) - mask |= BIT(0); - if (HAS_RC6p(dev_priv)) - mask |= BIT(1); - if (HAS_RC6pp(dev_priv)) - mask |= BIT(2); - - return sysfs_emit(buf, "%x\n", mask); -} - -static ssize_t rc6_residency_ms_show(struct device *kdev, - struct device_attribute *attr, char *buf) -{ - struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - u32 rc6_residency = calc_residency(dev_priv, GEN6_GT_GFX_RC6); - return sysfs_emit(buf, "%u\n", rc6_residency); -} - -static ssize_t rc6p_residency_ms_show(struct device *kdev, - struct device_attribute *attr, char *buf) -{ - struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - u32 rc6p_residency = calc_residency(dev_priv, GEN6_GT_GFX_RC6p); - return sysfs_emit(buf, "%u\n", rc6p_residency); -} - -static ssize_t rc6pp_residency_ms_show(struct device *kdev, - struct device_attribute *attr, char *buf) -{ - struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - u32 rc6pp_residency = calc_residency(dev_priv, GEN6_GT_GFX_RC6pp); - return sysfs_emit(buf, "%u\n", rc6pp_residency); -} - -static ssize_t media_rc6_residency_ms_show(struct device *kdev, - struct device_attribute *attr, char *buf) -{ - struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - u32 rc6_residency = calc_residency(dev_priv, VLV_GT_MEDIA_RC6); - return sysfs_emit(buf, "%u\n", rc6_residency); -} - -static DEVICE_ATTR_RO(rc6_enable); -static DEVICE_ATTR_RO(rc6_residency_ms); -static DEVICE_ATTR_RO(rc6p_residency_ms); -static DEVICE_ATTR_RO(rc6pp_residency_ms); -static DEVICE_ATTR_RO(media_rc6_residency_ms); - -static struct attribute *rc6_attrs[] = { - &dev_attr_rc6_enable.attr, - &dev_attr_rc6_residency_ms.attr, - NULL -}; - -static const struct attribute_group rc6_attr_group = { - .name = power_group_name, - .attrs = rc6_attrs -}; - -static struct attribute *rc6p_attrs[] = { - &dev_attr_rc6p_residency_ms.attr, - &dev_attr_rc6pp_residency_ms.attr, - NULL -}; - -static const struct attribute_group rc6p_attr_group = { - .name = power_group_name, - .attrs = rc6p_attrs -}; - -static struct attribute *media_rc6_attrs[] = { - &dev_attr_media_rc6_residency_ms.attr, - NULL -}; - -static const struct attribute_group media_rc6_attr_group = { - .name = power_group_name, - .attrs = media_rc6_attrs -}; -#endif - static int l3_access_valid(struct drm_i915_private *i915, loff_t offset) { if (!HAS_L3_DPF(i915)) @@ -257,171 +156,6 @@ static const struct bin_attribute dpf_attrs_1 = { .private = (void *)1 }; -static ssize_t gt_act_freq_mhz_show(struct device *kdev, - struct device_attribute *attr, char *buf) -{ - struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &to_gt(i915)->rps; - - return sysfs_emit(buf, "%d\n", intel_rps_read_actual_frequency(rps)); -} - -static ssize_t gt_cur_freq_mhz_show(struct device *kdev, - struct device_attribute *attr, char *buf) -{ - struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &to_gt(i915)->rps; - - return sysfs_emit(buf, "%d\n", intel_rps_get_requested_frequency(rps)); -} - -static ssize_t gt_boost_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) -{ - struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &to_gt(i915)->rps; - - return sysfs_emit(buf, "%d\n", intel_rps_get_boost_frequency(rps)); -} - -static ssize_t gt_boost_freq_mhz_store(struct device *kdev, - struct device_attribute *attr, - const char *buf, size_t count) -{ - struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &to_gt(dev_priv)->rps; - ssize_t ret; - u32 val; - - ret = kstrtou32(buf, 0, &val); - if (ret) - return ret; - - ret = intel_rps_set_boost_frequency(rps, val); - - return ret ?: count; -} - -static ssize_t vlv_rpe_freq_mhz_show(struct device *kdev, - struct device_attribute *attr, char *buf) -{ - struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &to_gt(dev_priv)->rps; - - return sysfs_emit(buf, "%d\n", intel_gpu_freq(rps, rps->efficient_freq)); -} - -static ssize_t gt_max_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) -{ - struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct intel_gt *gt = to_gt(dev_priv); - struct intel_rps *rps = >->rps; - - return sysfs_emit(buf, "%d\n", intel_rps_get_max_frequency(rps)); -} - -static ssize_t gt_max_freq_mhz_store(struct device *kdev, - struct device_attribute *attr, - const char *buf, size_t count) -{ - struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct intel_gt *gt = to_gt(dev_priv); - struct intel_rps *rps = >->rps; - ssize_t ret; - u32 val; - - ret = kstrtou32(buf, 0, &val); - if (ret) - return ret; - - ret = intel_rps_set_max_frequency(rps, val); - - return ret ?: count; -} - -static ssize_t gt_min_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) -{ - struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); - struct intel_gt *gt = to_gt(i915); - struct intel_rps *rps = >->rps; - - return sysfs_emit(buf, "%d\n", intel_rps_get_min_frequency(rps)); -} - -static ssize_t gt_min_freq_mhz_store(struct device *kdev, - struct device_attribute *attr, - const char *buf, size_t count) -{ - struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &to_gt(i915)->rps; - ssize_t ret; - u32 val; - - ret = kstrtou32(buf, 0, &val); - if (ret) - return ret; - - ret = intel_rps_set_min_frequency(rps, val); - - return ret ?: count; -} - -static DEVICE_ATTR_RO(gt_act_freq_mhz); -static DEVICE_ATTR_RO(gt_cur_freq_mhz); -static DEVICE_ATTR_RW(gt_boost_freq_mhz); -static DEVICE_ATTR_RW(gt_max_freq_mhz); -static DEVICE_ATTR_RW(gt_min_freq_mhz); - -static DEVICE_ATTR_RO(vlv_rpe_freq_mhz); - -static ssize_t gt_rp_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf); -static DEVICE_ATTR(gt_RP0_freq_mhz, S_IRUGO, gt_rp_mhz_show, NULL); -static DEVICE_ATTR(gt_RP1_freq_mhz, S_IRUGO, gt_rp_mhz_show, NULL); -static DEVICE_ATTR(gt_RPn_freq_mhz, S_IRUGO, gt_rp_mhz_show, NULL); - -/* For now we have a static number of RP states */ -static ssize_t gt_rp_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) -{ - struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &to_gt(dev_priv)->rps; - u32 val; - - if (attr == &dev_attr_gt_RP0_freq_mhz) - val = intel_rps_get_rp0_frequency(rps); - else if (attr == &dev_attr_gt_RP1_freq_mhz) - val = intel_rps_get_rp1_frequency(rps); - else if (attr == &dev_attr_gt_RPn_freq_mhz) - val = intel_rps_get_rpn_frequency(rps); - else - BUG(); - - return sysfs_emit(buf, "%d\n", val); -} - -static const struct attribute * const gen6_attrs[] = { - &dev_attr_gt_act_freq_mhz.attr, - &dev_attr_gt_cur_freq_mhz.attr, - &dev_attr_gt_boost_freq_mhz.attr, - &dev_attr_gt_max_freq_mhz.attr, - &dev_attr_gt_min_freq_mhz.attr, - &dev_attr_gt_RP0_freq_mhz.attr, - &dev_attr_gt_RP1_freq_mhz.attr, - &dev_attr_gt_RPn_freq_mhz.attr, - NULL, -}; - -static const struct attribute * const vlv_attrs[] = { - &dev_attr_gt_act_freq_mhz.attr, - &dev_attr_gt_cur_freq_mhz.attr, - &dev_attr_gt_boost_freq_mhz.attr, - &dev_attr_gt_max_freq_mhz.attr, - &dev_attr_gt_min_freq_mhz.attr, - &dev_attr_gt_RP0_freq_mhz.attr, - &dev_attr_gt_RP1_freq_mhz.attr, - &dev_attr_gt_RPn_freq_mhz.attr, - &dev_attr_vlv_rpe_freq_mhz.attr, - NULL, -}; - #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) static ssize_t error_state_read(struct file *filp, struct kobject *kobj, @@ -492,29 +226,6 @@ void i915_setup_sysfs(struct drm_i915_private *dev_priv) struct device *kdev = dev_priv->drm.primary->kdev; int ret; -#ifdef CONFIG_PM - if (HAS_RC6(dev_priv)) { - ret = sysfs_merge_group(&kdev->kobj, - &rc6_attr_group); - if (ret) - drm_err(&dev_priv->drm, - "RC6 residency sysfs setup failed\n"); - } - if (HAS_RC6p(dev_priv)) { - ret = sysfs_merge_group(&kdev->kobj, - &rc6p_attr_group); - if (ret) - drm_err(&dev_priv->drm, - "RC6p residency sysfs setup failed\n"); - } - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { - ret = sysfs_merge_group(&kdev->kobj, - &media_rc6_attr_group); - if (ret) - drm_err(&dev_priv->drm, - "Media RC6 residency sysfs setup failed\n"); - } -#endif if (HAS_L3_DPF(dev_priv)) { ret = device_create_bin_file(kdev, &dpf_attrs); if (ret) @@ -530,13 +241,10 @@ void i915_setup_sysfs(struct drm_i915_private *dev_priv) } } - ret = 0; - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - ret = sysfs_create_files(&kdev->kobj, vlv_attrs); - else if (GRAPHICS_VER(dev_priv) >= 6) - ret = sysfs_create_files(&kdev->kobj, gen6_attrs); - if (ret) - drm_err(&dev_priv->drm, "RPS sysfs setup failed\n"); + dev_priv->sysfs_gt = kobject_create_and_add("gt", &kdev->kobj); + if (!dev_priv->sysfs_gt) + drm_warn(&dev_priv->drm, + "failed to register GT sysfs directory\n"); i915_setup_error_capture(kdev); @@ -549,14 +257,6 @@ void i915_teardown_sysfs(struct drm_i915_private *dev_priv) i915_teardown_error_capture(kdev); - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - sysfs_remove_files(&kdev->kobj, vlv_attrs); - else - sysfs_remove_files(&kdev->kobj, gen6_attrs); device_remove_bin_file(kdev, &dpf_attrs_1); device_remove_bin_file(kdev, &dpf_attrs); -#ifdef CONFIG_PM - sysfs_unmerge_group(&kdev->kobj, &rc6_attr_group); - sysfs_unmerge_group(&kdev->kobj, &rc6p_attr_group); -#endif } diff --git a/drivers/gpu/drm/i915/i915_sysfs.h b/drivers/gpu/drm/i915/i915_sysfs.h index 41afd4366416..243a17741e3f 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.h +++ b/drivers/gpu/drm/i915/i915_sysfs.h @@ -6,8 +6,11 @@ #ifndef __I915_SYSFS_H__ #define __I915_SYSFS_H__ +struct device; struct drm_i915_private; +struct drm_i915_private *kdev_minor_to_i915(struct device *kdev); + void i915_setup_sysfs(struct drm_i915_private *i915); void i915_teardown_sysfs(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c index 129f668f21ff..a5109548abc0 100644 --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c @@ -70,8 +70,10 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man, min_page_size = bo->page_alignment << PAGE_SHIFT; GEM_BUG_ON(min_page_size < mm->chunk_size); + GEM_BUG_ON(!IS_ALIGNED(size, min_page_size)); - if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { + if (place->fpfn + bman_res->base.num_pages != place->lpfn && + place->flags & TTM_PL_FLAG_CONTIGUOUS) { unsigned long pages; size = roundup_pow_of_two(size); diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index d077f7b9eaad..162e8d83691b 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -47,7 +47,7 @@ static inline void assert_vma_held_evict(const struct i915_vma *vma) * This is the only exception to the requirement of the object lock * being held. */ - if (atomic_read(&vma->vm->open)) + if (kref_read(&vma->vm->ref)) assert_object_held_shared(vma->obj); } @@ -113,6 +113,7 @@ vma_create(struct drm_i915_gem_object *obj, struct i915_vma *pos = ERR_PTR(-E2BIG); struct i915_vma *vma; struct rb_node *rb, **p; + int err; /* The aliasing_ppgtt should never be used directly! */ GEM_BUG_ON(vm == &vm->gt->ggtt->alias->vm); @@ -121,8 +122,6 @@ vma_create(struct drm_i915_gem_object *obj, if (vma == NULL) return ERR_PTR(-ENOMEM); - kref_init(&vma->ref); - vma->vm = i915_vm_get(vm); vma->ops = &vm->vma_ops; vma->obj = obj; vma->size = obj->base.size; @@ -138,6 +137,8 @@ vma_create(struct drm_i915_gem_object *obj, } INIT_LIST_HEAD(&vma->closed_link); + INIT_LIST_HEAD(&vma->obj_link); + RB_CLEAR_NODE(&vma->obj_node); if (view && view->type != I915_GGTT_VIEW_NORMAL) { vma->ggtt_view = *view; @@ -163,8 +164,16 @@ vma_create(struct drm_i915_gem_object *obj, GEM_BUG_ON(!IS_ALIGNED(vma->size, I915_GTT_PAGE_SIZE)); - spin_lock(&obj->vma.lock); + err = mutex_lock_interruptible(&vm->mutex); + if (err) { + pos = ERR_PTR(err); + goto err_vma; + } + + vma->vm = vm; + list_add_tail(&vma->vm_link, &vm->unbound_list); + spin_lock(&obj->vma.lock); if (i915_is_ggtt(vm)) { if (unlikely(overflows_type(vma->size, u32))) goto err_unlock; @@ -222,13 +231,15 @@ vma_create(struct drm_i915_gem_object *obj, list_add_tail(&vma->obj_link, &obj->vma.list); spin_unlock(&obj->vma.lock); + mutex_unlock(&vm->mutex); return vma; err_unlock: spin_unlock(&obj->vma.lock); + list_del_init(&vma->vm_link); + mutex_unlock(&vm->mutex); err_vma: - i915_vm_put(vm); i915_vma_free(vma); return pos; } @@ -279,7 +290,7 @@ i915_vma_instance(struct drm_i915_gem_object *obj, struct i915_vma *vma; GEM_BUG_ON(view && !i915_is_ggtt_or_dpt(vm)); - GEM_BUG_ON(!atomic_read(&vm->open)); + GEM_BUG_ON(!kref_read(&vm->ref)); spin_lock(&obj->vma.lock); vma = i915_vma_lookup(obj, vm, view); @@ -322,7 +333,6 @@ static void __vma_release(struct dma_fence_work *work) i915_gem_object_put(vw->pinned); i915_vm_free_pt_stash(vw->vm, &vw->stash); - i915_vm_put(vw->vm); if (vw->vma_res) i915_vma_resource_put(vw->vma_res); } @@ -515,21 +525,18 @@ int i915_vma_bind(struct i915_vma *vma, if (!work->vma_res->bi.pages_rsgt) work->pinned = i915_gem_object_get(vma->obj); } else { - if (vma->obj) { - ret = i915_gem_object_wait_moving_fence(vma->obj, true); - if (ret) { - i915_vma_resource_free(vma->resource); - vma->resource = NULL; + ret = i915_gem_object_wait_moving_fence(vma->obj, true); + if (ret) { + i915_vma_resource_free(vma->resource); + vma->resource = NULL; - return ret; - } + return ret; } vma->ops->bind_vma(vma->vm, NULL, vma->resource, cache_level, bind_flags); } - if (vma->obj) - set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags); + set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags); atomic_or(bind_flags, &vma->flags); return 0; @@ -841,7 +848,7 @@ i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, color)); - list_add_tail(&vma->vm_link, &vma->vm->bound_list); + list_move_tail(&vma->vm_link, &vma->vm->bound_list); return 0; } @@ -857,7 +864,7 @@ i915_vma_detach(struct i915_vma *vma) * vma, we can drop its hold on the backing storage and allow * it to be reaped by the shrinker. */ - list_del(&vma->vm_link); + list_move_tail(&vma->vm_link, &vma->vm->unbound_list); } static bool try_qad_pin(struct i915_vma *vma, unsigned int flags) @@ -1357,18 +1364,10 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, if (err) return err; - if (vma->obj) { - err = i915_gem_object_get_moving_fence(vma->obj, &moving); - if (err) - return err; - } else { - moving = NULL; - } - if (flags & PIN_GLOBAL) wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm); - if (flags & vma->vm->bind_async_flags || moving) { + if (flags & vma->vm->bind_async_flags) { /* lock VM */ err = i915_vm_lock_objects(vma->vm, ww); if (err) @@ -1380,7 +1379,11 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, goto err_rpm; } - work->vm = i915_vm_get(vma->vm); + work->vm = vma->vm; + + err = i915_gem_object_get_moving_fence(vma->obj, &moving); + if (err) + goto err_rpm; dma_fence_work_chain(&work->base, moving); @@ -1625,16 +1628,6 @@ void i915_vma_reopen(struct i915_vma *vma) __i915_vma_remove_closed(vma); } -void i915_vma_release(struct kref *ref) -{ - struct i915_vma *vma = container_of(ref, typeof(*vma), ref); - - i915_vm_put(vma->vm); - i915_active_fini(&vma->active); - GEM_WARN_ON(vma->resource); - i915_vma_free(vma); -} - static void force_unbind(struct i915_vma *vma) { if (!drm_mm_node_allocated(&vma->node)) @@ -1645,7 +1638,7 @@ static void force_unbind(struct i915_vma *vma) GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); } -static void release_references(struct i915_vma *vma) +static void release_references(struct i915_vma *vma, bool vm_ddestroy) { struct drm_i915_gem_object *obj = vma->obj; @@ -1655,11 +1648,17 @@ static void release_references(struct i915_vma *vma) list_del(&vma->obj_link); if (!RB_EMPTY_NODE(&vma->obj_node)) rb_erase(&vma->obj_node, &obj->vma.tree); + spin_unlock(&obj->vma.lock); __i915_vma_remove_closed(vma); - __i915_vma_put(vma); + if (vm_ddestroy) + i915_vm_resv_put(vma->vm); + + i915_active_fini(&vma->active); + GEM_WARN_ON(vma->resource); + i915_vma_free(vma); } /** @@ -1674,8 +1673,12 @@ static void release_references(struct i915_vma *vma) * - __i915_gem_object_pages_fini() * - __i915_vm_close() - Blocks the above function by taking a reference on * the object. - * - __i915_vma_parked() - Blocks the above functions by taking an open-count on - * the vm and a reference on the object. + * - __i915_vma_parked() - Blocks the above functions by taking a reference + * on the vm and a reference on the object. Also takes the object lock so + * destruction from __i915_vma_parked() can be blocked by holding the + * object lock. Since the object lock is only allowed from within i915 with + * an object refcount, holding the object lock also implicitly blocks the + * vma freeing from __i915_gem_object_pages_fini(). * * Because of locks taken during destruction, a vma is also guaranteed to * stay alive while the following locks are held if it was looked up while @@ -1683,24 +1686,27 @@ static void release_references(struct i915_vma *vma) * - vm->mutex * - obj->vma.lock * - gt->closed_lock - * - * A vma user can also temporarily keep the vma alive while holding a vma - * reference. */ void i915_vma_destroy_locked(struct i915_vma *vma) { lockdep_assert_held(&vma->vm->mutex); force_unbind(vma); - release_references(vma); + list_del_init(&vma->vm_link); + release_references(vma, false); } void i915_vma_destroy(struct i915_vma *vma) { + bool vm_ddestroy; + mutex_lock(&vma->vm->mutex); force_unbind(vma); + list_del_init(&vma->vm_link); + vm_ddestroy = vma->vm_ddestroy; + vma->vm_ddestroy = false; mutex_unlock(&vma->vm->mutex); - release_references(vma); + release_references(vma, vm_ddestroy); } void i915_vma_parked(struct intel_gt *gt) @@ -1718,7 +1724,7 @@ void i915_vma_parked(struct intel_gt *gt) if (!kref_get_unless_zero(&obj->base.refcount)) continue; - if (!i915_vm_tryopen(vm)) { + if (!i915_vm_tryget(vm)) { i915_gem_object_put(obj); continue; } @@ -1744,7 +1750,7 @@ void i915_vma_parked(struct intel_gt *gt) } i915_gem_object_put(obj); - i915_vm_close(vm); + i915_vm_put(vm); } } @@ -1903,7 +1909,9 @@ struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async) /* If vm is not open, unbind is a nop. */ vma_res->needs_wakeref = i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND) && - atomic_read(&vma->vm->open); + kref_read(&vma->vm->ref); + vma_res->skip_pte_rewrite = !kref_read(&vma->vm->ref) || + vma->vm->skip_pte_rewrite; trace_i915_vma_unbind(vma); unbind_fence = i915_vma_resource_unbind(vma_res); diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 67ae7341c7e0..6034991d89fe 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -222,20 +222,6 @@ void i915_vma_unlink_ctx(struct i915_vma *vma); void i915_vma_close(struct i915_vma *vma); void i915_vma_reopen(struct i915_vma *vma); -static inline struct i915_vma *__i915_vma_get(struct i915_vma *vma) -{ - if (kref_get_unless_zero(&vma->ref)) - return vma; - - return NULL; -} - -void i915_vma_release(struct kref *ref); -static inline void __i915_vma_put(struct i915_vma *vma) -{ - kref_put(&vma->ref, i915_vma_release); -} - void i915_vma_destroy_locked(struct i915_vma *vma); void i915_vma_destroy(struct i915_vma *vma); diff --git a/drivers/gpu/drm/i915/i915_vma_resource.c b/drivers/gpu/drm/i915/i915_vma_resource.c index 57ae92ba8af1..27c55027387a 100644 --- a/drivers/gpu/drm/i915/i915_vma_resource.c +++ b/drivers/gpu/drm/i915/i915_vma_resource.c @@ -178,7 +178,7 @@ static void i915_vma_resource_unbind_work(struct work_struct *work) bool lockdep_cookie; lockdep_cookie = dma_fence_begin_signalling(); - if (likely(atomic_read(&vm->open))) + if (likely(!vma_res->skip_pte_rewrite)) vma_res->ops->unbind_vma(vm, vma_res); dma_fence_end_signalling(lockdep_cookie); diff --git a/drivers/gpu/drm/i915/i915_vma_resource.h b/drivers/gpu/drm/i915/i915_vma_resource.h index 25913913baa6..5d8427caa2ba 100644 --- a/drivers/gpu/drm/i915/i915_vma_resource.h +++ b/drivers/gpu/drm/i915/i915_vma_resource.h @@ -62,6 +62,11 @@ struct i915_page_sizes { * deferred to a work item awaiting unsignaled fences. This is a hack. * (dma_fence_work uses a fence flag for this, but this seems slightly * cleaner). + * @needs_wakeref: Whether a wakeref is needed during unbind. Since we can't + * take a wakeref in the dma-fence signalling critical path, it needs to be + * taken when the unbind is scheduled. + * @skip_pte_rewrite: During ggtt suspend and vm takedown pte rewriting + * needs to be skipped for unbind. * * The lifetime of a struct i915_vma_resource is from a binding request to * the actual possible asynchronous unbind has completed. @@ -113,6 +118,7 @@ struct i915_vma_resource { bool allocated:1; bool immediate_unbind:1; bool needs_wakeref:1; + bool skip_pte_rewrite:1; }; bool i915_vma_resource_hold(struct i915_vma_resource *vma_res, diff --git a/drivers/gpu/drm/i915/i915_vma_types.h b/drivers/gpu/drm/i915/i915_vma_types.h index 88370dadca82..be6e028c3b57 100644 --- a/drivers/gpu/drm/i915/i915_vma_types.h +++ b/drivers/gpu/drm/i915/i915_vma_types.h @@ -211,7 +211,6 @@ struct i915_vma { * handles (but same file) for execbuf, i.e. the number of aliases * that exist in the ctx->handle_vmas LUT for this vma. */ - struct kref ref; atomic_t open_count; atomic_t flags; /** @@ -272,6 +271,13 @@ struct i915_vma { atomic_t pages_count; /* number of active binds to the pages */ /** + * Whether we hold a reference on the vm dma_resv lock to temporarily + * block vm freeing until the vma is destroyed. + * Protected by the vm mutex. + */ + bool vm_ddestroy; + + /** * Support different GGTT views into the same object. * This means there can be multiple VMA mappings per object and per VM. * i915_ggtt_view_type is used to distinguish between those entries. diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index f9b955810593..576d15a04c9e 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -141,6 +141,8 @@ enum intel_ppgtt_type { func(has_flat_ccs); \ func(has_global_mocs); \ func(has_gt_uc); \ + func(has_heci_pxp); \ + func(has_heci_gscfi); \ func(has_guc_deprivilege); \ func(has_l3_dpf); \ func(has_llc); \ diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c index ded78b83e0b5..e38d2db1c3e3 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.c +++ b/drivers/gpu/drm/i915/intel_memory_region.c @@ -19,7 +19,7 @@ static const struct { .class = INTEL_MEMORY_SYSTEM, .instance = 0, }, - [INTEL_REGION_LMEM] = { + [INTEL_REGION_LMEM_0] = { .class = INTEL_MEMORY_LOCAL, .instance = 0, }, diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h index bbc35ec5c090..3d8378c1b447 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.h +++ b/drivers/gpu/drm/i915/intel_memory_region.h @@ -29,14 +29,17 @@ enum intel_memory_type { enum intel_region_id { INTEL_REGION_SMEM = 0, - INTEL_REGION_LMEM, + INTEL_REGION_LMEM_0, + INTEL_REGION_LMEM_1, + INTEL_REGION_LMEM_2, + INTEL_REGION_LMEM_3, INTEL_REGION_STOLEN_SMEM, INTEL_REGION_STOLEN_LMEM, INTEL_REGION_UNKNOWN, /* Should be last */ }; #define REGION_SMEM BIT(INTEL_REGION_SMEM) -#define REGION_LMEM BIT(INTEL_REGION_LMEM) +#define REGION_LMEM BIT(INTEL_REGION_LMEM_0) #define REGION_STOLEN_SMEM BIT(INTEL_REGION_STOLEN_SMEM) #define REGION_STOLEN_LMEM BIT(INTEL_REGION_STOLEN_LMEM) @@ -54,6 +57,7 @@ struct intel_memory_region_ops { int (*init_object)(struct intel_memory_region *mem, struct drm_i915_gem_object *obj, + resource_size_t offset, resource_size_t size, resource_size_t page_size, unsigned int flags); diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c index 737ef3f4ab54..62ff77445b01 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.c +++ b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -12,6 +12,7 @@ #include "intel_region_ttm.h" +#include "gem/i915_gem_region.h" #include "gem/i915_gem_ttm.h" /* For the funcs/ops export only */ /** * DOC: TTM support structure @@ -191,6 +192,7 @@ intel_region_ttm_resource_to_rsgt(struct intel_memory_region *mem, */ struct ttm_resource * intel_region_ttm_resource_alloc(struct intel_memory_region *mem, + resource_size_t offset, resource_size_t size, unsigned int flags) { @@ -202,7 +204,10 @@ intel_region_ttm_resource_alloc(struct intel_memory_region *mem, if (flags & I915_BO_ALLOC_CONTIGUOUS) place.flags |= TTM_PL_FLAG_CONTIGUOUS; - if (mem->io_size && mem->io_size < mem->total) { + if (offset != I915_BO_INVALID_OFFSET) { + place.fpfn = offset >> PAGE_SHIFT; + place.lpfn = place.fpfn + (size >> PAGE_SHIFT); + } else if (mem->io_size && mem->io_size < mem->total) { if (flags & I915_BO_ALLOC_GPU_ONLY) { place.flags |= TTM_PL_FLAG_TOPDOWN; } else { diff --git a/drivers/gpu/drm/i915/intel_region_ttm.h b/drivers/gpu/drm/i915/intel_region_ttm.h index fdee5e7bd46c..cf9d86dcf409 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.h +++ b/drivers/gpu/drm/i915/intel_region_ttm.h @@ -36,6 +36,7 @@ struct ttm_device_funcs *i915_ttm_driver(void); #ifdef CONFIG_DRM_I915_SELFTEST struct ttm_resource * intel_region_ttm_resource_alloc(struct intel_memory_region *mem, + resource_size_t offset, resource_size_t size, unsigned int flags); #endif diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 24cd3c3729ca..83517a703eb6 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -2050,14 +2050,11 @@ static int i915_pmic_bus_access_notifier(struct notifier_block *nb, return NOTIFY_OK; } -int intel_uncore_setup_mmio(struct intel_uncore *uncore) +int intel_uncore_setup_mmio(struct intel_uncore *uncore, phys_addr_t phys_addr) { struct drm_i915_private *i915 = uncore->i915; - struct pci_dev *pdev = to_pci_dev(i915->drm.dev); - int mmio_bar; int mmio_size; - mmio_bar = GRAPHICS_VER(i915) == 2 ? 1 : 0; /* * Before gen4, the registers and the GTT are behind different BARs. * However, from gen4 onwards, the registers and the GTT are shared @@ -2074,7 +2071,7 @@ int intel_uncore_setup_mmio(struct intel_uncore *uncore) else mmio_size = 2 * 1024 * 1024; - uncore->regs = pci_iomap(pdev, mmio_bar, mmio_size); + uncore->regs = ioremap(phys_addr, mmio_size); if (uncore->regs == NULL) { drm_err(&i915->drm, "failed to map registers\n"); return -EIO; @@ -2085,9 +2082,7 @@ int intel_uncore_setup_mmio(struct intel_uncore *uncore) void intel_uncore_cleanup_mmio(struct intel_uncore *uncore) { - struct pci_dev *pdev = to_pci_dev(uncore->i915->drm.dev); - - pci_iounmap(pdev, uncore->regs); + iounmap(uncore->regs); } void intel_uncore_init_early(struct intel_uncore *uncore, @@ -2475,17 +2470,46 @@ intel_uncore_forcewake_for_reg(struct intel_uncore *uncore, return fw_domains; } -u32 intel_uncore_read_with_mcr_steering_fw(struct intel_uncore *uncore, - i915_reg_t reg, - int slice, int subslice) +/** + * uncore_rw_with_mcr_steering_fw - Access a register after programming + * the MCR selector register. + * @uncore: pointer to struct intel_uncore + * @reg: register being accessed + * @rw_flag: FW_REG_READ for read access or FW_REG_WRITE for write access + * @slice: slice number (ignored for multi-cast write) + * @subslice: sub-slice number (ignored for multi-cast write) + * @value: register value to be written (ignored for read) + * + * Return: 0 for write access. register value for read access. + * + * Caller needs to make sure the relevant forcewake wells are up. + */ +static u32 uncore_rw_with_mcr_steering_fw(struct intel_uncore *uncore, + i915_reg_t reg, u8 rw_flag, + int slice, int subslice, u32 value) { - u32 mcr_mask, mcr_ss, mcr, old_mcr, val; + u32 mcr_mask, mcr_ss, mcr, old_mcr, val = 0; lockdep_assert_held(&uncore->lock); if (GRAPHICS_VER(uncore->i915) >= 11) { mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK; mcr_ss = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice); + + /* + * Wa_22013088509 + * + * The setting of the multicast/unicast bit usually wouldn't + * matter for read operations (which always return the value + * from a single register instance regardless of how that bit + * is set), but some platforms have a workaround requiring us + * to remain in multicast mode for reads. There's no real + * downside to this, so we'll just go ahead and do so on all + * platforms; we'll only clear the multicast bit from the mask + * when exlicitly doing a write operation. + */ + if (rw_flag == FW_REG_WRITE) + mcr_mask |= GEN11_MCR_MULTICAST; } else { mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK; mcr_ss = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice); @@ -2497,7 +2521,10 @@ u32 intel_uncore_read_with_mcr_steering_fw(struct intel_uncore *uncore, mcr |= mcr_ss; intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr); - val = intel_uncore_read_fw(uncore, reg); + if (rw_flag == FW_REG_READ) + val = intel_uncore_read_fw(uncore, reg); + else + intel_uncore_write_fw(uncore, reg, value); mcr &= ~mcr_mask; mcr |= old_mcr & mcr_mask; @@ -2507,14 +2534,16 @@ u32 intel_uncore_read_with_mcr_steering_fw(struct intel_uncore *uncore, return val; } -u32 intel_uncore_read_with_mcr_steering(struct intel_uncore *uncore, - i915_reg_t reg, int slice, int subslice) +static u32 uncore_rw_with_mcr_steering(struct intel_uncore *uncore, + i915_reg_t reg, u8 rw_flag, + int slice, int subslice, + u32 value) { enum forcewake_domains fw_domains; u32 val; fw_domains = intel_uncore_forcewake_for_reg(uncore, reg, - FW_REG_READ); + rw_flag); fw_domains |= intel_uncore_forcewake_for_reg(uncore, GEN8_MCR_SELECTOR, FW_REG_READ | FW_REG_WRITE); @@ -2522,7 +2551,8 @@ u32 intel_uncore_read_with_mcr_steering(struct intel_uncore *uncore, spin_lock_irq(&uncore->lock); intel_uncore_forcewake_get__locked(uncore, fw_domains); - val = intel_uncore_read_with_mcr_steering_fw(uncore, reg, slice, subslice); + val = uncore_rw_with_mcr_steering_fw(uncore, reg, rw_flag, + slice, subslice, value); intel_uncore_forcewake_put__locked(uncore, fw_domains); spin_unlock_irq(&uncore->lock); @@ -2530,6 +2560,28 @@ u32 intel_uncore_read_with_mcr_steering(struct intel_uncore *uncore, return val; } +u32 intel_uncore_read_with_mcr_steering_fw(struct intel_uncore *uncore, + i915_reg_t reg, int slice, int subslice) +{ + return uncore_rw_with_mcr_steering_fw(uncore, reg, FW_REG_READ, + slice, subslice, 0); +} + +u32 intel_uncore_read_with_mcr_steering(struct intel_uncore *uncore, + i915_reg_t reg, int slice, int subslice) +{ + return uncore_rw_with_mcr_steering(uncore, reg, FW_REG_READ, + slice, subslice, 0); +} + +void intel_uncore_write_with_mcr_steering(struct intel_uncore *uncore, + i915_reg_t reg, u32 value, + int slice, int subslice) +{ + uncore_rw_with_mcr_steering(uncore, reg, FW_REG_WRITE, + slice, subslice, value); +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_uncore.c" #include "selftests/intel_uncore.c" diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index 6ff56d673e2b..52fe3d89dd2b 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -29,6 +29,7 @@ #include <linux/notifier.h> #include <linux/hrtimer.h> #include <linux/io-64-nonatomic-lo-hi.h> +#include <linux/types.h> #include "i915_reg_defs.h" @@ -214,12 +215,14 @@ u32 intel_uncore_read_with_mcr_steering_fw(struct intel_uncore *uncore, int slice, int subslice); u32 intel_uncore_read_with_mcr_steering(struct intel_uncore *uncore, i915_reg_t reg, int slice, int subslice); - +void intel_uncore_write_with_mcr_steering(struct intel_uncore *uncore, + i915_reg_t reg, u32 value, + int slice, int subslice); void intel_uncore_mmio_debug_init_early(struct intel_uncore_mmio_debug *mmio_debug); void intel_uncore_init_early(struct intel_uncore *uncore, struct intel_gt *gt); -int intel_uncore_setup_mmio(struct intel_uncore *uncore); +int intel_uncore_setup_mmio(struct intel_uncore *uncore, phys_addr_t phys_addr); int intel_uncore_init_mmio(struct intel_uncore *uncore); void intel_uncore_prune_engine_fw_domains(struct intel_uncore *uncore, struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index ab751192eb3b..8633bec18fa7 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -1112,10 +1112,16 @@ static int misaligned_case(struct i915_address_space *vm, struct intel_memory_re expected_vma_size = round_up(size, 1 << (ffs(vma->resource->page_sizes_gtt) - 1)); expected_node_size = expected_vma_size; - if (NEEDS_COMPACT_PT(vm->i915) && i915_gem_object_is_lmem(obj)) { - /* compact-pt should expand lmem node to 2MB */ + if (HAS_64K_PAGES(vm->i915) && i915_gem_object_is_lmem(obj)) { + /* + * The compact-pt should expand lmem node to 2MB for the ppGTT, + * for all other cases we should only expect 64K. + */ expected_vma_size = round_up(size, I915_GTT_PAGE_SIZE_64K); - expected_node_size = round_up(size, I915_GTT_PAGE_SIZE_2M); + if (NEEDS_COMPACT_PT(vm->i915) && !i915_is_ggtt(vm)) + expected_node_size = round_up(size, I915_GTT_PAGE_SIZE_2M); + else + expected_node_size = round_up(size, I915_GTT_PAGE_SIZE_64K); } if (vma->size != expected_vma_size || vma->node.size != expected_node_size) { @@ -1150,7 +1156,7 @@ static int misaligned_pin(struct i915_address_space *vm, flags |= PIN_GLOBAL; for_each_memory_region(mr, vm->i915, id) { - u64 min_alignment = i915_vm_min_alignment(vm, (enum intel_memory_type)id); + u64 min_alignment = i915_vm_min_alignment(vm, mr->type); u64 size = min_alignment; u64 addr = round_down(hole_start + (hole_size / 2), min_alignment); @@ -1205,7 +1211,7 @@ static int exercise_ppgtt(struct drm_i915_private *dev_priv, goto out_free; } GEM_BUG_ON(offset_in_page(ppgtt->vm.total)); - GEM_BUG_ON(!atomic_read(&ppgtt->vm.open)); + assert_vm_alive(&ppgtt->vm); err = func(&ppgtt->vm, 0, ppgtt->vm.total, end_time); @@ -1438,7 +1444,7 @@ static void track_vma_bind(struct i915_vma *vma) vma->resource->bi.pages = vma->pages; mutex_lock(&vma->vm->mutex); - list_add_tail(&vma->vm_link, &vma->vm->bound_list); + list_move_tail(&vma->vm_link, &vma->vm->bound_list); mutex_unlock(&vma->vm->mutex); } diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 573d9b2e1a4a..9c31a16f8380 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -73,7 +73,7 @@ static void mock_device_release(struct drm_device *dev) destroy_workqueue(i915->wq); intel_region_ttm_device_fini(i915); - intel_gt_driver_late_release(to_gt(i915)); + intel_gt_driver_late_release_all(i915); intel_memory_regions_driver_release(i915); drm_mode_config_cleanup(&i915->drm); @@ -112,6 +112,11 @@ static struct dev_pm_domain pm_domain = { }, }; +static void mock_gt_probe(struct drm_i915_private *i915) +{ + i915->gt[0] = &i915->gt0; +} + struct drm_i915_private *mock_gem_device(void) { #if IS_ENABLED(CONFIG_IOMMU_API) && defined(CONFIG_INTEL_IOMMU) @@ -180,11 +185,11 @@ struct drm_i915_private *mock_gem_device(void) spin_lock_init(&i915->gpu_error.lock); i915_gem_init__mm(i915); - intel_gt_init_early(to_gt(i915), i915); - __intel_gt_init_early(to_gt(i915), i915); + intel_root_gt_init_early(i915); mock_uncore_init(&i915->uncore, i915); atomic_inc(&to_gt(i915)->wakeref.count); /* disable; no hw support */ to_gt(i915)->awake = -ENODEV; + mock_gt_probe(i915); ret = intel_region_ttm_device_init(i915); if (ret) @@ -229,7 +234,7 @@ err_unlock: err_drv: intel_region_ttm_device_fini(i915); err_ttm: - intel_gt_driver_late_release(to_gt(i915)); + intel_gt_driver_late_release_all(i915); intel_memory_regions_driver_release(i915); drm_mode_config_cleanup(&i915->drm); mock_destroy_device(i915); diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c index f64325491f35..670557ce1024 100644 --- a/drivers/gpu/drm/i915/selftests/mock_region.c +++ b/drivers/gpu/drm/i915/selftests/mock_region.c @@ -26,6 +26,7 @@ static int mock_region_get_pages(struct drm_i915_gem_object *obj) int err; obj->mm.res = intel_region_ttm_resource_alloc(obj->mm.region, + obj->bo_offset, obj->base.size, obj->flags); if (IS_ERR(obj->mm.res)) @@ -57,6 +58,7 @@ static const struct drm_i915_gem_object_ops mock_region_obj_ops = { static int mock_object_init(struct intel_memory_region *mem, struct drm_i915_gem_object *obj, + resource_size_t offset, resource_size_t size, resource_size_t page_size, unsigned int flags) @@ -70,6 +72,8 @@ static int mock_object_init(struct intel_memory_region *mem, drm_gem_private_object_init(&i915->drm, &obj->base, size); i915_gem_object_init(obj, &mock_region_obj_ops, &lock_class, flags); + obj->bo_offset = offset; + obj->read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT; i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 83c31b2ad865..ccc4fcf7a630 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -1742,7 +1742,7 @@ a6xx_create_private_address_space(struct msm_gpu *gpu) return ERR_CAST(mmu); return msm_gem_address_space_create(mmu, - "gpu", 0x100000000ULL, 0x1ffffffffULL); + "gpu", 0x100000000ULL, SZ_4G); } static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index 89cfd84760d7..8706bcdd1472 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -599,43 +599,91 @@ static const struct of_device_id dt_match[] = { {} }; -#ifdef CONFIG_PM -static int adreno_resume(struct device *dev) +static int adreno_runtime_resume(struct device *dev) { struct msm_gpu *gpu = dev_to_gpu(dev); return gpu->funcs->pm_resume(gpu); } -static int active_submits(struct msm_gpu *gpu) +static int adreno_runtime_suspend(struct device *dev) { - int active_submits; - mutex_lock(&gpu->active_lock); - active_submits = gpu->active_submits; - mutex_unlock(&gpu->active_lock); - return active_submits; + struct msm_gpu *gpu = dev_to_gpu(dev); + + /* + * We should be holding a runpm ref, which will prevent + * runtime suspend. In the system suspend path, we've + * already waited for active jobs to complete. + */ + WARN_ON_ONCE(gpu->active_submits); + + return gpu->funcs->pm_suspend(gpu); +} + +static void suspend_scheduler(struct msm_gpu *gpu) +{ + int i; + + /* + * Shut down the scheduler before we force suspend, so that + * suspend isn't racing with scheduler kthread feeding us + * more work. + * + * Note, we just want to park the thread, and let any jobs + * that are already on the hw queue complete normally, as + * opposed to the drm_sched_stop() path used for handling + * faulting/timed-out jobs. We can't really cancel any jobs + * already on the hw queue without racing with the GPU. + */ + for (i = 0; i < gpu->nr_rings; i++) { + struct drm_gpu_scheduler *sched = &gpu->rb[i]->sched; + kthread_park(sched->thread); + } } -static int adreno_suspend(struct device *dev) +static void resume_scheduler(struct msm_gpu *gpu) +{ + int i; + + for (i = 0; i < gpu->nr_rings; i++) { + struct drm_gpu_scheduler *sched = &gpu->rb[i]->sched; + kthread_unpark(sched->thread); + } +} + +static int adreno_system_suspend(struct device *dev) { struct msm_gpu *gpu = dev_to_gpu(dev); - int remaining; + int remaining, ret; + + suspend_scheduler(gpu); remaining = wait_event_timeout(gpu->retire_event, - active_submits(gpu) == 0, + gpu->active_submits == 0, msecs_to_jiffies(1000)); if (remaining == 0) { dev_err(dev, "Timeout waiting for GPU to suspend\n"); - return -EBUSY; + ret = -EBUSY; + goto out; } - return gpu->funcs->pm_suspend(gpu); + ret = pm_runtime_force_suspend(dev); +out: + if (ret) + resume_scheduler(gpu); + + return ret; +} + +static int adreno_system_resume(struct device *dev) +{ + resume_scheduler(dev_to_gpu(dev)); + return pm_runtime_force_resume(dev); } -#endif static const struct dev_pm_ops adreno_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume) - SET_RUNTIME_PM_OPS(adreno_suspend, adreno_resume, NULL) + SYSTEM_SLEEP_PM_OPS(adreno_system_suspend, adreno_system_resume) + RUNTIME_PM_OPS(adreno_runtime_suspend, adreno_runtime_resume, NULL) }; static struct platform_driver adreno_driver = { diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c index c515b7cf922c..c61b5b283f08 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c @@ -54,87 +54,87 @@ struct dpu_intr_reg { * When making changes be sure to sync with dpu_hw_intr_reg */ static const struct dpu_intr_reg dpu_intr_set[] = { - { + [MDP_SSPP_TOP0_INTR] = { MDP_SSPP_TOP0_OFF+INTR_CLEAR, MDP_SSPP_TOP0_OFF+INTR_EN, MDP_SSPP_TOP0_OFF+INTR_STATUS }, - { + [MDP_SSPP_TOP0_INTR2] = { MDP_SSPP_TOP0_OFF+INTR2_CLEAR, MDP_SSPP_TOP0_OFF+INTR2_EN, MDP_SSPP_TOP0_OFF+INTR2_STATUS }, - { + [MDP_SSPP_TOP0_HIST_INTR] = { MDP_SSPP_TOP0_OFF+HIST_INTR_CLEAR, MDP_SSPP_TOP0_OFF+HIST_INTR_EN, MDP_SSPP_TOP0_OFF+HIST_INTR_STATUS }, - { + [MDP_INTF0_INTR] = { MDP_INTF_0_OFF+INTF_INTR_CLEAR, MDP_INTF_0_OFF+INTF_INTR_EN, MDP_INTF_0_OFF+INTF_INTR_STATUS }, - { + [MDP_INTF1_INTR] = { MDP_INTF_1_OFF+INTF_INTR_CLEAR, MDP_INTF_1_OFF+INTF_INTR_EN, MDP_INTF_1_OFF+INTF_INTR_STATUS }, - { + [MDP_INTF2_INTR] = { MDP_INTF_2_OFF+INTF_INTR_CLEAR, MDP_INTF_2_OFF+INTF_INTR_EN, MDP_INTF_2_OFF+INTF_INTR_STATUS }, - { + [MDP_INTF3_INTR] = { MDP_INTF_3_OFF+INTF_INTR_CLEAR, MDP_INTF_3_OFF+INTF_INTR_EN, MDP_INTF_3_OFF+INTF_INTR_STATUS }, - { + [MDP_INTF4_INTR] = { MDP_INTF_4_OFF+INTF_INTR_CLEAR, MDP_INTF_4_OFF+INTF_INTR_EN, MDP_INTF_4_OFF+INTF_INTR_STATUS }, - { + [MDP_INTF5_INTR] = { MDP_INTF_5_OFF+INTF_INTR_CLEAR, MDP_INTF_5_OFF+INTF_INTR_EN, MDP_INTF_5_OFF+INTF_INTR_STATUS }, - { + [MDP_AD4_0_INTR] = { MDP_AD4_0_OFF + MDP_AD4_INTR_CLEAR_OFF, MDP_AD4_0_OFF + MDP_AD4_INTR_EN_OFF, MDP_AD4_0_OFF + MDP_AD4_INTR_STATUS_OFF, }, - { + [MDP_AD4_1_INTR] = { MDP_AD4_1_OFF + MDP_AD4_INTR_CLEAR_OFF, MDP_AD4_1_OFF + MDP_AD4_INTR_EN_OFF, MDP_AD4_1_OFF + MDP_AD4_INTR_STATUS_OFF, }, - { + [MDP_INTF0_7xxx_INTR] = { MDP_INTF_0_OFF_REV_7xxx+INTF_INTR_CLEAR, MDP_INTF_0_OFF_REV_7xxx+INTF_INTR_EN, MDP_INTF_0_OFF_REV_7xxx+INTF_INTR_STATUS }, - { + [MDP_INTF1_7xxx_INTR] = { MDP_INTF_1_OFF_REV_7xxx+INTF_INTR_CLEAR, MDP_INTF_1_OFF_REV_7xxx+INTF_INTR_EN, MDP_INTF_1_OFF_REV_7xxx+INTF_INTR_STATUS }, - { + [MDP_INTF2_7xxx_INTR] = { MDP_INTF_2_OFF_REV_7xxx+INTF_INTR_CLEAR, MDP_INTF_2_OFF_REV_7xxx+INTF_INTR_EN, MDP_INTF_2_OFF_REV_7xxx+INTF_INTR_STATUS }, - { + [MDP_INTF3_7xxx_INTR] = { MDP_INTF_3_OFF_REV_7xxx+INTF_INTR_CLEAR, MDP_INTF_3_OFF_REV_7xxx+INTF_INTR_EN, MDP_INTF_3_OFF_REV_7xxx+INTF_INTR_STATUS }, - { + [MDP_INTF4_7xxx_INTR] = { MDP_INTF_4_OFF_REV_7xxx+INTF_INTR_CLEAR, MDP_INTF_4_OFF_REV_7xxx+INTF_INTR_EN, MDP_INTF_4_OFF_REV_7xxx+INTF_INTR_STATUS }, - { + [MDP_INTF5_7xxx_INTR] = { MDP_INTF_5_OFF_REV_7xxx+INTF_INTR_CLEAR, MDP_INTF_5_OFF_REV_7xxx+INTF_INTR_EN, MDP_INTF_5_OFF_REV_7xxx+INTF_INTR_STATUS diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c index 1ee824600995..c478d25f7825 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c @@ -98,7 +98,10 @@ static void mdp5_plane_reset(struct drm_plane *plane) __drm_atomic_helper_plane_destroy_state(plane->state); kfree(to_mdp5_plane_state(plane->state)); + plane->state = NULL; mdp5_state = kzalloc(sizeof(*mdp5_state), GFP_KERNEL); + if (!mdp5_state) + return; __drm_atomic_helper_plane_reset(plane, &mdp5_state->base); } diff --git a/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c b/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c index 5d2ff6791058..acfe1b31e079 100644 --- a/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c +++ b/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c @@ -176,6 +176,8 @@ void msm_disp_snapshot_add_block(struct msm_disp_state *disp_state, u32 len, va_list va; new_blk = kzalloc(sizeof(struct msm_disp_state_block), GFP_KERNEL); + if (!new_blk) + return; va_start(va, fmt); diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 178b774a5fbd..a42732b67349 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -580,6 +580,12 @@ static int dp_hpd_plug_handle(struct dp_display_private *dp, u32 data) dp->dp_display.connector_type, state); mutex_unlock(&dp->event_mutex); + /* + * add fail safe mode outside event_mutex scope + * to avoid potiential circular lock with drm thread + */ + dp_panel_add_fail_safe_mode(dp->dp_display.connector); + /* uevent will complete connection part */ return 0; }; diff --git a/drivers/gpu/drm/msm/dp/dp_panel.c b/drivers/gpu/drm/msm/dp/dp_panel.c index f1418722c549..26c3653c99ec 100644 --- a/drivers/gpu/drm/msm/dp/dp_panel.c +++ b/drivers/gpu/drm/msm/dp/dp_panel.c @@ -151,6 +151,15 @@ static int dp_panel_update_modes(struct drm_connector *connector, return rc; } +void dp_panel_add_fail_safe_mode(struct drm_connector *connector) +{ + /* fail safe edid */ + mutex_lock(&connector->dev->mode_config.mutex); + if (drm_add_modes_noedid(connector, 640, 480)) + drm_set_preferred_mode(connector, 640, 480); + mutex_unlock(&connector->dev->mode_config.mutex); +} + int dp_panel_read_sink_caps(struct dp_panel *dp_panel, struct drm_connector *connector) { @@ -207,16 +216,7 @@ int dp_panel_read_sink_caps(struct dp_panel *dp_panel, goto end; } - /* fail safe edid */ - mutex_lock(&connector->dev->mode_config.mutex); - if (drm_add_modes_noedid(connector, 640, 480)) - drm_set_preferred_mode(connector, 640, 480); - mutex_unlock(&connector->dev->mode_config.mutex); - } else { - /* always add fail-safe mode as backup mode */ - mutex_lock(&connector->dev->mode_config.mutex); - drm_add_modes_noedid(connector, 640, 480); - mutex_unlock(&connector->dev->mode_config.mutex); + dp_panel_add_fail_safe_mode(connector); } if (panel->aux_cfg_update_done) { diff --git a/drivers/gpu/drm/msm/dp/dp_panel.h b/drivers/gpu/drm/msm/dp/dp_panel.h index 9023e5bb4b8b..99739ea679a7 100644 --- a/drivers/gpu/drm/msm/dp/dp_panel.h +++ b/drivers/gpu/drm/msm/dp/dp_panel.h @@ -59,6 +59,7 @@ int dp_panel_init_panel_info(struct dp_panel *dp_panel); int dp_panel_deinit(struct dp_panel *dp_panel); int dp_panel_timing_cfg(struct dp_panel *dp_panel); void dp_panel_dump_regs(struct dp_panel *dp_panel); +void dp_panel_add_fail_safe_mode(struct drm_connector *connector); int dp_panel_read_sink_caps(struct dp_panel *dp_panel, struct drm_connector *connector); u32 dp_panel_get_mode_bpp(struct dp_panel *dp_panel, u32 mode_max_bpp, diff --git a/drivers/gpu/drm/msm/dsi/dsi_manager.c b/drivers/gpu/drm/msm/dsi/dsi_manager.c index 0c1b7dde377c..9f6af0f0fe00 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_manager.c +++ b/drivers/gpu/drm/msm/dsi/dsi_manager.c @@ -638,7 +638,7 @@ struct drm_connector *msm_dsi_manager_connector_init(u8 id) return connector; fail: - connector->funcs->destroy(msm_dsi->connector); + connector->funcs->destroy(connector); return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 01bbb5f2d462..8f492656c9ad 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -927,6 +927,7 @@ void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m, get_pid_task(aspace->pid, PIDTYPE_PID); if (task) { comm = kstrdup(task->comm, GFP_KERNEL); + put_task_struct(task); } else { comm = NULL; } diff --git a/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c b/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c index 46029c5610c8..145047e19394 100644 --- a/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c +++ b/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c @@ -229,7 +229,7 @@ static void rpi_touchscreen_i2c_write(struct rpi_touchscreen *ts, ret = i2c_smbus_write_byte_data(ts->i2c, reg, val); if (ret) - dev_err(&ts->dsi->dev, "I2C write failed: %d\n", ret); + dev_err(&ts->i2c->dev, "I2C write failed: %d\n", ret); } static int rpi_touchscreen_write(struct rpi_touchscreen *ts, u16 reg, u32 val) @@ -265,7 +265,7 @@ static int rpi_touchscreen_noop(struct drm_panel *panel) return 0; } -static int rpi_touchscreen_enable(struct drm_panel *panel) +static int rpi_touchscreen_prepare(struct drm_panel *panel) { struct rpi_touchscreen *ts = panel_to_ts(panel); int i; @@ -295,6 +295,13 @@ static int rpi_touchscreen_enable(struct drm_panel *panel) rpi_touchscreen_write(ts, DSI_STARTDSI, 0x01); msleep(100); + return 0; +} + +static int rpi_touchscreen_enable(struct drm_panel *panel) +{ + struct rpi_touchscreen *ts = panel_to_ts(panel); + /* Turn on the backlight. */ rpi_touchscreen_i2c_write(ts, REG_PWM, 255); @@ -349,7 +356,7 @@ static int rpi_touchscreen_get_modes(struct drm_panel *panel, static const struct drm_panel_funcs rpi_touchscreen_funcs = { .disable = rpi_touchscreen_disable, .unprepare = rpi_touchscreen_noop, - .prepare = rpi_touchscreen_noop, + .prepare = rpi_touchscreen_prepare, .enable = rpi_touchscreen_enable, .get_modes = rpi_touchscreen_get_modes, }; diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile index 11c97edde54d..1045d2c46a76 100644 --- a/drivers/gpu/drm/radeon/Makefile +++ b/drivers/gpu/drm/radeon/Makefile @@ -39,12 +39,12 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \ radeon_clocks.o radeon_fb.o radeon_gem.o radeon_ring.o radeon_irq_kms.o \ radeon_cs.o radeon_bios.o radeon_benchmark.o r100.o r300.o r420.o \ rs400.o rs600.o rs690.o rv515.o r520.o r600.o rv770.o radeon_test.o \ - r200.o radeon_legacy_tv.o r600_cs.o r600_blit_shaders.o \ + r200.o radeon_legacy_tv.o r600_cs.o \ radeon_pm.o atombios_dp.o r600_hdmi.o dce3_1_afmt.o \ - evergreen.o evergreen_cs.o evergreen_blit_shaders.o \ - evergreen_hdmi.o radeon_trace_points.o ni.o cayman_blit_shaders.o \ + evergreen.o evergreen_cs.o \ + evergreen_hdmi.o radeon_trace_points.o ni.o \ atombios_encoders.o radeon_semaphore.o radeon_sa.o atombios_i2c.o si.o \ - si_blit_shaders.o radeon_prime.o cik.o cik_blit_shaders.o \ + radeon_prime.o cik.o cik_blit_shaders.o \ r600_dpm.o rs780_dpm.o rv6xx_dpm.o rv770_dpm.o rv730_dpm.o rv740_dpm.o \ rv770_smc.o cypress_dpm.o btc_dpm.o sumo_dpm.o sumo_smc.o trinity_dpm.o \ trinity_smc.o ni_dpm.o si_smc.o si_dpm.o kv_smc.o kv_dpm.o ci_smc.o \ diff --git a/drivers/gpu/drm/radeon/cayman_blit_shaders.c b/drivers/gpu/drm/radeon/cayman_blit_shaders.c deleted file mode 100644 index 9fec4d09f383..000000000000 --- a/drivers/gpu/drm/radeon/cayman_blit_shaders.c +++ /dev/null @@ -1,320 +0,0 @@ -/* - * Copyright 2010 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Authors: - * Alex Deucher <alexander.deucher@amd.com> - */ - -#include <linux/bug.h> -#include <linux/types.h> -#include <linux/kernel.h> - -/* - * evergreen cards need to use the 3D engine to blit data which requires - * quite a bit of hw state setup. Rather than pull the whole 3D driver - * (which normally generates the 3D state) into the DRM, we opt to use - * statically generated state tables. The register state and shaders - * were hand generated to support blitting functionality. See the 3D - * driver or documentation for descriptions of the registers and - * shader instructions. - */ - -const u32 cayman_default_state[] = -{ - 0xc0066900, - 0x00000000, - 0x00000060, /* DB_RENDER_CONTROL */ - 0x00000000, /* DB_COUNT_CONTROL */ - 0x00000000, /* DB_DEPTH_VIEW */ - 0x0000002a, /* DB_RENDER_OVERRIDE */ - 0x00000000, /* DB_RENDER_OVERRIDE2 */ - 0x00000000, /* DB_HTILE_DATA_BASE */ - - 0xc0026900, - 0x0000000a, - 0x00000000, /* DB_STENCIL_CLEAR */ - 0x00000000, /* DB_DEPTH_CLEAR */ - - 0xc0036900, - 0x0000000f, - 0x00000000, /* DB_DEPTH_INFO */ - 0x00000000, /* DB_Z_INFO */ - 0x00000000, /* DB_STENCIL_INFO */ - - 0xc0016900, - 0x00000080, - 0x00000000, /* PA_SC_WINDOW_OFFSET */ - - 0xc00d6900, - 0x00000083, - 0x0000ffff, /* PA_SC_CLIPRECT_RULE */ - 0x00000000, /* PA_SC_CLIPRECT_0_TL */ - 0x20002000, /* PA_SC_CLIPRECT_0_BR */ - 0x00000000, - 0x20002000, - 0x00000000, - 0x20002000, - 0x00000000, - 0x20002000, - 0xaaaaaaaa, /* PA_SC_EDGERULE */ - 0x00000000, /* PA_SU_HARDWARE_SCREEN_OFFSET */ - 0x0000000f, /* CB_TARGET_MASK */ - 0x0000000f, /* CB_SHADER_MASK */ - - 0xc0226900, - 0x00000094, - 0x80000000, /* PA_SC_VPORT_SCISSOR_0_TL */ - 0x20002000, /* PA_SC_VPORT_SCISSOR_0_BR */ - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x00000000, /* PA_SC_VPORT_ZMIN_0 */ - 0x3f800000, /* PA_SC_VPORT_ZMAX_0 */ - - 0xc0016900, - 0x000000d4, - 0x00000000, /* SX_MISC */ - - 0xc0026900, - 0x000000d9, - 0x00000000, /* CP_RINGID */ - 0x00000000, /* CP_VMID */ - - 0xc0096900, - 0x00000100, - 0x00ffffff, /* VGT_MAX_VTX_INDX */ - 0x00000000, /* VGT_MIN_VTX_INDX */ - 0x00000000, /* VGT_INDX_OFFSET */ - 0x00000000, /* VGT_MULTI_PRIM_IB_RESET_INDX */ - 0x00000000, /* SX_ALPHA_TEST_CONTROL */ - 0x00000000, /* CB_BLEND_RED */ - 0x00000000, /* CB_BLEND_GREEN */ - 0x00000000, /* CB_BLEND_BLUE */ - 0x00000000, /* CB_BLEND_ALPHA */ - - 0xc0016900, - 0x00000187, - 0x00000100, /* SPI_VS_OUT_ID_0 */ - - 0xc0026900, - 0x00000191, - 0x00000100, /* SPI_PS_INPUT_CNTL_0 */ - 0x00000101, /* SPI_PS_INPUT_CNTL_1 */ - - 0xc0016900, - 0x000001b1, - 0x00000000, /* SPI_VS_OUT_CONFIG */ - - 0xc0106900, - 0x000001b3, - 0x20000001, /* SPI_PS_IN_CONTROL_0 */ - 0x00000000, /* SPI_PS_IN_CONTROL_1 */ - 0x00000000, /* SPI_INTERP_CONTROL_0 */ - 0x00000000, /* SPI_INPUT_Z */ - 0x00000000, /* SPI_FOG_CNTL */ - 0x00100000, /* SPI_BARYC_CNTL */ - 0x00000000, /* SPI_PS_IN_CONTROL_2 */ - 0x00000000, /* SPI_COMPUTE_INPUT_CNTL */ - 0x00000000, /* SPI_COMPUTE_NUM_THREAD_X */ - 0x00000000, /* SPI_COMPUTE_NUM_THREAD_Y */ - 0x00000000, /* SPI_COMPUTE_NUM_THREAD_Z */ - 0x00000000, /* SPI_GPR_MGMT */ - 0x00000000, /* SPI_LDS_MGMT */ - 0x00000000, /* SPI_STACK_MGMT */ - 0x00000000, /* SPI_WAVE_MGMT_1 */ - 0x00000000, /* SPI_WAVE_MGMT_2 */ - - 0xc0016900, - 0x000001e0, - 0x00000000, /* CB_BLEND0_CONTROL */ - - 0xc00e6900, - 0x00000200, - 0x00000000, /* DB_DEPTH_CONTROL */ - 0x00000000, /* DB_EQAA */ - 0x00cc0010, /* CB_COLOR_CONTROL */ - 0x00000210, /* DB_SHADER_CONTROL */ - 0x00010000, /* PA_CL_CLIP_CNTL */ - 0x00000004, /* PA_SU_SC_MODE_CNTL */ - 0x00000100, /* PA_CL_VTE_CNTL */ - 0x00000000, /* PA_CL_VS_OUT_CNTL */ - 0x00000000, /* PA_CL_NANINF_CNTL */ - 0x00000000, /* PA_SU_LINE_STIPPLE_CNTL */ - 0x00000000, /* PA_SU_LINE_STIPPLE_SCALE */ - 0x00000000, /* PA_SU_PRIM_FILTER_CNTL */ - 0x00000000, /* */ - 0x00000000, /* */ - - 0xc0026900, - 0x00000229, - 0x00000000, /* SQ_PGM_START_FS */ - 0x00000000, - - 0xc0016900, - 0x0000023b, - 0x00000000, /* SQ_LDS_ALLOC_PS */ - - 0xc0066900, - 0x00000240, - 0x00000000, /* SQ_ESGS_RING_ITEMSIZE */ - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - - 0xc0046900, - 0x00000247, - 0x00000000, /* SQ_GS_VERT_ITEMSIZE */ - 0x00000000, - 0x00000000, - 0x00000000, - - 0xc0116900, - 0x00000280, - 0x00000000, /* PA_SU_POINT_SIZE */ - 0x00000000, /* PA_SU_POINT_MINMAX */ - 0x00000008, /* PA_SU_LINE_CNTL */ - 0x00000000, /* PA_SC_LINE_STIPPLE */ - 0x00000000, /* VGT_OUTPUT_PATH_CNTL */ - 0x00000000, /* VGT_HOS_CNTL */ - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, /* VGT_GS_MODE */ - - 0xc0026900, - 0x00000292, - 0x00000000, /* PA_SC_MODE_CNTL_0 */ - 0x00000000, /* PA_SC_MODE_CNTL_1 */ - - 0xc0016900, - 0x000002a1, - 0x00000000, /* VGT_PRIMITIVEID_EN */ - - 0xc0016900, - 0x000002a5, - 0x00000000, /* VGT_MULTI_PRIM_IB_RESET_EN */ - - 0xc0026900, - 0x000002a8, - 0x00000000, /* VGT_INSTANCE_STEP_RATE_0 */ - 0x00000000, - - 0xc0026900, - 0x000002ad, - 0x00000000, /* VGT_REUSE_OFF */ - 0x00000000, - - 0xc0016900, - 0x000002d5, - 0x00000000, /* VGT_SHADER_STAGES_EN */ - - 0xc0016900, - 0x000002dc, - 0x0000aa00, /* DB_ALPHA_TO_MASK */ - - 0xc0066900, - 0x000002de, - 0x00000000, /* PA_SU_POLY_OFFSET_DB_FMT_CNTL */ - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - - 0xc0026900, - 0x000002e5, - 0x00000000, /* VGT_STRMOUT_CONFIG */ - 0x00000000, - - 0xc01b6900, - 0x000002f5, - 0x76543210, /* PA_SC_CENTROID_PRIORITY_0 */ - 0xfedcba98, /* PA_SC_CENTROID_PRIORITY_1 */ - 0x00000000, /* PA_SC_LINE_CNTL */ - 0x00000000, /* PA_SC_AA_CONFIG */ - 0x00000005, /* PA_SU_VTX_CNTL */ - 0x3f800000, /* PA_CL_GB_VERT_CLIP_ADJ */ - 0x3f800000, /* PA_CL_GB_VERT_DISC_ADJ */ - 0x3f800000, /* PA_CL_GB_HORZ_CLIP_ADJ */ - 0x3f800000, /* PA_CL_GB_HORZ_DISC_ADJ */ - 0x00000000, /* PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */ - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0xffffffff, /* PA_SC_AA_MASK_X0Y0_X1Y0 */ - 0xffffffff, - - 0xc0026900, - 0x00000316, - 0x0000000e, /* VGT_VERTEX_REUSE_BLOCK_CNTL */ - 0x00000010, /* */ -}; - -const u32 cayman_default_size = ARRAY_SIZE(cayman_default_state); diff --git a/drivers/gpu/drm/radeon/cayman_blit_shaders.h b/drivers/gpu/drm/radeon/cayman_blit_shaders.h index f5d0e9a60267..1dca73d9e005 100644 --- a/drivers/gpu/drm/radeon/cayman_blit_shaders.h +++ b/drivers/gpu/drm/radeon/cayman_blit_shaders.h @@ -20,16 +20,300 @@ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * + * Authors: + * Alex Deucher <alexander.deucher@amd.com> */ #ifndef CAYMAN_BLIT_SHADERS_H #define CAYMAN_BLIT_SHADERS_H -extern const u32 cayman_ps[]; -extern const u32 cayman_vs[]; -extern const u32 cayman_default_state[]; +/* + * evergreen cards need to use the 3D engine to blit data which requires + * quite a bit of hw state setup. Rather than pull the whole 3D driver + * (which normally generates the 3D state) into the DRM, we opt to use + * statically generated state tables. The register state and shaders + * were hand generated to support blitting functionality. See the 3D + * driver or documentation for descriptions of the registers and + * shader instructions. + */ +static const u32 cayman_default_state[] = { + 0xc0066900, + 0x00000000, + 0x00000060, /* DB_RENDER_CONTROL */ + 0x00000000, /* DB_COUNT_CONTROL */ + 0x00000000, /* DB_DEPTH_VIEW */ + 0x0000002a, /* DB_RENDER_OVERRIDE */ + 0x00000000, /* DB_RENDER_OVERRIDE2 */ + 0x00000000, /* DB_HTILE_DATA_BASE */ + + 0xc0026900, + 0x0000000a, + 0x00000000, /* DB_STENCIL_CLEAR */ + 0x00000000, /* DB_DEPTH_CLEAR */ + + 0xc0036900, + 0x0000000f, + 0x00000000, /* DB_DEPTH_INFO */ + 0x00000000, /* DB_Z_INFO */ + 0x00000000, /* DB_STENCIL_INFO */ + + 0xc0016900, + 0x00000080, + 0x00000000, /* PA_SC_WINDOW_OFFSET */ + + 0xc00d6900, + 0x00000083, + 0x0000ffff, /* PA_SC_CLIPRECT_RULE */ + 0x00000000, /* PA_SC_CLIPRECT_0_TL */ + 0x20002000, /* PA_SC_CLIPRECT_0_BR */ + 0x00000000, + 0x20002000, + 0x00000000, + 0x20002000, + 0x00000000, + 0x20002000, + 0xaaaaaaaa, /* PA_SC_EDGERULE */ + 0x00000000, /* PA_SU_HARDWARE_SCREEN_OFFSET */ + 0x0000000f, /* CB_TARGET_MASK */ + 0x0000000f, /* CB_SHADER_MASK */ + + 0xc0226900, + 0x00000094, + 0x80000000, /* PA_SC_VPORT_SCISSOR_0_TL */ + 0x20002000, /* PA_SC_VPORT_SCISSOR_0_BR */ + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x00000000, /* PA_SC_VPORT_ZMIN_0 */ + 0x3f800000, /* PA_SC_VPORT_ZMAX_0 */ + + 0xc0016900, + 0x000000d4, + 0x00000000, /* SX_MISC */ + + 0xc0026900, + 0x000000d9, + 0x00000000, /* CP_RINGID */ + 0x00000000, /* CP_VMID */ + + 0xc0096900, + 0x00000100, + 0x00ffffff, /* VGT_MAX_VTX_INDX */ + 0x00000000, /* VGT_MIN_VTX_INDX */ + 0x00000000, /* VGT_INDX_OFFSET */ + 0x00000000, /* VGT_MULTI_PRIM_IB_RESET_INDX */ + 0x00000000, /* SX_ALPHA_TEST_CONTROL */ + 0x00000000, /* CB_BLEND_RED */ + 0x00000000, /* CB_BLEND_GREEN */ + 0x00000000, /* CB_BLEND_BLUE */ + 0x00000000, /* CB_BLEND_ALPHA */ + + 0xc0016900, + 0x00000187, + 0x00000100, /* SPI_VS_OUT_ID_0 */ + + 0xc0026900, + 0x00000191, + 0x00000100, /* SPI_PS_INPUT_CNTL_0 */ + 0x00000101, /* SPI_PS_INPUT_CNTL_1 */ + + 0xc0016900, + 0x000001b1, + 0x00000000, /* SPI_VS_OUT_CONFIG */ + + 0xc0106900, + 0x000001b3, + 0x20000001, /* SPI_PS_IN_CONTROL_0 */ + 0x00000000, /* SPI_PS_IN_CONTROL_1 */ + 0x00000000, /* SPI_INTERP_CONTROL_0 */ + 0x00000000, /* SPI_INPUT_Z */ + 0x00000000, /* SPI_FOG_CNTL */ + 0x00100000, /* SPI_BARYC_CNTL */ + 0x00000000, /* SPI_PS_IN_CONTROL_2 */ + 0x00000000, /* SPI_COMPUTE_INPUT_CNTL */ + 0x00000000, /* SPI_COMPUTE_NUM_THREAD_X */ + 0x00000000, /* SPI_COMPUTE_NUM_THREAD_Y */ + 0x00000000, /* SPI_COMPUTE_NUM_THREAD_Z */ + 0x00000000, /* SPI_GPR_MGMT */ + 0x00000000, /* SPI_LDS_MGMT */ + 0x00000000, /* SPI_STACK_MGMT */ + 0x00000000, /* SPI_WAVE_MGMT_1 */ + 0x00000000, /* SPI_WAVE_MGMT_2 */ + + 0xc0016900, + 0x000001e0, + 0x00000000, /* CB_BLEND0_CONTROL */ + + 0xc00e6900, + 0x00000200, + 0x00000000, /* DB_DEPTH_CONTROL */ + 0x00000000, /* DB_EQAA */ + 0x00cc0010, /* CB_COLOR_CONTROL */ + 0x00000210, /* DB_SHADER_CONTROL */ + 0x00010000, /* PA_CL_CLIP_CNTL */ + 0x00000004, /* PA_SU_SC_MODE_CNTL */ + 0x00000100, /* PA_CL_VTE_CNTL */ + 0x00000000, /* PA_CL_VS_OUT_CNTL */ + 0x00000000, /* PA_CL_NANINF_CNTL */ + 0x00000000, /* PA_SU_LINE_STIPPLE_CNTL */ + 0x00000000, /* PA_SU_LINE_STIPPLE_SCALE */ + 0x00000000, /* PA_SU_PRIM_FILTER_CNTL */ + 0x00000000, /* */ + 0x00000000, /* */ + + 0xc0026900, + 0x00000229, + 0x00000000, /* SQ_PGM_START_FS */ + 0x00000000, + + 0xc0016900, + 0x0000023b, + 0x00000000, /* SQ_LDS_ALLOC_PS */ + + 0xc0066900, + 0x00000240, + 0x00000000, /* SQ_ESGS_RING_ITEMSIZE */ + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + + 0xc0046900, + 0x00000247, + 0x00000000, /* SQ_GS_VERT_ITEMSIZE */ + 0x00000000, + 0x00000000, + 0x00000000, + + 0xc0116900, + 0x00000280, + 0x00000000, /* PA_SU_POINT_SIZE */ + 0x00000000, /* PA_SU_POINT_MINMAX */ + 0x00000008, /* PA_SU_LINE_CNTL */ + 0x00000000, /* PA_SC_LINE_STIPPLE */ + 0x00000000, /* VGT_OUTPUT_PATH_CNTL */ + 0x00000000, /* VGT_HOS_CNTL */ + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, /* VGT_GS_MODE */ + + 0xc0026900, + 0x00000292, + 0x00000000, /* PA_SC_MODE_CNTL_0 */ + 0x00000000, /* PA_SC_MODE_CNTL_1 */ + + 0xc0016900, + 0x000002a1, + 0x00000000, /* VGT_PRIMITIVEID_EN */ + + 0xc0016900, + 0x000002a5, + 0x00000000, /* VGT_MULTI_PRIM_IB_RESET_EN */ + + 0xc0026900, + 0x000002a8, + 0x00000000, /* VGT_INSTANCE_STEP_RATE_0 */ + 0x00000000, + + 0xc0026900, + 0x000002ad, + 0x00000000, /* VGT_REUSE_OFF */ + 0x00000000, + + 0xc0016900, + 0x000002d5, + 0x00000000, /* VGT_SHADER_STAGES_EN */ + + 0xc0016900, + 0x000002dc, + 0x0000aa00, /* DB_ALPHA_TO_MASK */ + + 0xc0066900, + 0x000002de, + 0x00000000, /* PA_SU_POLY_OFFSET_DB_FMT_CNTL */ + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + + 0xc0026900, + 0x000002e5, + 0x00000000, /* VGT_STRMOUT_CONFIG */ + 0x00000000, + + 0xc01b6900, + 0x000002f5, + 0x76543210, /* PA_SC_CENTROID_PRIORITY_0 */ + 0xfedcba98, /* PA_SC_CENTROID_PRIORITY_1 */ + 0x00000000, /* PA_SC_LINE_CNTL */ + 0x00000000, /* PA_SC_AA_CONFIG */ + 0x00000005, /* PA_SU_VTX_CNTL */ + 0x3f800000, /* PA_CL_GB_VERT_CLIP_ADJ */ + 0x3f800000, /* PA_CL_GB_VERT_DISC_ADJ */ + 0x3f800000, /* PA_CL_GB_HORZ_CLIP_ADJ */ + 0x3f800000, /* PA_CL_GB_HORZ_DISC_ADJ */ + 0x00000000, /* PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */ + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0xffffffff, /* PA_SC_AA_MASK_X0Y0_X1Y0 */ + 0xffffffff, + + 0xc0026900, + 0x00000316, + 0x0000000e, /* VGT_VERTEX_REUSE_BLOCK_CNTL */ + 0x00000010, /* */ +}; -extern const u32 cayman_ps_size, cayman_vs_size; -extern const u32 cayman_default_size; +static const u32 cayman_default_size = ARRAY_SIZE(cayman_default_state); #endif diff --git a/drivers/gpu/drm/radeon/evergreen_blit_shaders.c b/drivers/gpu/drm/radeon/evergreen_blit_shaders.c deleted file mode 100644 index 1a96ddb3e5ed..000000000000 --- a/drivers/gpu/drm/radeon/evergreen_blit_shaders.c +++ /dev/null @@ -1,303 +0,0 @@ -/* - * Copyright 2010 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Authors: - * Alex Deucher <alexander.deucher@amd.com> - */ - -#include <linux/bug.h> -#include <linux/types.h> -#include <linux/kernel.h> - -/* - * evergreen cards need to use the 3D engine to blit data which requires - * quite a bit of hw state setup. Rather than pull the whole 3D driver - * (which normally generates the 3D state) into the DRM, we opt to use - * statically generated state tables. The register state and shaders - * were hand generated to support blitting functionality. See the 3D - * driver or documentation for descriptions of the registers and - * shader instructions. - */ - -const u32 evergreen_default_state[] = -{ - 0xc0016900, - 0x0000023b, - 0x00000000, /* SQ_LDS_ALLOC_PS */ - - 0xc0066900, - 0x00000240, - 0x00000000, /* SQ_ESGS_RING_ITEMSIZE */ - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - - 0xc0046900, - 0x00000247, - 0x00000000, /* SQ_GS_VERT_ITEMSIZE */ - 0x00000000, - 0x00000000, - 0x00000000, - - 0xc0026900, - 0x00000010, - 0x00000000, /* DB_Z_INFO */ - 0x00000000, /* DB_STENCIL_INFO */ - - 0xc0016900, - 0x00000200, - 0x00000000, /* DB_DEPTH_CONTROL */ - - 0xc0066900, - 0x00000000, - 0x00000060, /* DB_RENDER_CONTROL */ - 0x00000000, /* DB_COUNT_CONTROL */ - 0x00000000, /* DB_DEPTH_VIEW */ - 0x0000002a, /* DB_RENDER_OVERRIDE */ - 0x00000000, /* DB_RENDER_OVERRIDE2 */ - 0x00000000, /* DB_HTILE_DATA_BASE */ - - 0xc0026900, - 0x0000000a, - 0x00000000, /* DB_STENCIL_CLEAR */ - 0x00000000, /* DB_DEPTH_CLEAR */ - - 0xc0016900, - 0x000002dc, - 0x0000aa00, /* DB_ALPHA_TO_MASK */ - - 0xc0016900, - 0x00000080, - 0x00000000, /* PA_SC_WINDOW_OFFSET */ - - 0xc00d6900, - 0x00000083, - 0x0000ffff, /* PA_SC_CLIPRECT_RULE */ - 0x00000000, /* PA_SC_CLIPRECT_0_TL */ - 0x20002000, /* PA_SC_CLIPRECT_0_BR */ - 0x00000000, - 0x20002000, - 0x00000000, - 0x20002000, - 0x00000000, - 0x20002000, - 0xaaaaaaaa, /* PA_SC_EDGERULE */ - 0x00000000, /* PA_SU_HARDWARE_SCREEN_OFFSET */ - 0x0000000f, /* CB_TARGET_MASK */ - 0x0000000f, /* CB_SHADER_MASK */ - - 0xc0226900, - 0x00000094, - 0x80000000, /* PA_SC_VPORT_SCISSOR_0_TL */ - 0x20002000, /* PA_SC_VPORT_SCISSOR_0_BR */ - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x00000000, /* PA_SC_VPORT_ZMIN_0 */ - 0x3f800000, /* PA_SC_VPORT_ZMAX_0 */ - - 0xc0016900, - 0x000000d4, - 0x00000000, /* SX_MISC */ - - 0xc0026900, - 0x00000292, - 0x00000000, /* PA_SC_MODE_CNTL_0 */ - 0x00000000, /* PA_SC_MODE_CNTL_1 */ - - 0xc0106900, - 0x00000300, - 0x00000000, /* PA_SC_LINE_CNTL */ - 0x00000000, /* PA_SC_AA_CONFIG */ - 0x00000005, /* PA_SU_VTX_CNTL */ - 0x3f800000, /* PA_CL_GB_VERT_CLIP_ADJ */ - 0x3f800000, /* PA_CL_GB_VERT_DISC_ADJ */ - 0x3f800000, /* PA_CL_GB_HORZ_CLIP_ADJ */ - 0x3f800000, /* PA_CL_GB_HORZ_DISC_ADJ */ - 0x00000000, /* PA_SC_AA_SAMPLE_LOCS_0 */ - 0x00000000, /* */ - 0x00000000, /* */ - 0x00000000, /* */ - 0x00000000, /* */ - 0x00000000, /* */ - 0x00000000, /* */ - 0x00000000, /* PA_SC_AA_SAMPLE_LOCS_7 */ - 0xffffffff, /* PA_SC_AA_MASK */ - - 0xc00d6900, - 0x00000202, - 0x00cc0010, /* CB_COLOR_CONTROL */ - 0x00000210, /* DB_SHADER_CONTROL */ - 0x00010000, /* PA_CL_CLIP_CNTL */ - 0x00000004, /* PA_SU_SC_MODE_CNTL */ - 0x00000100, /* PA_CL_VTE_CNTL */ - 0x00000000, /* PA_CL_VS_OUT_CNTL */ - 0x00000000, /* PA_CL_NANINF_CNTL */ - 0x00000000, /* PA_SU_LINE_STIPPLE_CNTL */ - 0x00000000, /* PA_SU_LINE_STIPPLE_SCALE */ - 0x00000000, /* PA_SU_PRIM_FILTER_CNTL */ - 0x00000000, /* */ - 0x00000000, /* */ - 0x00000000, /* SQ_DYN_GPR_RESOURCE_LIMIT_1 */ - - 0xc0066900, - 0x000002de, - 0x00000000, /* PA_SU_POLY_OFFSET_DB_FMT_CNTL */ - 0x00000000, /* */ - 0x00000000, /* */ - 0x00000000, /* */ - 0x00000000, /* */ - 0x00000000, /* */ - - 0xc0016900, - 0x00000229, - 0x00000000, /* SQ_PGM_START_FS */ - - 0xc0016900, - 0x0000022a, - 0x00000000, /* SQ_PGM_RESOURCES_FS */ - - 0xc0096900, - 0x00000100, - 0x00ffffff, /* VGT_MAX_VTX_INDX */ - 0x00000000, /* */ - 0x00000000, /* */ - 0x00000000, /* */ - 0x00000000, /* SX_ALPHA_TEST_CONTROL */ - 0x00000000, /* CB_BLEND_RED */ - 0x00000000, /* CB_BLEND_GREEN */ - 0x00000000, /* CB_BLEND_BLUE */ - 0x00000000, /* CB_BLEND_ALPHA */ - - 0xc0026900, - 0x000002a8, - 0x00000000, /* VGT_INSTANCE_STEP_RATE_0 */ - 0x00000000, /* */ - - 0xc0026900, - 0x000002ad, - 0x00000000, /* VGT_REUSE_OFF */ - 0x00000000, /* */ - - 0xc0116900, - 0x00000280, - 0x00000000, /* PA_SU_POINT_SIZE */ - 0x00000000, /* PA_SU_POINT_MINMAX */ - 0x00000008, /* PA_SU_LINE_CNTL */ - 0x00000000, /* PA_SC_LINE_STIPPLE */ - 0x00000000, /* VGT_OUTPUT_PATH_CNTL */ - 0x00000000, /* VGT_HOS_CNTL */ - 0x00000000, /* */ - 0x00000000, /* */ - 0x00000000, /* */ - 0x00000000, /* */ - 0x00000000, /* */ - 0x00000000, /* */ - 0x00000000, /* */ - 0x00000000, /* */ - 0x00000000, /* */ - 0x00000000, /* */ - 0x00000000, /* VGT_GS_MODE */ - - 0xc0016900, - 0x000002a1, - 0x00000000, /* VGT_PRIMITIVEID_EN */ - - 0xc0016900, - 0x000002a5, - 0x00000000, /* VGT_MULTI_PRIM_IB_RESET_EN */ - - 0xc0016900, - 0x000002d5, - 0x00000000, /* VGT_SHADER_STAGES_EN */ - - 0xc0026900, - 0x000002e5, - 0x00000000, /* VGT_STRMOUT_CONFIG */ - 0x00000000, /* */ - - 0xc0016900, - 0x000001e0, - 0x00000000, /* CB_BLEND0_CONTROL */ - - 0xc0016900, - 0x000001b1, - 0x00000000, /* SPI_VS_OUT_CONFIG */ - - 0xc0016900, - 0x00000187, - 0x00000000, /* SPI_VS_OUT_ID_0 */ - - 0xc0016900, - 0x00000191, - 0x00000100, /* SPI_PS_INPUT_CNTL_0 */ - - 0xc00b6900, - 0x000001b3, - 0x20000001, /* SPI_PS_IN_CONTROL_0 */ - 0x00000000, /* SPI_PS_IN_CONTROL_1 */ - 0x00000000, /* SPI_INTERP_CONTROL_0 */ - 0x00000000, /* SPI_INPUT_Z */ - 0x00000000, /* SPI_FOG_CNTL */ - 0x00100000, /* SPI_BARYC_CNTL */ - 0x00000000, /* SPI_PS_IN_CONTROL_2 */ - 0x00000000, /* */ - 0x00000000, /* */ - 0x00000000, /* */ - 0x00000000, /* */ - - 0xc0026900, - 0x00000316, - 0x0000000e, /* VGT_VERTEX_REUSE_BLOCK_CNTL */ - 0x00000010, /* */ -}; - -const u32 evergreen_default_size = ARRAY_SIZE(evergreen_default_state); diff --git a/drivers/gpu/drm/radeon/evergreen_blit_shaders.h b/drivers/gpu/drm/radeon/evergreen_blit_shaders.h index bb8d6c751595..4492524ee1df 100644 --- a/drivers/gpu/drm/radeon/evergreen_blit_shaders.h +++ b/drivers/gpu/drm/radeon/evergreen_blit_shaders.h @@ -20,16 +20,284 @@ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * + * Authors: + * Alex Deucher <alexander.deucher@amd.com> */ #ifndef EVERGREEN_BLIT_SHADERS_H #define EVERGREEN_BLIT_SHADERS_H -extern const u32 evergreen_ps[]; -extern const u32 evergreen_vs[]; -extern const u32 evergreen_default_state[]; +/* + * evergreen cards need to use the 3D engine to blit data which requires + * quite a bit of hw state setup. Rather than pull the whole 3D driver + * (which normally generates the 3D state) into the DRM, we opt to use + * statically generated state tables. The register state and shaders + * were hand generated to support blitting functionality. See the 3D + * driver or documentation for descriptions of the registers and + * shader instructions. + */ + +static const u32 evergreen_default_state[] = { + 0xc0016900, + 0x0000023b, + 0x00000000, /* SQ_LDS_ALLOC_PS */ + + 0xc0066900, + 0x00000240, + 0x00000000, /* SQ_ESGS_RING_ITEMSIZE */ + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + + 0xc0046900, + 0x00000247, + 0x00000000, /* SQ_GS_VERT_ITEMSIZE */ + 0x00000000, + 0x00000000, + 0x00000000, + + 0xc0026900, + 0x00000010, + 0x00000000, /* DB_Z_INFO */ + 0x00000000, /* DB_STENCIL_INFO */ + + 0xc0016900, + 0x00000200, + 0x00000000, /* DB_DEPTH_CONTROL */ + + 0xc0066900, + 0x00000000, + 0x00000060, /* DB_RENDER_CONTROL */ + 0x00000000, /* DB_COUNT_CONTROL */ + 0x00000000, /* DB_DEPTH_VIEW */ + 0x0000002a, /* DB_RENDER_OVERRIDE */ + 0x00000000, /* DB_RENDER_OVERRIDE2 */ + 0x00000000, /* DB_HTILE_DATA_BASE */ + + 0xc0026900, + 0x0000000a, + 0x00000000, /* DB_STENCIL_CLEAR */ + 0x00000000, /* DB_DEPTH_CLEAR */ + + 0xc0016900, + 0x000002dc, + 0x0000aa00, /* DB_ALPHA_TO_MASK */ + + 0xc0016900, + 0x00000080, + 0x00000000, /* PA_SC_WINDOW_OFFSET */ + + 0xc00d6900, + 0x00000083, + 0x0000ffff, /* PA_SC_CLIPRECT_RULE */ + 0x00000000, /* PA_SC_CLIPRECT_0_TL */ + 0x20002000, /* PA_SC_CLIPRECT_0_BR */ + 0x00000000, + 0x20002000, + 0x00000000, + 0x20002000, + 0x00000000, + 0x20002000, + 0xaaaaaaaa, /* PA_SC_EDGERULE */ + 0x00000000, /* PA_SU_HARDWARE_SCREEN_OFFSET */ + 0x0000000f, /* CB_TARGET_MASK */ + 0x0000000f, /* CB_SHADER_MASK */ + + 0xc0226900, + 0x00000094, + 0x80000000, /* PA_SC_VPORT_SCISSOR_0_TL */ + 0x20002000, /* PA_SC_VPORT_SCISSOR_0_BR */ + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x00000000, /* PA_SC_VPORT_ZMIN_0 */ + 0x3f800000, /* PA_SC_VPORT_ZMAX_0 */ + + 0xc0016900, + 0x000000d4, + 0x00000000, /* SX_MISC */ + + 0xc0026900, + 0x00000292, + 0x00000000, /* PA_SC_MODE_CNTL_0 */ + 0x00000000, /* PA_SC_MODE_CNTL_1 */ + + 0xc0106900, + 0x00000300, + 0x00000000, /* PA_SC_LINE_CNTL */ + 0x00000000, /* PA_SC_AA_CONFIG */ + 0x00000005, /* PA_SU_VTX_CNTL */ + 0x3f800000, /* PA_CL_GB_VERT_CLIP_ADJ */ + 0x3f800000, /* PA_CL_GB_VERT_DISC_ADJ */ + 0x3f800000, /* PA_CL_GB_HORZ_CLIP_ADJ */ + 0x3f800000, /* PA_CL_GB_HORZ_DISC_ADJ */ + 0x00000000, /* PA_SC_AA_SAMPLE_LOCS_0 */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* PA_SC_AA_SAMPLE_LOCS_7 */ + 0xffffffff, /* PA_SC_AA_MASK */ + + 0xc00d6900, + 0x00000202, + 0x00cc0010, /* CB_COLOR_CONTROL */ + 0x00000210, /* DB_SHADER_CONTROL */ + 0x00010000, /* PA_CL_CLIP_CNTL */ + 0x00000004, /* PA_SU_SC_MODE_CNTL */ + 0x00000100, /* PA_CL_VTE_CNTL */ + 0x00000000, /* PA_CL_VS_OUT_CNTL */ + 0x00000000, /* PA_CL_NANINF_CNTL */ + 0x00000000, /* PA_SU_LINE_STIPPLE_CNTL */ + 0x00000000, /* PA_SU_LINE_STIPPLE_SCALE */ + 0x00000000, /* PA_SU_PRIM_FILTER_CNTL */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* SQ_DYN_GPR_RESOURCE_LIMIT_1 */ + + 0xc0066900, + 0x000002de, + 0x00000000, /* PA_SU_POLY_OFFSET_DB_FMT_CNTL */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + + 0xc0016900, + 0x00000229, + 0x00000000, /* SQ_PGM_START_FS */ + + 0xc0016900, + 0x0000022a, + 0x00000000, /* SQ_PGM_RESOURCES_FS */ + + 0xc0096900, + 0x00000100, + 0x00ffffff, /* VGT_MAX_VTX_INDX */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* SX_ALPHA_TEST_CONTROL */ + 0x00000000, /* CB_BLEND_RED */ + 0x00000000, /* CB_BLEND_GREEN */ + 0x00000000, /* CB_BLEND_BLUE */ + 0x00000000, /* CB_BLEND_ALPHA */ + + 0xc0026900, + 0x000002a8, + 0x00000000, /* VGT_INSTANCE_STEP_RATE_0 */ + 0x00000000, /* */ + + 0xc0026900, + 0x000002ad, + 0x00000000, /* VGT_REUSE_OFF */ + 0x00000000, /* */ + + 0xc0116900, + 0x00000280, + 0x00000000, /* PA_SU_POINT_SIZE */ + 0x00000000, /* PA_SU_POINT_MINMAX */ + 0x00000008, /* PA_SU_LINE_CNTL */ + 0x00000000, /* PA_SC_LINE_STIPPLE */ + 0x00000000, /* VGT_OUTPUT_PATH_CNTL */ + 0x00000000, /* VGT_HOS_CNTL */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* VGT_GS_MODE */ + + 0xc0016900, + 0x000002a1, + 0x00000000, /* VGT_PRIMITIVEID_EN */ + + 0xc0016900, + 0x000002a5, + 0x00000000, /* VGT_MULTI_PRIM_IB_RESET_EN */ + + 0xc0016900, + 0x000002d5, + 0x00000000, /* VGT_SHADER_STAGES_EN */ + + 0xc0026900, + 0x000002e5, + 0x00000000, /* VGT_STRMOUT_CONFIG */ + 0x00000000, /* */ + + 0xc0016900, + 0x000001e0, + 0x00000000, /* CB_BLEND0_CONTROL */ + + 0xc0016900, + 0x000001b1, + 0x00000000, /* SPI_VS_OUT_CONFIG */ + + 0xc0016900, + 0x00000187, + 0x00000000, /* SPI_VS_OUT_ID_0 */ + + 0xc0016900, + 0x00000191, + 0x00000100, /* SPI_PS_INPUT_CNTL_0 */ + + 0xc00b6900, + 0x000001b3, + 0x20000001, /* SPI_PS_IN_CONTROL_0 */ + 0x00000000, /* SPI_PS_IN_CONTROL_1 */ + 0x00000000, /* SPI_INTERP_CONTROL_0 */ + 0x00000000, /* SPI_INPUT_Z */ + 0x00000000, /* SPI_FOG_CNTL */ + 0x00100000, /* SPI_BARYC_CNTL */ + 0x00000000, /* SPI_PS_IN_CONTROL_2 */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + + 0xc0026900, + 0x00000316, + 0x0000000e, /* VGT_VERTEX_REUSE_BLOCK_CNTL */ + 0x00000010, /* */ +}; -extern const u32 evergreen_ps_size, evergreen_vs_size; -extern const u32 evergreen_default_size; +static const u32 evergreen_default_size = ARRAY_SIZE(evergreen_default_state); #endif diff --git a/drivers/gpu/drm/radeon/r600_blit_shaders.c b/drivers/gpu/drm/radeon/r600_blit_shaders.c deleted file mode 100644 index 443cbe59b274..000000000000 --- a/drivers/gpu/drm/radeon/r600_blit_shaders.c +++ /dev/null @@ -1,719 +0,0 @@ -/* - * Copyright 2009 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Authors: - * Alex Deucher <alexander.deucher@amd.com> - */ - -#include <linux/bug.h> -#include <linux/types.h> -#include <linux/kernel.h> - -/* - * R6xx+ cards need to use the 3D engine to blit data which requires - * quite a bit of hw state setup. Rather than pull the whole 3D driver - * (which normally generates the 3D state) into the DRM, we opt to use - * statically generated state tables. The register state and shaders - * were hand generated to support blitting functionality. See the 3D - * driver or documentation for descriptions of the registers and - * shader instructions. - */ - -const u32 r6xx_default_state[] = -{ - 0xc0002400, /* START_3D_CMDBUF */ - 0x00000000, - - 0xc0012800, /* CONTEXT_CONTROL */ - 0x80000000, - 0x80000000, - - 0xc0016800, - 0x00000010, - 0x00008000, /* WAIT_UNTIL */ - - 0xc0016800, - 0x00000542, - 0x07000003, /* TA_CNTL_AUX */ - - 0xc0016800, - 0x000005c5, - 0x00000000, /* VC_ENHANCE */ - - 0xc0016800, - 0x00000363, - 0x00000000, /* SQ_DYN_GPR_CNTL_PS_FLUSH_REQ */ - - 0xc0016800, - 0x0000060c, - 0x82000000, /* DB_DEBUG */ - - 0xc0016800, - 0x0000060e, - 0x01020204, /* DB_WATERMARKS */ - - 0xc0026f00, - 0x00000000, - 0x00000000, /* SQ_VTX_BASE_VTX_LOC */ - 0x00000000, /* SQ_VTX_START_INST_LOC */ - - 0xc0096900, - 0x0000022a, - 0x00000000, /* SQ_ESGS_RING_ITEMSIZE */ - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - - 0xc0016900, - 0x00000004, - 0x00000000, /* DB_DEPTH_INFO */ - - 0xc0026900, - 0x0000000a, - 0x00000000, /* DB_STENCIL_CLEAR */ - 0x00000000, /* DB_DEPTH_CLEAR */ - - 0xc0016900, - 0x00000200, - 0x00000000, /* DB_DEPTH_CONTROL */ - - 0xc0026900, - 0x00000343, - 0x00000060, /* DB_RENDER_CONTROL */ - 0x00000040, /* DB_RENDER_OVERRIDE */ - - 0xc0016900, - 0x00000351, - 0x0000aa00, /* DB_ALPHA_TO_MASK */ - - 0xc00f6900, - 0x00000100, - 0x00000800, /* VGT_MAX_VTX_INDX */ - 0x00000000, /* VGT_MIN_VTX_INDX */ - 0x00000000, /* VGT_INDX_OFFSET */ - 0x00000000, /* VGT_MULTI_PRIM_IB_RESET_INDX */ - 0x00000000, /* SX_ALPHA_TEST_CONTROL */ - 0x00000000, /* CB_BLEND_RED */ - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, /* CB_FOG_RED */ - 0x00000000, - 0x00000000, - 0x00000000, /* DB_STENCILREFMASK */ - 0x00000000, /* DB_STENCILREFMASK_BF */ - 0x00000000, /* SX_ALPHA_REF */ - - 0xc0046900, - 0x0000030c, - 0x01000000, /* CB_CLRCMP_CNTL */ - 0x00000000, - 0x00000000, - 0x00000000, - - 0xc0046900, - 0x00000048, - 0x3f800000, /* CB_CLEAR_RED */ - 0x00000000, - 0x3f800000, - 0x3f800000, - - 0xc0016900, - 0x00000080, - 0x00000000, /* PA_SC_WINDOW_OFFSET */ - - 0xc00a6900, - 0x00000083, - 0x0000ffff, /* PA_SC_CLIP_RECT_RULE */ - 0x00000000, /* PA_SC_CLIPRECT_0_TL */ - 0x20002000, - 0x00000000, - 0x20002000, - 0x00000000, - 0x20002000, - 0x00000000, - 0x20002000, - 0x00000000, /* PA_SC_EDGERULE */ - - 0xc0406900, - 0x00000094, - 0x80000000, /* PA_SC_VPORT_SCISSOR_0_TL */ - 0x20002000, /* PA_SC_VPORT_SCISSOR_0_BR */ - 0x80000000, /* PA_SC_VPORT_SCISSOR_1_TL */ - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x00000000, /* PA_SC_VPORT_ZMIN_0 */ - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - - 0xc0026900, - 0x00000292, - 0x00000000, /* PA_SC_MPASS_PS_CNTL */ - 0x00004010, /* PA_SC_MODE_CNTL */ - - 0xc0096900, - 0x00000300, - 0x00000000, /* PA_SC_LINE_CNTL */ - 0x00000000, /* PA_SC_AA_CONFIG */ - 0x0000002d, /* PA_SU_VTX_CNTL */ - 0x3f800000, /* PA_CL_GB_VERT_CLIP_ADJ */ - 0x3f800000, - 0x3f800000, - 0x3f800000, - 0x00000000, /* PA_SC_SAMPLE_LOCS_MCTX */ - 0x00000000, - - 0xc0016900, - 0x00000312, - 0xffffffff, /* PA_SC_AA_MASK */ - - 0xc0066900, - 0x0000037e, - 0x00000000, /* PA_SU_POLY_OFFSET_DB_FMT_CNTL */ - 0x00000000, /* PA_SU_POLY_OFFSET_CLAMP */ - 0x00000000, /* PA_SU_POLY_OFFSET_FRONT_SCALE */ - 0x00000000, /* PA_SU_POLY_OFFSET_FRONT_OFFSET */ - 0x00000000, /* PA_SU_POLY_OFFSET_BACK_SCALE */ - 0x00000000, /* PA_SU_POLY_OFFSET_BACK_OFFSET */ - - 0xc0046900, - 0x000001b6, - 0x00000000, /* SPI_INPUT_Z */ - 0x00000000, /* SPI_FOG_CNTL */ - 0x00000000, /* SPI_FOG_FUNC_SCALE */ - 0x00000000, /* SPI_FOG_FUNC_BIAS */ - - 0xc0016900, - 0x00000225, - 0x00000000, /* SQ_PGM_START_FS */ - - 0xc0016900, - 0x00000229, - 0x00000000, /* SQ_PGM_RESOURCES_FS */ - - 0xc0016900, - 0x00000237, - 0x00000000, /* SQ_PGM_CF_OFFSET_FS */ - - 0xc0026900, - 0x000002a8, - 0x00000000, /* VGT_INSTANCE_STEP_RATE_0 */ - 0x00000000, /* VGT_INSTANCE_STEP_RATE_1 */ - - 0xc0116900, - 0x00000280, - 0x00000000, /* PA_SU_POINT_SIZE */ - 0x00000000, /* PA_SU_POINT_MINMAX */ - 0x00000008, /* PA_SU_LINE_CNTL */ - 0x00000000, /* PA_SC_LINE_STIPPLE */ - 0x00000000, /* VGT_OUTPUT_PATH_CNTL */ - 0x00000000, /* VGT_HOS_CNTL */ - 0x00000000, /* VGT_HOS_MAX_TESS_LEVEL */ - 0x00000000, /* VGT_HOS_MIN_TESS_LEVEL */ - 0x00000000, /* VGT_HOS_REUSE_DEPTH */ - 0x00000000, /* VGT_GROUP_PRIM_TYPE */ - 0x00000000, /* VGT_GROUP_FIRST_DECR */ - 0x00000000, /* VGT_GROUP_DECR */ - 0x00000000, /* VGT_GROUP_VECT_0_CNTL */ - 0x00000000, /* VGT_GROUP_VECT_1_CNTL */ - 0x00000000, /* VGT_GROUP_VECT_0_FMT_CNTL */ - 0x00000000, /* VGT_GROUP_VECT_1_FMT_CNTL */ - 0x00000000, /* VGT_GS_MODE */ - - 0xc0016900, - 0x000002a1, - 0x00000000, /* VGT_PRIMITIVEID_EN */ - - 0xc0016900, - 0x000002a5, - 0x00000000, /* VGT_MULTI_PRIM_ID_RESET_EN */ - - 0xc0036900, - 0x000002ac, - 0x00000000, /* VGT_STRMOUT_EN */ - 0x00000000, /* VGT_REUSE_OFF */ - 0x00000000, /* VGT_VTX_CNT_EN */ - - 0xc0016900, - 0x000000d4, - 0x00000000, /* SX_MISC */ - - 0xc0016900, - 0x000002c8, - 0x00000000, /* VGT_STRMOUT_BUFFER_EN */ - - 0xc0076900, - 0x00000202, - 0x00cc0000, /* CB_COLOR_CONTROL */ - 0x00000210, /* DB_SHADER_CNTL */ - 0x00010000, /* PA_CL_CLIP_CNTL */ - 0x00000244, /* PA_SU_SC_MODE_CNTL */ - 0x00000100, /* PA_CL_VTE_CNTL */ - 0x00000000, /* PA_CL_VS_OUT_CNTL */ - 0x00000000, /* PA_CL_NANINF_CNTL */ - - 0xc0026900, - 0x0000008e, - 0x0000000f, /* CB_TARGET_MASK */ - 0x0000000f, /* CB_SHADER_MASK */ - - 0xc0016900, - 0x000001e8, - 0x00000001, /* CB_SHADER_CONTROL */ - - 0xc0016900, - 0x00000185, - 0x00000000, /* SPI_VS_OUT_ID_0 */ - - 0xc0016900, - 0x00000191, - 0x00000b00, /* SPI_PS_INPUT_CNTL_0 */ - - 0xc0056900, - 0x000001b1, - 0x00000000, /* SPI_VS_OUT_CONFIG */ - 0x00000000, /* SPI_THREAD_GROUPING */ - 0x00000001, /* SPI_PS_IN_CONTROL_0 */ - 0x00000000, /* SPI_PS_IN_CONTROL_1 */ - 0x00000000, /* SPI_INTERP_CONTROL_0 */ - - 0xc0036e00, /* SET_SAMPLER */ - 0x00000000, - 0x00000012, - 0x00000000, - 0x00000000, -}; - -const u32 r7xx_default_state[] = -{ - 0xc0012800, /* CONTEXT_CONTROL */ - 0x80000000, - 0x80000000, - - 0xc0016800, - 0x00000010, - 0x00008000, /* WAIT_UNTIL */ - - 0xc0016800, - 0x00000542, - 0x07000002, /* TA_CNTL_AUX */ - - 0xc0016800, - 0x000005c5, - 0x00000000, /* VC_ENHANCE */ - - 0xc0016800, - 0x00000363, - 0x00004000, /* SQ_DYN_GPR_CNTL_PS_FLUSH_REQ */ - - 0xc0016800, - 0x0000060c, - 0x00000000, /* DB_DEBUG */ - - 0xc0016800, - 0x0000060e, - 0x00420204, /* DB_WATERMARKS */ - - 0xc0026f00, - 0x00000000, - 0x00000000, /* SQ_VTX_BASE_VTX_LOC */ - 0x00000000, /* SQ_VTX_START_INST_LOC */ - - 0xc0096900, - 0x0000022a, - 0x00000000, /* SQ_ESGS_RING_ITEMSIZE */ - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - - 0xc0016900, - 0x00000004, - 0x00000000, /* DB_DEPTH_INFO */ - - 0xc0026900, - 0x0000000a, - 0x00000000, /* DB_STENCIL_CLEAR */ - 0x00000000, /* DB_DEPTH_CLEAR */ - - 0xc0016900, - 0x00000200, - 0x00000000, /* DB_DEPTH_CONTROL */ - - 0xc0026900, - 0x00000343, - 0x00000060, /* DB_RENDER_CONTROL */ - 0x00000000, /* DB_RENDER_OVERRIDE */ - - 0xc0016900, - 0x00000351, - 0x0000aa00, /* DB_ALPHA_TO_MASK */ - - 0xc0096900, - 0x00000100, - 0x00000800, /* VGT_MAX_VTX_INDX */ - 0x00000000, /* VGT_MIN_VTX_INDX */ - 0x00000000, /* VGT_INDX_OFFSET */ - 0x00000000, /* VGT_MULTI_PRIM_IB_RESET_INDX */ - 0x00000000, /* SX_ALPHA_TEST_CONTROL */ - 0x00000000, /* CB_BLEND_RED */ - 0x00000000, - 0x00000000, - 0x00000000, - - 0xc0036900, - 0x0000010c, - 0x00000000, /* DB_STENCILREFMASK */ - 0x00000000, /* DB_STENCILREFMASK_BF */ - 0x00000000, /* SX_ALPHA_REF */ - - 0xc0046900, - 0x0000030c, /* CB_CLRCMP_CNTL */ - 0x01000000, - 0x00000000, - 0x00000000, - 0x00000000, - - 0xc0016900, - 0x00000080, - 0x00000000, /* PA_SC_WINDOW_OFFSET */ - - 0xc00a6900, - 0x00000083, - 0x0000ffff, /* PA_SC_CLIP_RECT_RULE */ - 0x00000000, /* PA_SC_CLIPRECT_0_TL */ - 0x20002000, - 0x00000000, - 0x20002000, - 0x00000000, - 0x20002000, - 0x00000000, - 0x20002000, - 0xaaaaaaaa, /* PA_SC_EDGERULE */ - - 0xc0406900, - 0x00000094, - 0x80000000, /* PA_SC_VPORT_SCISSOR_0_TL */ - 0x20002000, /* PA_SC_VPORT_SCISSOR_0_BR */ - 0x80000000, /* PA_SC_VPORT_SCISSOR_1_TL */ - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x00000000, /* PA_SC_VPORT_ZMIN_0 */ - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - - 0xc0026900, - 0x00000292, - 0x00000000, /* PA_SC_MPASS_PS_CNTL */ - 0x00514000, /* PA_SC_MODE_CNTL */ - - 0xc0096900, - 0x00000300, - 0x00000000, /* PA_SC_LINE_CNTL */ - 0x00000000, /* PA_SC_AA_CONFIG */ - 0x0000002d, /* PA_SU_VTX_CNTL */ - 0x3f800000, /* PA_CL_GB_VERT_CLIP_ADJ */ - 0x3f800000, - 0x3f800000, - 0x3f800000, - 0x00000000, /* PA_SC_SAMPLE_LOCS_MCTX */ - 0x00000000, - - 0xc0016900, - 0x00000312, - 0xffffffff, /* PA_SC_AA_MASK */ - - 0xc0066900, - 0x0000037e, - 0x00000000, /* PA_SU_POLY_OFFSET_DB_FMT_CNTL */ - 0x00000000, /* PA_SU_POLY_OFFSET_CLAMP */ - 0x00000000, /* PA_SU_POLY_OFFSET_FRONT_SCALE */ - 0x00000000, /* PA_SU_POLY_OFFSET_FRONT_OFFSET */ - 0x00000000, /* PA_SU_POLY_OFFSET_BACK_SCALE */ - 0x00000000, /* PA_SU_POLY_OFFSET_BACK_OFFSET */ - - 0xc0046900, - 0x000001b6, - 0x00000000, /* SPI_INPUT_Z */ - 0x00000000, /* SPI_FOG_CNTL */ - 0x00000000, /* SPI_FOG_FUNC_SCALE */ - 0x00000000, /* SPI_FOG_FUNC_BIAS */ - - 0xc0016900, - 0x00000225, - 0x00000000, /* SQ_PGM_START_FS */ - - 0xc0016900, - 0x00000229, - 0x00000000, /* SQ_PGM_RESOURCES_FS */ - - 0xc0016900, - 0x00000237, - 0x00000000, /* SQ_PGM_CF_OFFSET_FS */ - - 0xc0026900, - 0x000002a8, - 0x00000000, /* VGT_INSTANCE_STEP_RATE_0 */ - 0x00000000, /* VGT_INSTANCE_STEP_RATE_1 */ - - 0xc0116900, - 0x00000280, - 0x00000000, /* PA_SU_POINT_SIZE */ - 0x00000000, /* PA_SU_POINT_MINMAX */ - 0x00000008, /* PA_SU_LINE_CNTL */ - 0x00000000, /* PA_SC_LINE_STIPPLE */ - 0x00000000, /* VGT_OUTPUT_PATH_CNTL */ - 0x00000000, /* VGT_HOS_CNTL */ - 0x00000000, /* VGT_HOS_MAX_TESS_LEVEL */ - 0x00000000, /* VGT_HOS_MIN_TESS_LEVEL */ - 0x00000000, /* VGT_HOS_REUSE_DEPTH */ - 0x00000000, /* VGT_GROUP_PRIM_TYPE */ - 0x00000000, /* VGT_GROUP_FIRST_DECR */ - 0x00000000, /* VGT_GROUP_DECR */ - 0x00000000, /* VGT_GROUP_VECT_0_CNTL */ - 0x00000000, /* VGT_GROUP_VECT_1_CNTL */ - 0x00000000, /* VGT_GROUP_VECT_0_FMT_CNTL */ - 0x00000000, /* VGT_GROUP_VECT_1_FMT_CNTL */ - 0x00000000, /* VGT_GS_MODE */ - - 0xc0016900, - 0x000002a1, - 0x00000000, /* VGT_PRIMITIVEID_EN */ - - 0xc0016900, - 0x000002a5, - 0x00000000, /* VGT_MULTI_PRIM_ID_RESET_EN */ - - 0xc0036900, - 0x000002ac, - 0x00000000, /* VGT_STRMOUT_EN */ - 0x00000000, /* VGT_REUSE_OFF */ - 0x00000000, /* VGT_VTX_CNT_EN */ - - 0xc0016900, - 0x000000d4, - 0x00000000, /* SX_MISC */ - - 0xc0016900, - 0x000002c8, - 0x00000000, /* VGT_STRMOUT_BUFFER_EN */ - - 0xc0076900, - 0x00000202, - 0x00cc0000, /* CB_COLOR_CONTROL */ - 0x00000210, /* DB_SHADER_CNTL */ - 0x00010000, /* PA_CL_CLIP_CNTL */ - 0x00000244, /* PA_SU_SC_MODE_CNTL */ - 0x00000100, /* PA_CL_VTE_CNTL */ - 0x00000000, /* PA_CL_VS_OUT_CNTL */ - 0x00000000, /* PA_CL_NANINF_CNTL */ - - 0xc0026900, - 0x0000008e, - 0x0000000f, /* CB_TARGET_MASK */ - 0x0000000f, /* CB_SHADER_MASK */ - - 0xc0016900, - 0x000001e8, - 0x00000001, /* CB_SHADER_CONTROL */ - - 0xc0016900, - 0x00000185, - 0x00000000, /* SPI_VS_OUT_ID_0 */ - - 0xc0016900, - 0x00000191, - 0x00000b00, /* SPI_PS_INPUT_CNTL_0 */ - - 0xc0056900, - 0x000001b1, - 0x00000000, /* SPI_VS_OUT_CONFIG */ - 0x00000001, /* SPI_THREAD_GROUPING */ - 0x00000001, /* SPI_PS_IN_CONTROL_0 */ - 0x00000000, /* SPI_PS_IN_CONTROL_1 */ - 0x00000000, /* SPI_INTERP_CONTROL_0 */ - - 0xc0036e00, /* SET_SAMPLER */ - 0x00000000, - 0x00000012, - 0x00000000, - 0x00000000, -}; - -/* same for r6xx/r7xx */ -const u32 r6xx_vs[] = -{ - 0x00000004, - 0x81000000, - 0x0000203c, - 0x94000b08, - 0x00004000, - 0x14200b1a, - 0x00000000, - 0x00000000, - 0x3c000000, - 0x68cd1000, -#ifdef __BIG_ENDIAN - 0x000a0000, -#else - 0x00080000, -#endif - 0x00000000, -}; - -const u32 r6xx_ps[] = -{ - 0x00000002, - 0x80800000, - 0x00000000, - 0x94200688, - 0x00000010, - 0x000d1000, - 0xb0800000, - 0x00000000, -}; - -const u32 r6xx_ps_size = ARRAY_SIZE(r6xx_ps); -const u32 r6xx_vs_size = ARRAY_SIZE(r6xx_vs); -const u32 r6xx_default_size = ARRAY_SIZE(r6xx_default_state); -const u32 r7xx_default_size = ARRAY_SIZE(r7xx_default_state); diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index c67b6ddb29a4..e765abcb3b01 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -1629,7 +1629,7 @@ int radeon_pm_late_init(struct radeon_device *rdev) ret = device_create_file(rdev->dev, &dev_attr_power_method); if (ret) DRM_ERROR("failed to create device file for power method\n"); - if (!ret) + else rdev->pm.sysfs_initialized = true; } } diff --git a/drivers/gpu/drm/radeon/radeon_sync.c b/drivers/gpu/drm/radeon/radeon_sync.c index 49bbb2266c0f..6416f129e090 100644 --- a/drivers/gpu/drm/radeon/radeon_sync.c +++ b/drivers/gpu/drm/radeon/radeon_sync.c @@ -96,7 +96,7 @@ int radeon_sync_resv(struct radeon_device *rdev, struct dma_fence *f; int r = 0; - dma_resv_for_each_fence(&cursor, resv, dma_resv_usage_rw(shared), f) { + dma_resv_for_each_fence(&cursor, resv, dma_resv_usage_rw(!shared), f) { fence = to_radeon_fence(f); if (fence && fence->rdev == rdev) radeon_sync_fence(sync, fence); diff --git a/drivers/gpu/drm/radeon/si_blit_shaders.c b/drivers/gpu/drm/radeon/si_blit_shaders.c deleted file mode 100644 index ec415e7dfa4b..000000000000 --- a/drivers/gpu/drm/radeon/si_blit_shaders.c +++ /dev/null @@ -1,253 +0,0 @@ -/* - * Copyright 2011 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Authors: - * Alex Deucher <alexander.deucher@amd.com> - */ - -#include <linux/types.h> -#include <linux/bug.h> -#include <linux/kernel.h> - -const u32 si_default_state[] = -{ - 0xc0066900, - 0x00000000, - 0x00000060, /* DB_RENDER_CONTROL */ - 0x00000000, /* DB_COUNT_CONTROL */ - 0x00000000, /* DB_DEPTH_VIEW */ - 0x0000002a, /* DB_RENDER_OVERRIDE */ - 0x00000000, /* DB_RENDER_OVERRIDE2 */ - 0x00000000, /* DB_HTILE_DATA_BASE */ - - 0xc0046900, - 0x00000008, - 0x00000000, /* DB_DEPTH_BOUNDS_MIN */ - 0x00000000, /* DB_DEPTH_BOUNDS_MAX */ - 0x00000000, /* DB_STENCIL_CLEAR */ - 0x00000000, /* DB_DEPTH_CLEAR */ - - 0xc0036900, - 0x0000000f, - 0x00000000, /* DB_DEPTH_INFO */ - 0x00000000, /* DB_Z_INFO */ - 0x00000000, /* DB_STENCIL_INFO */ - - 0xc0016900, - 0x00000080, - 0x00000000, /* PA_SC_WINDOW_OFFSET */ - - 0xc00d6900, - 0x00000083, - 0x0000ffff, /* PA_SC_CLIPRECT_RULE */ - 0x00000000, /* PA_SC_CLIPRECT_0_TL */ - 0x20002000, /* PA_SC_CLIPRECT_0_BR */ - 0x00000000, - 0x20002000, - 0x00000000, - 0x20002000, - 0x00000000, - 0x20002000, - 0xaaaaaaaa, /* PA_SC_EDGERULE */ - 0x00000000, /* PA_SU_HARDWARE_SCREEN_OFFSET */ - 0x0000000f, /* CB_TARGET_MASK */ - 0x0000000f, /* CB_SHADER_MASK */ - - 0xc0226900, - 0x00000094, - 0x80000000, /* PA_SC_VPORT_SCISSOR_0_TL */ - 0x20002000, /* PA_SC_VPORT_SCISSOR_0_BR */ - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x00000000, /* PA_SC_VPORT_ZMIN_0 */ - 0x3f800000, /* PA_SC_VPORT_ZMAX_0 */ - - 0xc0026900, - 0x000000d9, - 0x00000000, /* CP_RINGID */ - 0x00000000, /* CP_VMID */ - - 0xc0046900, - 0x00000100, - 0xffffffff, /* VGT_MAX_VTX_INDX */ - 0x00000000, /* VGT_MIN_VTX_INDX */ - 0x00000000, /* VGT_INDX_OFFSET */ - 0x00000000, /* VGT_MULTI_PRIM_IB_RESET_INDX */ - - 0xc0046900, - 0x00000105, - 0x00000000, /* CB_BLEND_RED */ - 0x00000000, /* CB_BLEND_GREEN */ - 0x00000000, /* CB_BLEND_BLUE */ - 0x00000000, /* CB_BLEND_ALPHA */ - - 0xc0016900, - 0x000001e0, - 0x00000000, /* CB_BLEND0_CONTROL */ - - 0xc00e6900, - 0x00000200, - 0x00000000, /* DB_DEPTH_CONTROL */ - 0x00000000, /* DB_EQAA */ - 0x00cc0010, /* CB_COLOR_CONTROL */ - 0x00000210, /* DB_SHADER_CONTROL */ - 0x00010000, /* PA_CL_CLIP_CNTL */ - 0x00000004, /* PA_SU_SC_MODE_CNTL */ - 0x00000100, /* PA_CL_VTE_CNTL */ - 0x00000000, /* PA_CL_VS_OUT_CNTL */ - 0x00000000, /* PA_CL_NANINF_CNTL */ - 0x00000000, /* PA_SU_LINE_STIPPLE_CNTL */ - 0x00000000, /* PA_SU_LINE_STIPPLE_SCALE */ - 0x00000000, /* PA_SU_PRIM_FILTER_CNTL */ - 0x00000000, /* */ - 0x00000000, /* */ - - 0xc0116900, - 0x00000280, - 0x00000000, /* PA_SU_POINT_SIZE */ - 0x00000000, /* PA_SU_POINT_MINMAX */ - 0x00000008, /* PA_SU_LINE_CNTL */ - 0x00000000, /* PA_SC_LINE_STIPPLE */ - 0x00000000, /* VGT_OUTPUT_PATH_CNTL */ - 0x00000000, /* VGT_HOS_CNTL */ - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, /* VGT_GS_MODE */ - - 0xc0026900, - 0x00000292, - 0x00000000, /* PA_SC_MODE_CNTL_0 */ - 0x00000000, /* PA_SC_MODE_CNTL_1 */ - - 0xc0016900, - 0x000002a1, - 0x00000000, /* VGT_PRIMITIVEID_EN */ - - 0xc0016900, - 0x000002a5, - 0x00000000, /* VGT_MULTI_PRIM_IB_RESET_EN */ - - 0xc0026900, - 0x000002a8, - 0x00000000, /* VGT_INSTANCE_STEP_RATE_0 */ - 0x00000000, - - 0xc0026900, - 0x000002ad, - 0x00000000, /* VGT_REUSE_OFF */ - 0x00000000, - - 0xc0016900, - 0x000002d5, - 0x00000000, /* VGT_SHADER_STAGES_EN */ - - 0xc0016900, - 0x000002dc, - 0x0000aa00, /* DB_ALPHA_TO_MASK */ - - 0xc0066900, - 0x000002de, - 0x00000000, /* PA_SU_POLY_OFFSET_DB_FMT_CNTL */ - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - - 0xc0026900, - 0x000002e5, - 0x00000000, /* VGT_STRMOUT_CONFIG */ - 0x00000000, - - 0xc01b6900, - 0x000002f5, - 0x76543210, /* PA_SC_CENTROID_PRIORITY_0 */ - 0xfedcba98, /* PA_SC_CENTROID_PRIORITY_1 */ - 0x00000000, /* PA_SC_LINE_CNTL */ - 0x00000000, /* PA_SC_AA_CONFIG */ - 0x00000005, /* PA_SU_VTX_CNTL */ - 0x3f800000, /* PA_CL_GB_VERT_CLIP_ADJ */ - 0x3f800000, /* PA_CL_GB_VERT_DISC_ADJ */ - 0x3f800000, /* PA_CL_GB_HORZ_CLIP_ADJ */ - 0x3f800000, /* PA_CL_GB_HORZ_DISC_ADJ */ - 0x00000000, /* PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */ - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0xffffffff, /* PA_SC_AA_MASK_X0Y0_X1Y0 */ - 0xffffffff, - - 0xc0026900, - 0x00000316, - 0x0000000e, /* VGT_VERTEX_REUSE_BLOCK_CNTL */ - 0x00000010, /* */ -}; - -const u32 si_default_size = ARRAY_SIZE(si_default_state); diff --git a/drivers/gpu/drm/radeon/si_blit_shaders.h b/drivers/gpu/drm/radeon/si_blit_shaders.h index c739e51e3961..829a2b6228b7 100644 --- a/drivers/gpu/drm/radeon/si_blit_shaders.h +++ b/drivers/gpu/drm/radeon/si_blit_shaders.h @@ -25,8 +25,227 @@ #ifndef SI_BLIT_SHADERS_H #define SI_BLIT_SHADERS_H -extern const u32 si_default_state[]; +static const u32 si_default_state[] = { + 0xc0066900, + 0x00000000, + 0x00000060, /* DB_RENDER_CONTROL */ + 0x00000000, /* DB_COUNT_CONTROL */ + 0x00000000, /* DB_DEPTH_VIEW */ + 0x0000002a, /* DB_RENDER_OVERRIDE */ + 0x00000000, /* DB_RENDER_OVERRIDE2 */ + 0x00000000, /* DB_HTILE_DATA_BASE */ -extern const u32 si_default_size; + 0xc0046900, + 0x00000008, + 0x00000000, /* DB_DEPTH_BOUNDS_MIN */ + 0x00000000, /* DB_DEPTH_BOUNDS_MAX */ + 0x00000000, /* DB_STENCIL_CLEAR */ + 0x00000000, /* DB_DEPTH_CLEAR */ + + 0xc0036900, + 0x0000000f, + 0x00000000, /* DB_DEPTH_INFO */ + 0x00000000, /* DB_Z_INFO */ + 0x00000000, /* DB_STENCIL_INFO */ + + 0xc0016900, + 0x00000080, + 0x00000000, /* PA_SC_WINDOW_OFFSET */ + + 0xc00d6900, + 0x00000083, + 0x0000ffff, /* PA_SC_CLIPRECT_RULE */ + 0x00000000, /* PA_SC_CLIPRECT_0_TL */ + 0x20002000, /* PA_SC_CLIPRECT_0_BR */ + 0x00000000, + 0x20002000, + 0x00000000, + 0x20002000, + 0x00000000, + 0x20002000, + 0xaaaaaaaa, /* PA_SC_EDGERULE */ + 0x00000000, /* PA_SU_HARDWARE_SCREEN_OFFSET */ + 0x0000000f, /* CB_TARGET_MASK */ + 0x0000000f, /* CB_SHADER_MASK */ + + 0xc0226900, + 0x00000094, + 0x80000000, /* PA_SC_VPORT_SCISSOR_0_TL */ + 0x20002000, /* PA_SC_VPORT_SCISSOR_0_BR */ + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x00000000, /* PA_SC_VPORT_ZMIN_0 */ + 0x3f800000, /* PA_SC_VPORT_ZMAX_0 */ + + 0xc0026900, + 0x000000d9, + 0x00000000, /* CP_RINGID */ + 0x00000000, /* CP_VMID */ + + 0xc0046900, + 0x00000100, + 0xffffffff, /* VGT_MAX_VTX_INDX */ + 0x00000000, /* VGT_MIN_VTX_INDX */ + 0x00000000, /* VGT_INDX_OFFSET */ + 0x00000000, /* VGT_MULTI_PRIM_IB_RESET_INDX */ + + 0xc0046900, + 0x00000105, + 0x00000000, /* CB_BLEND_RED */ + 0x00000000, /* CB_BLEND_GREEN */ + 0x00000000, /* CB_BLEND_BLUE */ + 0x00000000, /* CB_BLEND_ALPHA */ + + 0xc0016900, + 0x000001e0, + 0x00000000, /* CB_BLEND0_CONTROL */ + + 0xc00e6900, + 0x00000200, + 0x00000000, /* DB_DEPTH_CONTROL */ + 0x00000000, /* DB_EQAA */ + 0x00cc0010, /* CB_COLOR_CONTROL */ + 0x00000210, /* DB_SHADER_CONTROL */ + 0x00010000, /* PA_CL_CLIP_CNTL */ + 0x00000004, /* PA_SU_SC_MODE_CNTL */ + 0x00000100, /* PA_CL_VTE_CNTL */ + 0x00000000, /* PA_CL_VS_OUT_CNTL */ + 0x00000000, /* PA_CL_NANINF_CNTL */ + 0x00000000, /* PA_SU_LINE_STIPPLE_CNTL */ + 0x00000000, /* PA_SU_LINE_STIPPLE_SCALE */ + 0x00000000, /* PA_SU_PRIM_FILTER_CNTL */ + 0x00000000, /* */ + 0x00000000, /* */ + + 0xc0116900, + 0x00000280, + 0x00000000, /* PA_SU_POINT_SIZE */ + 0x00000000, /* PA_SU_POINT_MINMAX */ + 0x00000008, /* PA_SU_LINE_CNTL */ + 0x00000000, /* PA_SC_LINE_STIPPLE */ + 0x00000000, /* VGT_OUTPUT_PATH_CNTL */ + 0x00000000, /* VGT_HOS_CNTL */ + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, /* VGT_GS_MODE */ + + 0xc0026900, + 0x00000292, + 0x00000000, /* PA_SC_MODE_CNTL_0 */ + 0x00000000, /* PA_SC_MODE_CNTL_1 */ + + 0xc0016900, + 0x000002a1, + 0x00000000, /* VGT_PRIMITIVEID_EN */ + + 0xc0016900, + 0x000002a5, + 0x00000000, /* VGT_MULTI_PRIM_IB_RESET_EN */ + + 0xc0026900, + 0x000002a8, + 0x00000000, /* VGT_INSTANCE_STEP_RATE_0 */ + 0x00000000, + + 0xc0026900, + 0x000002ad, + 0x00000000, /* VGT_REUSE_OFF */ + 0x00000000, + + 0xc0016900, + 0x000002d5, + 0x00000000, /* VGT_SHADER_STAGES_EN */ + + 0xc0016900, + 0x000002dc, + 0x0000aa00, /* DB_ALPHA_TO_MASK */ + + 0xc0066900, + 0x000002de, + 0x00000000, /* PA_SU_POLY_OFFSET_DB_FMT_CNTL */ + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + + 0xc0026900, + 0x000002e5, + 0x00000000, /* VGT_STRMOUT_CONFIG */ + 0x00000000, + + 0xc01b6900, + 0x000002f5, + 0x76543210, /* PA_SC_CENTROID_PRIORITY_0 */ + 0xfedcba98, /* PA_SC_CENTROID_PRIORITY_1 */ + 0x00000000, /* PA_SC_LINE_CNTL */ + 0x00000000, /* PA_SC_AA_CONFIG */ + 0x00000005, /* PA_SU_VTX_CNTL */ + 0x3f800000, /* PA_CL_GB_VERT_CLIP_ADJ */ + 0x3f800000, /* PA_CL_GB_VERT_DISC_ADJ */ + 0x3f800000, /* PA_CL_GB_HORZ_CLIP_ADJ */ + 0x3f800000, /* PA_CL_GB_HORZ_DISC_ADJ */ + 0x00000000, /* PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */ + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0xffffffff, /* PA_SC_AA_MASK_X0Y0_X1Y0 */ + 0xffffffff, + + 0xc0026900, + 0x00000316, + 0x0000000e, /* VGT_VERTEX_REUSE_BLOCK_CNTL */ + 0x00000010, /* */ +}; + +static const u32 si_default_size = ARRAY_SIZE(si_default_state); #endif diff --git a/drivers/gpu/drm/sun4i/sun4i_frontend.c b/drivers/gpu/drm/sun4i/sun4i_frontend.c index 56ae38389db0..462fae73eae9 100644 --- a/drivers/gpu/drm/sun4i/sun4i_frontend.c +++ b/drivers/gpu/drm/sun4i/sun4i_frontend.c @@ -222,13 +222,11 @@ void sun4i_frontend_update_buffer(struct sun4i_frontend *frontend, /* Set the physical address of the buffer in memory */ paddr = drm_fb_cma_get_gem_addr(fb, state, 0); - paddr -= PHYS_OFFSET; DRM_DEBUG_DRIVER("Setting buffer #0 address to %pad\n", &paddr); regmap_write(frontend->regs, SUN4I_FRONTEND_BUF_ADDR0_REG, paddr); if (fb->format->num_planes > 1) { paddr = drm_fb_cma_get_gem_addr(fb, state, swap ? 2 : 1); - paddr -= PHYS_OFFSET; DRM_DEBUG_DRIVER("Setting buffer #1 address to %pad\n", &paddr); regmap_write(frontend->regs, SUN4I_FRONTEND_BUF_ADDR1_REG, paddr); @@ -236,7 +234,6 @@ void sun4i_frontend_update_buffer(struct sun4i_frontend *frontend, if (fb->format->num_planes > 2) { paddr = drm_fb_cma_get_gem_addr(fb, state, swap ? 1 : 2); - paddr -= PHYS_OFFSET; DRM_DEBUG_DRIVER("Setting buffer #2 address to %pad\n", &paddr); regmap_write(frontend->regs, SUN4I_FRONTEND_BUF_ADDR2_REG, paddr); diff --git a/drivers/gpu/drm/vc4/Kconfig b/drivers/gpu/drm/vc4/Kconfig index 1eef1849cf54..061be9a6619d 100644 --- a/drivers/gpu/drm/vc4/Kconfig +++ b/drivers/gpu/drm/vc4/Kconfig @@ -2,6 +2,9 @@ config DRM_VC4 tristate "Broadcom VC4 Graphics" depends on ARCH_BCM || ARCH_BCM2835 || COMPILE_TEST + # Make sure not 'y' when RASPBERRYPI_FIRMWARE is 'm'. This can only + # happen when COMPILE_TEST=y, hence the added !RASPBERRYPI_FIRMWARE. + depends on RASPBERRYPI_FIRMWARE || (COMPILE_TEST && !RASPBERRYPI_FIRMWARE) depends on DRM depends on SND && SND_SOC depends on COMMON_CLK diff --git a/drivers/gpu/drm/vc4/vc4_dsi.c b/drivers/gpu/drm/vc4/vc4_dsi.c index 752f921735c6..98308a17e4ed 100644 --- a/drivers/gpu/drm/vc4/vc4_dsi.c +++ b/drivers/gpu/drm/vc4/vc4_dsi.c @@ -846,7 +846,7 @@ static void vc4_dsi_encoder_enable(struct drm_encoder *encoder) unsigned long phy_clock; int ret; - ret = pm_runtime_get_sync(dev); + ret = pm_runtime_resume_and_get(dev); if (ret) { DRM_ERROR("Failed to runtime PM enable on DSI%d\n", dsi->variant->port); return; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index 408ede1f967f..eb94433067ba 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -46,6 +46,21 @@ vmw_buffer_object(struct ttm_buffer_object *bo) return container_of(bo, struct vmw_buffer_object, base); } +/** + * bo_is_vmw - check if the buffer object is a &vmw_buffer_object + * @bo: ttm buffer object to be checked + * + * Uses destroy function associated with the object to determine if this is + * a &vmw_buffer_object. + * + * Returns: + * true if the object is of &vmw_buffer_object type, false if not. + */ +static bool bo_is_vmw(struct ttm_buffer_object *bo) +{ + return bo->destroy == &vmw_bo_bo_free || + bo->destroy == &vmw_gem_destroy; +} /** * vmw_bo_pin_in_placement - Validate a buffer to placement. @@ -615,8 +630,9 @@ int vmw_user_bo_synccpu_ioctl(struct drm_device *dev, void *data, ret = vmw_user_bo_synccpu_grab(vbo, arg->flags); vmw_bo_unreference(&vbo); - if (unlikely(ret != 0 && ret != -ERESTARTSYS && - ret != -EBUSY)) { + if (unlikely(ret != 0)) { + if (ret == -ERESTARTSYS || ret == -EBUSY) + return -EBUSY; DRM_ERROR("Failed synccpu grab on handle 0x%08x.\n", (unsigned int) arg->handle); return ret; @@ -805,7 +821,7 @@ int vmw_dumb_create(struct drm_file *file_priv, void vmw_bo_swap_notify(struct ttm_buffer_object *bo) { /* Is @bo embedded in a struct vmw_buffer_object? */ - if (vmw_bo_is_vmw_bo(bo)) + if (!bo_is_vmw(bo)) return; /* Kill any cached kernel maps before swapout */ @@ -829,7 +845,7 @@ void vmw_bo_move_notify(struct ttm_buffer_object *bo, struct vmw_buffer_object *vbo; /* Make sure @bo is embedded in a struct vmw_buffer_object? */ - if (vmw_bo_is_vmw_bo(bo)) + if (!bo_is_vmw(bo)) return; vbo = container_of(bo, struct vmw_buffer_object, base); @@ -850,22 +866,3 @@ void vmw_bo_move_notify(struct ttm_buffer_object *bo, if (mem->mem_type != VMW_PL_MOB && bo->resource->mem_type == VMW_PL_MOB) vmw_resource_unbind_list(vbo); } - -/** - * vmw_bo_is_vmw_bo - check if the buffer object is a &vmw_buffer_object - * @bo: buffer object to be checked - * - * Uses destroy function associated with the object to determine if this is - * a &vmw_buffer_object. - * - * Returns: - * true if the object is of &vmw_buffer_object type, false if not. - */ -bool vmw_bo_is_vmw_bo(struct ttm_buffer_object *bo) -{ - if (bo->destroy == &vmw_bo_bo_free || - bo->destroy == &vmw_gem_destroy) - return true; - - return false; -} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index ffd975c14136..01a5b47e95f9 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -994,13 +994,10 @@ static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id) goto out_no_fman; } - drm_vma_offset_manager_init(&dev_priv->vma_manager, - DRM_FILE_PAGE_OFFSET_START, - DRM_FILE_PAGE_OFFSET_SIZE); ret = ttm_device_init(&dev_priv->bdev, &vmw_bo_driver, dev_priv->drm.dev, dev_priv->drm.anon_inode->i_mapping, - &dev_priv->vma_manager, + dev_priv->drm.vma_offset_manager, dev_priv->map_mode == vmw_dma_alloc_coherent, false); if (unlikely(ret != 0)) { @@ -1170,7 +1167,6 @@ static void vmw_driver_unload(struct drm_device *dev) vmw_devcaps_destroy(dev_priv); vmw_vram_manager_fini(dev_priv); ttm_device_fini(&dev_priv->bdev); - drm_vma_offset_manager_destroy(&dev_priv->vma_manager); vmw_release_device_late(dev_priv); vmw_fence_manager_takedown(dev_priv->fman); if (dev_priv->capabilities & SVGA_CAP_IRQMASK) @@ -1411,7 +1407,7 @@ vmw_get_unmapped_area(struct file *file, unsigned long uaddr, struct vmw_private *dev_priv = vmw_priv(file_priv->minor->dev); return drm_get_unmapped_area(file, uaddr, len, pgoff, flags, - &dev_priv->vma_manager); + dev_priv->drm.vma_offset_manager); } static int vmwgfx_pm_notifier(struct notifier_block *nb, unsigned long val, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index 00e8e27e4884..ace7ca150b03 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -683,6 +683,9 @@ static void vmw_user_surface_base_release(struct ttm_base_object **p_base) container_of(base, struct vmw_user_surface, prime.base); struct vmw_resource *res = &user_srf->srf.res; + if (base->shareable && res && res->backup) + drm_gem_object_put(&res->backup->base.base); + *p_base = NULL; vmw_resource_unreference(&res); } @@ -857,6 +860,7 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, goto out_unlock; } vmw_bo_reference(res->backup); + drm_gem_object_get(&res->backup->base.base); } tmp = vmw_resource_reference(&srf->res); @@ -1513,7 +1517,6 @@ vmw_gb_surface_define_internal(struct drm_device *dev, &res->backup); if (ret == 0) vmw_bo_reference(res->backup); - } if (unlikely(ret != 0)) { @@ -1561,6 +1564,8 @@ vmw_gb_surface_define_internal(struct drm_device *dev, drm_vma_node_offset_addr(&res->backup->base.base.vma_node); rep->buffer_size = res->backup->base.base.size; rep->buffer_handle = backup_handle; + if (user_srf->prime.base.shareable) + drm_gem_object_get(&res->backup->base.base); } else { rep->buffer_map_handle = 0; rep->buffer_size = 0; diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index 27f969b3dc07..e9e2db68b9fb 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -179,6 +179,12 @@ struct imx_i2c_hwdata { unsigned int ndivs; unsigned int i2sr_clr_opcode; unsigned int i2cr_ien_opcode; + /* + * Errata ERR007805 or e7805: + * I2C: When the I2C clock speed is configured for 400 kHz, + * the SCL low period violates the I2C spec of 1.3 uS min. + */ + bool has_err007805; }; struct imx_i2c_dma { @@ -240,6 +246,16 @@ static const struct imx_i2c_hwdata imx21_i2c_hwdata = { }; +static const struct imx_i2c_hwdata imx6_i2c_hwdata = { + .devtype = IMX21_I2C, + .regshift = IMX_I2C_REGSHIFT, + .clk_div = imx_i2c_clk_div, + .ndivs = ARRAY_SIZE(imx_i2c_clk_div), + .i2sr_clr_opcode = I2SR_CLR_OPCODE_W0C, + .i2cr_ien_opcode = I2CR_IEN_OPCODE_1, + .has_err007805 = true, +}; + static struct imx_i2c_hwdata vf610_i2c_hwdata = { .devtype = VF610_I2C, .regshift = VF610_I2C_REGSHIFT, @@ -266,6 +282,16 @@ MODULE_DEVICE_TABLE(platform, imx_i2c_devtype); static const struct of_device_id i2c_imx_dt_ids[] = { { .compatible = "fsl,imx1-i2c", .data = &imx1_i2c_hwdata, }, { .compatible = "fsl,imx21-i2c", .data = &imx21_i2c_hwdata, }, + { .compatible = "fsl,imx6q-i2c", .data = &imx6_i2c_hwdata, }, + { .compatible = "fsl,imx6sl-i2c", .data = &imx6_i2c_hwdata, }, + { .compatible = "fsl,imx6sll-i2c", .data = &imx6_i2c_hwdata, }, + { .compatible = "fsl,imx6sx-i2c", .data = &imx6_i2c_hwdata, }, + { .compatible = "fsl,imx6ul-i2c", .data = &imx6_i2c_hwdata, }, + { .compatible = "fsl,imx7s-i2c", .data = &imx6_i2c_hwdata, }, + { .compatible = "fsl,imx8mm-i2c", .data = &imx6_i2c_hwdata, }, + { .compatible = "fsl,imx8mn-i2c", .data = &imx6_i2c_hwdata, }, + { .compatible = "fsl,imx8mp-i2c", .data = &imx6_i2c_hwdata, }, + { .compatible = "fsl,imx8mq-i2c", .data = &imx6_i2c_hwdata, }, { .compatible = "fsl,vf610-i2c", .data = &vf610_i2c_hwdata, }, { /* sentinel */ } }; @@ -551,6 +577,13 @@ static void i2c_imx_set_clk(struct imx_i2c_struct *i2c_imx, unsigned int div; int i; + if (i2c_imx->hwdata->has_err007805 && i2c_imx->bitrate > 384000) { + dev_dbg(&i2c_imx->adapter.dev, + "SoC errata ERR007805 or e7805 applies, bus frequency limited from %d Hz to 384000 Hz.\n", + i2c_imx->bitrate); + i2c_imx->bitrate = 384000; + } + /* Divider value calculation */ if (i2c_imx->cur_clk == i2c_clk_rate) return; diff --git a/drivers/i2c/busses/i2c-ismt.c b/drivers/i2c/busses/i2c-ismt.c index f4820fd3dc13..c0364314877e 100644 --- a/drivers/i2c/busses/i2c-ismt.c +++ b/drivers/i2c/busses/i2c-ismt.c @@ -145,8 +145,8 @@ #define ISMT_SPGT_SPD_MASK 0xc0000000 /* SMBus Speed mask */ #define ISMT_SPGT_SPD_80K 0x00 /* 80 kHz */ #define ISMT_SPGT_SPD_100K (0x1 << 30) /* 100 kHz */ -#define ISMT_SPGT_SPD_400K (0x2 << 30) /* 400 kHz */ -#define ISMT_SPGT_SPD_1M (0x3 << 30) /* 1 MHz */ +#define ISMT_SPGT_SPD_400K (0x2U << 30) /* 400 kHz */ +#define ISMT_SPGT_SPD_1M (0x3U << 30) /* 1 MHz */ /* MSI Control Register (MSICTL) bit definitions */ diff --git a/drivers/i2c/busses/i2c-pasemi-core.c b/drivers/i2c/busses/i2c-pasemi-core.c index 7728c8460dc0..9028ffb58cc0 100644 --- a/drivers/i2c/busses/i2c-pasemi-core.c +++ b/drivers/i2c/busses/i2c-pasemi-core.c @@ -137,6 +137,12 @@ static int pasemi_i2c_xfer_msg(struct i2c_adapter *adapter, TXFIFO_WR(smbus, msg->buf[msg->len-1] | (stop ? MTXFIFO_STOP : 0)); + + if (stop) { + err = pasemi_smb_waitready(smbus); + if (err) + goto reset_out; + } } return 0; diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index fc1dcc19f2a1..5b920f0fc7dd 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -843,10 +843,8 @@ static int geni_i2c_probe(struct platform_device *pdev) /* FIFO is disabled, so we can only use GPI DMA */ gi2c->gpi_mode = true; ret = setup_gpi_dma(gi2c); - if (ret) { - dev_err(dev, "Failed to setup GPI DMA mode:%d ret\n", ret); - return ret; - } + if (ret) + return dev_err_probe(dev, ret, "Failed to setup GPI DMA mode\n"); dev_dbg(dev, "Using GPI DMA mode for I2C\n"); } else { diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c index cf5d049342ea..ab0adaa130da 100644 --- a/drivers/i2c/i2c-dev.c +++ b/drivers/i2c/i2c-dev.c @@ -557,7 +557,7 @@ static long compat_i2cdev_ioctl(struct file *file, unsigned int cmd, unsigned lo .addr = umsg.addr, .flags = umsg.flags, .len = umsg.len, - .buf = compat_ptr(umsg.buf) + .buf = (__force __u8 *)compat_ptr(umsg.buf), }; } @@ -668,16 +668,21 @@ static int i2cdev_attach_adapter(struct device *dev, void *dummy) i2c_dev->dev.class = i2c_dev_class; i2c_dev->dev.parent = &adap->dev; i2c_dev->dev.release = i2cdev_dev_release; - dev_set_name(&i2c_dev->dev, "i2c-%d", adap->nr); + + res = dev_set_name(&i2c_dev->dev, "i2c-%d", adap->nr); + if (res) + goto err_put_i2c_dev; res = cdev_device_add(&i2c_dev->cdev, &i2c_dev->dev); - if (res) { - put_i2c_dev(i2c_dev, false); - return res; - } + if (res) + goto err_put_i2c_dev; pr_debug("adapter [%s] registered as minor %d\n", adap->name, adap->nr); return 0; + +err_put_i2c_dev: + put_i2c_dev(i2c_dev, false); + return res; } static int i2cdev_detach_adapter(struct device *dev, void *dummy) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index b7640cfe0020..47551ab73ca8 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -69,7 +69,12 @@ static unsigned int preferred_states_mask; static struct cpuidle_device __percpu *intel_idle_cpuidle_devices; static unsigned long auto_demotion_disable_flags; -static bool disable_promotion_to_c1e; + +static enum { + C1E_PROMOTION_PRESERVE, + C1E_PROMOTION_ENABLE, + C1E_PROMOTION_DISABLE +} c1e_promotion = C1E_PROMOTION_PRESERVE; struct idle_cpu { struct cpuidle_state *state_table; @@ -1398,8 +1403,6 @@ static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { } static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; } #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */ -static void c1e_promotion_enable(void); - /** * ivt_idle_state_table_update - Tune the idle states table for Ivy Town. * @@ -1578,17 +1581,14 @@ static void __init spr_idle_state_table_update(void) unsigned long long msr; /* Check if user prefers C1E over C1. */ - if (preferred_states_mask & BIT(2)) { - if (preferred_states_mask & BIT(1)) - /* Both can't be enabled, stick to the defaults. */ - return; - + if ((preferred_states_mask & BIT(2)) && + !(preferred_states_mask & BIT(1))) { + /* Disable C1 and enable C1E. */ spr_cstates[0].flags |= CPUIDLE_FLAG_UNUSABLE; spr_cstates[1].flags &= ~CPUIDLE_FLAG_UNUSABLE; /* Enable C1E using the "C1E promotion" bit. */ - c1e_promotion_enable(); - disable_promotion_to_c1e = false; + c1e_promotion = C1E_PROMOTION_ENABLE; } /* @@ -1754,7 +1754,9 @@ static int intel_idle_cpu_init(unsigned int cpu) if (auto_demotion_disable_flags) auto_demotion_disable(); - if (disable_promotion_to_c1e) + if (c1e_promotion == C1E_PROMOTION_ENABLE) + c1e_promotion_enable(); + else if (c1e_promotion == C1E_PROMOTION_DISABLE) c1e_promotion_disable(); return 0; @@ -1833,7 +1835,8 @@ static int __init intel_idle_init(void) if (icpu) { cpuidle_state_table = icpu->state_table; auto_demotion_disable_flags = icpu->auto_demotion_disable_flags; - disable_promotion_to_c1e = icpu->disable_promotion_to_c1e; + if (icpu->disable_promotion_to_c1e) + c1e_promotion = C1E_PROMOTION_DISABLE; if (icpu->use_acpi || force_use_acpi) intel_idle_acpi_cst_extract(); } else if (!intel_idle_acpi_cst_extract()) { diff --git a/drivers/iio/adc/ad7280a.c b/drivers/iio/adc/ad7280a.c index ef9d27759961..ec9acbf12b9a 100644 --- a/drivers/iio/adc/ad7280a.c +++ b/drivers/iio/adc/ad7280a.c @@ -745,7 +745,7 @@ static int ad7280a_write_thresh(struct iio_dev *indio_dev, case IIO_EV_DIR_RISING: addr = AD7280A_CELL_OVERVOLTAGE_REG; ret = ad7280_write(st, AD7280A_DEVADDR_MASTER, addr, - 1, val); + 1, value); if (ret) break; st->cell_threshhigh = value; @@ -753,7 +753,7 @@ static int ad7280a_write_thresh(struct iio_dev *indio_dev, case IIO_EV_DIR_FALLING: addr = AD7280A_CELL_UNDERVOLTAGE_REG; ret = ad7280_write(st, AD7280A_DEVADDR_MASTER, addr, - 1, val); + 1, value); if (ret) break; st->cell_threshlow = value; @@ -770,18 +770,18 @@ static int ad7280a_write_thresh(struct iio_dev *indio_dev, case IIO_EV_DIR_RISING: addr = AD7280A_AUX_ADC_OVERVOLTAGE_REG; ret = ad7280_write(st, AD7280A_DEVADDR_MASTER, addr, - 1, val); + 1, value); if (ret) break; - st->aux_threshhigh = val; + st->aux_threshhigh = value; break; case IIO_EV_DIR_FALLING: addr = AD7280A_AUX_ADC_UNDERVOLTAGE_REG; ret = ad7280_write(st, AD7280A_DEVADDR_MASTER, addr, - 1, val); + 1, value); if (ret) break; - st->aux_threshlow = val; + st->aux_threshlow = value; break; default: ret = -EINVAL; diff --git a/drivers/iio/chemical/scd4x.c b/drivers/iio/chemical/scd4x.c index 20d4e7584e92..37143b5526ee 100644 --- a/drivers/iio/chemical/scd4x.c +++ b/drivers/iio/chemical/scd4x.c @@ -471,12 +471,15 @@ static ssize_t calibration_forced_value_store(struct device *dev, ret = scd4x_write_and_fetch(state, CMD_FRC, arg, &val, sizeof(val)); mutex_unlock(&state->lock); + if (ret) + return ret; + if (val == 0xff) { dev_err(dev, "forced calibration has failed"); return -EINVAL; } - return ret ?: len; + return len; } static IIO_DEVICE_ATTR_RW(calibration_auto_enable, 0); diff --git a/drivers/iio/dac/ad3552r.c b/drivers/iio/dac/ad3552r.c index 97f13c0b9631..d5ea1a1be122 100644 --- a/drivers/iio/dac/ad3552r.c +++ b/drivers/iio/dac/ad3552r.c @@ -656,7 +656,7 @@ static int ad3552r_reset(struct ad3552r_desc *dac) { struct reg_addr_pool addr; int ret; - u16 val; + int val; dac->gpio_reset = devm_gpiod_get_optional(&dac->spi->dev, "reset", GPIOD_OUT_LOW); @@ -809,10 +809,10 @@ static int ad3552r_configure_custom_gain(struct ad3552r_desc *dac, gain_child = fwnode_get_named_child_node(child, "custom-output-range-config"); - if (IS_ERR(gain_child)) { + if (!gain_child) { dev_err(dev, "mandatory custom-output-range-config property missing\n"); - return PTR_ERR(gain_child); + return -EINVAL; } dac->ch_data[ch].range_override = 1; diff --git a/drivers/iio/dac/ad5446.c b/drivers/iio/dac/ad5446.c index 14cfabacbea5..fdf824041497 100644 --- a/drivers/iio/dac/ad5446.c +++ b/drivers/iio/dac/ad5446.c @@ -178,7 +178,7 @@ static int ad5446_read_raw(struct iio_dev *indio_dev, switch (m) { case IIO_CHAN_INFO_RAW: - *val = st->cached_val; + *val = st->cached_val >> chan->scan_type.shift; return IIO_VAL_INT; case IIO_CHAN_INFO_SCALE: *val = st->vref_mv; diff --git a/drivers/iio/dac/ad5592r-base.c b/drivers/iio/dac/ad5592r-base.c index a424b7220b61..4434c1b2a322 100644 --- a/drivers/iio/dac/ad5592r-base.c +++ b/drivers/iio/dac/ad5592r-base.c @@ -522,7 +522,7 @@ static int ad5592r_alloc_channels(struct iio_dev *iio_dev) if (!ret) st->channel_modes[reg] = tmp; - fwnode_property_read_u32(child, "adi,off-state", &tmp); + ret = fwnode_property_read_u32(child, "adi,off-state", &tmp); if (!ret) st->channel_offstate[reg] = tmp; } diff --git a/drivers/iio/dac/ltc2688.c b/drivers/iio/dac/ltc2688.c index e41861d29767..2f9c384885f4 100644 --- a/drivers/iio/dac/ltc2688.c +++ b/drivers/iio/dac/ltc2688.c @@ -298,7 +298,7 @@ static int ltc2688_read_raw(struct iio_dev *indio_dev, if (ret) return ret; - *val = 16; + *val2 = 16; return IIO_VAL_FRACTIONAL_LOG2; case IIO_CHAN_INFO_CALIBBIAS: ret = regmap_read(st->regmap, diff --git a/drivers/iio/dac/ti-dac5571.c b/drivers/iio/dac/ti-dac5571.c index 4a3b8d875518..0b775f943db3 100644 --- a/drivers/iio/dac/ti-dac5571.c +++ b/drivers/iio/dac/ti-dac5571.c @@ -19,6 +19,7 @@ #include <linux/i2c.h> #include <linux/module.h> #include <linux/mod_devicetable.h> +#include <linux/property.h> #include <linux/regulator/consumer.h> enum chip_id { @@ -311,6 +312,7 @@ static int dac5571_probe(struct i2c_client *client, const struct dac5571_spec *spec; struct dac5571_data *data; struct iio_dev *indio_dev; + enum chip_id chip_id; int ret, i; indio_dev = devm_iio_device_alloc(dev, sizeof(*data)); @@ -326,7 +328,13 @@ static int dac5571_probe(struct i2c_client *client, indio_dev->modes = INDIO_DIRECT_MODE; indio_dev->channels = dac5571_channels; - spec = &dac5571_spec[id->driver_data]; + if (dev_fwnode(dev)) + chip_id = (uintptr_t)device_get_match_data(dev); + else + chip_id = id->driver_data; + + spec = &dac5571_spec[chip_id]; + indio_dev->num_channels = spec->num_channels; data->spec = spec; @@ -385,15 +393,15 @@ static int dac5571_remove(struct i2c_client *i2c) } static const struct of_device_id dac5571_of_id[] = { - {.compatible = "ti,dac5571"}, - {.compatible = "ti,dac6571"}, - {.compatible = "ti,dac7571"}, - {.compatible = "ti,dac5574"}, - {.compatible = "ti,dac6574"}, - {.compatible = "ti,dac7574"}, - {.compatible = "ti,dac5573"}, - {.compatible = "ti,dac6573"}, - {.compatible = "ti,dac7573"}, + {.compatible = "ti,dac5571", .data = (void *)single_8bit}, + {.compatible = "ti,dac6571", .data = (void *)single_10bit}, + {.compatible = "ti,dac7571", .data = (void *)single_12bit}, + {.compatible = "ti,dac5574", .data = (void *)quad_8bit}, + {.compatible = "ti,dac6574", .data = (void *)quad_10bit}, + {.compatible = "ti,dac7574", .data = (void *)quad_12bit}, + {.compatible = "ti,dac5573", .data = (void *)quad_8bit}, + {.compatible = "ti,dac6573", .data = (void *)quad_10bit}, + {.compatible = "ti,dac7573", .data = (void *)quad_12bit}, {} }; MODULE_DEVICE_TABLE(of, dac5571_of_id); diff --git a/drivers/iio/filter/Kconfig b/drivers/iio/filter/Kconfig index 3ae35817ad82..a85b345ea14e 100644 --- a/drivers/iio/filter/Kconfig +++ b/drivers/iio/filter/Kconfig @@ -8,6 +8,7 @@ menu "Filters" config ADMV8818 tristate "Analog Devices ADMV8818 High-Pass and Low-Pass Filter" depends on SPI && COMMON_CLK && 64BIT + select REGMAP_SPI help Say yes here to build support for Analog Devices ADMV8818 2 GHz to 18 GHz, Digitally Tunable, High-Pass and Low-Pass Filter. diff --git a/drivers/iio/imu/bmi160/bmi160_core.c b/drivers/iio/imu/bmi160/bmi160_core.c index 824b5124a5f5..01336105792e 100644 --- a/drivers/iio/imu/bmi160/bmi160_core.c +++ b/drivers/iio/imu/bmi160/bmi160_core.c @@ -730,7 +730,7 @@ static int bmi160_chip_init(struct bmi160_data *data, bool use_spi) ret = regmap_write(data->regmap, BMI160_REG_CMD, BMI160_CMD_SOFTRESET); if (ret) - return ret; + goto disable_regulator; usleep_range(BMI160_SOFTRESET_USLEEP, BMI160_SOFTRESET_USLEEP + 1); @@ -741,29 +741,37 @@ static int bmi160_chip_init(struct bmi160_data *data, bool use_spi) if (use_spi) { ret = regmap_read(data->regmap, BMI160_REG_DUMMY, &val); if (ret) - return ret; + goto disable_regulator; } ret = regmap_read(data->regmap, BMI160_REG_CHIP_ID, &val); if (ret) { dev_err(dev, "Error reading chip id\n"); - return ret; + goto disable_regulator; } if (val != BMI160_CHIP_ID_VAL) { dev_err(dev, "Wrong chip id, got %x expected %x\n", val, BMI160_CHIP_ID_VAL); - return -ENODEV; + ret = -ENODEV; + goto disable_regulator; } ret = bmi160_set_mode(data, BMI160_ACCEL, true); if (ret) - return ret; + goto disable_regulator; ret = bmi160_set_mode(data, BMI160_GYRO, true); if (ret) - return ret; + goto disable_accel; return 0; + +disable_accel: + bmi160_set_mode(data, BMI160_ACCEL, false); + +disable_regulator: + regulator_bulk_disable(ARRAY_SIZE(data->supplies), data->supplies); + return ret; } static int bmi160_data_rdy_trigger_set_state(struct iio_trigger *trig, diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_i2c.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_i2c.c index 33d9afb1ba91..d4a692b838d0 100644 --- a/drivers/iio/imu/inv_icm42600/inv_icm42600_i2c.c +++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_i2c.c @@ -18,12 +18,15 @@ static int inv_icm42600_i2c_bus_setup(struct inv_icm42600_state *st) unsigned int mask, val; int ret; - /* setup interface registers */ - ret = regmap_update_bits(st->map, INV_ICM42600_REG_INTF_CONFIG6, - INV_ICM42600_INTF_CONFIG6_MASK, - INV_ICM42600_INTF_CONFIG6_I3C_EN); - if (ret) - return ret; + /* + * setup interface registers + * This register write to REG_INTF_CONFIG6 enables a spike filter that + * is impacting the line and can prevent the I2C ACK to be seen by the + * controller. So we don't test the return value. + */ + regmap_update_bits(st->map, INV_ICM42600_REG_INTF_CONFIG6, + INV_ICM42600_INTF_CONFIG6_MASK, + INV_ICM42600_INTF_CONFIG6_I3C_EN); ret = regmap_update_bits(st->map, INV_ICM42600_REG_INTF_CONFIG4, INV_ICM42600_INTF_CONFIG4_I3C_BUS_ONLY, 0); diff --git a/drivers/iio/magnetometer/ak8975.c b/drivers/iio/magnetometer/ak8975.c index 088f748b683e..2432e697150c 100644 --- a/drivers/iio/magnetometer/ak8975.c +++ b/drivers/iio/magnetometer/ak8975.c @@ -416,6 +416,7 @@ static int ak8975_power_on(const struct ak8975_data *data) if (ret) { dev_warn(&data->client->dev, "Failed to enable specified Vid supply\n"); + regulator_disable(data->vdd); return ret; } diff --git a/drivers/iio/proximity/sx9324.c b/drivers/iio/proximity/sx9324.c index 0d9bbbb50cb4..70c37f664f6d 100644 --- a/drivers/iio/proximity/sx9324.c +++ b/drivers/iio/proximity/sx9324.c @@ -70,13 +70,17 @@ #define SX9324_REG_AFE_PH2 0x2a #define SX9324_REG_AFE_PH3 0x2b #define SX9324_REG_AFE_CTRL8 0x2c -#define SX9324_REG_AFE_CTRL8_RESFILTN_4KOHM 0x02 +#define SX9324_REG_AFE_CTRL8_RESERVED 0x10 +#define SX9324_REG_AFE_CTRL8_RESFILTIN_4KOHM 0x02 #define SX9324_REG_AFE_CTRL9 0x2d #define SX9324_REG_AFE_CTRL9_AGAIN_1 0x08 #define SX9324_REG_PROX_CTRL0 0x30 #define SX9324_REG_PROX_CTRL0_GAIN_MASK GENMASK(5, 3) -#define SX9324_REG_PROX_CTRL0_GAIN_1 0x80 +#define SX9324_REG_PROX_CTRL0_GAIN_SHIFT 3 +#define SX9324_REG_PROX_CTRL0_GAIN_RSVD 0x0 +#define SX9324_REG_PROX_CTRL0_GAIN_1 0x1 +#define SX9324_REG_PROX_CTRL0_GAIN_8 0x4 #define SX9324_REG_PROX_CTRL0_RAWFILT_MASK GENMASK(2, 0) #define SX9324_REG_PROX_CTRL0_RAWFILT_1P50 0x01 #define SX9324_REG_PROX_CTRL1 0x31 @@ -379,7 +383,14 @@ static int sx9324_read_gain(struct sx_common_data *data, if (ret) return ret; - *val = 1 << FIELD_GET(SX9324_REG_PROX_CTRL0_GAIN_MASK, regval); + regval = FIELD_GET(SX9324_REG_PROX_CTRL0_GAIN_MASK, regval); + if (regval) + regval--; + else if (regval == SX9324_REG_PROX_CTRL0_GAIN_RSVD || + regval > SX9324_REG_PROX_CTRL0_GAIN_8) + return -EINVAL; + + *val = 1 << regval; return IIO_VAL_INT; } @@ -725,8 +736,12 @@ static int sx9324_write_gain(struct sx_common_data *data, unsigned int gain, reg; int ret; - gain = ilog2(val); reg = SX9324_REG_PROX_CTRL0 + chan->channel / 2; + + gain = ilog2(val) + 1; + if (val <= 0 || gain > SX9324_REG_PROX_CTRL0_GAIN_8) + return -EINVAL; + gain = FIELD_PREP(SX9324_REG_PROX_CTRL0_GAIN_MASK, gain); mutex_lock(&data->mutex); @@ -781,12 +796,15 @@ static const struct sx_common_reg_default sx9324_default_regs[] = { { SX9324_REG_AFE_PH2, 0x1a }, { SX9324_REG_AFE_PH3, 0x16 }, - { SX9324_REG_AFE_CTRL8, SX9324_REG_AFE_CTRL8_RESFILTN_4KOHM }, + { SX9324_REG_AFE_CTRL8, SX9324_REG_AFE_CTRL8_RESERVED | + SX9324_REG_AFE_CTRL8_RESFILTIN_4KOHM }, { SX9324_REG_AFE_CTRL9, SX9324_REG_AFE_CTRL9_AGAIN_1 }, - { SX9324_REG_PROX_CTRL0, SX9324_REG_PROX_CTRL0_GAIN_1 | + { SX9324_REG_PROX_CTRL0, + SX9324_REG_PROX_CTRL0_GAIN_1 << SX9324_REG_PROX_CTRL0_GAIN_SHIFT | SX9324_REG_PROX_CTRL0_RAWFILT_1P50 }, - { SX9324_REG_PROX_CTRL1, SX9324_REG_PROX_CTRL0_GAIN_1 | + { SX9324_REG_PROX_CTRL1, + SX9324_REG_PROX_CTRL0_GAIN_1 << SX9324_REG_PROX_CTRL0_GAIN_SHIFT | SX9324_REG_PROX_CTRL0_RAWFILT_1P50 }, { SX9324_REG_PROX_CTRL2, SX9324_REG_PROX_CTRL2_AVGNEG_THRESH_16K }, { SX9324_REG_PROX_CTRL3, SX9324_REG_PROX_CTRL3_AVGDEB_2SAMPLES | diff --git a/drivers/iio/proximity/sx_common.c b/drivers/iio/proximity/sx_common.c index a7c07316a0a9..8ad814d96b7e 100644 --- a/drivers/iio/proximity/sx_common.c +++ b/drivers/iio/proximity/sx_common.c @@ -521,6 +521,7 @@ int sx_common_probe(struct i2c_client *client, return dev_err_probe(dev, ret, "error reading WHOAMI\n"); ACPI_COMPANION_SET(&indio_dev->dev, ACPI_COMPANION(dev)); + indio_dev->dev.of_node = client->dev.of_node; indio_dev->modes = INDIO_DIRECT_MODE; indio_dev->channels = data->chip_info->iio_channels; diff --git a/drivers/input/keyboard/cypress-sf.c b/drivers/input/keyboard/cypress-sf.c index c28996028e80..9a23eed6a4f4 100644 --- a/drivers/input/keyboard/cypress-sf.c +++ b/drivers/input/keyboard/cypress-sf.c @@ -61,6 +61,14 @@ static irqreturn_t cypress_sf_irq_handler(int irq, void *devid) return IRQ_HANDLED; } +static void cypress_sf_disable_regulators(void *arg) +{ + struct cypress_sf_data *touchkey = arg; + + regulator_bulk_disable(ARRAY_SIZE(touchkey->regulators), + touchkey->regulators); +} + static int cypress_sf_probe(struct i2c_client *client) { struct cypress_sf_data *touchkey; @@ -121,6 +129,12 @@ static int cypress_sf_probe(struct i2c_client *client) return error; } + error = devm_add_action_or_reset(&client->dev, + cypress_sf_disable_regulators, + touchkey); + if (error) + return error; + touchkey->input_dev = devm_input_allocate_device(&client->dev); if (!touchkey->input_dev) { dev_err(&client->dev, "Failed to allocate input device\n"); diff --git a/drivers/input/keyboard/omap4-keypad.c b/drivers/input/keyboard/omap4-keypad.c index 43375b38ee59..8a7ce41b8c56 100644 --- a/drivers/input/keyboard/omap4-keypad.c +++ b/drivers/input/keyboard/omap4-keypad.c @@ -393,7 +393,7 @@ static int omap4_keypad_probe(struct platform_device *pdev) * revision register. */ error = pm_runtime_get_sync(dev); - if (error) { + if (error < 0) { dev_err(dev, "pm_runtime_get_sync() failed\n"); pm_runtime_put_noidle(dev); return error; diff --git a/drivers/interconnect/qcom/sc7180.c b/drivers/interconnect/qcom/sc7180.c index 12d59c36df53..5f7c0f85fa8e 100644 --- a/drivers/interconnect/qcom/sc7180.c +++ b/drivers/interconnect/qcom/sc7180.c @@ -47,7 +47,6 @@ DEFINE_QNODE(qnm_mnoc_sf, SC7180_MASTER_MNOC_SF_MEM_NOC, 1, 32, SC7180_SLAVE_GEM DEFINE_QNODE(qnm_snoc_gc, SC7180_MASTER_SNOC_GC_MEM_NOC, 1, 8, SC7180_SLAVE_LLCC); DEFINE_QNODE(qnm_snoc_sf, SC7180_MASTER_SNOC_SF_MEM_NOC, 1, 16, SC7180_SLAVE_LLCC); DEFINE_QNODE(qxm_gpu, SC7180_MASTER_GFX3D, 2, 32, SC7180_SLAVE_GEM_NOC_SNOC, SC7180_SLAVE_LLCC); -DEFINE_QNODE(ipa_core_master, SC7180_MASTER_IPA_CORE, 1, 8, SC7180_SLAVE_IPA_CORE); DEFINE_QNODE(llcc_mc, SC7180_MASTER_LLCC, 2, 4, SC7180_SLAVE_EBI1); DEFINE_QNODE(qhm_mnoc_cfg, SC7180_MASTER_CNOC_MNOC_CFG, 1, 4, SC7180_SLAVE_SERVICE_MNOC); DEFINE_QNODE(qxm_camnoc_hf0, SC7180_MASTER_CAMNOC_HF0, 2, 32, SC7180_SLAVE_MNOC_HF_MEM_NOC); @@ -129,7 +128,6 @@ DEFINE_QNODE(qhs_mdsp_ms_mpu_cfg, SC7180_SLAVE_MSS_PROC_MS_MPU_CFG, 1, 4); DEFINE_QNODE(qns_gem_noc_snoc, SC7180_SLAVE_GEM_NOC_SNOC, 1, 8, SC7180_MASTER_GEM_NOC_SNOC); DEFINE_QNODE(qns_llcc, SC7180_SLAVE_LLCC, 1, 16, SC7180_MASTER_LLCC); DEFINE_QNODE(srvc_gemnoc, SC7180_SLAVE_SERVICE_GEM_NOC, 1, 4); -DEFINE_QNODE(ipa_core_slave, SC7180_SLAVE_IPA_CORE, 1, 8); DEFINE_QNODE(ebi, SC7180_SLAVE_EBI1, 2, 4); DEFINE_QNODE(qns_mem_noc_hf, SC7180_SLAVE_MNOC_HF_MEM_NOC, 1, 32, SC7180_MASTER_MNOC_HF_MEM_NOC); DEFINE_QNODE(qns_mem_noc_sf, SC7180_SLAVE_MNOC_SF_MEM_NOC, 1, 32, SC7180_MASTER_MNOC_SF_MEM_NOC); @@ -160,7 +158,6 @@ DEFINE_QBCM(bcm_mc0, "MC0", true, &ebi); DEFINE_QBCM(bcm_sh0, "SH0", true, &qns_llcc); DEFINE_QBCM(bcm_mm0, "MM0", false, &qns_mem_noc_hf); DEFINE_QBCM(bcm_ce0, "CE0", false, &qxm_crypto); -DEFINE_QBCM(bcm_ip0, "IP0", false, &ipa_core_slave); DEFINE_QBCM(bcm_cn0, "CN0", true, &qnm_snoc, &xm_qdss_dap, &qhs_a1_noc_cfg, &qhs_a2_noc_cfg, &qhs_ahb2phy0, &qhs_aop, &qhs_aoss, &qhs_boot_rom, &qhs_camera_cfg, &qhs_camera_nrt_throttle_cfg, &qhs_camera_rt_throttle_cfg, &qhs_clk_ctl, &qhs_cpr_cx, &qhs_cpr_mx, &qhs_crypto0_cfg, &qhs_dcc_cfg, &qhs_ddrss_cfg, &qhs_display_cfg, &qhs_display_rt_throttle_cfg, &qhs_display_throttle_cfg, &qhs_glm, &qhs_gpuss_cfg, &qhs_imem_cfg, &qhs_ipa, &qhs_mnoc_cfg, &qhs_mss_cfg, &qhs_npu_cfg, &qhs_npu_dma_throttle_cfg, &qhs_npu_dsp_throttle_cfg, &qhs_pimem_cfg, &qhs_prng, &qhs_qdss_cfg, &qhs_qm_cfg, &qhs_qm_mpu_cfg, &qhs_qup0, &qhs_qup1, &qhs_security, &qhs_snoc_cfg, &qhs_tcsr, &qhs_tlmm_1, &qhs_tlmm_2, &qhs_tlmm_3, &qhs_ufs_mem_cfg, &qhs_usb3, &qhs_venus_cfg, &qhs_venus_throttle_cfg, &qhs_vsense_ctrl_cfg, &srvc_cnoc); DEFINE_QBCM(bcm_mm1, "MM1", false, &qxm_camnoc_hf0_uncomp, &qxm_camnoc_hf1_uncomp, &qxm_camnoc_sf_uncomp, &qhm_mnoc_cfg, &qxm_mdp0, &qxm_rot, &qxm_venus0, &qxm_venus_arm9); DEFINE_QBCM(bcm_sh2, "SH2", false, &acm_sys_tcu); @@ -372,22 +369,6 @@ static struct qcom_icc_desc sc7180_gem_noc = { .num_bcms = ARRAY_SIZE(gem_noc_bcms), }; -static struct qcom_icc_bcm *ipa_virt_bcms[] = { - &bcm_ip0, -}; - -static struct qcom_icc_node *ipa_virt_nodes[] = { - [MASTER_IPA_CORE] = &ipa_core_master, - [SLAVE_IPA_CORE] = &ipa_core_slave, -}; - -static struct qcom_icc_desc sc7180_ipa_virt = { - .nodes = ipa_virt_nodes, - .num_nodes = ARRAY_SIZE(ipa_virt_nodes), - .bcms = ipa_virt_bcms, - .num_bcms = ARRAY_SIZE(ipa_virt_bcms), -}; - static struct qcom_icc_bcm *mc_virt_bcms[] = { &bcm_acv, &bcm_mc0, @@ -519,8 +500,6 @@ static const struct of_device_id qnoc_of_match[] = { .data = &sc7180_dc_noc}, { .compatible = "qcom,sc7180-gem-noc", .data = &sc7180_gem_noc}, - { .compatible = "qcom,sc7180-ipa-virt", - .data = &sc7180_ipa_virt}, { .compatible = "qcom,sc7180-mc-virt", .data = &sc7180_mc_virt}, { .compatible = "qcom,sc7180-mmss-noc", diff --git a/drivers/interconnect/qcom/sdx55.c b/drivers/interconnect/qcom/sdx55.c index 03d604f84cc5..e3ac25a997b7 100644 --- a/drivers/interconnect/qcom/sdx55.c +++ b/drivers/interconnect/qcom/sdx55.c @@ -18,7 +18,6 @@ #include "icc-rpmh.h" #include "sdx55.h" -DEFINE_QNODE(ipa_core_master, SDX55_MASTER_IPA_CORE, 1, 8, SDX55_SLAVE_IPA_CORE); DEFINE_QNODE(llcc_mc, SDX55_MASTER_LLCC, 4, 4, SDX55_SLAVE_EBI_CH0); DEFINE_QNODE(acm_tcu, SDX55_MASTER_TCU_0, 1, 8, SDX55_SLAVE_LLCC, SDX55_SLAVE_MEM_NOC_SNOC, SDX55_SLAVE_MEM_NOC_PCIE_SNOC); DEFINE_QNODE(qnm_snoc_gc, SDX55_MASTER_SNOC_GC_MEM_NOC, 1, 8, SDX55_SLAVE_LLCC); @@ -40,7 +39,6 @@ DEFINE_QNODE(xm_pcie, SDX55_MASTER_PCIE, 1, 8, SDX55_SLAVE_ANOC_SNOC); DEFINE_QNODE(xm_qdss_etr, SDX55_MASTER_QDSS_ETR, 1, 8, SDX55_SLAVE_SNOC_CFG, SDX55_SLAVE_EMAC_CFG, SDX55_SLAVE_USB3, SDX55_SLAVE_AOSS, SDX55_SLAVE_SPMI_FETCHER, SDX55_SLAVE_QDSS_CFG, SDX55_SLAVE_PDM, SDX55_SLAVE_SNOC_MEM_NOC_GC, SDX55_SLAVE_TCSR, SDX55_SLAVE_CNOC_DDRSS, SDX55_SLAVE_SPMI_VGI_COEX, SDX55_SLAVE_QPIC, SDX55_SLAVE_OCIMEM, SDX55_SLAVE_IPA_CFG, SDX55_SLAVE_USB3_PHY_CFG, SDX55_SLAVE_AOP, SDX55_SLAVE_BLSP_1, SDX55_SLAVE_SDCC_1, SDX55_SLAVE_CNOC_MSS, SDX55_SLAVE_PCIE_PARF, SDX55_SLAVE_ECC_CFG, SDX55_SLAVE_AUDIO, SDX55_SLAVE_AOSS, SDX55_SLAVE_PRNG, SDX55_SLAVE_CRYPTO_0_CFG, SDX55_SLAVE_TCU, SDX55_SLAVE_CLK_CTL, SDX55_SLAVE_IMEM_CFG); DEFINE_QNODE(xm_sdc1, SDX55_MASTER_SDCC_1, 1, 8, SDX55_SLAVE_AOSS, SDX55_SLAVE_IPA_CFG, SDX55_SLAVE_ANOC_SNOC, SDX55_SLAVE_AOP, SDX55_SLAVE_AUDIO); DEFINE_QNODE(xm_usb3, SDX55_MASTER_USB3, 1, 8, SDX55_SLAVE_ANOC_SNOC); -DEFINE_QNODE(ipa_core_slave, SDX55_SLAVE_IPA_CORE, 1, 8); DEFINE_QNODE(ebi, SDX55_SLAVE_EBI_CH0, 1, 4); DEFINE_QNODE(qns_llcc, SDX55_SLAVE_LLCC, 1, 16, SDX55_SLAVE_EBI_CH0); DEFINE_QNODE(qns_memnoc_snoc, SDX55_SLAVE_MEM_NOC_SNOC, 1, 8, SDX55_MASTER_MEM_NOC_SNOC); @@ -82,7 +80,6 @@ DEFINE_QNODE(xs_sys_tcu_cfg, SDX55_SLAVE_TCU, 1, 8); DEFINE_QBCM(bcm_mc0, "MC0", true, &ebi); DEFINE_QBCM(bcm_sh0, "SH0", true, &qns_llcc); DEFINE_QBCM(bcm_ce0, "CE0", false, &qxm_crypto); -DEFINE_QBCM(bcm_ip0, "IP0", false, &ipa_core_slave); DEFINE_QBCM(bcm_pn0, "PN0", false, &qhm_snoc_cfg); DEFINE_QBCM(bcm_sh3, "SH3", false, &xm_apps_rdwr); DEFINE_QBCM(bcm_sh4, "SH4", false, &qns_memnoc_snoc, &qns_sys_pcie); @@ -219,22 +216,6 @@ static const struct qcom_icc_desc sdx55_system_noc = { .num_bcms = ARRAY_SIZE(system_noc_bcms), }; -static struct qcom_icc_bcm *ipa_virt_bcms[] = { - &bcm_ip0, -}; - -static struct qcom_icc_node *ipa_virt_nodes[] = { - [MASTER_IPA_CORE] = &ipa_core_master, - [SLAVE_IPA_CORE] = &ipa_core_slave, -}; - -static const struct qcom_icc_desc sdx55_ipa_virt = { - .nodes = ipa_virt_nodes, - .num_nodes = ARRAY_SIZE(ipa_virt_nodes), - .bcms = ipa_virt_bcms, - .num_bcms = ARRAY_SIZE(ipa_virt_bcms), -}; - static const struct of_device_id qnoc_of_match[] = { { .compatible = "qcom,sdx55-mc-virt", .data = &sdx55_mc_virt}, @@ -242,8 +223,6 @@ static const struct of_device_id qnoc_of_match[] = { .data = &sdx55_mem_noc}, { .compatible = "qcom,sdx55-system-noc", .data = &sdx55_system_noc}, - { .compatible = "qcom,sdx55-ipa-virt", - .data = &sdx55_ipa_virt}, { } }; MODULE_DEVICE_TABLE(of, qnoc_of_match); diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 7c2ca52ca3e4..df5347ea450b 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -771,12 +771,12 @@ static void journal_write_unlocked(struct closure *cl) bio_reset(bio, ca->bdev, REQ_OP_WRITE | REQ_SYNC | REQ_META | REQ_PREFLUSH | REQ_FUA); - bch_bio_map(bio, w->data); bio->bi_iter.bi_sector = PTR_OFFSET(k, i); bio->bi_iter.bi_size = sectors << 9; bio->bi_end_io = journal_write_endio; bio->bi_private = w; + bch_bio_map(bio, w->data); trace_bcache_journal_write(bio, w->data->keys); bio_list_add(&list, bio); diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index fdd0194f84dd..320fcdfef48e 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -685,7 +685,7 @@ static void do_bio_hook(struct search *s, { struct bio *bio = &s->bio.bio; - bio_init_clone(bio->bi_bdev, bio, orig_bio, GFP_NOIO); + bio_init_clone(orig_bio->bi_bdev, bio, orig_bio, GFP_NOIO); /* * bi_end_io can be set separately somewhere else, e.g. the * variants in, diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index ad2d5faa2ebb..36ae30b73a6e 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -4399,6 +4399,7 @@ try_smaller_buffer: } if (ic->internal_hash) { + size_t recalc_tags_size; ic->recalc_wq = alloc_workqueue("dm-integrity-recalc", WQ_MEM_RECLAIM, 1); if (!ic->recalc_wq ) { ti->error = "Cannot allocate workqueue"; @@ -4412,8 +4413,10 @@ try_smaller_buffer: r = -ENOMEM; goto bad; } - ic->recalc_tags = kvmalloc_array(RECALC_SECTORS >> ic->sb->log2_sectors_per_block, - ic->tag_size, GFP_KERNEL); + recalc_tags_size = (RECALC_SECTORS >> ic->sb->log2_sectors_per_block) * ic->tag_size; + if (crypto_shash_digestsize(ic->internal_hash) > ic->tag_size) + recalc_tags_size += crypto_shash_digestsize(ic->internal_hash) - ic->tag_size; + ic->recalc_tags = kvmalloc(recalc_tags_size, GFP_KERNEL); if (!ic->recalc_tags) { ti->error = "Cannot allocate tags for recalculating"; r = -ENOMEM; diff --git a/drivers/md/dm-ps-historical-service-time.c b/drivers/md/dm-ps-historical-service-time.c index 875bca30a0dd..82f2a06153dc 100644 --- a/drivers/md/dm-ps-historical-service-time.c +++ b/drivers/md/dm-ps-historical-service-time.c @@ -27,7 +27,6 @@ #include <linux/blkdev.h> #include <linux/slab.h> #include <linux/module.h> -#include <linux/sched/clock.h> #define DM_MSG_PREFIX "multipath historical-service-time" @@ -433,7 +432,7 @@ static struct dm_path *hst_select_path(struct path_selector *ps, { struct selector *s = ps->context; struct path_info *pi = NULL, *best = NULL; - u64 time_now = sched_clock(); + u64 time_now = ktime_get_ns(); struct dm_path *ret = NULL; unsigned long flags; @@ -474,7 +473,7 @@ static int hst_start_io(struct path_selector *ps, struct dm_path *path, static u64 path_service_time(struct path_info *pi, u64 start_time) { - u64 sched_now = ktime_get_ns(); + u64 now = ktime_get_ns(); /* if a previous disk request has finished after this IO was * sent to the hardware, pretend the submission happened @@ -483,11 +482,11 @@ static u64 path_service_time(struct path_info *pi, u64 start_time) if (time_after64(pi->last_finish, start_time)) start_time = pi->last_finish; - pi->last_finish = sched_now; - if (time_before64(sched_now, start_time)) + pi->last_finish = now; + if (time_before64(now, start_time)) return 0; - return sched_now - start_time; + return now - start_time; } static int hst_end_io(struct path_selector *ps, struct dm_path *path, diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c index c1ca9be4b79e..57daa86c19cf 100644 --- a/drivers/md/dm-zone.c +++ b/drivers/md/dm-zone.c @@ -360,16 +360,20 @@ static int dm_update_zone_wp_offset(struct mapped_device *md, unsigned int zno, return 0; } +struct orig_bio_details { + unsigned int op; + unsigned int nr_sectors; +}; + /* * First phase of BIO mapping for targets with zone append emulation: * check all BIO that change a zone writer pointer and change zone * append operations into regular write operations. */ static bool dm_zone_map_bio_begin(struct mapped_device *md, - struct bio *orig_bio, struct bio *clone) + unsigned int zno, struct bio *clone) { sector_t zsectors = blk_queue_zone_sectors(md->queue); - unsigned int zno = bio_zone_no(orig_bio); unsigned int zwp_offset = READ_ONCE(md->zwp_offset[zno]); /* @@ -384,7 +388,7 @@ static bool dm_zone_map_bio_begin(struct mapped_device *md, WRITE_ONCE(md->zwp_offset[zno], zwp_offset); } - switch (bio_op(orig_bio)) { + switch (bio_op(clone)) { case REQ_OP_ZONE_RESET: case REQ_OP_ZONE_FINISH: return true; @@ -401,9 +405,8 @@ static bool dm_zone_map_bio_begin(struct mapped_device *md, * target zone. */ clone->bi_opf = REQ_OP_WRITE | REQ_NOMERGE | - (orig_bio->bi_opf & (~REQ_OP_MASK)); - clone->bi_iter.bi_sector = - orig_bio->bi_iter.bi_sector + zwp_offset; + (clone->bi_opf & (~REQ_OP_MASK)); + clone->bi_iter.bi_sector += zwp_offset; break; default: DMWARN_LIMIT("Invalid BIO operation"); @@ -423,11 +426,10 @@ static bool dm_zone_map_bio_begin(struct mapped_device *md, * data written to a zone. Note that at this point, the remapped clone BIO * may already have completed, so we do not touch it. */ -static blk_status_t dm_zone_map_bio_end(struct mapped_device *md, - struct bio *orig_bio, +static blk_status_t dm_zone_map_bio_end(struct mapped_device *md, unsigned int zno, + struct orig_bio_details *orig_bio_details, unsigned int nr_sectors) { - unsigned int zno = bio_zone_no(orig_bio); unsigned int zwp_offset = READ_ONCE(md->zwp_offset[zno]); /* The clone BIO may already have been completed and failed */ @@ -435,7 +437,7 @@ static blk_status_t dm_zone_map_bio_end(struct mapped_device *md, return BLK_STS_IOERR; /* Update the zone wp offset */ - switch (bio_op(orig_bio)) { + switch (orig_bio_details->op) { case REQ_OP_ZONE_RESET: WRITE_ONCE(md->zwp_offset[zno], 0); return BLK_STS_OK; @@ -452,7 +454,7 @@ static blk_status_t dm_zone_map_bio_end(struct mapped_device *md, * Check that the target did not truncate the write operation * emulating a zone append. */ - if (nr_sectors != bio_sectors(orig_bio)) { + if (nr_sectors != orig_bio_details->nr_sectors) { DMWARN_LIMIT("Truncated write for zone append"); return BLK_STS_IOERR; } @@ -488,7 +490,7 @@ static inline void dm_zone_unlock(struct request_queue *q, bio_clear_flag(clone, BIO_ZONE_WRITE_LOCKED); } -static bool dm_need_zone_wp_tracking(struct bio *orig_bio) +static bool dm_need_zone_wp_tracking(struct bio *bio) { /* * Special processing is not needed for operations that do not need the @@ -496,15 +498,15 @@ static bool dm_need_zone_wp_tracking(struct bio *orig_bio) * zones and all operations that do not modify directly a sequential * zone write pointer. */ - if (op_is_flush(orig_bio->bi_opf) && !bio_sectors(orig_bio)) + if (op_is_flush(bio->bi_opf) && !bio_sectors(bio)) return false; - switch (bio_op(orig_bio)) { + switch (bio_op(bio)) { case REQ_OP_WRITE_ZEROES: case REQ_OP_WRITE: case REQ_OP_ZONE_RESET: case REQ_OP_ZONE_FINISH: case REQ_OP_ZONE_APPEND: - return bio_zone_is_seq(orig_bio); + return bio_zone_is_seq(bio); default: return false; } @@ -519,8 +521,8 @@ int dm_zone_map_bio(struct dm_target_io *tio) struct dm_target *ti = tio->ti; struct mapped_device *md = io->md; struct request_queue *q = md->queue; - struct bio *orig_bio = io->orig_bio; struct bio *clone = &tio->clone; + struct orig_bio_details orig_bio_details; unsigned int zno; blk_status_t sts; int r; @@ -529,18 +531,21 @@ int dm_zone_map_bio(struct dm_target_io *tio) * IOs that do not change a zone write pointer do not need * any additional special processing. */ - if (!dm_need_zone_wp_tracking(orig_bio)) + if (!dm_need_zone_wp_tracking(clone)) return ti->type->map(ti, clone); /* Lock the target zone */ - zno = bio_zone_no(orig_bio); + zno = bio_zone_no(clone); dm_zone_lock(q, zno, clone); + orig_bio_details.nr_sectors = bio_sectors(clone); + orig_bio_details.op = bio_op(clone); + /* * Check that the bio and the target zone write pointer offset are * both valid, and if the bio is a zone append, remap it to a write. */ - if (!dm_zone_map_bio_begin(md, orig_bio, clone)) { + if (!dm_zone_map_bio_begin(md, zno, clone)) { dm_zone_unlock(q, zno, clone); return DM_MAPIO_KILL; } @@ -560,7 +565,8 @@ int dm_zone_map_bio(struct dm_target_io *tio) * The target submitted the clone BIO. The target zone will * be unlocked on completion of the clone. */ - sts = dm_zone_map_bio_end(md, orig_bio, *tio->len_ptr); + sts = dm_zone_map_bio_end(md, zno, &orig_bio_details, + *tio->len_ptr); break; case DM_MAPIO_REMAPPED: /* @@ -568,7 +574,8 @@ int dm_zone_map_bio(struct dm_target_io *tio) * unlock the target zone here as the clone will not be * submitted. */ - sts = dm_zone_map_bio_end(md, orig_bio, *tio->len_ptr); + sts = dm_zone_map_bio_end(md, zno, &orig_bio_details, + *tio->len_ptr); if (sts != BLK_STS_OK) dm_zone_unlock(q, zno, clone); break; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 3c5fad7c4ee6..82957bd460e8 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1323,8 +1323,7 @@ static void __map_bio(struct bio *clone) } static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci, - struct dm_target *ti, unsigned num_bios, - unsigned *len) + struct dm_target *ti, unsigned num_bios) { struct bio *bio; int try; @@ -1335,7 +1334,7 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci, if (try) mutex_lock(&ci->io->md->table_devices_lock); for (bio_nr = 0; bio_nr < num_bios; bio_nr++) { - bio = alloc_tio(ci, ti, bio_nr, len, + bio = alloc_tio(ci, ti, bio_nr, NULL, try ? GFP_NOIO : GFP_NOWAIT); if (!bio) break; @@ -1363,11 +1362,11 @@ static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti, break; case 1: clone = alloc_tio(ci, ti, 0, len, GFP_NOIO); - dm_tio_set_flag(clone_to_tio(clone), DM_TIO_IS_DUPLICATE_BIO); __map_bio(clone); break; default: - alloc_multiple_bios(&blist, ci, ti, num_bios, len); + /* dm_accept_partial_bio() is not supported with shared tio->len_ptr */ + alloc_multiple_bios(&blist, ci, ti, num_bios); while ((clone = bio_list_pop(&blist))) { dm_tio_set_flag(clone_to_tio(clone), DM_TIO_IS_DUPLICATE_BIO); __map_bio(clone); @@ -1392,6 +1391,7 @@ static void __send_empty_flush(struct clone_info *ci) ci->bio = &flush_bio; ci->sector_count = 0; + ci->io->tio.clone.bi_iter.bi_size = 0; while ((ti = dm_table_get_target(ci->map, target_nr++))) __send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL); @@ -1407,14 +1407,10 @@ static void __send_changing_extent_only(struct clone_info *ci, struct dm_target len = min_t(sector_t, ci->sector_count, max_io_len_target_boundary(ti, dm_target_offset(ti, ci->sector))); - /* - * dm_accept_partial_bio cannot be used with duplicate bios, - * so update clone_info cursor before __send_duplicate_bios(). - */ + __send_duplicate_bios(ci, ti, num_bios, &len); + ci->sector += len; ci->sector_count -= len; - - __send_duplicate_bios(ci, ti, num_bios, &len); } static bool is_abnormal_io(struct bio *bio) diff --git a/drivers/media/platform/nxp/Kconfig b/drivers/media/platform/nxp/Kconfig index 28f2bafc14d2..5afa373e534f 100644 --- a/drivers/media/platform/nxp/Kconfig +++ b/drivers/media/platform/nxp/Kconfig @@ -7,6 +7,7 @@ comment "NXP media platform drivers" config VIDEO_IMX_MIPI_CSIS tristate "NXP MIPI CSI-2 CSIS receiver found on i.MX7 and i.MX8 models" depends on ARCH_MXC || COMPILE_TEST + depends on VIDEO_DEV select MEDIA_CONTROLLER select V4L2_FWNODE select VIDEO_V4L2_SUBDEV_API diff --git a/drivers/media/platform/rockchip/rga/rga.c b/drivers/media/platform/rockchip/rga/rga.c index 4de5e8d2b261..3d3d1062e212 100644 --- a/drivers/media/platform/rockchip/rga/rga.c +++ b/drivers/media/platform/rockchip/rga/rga.c @@ -892,7 +892,7 @@ static int rga_probe(struct platform_device *pdev) } rga->dst_mmu_pages = (unsigned int *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 3); - if (rga->dst_mmu_pages) { + if (!rga->dst_mmu_pages) { ret = -ENOMEM; goto free_src_pages; } diff --git a/drivers/media/tuners/si2157.c b/drivers/media/tuners/si2157.c index 47029746b89e..0de587b412d4 100644 --- a/drivers/media/tuners/si2157.c +++ b/drivers/media/tuners/si2157.c @@ -77,16 +77,16 @@ err_mutex_unlock: } static const struct si2157_tuner_info si2157_tuners[] = { - { SI2141, false, 0x60, SI2141_60_FIRMWARE, SI2141_A10_FIRMWARE }, - { SI2141, false, 0x61, SI2141_61_FIRMWARE, SI2141_A10_FIRMWARE }, - { SI2146, false, 0x11, SI2146_11_FIRMWARE, NULL }, - { SI2147, false, 0x50, SI2147_50_FIRMWARE, NULL }, - { SI2148, true, 0x32, SI2148_32_FIRMWARE, SI2158_A20_FIRMWARE }, - { SI2148, true, 0x33, SI2148_33_FIRMWARE, SI2158_A20_FIRMWARE }, - { SI2157, false, 0x50, SI2157_50_FIRMWARE, SI2157_A30_FIRMWARE }, - { SI2158, false, 0x50, SI2158_50_FIRMWARE, SI2158_A20_FIRMWARE }, - { SI2158, false, 0x51, SI2158_51_FIRMWARE, SI2158_A20_FIRMWARE }, - { SI2177, false, 0x50, SI2177_50_FIRMWARE, SI2157_A30_FIRMWARE }, + { SI2141, 0x60, false, SI2141_60_FIRMWARE, SI2141_A10_FIRMWARE }, + { SI2141, 0x61, false, SI2141_61_FIRMWARE, SI2141_A10_FIRMWARE }, + { SI2146, 0x11, false, SI2146_11_FIRMWARE, NULL }, + { SI2147, 0x50, false, SI2147_50_FIRMWARE, NULL }, + { SI2148, 0x32, true, SI2148_32_FIRMWARE, SI2158_A20_FIRMWARE }, + { SI2148, 0x33, true, SI2148_33_FIRMWARE, SI2158_A20_FIRMWARE }, + { SI2157, 0x50, false, SI2157_50_FIRMWARE, SI2157_A30_FIRMWARE }, + { SI2158, 0x50, false, SI2158_50_FIRMWARE, SI2158_A20_FIRMWARE }, + { SI2158, 0x51, false, SI2158_51_FIRMWARE, SI2158_A20_FIRMWARE }, + { SI2177, 0x50, false, SI2177_50_FIRMWARE, SI2157_A30_FIRMWARE }, }; static int si2157_load_firmware(struct dvb_frontend *fe, @@ -178,7 +178,7 @@ static int si2157_find_and_load_firmware(struct dvb_frontend *fe) } } - if (!fw_name && !fw_alt_name) { + if (required && !fw_name && !fw_alt_name) { dev_err(&client->dev, "unknown chip version Si21%d-%c%c%c ROM 0x%02x\n", part_id, cmd.args[1], cmd.args[3], cmd.args[4], rom_id); diff --git a/drivers/memory/atmel-ebi.c b/drivers/memory/atmel-ebi.c index c267283b01fd..e749dcb3ddea 100644 --- a/drivers/memory/atmel-ebi.c +++ b/drivers/memory/atmel-ebi.c @@ -544,20 +544,27 @@ static int atmel_ebi_probe(struct platform_device *pdev) smc_np = of_parse_phandle(dev->of_node, "atmel,smc", 0); ebi->smc.regmap = syscon_node_to_regmap(smc_np); - if (IS_ERR(ebi->smc.regmap)) - return PTR_ERR(ebi->smc.regmap); + if (IS_ERR(ebi->smc.regmap)) { + ret = PTR_ERR(ebi->smc.regmap); + goto put_node; + } ebi->smc.layout = atmel_hsmc_get_reg_layout(smc_np); - if (IS_ERR(ebi->smc.layout)) - return PTR_ERR(ebi->smc.layout); + if (IS_ERR(ebi->smc.layout)) { + ret = PTR_ERR(ebi->smc.layout); + goto put_node; + } ebi->smc.clk = of_clk_get(smc_np, 0); if (IS_ERR(ebi->smc.clk)) { - if (PTR_ERR(ebi->smc.clk) != -ENOENT) - return PTR_ERR(ebi->smc.clk); + if (PTR_ERR(ebi->smc.clk) != -ENOENT) { + ret = PTR_ERR(ebi->smc.clk); + goto put_node; + } ebi->smc.clk = NULL; } + of_node_put(smc_np); ret = clk_prepare_enable(ebi->smc.clk); if (ret) return ret; @@ -608,6 +615,10 @@ static int atmel_ebi_probe(struct platform_device *pdev) } return of_platform_populate(np, NULL, NULL, dev); + +put_node: + of_node_put(smc_np); + return ret; } static __maybe_unused int atmel_ebi_resume(struct device *dev) diff --git a/drivers/memory/fsl_ifc.c b/drivers/memory/fsl_ifc.c index 2f6939da21cd..e83b61c925a4 100644 --- a/drivers/memory/fsl_ifc.c +++ b/drivers/memory/fsl_ifc.c @@ -287,8 +287,7 @@ static int fsl_ifc_ctrl_probe(struct platform_device *dev) } /* legacy dts may still use "simple-bus" compatible */ - ret = of_platform_populate(dev->dev.of_node, NULL, NULL, - &dev->dev); + ret = of_platform_default_populate(dev->dev.of_node, NULL, &dev->dev); if (ret) goto err_free_nandirq; diff --git a/drivers/memory/renesas-rpc-if.c b/drivers/memory/renesas-rpc-if.c index e4cc64f56019..019a0822bde0 100644 --- a/drivers/memory/renesas-rpc-if.c +++ b/drivers/memory/renesas-rpc-if.c @@ -164,25 +164,39 @@ static const struct regmap_access_table rpcif_volatile_table = { /* - * Custom accessor functions to ensure SMRDR0 and SMWDR0 are always accessed - * with proper width. Requires SMENR_SPIDE to be correctly set before! + * Custom accessor functions to ensure SM[RW]DR[01] are always accessed with + * proper width. Requires rpcif.xfer_size to be correctly set before! */ static int rpcif_reg_read(void *context, unsigned int reg, unsigned int *val) { struct rpcif *rpc = context; - if (reg == RPCIF_SMRDR0 || reg == RPCIF_SMWDR0) { - u32 spide = readl(rpc->base + RPCIF_SMENR) & RPCIF_SMENR_SPIDE(0xF); - - if (spide == 0x8) { + switch (reg) { + case RPCIF_SMRDR0: + case RPCIF_SMWDR0: + switch (rpc->xfer_size) { + case 1: *val = readb(rpc->base + reg); return 0; - } else if (spide == 0xC) { + + case 2: *val = readw(rpc->base + reg); return 0; - } else if (spide != 0xF) { + + case 4: + case 8: + *val = readl(rpc->base + reg); + return 0; + + default: return -EILSEQ; } + + case RPCIF_SMRDR1: + case RPCIF_SMWDR1: + if (rpc->xfer_size != 8) + return -EILSEQ; + break; } *val = readl(rpc->base + reg); @@ -193,18 +207,34 @@ static int rpcif_reg_write(void *context, unsigned int reg, unsigned int val) { struct rpcif *rpc = context; - if (reg == RPCIF_SMRDR0 || reg == RPCIF_SMWDR0) { - u32 spide = readl(rpc->base + RPCIF_SMENR) & RPCIF_SMENR_SPIDE(0xF); - - if (spide == 0x8) { + switch (reg) { + case RPCIF_SMWDR0: + switch (rpc->xfer_size) { + case 1: writeb(val, rpc->base + reg); return 0; - } else if (spide == 0xC) { + + case 2: writew(val, rpc->base + reg); return 0; - } else if (spide != 0xF) { + + case 4: + case 8: + writel(val, rpc->base + reg); + return 0; + + default: return -EILSEQ; } + + case RPCIF_SMWDR1: + if (rpc->xfer_size != 8) + return -EILSEQ; + break; + + case RPCIF_SMRDR0: + case RPCIF_SMRDR1: + return -EPERM; } writel(val, rpc->base + reg); @@ -469,6 +499,7 @@ int rpcif_manual_xfer(struct rpcif *rpc) smenr |= RPCIF_SMENR_SPIDE(rpcif_bits_set(rpc, nbytes)); regmap_write(rpc->regmap, RPCIF_SMENR, smenr); + rpc->xfer_size = nbytes; memcpy(data, rpc->buffer + pos, nbytes); if (nbytes == 8) { @@ -533,6 +564,7 @@ int rpcif_manual_xfer(struct rpcif *rpc) regmap_write(rpc->regmap, RPCIF_SMENR, smenr); regmap_write(rpc->regmap, RPCIF_SMCR, rpc->smcr | RPCIF_SMCR_SPIE); + rpc->xfer_size = nbytes; ret = wait_msg_xfer_end(rpc); if (ret) goto err_out; @@ -651,6 +683,7 @@ static int rpcif_probe(struct platform_device *pdev) struct platform_device *vdev; struct device_node *flash; const char *name; + int ret; flash = of_get_next_child(pdev->dev.of_node, NULL); if (!flash) { @@ -674,7 +707,14 @@ static int rpcif_probe(struct platform_device *pdev) return -ENOMEM; vdev->dev.parent = &pdev->dev; platform_set_drvdata(pdev, vdev); - return platform_device_add(vdev); + + ret = platform_device_add(vdev); + if (ret) { + platform_device_put(vdev); + return ret; + } + + return 0; } static int rpcif_remove(struct platform_device *pdev) diff --git a/drivers/misc/eeprom/at25.c b/drivers/misc/eeprom/at25.c index 91f96abbb3f9..8d169a35cf13 100644 --- a/drivers/misc/eeprom/at25.c +++ b/drivers/misc/eeprom/at25.c @@ -31,6 +31,8 @@ */ #define FM25_SN_LEN 8 /* serial number length */ +#define EE_MAXADDRLEN 3 /* 24 bit addresses, up to 2 MBytes */ + struct at25_data { struct spi_eeprom chip; struct spi_device *spi; @@ -39,6 +41,7 @@ struct at25_data { struct nvmem_config nvmem_config; struct nvmem_device *nvmem; u8 sernum[FM25_SN_LEN]; + u8 command[EE_MAXADDRLEN + 1]; }; #define AT25_WREN 0x06 /* latch the write enable */ @@ -61,8 +64,6 @@ struct at25_data { #define FM25_ID_LEN 9 /* ID length */ -#define EE_MAXADDRLEN 3 /* 24 bit addresses, up to 2 MBytes */ - /* * Specs often allow 5ms for a page write, sometimes 20ms; * it's important to recover from write timeouts. @@ -78,7 +79,6 @@ static int at25_ee_read(void *priv, unsigned int offset, { struct at25_data *at25 = priv; char *buf = val; - u8 command[EE_MAXADDRLEN + 1]; u8 *cp; ssize_t status; struct spi_transfer t[2]; @@ -92,12 +92,15 @@ static int at25_ee_read(void *priv, unsigned int offset, if (unlikely(!count)) return -EINVAL; - cp = command; + cp = at25->command; instr = AT25_READ; if (at25->chip.flags & EE_INSTR_BIT3_IS_ADDR) if (offset >= BIT(at25->addrlen * 8)) instr |= AT25_INSTR_BIT3; + + mutex_lock(&at25->lock); + *cp++ = instr; /* 8/16/24-bit address is written MSB first */ @@ -116,7 +119,7 @@ static int at25_ee_read(void *priv, unsigned int offset, spi_message_init(&m); memset(t, 0, sizeof(t)); - t[0].tx_buf = command; + t[0].tx_buf = at25->command; t[0].len = at25->addrlen + 1; spi_message_add_tail(&t[0], &m); @@ -124,8 +127,6 @@ static int at25_ee_read(void *priv, unsigned int offset, t[1].len = count; spi_message_add_tail(&t[1], &m); - mutex_lock(&at25->lock); - /* * Read it all at once. * @@ -152,7 +153,7 @@ static int fm25_aux_read(struct at25_data *at25, u8 *buf, uint8_t command, spi_message_init(&m); memset(t, 0, sizeof(t)); - t[0].tx_buf = &command; + t[0].tx_buf = at25->command; t[0].len = 1; spi_message_add_tail(&t[0], &m); @@ -162,6 +163,8 @@ static int fm25_aux_read(struct at25_data *at25, u8 *buf, uint8_t command, mutex_lock(&at25->lock); + at25->command[0] = command; + status = spi_sync(at25->spi, &m); dev_dbg(&at25->spi->dev, "read %d aux bytes --> %d\n", len, status); diff --git a/drivers/misc/mei/Kconfig b/drivers/misc/mei/Kconfig index 0e0bcd0da852..d21486d69df2 100644 --- a/drivers/misc/mei/Kconfig +++ b/drivers/misc/mei/Kconfig @@ -46,6 +46,20 @@ config INTEL_MEI_TXE Supported SoCs: Intel Bay Trail +config INTEL_MEI_GSC + tristate "Intel MEI GSC embedded device" + depends on INTEL_MEI + depends on INTEL_MEI_ME + depends on X86 && PCI + depends on DRM_I915 + help + Intel auxiliary driver for GSC devices embedded in Intel graphics devices. + + An MEI device here called GSC can be embedded in an + Intel graphics devices, to support a range of chassis + tasks such as graphics card firmware update and security + tasks. + source "drivers/misc/mei/hdcp/Kconfig" source "drivers/misc/mei/pxp/Kconfig" diff --git a/drivers/misc/mei/Makefile b/drivers/misc/mei/Makefile index d8e5165917f2..fb740d754900 100644 --- a/drivers/misc/mei/Makefile +++ b/drivers/misc/mei/Makefile @@ -18,6 +18,9 @@ obj-$(CONFIG_INTEL_MEI_ME) += mei-me.o mei-me-objs := pci-me.o mei-me-objs += hw-me.o +obj-$(CONFIG_INTEL_MEI_GSC) += mei-gsc.o +mei-gsc-objs := gsc-me.o + obj-$(CONFIG_INTEL_MEI_TXE) += mei-txe.o mei-txe-objs := pci-txe.o mei-txe-objs += hw-txe.o diff --git a/drivers/misc/mei/bus-fixup.c b/drivers/misc/mei/bus-fixup.c index 67844089db21..59506ba6fc48 100644 --- a/drivers/misc/mei/bus-fixup.c +++ b/drivers/misc/mei/bus-fixup.c @@ -30,6 +30,12 @@ static const uuid_le mei_nfc_info_guid = MEI_UUID_NFC_INFO; #define MEI_UUID_MKHIF_FIX UUID_LE(0x55213584, 0x9a29, 0x4916, \ 0xba, 0xdf, 0xf, 0xb7, 0xed, 0x68, 0x2a, 0xeb) +#define MEI_UUID_IGSC_MKHI UUID_LE(0xE2C2AFA2, 0x3817, 0x4D19, \ + 0x9D, 0x95, 0x06, 0xB1, 0x6B, 0x58, 0x8A, 0x5D) + +#define MEI_UUID_IGSC_MKHI_FIX UUID_LE(0x46E0C1FB, 0xA546, 0x414F, \ + 0x91, 0x70, 0xB7, 0xF4, 0x6D, 0x57, 0xB4, 0xAD) + #define MEI_UUID_HDCP UUID_LE(0xB638AB7E, 0x94E2, 0x4EA2, \ 0xA5, 0x52, 0xD1, 0xC5, 0x4B, 0x62, 0x7F, 0x04) @@ -241,6 +247,23 @@ static void mei_mkhi_fix(struct mei_cl_device *cldev) mei_cldev_disable(cldev); } +static void mei_gsc_mkhi_ver(struct mei_cl_device *cldev) +{ + int ret; + + /* No need to enable the client if nothing is needed from it */ + if (!cldev->bus->fw_f_fw_ver_supported) + return; + + ret = mei_cldev_enable(cldev); + if (ret) + return; + + ret = mei_fwver(cldev); + if (ret < 0) + dev_err(&cldev->dev, "FW version command failed %d\n", ret); + mei_cldev_disable(cldev); +} /** * mei_wd - wd client on the bus, change protocol version * as the API has changed. @@ -492,6 +515,8 @@ static struct mei_fixup { MEI_FIXUP(MEI_UUID_NFC_HCI, mei_nfc), MEI_FIXUP(MEI_UUID_WD, mei_wd), MEI_FIXUP(MEI_UUID_MKHIF_FIX, mei_mkhi_fix), + MEI_FIXUP(MEI_UUID_IGSC_MKHI, mei_gsc_mkhi_ver), + MEI_FIXUP(MEI_UUID_IGSC_MKHI_FIX, mei_gsc_mkhi_ver), MEI_FIXUP(MEI_UUID_HDCP, whitelist), MEI_FIXUP(MEI_UUID_ANY, vt_support), MEI_FIXUP(MEI_UUID_PAVP, whitelist), diff --git a/drivers/misc/mei/gsc-me.c b/drivers/misc/mei/gsc-me.c new file mode 100644 index 000000000000..c8145e9b62b6 --- /dev/null +++ b/drivers/misc/mei/gsc-me.c @@ -0,0 +1,259 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright(c) 2019-2022, Intel Corporation. All rights reserved. + * + * Intel Management Engine Interface (Intel MEI) Linux driver + */ + +#include <linux/module.h> +#include <linux/mei_aux.h> +#include <linux/device.h> +#include <linux/irqreturn.h> +#include <linux/jiffies.h> +#include <linux/ktime.h> +#include <linux/delay.h> +#include <linux/pm_runtime.h> + +#include "mei_dev.h" +#include "hw-me.h" +#include "hw-me-regs.h" + +#include "mei-trace.h" + +#define MEI_GSC_RPM_TIMEOUT 500 + +static int mei_gsc_read_hfs(const struct mei_device *dev, int where, u32 *val) +{ + struct mei_me_hw *hw = to_me_hw(dev); + + *val = ioread32(hw->mem_addr + where + 0xC00); + + return 0; +} + +static int mei_gsc_probe(struct auxiliary_device *aux_dev, + const struct auxiliary_device_id *aux_dev_id) +{ + struct mei_aux_device *adev = auxiliary_dev_to_mei_aux_dev(aux_dev); + struct mei_device *dev; + struct mei_me_hw *hw; + struct device *device; + const struct mei_cfg *cfg; + int ret; + + cfg = mei_me_get_cfg(aux_dev_id->driver_data); + if (!cfg) + return -ENODEV; + + device = &aux_dev->dev; + + dev = mei_me_dev_init(device, cfg); + if (!dev) { + ret = -ENOMEM; + goto err; + } + + hw = to_me_hw(dev); + hw->mem_addr = devm_ioremap_resource(device, &adev->bar); + if (IS_ERR(hw->mem_addr)) { + dev_err(device, "mmio not mapped\n"); + ret = PTR_ERR(hw->mem_addr); + goto err; + } + + hw->irq = adev->irq; + hw->read_fws = mei_gsc_read_hfs; + + dev_set_drvdata(device, dev); + + ret = devm_request_threaded_irq(device, hw->irq, + mei_me_irq_quick_handler, + mei_me_irq_thread_handler, + IRQF_ONESHOT, KBUILD_MODNAME, dev); + if (ret) { + dev_err(device, "irq register failed %d\n", ret); + goto err; + } + + pm_runtime_get_noresume(device); + pm_runtime_set_active(device); + pm_runtime_enable(device); + + /* Continue to char device setup in spite of firmware handshake failure. + * In order to provide access to the firmware status registers to the user + * space via sysfs. + */ + if (mei_start(dev)) + dev_warn(device, "init hw failure.\n"); + + pm_runtime_set_autosuspend_delay(device, MEI_GSC_RPM_TIMEOUT); + pm_runtime_use_autosuspend(device); + + ret = mei_register(dev, device); + if (ret) + goto register_err; + + pm_runtime_put_noidle(device); + return 0; + +register_err: + mei_stop(dev); + devm_free_irq(device, hw->irq, dev); + +err: + dev_err(device, "probe failed: %d\n", ret); + dev_set_drvdata(device, NULL); + return ret; +} + +static void mei_gsc_remove(struct auxiliary_device *aux_dev) +{ + struct mei_device *dev; + struct mei_me_hw *hw; + + dev = dev_get_drvdata(&aux_dev->dev); + if (!dev) + return; + + hw = to_me_hw(dev); + + mei_stop(dev); + + mei_deregister(dev); + + pm_runtime_disable(&aux_dev->dev); + + mei_disable_interrupts(dev); + devm_free_irq(&aux_dev->dev, hw->irq, dev); +} + +static int __maybe_unused mei_gsc_pm_suspend(struct device *device) +{ + struct mei_device *dev = dev_get_drvdata(device); + + if (!dev) + return -ENODEV; + + mei_stop(dev); + + mei_disable_interrupts(dev); + + return 0; +} + +static int __maybe_unused mei_gsc_pm_resume(struct device *device) +{ + struct mei_device *dev = dev_get_drvdata(device); + int err; + + if (!dev) + return -ENODEV; + + err = mei_restart(dev); + if (err) + return err; + + /* Start timer if stopped in suspend */ + schedule_delayed_work(&dev->timer_work, HZ); + + return 0; +} + +static int __maybe_unused mei_gsc_pm_runtime_idle(struct device *device) +{ + struct mei_device *dev = dev_get_drvdata(device); + + if (!dev) + return -ENODEV; + if (mei_write_is_idle(dev)) + pm_runtime_autosuspend(device); + + return -EBUSY; +} + +static int __maybe_unused mei_gsc_pm_runtime_suspend(struct device *device) +{ + struct mei_device *dev = dev_get_drvdata(device); + struct mei_me_hw *hw; + int ret; + + if (!dev) + return -ENODEV; + + mutex_lock(&dev->device_lock); + + if (mei_write_is_idle(dev)) { + hw = to_me_hw(dev); + hw->pg_state = MEI_PG_ON; + ret = 0; + } else { + ret = -EAGAIN; + } + + mutex_unlock(&dev->device_lock); + + return ret; +} + +static int __maybe_unused mei_gsc_pm_runtime_resume(struct device *device) +{ + struct mei_device *dev = dev_get_drvdata(device); + struct mei_me_hw *hw; + irqreturn_t irq_ret; + + if (!dev) + return -ENODEV; + + mutex_lock(&dev->device_lock); + + hw = to_me_hw(dev); + hw->pg_state = MEI_PG_OFF; + + mutex_unlock(&dev->device_lock); + + irq_ret = mei_me_irq_thread_handler(1, dev); + if (irq_ret != IRQ_HANDLED) + dev_err(dev->dev, "thread handler fail %d\n", irq_ret); + + return 0; +} + +static const struct dev_pm_ops mei_gsc_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(mei_gsc_pm_suspend, + mei_gsc_pm_resume) + SET_RUNTIME_PM_OPS(mei_gsc_pm_runtime_suspend, + mei_gsc_pm_runtime_resume, + mei_gsc_pm_runtime_idle) +}; + +static const struct auxiliary_device_id mei_gsc_id_table[] = { + { + .name = "i915.mei-gsc", + .driver_data = MEI_ME_GSC_CFG, + + }, + { + .name = "i915.mei-gscfi", + .driver_data = MEI_ME_GSCFI_CFG, + }, + { + /* sentinel */ + } +}; +MODULE_DEVICE_TABLE(auxiliary, mei_gsc_id_table); + +static struct auxiliary_driver mei_gsc_driver = { + .probe = mei_gsc_probe, + .remove = mei_gsc_remove, + .driver = { + /* auxiliary_driver_register() sets .name to be the modname */ + .pm = &mei_gsc_pm_ops, + }, + .id_table = mei_gsc_id_table +}; +module_auxiliary_driver(mei_gsc_driver); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_ALIAS("auxiliary:i915.mei-gsc"); +MODULE_ALIAS("auxiliary:i915.mei-gscfi"); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c index 719fee9af156..9870bf717979 100644 --- a/drivers/misc/mei/hw-me.c +++ b/drivers/misc/mei/hw-me.c @@ -1226,6 +1226,7 @@ irqreturn_t mei_me_irq_quick_handler(int irq, void *dev_id) me_intr_disable(dev, hcsr); return IRQ_WAKE_THREAD; } +EXPORT_SYMBOL_GPL(mei_me_irq_quick_handler); /** * mei_me_irq_thread_handler - function called after ISR to handle the interrupt @@ -1326,6 +1327,7 @@ end: mutex_unlock(&dev->device_lock); return IRQ_HANDLED; } +EXPORT_SYMBOL_GPL(mei_me_irq_thread_handler); static const struct mei_hw_ops mei_me_hw_ops = { @@ -1440,6 +1442,12 @@ static bool mei_me_fw_type_sps_ign(const struct pci_dev *pdev) #define MEI_CFG_KIND_ITOUCH \ .kind = "itouch" +#define MEI_CFG_TYPE_GSC \ + .kind = "gsc" + +#define MEI_CFG_TYPE_GSCFI \ + .kind = "gscfi" + #define MEI_CFG_FW_SPS_IGN \ .quirk_probe = mei_me_fw_type_sps_ign @@ -1572,6 +1580,20 @@ static const struct mei_cfg mei_me_pch15_sps_cfg = { MEI_CFG_FW_SPS_IGN, }; +/* Graphics System Controller */ +static const struct mei_cfg mei_me_gsc_cfg = { + MEI_CFG_TYPE_GSC, + MEI_CFG_PCH8_HFS, + MEI_CFG_FW_VER_SUPP, +}; + +/* Graphics System Controller Firmware Interface */ +static const struct mei_cfg mei_me_gscfi_cfg = { + MEI_CFG_TYPE_GSCFI, + MEI_CFG_PCH8_HFS, + MEI_CFG_FW_VER_SUPP, +}; + /* * mei_cfg_list - A list of platform platform specific configurations. * Note: has to be synchronized with enum mei_cfg_idx. @@ -1592,6 +1614,8 @@ static const struct mei_cfg *const mei_cfg_list[] = { [MEI_ME_PCH12_SPS_ITOUCH_CFG] = &mei_me_pch12_itouch_sps_cfg, [MEI_ME_PCH15_CFG] = &mei_me_pch15_cfg, [MEI_ME_PCH15_SPS_CFG] = &mei_me_pch15_sps_cfg, + [MEI_ME_GSC_CFG] = &mei_me_gsc_cfg, + [MEI_ME_GSCFI_CFG] = &mei_me_gscfi_cfg, }; const struct mei_cfg *mei_me_get_cfg(kernel_ulong_t idx) @@ -1602,7 +1626,8 @@ const struct mei_cfg *mei_me_get_cfg(kernel_ulong_t idx) return NULL; return mei_cfg_list[idx]; -}; +} +EXPORT_SYMBOL_GPL(mei_me_get_cfg); /** * mei_me_dev_init - allocates and initializes the mei device structure @@ -1637,4 +1662,4 @@ struct mei_device *mei_me_dev_init(struct device *parent, return dev; } - +EXPORT_SYMBOL_GPL(mei_me_dev_init); diff --git a/drivers/misc/mei/hw-me.h b/drivers/misc/mei/hw-me.h index 00a7132ac7a2..a071c645e905 100644 --- a/drivers/misc/mei/hw-me.h +++ b/drivers/misc/mei/hw-me.h @@ -112,6 +112,8 @@ enum mei_cfg_idx { MEI_ME_PCH12_SPS_ITOUCH_CFG, MEI_ME_PCH15_CFG, MEI_ME_PCH15_SPS_CFG, + MEI_ME_GSC_CFG, + MEI_ME_GSCFI_CFG, MEI_ME_NUM_CFG, }; diff --git a/drivers/mtd/nand/raw/mtk_ecc.c b/drivers/mtd/nand/raw/mtk_ecc.c index e7df3dac705e..49ab3448b9b1 100644 --- a/drivers/mtd/nand/raw/mtk_ecc.c +++ b/drivers/mtd/nand/raw/mtk_ecc.c @@ -43,6 +43,7 @@ struct mtk_ecc_caps { u32 err_mask; + u32 err_shift; const u8 *ecc_strength; const u32 *ecc_regs; u8 num_ecc_strength; @@ -76,7 +77,7 @@ static const u8 ecc_strength_mt2712[] = { }; static const u8 ecc_strength_mt7622[] = { - 4, 6, 8, 10, 12, 14, 16 + 4, 6, 8, 10, 12 }; enum mtk_ecc_regs { @@ -221,7 +222,7 @@ void mtk_ecc_get_stats(struct mtk_ecc *ecc, struct mtk_ecc_stats *stats, for (i = 0; i < sectors; i++) { offset = (i >> 2) << 2; err = readl(ecc->regs + ECC_DECENUM0 + offset); - err = err >> ((i % 4) * 8); + err = err >> ((i % 4) * ecc->caps->err_shift); err &= ecc->caps->err_mask; if (err == ecc->caps->err_mask) { /* uncorrectable errors */ @@ -449,6 +450,7 @@ EXPORT_SYMBOL(mtk_ecc_get_parity_bits); static const struct mtk_ecc_caps mtk_ecc_caps_mt2701 = { .err_mask = 0x3f, + .err_shift = 8, .ecc_strength = ecc_strength_mt2701, .ecc_regs = mt2701_ecc_regs, .num_ecc_strength = 20, @@ -459,6 +461,7 @@ static const struct mtk_ecc_caps mtk_ecc_caps_mt2701 = { static const struct mtk_ecc_caps mtk_ecc_caps_mt2712 = { .err_mask = 0x7f, + .err_shift = 8, .ecc_strength = ecc_strength_mt2712, .ecc_regs = mt2712_ecc_regs, .num_ecc_strength = 23, @@ -468,10 +471,11 @@ static const struct mtk_ecc_caps mtk_ecc_caps_mt2712 = { }; static const struct mtk_ecc_caps mtk_ecc_caps_mt7622 = { - .err_mask = 0x3f, + .err_mask = 0x1f, + .err_shift = 5, .ecc_strength = ecc_strength_mt7622, .ecc_regs = mt7622_ecc_regs, - .num_ecc_strength = 7, + .num_ecc_strength = 5, .ecc_mode_shift = 4, .parity_bits = 13, .pg_irq_sel = 0, diff --git a/drivers/mtd/nand/raw/qcom_nandc.c b/drivers/mtd/nand/raw/qcom_nandc.c index 1a77542c6d67..048b255faa76 100644 --- a/drivers/mtd/nand/raw/qcom_nandc.c +++ b/drivers/mtd/nand/raw/qcom_nandc.c @@ -2651,10 +2651,23 @@ static int qcom_nand_attach_chip(struct nand_chip *chip) ecc->engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST; mtd_set_ooblayout(mtd, &qcom_nand_ooblayout_ops); + /* Free the initially allocated BAM transaction for reading the ONFI params */ + if (nandc->props->is_bam) + free_bam_transaction(nandc); nandc->max_cwperpage = max_t(unsigned int, nandc->max_cwperpage, cwperpage); + /* Now allocate the BAM transaction based on updated max_cwperpage */ + if (nandc->props->is_bam) { + nandc->bam_txn = alloc_bam_transaction(nandc); + if (!nandc->bam_txn) { + dev_err(nandc->dev, + "failed to allocate bam transaction\n"); + return -ENOMEM; + } + } + /* * DATA_UD_BYTES varies based on whether the read/write command protects * spare data with ECC too. We protect spare data by default, so we set @@ -2955,17 +2968,6 @@ static int qcom_nand_host_init_and_register(struct qcom_nand_controller *nandc, if (ret) return ret; - if (nandc->props->is_bam) { - free_bam_transaction(nandc); - nandc->bam_txn = alloc_bam_transaction(nandc); - if (!nandc->bam_txn) { - dev_err(nandc->dev, - "failed to allocate bam transaction\n"); - nand_cleanup(chip); - return -ENOMEM; - } - } - ret = mtd_device_parse_register(mtd, probes, NULL, NULL, 0); if (ret) nand_cleanup(chip); diff --git a/drivers/mtd/nand/raw/sh_flctl.c b/drivers/mtd/nand/raw/sh_flctl.c index b85b9c6fcc42..a278829469d6 100644 --- a/drivers/mtd/nand/raw/sh_flctl.c +++ b/drivers/mtd/nand/raw/sh_flctl.c @@ -384,7 +384,8 @@ static int flctl_dma_fifo0_transfer(struct sh_flctl *flctl, unsigned long *buf, dma_addr_t dma_addr; dma_cookie_t cookie; uint32_t reg; - int ret; + int ret = 0; + unsigned long time_left; if (dir == DMA_FROM_DEVICE) { chan = flctl->chan_fifo0_rx; @@ -425,13 +426,14 @@ static int flctl_dma_fifo0_transfer(struct sh_flctl *flctl, unsigned long *buf, goto out; } - ret = + time_left = wait_for_completion_timeout(&flctl->dma_complete, msecs_to_jiffies(3000)); - if (ret <= 0) { + if (time_left == 0) { dmaengine_terminate_all(chan); dev_err(&flctl->pdev->dev, "wait_for_completion_timeout\n"); + ret = -ETIMEDOUT; } out: @@ -441,7 +443,7 @@ out: dma_unmap_single(chan->device->dev, dma_addr, len, dir); - /* ret > 0 is success */ + /* ret == 0 is success */ return ret; } @@ -465,7 +467,7 @@ static void read_fiforeg(struct sh_flctl *flctl, int rlen, int offset) /* initiate DMA transfer */ if (flctl->chan_fifo0_rx && rlen >= 32 && - flctl_dma_fifo0_transfer(flctl, buf, rlen, DMA_FROM_DEVICE) > 0) + !flctl_dma_fifo0_transfer(flctl, buf, rlen, DMA_FROM_DEVICE)) goto convert; /* DMA success */ /* do polling transfer */ @@ -524,7 +526,7 @@ static void write_ec_fiforeg(struct sh_flctl *flctl, int rlen, /* initiate DMA transfer */ if (flctl->chan_fifo0_tx && rlen >= 32 && - flctl_dma_fifo0_transfer(flctl, buf, rlen, DMA_TO_DEVICE) > 0) + !flctl_dma_fifo0_transfer(flctl, buf, rlen, DMA_TO_DEVICE)) return; /* DMA success */ /* do polling transfer */ diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 15eddca7b4b6..38e152548126 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4027,14 +4027,19 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, const v return true; } -static u32 bond_ip_hash(u32 hash, struct flow_keys *flow) +static u32 bond_ip_hash(u32 hash, struct flow_keys *flow, int xmit_policy) { hash ^= (__force u32)flow_get_u32_dst(flow) ^ (__force u32)flow_get_u32_src(flow); hash ^= (hash >> 16); hash ^= (hash >> 8); + /* discard lowest hash bit to deal with the common even ports pattern */ - return hash >> 1; + if (xmit_policy == BOND_XMIT_POLICY_LAYER34 || + xmit_policy == BOND_XMIT_POLICY_ENCAP34) + return hash >> 1; + + return hash; } /* Generate hash based on xmit policy. If @skb is given it is used to linearize @@ -4064,7 +4069,7 @@ static u32 __bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, const voi memcpy(&hash, &flow.ports.ports, sizeof(hash)); } - return bond_ip_hash(hash, &flow); + return bond_ip_hash(hash, &flow, bond->params.xmit_policy); } /** @@ -5259,7 +5264,7 @@ static u32 bond_sk_hash_l34(struct sock *sk) /* L4 */ memcpy(&hash, &flow.ports.ports, sizeof(hash)); /* L3 */ - return bond_ip_hash(hash, &flow); + return bond_ip_hash(hash, &flow, BOND_XMIT_POLICY_LAYER34); } static struct net_device *__bond_sk_get_lower_dev(struct bonding *bond, diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c index a416240d001b..12c15da55664 100644 --- a/drivers/net/dsa/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq_gswip.c @@ -1681,9 +1681,6 @@ static void gswip_phylink_mac_config(struct dsa_switch *ds, int port, break; case PHY_INTERFACE_MODE_RMII: miicfg |= GSWIP_MII_CFG_MODE_RMIIM; - - /* Configure the RMII clock as output: */ - miicfg |= GSWIP_MII_CFG_RMII_CLK; break; case PHY_INTERFACE_MODE_RGMII: case PHY_INTERFACE_MODE_RGMII_ID: diff --git a/drivers/net/dsa/mv88e6xxx/port_hidden.c b/drivers/net/dsa/mv88e6xxx/port_hidden.c index b49d05f0e117..7a9f9ff6dedf 100644 --- a/drivers/net/dsa/mv88e6xxx/port_hidden.c +++ b/drivers/net/dsa/mv88e6xxx/port_hidden.c @@ -40,8 +40,9 @@ int mv88e6xxx_port_hidden_wait(struct mv88e6xxx_chip *chip) { int bit = __bf_shf(MV88E6XXX_PORT_RESERVED_1A_BUSY); - return mv88e6xxx_wait_bit(chip, MV88E6XXX_PORT_RESERVED_1A_CTRL_PORT, - MV88E6XXX_PORT_RESERVED_1A, bit, 0); + return mv88e6xxx_port_wait_bit(chip, + MV88E6XXX_PORT_RESERVED_1A_CTRL_PORT, + MV88E6XXX_PORT_RESERVED_1A, bit, 0); } int mv88e6xxx_port_hidden_read(struct mv88e6xxx_chip *chip, int block, int port, diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 413b0006e9a2..9e28219b223d 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -670,6 +670,8 @@ static int felix_change_tag_protocol(struct dsa_switch *ds, int cpu, struct ocelot *ocelot = ds->priv; struct felix *felix = ocelot_to_felix(ocelot); enum dsa_tag_protocol old_proto = felix->tag_proto; + bool cpu_port_active = false; + struct dsa_port *dp; int err; if (proto != DSA_TAG_PROTO_SEVILLE && @@ -677,6 +679,27 @@ static int felix_change_tag_protocol(struct dsa_switch *ds, int cpu, proto != DSA_TAG_PROTO_OCELOT_8021Q) return -EPROTONOSUPPORT; + /* We don't support multiple CPU ports, yet the DT blob may have + * multiple CPU ports defined. The first CPU port is the active one, + * the others are inactive. In this case, DSA will call + * ->change_tag_protocol() multiple times, once per CPU port. + * Since we implement the tagging protocol change towards "ocelot" or + * "seville" as effectively initializing the NPI port, what we are + * doing is effectively changing who the NPI port is to the last @cpu + * argument passed, which is an unused DSA CPU port and not the one + * that should actively pass traffic. + * Suppress DSA's calls on CPU ports that are inactive. + */ + dsa_switch_for_each_user_port(dp, ds) { + if (dp->cpu_dp->index == cpu) { + cpu_port_active = true; + break; + } + } + + if (!cpu_port_active) + return 0; + felix_del_tag_protocol(ds, cpu, old_proto); err = felix_set_tag_protocol(ds, cpu, proto); diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index 8d382b27e625..52a8566071ed 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -2316,7 +2316,7 @@ static int felix_pci_probe(struct pci_dev *pdev, err = dsa_register_switch(ds); if (err) { - dev_err(&pdev->dev, "Failed to register DSA switch: %d\n", err); + dev_err_probe(&pdev->dev, err, "Failed to register DSA switch\n"); goto err_register_ds; } diff --git a/drivers/net/dsa/realtek/Kconfig b/drivers/net/dsa/realtek/Kconfig index 1aa79735355f..060165a85fb7 100644 --- a/drivers/net/dsa/realtek/Kconfig +++ b/drivers/net/dsa/realtek/Kconfig @@ -9,34 +9,46 @@ menuconfig NET_DSA_REALTEK help Select to enable support for Realtek Ethernet switch chips. + Note that at least one interface driver must be enabled for the + subdrivers to be loaded. Moreover, an interface driver cannot achieve + anything without at least one subdriver enabled. + +if NET_DSA_REALTEK + config NET_DSA_REALTEK_MDIO - tristate "Realtek MDIO connected switch driver" - depends on NET_DSA_REALTEK + tristate "Realtek MDIO interface driver" depends on OF + depends on NET_DSA_REALTEK_RTL8365MB || NET_DSA_REALTEK_RTL8366RB + depends on NET_DSA_REALTEK_RTL8365MB || !NET_DSA_REALTEK_RTL8365MB + depends on NET_DSA_REALTEK_RTL8366RB || !NET_DSA_REALTEK_RTL8366RB help Select to enable support for registering switches configured through MDIO. config NET_DSA_REALTEK_SMI - tristate "Realtek SMI connected switch driver" - depends on NET_DSA_REALTEK + tristate "Realtek SMI interface driver" depends on OF + depends on NET_DSA_REALTEK_RTL8365MB || NET_DSA_REALTEK_RTL8366RB + depends on NET_DSA_REALTEK_RTL8365MB || !NET_DSA_REALTEK_RTL8365MB + depends on NET_DSA_REALTEK_RTL8366RB || !NET_DSA_REALTEK_RTL8366RB help Select to enable support for registering switches connected through SMI. config NET_DSA_REALTEK_RTL8365MB tristate "Realtek RTL8365MB switch subdriver" - depends on NET_DSA_REALTEK - depends on NET_DSA_REALTEK_SMI || NET_DSA_REALTEK_MDIO + imply NET_DSA_REALTEK_SMI + imply NET_DSA_REALTEK_MDIO select NET_DSA_TAG_RTL8_4 help Select to enable support for Realtek RTL8365MB-VC and RTL8367S. config NET_DSA_REALTEK_RTL8366RB tristate "Realtek RTL8366RB switch subdriver" - depends on NET_DSA_REALTEK - depends on NET_DSA_REALTEK_SMI || NET_DSA_REALTEK_MDIO + imply NET_DSA_REALTEK_SMI + imply NET_DSA_REALTEK_MDIO select NET_DSA_TAG_RTL4_A help - Select to enable support for Realtek RTL8366RB + Select to enable support for Realtek RTL8366RB. + +endif diff --git a/drivers/net/dsa/realtek/realtek-mdio.c b/drivers/net/dsa/realtek/realtek-mdio.c index 31e1f100e48e..c58f49d558d2 100644 --- a/drivers/net/dsa/realtek/realtek-mdio.c +++ b/drivers/net/dsa/realtek/realtek-mdio.c @@ -267,7 +267,6 @@ static const struct of_device_id realtek_mdio_of_match[] = { #endif #if IS_ENABLED(CONFIG_NET_DSA_REALTEK_RTL8365MB) { .compatible = "realtek,rtl8365mb", .data = &rtl8365mb_variant, }, - { .compatible = "realtek,rtl8367s", .data = &rtl8365mb_variant, }, #endif { /* sentinel */ }, }; diff --git a/drivers/net/dsa/realtek/realtek-smi.c b/drivers/net/dsa/realtek/realtek-smi.c index 2243d3da55b2..45992f79ec8d 100644 --- a/drivers/net/dsa/realtek/realtek-smi.c +++ b/drivers/net/dsa/realtek/realtek-smi.c @@ -546,20 +546,11 @@ static const struct of_device_id realtek_smi_of_match[] = { .data = &rtl8366rb_variant, }, #endif - { - /* FIXME: add support for RTL8366S and more */ - .compatible = "realtek,rtl8366s", - .data = NULL, - }, #if IS_ENABLED(CONFIG_NET_DSA_REALTEK_RTL8365MB) { .compatible = "realtek,rtl8365mb", .data = &rtl8365mb_variant, }, - { - .compatible = "realtek,rtl8367s", - .data = &rtl8365mb_variant, - }, #endif { /* sentinel */ }, }; diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig index bd4cb9d7c35d..827993022386 100644 --- a/drivers/net/ethernet/Kconfig +++ b/drivers/net/ethernet/Kconfig @@ -35,15 +35,6 @@ source "drivers/net/ethernet/aquantia/Kconfig" source "drivers/net/ethernet/arc/Kconfig" source "drivers/net/ethernet/asix/Kconfig" source "drivers/net/ethernet/atheros/Kconfig" -source "drivers/net/ethernet/broadcom/Kconfig" -source "drivers/net/ethernet/brocade/Kconfig" -source "drivers/net/ethernet/cadence/Kconfig" -source "drivers/net/ethernet/calxeda/Kconfig" -source "drivers/net/ethernet/cavium/Kconfig" -source "drivers/net/ethernet/chelsio/Kconfig" -source "drivers/net/ethernet/cirrus/Kconfig" -source "drivers/net/ethernet/cisco/Kconfig" -source "drivers/net/ethernet/cortina/Kconfig" config CX_ECAT tristate "Beckhoff CX5020 EtherCAT master support" @@ -57,6 +48,14 @@ config CX_ECAT To compile this driver as a module, choose M here. The module will be called ec_bhf. +source "drivers/net/ethernet/broadcom/Kconfig" +source "drivers/net/ethernet/cadence/Kconfig" +source "drivers/net/ethernet/calxeda/Kconfig" +source "drivers/net/ethernet/cavium/Kconfig" +source "drivers/net/ethernet/chelsio/Kconfig" +source "drivers/net/ethernet/cirrus/Kconfig" +source "drivers/net/ethernet/cisco/Kconfig" +source "drivers/net/ethernet/cortina/Kconfig" source "drivers/net/ethernet/davicom/Kconfig" config DNET @@ -85,7 +84,6 @@ source "drivers/net/ethernet/huawei/Kconfig" source "drivers/net/ethernet/i825xx/Kconfig" source "drivers/net/ethernet/ibm/Kconfig" source "drivers/net/ethernet/intel/Kconfig" -source "drivers/net/ethernet/microsoft/Kconfig" source "drivers/net/ethernet/xscale/Kconfig" config JME @@ -128,8 +126,9 @@ source "drivers/net/ethernet/mediatek/Kconfig" source "drivers/net/ethernet/mellanox/Kconfig" source "drivers/net/ethernet/micrel/Kconfig" source "drivers/net/ethernet/microchip/Kconfig" -source "drivers/net/ethernet/moxa/Kconfig" source "drivers/net/ethernet/mscc/Kconfig" +source "drivers/net/ethernet/microsoft/Kconfig" +source "drivers/net/ethernet/moxa/Kconfig" source "drivers/net/ethernet/myricom/Kconfig" config FEALNX @@ -141,10 +140,10 @@ config FEALNX Say Y here to support the Myson MTD-800 family of PCI-based Ethernet cards. <http://www.myson.com.tw/> +source "drivers/net/ethernet/ni/Kconfig" source "drivers/net/ethernet/natsemi/Kconfig" source "drivers/net/ethernet/neterion/Kconfig" source "drivers/net/ethernet/netronome/Kconfig" -source "drivers/net/ethernet/ni/Kconfig" source "drivers/net/ethernet/8390/Kconfig" source "drivers/net/ethernet/nvidia/Kconfig" source "drivers/net/ethernet/nxp/Kconfig" @@ -164,6 +163,7 @@ source "drivers/net/ethernet/packetengines/Kconfig" source "drivers/net/ethernet/pasemi/Kconfig" source "drivers/net/ethernet/pensando/Kconfig" source "drivers/net/ethernet/qlogic/Kconfig" +source "drivers/net/ethernet/brocade/Kconfig" source "drivers/net/ethernet/qualcomm/Kconfig" source "drivers/net/ethernet/rdc/Kconfig" source "drivers/net/ethernet/realtek/Kconfig" @@ -171,10 +171,10 @@ source "drivers/net/ethernet/renesas/Kconfig" source "drivers/net/ethernet/rocker/Kconfig" source "drivers/net/ethernet/samsung/Kconfig" source "drivers/net/ethernet/seeq/Kconfig" -source "drivers/net/ethernet/sfc/Kconfig" source "drivers/net/ethernet/sgi/Kconfig" source "drivers/net/ethernet/silan/Kconfig" source "drivers/net/ethernet/sis/Kconfig" +source "drivers/net/ethernet/sfc/Kconfig" source "drivers/net/ethernet/smsc/Kconfig" source "drivers/net/ethernet/socionext/Kconfig" source "drivers/net/ethernet/stmicro/Kconfig" diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index 33f1a1377588..24d715c28a35 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -486,8 +486,8 @@ int aq_nic_start(struct aq_nic_s *self) if (err < 0) goto err_exit; - for (i = 0U, aq_vec = self->aq_vec[0]; - self->aq_vecs > i; ++i, aq_vec = self->aq_vec[i]) { + for (i = 0U; self->aq_vecs > i; ++i) { + aq_vec = self->aq_vec[i]; err = aq_vec_start(aq_vec); if (err < 0) goto err_exit; @@ -517,8 +517,8 @@ int aq_nic_start(struct aq_nic_s *self) mod_timer(&self->polling_timer, jiffies + AQ_CFG_POLLING_TIMER_INTERVAL); } else { - for (i = 0U, aq_vec = self->aq_vec[0]; - self->aq_vecs > i; ++i, aq_vec = self->aq_vec[i]) { + for (i = 0U; self->aq_vecs > i; ++i) { + aq_vec = self->aq_vec[i]; err = aq_pci_func_alloc_irq(self, i, self->ndev->name, aq_vec_isr, aq_vec, aq_vec_get_affinity_mask(aq_vec)); diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c index 797a95142d1f..3a529ee8c834 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c @@ -444,22 +444,22 @@ err_exit: static int aq_pm_freeze(struct device *dev) { - return aq_suspend_common(dev, false); + return aq_suspend_common(dev, true); } static int aq_pm_suspend_poweroff(struct device *dev) { - return aq_suspend_common(dev, true); + return aq_suspend_common(dev, false); } static int aq_pm_thaw(struct device *dev) { - return atl_resume_common(dev, false); + return atl_resume_common(dev, true); } static int aq_pm_resume_restore(struct device *dev) { - return atl_resume_common(dev, true); + return atl_resume_common(dev, false); } static const struct dev_pm_ops aq_pm_ops = { diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c index f4774cf051c9..6ab1f3212d24 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c @@ -43,8 +43,8 @@ static int aq_vec_poll(struct napi_struct *napi, int budget) if (!self) { err = -EINVAL; } else { - for (i = 0U, ring = self->ring[0]; - self->tx_rings > i; ++i, ring = self->ring[i]) { + for (i = 0U; self->tx_rings > i; ++i) { + ring = self->ring[i]; u64_stats_update_begin(&ring[AQ_VEC_RX_ID].stats.rx.syncp); ring[AQ_VEC_RX_ID].stats.rx.polls++; u64_stats_update_end(&ring[AQ_VEC_RX_ID].stats.rx.syncp); @@ -182,8 +182,8 @@ int aq_vec_init(struct aq_vec_s *self, const struct aq_hw_ops *aq_hw_ops, self->aq_hw_ops = aq_hw_ops; self->aq_hw = aq_hw; - for (i = 0U, ring = self->ring[0]; - self->tx_rings > i; ++i, ring = self->ring[i]) { + for (i = 0U; self->tx_rings > i; ++i) { + ring = self->ring[i]; err = aq_ring_init(&ring[AQ_VEC_TX_ID], ATL_RING_TX); if (err < 0) goto err_exit; @@ -224,8 +224,8 @@ int aq_vec_start(struct aq_vec_s *self) unsigned int i = 0U; int err = 0; - for (i = 0U, ring = self->ring[0]; - self->tx_rings > i; ++i, ring = self->ring[i]) { + for (i = 0U; self->tx_rings > i; ++i) { + ring = self->ring[i]; err = self->aq_hw_ops->hw_ring_tx_start(self->aq_hw, &ring[AQ_VEC_TX_ID]); if (err < 0) @@ -248,8 +248,8 @@ void aq_vec_stop(struct aq_vec_s *self) struct aq_ring_s *ring = NULL; unsigned int i = 0U; - for (i = 0U, ring = self->ring[0]; - self->tx_rings > i; ++i, ring = self->ring[i]) { + for (i = 0U; self->tx_rings > i; ++i) { + ring = self->ring[i]; self->aq_hw_ops->hw_ring_tx_stop(self->aq_hw, &ring[AQ_VEC_TX_ID]); @@ -268,8 +268,8 @@ void aq_vec_deinit(struct aq_vec_s *self) if (!self) goto err_exit; - for (i = 0U, ring = self->ring[0]; - self->tx_rings > i; ++i, ring = self->ring[i]) { + for (i = 0U; self->tx_rings > i; ++i) { + ring = self->ring[i]; aq_ring_tx_clean(&ring[AQ_VEC_TX_ID]); aq_ring_rx_deinit(&ring[AQ_VEC_RX_ID]); } @@ -297,8 +297,8 @@ void aq_vec_ring_free(struct aq_vec_s *self) if (!self) goto err_exit; - for (i = 0U, ring = self->ring[0]; - self->tx_rings > i; ++i, ring = self->ring[i]) { + for (i = 0U; self->tx_rings > i; ++i) { + ring = self->ring[i]; aq_ring_free(&ring[AQ_VEC_TX_ID]); if (i < self->rx_rings) aq_ring_free(&ring[AQ_VEC_RX_ID]); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index c19b072f3a23..962253db25b8 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -14153,10 +14153,6 @@ static int bnx2x_eeh_nic_unload(struct bnx2x *bp) /* Stop Tx */ bnx2x_tx_disable(bp); - /* Delete all NAPI objects */ - bnx2x_del_all_napi(bp); - if (CNIC_LOADED(bp)) - bnx2x_del_all_napi_cnic(bp); netdev_reset_tc(bp->dev); del_timer_sync(&bp->timer); @@ -14261,6 +14257,11 @@ static pci_ers_result_t bnx2x_io_slot_reset(struct pci_dev *pdev) bnx2x_drain_tx_queues(bp); bnx2x_send_unload_req(bp, UNLOAD_RECOVERY); bnx2x_netif_stop(bp, 1); + bnx2x_del_all_napi(bp); + + if (CNIC_LOADED(bp)) + bnx2x_del_all_napi_cnic(bp); + bnx2x_free_irq(bp); /* Report UNLOAD_DONE to MCP */ diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 2dd79af9411b..bf1ec8fdc2ad 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -76,7 +76,7 @@ static inline void bcmgenet_writel(u32 value, void __iomem *offset) if (IS_ENABLED(CONFIG_MIPS) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) __raw_writel(value, offset); else - writel(value, offset); + writel_relaxed(value, offset); } static inline u32 bcmgenet_readl(void __iomem *offset) @@ -84,7 +84,7 @@ static inline u32 bcmgenet_readl(void __iomem *offset) if (IS_ENABLED(CONFIG_MIPS) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) return __raw_readl(offset); else - return readl(offset); + return readl_relaxed(offset); } static inline void dmadesc_set_length_status(struct bcmgenet_priv *priv, @@ -2035,6 +2035,11 @@ static struct sk_buff *bcmgenet_add_tsb(struct net_device *dev, return skb; } +static void bcmgenet_hide_tsb(struct sk_buff *skb) +{ + __skb_pull(skb, sizeof(struct status_64)); +} + static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev) { struct bcmgenet_priv *priv = netdev_priv(dev); @@ -2141,6 +2146,8 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev) } GENET_CB(skb)->last_cb = tx_cb_ptr; + + bcmgenet_hide_tsb(skb); skb_tx_timestamp(skb); /* Decrement total BD count and advance our write pointer */ diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 800d5ced5800..e475be29845c 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -1658,6 +1658,7 @@ static void macb_tx_restart(struct macb_queue *queue) unsigned int head = queue->tx_head; unsigned int tail = queue->tx_tail; struct macb *bp = queue->bp; + unsigned int head_idx, tbqp; if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) queue_writel(queue, ISR, MACB_BIT(TXUBR)); @@ -1665,6 +1666,13 @@ static void macb_tx_restart(struct macb_queue *queue) if (head == tail) return; + tbqp = queue_readl(queue, TBQP) / macb_dma_desc_get_size(bp); + tbqp = macb_adj_dma_desc_idx(bp, macb_tx_ring_wrap(bp, tbqp)); + head_idx = macb_adj_dma_desc_idx(bp, macb_tx_ring_wrap(bp, head)); + + if (tbqp == head_idx) + return; + macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART)); } diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index d5356db7539a..caf48023f8ea 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1835,11 +1835,6 @@ static int ftgmac100_probe(struct platform_device *pdev) priv->rxdes0_edorr_mask = BIT(30); priv->txdes0_edotr_mask = BIT(30); priv->is_aspeed = true; - /* Disable ast2600 problematic HW arbitration */ - if (of_device_is_compatible(np, "aspeed,ast2600-mac")) { - iowrite32(FTGMAC100_TM_DEFAULT, - priv->base + FTGMAC100_OFFSET_TM); - } } else { priv->rxdes0_edorr_mask = BIT(15); priv->txdes0_edotr_mask = BIT(15); @@ -1911,6 +1906,11 @@ static int ftgmac100_probe(struct platform_device *pdev) err = ftgmac100_setup_clk(priv); if (err) goto err_phy_connect; + + /* Disable ast2600 problematic HW arbitration */ + if (of_device_is_compatible(np, "aspeed,ast2600-mac")) + iowrite32(FTGMAC100_TM_DEFAULT, + priv->base + FTGMAC100_OFFSET_TM); } /* Default ring sizes */ diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c index 763d2c7b5fb1..5750f9a56393 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c @@ -489,11 +489,15 @@ static int dpaa_get_ts_info(struct net_device *net_dev, info->phc_index = -1; fman_node = of_get_parent(mac_node); - if (fman_node) + if (fman_node) { ptp_node = of_parse_phandle(fman_node, "ptimer-handle", 0); + of_node_put(fman_node); + } - if (ptp_node) + if (ptp_node) { ptp_dev = of_find_device_by_node(ptp_node); + of_node_put(ptp_node); + } if (ptp_dev) ptp = platform_get_drvdata(ptp_dev); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c index 79afb1d7289b..9182631856d5 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c @@ -297,10 +297,6 @@ int enetc_setup_tc_txtime(struct net_device *ndev, void *type_data) if (tc < 0 || tc >= priv->num_tx_rings) return -EINVAL; - /* Do not support TXSTART and TX CSUM offload simutaniously */ - if (ndev->features & NETIF_F_CSUM_MASK) - return -EBUSY; - /* TSD and Qbv are mutually exclusive in hardware */ if (enetc_rd(&priv->si->hw, ENETC_QBV_PTGCR_OFFSET) & ENETC_QBV_TGE) return -EBUSY; diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 11227f51404c..9f33ec838b52 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -3731,7 +3731,7 @@ static int fec_enet_init_stop_mode(struct fec_enet_private *fep, ARRAY_SIZE(out_val)); if (ret) { dev_dbg(&fep->pdev->dev, "no stop mode property\n"); - return ret; + goto out; } fep->stop_gpr.gpr = syscon_node_to_regmap(gpr_np); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c index 7edf8569514c..928d934cb21a 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c @@ -1065,19 +1065,23 @@ int hns_mac_init(struct dsaf_device *dsaf_dev) device_for_each_child_node(dsaf_dev->dev, child) { ret = fwnode_property_read_u32(child, "reg", &port_id); if (ret) { + fwnode_handle_put(child); dev_err(dsaf_dev->dev, "get reg fail, ret=%d!\n", ret); return ret; } if (port_id >= max_port_num) { + fwnode_handle_put(child); dev_err(dsaf_dev->dev, "reg(%u) out of range!\n", port_id); return -EINVAL; } mac_cb = devm_kzalloc(dsaf_dev->dev, sizeof(*mac_cb), GFP_KERNEL); - if (!mac_cb) + if (!mac_cb) { + fwnode_handle_put(child); return -ENOMEM; + } mac_cb->fw_port = child; mac_cb->mac_id = (u8)port_id; dsaf_dev->mac_cb[port_id] = mac_cb; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c index 0c60f41fca8a..f3c9395d8351 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c @@ -75,7 +75,7 @@ int hclge_comm_tqps_update_stats(struct hnae3_handle *handle, ret = hclge_comm_cmd_send(hw, &desc, 1); if (ret) { dev_err(&hw->cmq.csq.pdev->dev, - "failed to get tqp stat, ret = %d, tx = %u.\n", + "failed to get tqp stat, ret = %d, rx = %u.\n", ret, i); return ret; } @@ -89,7 +89,7 @@ int hclge_comm_tqps_update_stats(struct hnae3_handle *handle, ret = hclge_comm_cmd_send(hw, &desc, 1); if (ret) { dev_err(&hw->cmq.csq.pdev->dev, - "failed to get tqp stat, ret = %d, rx = %u.\n", + "failed to get tqp stat, ret = %d, tx = %u.\n", ret, i); return ret; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index 44d9b560b337..93aeb615191d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -562,12 +562,12 @@ static void hns3_dbg_tx_spare_info(struct hns3_enet_ring *ring, char *buf, for (i = 0; i < ring_num; i++) { j = 0; - sprintf(result[j++], "%8u", i); - sprintf(result[j++], "%9u", ring->tx_copybreak); - sprintf(result[j++], "%3u", tx_spare->len); - sprintf(result[j++], "%3u", tx_spare->next_to_use); - sprintf(result[j++], "%3u", tx_spare->next_to_clean); - sprintf(result[j++], "%3u", tx_spare->last_to_clean); + sprintf(result[j++], "%u", i); + sprintf(result[j++], "%u", ring->tx_copybreak); + sprintf(result[j++], "%u", tx_spare->len); + sprintf(result[j++], "%u", tx_spare->next_to_use); + sprintf(result[j++], "%u", tx_spare->next_to_clean); + sprintf(result[j++], "%u", tx_spare->last_to_clean); sprintf(result[j++], "%pad", &tx_spare->dma); hns3_dbg_fill_content(content, sizeof(content), tx_spare_info_items, @@ -598,35 +598,35 @@ static void hns3_dump_rx_queue_info(struct hns3_enet_ring *ring, u32 base_add_l, base_add_h; u32 j = 0; - sprintf(result[j++], "%8u", index); + sprintf(result[j++], "%u", index); - sprintf(result[j++], "%6u", readl_relaxed(ring->tqp->io_base + + sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base + HNS3_RING_RX_RING_BD_NUM_REG)); - sprintf(result[j++], "%6u", readl_relaxed(ring->tqp->io_base + + sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base + HNS3_RING_RX_RING_BD_LEN_REG)); - sprintf(result[j++], "%4u", readl_relaxed(ring->tqp->io_base + + sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base + HNS3_RING_RX_RING_TAIL_REG)); - sprintf(result[j++], "%4u", readl_relaxed(ring->tqp->io_base + + sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base + HNS3_RING_RX_RING_HEAD_REG)); - sprintf(result[j++], "%6u", readl_relaxed(ring->tqp->io_base + + sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base + HNS3_RING_RX_RING_FBDNUM_REG)); - sprintf(result[j++], "%6u", readl_relaxed(ring->tqp->io_base + + sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base + HNS3_RING_RX_RING_PKTNUM_RECORD_REG)); - sprintf(result[j++], "%9u", ring->rx_copybreak); + sprintf(result[j++], "%u", ring->rx_copybreak); - sprintf(result[j++], "%7s", readl_relaxed(ring->tqp->io_base + + sprintf(result[j++], "%s", readl_relaxed(ring->tqp->io_base + HNS3_RING_EN_REG) ? "on" : "off"); if (hnae3_ae_dev_tqp_txrx_indep_supported(ae_dev)) - sprintf(result[j++], "%10s", readl_relaxed(ring->tqp->io_base + + sprintf(result[j++], "%s", readl_relaxed(ring->tqp->io_base + HNS3_RING_RX_EN_REG) ? "on" : "off"); else - sprintf(result[j++], "%10s", "NA"); + sprintf(result[j++], "%s", "NA"); base_add_h = readl_relaxed(ring->tqp->io_base + HNS3_RING_RX_RING_BASEADDR_H_REG); @@ -700,36 +700,36 @@ static void hns3_dump_tx_queue_info(struct hns3_enet_ring *ring, u32 base_add_l, base_add_h; u32 j = 0; - sprintf(result[j++], "%8u", index); - sprintf(result[j++], "%6u", readl_relaxed(ring->tqp->io_base + + sprintf(result[j++], "%u", index); + sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base + HNS3_RING_TX_RING_BD_NUM_REG)); - sprintf(result[j++], "%2u", readl_relaxed(ring->tqp->io_base + + sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base + HNS3_RING_TX_RING_TC_REG)); - sprintf(result[j++], "%4u", readl_relaxed(ring->tqp->io_base + + sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base + HNS3_RING_TX_RING_TAIL_REG)); - sprintf(result[j++], "%4u", readl_relaxed(ring->tqp->io_base + + sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base + HNS3_RING_TX_RING_HEAD_REG)); - sprintf(result[j++], "%6u", readl_relaxed(ring->tqp->io_base + + sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base + HNS3_RING_TX_RING_FBDNUM_REG)); - sprintf(result[j++], "%6u", readl_relaxed(ring->tqp->io_base + + sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base + HNS3_RING_TX_RING_OFFSET_REG)); - sprintf(result[j++], "%6u", readl_relaxed(ring->tqp->io_base + + sprintf(result[j++], "%u", readl_relaxed(ring->tqp->io_base + HNS3_RING_TX_RING_PKTNUM_RECORD_REG)); - sprintf(result[j++], "%7s", readl_relaxed(ring->tqp->io_base + + sprintf(result[j++], "%s", readl_relaxed(ring->tqp->io_base + HNS3_RING_EN_REG) ? "on" : "off"); if (hnae3_ae_dev_tqp_txrx_indep_supported(ae_dev)) - sprintf(result[j++], "%10s", readl_relaxed(ring->tqp->io_base + + sprintf(result[j++], "%s", readl_relaxed(ring->tqp->io_base + HNS3_RING_TX_EN_REG) ? "on" : "off"); else - sprintf(result[j++], "%10s", "NA"); + sprintf(result[j++], "%s", "NA"); base_add_h = readl_relaxed(ring->tqp->io_base + HNS3_RING_TX_RING_BASEADDR_H_REG); @@ -848,15 +848,15 @@ static void hns3_dump_rx_bd_info(struct hns3_nic_priv *priv, { unsigned int j = 0; - sprintf(result[j++], "%5d", idx); + sprintf(result[j++], "%d", idx); sprintf(result[j++], "%#x", le32_to_cpu(desc->rx.l234_info)); - sprintf(result[j++], "%7u", le16_to_cpu(desc->rx.pkt_len)); - sprintf(result[j++], "%4u", le16_to_cpu(desc->rx.size)); + sprintf(result[j++], "%u", le16_to_cpu(desc->rx.pkt_len)); + sprintf(result[j++], "%u", le16_to_cpu(desc->rx.size)); sprintf(result[j++], "%#x", le32_to_cpu(desc->rx.rss_hash)); - sprintf(result[j++], "%5u", le16_to_cpu(desc->rx.fd_id)); - sprintf(result[j++], "%8u", le16_to_cpu(desc->rx.vlan_tag)); - sprintf(result[j++], "%15u", le16_to_cpu(desc->rx.o_dm_vlan_id_fb)); - sprintf(result[j++], "%11u", le16_to_cpu(desc->rx.ot_vlan_tag)); + sprintf(result[j++], "%u", le16_to_cpu(desc->rx.fd_id)); + sprintf(result[j++], "%u", le16_to_cpu(desc->rx.vlan_tag)); + sprintf(result[j++], "%u", le16_to_cpu(desc->rx.o_dm_vlan_id_fb)); + sprintf(result[j++], "%u", le16_to_cpu(desc->rx.ot_vlan_tag)); sprintf(result[j++], "%#x", le32_to_cpu(desc->rx.bd_base_info)); if (test_bit(HNS3_NIC_STATE_RXD_ADV_LAYOUT_ENABLE, &priv->state)) { u32 ol_info = le32_to_cpu(desc->rx.ol_info); @@ -930,19 +930,19 @@ static void hns3_dump_tx_bd_info(struct hns3_nic_priv *priv, { unsigned int j = 0; - sprintf(result[j++], "%6d", idx); + sprintf(result[j++], "%d", idx); sprintf(result[j++], "%#llx", le64_to_cpu(desc->addr)); - sprintf(result[j++], "%5u", le16_to_cpu(desc->tx.vlan_tag)); - sprintf(result[j++], "%5u", le16_to_cpu(desc->tx.send_size)); + sprintf(result[j++], "%u", le16_to_cpu(desc->tx.vlan_tag)); + sprintf(result[j++], "%u", le16_to_cpu(desc->tx.send_size)); sprintf(result[j++], "%#x", le32_to_cpu(desc->tx.type_cs_vlan_tso_len)); - sprintf(result[j++], "%5u", le16_to_cpu(desc->tx.outer_vlan_tag)); - sprintf(result[j++], "%5u", le16_to_cpu(desc->tx.tv)); - sprintf(result[j++], "%10u", + sprintf(result[j++], "%u", le16_to_cpu(desc->tx.outer_vlan_tag)); + sprintf(result[j++], "%u", le16_to_cpu(desc->tx.tv)); + sprintf(result[j++], "%u", le32_to_cpu(desc->tx.ol_type_vlan_len_msec)); sprintf(result[j++], "%#x", le32_to_cpu(desc->tx.paylen_ol4cs)); sprintf(result[j++], "%#x", le16_to_cpu(desc->tx.bdtp_fe_sc_vld_ra_ri)); - sprintf(result[j++], "%5u", le16_to_cpu(desc->tx.mss_hw_csum)); + sprintf(result[j++], "%u", le16_to_cpu(desc->tx.mss_hw_csum)); } static int hns3_dbg_tx_bd_info(struct hns3_dbg_data *d, char *buf, int len) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 14dc12c2155d..a3ee7875d6a7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -5203,6 +5203,13 @@ static void hns3_state_init(struct hnae3_handle *handle) set_bit(HNS3_NIC_STATE_RXD_ADV_LAYOUT_ENABLE, &priv->state); } +static void hns3_state_uninit(struct hnae3_handle *handle) +{ + struct hns3_nic_priv *priv = handle->priv; + + clear_bit(HNS3_NIC_STATE_INITED, &priv->state); +} + static int hns3_client_init(struct hnae3_handle *handle) { struct pci_dev *pdev = handle->pdev; @@ -5320,7 +5327,9 @@ static int hns3_client_init(struct hnae3_handle *handle) return ret; out_reg_netdev_fail: + hns3_state_uninit(handle); hns3_dbg_uninit(handle); + hns3_client_stop(handle); out_client_start: hns3_free_rx_cpu_rmap(netdev); hns3_nic_uninit_irq(priv); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c index 6799d16de34b..7998ca617a92 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c @@ -94,6 +94,13 @@ static int hclge_send_mbx_msg(struct hclge_vport *vport, u8 *msg, u16 msg_len, enum hclge_comm_cmd_status status; struct hclge_desc desc; + if (msg_len > HCLGE_MBX_MAX_MSG_SIZE) { + dev_err(&hdev->pdev->dev, + "msg data length(=%u) exceeds maximum(=%u)\n", + msg_len, HCLGE_MBX_MAX_MSG_SIZE); + return -EMSGSIZE; + } + resp_pf_to_vf = (struct hclge_mbx_pf_to_vf_cmd *)desc.data; hclge_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_MBX_PF_TO_VF, false); @@ -176,7 +183,7 @@ static int hclge_get_ring_chain_from_mbx( ring_num = req->msg.ring_num; if (ring_num > HCLGE_MBX_MAX_RING_CHAIN_PARAM_NUM) - return -ENOMEM; + return -EINVAL; for (i = 0; i < ring_num; i++) { if (req->msg.param[i].tqp_index >= vport->nic.kinfo.rss_size) { @@ -587,9 +594,9 @@ static int hclge_set_vf_mtu(struct hclge_vport *vport, return hclge_set_vport_mtu(vport, mtu); } -static void hclge_get_queue_id_in_pf(struct hclge_vport *vport, - struct hclge_mbx_vf_to_pf_cmd *mbx_req, - struct hclge_respond_to_vf_msg *resp_msg) +static int hclge_get_queue_id_in_pf(struct hclge_vport *vport, + struct hclge_mbx_vf_to_pf_cmd *mbx_req, + struct hclge_respond_to_vf_msg *resp_msg) { struct hnae3_handle *handle = &vport->nic; struct hclge_dev *hdev = vport->back; @@ -599,17 +606,18 @@ static void hclge_get_queue_id_in_pf(struct hclge_vport *vport, if (queue_id >= handle->kinfo.num_tqps) { dev_err(&hdev->pdev->dev, "Invalid queue id(%u) from VF %u\n", queue_id, mbx_req->mbx_src_vfid); - return; + return -EINVAL; } qid_in_pf = hclge_covert_handle_qid_global(&vport->nic, queue_id); memcpy(resp_msg->data, &qid_in_pf, sizeof(qid_in_pf)); resp_msg->len = sizeof(qid_in_pf); + return 0; } -static void hclge_get_rss_key(struct hclge_vport *vport, - struct hclge_mbx_vf_to_pf_cmd *mbx_req, - struct hclge_respond_to_vf_msg *resp_msg) +static int hclge_get_rss_key(struct hclge_vport *vport, + struct hclge_mbx_vf_to_pf_cmd *mbx_req, + struct hclge_respond_to_vf_msg *resp_msg) { #define HCLGE_RSS_MBX_RESP_LEN 8 struct hclge_dev *hdev = vport->back; @@ -627,13 +635,14 @@ static void hclge_get_rss_key(struct hclge_vport *vport, dev_warn(&hdev->pdev->dev, "failed to get the rss hash key, the index(%u) invalid !\n", index); - return; + return -EINVAL; } memcpy(resp_msg->data, &rss_cfg->rss_hash_key[index * HCLGE_RSS_MBX_RESP_LEN], HCLGE_RSS_MBX_RESP_LEN); resp_msg->len = HCLGE_RSS_MBX_RESP_LEN; + return 0; } static void hclge_link_fail_parse(struct hclge_dev *hdev, u8 link_fail_code) @@ -809,10 +818,10 @@ void hclge_mbx_handler(struct hclge_dev *hdev) "VF fail(%d) to set mtu\n", ret); break; case HCLGE_MBX_GET_QID_IN_PF: - hclge_get_queue_id_in_pf(vport, req, &resp_msg); + ret = hclge_get_queue_id_in_pf(vport, req, &resp_msg); break; case HCLGE_MBX_GET_RSS_KEY: - hclge_get_rss_key(vport, req, &resp_msg); + ret = hclge_get_rss_key(vport, req, &resp_msg); break; case HCLGE_MBX_GET_LINK_MODE: hclge_get_link_mode(vport, req); diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 77683909ca3d..5c5931dba51d 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -3210,13 +3210,8 @@ static void ibmvnic_get_ringparam(struct net_device *netdev, { struct ibmvnic_adapter *adapter = netdev_priv(netdev); - if (adapter->priv_flags & IBMVNIC_USE_SERVER_MAXES) { - ring->rx_max_pending = adapter->max_rx_add_entries_per_subcrq; - ring->tx_max_pending = adapter->max_tx_entries_per_subcrq; - } else { - ring->rx_max_pending = IBMVNIC_MAX_QUEUE_SZ; - ring->tx_max_pending = IBMVNIC_MAX_QUEUE_SZ; - } + ring->rx_max_pending = adapter->max_rx_add_entries_per_subcrq; + ring->tx_max_pending = adapter->max_tx_entries_per_subcrq; ring->rx_mini_max_pending = 0; ring->rx_jumbo_max_pending = 0; ring->rx_pending = adapter->req_rx_add_entries_per_subcrq; @@ -3231,23 +3226,21 @@ static int ibmvnic_set_ringparam(struct net_device *netdev, struct netlink_ext_ack *extack) { struct ibmvnic_adapter *adapter = netdev_priv(netdev); - int ret; - ret = 0; + if (ring->rx_pending > adapter->max_rx_add_entries_per_subcrq || + ring->tx_pending > adapter->max_tx_entries_per_subcrq) { + netdev_err(netdev, "Invalid request.\n"); + netdev_err(netdev, "Max tx buffers = %llu\n", + adapter->max_rx_add_entries_per_subcrq); + netdev_err(netdev, "Max rx buffers = %llu\n", + adapter->max_tx_entries_per_subcrq); + return -EINVAL; + } + adapter->desired.rx_entries = ring->rx_pending; adapter->desired.tx_entries = ring->tx_pending; - ret = wait_for_reset(adapter); - - if (!ret && - (adapter->req_rx_add_entries_per_subcrq != ring->rx_pending || - adapter->req_tx_entries_per_subcrq != ring->tx_pending)) - netdev_info(netdev, - "Could not match full ringsize request. Requested: RX %d, TX %d; Allowed: RX %llu, TX %llu\n", - ring->rx_pending, ring->tx_pending, - adapter->req_rx_add_entries_per_subcrq, - adapter->req_tx_entries_per_subcrq); - return ret; + return wait_for_reset(adapter); } static void ibmvnic_get_channels(struct net_device *netdev, @@ -3255,14 +3248,8 @@ static void ibmvnic_get_channels(struct net_device *netdev, { struct ibmvnic_adapter *adapter = netdev_priv(netdev); - if (adapter->priv_flags & IBMVNIC_USE_SERVER_MAXES) { - channels->max_rx = adapter->max_rx_queues; - channels->max_tx = adapter->max_tx_queues; - } else { - channels->max_rx = IBMVNIC_MAX_QUEUES; - channels->max_tx = IBMVNIC_MAX_QUEUES; - } - + channels->max_rx = adapter->max_rx_queues; + channels->max_tx = adapter->max_tx_queues; channels->max_other = 0; channels->max_combined = 0; channels->rx_count = adapter->req_rx_queues; @@ -3275,22 +3262,11 @@ static int ibmvnic_set_channels(struct net_device *netdev, struct ethtool_channels *channels) { struct ibmvnic_adapter *adapter = netdev_priv(netdev); - int ret; - ret = 0; adapter->desired.rx_queues = channels->rx_count; adapter->desired.tx_queues = channels->tx_count; - ret = wait_for_reset(adapter); - - if (!ret && - (adapter->req_rx_queues != channels->rx_count || - adapter->req_tx_queues != channels->tx_count)) - netdev_info(netdev, - "Could not match full channels request. Requested: RX %d, TX %d; Allowed: RX %llu, TX %llu\n", - channels->rx_count, channels->tx_count, - adapter->req_rx_queues, adapter->req_tx_queues); - return ret; + return wait_for_reset(adapter); } static void ibmvnic_get_strings(struct net_device *dev, u32 stringset, u8 *data) @@ -3298,43 +3274,32 @@ static void ibmvnic_get_strings(struct net_device *dev, u32 stringset, u8 *data) struct ibmvnic_adapter *adapter = netdev_priv(dev); int i; - switch (stringset) { - case ETH_SS_STATS: - for (i = 0; i < ARRAY_SIZE(ibmvnic_stats); - i++, data += ETH_GSTRING_LEN) - memcpy(data, ibmvnic_stats[i].name, ETH_GSTRING_LEN); + if (stringset != ETH_SS_STATS) + return; - for (i = 0; i < adapter->req_tx_queues; i++) { - snprintf(data, ETH_GSTRING_LEN, "tx%d_packets", i); - data += ETH_GSTRING_LEN; + for (i = 0; i < ARRAY_SIZE(ibmvnic_stats); i++, data += ETH_GSTRING_LEN) + memcpy(data, ibmvnic_stats[i].name, ETH_GSTRING_LEN); - snprintf(data, ETH_GSTRING_LEN, "tx%d_bytes", i); - data += ETH_GSTRING_LEN; + for (i = 0; i < adapter->req_tx_queues; i++) { + snprintf(data, ETH_GSTRING_LEN, "tx%d_packets", i); + data += ETH_GSTRING_LEN; - snprintf(data, ETH_GSTRING_LEN, - "tx%d_dropped_packets", i); - data += ETH_GSTRING_LEN; - } + snprintf(data, ETH_GSTRING_LEN, "tx%d_bytes", i); + data += ETH_GSTRING_LEN; - for (i = 0; i < adapter->req_rx_queues; i++) { - snprintf(data, ETH_GSTRING_LEN, "rx%d_packets", i); - data += ETH_GSTRING_LEN; + snprintf(data, ETH_GSTRING_LEN, "tx%d_dropped_packets", i); + data += ETH_GSTRING_LEN; + } - snprintf(data, ETH_GSTRING_LEN, "rx%d_bytes", i); - data += ETH_GSTRING_LEN; + for (i = 0; i < adapter->req_rx_queues; i++) { + snprintf(data, ETH_GSTRING_LEN, "rx%d_packets", i); + data += ETH_GSTRING_LEN; - snprintf(data, ETH_GSTRING_LEN, "rx%d_interrupts", i); - data += ETH_GSTRING_LEN; - } - break; + snprintf(data, ETH_GSTRING_LEN, "rx%d_bytes", i); + data += ETH_GSTRING_LEN; - case ETH_SS_PRIV_FLAGS: - for (i = 0; i < ARRAY_SIZE(ibmvnic_priv_flags); i++) - strcpy(data + i * ETH_GSTRING_LEN, - ibmvnic_priv_flags[i]); - break; - default: - return; + snprintf(data, ETH_GSTRING_LEN, "rx%d_interrupts", i); + data += ETH_GSTRING_LEN; } } @@ -3347,8 +3312,6 @@ static int ibmvnic_get_sset_count(struct net_device *dev, int sset) return ARRAY_SIZE(ibmvnic_stats) + adapter->req_tx_queues * NUM_TX_STATS + adapter->req_rx_queues * NUM_RX_STATS; - case ETH_SS_PRIV_FLAGS: - return ARRAY_SIZE(ibmvnic_priv_flags); default: return -EOPNOTSUPP; } @@ -3401,26 +3364,6 @@ static void ibmvnic_get_ethtool_stats(struct net_device *dev, } } -static u32 ibmvnic_get_priv_flags(struct net_device *netdev) -{ - struct ibmvnic_adapter *adapter = netdev_priv(netdev); - - return adapter->priv_flags; -} - -static int ibmvnic_set_priv_flags(struct net_device *netdev, u32 flags) -{ - struct ibmvnic_adapter *adapter = netdev_priv(netdev); - bool which_maxes = !!(flags & IBMVNIC_USE_SERVER_MAXES); - - if (which_maxes) - adapter->priv_flags |= IBMVNIC_USE_SERVER_MAXES; - else - adapter->priv_flags &= ~IBMVNIC_USE_SERVER_MAXES; - - return 0; -} - static const struct ethtool_ops ibmvnic_ethtool_ops = { .get_drvinfo = ibmvnic_get_drvinfo, .get_msglevel = ibmvnic_get_msglevel, @@ -3434,8 +3377,6 @@ static const struct ethtool_ops ibmvnic_ethtool_ops = { .get_sset_count = ibmvnic_get_sset_count, .get_ethtool_stats = ibmvnic_get_ethtool_stats, .get_link_ksettings = ibmvnic_get_link_ksettings, - .get_priv_flags = ibmvnic_get_priv_flags, - .set_priv_flags = ibmvnic_set_priv_flags, }; /* Routines for managing CRQs/sCRQs */ diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index 8f5cefb932dd..1310c861bf83 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -41,11 +41,6 @@ #define IBMVNIC_RESET_DELAY 100 -static const char ibmvnic_priv_flags[][ETH_GSTRING_LEN] = { -#define IBMVNIC_USE_SERVER_MAXES 0x1 - "use-server-maxes" -}; - struct ibmvnic_login_buffer { __be32 len; __be32 version; @@ -883,7 +878,6 @@ struct ibmvnic_adapter { struct ibmvnic_control_ip_offload_buffer ip_offload_ctrl; dma_addr_t ip_offload_ctrl_tok; u32 msg_enable; - u32 priv_flags; /* Vital Product Data (VPD) */ struct ibmvnic_vpd *vpd; diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index d60e2016d03c..e6c8e6d5234f 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -1009,8 +1009,8 @@ static s32 e1000_platform_pm_pch_lpt(struct e1000_hw *hw, bool link) { u32 reg = link << (E1000_LTRV_REQ_SHIFT + E1000_LTRV_NOSNOOP_SHIFT) | link << E1000_LTRV_REQ_SHIFT | E1000_LTRV_SEND; - u16 max_ltr_enc_d = 0; /* maximum LTR decoded by platform */ - u16 lat_enc_d = 0; /* latency decoded */ + u32 max_ltr_enc_d = 0; /* maximum LTR decoded by platform */ + u32 lat_enc_d = 0; /* latency decoded */ u16 lat_enc = 0; /* latency encoded */ if (link) { diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 190590d32faf..7dfcf78b57fb 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -2871,7 +2871,6 @@ continue_reset: running = adapter->state == __IAVF_RUNNING; if (running) { - netdev->flags &= ~IFF_UP; netif_carrier_off(netdev); netif_tx_stop_all_queues(netdev); adapter->link_up = false; @@ -2988,7 +2987,7 @@ continue_reset: * to __IAVF_RUNNING */ iavf_up_complete(adapter); - netdev->flags |= IFF_UP; + iavf_irq_enable(adapter, true); } else { iavf_change_state(adapter, __IAVF_DOWN); @@ -3004,10 +3003,8 @@ continue_reset: reset_err: mutex_unlock(&adapter->client_lock); mutex_unlock(&adapter->crit_lock); - if (running) { + if (running) iavf_change_state(adapter, __IAVF_RUNNING); - netdev->flags |= IFF_UP; - } dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n"); iavf_close(netdev); } diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.c b/drivers/net/ethernet/intel/ice/ice_arfs.c index 5daade32ea62..fba178e07600 100644 --- a/drivers/net/ethernet/intel/ice/ice_arfs.c +++ b/drivers/net/ethernet/intel/ice/ice_arfs.c @@ -577,7 +577,7 @@ void ice_free_cpu_rx_rmap(struct ice_vsi *vsi) { struct net_device *netdev; - if (!vsi || vsi->type != ICE_VSI_PF || !vsi->arfs_fltr_list) + if (!vsi || vsi->type != ICE_VSI_PF) return; netdev = vsi->netdev; @@ -599,7 +599,7 @@ int ice_set_cpu_rx_rmap(struct ice_vsi *vsi) int base_idx, i; if (!vsi || vsi->type != ICE_VSI_PF) - return -EINVAL; + return 0; pf = vsi->back; netdev = vsi->netdev; @@ -636,7 +636,6 @@ void ice_remove_arfs(struct ice_pf *pf) if (!pf_vsi) return; - ice_free_cpu_rx_rmap(pf_vsi); ice_clear_arfs(pf_vsi); } @@ -653,9 +652,5 @@ void ice_rebuild_arfs(struct ice_pf *pf) return; ice_remove_arfs(pf); - if (ice_set_cpu_rx_rmap(pf_vsi)) { - dev_err(ice_pf_to_dev(pf), "Failed to rebuild aRFS\n"); - return; - } ice_init_arfs(pf_vsi); } diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c index 9a84d746a6c4..6a463b242c7d 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.c +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c @@ -361,7 +361,8 @@ ice_eswitch_port_start_xmit(struct sk_buff *skb, struct net_device *netdev) np = netdev_priv(netdev); vsi = np->vsi; - if (ice_is_reset_in_progress(vsi->back->state)) + if (ice_is_reset_in_progress(vsi->back->state) || + test_bit(ICE_VF_DIS, vsi->back->state)) return NETDEV_TX_BUSY; repr = ice_netdev_to_repr(netdev); diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.h b/drivers/net/ethernet/intel/ice/ice_eswitch.h index bd58d9d2e565..6a413331572b 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.h +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.h @@ -52,7 +52,7 @@ static inline void ice_eswitch_update_repr(struct ice_vsi *vsi) { } static inline int ice_eswitch_configure(struct ice_pf *pf) { - return -EOPNOTSUPP; + return 0; } static inline int ice_eswitch_rebuild(struct ice_pf *pf) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 2774cbd5b12a..6d19c58ccacd 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -2689,6 +2689,8 @@ void ice_vsi_free_irq(struct ice_vsi *vsi) return; vsi->irqs_ready = false; + ice_free_cpu_rx_rmap(vsi); + ice_for_each_q_vector(vsi, i) { u16 vector = i + base; int irq_num; @@ -2702,7 +2704,8 @@ void ice_vsi_free_irq(struct ice_vsi *vsi) continue; /* clear the affinity notifier in the IRQ descriptor */ - irq_set_affinity_notifier(irq_num, NULL); + if (!IS_ENABLED(CONFIG_RFS_ACCEL)) + irq_set_affinity_notifier(irq_num, NULL); /* clear the affinity_mask in the IRQ descriptor */ irq_set_affinity_hint(irq_num, NULL); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index d768925785ca..9a0a358a15c2 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2510,6 +2510,13 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename) irq_set_affinity_hint(irq_num, &q_vector->affinity_mask); } + err = ice_set_cpu_rx_rmap(vsi); + if (err) { + netdev_err(vsi->netdev, "Failed to setup CPU RMAP on VSI %u: %pe\n", + vsi->vsi_num, ERR_PTR(err)); + goto free_q_irqs; + } + vsi->irqs_ready = true; return 0; @@ -3692,20 +3699,12 @@ static int ice_setup_pf_sw(struct ice_pf *pf) */ ice_napi_add(vsi); - status = ice_set_cpu_rx_rmap(vsi); - if (status) { - dev_err(dev, "Failed to set CPU Rx map VSI %d error %d\n", - vsi->vsi_num, status); - goto unroll_napi_add; - } status = ice_init_mac_fltr(pf); if (status) - goto free_cpu_rx_map; + goto unroll_napi_add; return 0; -free_cpu_rx_map: - ice_free_cpu_rx_rmap(vsi); unroll_napi_add: ice_tc_indir_block_unregister(vsi); unroll_cfg_netdev: @@ -5167,7 +5166,6 @@ static int __maybe_unused ice_suspend(struct device *dev) continue; ice_vsi_free_q_vectors(pf->vsi[v]); } - ice_free_cpu_rx_rmap(ice_get_main_vsi(pf)); ice_clear_interrupt_scheme(pf); pci_save_state(pdev); @@ -6931,12 +6929,15 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) dev_dbg(dev, "rebuilding PF after reset_type=%d\n", reset_type); +#define ICE_EMP_RESET_SLEEP_MS 5000 if (reset_type == ICE_RESET_EMPR) { /* If an EMP reset has occurred, any previously pending flash * update will have completed. We no longer know whether or * not the NVM update EMP reset is restricted. */ pf->fw_emp_reset_disabled = false; + + msleep(ICE_EMP_RESET_SLEEP_MS); } err = ice_init_all_ctrlq(hw); diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c index 4eb0599714f4..13cdb5ea594d 100644 --- a/drivers/net/ethernet/intel/ice/ice_nvm.c +++ b/drivers/net/ethernet/intel/ice/ice_nvm.c @@ -641,6 +641,7 @@ ice_get_orom_civd_data(struct ice_hw *hw, enum ice_bank_select bank, status = ice_read_flash_module(hw, bank, ICE_SR_1ST_OROM_BANK_PTR, 0, orom_data, hw->flash.banks.orom_size); if (status) { + vfree(orom_data); ice_debug(hw, ICE_DBG_NVM, "Unable to read Option ROM data\n"); return status; } diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index 8915a9d39e36..0c438219f7a3 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -1046,8 +1046,8 @@ int ice_sriov_configure(struct pci_dev *pdev, int num_vfs) if (!num_vfs) { if (!pci_vfs_assigned(pdev)) { - ice_mbx_deinit_snapshot(&pf->hw); ice_free_vfs(pf); + ice_mbx_deinit_snapshot(&pf->hw); if (pf->lag) ice_enable_lag(pf->lag); return 0; diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index 69ff4b929772..b72606c9e6d0 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -3625,6 +3625,8 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event) return; } + mutex_lock(&vf->cfg_lock); + /* Check if VF is disabled. */ if (test_bit(ICE_VF_STATE_DIS, vf->vf_states)) { err = -EPERM; @@ -3642,32 +3644,20 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event) err = -EINVAL; } - if (!ice_vc_is_opcode_allowed(vf, v_opcode)) { - ice_vc_send_msg_to_vf(vf, v_opcode, - VIRTCHNL_STATUS_ERR_NOT_SUPPORTED, NULL, - 0); - ice_put_vf(vf); - return; - } - error_handler: if (err) { ice_vc_send_msg_to_vf(vf, v_opcode, VIRTCHNL_STATUS_ERR_PARAM, NULL, 0); dev_err(dev, "Invalid message from VF %d, opcode %d, len %d, error %d\n", vf_id, v_opcode, msglen, err); - ice_put_vf(vf); - return; + goto finish; } - /* VF is being configured in another context that triggers a VFR, so no - * need to process this message - */ - if (!mutex_trylock(&vf->cfg_lock)) { - dev_info(dev, "VF %u is being configured in another context that will trigger a VFR, so there is no need to handle this message\n", - vf->vf_id); - ice_put_vf(vf); - return; + if (!ice_vc_is_opcode_allowed(vf, v_opcode)) { + ice_vc_send_msg_to_vf(vf, v_opcode, + VIRTCHNL_STATUS_ERR_NOT_SUPPORTED, NULL, + 0); + goto finish; } switch (v_opcode) { @@ -3780,6 +3770,7 @@ error_handler: vf_id, v_opcode, err); } +finish: mutex_unlock(&vf->cfg_lock); ice_put_vf(vf); } diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 866ee4df9671..9dd38f667059 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -415,8 +415,8 @@ static u16 ice_fill_rx_descs(struct xsk_buff_pool *pool, struct xdp_buff **xdp, */ static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) { + u32 nb_buffs_extra = 0, nb_buffs = 0; union ice_32b_rx_flex_desc *rx_desc; - u32 nb_buffs_extra = 0, nb_buffs; u16 ntu = rx_ring->next_to_use; u16 total_count = count; struct xdp_buff **xdp; @@ -428,6 +428,10 @@ static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) nb_buffs_extra = ice_fill_rx_descs(rx_ring->xsk_pool, xdp, rx_desc, rx_ring->count - ntu); + if (nb_buffs_extra != rx_ring->count - ntu) { + ntu += nb_buffs_extra; + goto exit; + } rx_desc = ICE_RX_DESC(rx_ring, 0); xdp = ice_xdp_buf(rx_ring, 0); ntu = 0; @@ -441,6 +445,7 @@ static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) if (ntu == rx_ring->count) ntu = 0; +exit: if (rx_ring->next_to_use != ntu) ice_release_rx_desc(rx_ring, ntu); diff --git a/drivers/net/ethernet/intel/igc/igc_i225.c b/drivers/net/ethernet/intel/igc/igc_i225.c index 66ea566488d1..59d5c467ea6e 100644 --- a/drivers/net/ethernet/intel/igc/igc_i225.c +++ b/drivers/net/ethernet/intel/igc/igc_i225.c @@ -156,8 +156,15 @@ void igc_release_swfw_sync_i225(struct igc_hw *hw, u16 mask) { u32 swfw_sync; - while (igc_get_hw_semaphore_i225(hw)) - ; /* Empty */ + /* Releasing the resource requires first getting the HW semaphore. + * If we fail to get the semaphore, there is nothing we can do, + * except log an error and quit. We are not allowed to hang here + * indefinitely, as it may cause denial of service or system crash. + */ + if (igc_get_hw_semaphore_i225(hw)) { + hw_dbg("Failed to release SW_FW_SYNC.\n"); + return; + } swfw_sync = rd32(IGC_SW_FW_SYNC); swfw_sync &= ~mask; diff --git a/drivers/net/ethernet/intel/igc/igc_phy.c b/drivers/net/ethernet/intel/igc/igc_phy.c index 40dbf4b43234..6961f65d36b9 100644 --- a/drivers/net/ethernet/intel/igc/igc_phy.c +++ b/drivers/net/ethernet/intel/igc/igc_phy.c @@ -581,7 +581,7 @@ static s32 igc_read_phy_reg_mdic(struct igc_hw *hw, u32 offset, u16 *data) * the lower time out */ for (i = 0; i < IGC_GEN_POLL_TIMEOUT; i++) { - usleep_range(500, 1000); + udelay(50); mdic = rd32(IGC_MDIC); if (mdic & IGC_MDIC_READY) break; @@ -638,7 +638,7 @@ static s32 igc_write_phy_reg_mdic(struct igc_hw *hw, u32 offset, u16 data) * the lower time out */ for (i = 0; i < IGC_GEN_POLL_TIMEOUT; i++) { - usleep_range(500, 1000); + udelay(50); mdic = rd32(IGC_MDIC); if (mdic & IGC_MDIC_READY) break; diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 0d6e3215e98f..653e9f1e35b5 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -992,6 +992,17 @@ static void igc_ptp_time_restore(struct igc_adapter *adapter) igc_ptp_write_i225(adapter, &ts); } +static void igc_ptm_stop(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + u32 ctrl; + + ctrl = rd32(IGC_PTM_CTRL); + ctrl &= ~IGC_PTM_CTRL_EN; + + wr32(IGC_PTM_CTRL, ctrl); +} + /** * igc_ptp_suspend - Disable PTP work items and prepare for suspend * @adapter: Board private structure @@ -1009,8 +1020,10 @@ void igc_ptp_suspend(struct igc_adapter *adapter) adapter->ptp_tx_skb = NULL; clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state); - if (pci_device_is_present(adapter->pdev)) + if (pci_device_is_present(adapter->pdev)) { igc_ptp_time_save(adapter); + igc_ptm_stop(adapter); + } } /** diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c index e596e1a9fc75..69d11ff7677d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c @@ -903,7 +903,8 @@ int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf) /* Tx IPsec offload doesn't seem to work on this * device, so block these requests for now. */ - if (!(sam->flags & XFRM_OFFLOAD_INBOUND)) { + sam->flags = sam->flags & ~XFRM_OFFLOAD_IPV6; + if (sam->flags != XFRM_OFFLOAD_INBOUND) { err = -EOPNOTSUPP; goto err_out; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c index 939b692ffc33..ce843ea91464 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/i2c.c +++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.c @@ -650,6 +650,7 @@ static int mlxsw_i2c_probe(struct i2c_client *client, return 0; errout: + mutex_destroy(&mlxsw_i2c->cmd.lock); i2c_set_clientdata(client, NULL); return err; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c index b73466470f75..fe663b0ab708 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c @@ -423,7 +423,7 @@ mlxsw_sp_span_gretap4_route(const struct net_device *to_dev, parms = mlxsw_sp_ipip_netdev_parms4(to_dev); ip_tunnel_init_flow(&fl4, parms.iph.protocol, *daddrp, *saddrp, - 0, 0, parms.link, tun->fwmark, 0); + 0, 0, dev_net(to_dev), parms.link, tun->fwmark, 0); rt = ip_route_output_key(tun->net, &fl4); if (IS_ERR(rt)) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c b/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c index ce5970bdcc6a..005e56ea5da1 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c @@ -346,7 +346,8 @@ static void lan966x_mac_irq_process(struct lan966x *lan966x, u32 row, lan966x_mac_process_raw_entry(&raw_entries[column], mac, &vid, &dest_idx); - WARN_ON(dest_idx > lan966x->num_phys_ports); + if (WARN_ON(dest_idx >= lan966x->num_phys_ports)) + continue; /* If the entry in SW is found, then there is nothing * to do @@ -392,7 +393,8 @@ static void lan966x_mac_irq_process(struct lan966x *lan966x, u32 row, lan966x_mac_process_raw_entry(&raw_entries[column], mac, &vid, &dest_idx); - WARN_ON(dest_idx > lan966x->num_phys_ports); + if (WARN_ON(dest_idx >= lan966x->num_phys_ports)) + continue; mac_entry = lan966x_mac_alloc_entry(mac, vid, dest_idx); if (!mac_entry) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c index 1f8c67f0261b..95830e3e2b1f 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c @@ -446,6 +446,12 @@ static bool lan966x_hw_offload(struct lan966x *lan966x, u32 port, ANA_CPU_FWD_CFG_MLD_REDIR_ENA))) return true; + if (eth_type_vlan(skb->protocol)) { + skb = skb_vlan_untag(skb); + if (unlikely(!skb)) + return false; + } + if (skb->protocol == htons(ETH_P_IP) && ip_hdr(skb)->protocol == IPPROTO_IGMP) return false; @@ -665,6 +671,9 @@ static void lan966x_cleanup_ports(struct lan966x *lan966x) disable_irq(lan966x->ana_irq); lan966x->ana_irq = -ENXIO; } + + if (lan966x->ptp_irq) + devm_free_irq(lan966x->dev, lan966x->ptp_irq, lan966x); } static int lan966x_probe_port(struct lan966x *lan966x, u32 p, diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c b/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c index ae782778d6dd..0a1041da4384 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c @@ -29,10 +29,10 @@ enum { static u64 lan966x_ptp_get_nominal_value(void) { - u64 res = 0x304d2df1; - - res <<= 32; - return res; + /* This is the default value that for each system clock, the time of day + * is increased. It has the format 5.59 nanosecond. + */ + return 0x304d4873ecade305; } int lan966x_ptp_hwtstamp_set(struct lan966x_port *port, struct ifreq *ifr) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c b/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c index e3555c94294d..df2bee678559 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c @@ -322,8 +322,7 @@ static int lan966x_port_prechangeupper(struct net_device *dev, if (netif_is_bridge_master(info->upper_dev) && !info->linking) switchdev_bridge_port_unoffload(port->dev, port, - &lan966x_switchdev_nb, - &lan966x_switchdev_blocking_nb); + NULL, NULL); return NOTIFY_DONE; } diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index e443bd8b2d09..ca71b62a44dc 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -551,7 +551,7 @@ int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, struct ocelot_vcap_block *block = &ocelot->block[VCAP_IS1]; struct ocelot_port *ocelot_port = ocelot->ports[port]; struct ocelot_vcap_filter *filter; - int err; + int err = 0; u32 val; list_for_each_entry(filter, &block->rules, list) { @@ -570,7 +570,7 @@ int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, if (vlan_aware) err = ocelot_del_vlan_unaware_pvid(ocelot, port, ocelot_port->bridge); - else + else if (ocelot_port->bridge) err = ocelot_add_vlan_unaware_pvid(ocelot, port, ocelot_port->bridge); if (err) @@ -629,6 +629,13 @@ int ocelot_vlan_add(struct ocelot *ocelot, int port, u16 vid, bool pvid, { int err; + /* Ignore VID 0 added to our RX filter by the 8021q module, since + * that collides with OCELOT_STANDALONE_PVID and changes it from + * egress-untagged to egress-tagged. + */ + if (!vid) + return 0; + err = ocelot_vlan_member_add(ocelot, port, vid, untagged); if (err) return err; @@ -651,6 +658,9 @@ int ocelot_vlan_del(struct ocelot *ocelot, int port, u16 vid) bool del_pvid = false; int err; + if (!vid) + return 0; + if (ocelot_port->pvid_vlan && ocelot_port->pvid_vlan->vid == vid) del_pvid = true; @@ -2859,6 +2869,8 @@ static void ocelot_port_set_mcast_flood(struct ocelot *ocelot, int port, val = BIT(port); ocelot_rmw_rix(ocelot, val, BIT(port), ANA_PGID_PGID, PGID_MC); + ocelot_rmw_rix(ocelot, val, BIT(port), ANA_PGID_PGID, PGID_MCIPV4); + ocelot_rmw_rix(ocelot, val, BIT(port), ANA_PGID_PGID, PGID_MCIPV6); } static void ocelot_port_set_bcast_flood(struct ocelot *ocelot, int port, diff --git a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c index cd478d2cd871..00f6d347eaf7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c +++ b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c @@ -57,10 +57,6 @@ #define TSE_PCS_USE_SGMII_ENA BIT(0) #define TSE_PCS_IF_USE_SGMII 0x03 -#define SGMII_ADAPTER_CTRL_REG 0x00 -#define SGMII_ADAPTER_DISABLE 0x0001 -#define SGMII_ADAPTER_ENABLE 0x0000 - #define AUTONEGO_LINK_TIMER 20 static int tse_pcs_reset(void __iomem *base, struct tse_pcs *pcs) @@ -202,12 +198,8 @@ void tse_pcs_fix_mac_speed(struct tse_pcs *pcs, struct phy_device *phy_dev, unsigned int speed) { void __iomem *tse_pcs_base = pcs->tse_pcs_base; - void __iomem *sgmii_adapter_base = pcs->sgmii_adapter_base; u32 val; - writew(SGMII_ADAPTER_ENABLE, - sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG); - pcs->autoneg = phy_dev->autoneg; if (phy_dev->autoneg == AUTONEG_ENABLE) { diff --git a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h index 442812c0a4bd..694ac25ef426 100644 --- a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h +++ b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h @@ -10,6 +10,10 @@ #include <linux/phy.h> #include <linux/timer.h> +#define SGMII_ADAPTER_CTRL_REG 0x00 +#define SGMII_ADAPTER_ENABLE 0x0000 +#define SGMII_ADAPTER_DISABLE 0x0001 + struct tse_pcs { struct device *dev; void __iomem *tse_pcs_base; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index b7c2579c963b..6b447d8f0bd8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -18,9 +18,6 @@ #include "altr_tse_pcs.h" -#define SGMII_ADAPTER_CTRL_REG 0x00 -#define SGMII_ADAPTER_DISABLE 0x0001 - #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_GMII_MII 0x0 #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RGMII 0x1 #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RMII 0x2 @@ -62,14 +59,13 @@ static void socfpga_dwmac_fix_mac_speed(void *priv, unsigned int speed) { struct socfpga_dwmac *dwmac = (struct socfpga_dwmac *)priv; void __iomem *splitter_base = dwmac->splitter_base; - void __iomem *tse_pcs_base = dwmac->pcs.tse_pcs_base; void __iomem *sgmii_adapter_base = dwmac->pcs.sgmii_adapter_base; struct device *dev = dwmac->dev; struct net_device *ndev = dev_get_drvdata(dev); struct phy_device *phy_dev = ndev->phydev; u32 val; - if ((tse_pcs_base) && (sgmii_adapter_base)) + if (sgmii_adapter_base) writew(SGMII_ADAPTER_DISABLE, sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG); @@ -93,8 +89,11 @@ static void socfpga_dwmac_fix_mac_speed(void *priv, unsigned int speed) writel(val, splitter_base + EMAC_SPLITTER_CTRL_REG); } - if (tse_pcs_base && sgmii_adapter_base) + if (phy_dev && sgmii_adapter_base) { + writew(SGMII_ADAPTER_ENABLE, + sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG); tse_pcs_fix_mac_speed(&dwmac->pcs, phy_dev, speed); + } } static int socfpga_dwmac_parse_data(struct socfpga_dwmac *dwmac, struct device *dev) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c index 22fea0f67245..92d32940aff0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c @@ -71,9 +71,9 @@ static int init_systime(void __iomem *ioaddr, u32 sec, u32 nsec) writel(value, ioaddr + PTP_TCR); /* wait for present system time initialize to complete */ - return readl_poll_timeout(ioaddr + PTP_TCR, value, + return readl_poll_timeout_atomic(ioaddr + PTP_TCR, value, !(value & PTP_TCR_TSINIT), - 10000, 100000); + 10, 100000); } static int config_addend(void __iomem *ioaddr, u32 addend) diff --git a/drivers/net/hippi/rrunner.c b/drivers/net/hippi/rrunner.c index 16105292b140..74e845fa2e07 100644 --- a/drivers/net/hippi/rrunner.c +++ b/drivers/net/hippi/rrunner.c @@ -1355,7 +1355,9 @@ static int rr_close(struct net_device *dev) rrpriv->fw_running = 0; + spin_unlock_irqrestore(&rrpriv->lock, flags); del_timer_sync(&rrpriv->timer); + spin_lock_irqsave(&rrpriv->lock, flags); writel(0, ®s->TxPi); writel(0, ®s->IpRxPi); diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 069e8824c264..b00bc8173abe 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -460,8 +460,10 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb) return RX_HANDLER_CONSUMED; *pskb = skb; eth = eth_hdr(skb); - if (macvlan_forward_source(skb, port, eth->h_source)) + if (macvlan_forward_source(skb, port, eth->h_source)) { + kfree_skb(skb); return RX_HANDLER_CONSUMED; + } src = macvlan_hash_lookup(port, eth->h_source); if (src && src->mode != MACVLAN_MODE_VEPA && src->mode != MACVLAN_MODE_BRIDGE) { @@ -480,8 +482,10 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb) return RX_HANDLER_PASS; } - if (macvlan_forward_source(skb, port, eth->h_source)) + if (macvlan_forward_source(skb, port, eth->h_source)) { + kfree_skb(skb); return RX_HANDLER_CONSUMED; + } if (macvlan_passthru(port)) vlan = list_first_or_null_rcu(&port->vlans, struct macvlan_dev, list); diff --git a/drivers/net/mdio/fwnode_mdio.c b/drivers/net/mdio/fwnode_mdio.c index 1becb1a731f6..1c1584fca632 100644 --- a/drivers/net/mdio/fwnode_mdio.c +++ b/drivers/net/mdio/fwnode_mdio.c @@ -43,6 +43,11 @@ int fwnode_mdiobus_phy_device_register(struct mii_bus *mdio, int rc; rc = fwnode_irq_get(child, 0); + /* Don't wait forever if the IRQ provider doesn't become available, + * just fall back to poll mode + */ + if (rc == -EPROBE_DEFER) + rc = driver_deferred_probe_check_state(&phy->mdio.dev); if (rc == -EPROBE_DEFER) return rc; diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index b6fea119fe13..2b7d0720720b 100644 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -880,7 +880,7 @@ static int mv3310_read_status_copper(struct phy_device *phydev) cssr1 = phy_read_mmd(phydev, MDIO_MMD_PCS, MV_PCS_CSSR1); if (cssr1 < 0) - return val; + return cssr1; /* If the link settings are not resolved, mark the link down */ if (!(cssr1 & MV_PCS_CSSR1_RESOLVED)) { diff --git a/drivers/net/phy/microchip_t1.c b/drivers/net/phy/microchip_t1.c index 389df3f4293c..c2c0e361fd3d 100644 --- a/drivers/net/phy/microchip_t1.c +++ b/drivers/net/phy/microchip_t1.c @@ -706,7 +706,6 @@ static int lan87xx_read_status(struct phy_device *phydev) static int lan87xx_config_aneg(struct phy_device *phydev) { u16 ctl = 0; - int rc; switch (phydev->master_slave_set) { case MASTER_SLAVE_CFG_MASTER_FORCE: @@ -722,11 +721,7 @@ static int lan87xx_config_aneg(struct phy_device *phydev) return -EOPNOTSUPP; } - rc = phy_modify_changed(phydev, MII_CTRL1000, CTL1000_AS_MASTER, ctl); - if (rc == 1) - rc = genphy_soft_reset(phydev); - - return rc; + return phy_modify_changed(phydev, MII_CTRL1000, CTL1000_AS_MASTER, ctl); } static struct phy_driver microchip_t1_phy_driver[] = { @@ -748,6 +743,7 @@ static struct phy_driver microchip_t1_phy_driver[] = { { PHY_ID_MATCH_MODEL(PHY_ID_LAN937X), .name = "Microchip LAN937x T1", + .flags = PHY_POLL_CABLE_TEST, .features = PHY_BASIC_T1_FEATURES, .config_init = lan87xx_config_init, .suspend = genphy_suspend, diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 276a0e42ca8e..dbe4c0a4be2c 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1124,7 +1124,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) /* NETIF_F_LLTX requires to do our own update of trans_start */ queue = netdev_get_tx_queue(dev, txq); - queue->trans_start = jiffies; + txq_trans_cond_update(queue); /* Notify and wake up reader process */ if (tfile->flags & TUN_FASYNC) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 1b5714926d81..eb0121a64d6d 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -320,7 +320,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) rcu_read_lock(); rcv = rcu_dereference(priv->peer); - if (unlikely(!rcv)) { + if (unlikely(!rcv) || !pskb_may_pull(skb, ETH_HLEN)) { kfree_skb(skb); goto drop; } diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 87838cbe38cf..cbba9d2e8f32 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1005,6 +1005,24 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, * xdp.data_meta were adjusted */ len = xdp.data_end - xdp.data + vi->hdr_len + metasize; + + /* recalculate headroom if xdp.data or xdp_data_meta + * were adjusted, note that offset should always point + * to the start of the reserved bytes for virtio_net + * header which are followed by xdp.data, that means + * that offset is equal to the headroom (when buf is + * starting at the beginning of the page, otherwise + * there is a base offset inside the page) but it's used + * with a different starting point (buf start) than + * xdp.data (buf start + vnet hdr size). If xdp.data or + * data_meta were adjusted by the xdp prog then the + * headroom size has changed and so has the offset, we + * can use data_hard_start, which points at buf start + + * vnet hdr size, to calculate the new headroom and use + * it later to compute buf start in page_to_skb() + */ + headroom = xdp.data - xdp.data_hard_start - metasize; + /* We can only create skb based on xdp_page. */ if (unlikely(xdp_page != page)) { rcu_read_unlock(); @@ -1012,7 +1030,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, head_skb = page_to_skb(vi, rq, xdp_page, offset, len, PAGE_SIZE, false, metasize, - VIRTIO_XDP_HEADROOM); + headroom); return head_skb; } break; diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index de97ff98d36e..8a5e3a6d32d7 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -651,11 +651,11 @@ static int vxlan_fdb_append(struct vxlan_fdb *f, rd = kmalloc(sizeof(*rd), GFP_ATOMIC); if (rd == NULL) - return -ENOBUFS; + return -ENOMEM; if (dst_cache_init(&rd->dst_cache, GFP_ATOMIC)) { kfree(rd); - return -ENOBUFS; + return -ENOMEM; } rd->remote_ip = *ip; diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c index 23d2954d9747..1e5672019922 100644 --- a/drivers/net/wan/cosa.c +++ b/drivers/net/wan/cosa.c @@ -349,7 +349,7 @@ static int __init cosa_init(void) } } else { cosa_major = register_chrdev(0, "cosa", &cosa_fops); - if (!cosa_major) { + if (cosa_major < 0) { pr_warn("unable to register chardev\n"); err = -EIO; goto out; diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c index 0fad1331303c..aa9a7a5970fd 100644 --- a/drivers/net/wireguard/device.c +++ b/drivers/net/wireguard/device.c @@ -19,6 +19,7 @@ #include <linux/if_arp.h> #include <linux/icmp.h> #include <linux/suspend.h> +#include <net/dst_metadata.h> #include <net/icmp.h> #include <net/rtnetlink.h> #include <net/ip_tunnels.h> @@ -167,7 +168,7 @@ static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev) goto err_peer; } - mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; + mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; __skb_queue_head_init(&packets); if (!skb_is_gso(skb)) { diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index d5b83f90d27a..e6b34b0d61bd 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -3136,6 +3136,20 @@ static void ath11k_mac_op_bss_info_changed(struct ieee80211_hw *hw, arvif->do_not_send_tmpl = true; else arvif->do_not_send_tmpl = false; + + if (vif->bss_conf.he_support) { + ret = ath11k_wmi_vdev_set_param_cmd(ar, arvif->vdev_id, + WMI_VDEV_PARAM_BA_MODE, + WMI_BA_MODE_BUFFER_SIZE_256); + if (ret) + ath11k_warn(ar->ab, + "failed to set BA BUFFER SIZE 256 for vdev: %d\n", + arvif->vdev_id); + else + ath11k_dbg(ar->ab, ATH11K_DBG_MAC, + "Set BA BUFFER SIZE 256 for VDEV: %d\n", + arvif->vdev_id); + } } if (changed & (BSS_CHANGED_BEACON_INFO | BSS_CHANGED_BEACON)) { @@ -3171,14 +3185,6 @@ static void ath11k_mac_op_bss_info_changed(struct ieee80211_hw *hw, if (arvif->is_up && vif->bss_conf.he_support && vif->bss_conf.he_oper.params) { - ret = ath11k_wmi_vdev_set_param_cmd(ar, arvif->vdev_id, - WMI_VDEV_PARAM_BA_MODE, - WMI_BA_MODE_BUFFER_SIZE_256); - if (ret) - ath11k_warn(ar->ab, - "failed to set BA BUFFER SIZE 256 for vdev: %d\n", - arvif->vdev_id); - param_id = WMI_VDEV_PARAM_HEOPS_0_31; param_value = vif->bss_conf.he_oper.params; ret = ath11k_wmi_vdev_set_param_cmd(ar, arvif->vdev_id, diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 98090e40e1cf..e2791d45f5f5 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -839,7 +839,7 @@ static bool ath9k_txq_list_has_key(struct list_head *txq_list, u32 keyix) continue; txinfo = IEEE80211_SKB_CB(bf->bf_mpdu); - fi = (struct ath_frame_info *)&txinfo->rate_driver_data[0]; + fi = (struct ath_frame_info *)&txinfo->status.status_driver_data[0]; if (fi->keyix == keyix) return true; } diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index d0caf1de2bde..db83cc4ba810 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -141,8 +141,8 @@ static struct ath_frame_info *get_frame_info(struct sk_buff *skb) { struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb); BUILD_BUG_ON(sizeof(struct ath_frame_info) > - sizeof(tx_info->rate_driver_data)); - return (struct ath_frame_info *) &tx_info->rate_driver_data[0]; + sizeof(tx_info->status.status_driver_data)); + return (struct ath_frame_info *) &tx_info->status.status_driver_data[0]; } static void ath_send_bar(struct ath_atx_tid *tid, u16 seqno) @@ -2542,6 +2542,16 @@ skip_tx_complete: spin_unlock_irqrestore(&sc->tx.txbuflock, flags); } +static void ath_clear_tx_status(struct ieee80211_tx_info *tx_info) +{ + void *ptr = &tx_info->status; + + memset(ptr + sizeof(tx_info->status.rates), 0, + sizeof(tx_info->status) - + sizeof(tx_info->status.rates) - + sizeof(tx_info->status.status_driver_data)); +} + static void ath_tx_rc_status(struct ath_softc *sc, struct ath_buf *bf, struct ath_tx_status *ts, int nframes, int nbad, int txok) @@ -2553,6 +2563,8 @@ static void ath_tx_rc_status(struct ath_softc *sc, struct ath_buf *bf, struct ath_hw *ah = sc->sc_ah; u8 i, tx_rateindex; + ath_clear_tx_status(tx_info); + if (txok) tx_info->status.ack_signal = ts->ts_rssi; @@ -2567,6 +2579,13 @@ static void ath_tx_rc_status(struct ath_softc *sc, struct ath_buf *bf, tx_info->status.ampdu_len = nframes; tx_info->status.ampdu_ack_len = nframes - nbad; + tx_info->status.rates[tx_rateindex].count = ts->ts_longretry + 1; + + for (i = tx_rateindex + 1; i < hw->max_rates; i++) { + tx_info->status.rates[i].count = 0; + tx_info->status.rates[i].idx = -1; + } + if ((ts->ts_status & ATH9K_TXERR_FILT) == 0 && (tx_info->flags & IEEE80211_TX_CTL_NO_ACK) == 0) { /* @@ -2588,16 +2607,6 @@ static void ath_tx_rc_status(struct ath_softc *sc, struct ath_buf *bf, tx_info->status.rates[tx_rateindex].count = hw->max_rate_tries; } - - for (i = tx_rateindex + 1; i < hw->max_rates; i++) { - tx_info->status.rates[i].count = 0; - tx_info->status.rates[i].idx = -1; - } - - tx_info->status.rates[tx_rateindex].count = ts->ts_longretry + 1; - - /* we report airtime in ath_tx_count_airtime(), don't report twice */ - tx_info->status.tx_time = 0; } static void ath_tx_processq(struct ath_softc *sc, struct ath_txq *txq) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index 55285cad527f..212fbbe1cd7e 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -557,7 +557,7 @@ enum brcmf_sdio_frmtype { BRCMF_SDIO_FT_SUB, }; -#define SDIOD_DRVSTR_KEY(chip, pmu) (((chip) << 16) | (pmu)) +#define SDIOD_DRVSTR_KEY(chip, pmu) (((unsigned int)(chip) << 16) | (pmu)) /* SDIO Pad drive strength to select value mappings */ struct sdiod_drive_str { diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c index 8a22ee581674..df85ebc6e1df 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c @@ -80,7 +80,7 @@ mt76x2e_probe(struct pci_dev *pdev, const struct pci_device_id *id) mt76_rmw_field(dev, 0x15a10, 0x1f << 16, 0x9); /* RG_SSUSB_G1_CDR_BIC_LTR = 0xf */ - mt76_rmw_field(dev, 0x15a0c, 0xf << 28, 0xf); + mt76_rmw_field(dev, 0x15a0c, 0xfU << 28, 0xf); /* RG_SSUSB_CDR_BR_PE1D = 0x3 */ mt76_rmw_field(dev, 0x15c58, 0x3 << 6, 0x3); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index efb85c6d8e2d..e1846d04817f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -366,7 +366,7 @@ static inline void nvme_end_req(struct request *req) { blk_status_t status = nvme_error_status(nvme_req(req)->status); - if (unlikely(nvme_req(req)->status != NVME_SC_SUCCESS)) + if (unlikely(nvme_req(req)->status && !(req->rq_flags & RQF_QUIET))) nvme_log_error(req); nvme_end_req_zoned(req); nvme_trace_bio_complete(req); @@ -1015,6 +1015,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, goto out; } + req->rq_flags |= RQF_QUIET; ret = nvme_execute_rq(req, at_head); if (result && ret >= 0) *result = nvme_req(req)->result; @@ -1287,6 +1288,8 @@ static int nvme_process_ns_desc(struct nvme_ctrl *ctrl, struct nvme_ns_ids *ids, warn_str, cur->nidl); return -1; } + if (ctrl->quirks & NVME_QUIRK_BOGUS_NID) + return NVME_NIDT_EUI64_LEN; memcpy(ids->eui64, data + sizeof(*cur), NVME_NIDT_EUI64_LEN); return NVME_NIDT_EUI64_LEN; case NVME_NIDT_NGUID: @@ -1295,6 +1298,8 @@ static int nvme_process_ns_desc(struct nvme_ctrl *ctrl, struct nvme_ns_ids *ids, warn_str, cur->nidl); return -1; } + if (ctrl->quirks & NVME_QUIRK_BOGUS_NID) + return NVME_NIDT_NGUID_LEN; memcpy(ids->nguid, data + sizeof(*cur), NVME_NIDT_NGUID_LEN); return NVME_NIDT_NGUID_LEN; case NVME_NIDT_UUID: @@ -1303,6 +1308,8 @@ static int nvme_process_ns_desc(struct nvme_ctrl *ctrl, struct nvme_ns_ids *ids, warn_str, cur->nidl); return -1; } + if (ctrl->quirks & NVME_QUIRK_BOGUS_NID) + return NVME_NIDT_UUID_LEN; uuid_copy(&ids->uuid, data + sizeof(*cur)); return NVME_NIDT_UUID_LEN; case NVME_NIDT_CSI: @@ -1399,12 +1406,18 @@ static int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid, if ((*id)->ncap == 0) /* namespace not allocated or attached */ goto out_free_id; - if (ctrl->vs >= NVME_VS(1, 1, 0) && - !memchr_inv(ids->eui64, 0, sizeof(ids->eui64))) - memcpy(ids->eui64, (*id)->eui64, sizeof(ids->eui64)); - if (ctrl->vs >= NVME_VS(1, 2, 0) && - !memchr_inv(ids->nguid, 0, sizeof(ids->nguid))) - memcpy(ids->nguid, (*id)->nguid, sizeof(ids->nguid)); + + if (ctrl->quirks & NVME_QUIRK_BOGUS_NID) { + dev_info(ctrl->device, + "Ignoring bogus Namespace Identifiers\n"); + } else { + if (ctrl->vs >= NVME_VS(1, 1, 0) && + !memchr_inv(ids->eui64, 0, sizeof(ids->eui64))) + memcpy(ids->eui64, (*id)->eui64, sizeof(ids->eui64)); + if (ctrl->vs >= NVME_VS(1, 2, 0) && + !memchr_inv(ids->nguid, 0, sizeof(ids->nguid))) + memcpy(ids->nguid, (*id)->nguid, sizeof(ids->nguid)); + } return 0; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 1393bbf82d71..a2b53ca63335 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -144,6 +144,11 @@ enum nvme_quirks { * encoding the generation sequence number. */ NVME_QUIRK_SKIP_CID_GEN = (1 << 17), + + /* + * Reports garbage in the namespace identifiers (eui64, nguid, uuid). + */ + NVME_QUIRK_BOGUS_NID = (1 << 18), }; /* diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index d817ca17463e..3aacf1c0d5a5 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3409,7 +3409,10 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */ .driver_data = NVME_QUIRK_IDENTIFY_CNS | - NVME_QUIRK_DISABLE_WRITE_ZEROES, }, + NVME_QUIRK_DISABLE_WRITE_ZEROES | + NVME_QUIRK_BOGUS_NID, }, + { PCI_VDEVICE(REDHAT, 0x0010), /* Qemu emulated controller */ + .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x126f, 0x2263), /* Silicon Motion unidentified */ .driver_data = NVME_QUIRK_NO_NS_DESC_LIST, }, { PCI_DEVICE(0x1bb1, 0x0100), /* Seagate Nytro Flash Storage */ @@ -3447,6 +3450,10 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_NO_DEEPEST_PS, }, { PCI_DEVICE(0x2646, 0x2263), /* KINGSTON A2000 NVMe SSD */ .driver_data = NVME_QUIRK_NO_DEEPEST_PS, }, + { PCI_DEVICE(0x1e4B, 0x1002), /* MAXIO MAP1002 */ + .driver_data = NVME_QUIRK_BOGUS_NID, }, + { PCI_DEVICE(0x1e4B, 0x1202), /* MAXIO MAP1202 */ + .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0061), .driver_data = NVME_QUIRK_DMA_ADDRESS_BITS_48, }, { PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0065), diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 9694370651fa..59d3980b8ca2 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -400,6 +400,9 @@ validate_group(struct perf_event *event) if (!validate_event(event->pmu, &fake_pmu, leader)) return -EINVAL; + if (event == leader) + return 0; + for_each_sibling_event(sibling, leader) { if (!validate_event(event->pmu, &fake_pmu, sibling)) return -EINVAL; @@ -489,12 +492,7 @@ __hw_perf_event_init(struct perf_event *event) local64_set(&hwc->period_left, hwc->sample_period); } - if (event->group_leader != event) { - if (validate_group(event) != 0) - return -EINVAL; - } - - return 0; + return validate_group(event); } static int armpmu_event_init(struct perf_event *event) diff --git a/drivers/phy/amlogic/phy-meson-g12a-usb3-pcie.c b/drivers/phy/amlogic/phy-meson-g12a-usb3-pcie.c index 5b471ab80fe2..54d65a6f0fcc 100644 --- a/drivers/phy/amlogic/phy-meson-g12a-usb3-pcie.c +++ b/drivers/phy/amlogic/phy-meson-g12a-usb3-pcie.c @@ -414,19 +414,19 @@ static int phy_g12a_usb3_pcie_probe(struct platform_device *pdev) ret = clk_prepare_enable(priv->clk_ref); if (ret) - goto err_disable_clk_ref; + return ret; priv->reset = devm_reset_control_array_get_exclusive(dev); - if (IS_ERR(priv->reset)) - return PTR_ERR(priv->reset); + if (IS_ERR(priv->reset)) { + ret = PTR_ERR(priv->reset); + goto err_disable_clk_ref; + } priv->phy = devm_phy_create(dev, np, &phy_g12a_usb3_pcie_ops); if (IS_ERR(priv->phy)) { ret = PTR_ERR(priv->phy); - if (ret != -EPROBE_DEFER) - dev_err(dev, "failed to create PHY\n"); - - return ret; + dev_err_probe(dev, ret, "failed to create PHY\n"); + goto err_disable_clk_ref; } phy_set_drvdata(priv->phy, priv); @@ -434,8 +434,12 @@ static int phy_g12a_usb3_pcie_probe(struct platform_device *pdev) phy_provider = devm_of_phy_provider_register(dev, phy_g12a_usb3_pcie_xlate); + if (IS_ERR(phy_provider)) { + ret = PTR_ERR(phy_provider); + goto err_disable_clk_ref; + } - return PTR_ERR_OR_ZERO(phy_provider); + return 0; err_disable_clk_ref: clk_disable_unprepare(priv->clk_ref); diff --git a/drivers/phy/motorola/phy-mapphone-mdm6600.c b/drivers/phy/motorola/phy-mapphone-mdm6600.c index 5172971f4c36..3cd4d51c247c 100644 --- a/drivers/phy/motorola/phy-mapphone-mdm6600.c +++ b/drivers/phy/motorola/phy-mapphone-mdm6600.c @@ -629,7 +629,8 @@ idle: cleanup: if (error < 0) phy_mdm6600_device_power_off(ddata); - + pm_runtime_disable(ddata->dev); + pm_runtime_dont_use_autosuspend(ddata->dev); return error; } diff --git a/drivers/phy/samsung/phy-exynos5250-sata.c b/drivers/phy/samsung/phy-exynos5250-sata.c index 9ec234243f7c..595adba5fb8f 100644 --- a/drivers/phy/samsung/phy-exynos5250-sata.c +++ b/drivers/phy/samsung/phy-exynos5250-sata.c @@ -187,6 +187,7 @@ static int exynos_sata_phy_probe(struct platform_device *pdev) return -EINVAL; sata_phy->client = of_find_i2c_device_by_node(node); + of_node_put(node); if (!sata_phy->client) return -EPROBE_DEFER; @@ -195,20 +196,21 @@ static int exynos_sata_phy_probe(struct platform_device *pdev) sata_phy->phyclk = devm_clk_get(dev, "sata_phyctrl"); if (IS_ERR(sata_phy->phyclk)) { dev_err(dev, "failed to get clk for PHY\n"); - return PTR_ERR(sata_phy->phyclk); + ret = PTR_ERR(sata_phy->phyclk); + goto put_dev; } ret = clk_prepare_enable(sata_phy->phyclk); if (ret < 0) { dev_err(dev, "failed to enable source clk\n"); - return ret; + goto put_dev; } sata_phy->phy = devm_phy_create(dev, NULL, &exynos_sata_phy_ops); if (IS_ERR(sata_phy->phy)) { - clk_disable_unprepare(sata_phy->phyclk); dev_err(dev, "failed to create PHY\n"); - return PTR_ERR(sata_phy->phy); + ret = PTR_ERR(sata_phy->phy); + goto clk_disable; } phy_set_drvdata(sata_phy->phy, sata_phy); @@ -216,11 +218,18 @@ static int exynos_sata_phy_probe(struct platform_device *pdev) phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); if (IS_ERR(phy_provider)) { - clk_disable_unprepare(sata_phy->phyclk); - return PTR_ERR(phy_provider); + ret = PTR_ERR(phy_provider); + goto clk_disable; } return 0; + +clk_disable: + clk_disable_unprepare(sata_phy->phyclk); +put_dev: + put_device(&sata_phy->client->dev); + + return ret; } static const struct of_device_id exynos_sata_phy_of_match[] = { diff --git a/drivers/phy/ti/phy-am654-serdes.c b/drivers/phy/ti/phy-am654-serdes.c index c1211c4f863c..0be727bb9f79 100644 --- a/drivers/phy/ti/phy-am654-serdes.c +++ b/drivers/phy/ti/phy-am654-serdes.c @@ -838,7 +838,7 @@ static int serdes_am654_probe(struct platform_device *pdev) clk_err: of_clk_del_provider(node); - + pm_runtime_disable(dev); return ret; } diff --git a/drivers/phy/ti/phy-omap-usb2.c b/drivers/phy/ti/phy-omap-usb2.c index 3a505fe5715a..31a775877f6e 100644 --- a/drivers/phy/ti/phy-omap-usb2.c +++ b/drivers/phy/ti/phy-omap-usb2.c @@ -215,7 +215,7 @@ static int omap_usb2_enable_clocks(struct omap_usb *phy) return 0; err1: - clk_disable(phy->wkupclk); + clk_disable_unprepare(phy->wkupclk); err0: return ret; diff --git a/drivers/phy/ti/phy-ti-pipe3.c b/drivers/phy/ti/phy-ti-pipe3.c index 2cbc91e535d4..f502c36f3be5 100644 --- a/drivers/phy/ti/phy-ti-pipe3.c +++ b/drivers/phy/ti/phy-ti-pipe3.c @@ -696,6 +696,7 @@ static int ti_pipe3_get_sysctrl(struct ti_pipe3 *phy) } control_pdev = of_find_device_by_node(control_node); + of_node_put(control_node); if (!control_pdev) { dev_err(dev, "Failed to get control device\n"); return -EINVAL; diff --git a/drivers/phy/ti/phy-tusb1210.c b/drivers/phy/ti/phy-tusb1210.c index a0cdbcadf09e..c3ab4b69ea68 100644 --- a/drivers/phy/ti/phy-tusb1210.c +++ b/drivers/phy/ti/phy-tusb1210.c @@ -155,7 +155,7 @@ static int tusb1210_set_mode(struct phy *phy, enum phy_mode mode, int submode) } #ifdef CONFIG_POWER_SUPPLY -const char * const tusb1210_chg_det_states[] = { +static const char * const tusb1210_chg_det_states[] = { "CHG_DET_CONNECTING", "CHG_DET_START_DET", "CHG_DET_READ_DET", @@ -537,12 +537,18 @@ static int tusb1210_probe(struct ulpi *ulpi) tusb1210_probe_charger_detect(tusb); tusb->phy = ulpi_phy_create(ulpi, &phy_ops); - if (IS_ERR(tusb->phy)) - return PTR_ERR(tusb->phy); + if (IS_ERR(tusb->phy)) { + ret = PTR_ERR(tusb->phy); + goto err_remove_charger; + } phy_set_drvdata(tusb->phy, tusb); ulpi_set_drvdata(ulpi, tusb); return 0; + +err_remove_charger: + tusb1210_remove_charger_detect(tusb); + return ret; } static void tusb1210_remove(struct ulpi *ulpi) diff --git a/drivers/pinctrl/intel/pinctrl-alderlake.c b/drivers/pinctrl/intel/pinctrl-alderlake.c index 32ba50efbceb..62dbd1e67513 100644 --- a/drivers/pinctrl/intel/pinctrl-alderlake.c +++ b/drivers/pinctrl/intel/pinctrl-alderlake.c @@ -14,11 +14,17 @@ #include "pinctrl-intel.h" -#define ADL_PAD_OWN 0x0a0 -#define ADL_PADCFGLOCK 0x110 -#define ADL_HOSTSW_OWN 0x150 -#define ADL_GPI_IS 0x200 -#define ADL_GPI_IE 0x220 +#define ADL_N_PAD_OWN 0x020 +#define ADL_N_PADCFGLOCK 0x080 +#define ADL_N_HOSTSW_OWN 0x0b0 +#define ADL_N_GPI_IS 0x100 +#define ADL_N_GPI_IE 0x120 + +#define ADL_S_PAD_OWN 0x0a0 +#define ADL_S_PADCFGLOCK 0x110 +#define ADL_S_HOSTSW_OWN 0x150 +#define ADL_S_GPI_IS 0x200 +#define ADL_S_GPI_IE 0x220 #define ADL_GPP(r, s, e, g) \ { \ @@ -28,14 +34,28 @@ .gpio_base = (g), \ } -#define ADL_COMMUNITY(b, s, e, g) \ +#define ADL_N_COMMUNITY(b, s, e, g) \ + { \ + .barno = (b), \ + .padown_offset = ADL_N_PAD_OWN, \ + .padcfglock_offset = ADL_N_PADCFGLOCK, \ + .hostown_offset = ADL_N_HOSTSW_OWN, \ + .is_offset = ADL_N_GPI_IS, \ + .ie_offset = ADL_N_GPI_IE, \ + .pin_base = (s), \ + .npins = ((e) - (s) + 1), \ + .gpps = (g), \ + .ngpps = ARRAY_SIZE(g), \ + } + +#define ADL_S_COMMUNITY(b, s, e, g) \ { \ .barno = (b), \ - .padown_offset = ADL_PAD_OWN, \ - .padcfglock_offset = ADL_PADCFGLOCK, \ - .hostown_offset = ADL_HOSTSW_OWN, \ - .is_offset = ADL_GPI_IS, \ - .ie_offset = ADL_GPI_IE, \ + .padown_offset = ADL_S_PAD_OWN, \ + .padcfglock_offset = ADL_S_PADCFGLOCK, \ + .hostown_offset = ADL_S_HOSTSW_OWN, \ + .is_offset = ADL_S_GPI_IS, \ + .ie_offset = ADL_S_GPI_IE, \ .pin_base = (s), \ .npins = ((e) - (s) + 1), \ .gpps = (g), \ @@ -342,10 +362,10 @@ static const struct intel_padgroup adln_community5_gpps[] = { }; static const struct intel_community adln_communities[] = { - ADL_COMMUNITY(0, 0, 66, adln_community0_gpps), - ADL_COMMUNITY(1, 67, 168, adln_community1_gpps), - ADL_COMMUNITY(2, 169, 248, adln_community4_gpps), - ADL_COMMUNITY(3, 249, 256, adln_community5_gpps), + ADL_N_COMMUNITY(0, 0, 66, adln_community0_gpps), + ADL_N_COMMUNITY(1, 67, 168, adln_community1_gpps), + ADL_N_COMMUNITY(2, 169, 248, adln_community4_gpps), + ADL_N_COMMUNITY(3, 249, 256, adln_community5_gpps), }; static const struct intel_pinctrl_soc_data adln_soc_data = { @@ -713,11 +733,11 @@ static const struct intel_padgroup adls_community5_gpps[] = { }; static const struct intel_community adls_communities[] = { - ADL_COMMUNITY(0, 0, 94, adls_community0_gpps), - ADL_COMMUNITY(1, 95, 150, adls_community1_gpps), - ADL_COMMUNITY(2, 151, 199, adls_community3_gpps), - ADL_COMMUNITY(3, 200, 269, adls_community4_gpps), - ADL_COMMUNITY(4, 270, 303, adls_community5_gpps), + ADL_S_COMMUNITY(0, 0, 94, adls_community0_gpps), + ADL_S_COMMUNITY(1, 95, 150, adls_community1_gpps), + ADL_S_COMMUNITY(2, 151, 199, adls_community3_gpps), + ADL_S_COMMUNITY(3, 200, 269, adls_community4_gpps), + ADL_S_COMMUNITY(4, 270, 303, adls_community5_gpps), }; static const struct intel_pinctrl_soc_data adls_soc_data = { diff --git a/drivers/pinctrl/mediatek/Kconfig b/drivers/pinctrl/mediatek/Kconfig index 8dca1ef04965..40accd110c3d 100644 --- a/drivers/pinctrl/mediatek/Kconfig +++ b/drivers/pinctrl/mediatek/Kconfig @@ -30,6 +30,7 @@ config PINCTRL_MTK_MOORE select GENERIC_PINMUX_FUNCTIONS select GPIOLIB select OF_GPIO + select EINT_MTK select PINCTRL_MTK_V2 config PINCTRL_MTK_PARIS diff --git a/drivers/pinctrl/pinctrl-pistachio.c b/drivers/pinctrl/pinctrl-pistachio.c index 8d271c6b0ca4..5de691c630b4 100644 --- a/drivers/pinctrl/pinctrl-pistachio.c +++ b/drivers/pinctrl/pinctrl-pistachio.c @@ -1374,10 +1374,10 @@ static int pistachio_gpio_register(struct pistachio_pinctrl *pctl) } irq = irq_of_parse_and_map(child, 0); - if (irq < 0) { - dev_err(pctl->dev, "No IRQ for bank %u: %d\n", i, irq); + if (!irq) { + dev_err(pctl->dev, "No IRQ for bank %u\n", i); of_node_put(child); - ret = irq; + ret = -EINVAL; goto err; } diff --git a/drivers/pinctrl/pinctrl-rockchip.c b/drivers/pinctrl/pinctrl-rockchip.c index a1b598b86aa9..2cb79e649fcf 100644 --- a/drivers/pinctrl/pinctrl-rockchip.c +++ b/drivers/pinctrl/pinctrl-rockchip.c @@ -457,95 +457,110 @@ static struct rockchip_mux_recalced_data rk3128_mux_recalced_data[] = { static struct rockchip_mux_recalced_data rk3308_mux_recalced_data[] = { { + /* gpio1b6_sel */ .num = 1, .pin = 14, .reg = 0x28, .bit = 12, .mask = 0xf }, { + /* gpio1b7_sel */ .num = 1, .pin = 15, .reg = 0x2c, .bit = 0, .mask = 0x3 }, { + /* gpio1c2_sel */ .num = 1, .pin = 18, .reg = 0x30, .bit = 4, .mask = 0xf }, { + /* gpio1c3_sel */ .num = 1, .pin = 19, .reg = 0x30, .bit = 8, .mask = 0xf }, { + /* gpio1c4_sel */ .num = 1, .pin = 20, .reg = 0x30, .bit = 12, .mask = 0xf }, { + /* gpio1c5_sel */ .num = 1, .pin = 21, .reg = 0x34, .bit = 0, .mask = 0xf }, { + /* gpio1c6_sel */ .num = 1, .pin = 22, .reg = 0x34, .bit = 4, .mask = 0xf }, { + /* gpio1c7_sel */ .num = 1, .pin = 23, .reg = 0x34, .bit = 8, .mask = 0xf }, { - .num = 3, - .pin = 12, - .reg = 0x68, - .bit = 8, - .mask = 0xf - }, { - .num = 3, - .pin = 13, - .reg = 0x68, - .bit = 12, - .mask = 0xf - }, { + /* gpio2a2_sel */ .num = 2, .pin = 2, - .reg = 0x608, - .bit = 0, - .mask = 0x7 + .reg = 0x40, + .bit = 4, + .mask = 0x3 }, { + /* gpio2a3_sel */ .num = 2, .pin = 3, - .reg = 0x608, - .bit = 4, - .mask = 0x7 + .reg = 0x40, + .bit = 6, + .mask = 0x3 }, { + /* gpio2c0_sel */ .num = 2, .pin = 16, - .reg = 0x610, - .bit = 8, - .mask = 0x7 + .reg = 0x50, + .bit = 0, + .mask = 0x3 }, { + /* gpio3b2_sel */ .num = 3, .pin = 10, - .reg = 0x610, - .bit = 0, - .mask = 0x7 + .reg = 0x68, + .bit = 4, + .mask = 0x3 }, { + /* gpio3b3_sel */ .num = 3, .pin = 11, - .reg = 0x610, - .bit = 4, - .mask = 0x7 + .reg = 0x68, + .bit = 6, + .mask = 0x3 + }, { + /* gpio3b4_sel */ + .num = 3, + .pin = 12, + .reg = 0x68, + .bit = 8, + .mask = 0xf + }, { + /* gpio3b5_sel */ + .num = 3, + .pin = 13, + .reg = 0x68, + .bit = 12, + .mask = 0xf }, }; diff --git a/drivers/pinctrl/qcom/pinctrl-sm6350.c b/drivers/pinctrl/qcom/pinctrl-sm6350.c index 4d37b817b232..a91a86628f2f 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm6350.c +++ b/drivers/pinctrl/qcom/pinctrl-sm6350.c @@ -264,14 +264,14 @@ static const struct pinctrl_pin_desc sm6350_pins[] = { PINCTRL_PIN(153, "GPIO_153"), PINCTRL_PIN(154, "GPIO_154"), PINCTRL_PIN(155, "GPIO_155"), - PINCTRL_PIN(156, "SDC1_RCLK"), - PINCTRL_PIN(157, "SDC1_CLK"), - PINCTRL_PIN(158, "SDC1_CMD"), - PINCTRL_PIN(159, "SDC1_DATA"), - PINCTRL_PIN(160, "SDC2_CLK"), - PINCTRL_PIN(161, "SDC2_CMD"), - PINCTRL_PIN(162, "SDC2_DATA"), - PINCTRL_PIN(163, "UFS_RESET"), + PINCTRL_PIN(156, "UFS_RESET"), + PINCTRL_PIN(157, "SDC1_RCLK"), + PINCTRL_PIN(158, "SDC1_CLK"), + PINCTRL_PIN(159, "SDC1_CMD"), + PINCTRL_PIN(160, "SDC1_DATA"), + PINCTRL_PIN(161, "SDC2_CLK"), + PINCTRL_PIN(162, "SDC2_CMD"), + PINCTRL_PIN(163, "SDC2_DATA"), }; #define DECLARE_MSM_GPIO_PINS(pin) \ diff --git a/drivers/pinctrl/samsung/Kconfig b/drivers/pinctrl/samsung/Kconfig index dfd805e76862..7b0576f71376 100644 --- a/drivers/pinctrl/samsung/Kconfig +++ b/drivers/pinctrl/samsung/Kconfig @@ -4,14 +4,13 @@ # config PINCTRL_SAMSUNG bool - depends on OF_GPIO + select GPIOLIB select PINMUX select PINCONF config PINCTRL_EXYNOS bool "Pinctrl common driver part for Samsung Exynos SoCs" - depends on OF_GPIO - depends on ARCH_EXYNOS || ARCH_S5PV210 || COMPILE_TEST + depends on ARCH_EXYNOS || ARCH_S5PV210 || (COMPILE_TEST && OF) select PINCTRL_SAMSUNG select PINCTRL_EXYNOS_ARM if ARM && (ARCH_EXYNOS || ARCH_S5PV210) select PINCTRL_EXYNOS_ARM64 if ARM64 && ARCH_EXYNOS @@ -26,12 +25,10 @@ config PINCTRL_EXYNOS_ARM64 config PINCTRL_S3C24XX bool "Samsung S3C24XX SoC pinctrl driver" - depends on OF_GPIO - depends on ARCH_S3C24XX || COMPILE_TEST + depends on ARCH_S3C24XX || (COMPILE_TEST && OF) select PINCTRL_SAMSUNG config PINCTRL_S3C64XX bool "Samsung S3C64XX SoC pinctrl driver" - depends on OF_GPIO - depends on ARCH_S3C64XX || COMPILE_TEST + depends on ARCH_S3C64XX || (COMPILE_TEST && OF) select PINCTRL_SAMSUNG diff --git a/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c b/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c index d291819c2f77..cb965cf93705 100644 --- a/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c +++ b/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c @@ -770,7 +770,7 @@ static const struct samsung_pin_bank_data fsd_pin_banks2[] __initconst = { EXYNOS850_PIN_BANK_EINTN(3, 0x00, "gpq0"), }; -const struct samsung_pin_ctrl fsd_pin_ctrl[] __initconst = { +static const struct samsung_pin_ctrl fsd_pin_ctrl[] __initconst = { { /* pin-controller instance 0 FSYS0 data */ .pin_banks = fsd_pin_banks0, diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c index 9ed764731570..f7c9459f6628 100644 --- a/drivers/pinctrl/stm32/pinctrl-stm32.c +++ b/drivers/pinctrl/stm32/pinctrl-stm32.c @@ -225,6 +225,13 @@ static void stm32_gpio_free(struct gpio_chip *chip, unsigned offset) pinctrl_gpio_free(chip->base + offset); } +static int stm32_gpio_get_noclk(struct gpio_chip *chip, unsigned int offset) +{ + struct stm32_gpio_bank *bank = gpiochip_get_data(chip); + + return !!(readl_relaxed(bank->base + STM32_GPIO_IDR) & BIT(offset)); +} + static int stm32_gpio_get(struct gpio_chip *chip, unsigned offset) { struct stm32_gpio_bank *bank = gpiochip_get_data(chip); @@ -232,7 +239,7 @@ static int stm32_gpio_get(struct gpio_chip *chip, unsigned offset) clk_enable(bank->clk); - ret = !!(readl_relaxed(bank->base + STM32_GPIO_IDR) & BIT(offset)); + ret = stm32_gpio_get_noclk(chip, offset); clk_disable(bank->clk); @@ -311,8 +318,12 @@ static void stm32_gpio_irq_trigger(struct irq_data *d) struct stm32_gpio_bank *bank = d->domain->host_data; int level; + /* Do not access the GPIO if this is not LEVEL triggered IRQ. */ + if (!(bank->irq_type[d->hwirq] & IRQ_TYPE_LEVEL_MASK)) + return; + /* If level interrupt type then retrig */ - level = stm32_gpio_get(&bank->gpio_chip, d->hwirq); + level = stm32_gpio_get_noclk(&bank->gpio_chip, d->hwirq); if ((level == 0 && bank->irq_type[d->hwirq] == IRQ_TYPE_LEVEL_LOW) || (level == 1 && bank->irq_type[d->hwirq] == IRQ_TYPE_LEVEL_HIGH)) irq_chip_retrigger_hierarchy(d); @@ -354,6 +365,7 @@ static int stm32_gpio_irq_request_resources(struct irq_data *irq_data) { struct stm32_gpio_bank *bank = irq_data->domain->host_data; struct stm32_pinctrl *pctl = dev_get_drvdata(bank->gpio_chip.parent); + unsigned long flags; int ret; ret = stm32_gpio_direction_input(&bank->gpio_chip, irq_data->hwirq); @@ -367,6 +379,10 @@ static int stm32_gpio_irq_request_resources(struct irq_data *irq_data) return ret; } + flags = irqd_get_trigger_type(irq_data); + if (flags & IRQ_TYPE_LEVEL_MASK) + clk_enable(bank->clk); + return 0; } @@ -374,6 +390,9 @@ static void stm32_gpio_irq_release_resources(struct irq_data *irq_data) { struct stm32_gpio_bank *bank = irq_data->domain->host_data; + if (bank->irq_type[irq_data->hwirq] & IRQ_TYPE_LEVEL_MASK) + clk_disable(bank->clk); + gpiochip_unlock_as_irq(&bank->gpio_chip, irq_data->hwirq); } diff --git a/drivers/pinctrl/sunplus/sppctl_sp7021.c b/drivers/pinctrl/sunplus/sppctl_sp7021.c index 9748345b9298..cd657760a644 100644 --- a/drivers/pinctrl/sunplus/sppctl_sp7021.c +++ b/drivers/pinctrl/sunplus/sppctl_sp7021.c @@ -419,7 +419,15 @@ static const struct sppctl_grp sp7021grps_prbp[] = { EGRP("PROBE_PORT2", 2, pins_prp2), }; +/* + * Due to compatible reason, the first valid item should start at the third + * position of the array. Please keep the first two items of the table + * no use (dummy). + */ const struct sppctl_func sppctl_list_funcs[] = { + FNCN("", pinmux_type_fpmx, 0x00, 0, 0), + FNCN("", pinmux_type_fpmx, 0x00, 0, 0), + FNCN("L2SW_CLK_OUT", pinmux_type_fpmx, 0x00, 0, 7), FNCN("L2SW_MAC_SMI_MDC", pinmux_type_fpmx, 0x00, 8, 7), FNCN("L2SW_LED_FLASH0", pinmux_type_fpmx, 0x01, 0, 7), diff --git a/drivers/platform/x86/acerhdf.c b/drivers/platform/x86/acerhdf.c index 6b8b3ab8db48..3463629f8764 100644 --- a/drivers/platform/x86/acerhdf.c +++ b/drivers/platform/x86/acerhdf.c @@ -584,21 +584,6 @@ static struct platform_driver acerhdf_driver = { .remove = acerhdf_remove, }; -/* checks if str begins with start */ -static int str_starts_with(const char *str, const char *start) -{ - unsigned long str_len = 0, start_len = 0; - - str_len = strlen(str); - start_len = strlen(start); - - if (str_len >= start_len && - !strncmp(str, start, start_len)) - return 1; - - return 0; -} - /* check hardware */ static int __init acerhdf_check_hardware(void) { @@ -651,9 +636,9 @@ static int __init acerhdf_check_hardware(void) * check if actual hardware BIOS vendor, product and version * IDs start with the strings of BIOS table entry */ - if (str_starts_with(vendor, bt->vendor) && - str_starts_with(product, bt->product) && - str_starts_with(version, bt->version)) { + if (strstarts(vendor, bt->vendor) && + strstarts(product, bt->product) && + strstarts(version, bt->version)) { found = 1; break; } diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c index e9d0dbbb2887..fa4123dbdf7f 100644 --- a/drivers/platform/x86/amd-pmc.c +++ b/drivers/platform/x86/amd-pmc.c @@ -160,8 +160,10 @@ MODULE_PARM_DESC(enable_stb, "Enable the STB debug mechanism"); static struct amd_pmc_dev pmc; static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, u32 arg, u32 *data, u8 msg, bool ret); -static int amd_pmc_write_stb(struct amd_pmc_dev *dev, u32 data); static int amd_pmc_read_stb(struct amd_pmc_dev *dev, u32 *buf); +#ifdef CONFIG_SUSPEND +static int amd_pmc_write_stb(struct amd_pmc_dev *dev, u32 data); +#endif static inline u32 amd_pmc_reg_read(struct amd_pmc_dev *dev, int reg_offset) { @@ -325,6 +327,7 @@ static int get_metrics_table(struct amd_pmc_dev *pdev, struct smu_metrics *table return 0; } +#ifdef CONFIG_SUSPEND static void amd_pmc_validate_deepest(struct amd_pmc_dev *pdev) { struct smu_metrics table; @@ -338,6 +341,7 @@ static void amd_pmc_validate_deepest(struct amd_pmc_dev *pdev) dev_dbg(pdev->dev, "Last suspend in deepest state for %lluus\n", table.timein_s0i3_lastcapture); } +#endif #ifdef CONFIG_DEBUG_FS static int smu_fw_info_show(struct seq_file *s, void *unused) @@ -569,6 +573,7 @@ out_unlock: return rc; } +#ifdef CONFIG_SUSPEND static int amd_pmc_get_os_hint(struct amd_pmc_dev *dev) { switch (dev->cpu_id) { @@ -694,6 +699,7 @@ static struct acpi_s2idle_dev_ops amd_pmc_s2idle_dev_ops = { .prepare = amd_pmc_s2idle_prepare, .restore = amd_pmc_s2idle_restore, }; +#endif static const struct pci_device_id pmc_pci_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_YC) }, @@ -733,6 +739,7 @@ static int amd_pmc_s2d_init(struct amd_pmc_dev *dev) return 0; } +#ifdef CONFIG_SUSPEND static int amd_pmc_write_stb(struct amd_pmc_dev *dev, u32 data) { int err; @@ -753,6 +760,7 @@ static int amd_pmc_write_stb(struct amd_pmc_dev *dev, u32 data) return 0; } +#endif static int amd_pmc_read_stb(struct amd_pmc_dev *dev, u32 *buf) { @@ -859,9 +867,11 @@ static int amd_pmc_probe(struct platform_device *pdev) amd_pmc_get_smu_version(dev); platform_set_drvdata(pdev, dev); +#ifdef CONFIG_SUSPEND err = acpi_register_lps0_dev(&amd_pmc_s2idle_dev_ops); if (err) dev_warn(dev->dev, "failed to register LPS0 sleep handler, expect increased power consumption\n"); +#endif amd_pmc_dbgfs_register(dev); return 0; @@ -875,7 +885,9 @@ static int amd_pmc_remove(struct platform_device *pdev) { struct amd_pmc_dev *dev = platform_get_drvdata(pdev); +#ifdef CONFIG_SUSPEND acpi_unregister_lps0_dev(&amd_pmc_s2idle_dev_ops); +#endif amd_pmc_dbgfs_unregister(dev); pci_dev_put(dev->rdev); mutex_destroy(&dev->lock); diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 2104a2621e50..0e7fbed8a50d 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -371,10 +371,14 @@ static int asus_wmi_evaluate_method_buf(u32 method_id, switch (obj->type) { case ACPI_TYPE_BUFFER: - if (obj->buffer.length > size) + if (obj->buffer.length > size) { err = -ENOSPC; - if (obj->buffer.length == 0) + break; + } + if (obj->buffer.length == 0) { err = -ENODATA; + break; + } memcpy(ret_buffer, obj->buffer.pointer, obj->buffer.length); break; @@ -2223,9 +2227,10 @@ static int fan_curve_check_present(struct asus_wmi *asus, bool *available, err = fan_curve_get_factory_default(asus, fan_dev); if (err) { - if (err == -ENODEV || err == -ENODATA) - return 0; - return err; + pr_debug("fan_curve_get_factory_default(0x%08x) failed: %d\n", + fan_dev, err); + /* Don't cause probe to fail on devices without fan-curves */ + return 0; } *available = true; diff --git a/drivers/platform/x86/barco-p50-gpio.c b/drivers/platform/x86/barco-p50-gpio.c index f5c72e33f9ae..05534287bc26 100644 --- a/drivers/platform/x86/barco-p50-gpio.c +++ b/drivers/platform/x86/barco-p50-gpio.c @@ -10,7 +10,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include <linux/io.h> #include <linux/delay.h> #include <linux/dmi.h> #include <linux/err.h> diff --git a/drivers/platform/x86/dell/dell-laptop.c b/drivers/platform/x86/dell/dell-laptop.c index 8230e7a68a5e..1321687d923e 100644 --- a/drivers/platform/x86/dell/dell-laptop.c +++ b/drivers/platform/x86/dell/dell-laptop.c @@ -80,6 +80,10 @@ static struct quirk_entry quirk_dell_inspiron_1012 = { .kbd_led_not_present = true, }; +static struct quirk_entry quirk_dell_latitude_7520 = { + .kbd_missing_ac_tag = true, +}; + static struct platform_driver platform_driver = { .driver = { .name = "dell-laptop", @@ -336,6 +340,15 @@ static const struct dmi_system_id dell_quirks[] __initconst = { }, .driver_data = &quirk_dell_inspiron_1012, }, + { + .callback = dmi_matched, + .ident = "Dell Latitude 7520", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Latitude 7520"), + }, + .driver_data = &quirk_dell_latitude_7520, + }, { } }; diff --git a/drivers/platform/x86/gigabyte-wmi.c b/drivers/platform/x86/gigabyte-wmi.c index 658bab4b7964..e87a931eab1e 100644 --- a/drivers/platform/x86/gigabyte-wmi.c +++ b/drivers/platform/x86/gigabyte-wmi.c @@ -148,6 +148,7 @@ static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = { DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550I AORUS PRO AX"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M AORUS PRO-P"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M DS3H"), + DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B660 GAMING X DDR4"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("Z390 I AORUS PRO WIFI-CF"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 AORUS ELITE"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 GAMING X"), diff --git a/drivers/platform/x86/intel/pmc/core.h b/drivers/platform/x86/intel/pmc/core.h index a46d3b53bf61..7a059e02c265 100644 --- a/drivers/platform/x86/intel/pmc/core.h +++ b/drivers/platform/x86/intel/pmc/core.h @@ -236,7 +236,7 @@ enum ppfear_regs { #define ADL_LPM_STATUS_LATCH_EN_OFFSET 0x1704 #define ADL_LPM_LIVE_STATUS_OFFSET 0x1764 -const char *pmc_lpm_modes[] = { +static const char *pmc_lpm_modes[] = { "S0i2.0", "S0i2.1", "S0i2.2", diff --git a/drivers/platform/x86/intel/sdsi.c b/drivers/platform/x86/intel/sdsi.c index 11d14cc0ff0a..c830e98dfa38 100644 --- a/drivers/platform/x86/intel/sdsi.c +++ b/drivers/platform/x86/intel/sdsi.c @@ -51,6 +51,8 @@ #define MBOX_TIMEOUT_US 2000 #define MBOX_TIMEOUT_ACQUIRE_US 1000 #define MBOX_POLLING_PERIOD_US 100 +#define MBOX_ACQUIRE_NUM_RETRIES 5 +#define MBOX_ACQUIRE_RETRY_DELAY_MS 500 #define MBOX_MAX_PACKETS 4 #define MBOX_OWNER_NONE 0x00 @@ -81,7 +83,7 @@ enum sdsi_command { struct sdsi_mbox_info { u64 *payload; - u64 *buffer; + void *buffer; int size; }; @@ -163,9 +165,7 @@ static int sdsi_mbox_cmd_read(struct sdsi_priv *priv, struct sdsi_mbox_info *inf total = 0; loop = 0; do { - int offset = SDSI_SIZE_MAILBOX * loop; - void __iomem *addr = priv->mbox_addr + offset; - u64 *buf = info->buffer + offset / SDSI_SIZE_CMD; + void *buf = info->buffer + (SDSI_SIZE_MAILBOX * loop); u32 packet_size; /* Poll on ready bit */ @@ -196,7 +196,7 @@ static int sdsi_mbox_cmd_read(struct sdsi_priv *priv, struct sdsi_mbox_info *inf break; } - sdsi_memcpy64_fromio(buf, addr, round_up(packet_size, SDSI_SIZE_CMD)); + sdsi_memcpy64_fromio(buf, priv->mbox_addr, round_up(packet_size, SDSI_SIZE_CMD)); total += packet_size; @@ -243,8 +243,8 @@ static int sdsi_mbox_cmd_write(struct sdsi_priv *priv, struct sdsi_mbox_info *in FIELD_PREP(CTRL_PACKET_SIZE, info->size); writeq(control, priv->control_addr); - /* Poll on run_busy bit */ - ret = readq_poll_timeout(priv->control_addr, control, !(control & CTRL_RUN_BUSY), + /* Poll on ready bit */ + ret = readq_poll_timeout(priv->control_addr, control, control & CTRL_READY, MBOX_POLLING_PERIOD_US, MBOX_TIMEOUT_US); if (ret) @@ -263,7 +263,7 @@ static int sdsi_mbox_acquire(struct sdsi_priv *priv, struct sdsi_mbox_info *info { u64 control; u32 owner; - int ret; + int ret, retries = 0; lockdep_assert_held(&priv->mb_lock); @@ -273,13 +273,29 @@ static int sdsi_mbox_acquire(struct sdsi_priv *priv, struct sdsi_mbox_info *info if (owner != MBOX_OWNER_NONE) return -EBUSY; - /* Write first qword of payload */ - writeq(info->payload[0], priv->mbox_addr); + /* + * If there has been no recent transaction and no one owns the mailbox, + * we should acquire it in under 1ms. However, if we've accessed it + * recently it may take up to 2.1 seconds to acquire it again. + */ + do { + /* Write first qword of payload */ + writeq(info->payload[0], priv->mbox_addr); + + /* Check for ownership */ + ret = readq_poll_timeout(priv->control_addr, control, + FIELD_GET(CTRL_OWNER, control) == MBOX_OWNER_INBAND, + MBOX_POLLING_PERIOD_US, MBOX_TIMEOUT_ACQUIRE_US); + + if (FIELD_GET(CTRL_OWNER, control) == MBOX_OWNER_NONE && + retries++ < MBOX_ACQUIRE_NUM_RETRIES) { + msleep(MBOX_ACQUIRE_RETRY_DELAY_MS); + continue; + } - /* Check for ownership */ - ret = readq_poll_timeout(priv->control_addr, control, - FIELD_GET(CTRL_OWNER, control) & MBOX_OWNER_INBAND, - MBOX_POLLING_PERIOD_US, MBOX_TIMEOUT_ACQUIRE_US); + /* Either we got it or someone else did. */ + break; + } while (true); return ret; } diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c index c61f804dd44e..8f9c571d7257 100644 --- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c +++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c @@ -212,6 +212,9 @@ static int __init intel_uncore_init(void) const struct x86_cpu_id *id; int ret; + if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) + return -ENODEV; + id = x86_match_cpu(intel_uncore_cpu_ids); if (!id) return -ENODEV; diff --git a/drivers/platform/x86/samsung-laptop.c b/drivers/platform/x86/samsung-laptop.c index c1d9ed9b7b67..19f6b456234f 100644 --- a/drivers/platform/x86/samsung-laptop.c +++ b/drivers/platform/x86/samsung-laptop.c @@ -1121,8 +1121,6 @@ static void kbd_led_set(struct led_classdev *led_cdev, if (value > samsung->kbd_led.max_brightness) value = samsung->kbd_led.max_brightness; - else if (value < 0) - value = 0; samsung->kbd_led_wk = value; queue_work(samsung->led_workqueue, &samsung->kbd_led_work); diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c index bce17ca97947..a01a92769c1a 100644 --- a/drivers/platform/x86/think-lmi.c +++ b/drivers/platform/x86/think-lmi.c @@ -740,16 +740,8 @@ static ssize_t certificate_store(struct kobject *kobj, if (!tlmi_priv.certificate_support) return -EOPNOTSUPP; - new_cert = kstrdup(buf, GFP_KERNEL); - if (!new_cert) - return -ENOMEM; - /* Strip out CR if one is present */ - strip_cr(new_cert); - /* If empty then clear installed certificate */ - if (new_cert[0] == '\0') { /* Clear installed certificate */ - kfree(new_cert); - + if ((buf[0] == '\0') || (buf[0] == '\n')) { /* Clear installed certificate */ /* Check that signature is set */ if (!setting->signature || !setting->signature[0]) return -EACCES; @@ -763,14 +755,16 @@ static ssize_t certificate_store(struct kobject *kobj, ret = tlmi_simple_call(LENOVO_CLEAR_BIOS_CERT_GUID, auth_str); kfree(auth_str); - if (ret) - return ret; - kfree(setting->certificate); - setting->certificate = NULL; - return count; + return ret ?: count; } + new_cert = kstrdup(buf, GFP_KERNEL); + if (!new_cert) + return -ENOMEM; + /* Strip out CR if one is present */ + strip_cr(new_cert); + if (setting->cert_installed) { /* Certificate is installed so this is an update */ if (!setting->signature || !setting->signature[0]) { @@ -792,21 +786,14 @@ static ssize_t certificate_store(struct kobject *kobj, auth_str = kasprintf(GFP_KERNEL, "%s,%s", new_cert, setting->password); } - if (!auth_str) { - kfree(new_cert); + kfree(new_cert); + if (!auth_str) return -ENOMEM; - } ret = tlmi_simple_call(guid, auth_str); kfree(auth_str); - if (ret) { - kfree(new_cert); - return ret; - } - kfree(setting->certificate); - setting->certificate = new_cert; - return count; + return ret ?: count; } static struct kobj_attribute auth_certificate = __ATTR_WO(certificate); @@ -1194,6 +1181,10 @@ static void tlmi_release_attr(void) kset_unregister(tlmi_priv.attribute_kset); + /* Free up any saved signatures */ + kfree(tlmi_priv.pwd_admin->signature); + kfree(tlmi_priv.pwd_admin->save_signature); + /* Authentication structures */ sysfs_remove_group(&tlmi_priv.pwd_admin->kobj, &auth_attr_group); kobject_put(&tlmi_priv.pwd_admin->kobj); @@ -1210,11 +1201,6 @@ static void tlmi_release_attr(void) } kset_unregister(tlmi_priv.authentication_kset); - - /* Free up any saved certificates/signatures */ - kfree(tlmi_priv.pwd_admin->certificate); - kfree(tlmi_priv.pwd_admin->signature); - kfree(tlmi_priv.pwd_admin->save_signature); } static int tlmi_sysfs_init(void) diff --git a/drivers/platform/x86/think-lmi.h b/drivers/platform/x86/think-lmi.h index 4f69df6eed07..4daba6151cd6 100644 --- a/drivers/platform/x86/think-lmi.h +++ b/drivers/platform/x86/think-lmi.h @@ -63,7 +63,6 @@ struct tlmi_pwd_setting { int index; /*Used for HDD and NVME auth */ enum level_option level; bool cert_installed; - char *certificate; char *signature; char *save_signature; }; diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c index ea02c8dcd748..d925cb137e12 100644 --- a/drivers/power/supply/power_supply_core.c +++ b/drivers/power/supply/power_supply_core.c @@ -604,6 +604,12 @@ int power_supply_get_battery_info(struct power_supply *psy, err = samsung_sdi_battery_get_info(&psy->dev, value, &info); if (!err) goto out_ret_pointer; + else if (err == -ENODEV) + /* + * Device does not have a static battery. + * Proceed to look for a simple battery. + */ + err = 0; if (strcmp("simple-battery", value)) { err = -ENODEV; diff --git a/drivers/power/supply/samsung-sdi-battery.c b/drivers/power/supply/samsung-sdi-battery.c index 9d59f277f519..b33daab798b9 100644 --- a/drivers/power/supply/samsung-sdi-battery.c +++ b/drivers/power/supply/samsung-sdi-battery.c @@ -824,6 +824,7 @@ static struct samsung_sdi_battery samsung_sdi_batteries[] = { .constant_charge_current_max_ua = 900000, .constant_charge_voltage_max_uv = 4200000, .charge_term_current_ua = 200000, + .charge_restart_voltage_uv = 4170000, .maintenance_charge = samsung_maint_charge_table, .maintenance_charge_size = ARRAY_SIZE(samsung_maint_charge_table), .alert_low_temp_charge_current_ua = 300000, @@ -867,6 +868,7 @@ static struct samsung_sdi_battery samsung_sdi_batteries[] = { .constant_charge_current_max_ua = 1500000, .constant_charge_voltage_max_uv = 4350000, .charge_term_current_ua = 120000, + .charge_restart_voltage_uv = 4300000, .maintenance_charge = samsung_maint_charge_table, .maintenance_charge_size = ARRAY_SIZE(samsung_maint_charge_table), .alert_low_temp_charge_current_ua = 300000, diff --git a/drivers/reset/reset-rzg2l-usbphy-ctrl.c b/drivers/reset/reset-rzg2l-usbphy-ctrl.c index 1e8315038850..a8dde4606360 100644 --- a/drivers/reset/reset-rzg2l-usbphy-ctrl.c +++ b/drivers/reset/reset-rzg2l-usbphy-ctrl.c @@ -121,7 +121,9 @@ static int rzg2l_usbphy_ctrl_probe(struct platform_device *pdev) return dev_err_probe(dev, PTR_ERR(priv->rstc), "failed to get reset\n"); - reset_control_deassert(priv->rstc); + error = reset_control_deassert(priv->rstc); + if (error) + return error; priv->rcdev.ops = &rzg2l_usbphy_ctrl_reset_ops; priv->rcdev.of_reset_n_cells = 1; diff --git a/drivers/reset/tegra/reset-bpmp.c b/drivers/reset/tegra/reset-bpmp.c index 24d3395964cc..4c5bba52b105 100644 --- a/drivers/reset/tegra/reset-bpmp.c +++ b/drivers/reset/tegra/reset-bpmp.c @@ -20,6 +20,7 @@ static int tegra_bpmp_reset_common(struct reset_controller_dev *rstc, struct tegra_bpmp *bpmp = to_tegra_bpmp(rstc); struct mrq_reset_request request; struct tegra_bpmp_message msg; + int err; memset(&request, 0, sizeof(request)); request.cmd = command; @@ -30,7 +31,13 @@ static int tegra_bpmp_reset_common(struct reset_controller_dev *rstc, msg.tx.data = &request; msg.tx.size = sizeof(request); - return tegra_bpmp_transfer(bpmp, &msg); + err = tegra_bpmp_transfer(bpmp, &msg); + if (err) + return err; + if (msg.rx.ret) + return -EINVAL; + + return 0; } static int tegra_bpmp_reset_module(struct reset_controller_dev *rstc, diff --git a/drivers/scsi/bnx2i/bnx2i_hwi.c b/drivers/scsi/bnx2i/bnx2i_hwi.c index 7fe7f53a41c0..6c864b093ac9 100644 --- a/drivers/scsi/bnx2i/bnx2i_hwi.c +++ b/drivers/scsi/bnx2i/bnx2i_hwi.c @@ -1977,7 +1977,7 @@ static int bnx2i_process_new_cqes(struct bnx2i_conn *bnx2i_conn) if (nopin->cq_req_sn != qp->cqe_exp_seq_sn) break; - if (unlikely(test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx))) { + if (unlikely(test_bit(ISCSI_CONN_FLAG_SUSPEND_RX, &conn->flags))) { if (nopin->op_code == ISCSI_OP_NOOP_IN && nopin->itt == (u16) RESERVED_ITT) { printk(KERN_ALERT "bnx2i: Unsolicited " diff --git a/drivers/scsi/bnx2i/bnx2i_iscsi.c b/drivers/scsi/bnx2i/bnx2i_iscsi.c index fe86fd61a995..15fbd09baa94 100644 --- a/drivers/scsi/bnx2i/bnx2i_iscsi.c +++ b/drivers/scsi/bnx2i/bnx2i_iscsi.c @@ -1721,7 +1721,7 @@ static int bnx2i_tear_down_conn(struct bnx2i_hba *hba, struct iscsi_conn *conn = ep->conn->cls_conn->dd_data; /* Must suspend all rx queue activity for this ep */ - set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx); + set_bit(ISCSI_CONN_FLAG_SUSPEND_RX, &conn->flags); } /* CONN_DISCONNECT timeout may or may not be an issue depending * on what transcribed in TCP layer, different targets behave diff --git a/drivers/scsi/cxgbi/libcxgbi.c b/drivers/scsi/cxgbi/libcxgbi.c index 8c7d4dda4cf2..4365d52c6430 100644 --- a/drivers/scsi/cxgbi/libcxgbi.c +++ b/drivers/scsi/cxgbi/libcxgbi.c @@ -1634,11 +1634,11 @@ void cxgbi_conn_pdu_ready(struct cxgbi_sock *csk) log_debug(1 << CXGBI_DBG_PDU_RX, "csk 0x%p, conn 0x%p.\n", csk, conn); - if (unlikely(!conn || conn->suspend_rx)) { + if (unlikely(!conn || test_bit(ISCSI_CONN_FLAG_SUSPEND_RX, &conn->flags))) { log_debug(1 << CXGBI_DBG_PDU_RX, - "csk 0x%p, conn 0x%p, id %d, suspend_rx %lu!\n", + "csk 0x%p, conn 0x%p, id %d, conn flags 0x%lx!\n", csk, conn, conn ? conn->id : 0xFF, - conn ? conn->suspend_rx : 0xFF); + conn ? conn->flags : 0xFF); return; } diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index cf4211c6500d..797abf4f5399 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -678,7 +678,8 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr, struct iscsi_task *task; itt_t itt; - if (session->state == ISCSI_STATE_TERMINATE) + if (session->state == ISCSI_STATE_TERMINATE || + !test_bit(ISCSI_CONN_FLAG_BOUND, &conn->flags)) return NULL; if (opcode == ISCSI_OP_LOGIN || opcode == ISCSI_OP_TEXT) { @@ -1392,8 +1393,8 @@ static bool iscsi_set_conn_failed(struct iscsi_conn *conn) if (conn->stop_stage == 0) session->state = ISCSI_STATE_FAILED; - set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx); - set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx); + set_bit(ISCSI_CONN_FLAG_SUSPEND_TX, &conn->flags); + set_bit(ISCSI_CONN_FLAG_SUSPEND_RX, &conn->flags); return true; } @@ -1454,7 +1455,7 @@ static int iscsi_xmit_task(struct iscsi_conn *conn, struct iscsi_task *task, * Do this after dropping the extra ref because if this was a requeue * it's removed from that list and cleanup_queued_task would miss it. */ - if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx)) { + if (test_bit(ISCSI_CONN_FLAG_SUSPEND_TX, &conn->flags)) { /* * Save the task and ref in case we weren't cleaning up this * task and get woken up again. @@ -1532,7 +1533,7 @@ static int iscsi_data_xmit(struct iscsi_conn *conn) int rc = 0; spin_lock_bh(&conn->session->frwd_lock); - if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx)) { + if (test_bit(ISCSI_CONN_FLAG_SUSPEND_TX, &conn->flags)) { ISCSI_DBG_SESSION(conn->session, "Tx suspended!\n"); spin_unlock_bh(&conn->session->frwd_lock); return -ENODATA; @@ -1746,7 +1747,7 @@ int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc) goto fault; } - if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx)) { + if (test_bit(ISCSI_CONN_FLAG_SUSPEND_TX, &conn->flags)) { reason = FAILURE_SESSION_IN_RECOVERY; sc->result = DID_REQUEUE << 16; goto fault; @@ -1935,7 +1936,7 @@ static void fail_scsi_tasks(struct iscsi_conn *conn, u64 lun, int error) void iscsi_suspend_queue(struct iscsi_conn *conn) { spin_lock_bh(&conn->session->frwd_lock); - set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx); + set_bit(ISCSI_CONN_FLAG_SUSPEND_TX, &conn->flags); spin_unlock_bh(&conn->session->frwd_lock); } EXPORT_SYMBOL_GPL(iscsi_suspend_queue); @@ -1953,7 +1954,7 @@ void iscsi_suspend_tx(struct iscsi_conn *conn) struct Scsi_Host *shost = conn->session->host; struct iscsi_host *ihost = shost_priv(shost); - set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx); + set_bit(ISCSI_CONN_FLAG_SUSPEND_TX, &conn->flags); if (ihost->workq) flush_workqueue(ihost->workq); } @@ -1961,7 +1962,7 @@ EXPORT_SYMBOL_GPL(iscsi_suspend_tx); static void iscsi_start_tx(struct iscsi_conn *conn) { - clear_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx); + clear_bit(ISCSI_CONN_FLAG_SUSPEND_TX, &conn->flags); iscsi_conn_queue_work(conn); } @@ -2214,6 +2215,8 @@ void iscsi_conn_unbind(struct iscsi_cls_conn *cls_conn, bool is_active) iscsi_suspend_tx(conn); spin_lock_bh(&session->frwd_lock); + clear_bit(ISCSI_CONN_FLAG_BOUND, &conn->flags); + if (!is_active) { /* * if logout timed out before userspace could even send a PDU @@ -3317,6 +3320,8 @@ int iscsi_conn_bind(struct iscsi_cls_session *cls_session, spin_lock_bh(&session->frwd_lock); if (is_leading) session->leadconn = conn; + + set_bit(ISCSI_CONN_FLAG_BOUND, &conn->flags); spin_unlock_bh(&session->frwd_lock); /* @@ -3329,8 +3334,8 @@ int iscsi_conn_bind(struct iscsi_cls_session *cls_session, /* * Unblock xmitworker(), Login Phase will pass through. */ - clear_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx); - clear_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx); + clear_bit(ISCSI_CONN_FLAG_SUSPEND_RX, &conn->flags); + clear_bit(ISCSI_CONN_FLAG_SUSPEND_TX, &conn->flags); return 0; } EXPORT_SYMBOL_GPL(iscsi_conn_bind); diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c index 2e9ffe3d1a55..883005757ddb 100644 --- a/drivers/scsi/libiscsi_tcp.c +++ b/drivers/scsi/libiscsi_tcp.c @@ -927,7 +927,7 @@ int iscsi_tcp_recv_skb(struct iscsi_conn *conn, struct sk_buff *skb, */ conn->last_recv = jiffies; - if (unlikely(conn->suspend_rx)) { + if (unlikely(test_bit(ISCSI_CONN_FLAG_SUSPEND_RX, &conn->flags))) { ISCSI_DBG_TCP(conn, "Rx suspended!\n"); *status = ISCSI_TCP_SUSPENDED; return 0; diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c index f90b707c190b..01c5e8ff4cc5 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.c +++ b/drivers/scsi/pm8001/pm80xx_hwi.c @@ -766,6 +766,10 @@ static void init_default_table_values(struct pm8001_hba_info *pm8001_ha) pm8001_ha->main_cfg_tbl.pm80xx_tbl.pcs_event_log_severity = 0x01; pm8001_ha->main_cfg_tbl.pm80xx_tbl.fatal_err_interrupt = 0x01; + /* Enable higher IQs and OQs, 32 to 63, bit 16 */ + if (pm8001_ha->max_q_num > 32) + pm8001_ha->main_cfg_tbl.pm80xx_tbl.fatal_err_interrupt |= + 1 << 16; /* Disable end to end CRC checking */ pm8001_ha->main_cfg_tbl.pm80xx_tbl.crc_core_dump = (0x1 << 16); @@ -1027,6 +1031,13 @@ static int mpi_init_check(struct pm8001_hba_info *pm8001_ha) if (0x0000 != gst_len_mpistate) return -EBUSY; + /* + * As per controller datasheet, after successful MPI + * initialization minimum 500ms delay is required before + * issuing commands. + */ + msleep(500); + return 0; } @@ -1727,10 +1738,11 @@ static void pm80xx_chip_interrupt_enable(struct pm8001_hba_info *pm8001_ha, u8 vec) { #ifdef PM8001_USE_MSIX - u32 mask; - mask = (u32)(1 << vec); - - pm8001_cw32(pm8001_ha, 0, MSGU_ODMR_CLR, (u32)(mask & 0xFFFFFFFF)); + if (vec < 32) + pm8001_cw32(pm8001_ha, 0, MSGU_ODMR_CLR, 1U << vec); + else + pm8001_cw32(pm8001_ha, 0, MSGU_ODMR_CLR_U, + 1U << (vec - 32)); return; #endif pm80xx_chip_intx_interrupt_enable(pm8001_ha); @@ -1746,12 +1758,15 @@ static void pm80xx_chip_interrupt_disable(struct pm8001_hba_info *pm8001_ha, u8 vec) { #ifdef PM8001_USE_MSIX - u32 mask; - if (vec == 0xFF) - mask = 0xFFFFFFFF; + if (vec == 0xFF) { + /* disable all vectors 0-31, 32-63 */ + pm8001_cw32(pm8001_ha, 0, MSGU_ODMR, 0xFFFFFFFF); + pm8001_cw32(pm8001_ha, 0, MSGU_ODMR_U, 0xFFFFFFFF); + } else if (vec < 32) + pm8001_cw32(pm8001_ha, 0, MSGU_ODMR, 1U << vec); else - mask = (u32)(1 << vec); - pm8001_cw32(pm8001_ha, 0, MSGU_ODMR, (u32)(mask & 0xFFFFFFFF)); + pm8001_cw32(pm8001_ha, 0, MSGU_ODMR_U, + 1U << (vec - 32)); return; #endif pm80xx_chip_intx_interrupt_disable(pm8001_ha); diff --git a/drivers/scsi/qedi/qedi_iscsi.c b/drivers/scsi/qedi/qedi_iscsi.c index 8196f89f404e..31ec429104e2 100644 --- a/drivers/scsi/qedi/qedi_iscsi.c +++ b/drivers/scsi/qedi/qedi_iscsi.c @@ -860,6 +860,37 @@ static int qedi_task_xmit(struct iscsi_task *task) return qedi_iscsi_send_ioreq(task); } +static void qedi_offload_work(struct work_struct *work) +{ + struct qedi_endpoint *qedi_ep = + container_of(work, struct qedi_endpoint, offload_work); + struct qedi_ctx *qedi; + int wait_delay = 5 * HZ; + int ret; + + qedi = qedi_ep->qedi; + + ret = qedi_iscsi_offload_conn(qedi_ep); + if (ret) { + QEDI_ERR(&qedi->dbg_ctx, + "offload error: iscsi_cid=%u, qedi_ep=%p, ret=%d\n", + qedi_ep->iscsi_cid, qedi_ep, ret); + qedi_ep->state = EP_STATE_OFLDCONN_FAILED; + return; + } + + ret = wait_event_interruptible_timeout(qedi_ep->tcp_ofld_wait, + (qedi_ep->state == + EP_STATE_OFLDCONN_COMPL), + wait_delay); + if (ret <= 0 || qedi_ep->state != EP_STATE_OFLDCONN_COMPL) { + qedi_ep->state = EP_STATE_OFLDCONN_FAILED; + QEDI_ERR(&qedi->dbg_ctx, + "Offload conn TIMEOUT iscsi_cid=%u, qedi_ep=%p\n", + qedi_ep->iscsi_cid, qedi_ep); + } +} + static struct iscsi_endpoint * qedi_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr, int non_blocking) @@ -908,6 +939,7 @@ qedi_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr, } qedi_ep = ep->dd_data; memset(qedi_ep, 0, sizeof(struct qedi_endpoint)); + INIT_WORK(&qedi_ep->offload_work, qedi_offload_work); qedi_ep->state = EP_STATE_IDLE; qedi_ep->iscsi_cid = (u32)-1; qedi_ep->qedi = qedi; @@ -1056,12 +1088,11 @@ static void qedi_ep_disconnect(struct iscsi_endpoint *ep) qedi_ep = ep->dd_data; qedi = qedi_ep->qedi; + flush_work(&qedi_ep->offload_work); + if (qedi_ep->state == EP_STATE_OFLDCONN_START) goto ep_exit_recover; - if (qedi_ep->state != EP_STATE_OFLDCONN_NONE) - flush_work(&qedi_ep->offload_work); - if (qedi_ep->conn) { qedi_conn = qedi_ep->conn; abrt_conn = qedi_conn->abrt_conn; @@ -1235,37 +1266,6 @@ static int qedi_data_avail(struct qedi_ctx *qedi, u16 vlanid) return rc; } -static void qedi_offload_work(struct work_struct *work) -{ - struct qedi_endpoint *qedi_ep = - container_of(work, struct qedi_endpoint, offload_work); - struct qedi_ctx *qedi; - int wait_delay = 5 * HZ; - int ret; - - qedi = qedi_ep->qedi; - - ret = qedi_iscsi_offload_conn(qedi_ep); - if (ret) { - QEDI_ERR(&qedi->dbg_ctx, - "offload error: iscsi_cid=%u, qedi_ep=%p, ret=%d\n", - qedi_ep->iscsi_cid, qedi_ep, ret); - qedi_ep->state = EP_STATE_OFLDCONN_FAILED; - return; - } - - ret = wait_event_interruptible_timeout(qedi_ep->tcp_ofld_wait, - (qedi_ep->state == - EP_STATE_OFLDCONN_COMPL), - wait_delay); - if ((ret <= 0) || (qedi_ep->state != EP_STATE_OFLDCONN_COMPL)) { - qedi_ep->state = EP_STATE_OFLDCONN_FAILED; - QEDI_ERR(&qedi->dbg_ctx, - "Offload conn TIMEOUT iscsi_cid=%u, qedi_ep=%p\n", - qedi_ep->iscsi_cid, qedi_ep); - } -} - static int qedi_set_path(struct Scsi_Host *shost, struct iscsi_path *path_data) { struct qedi_ctx *qedi; @@ -1381,7 +1381,6 @@ static int qedi_set_path(struct Scsi_Host *shost, struct iscsi_path *path_data) qedi_ep->dst_addr, qedi_ep->dst_port); } - INIT_WORK(&qedi_ep->offload_work, qedi_offload_work); queue_work(qedi->offload_thread, &qedi_ep->offload_work); ret = 0; diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index ff78ef702f22..592a290e6cfa 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -32,7 +32,6 @@ #include <linux/blkdev.h> #include <linux/crc-t10dif.h> #include <linux/spinlock.h> -#include <linux/mutex.h> #include <linux/interrupt.h> #include <linux/atomic.h> #include <linux/hrtimer.h> @@ -732,9 +731,7 @@ static const struct opcode_info_t opcode_info_arr[SDEB_I_LAST_ELEM_P1 + 1] = { {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} }, }; -static atomic_t sdebug_num_hosts; -static DEFINE_MUTEX(add_host_mutex); - +static int sdebug_num_hosts; static int sdebug_add_host = DEF_NUM_HOST; /* in sysfs this is relative */ static int sdebug_ato = DEF_ATO; static int sdebug_cdb_len = DEF_CDB_LEN; @@ -781,7 +778,6 @@ static int sdebug_uuid_ctl = DEF_UUID_CTL; static bool sdebug_random = DEF_RANDOM; static bool sdebug_per_host_store = DEF_PER_HOST_STORE; static bool sdebug_removable = DEF_REMOVABLE; -static bool sdebug_deflect_incoming; static bool sdebug_clustering; static bool sdebug_host_lock = DEF_HOST_LOCK; static bool sdebug_strict = DEF_STRICT; @@ -5122,10 +5118,6 @@ static int scsi_debug_slave_configure(struct scsi_device *sdp) sdp->host->host_no, sdp->channel, sdp->id, sdp->lun); if (sdp->host->max_cmd_len != SDEBUG_MAX_CMD_LEN) sdp->host->max_cmd_len = SDEBUG_MAX_CMD_LEN; - if (smp_load_acquire(&sdebug_deflect_incoming)) { - pr_info("Exit early due to deflect_incoming\n"); - return 1; - } if (devip == NULL) { devip = find_build_dev_info(sdp); if (devip == NULL) @@ -5211,7 +5203,7 @@ static bool stop_queued_cmnd(struct scsi_cmnd *cmnd) } /* Deletes (stops) timers or work queues of all queued commands */ -static void stop_all_queued(bool done_with_no_conn) +static void stop_all_queued(void) { unsigned long iflags; int j, k; @@ -5220,15 +5212,13 @@ static void stop_all_queued(bool done_with_no_conn) struct sdebug_queued_cmd *sqcp; struct sdebug_dev_info *devip; struct sdebug_defer *sd_dp; - struct scsi_cmnd *scp; for (j = 0, sqp = sdebug_q_arr; j < submit_queues; ++j, ++sqp) { spin_lock_irqsave(&sqp->qc_lock, iflags); for (k = 0; k < SDEBUG_CANQUEUE; ++k) { if (test_bit(k, sqp->in_use_bm)) { sqcp = &sqp->qc_arr[k]; - scp = sqcp->a_cmnd; - if (!scp) + if (sqcp->a_cmnd == NULL) continue; devip = (struct sdebug_dev_info *) sqcp->a_cmnd->device->hostdata; @@ -5243,10 +5233,6 @@ static void stop_all_queued(bool done_with_no_conn) l_defer_t = SDEB_DEFER_NONE; spin_unlock_irqrestore(&sqp->qc_lock, iflags); stop_qc_helper(sd_dp, l_defer_t); - if (done_with_no_conn && l_defer_t != SDEB_DEFER_NONE) { - scp->result = DID_NO_CONNECT << 16; - scsi_done(scp); - } clear_bit(k, sqp->in_use_bm); spin_lock_irqsave(&sqp->qc_lock, iflags); } @@ -5389,7 +5375,7 @@ static int scsi_debug_host_reset(struct scsi_cmnd *SCpnt) } } spin_unlock(&sdebug_host_list_lock); - stop_all_queued(false); + stop_all_queued(); if (SDEBUG_OPT_RESET_NOISE & sdebug_opts) sdev_printk(KERN_INFO, SCpnt->device, "%s: %d device(s) found\n", __func__, k); @@ -5449,50 +5435,13 @@ static void sdebug_build_parts(unsigned char *ramp, unsigned long store_size) } } -static void sdeb_block_all_queues(void) -{ - int j; - struct sdebug_queue *sqp; - - for (j = 0, sqp = sdebug_q_arr; j < submit_queues; ++j, ++sqp) - atomic_set(&sqp->blocked, (int)true); -} - -static void sdeb_unblock_all_queues(void) +static void block_unblock_all_queues(bool block) { int j; struct sdebug_queue *sqp; for (j = 0, sqp = sdebug_q_arr; j < submit_queues; ++j, ++sqp) - atomic_set(&sqp->blocked, (int)false); -} - -static void -sdeb_add_n_hosts(int num_hosts) -{ - if (num_hosts < 1) - return; - do { - bool found; - unsigned long idx; - struct sdeb_store_info *sip; - bool want_phs = (sdebug_fake_rw == 0) && sdebug_per_host_store; - - found = false; - if (want_phs) { - xa_for_each_marked(per_store_ap, idx, sip, SDEB_XA_NOT_IN_USE) { - sdeb_most_recent_idx = (int)idx; - found = true; - break; - } - if (found) /* re-use case */ - sdebug_add_host_helper((int)idx); - else - sdebug_do_add_host(true /* make new store */); - } else { - sdebug_do_add_host(false); - } - } while (--num_hosts); + atomic_set(&sqp->blocked, (int)block); } /* Adjust (by rounding down) the sdebug_cmnd_count so abs(every_nth)-1 @@ -5505,10 +5454,10 @@ static void tweak_cmnd_count(void) modulo = abs(sdebug_every_nth); if (modulo < 2) return; - sdeb_block_all_queues(); + block_unblock_all_queues(true); count = atomic_read(&sdebug_cmnd_count); atomic_set(&sdebug_cmnd_count, (count / modulo) * modulo); - sdeb_unblock_all_queues(); + block_unblock_all_queues(false); } static void clear_queue_stats(void) @@ -5526,15 +5475,6 @@ static bool inject_on_this_cmd(void) return (atomic_read(&sdebug_cmnd_count) % abs(sdebug_every_nth)) == 0; } -static int process_deflect_incoming(struct scsi_cmnd *scp) -{ - u8 opcode = scp->cmnd[0]; - - if (opcode == SYNCHRONIZE_CACHE || opcode == SYNCHRONIZE_CACHE_16) - return 0; - return DID_NO_CONNECT << 16; -} - #define INCLUSIVE_TIMING_MAX_NS 1000000 /* 1 millisecond */ /* Complete the processing of the thread that queued a SCSI command to this @@ -5544,7 +5484,8 @@ static int process_deflect_incoming(struct scsi_cmnd *scp) */ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip, int scsi_result, - int (*pfp)(struct scsi_cmnd *, struct sdebug_dev_info *), + int (*pfp)(struct scsi_cmnd *, + struct sdebug_dev_info *), int delta_jiff, int ndelay) { bool new_sd_dp; @@ -5565,27 +5506,13 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip, } sdp = cmnd->device; - if (delta_jiff == 0) { - sqp = get_queue(cmnd); - if (atomic_read(&sqp->blocked)) { - if (smp_load_acquire(&sdebug_deflect_incoming)) - return process_deflect_incoming(cmnd); - else - return SCSI_MLQUEUE_HOST_BUSY; - } + if (delta_jiff == 0) goto respond_in_thread; - } sqp = get_queue(cmnd); spin_lock_irqsave(&sqp->qc_lock, iflags); if (unlikely(atomic_read(&sqp->blocked))) { spin_unlock_irqrestore(&sqp->qc_lock, iflags); - if (smp_load_acquire(&sdebug_deflect_incoming)) { - scsi_result = process_deflect_incoming(cmnd); - goto respond_in_thread; - } - if (sdebug_verbose) - pr_info("blocked --> SCSI_MLQUEUE_HOST_BUSY\n"); return SCSI_MLQUEUE_HOST_BUSY; } num_in_q = atomic_read(&devip->num_in_q); @@ -5774,12 +5701,8 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip, respond_in_thread: /* call back to mid-layer using invocation thread */ cmnd->result = pfp != NULL ? pfp(cmnd, devip) : 0; cmnd->result &= ~SDEG_RES_IMMED_MASK; - if (cmnd->result == 0 && scsi_result != 0) { + if (cmnd->result == 0 && scsi_result != 0) cmnd->result = scsi_result; - if (sdebug_verbose) - pr_info("respond_in_thread: tag=0x%x, scp->result=0x%x\n", - blk_mq_unique_tag(scsi_cmd_to_rq(cmnd)), scsi_result); - } scsi_done(cmnd); return 0; } @@ -6064,7 +5987,7 @@ static ssize_t delay_store(struct device_driver *ddp, const char *buf, int j, k; struct sdebug_queue *sqp; - sdeb_block_all_queues(); + block_unblock_all_queues(true); for (j = 0, sqp = sdebug_q_arr; j < submit_queues; ++j, ++sqp) { k = find_first_bit(sqp->in_use_bm, @@ -6078,7 +6001,7 @@ static ssize_t delay_store(struct device_driver *ddp, const char *buf, sdebug_jdelay = jdelay; sdebug_ndelay = 0; } - sdeb_unblock_all_queues(); + block_unblock_all_queues(false); } return res; } @@ -6104,7 +6027,7 @@ static ssize_t ndelay_store(struct device_driver *ddp, const char *buf, int j, k; struct sdebug_queue *sqp; - sdeb_block_all_queues(); + block_unblock_all_queues(true); for (j = 0, sqp = sdebug_q_arr; j < submit_queues; ++j, ++sqp) { k = find_first_bit(sqp->in_use_bm, @@ -6119,7 +6042,7 @@ static ssize_t ndelay_store(struct device_driver *ddp, const char *buf, sdebug_jdelay = ndelay ? JDELAY_OVERRIDDEN : DEF_JDELAY; } - sdeb_unblock_all_queues(); + block_unblock_all_queues(false); } return res; } @@ -6433,7 +6356,7 @@ static ssize_t max_queue_store(struct device_driver *ddp, const char *buf, if ((count > 0) && (1 == sscanf(buf, "%d", &n)) && (n > 0) && (n <= SDEBUG_CANQUEUE) && (sdebug_host_max_queue == 0)) { - sdeb_block_all_queues(); + block_unblock_all_queues(true); k = 0; for (j = 0, sqp = sdebug_q_arr; j < submit_queues; ++j, ++sqp) { @@ -6448,7 +6371,7 @@ static ssize_t max_queue_store(struct device_driver *ddp, const char *buf, atomic_set(&retired_max_queue, k + 1); else atomic_set(&retired_max_queue, 0); - sdeb_unblock_all_queues(); + block_unblock_all_queues(false); return count; } return -EINVAL; @@ -6537,48 +6460,43 @@ static DRIVER_ATTR_RW(virtual_gb); static ssize_t add_host_show(struct device_driver *ddp, char *buf) { /* absolute number of hosts currently active is what is shown */ - return scnprintf(buf, PAGE_SIZE, "%d\n", atomic_read(&sdebug_num_hosts)); + return scnprintf(buf, PAGE_SIZE, "%d\n", sdebug_num_hosts); } -/* - * Accept positive and negative values. Hex values (only positive) may be prefixed by '0x'. - * To remove all hosts use a large negative number (e.g. -9999). The value 0 does nothing. - * Returns -EBUSY if another add_host sysfs invocation is active. - */ static ssize_t add_host_store(struct device_driver *ddp, const char *buf, size_t count) { + bool found; + unsigned long idx; + struct sdeb_store_info *sip; + bool want_phs = (sdebug_fake_rw == 0) && sdebug_per_host_store; int delta_hosts; - if (count == 0 || kstrtoint(buf, 0, &delta_hosts)) + if (sscanf(buf, "%d", &delta_hosts) != 1) return -EINVAL; - if (sdebug_verbose) - pr_info("prior num_hosts=%d, num_to_add=%d\n", - atomic_read(&sdebug_num_hosts), delta_hosts); - if (delta_hosts == 0) - return count; - if (mutex_trylock(&add_host_mutex) == 0) - return -EBUSY; if (delta_hosts > 0) { - sdeb_add_n_hosts(delta_hosts); - } else if (delta_hosts < 0) { - smp_store_release(&sdebug_deflect_incoming, true); - sdeb_block_all_queues(); - if (delta_hosts >= atomic_read(&sdebug_num_hosts)) - stop_all_queued(true); do { - if (atomic_read(&sdebug_num_hosts) < 1) { - free_all_queued(); - break; + found = false; + if (want_phs) { + xa_for_each_marked(per_store_ap, idx, sip, + SDEB_XA_NOT_IN_USE) { + sdeb_most_recent_idx = (int)idx; + found = true; + break; + } + if (found) /* re-use case */ + sdebug_add_host_helper((int)idx); + else + sdebug_do_add_host(true); + } else { + sdebug_do_add_host(false); } + } while (--delta_hosts); + } else if (delta_hosts < 0) { + do { sdebug_do_remove_host(false); } while (++delta_hosts); - sdeb_unblock_all_queues(); - smp_store_release(&sdebug_deflect_incoming, false); } - mutex_unlock(&add_host_mutex); - if (sdebug_verbose) - pr_info("post num_hosts=%d\n", atomic_read(&sdebug_num_hosts)); return count; } static DRIVER_ATTR_RW(add_host); @@ -7089,10 +7007,6 @@ static int __init scsi_debug_init(void) sdebug_add_host = 0; for (k = 0; k < hosts_to_add; k++) { - if (smp_load_acquire(&sdebug_deflect_incoming)) { - pr_info("exit early as sdebug_deflect_incoming is set\n"); - return 0; - } if (want_store && k == 0) { ret = sdebug_add_host_helper(idx); if (ret < 0) { @@ -7110,12 +7024,8 @@ static int __init scsi_debug_init(void) } } if (sdebug_verbose) - pr_info("built %d host(s)\n", atomic_read(&sdebug_num_hosts)); + pr_info("built %d host(s)\n", sdebug_num_hosts); - /* - * Even though all the hosts have been established, due to async device (LU) scanning - * by the scsi mid-level, there may still be devices (LUs) being set up. - */ return 0; bus_unreg: @@ -7131,17 +7041,12 @@ free_q_arr: static void __exit scsi_debug_exit(void) { - int k; + int k = sdebug_num_hosts; - /* Possible race with LUs still being set up; stop them asap */ - sdeb_block_all_queues(); - smp_store_release(&sdebug_deflect_incoming, true); - stop_all_queued(false); - for (k = 0; atomic_read(&sdebug_num_hosts) > 0; k++) + stop_all_queued(); + for (; k; k--) sdebug_do_remove_host(true); free_all_queued(); - if (sdebug_verbose) - pr_info("removed %d hosts\n", k); driver_unregister(&sdebug_driverfs_driver); bus_unregister(&pseudo_lld_bus); root_device_unregister(pseudo_primary); @@ -7311,13 +7216,13 @@ static int sdebug_add_host_helper(int per_host_idx) sdbg_host->dev.bus = &pseudo_lld_bus; sdbg_host->dev.parent = pseudo_primary; sdbg_host->dev.release = &sdebug_release_adapter; - dev_set_name(&sdbg_host->dev, "adapter%d", atomic_read(&sdebug_num_hosts)); + dev_set_name(&sdbg_host->dev, "adapter%d", sdebug_num_hosts); error = device_register(&sdbg_host->dev); if (error) goto clean; - atomic_inc(&sdebug_num_hosts); + ++sdebug_num_hosts; return 0; clean: @@ -7381,7 +7286,7 @@ static void sdebug_do_remove_host(bool the_end) return; device_unregister(&sdbg_host->dev); - atomic_dec(&sdebug_num_hosts); + --sdebug_num_hosts; } static int sdebug_change_qdepth(struct scsi_device *sdev, int qdepth) @@ -7389,10 +7294,10 @@ static int sdebug_change_qdepth(struct scsi_device *sdev, int qdepth) int num_in_q = 0; struct sdebug_dev_info *devip; - sdeb_block_all_queues(); + block_unblock_all_queues(true); devip = (struct sdebug_dev_info *)sdev->hostdata; if (NULL == devip) { - sdeb_unblock_all_queues(); + block_unblock_all_queues(false); return -ENODEV; } num_in_q = atomic_read(&devip->num_in_q); @@ -7411,7 +7316,7 @@ static int sdebug_change_qdepth(struct scsi_device *sdev, int qdepth) sdev_printk(KERN_INFO, sdev, "%s: qdepth=%d, num_in_q=%d\n", __func__, qdepth, num_in_q); } - sdeb_unblock_all_queues(); + block_unblock_all_queues(false); return sdev->queue_depth; } diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 27951ea05dd4..2c0dd64159b0 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -86,6 +86,9 @@ struct iscsi_internal { struct transport_container session_cont; }; +static DEFINE_IDR(iscsi_ep_idr); +static DEFINE_MUTEX(iscsi_ep_idr_mutex); + static atomic_t iscsi_session_nr; /* sysfs session id for next new session */ static struct workqueue_struct *iscsi_conn_cleanup_workq; @@ -168,6 +171,11 @@ struct device_attribute dev_attr_##_prefix##_##_name = \ static void iscsi_endpoint_release(struct device *dev) { struct iscsi_endpoint *ep = iscsi_dev_to_endpoint(dev); + + mutex_lock(&iscsi_ep_idr_mutex); + idr_remove(&iscsi_ep_idr, ep->id); + mutex_unlock(&iscsi_ep_idr_mutex); + kfree(ep); } @@ -180,7 +188,7 @@ static ssize_t show_ep_handle(struct device *dev, struct device_attribute *attr, char *buf) { struct iscsi_endpoint *ep = iscsi_dev_to_endpoint(dev); - return sysfs_emit(buf, "%llu\n", (unsigned long long) ep->id); + return sysfs_emit(buf, "%d\n", ep->id); } static ISCSI_ATTR(ep, handle, S_IRUGO, show_ep_handle, NULL); @@ -193,48 +201,32 @@ static struct attribute_group iscsi_endpoint_group = { .attrs = iscsi_endpoint_attrs, }; -#define ISCSI_MAX_EPID -1 - -static int iscsi_match_epid(struct device *dev, const void *data) -{ - struct iscsi_endpoint *ep = iscsi_dev_to_endpoint(dev); - const uint64_t *epid = data; - - return *epid == ep->id; -} - struct iscsi_endpoint * iscsi_create_endpoint(int dd_size) { - struct device *dev; struct iscsi_endpoint *ep; - uint64_t id; - int err; - - for (id = 1; id < ISCSI_MAX_EPID; id++) { - dev = class_find_device(&iscsi_endpoint_class, NULL, &id, - iscsi_match_epid); - if (!dev) - break; - else - put_device(dev); - } - if (id == ISCSI_MAX_EPID) { - printk(KERN_ERR "Too many connections. Max supported %u\n", - ISCSI_MAX_EPID - 1); - return NULL; - } + int err, id; ep = kzalloc(sizeof(*ep) + dd_size, GFP_KERNEL); if (!ep) return NULL; + mutex_lock(&iscsi_ep_idr_mutex); + id = idr_alloc(&iscsi_ep_idr, ep, 0, -1, GFP_NOIO); + if (id < 0) { + mutex_unlock(&iscsi_ep_idr_mutex); + printk(KERN_ERR "Could not allocate endpoint ID. Error %d.\n", + id); + goto free_ep; + } + mutex_unlock(&iscsi_ep_idr_mutex); + ep->id = id; ep->dev.class = &iscsi_endpoint_class; - dev_set_name(&ep->dev, "ep-%llu", (unsigned long long) id); + dev_set_name(&ep->dev, "ep-%d", id); err = device_register(&ep->dev); if (err) - goto free_ep; + goto free_id; err = sysfs_create_group(&ep->dev.kobj, &iscsi_endpoint_group); if (err) @@ -248,6 +240,10 @@ unregister_dev: device_unregister(&ep->dev); return NULL; +free_id: + mutex_lock(&iscsi_ep_idr_mutex); + idr_remove(&iscsi_ep_idr, id); + mutex_unlock(&iscsi_ep_idr_mutex); free_ep: kfree(ep); return NULL; @@ -275,14 +271,17 @@ EXPORT_SYMBOL_GPL(iscsi_put_endpoint); */ struct iscsi_endpoint *iscsi_lookup_endpoint(u64 handle) { - struct device *dev; + struct iscsi_endpoint *ep; - dev = class_find_device(&iscsi_endpoint_class, NULL, &handle, - iscsi_match_epid); - if (!dev) - return NULL; + mutex_lock(&iscsi_ep_idr_mutex); + ep = idr_find(&iscsi_ep_idr, handle); + if (!ep) + goto unlock; - return iscsi_dev_to_endpoint(dev); + get_device(&ep->dev); +unlock: + mutex_unlock(&iscsi_ep_idr_mutex); + return ep; } EXPORT_SYMBOL_GPL(iscsi_lookup_endpoint); @@ -2202,10 +2201,10 @@ static void iscsi_stop_conn(struct iscsi_cls_conn *conn, int flag) switch (flag) { case STOP_CONN_RECOVER: - conn->state = ISCSI_CONN_FAILED; + WRITE_ONCE(conn->state, ISCSI_CONN_FAILED); break; case STOP_CONN_TERM: - conn->state = ISCSI_CONN_DOWN; + WRITE_ONCE(conn->state, ISCSI_CONN_DOWN); break; default: iscsi_cls_conn_printk(KERN_ERR, conn, "invalid stop flag %d\n", @@ -2217,6 +2216,49 @@ static void iscsi_stop_conn(struct iscsi_cls_conn *conn, int flag) ISCSI_DBG_TRANS_CONN(conn, "Stopping conn done.\n"); } +static void iscsi_ep_disconnect(struct iscsi_cls_conn *conn, bool is_active) +{ + struct iscsi_cls_session *session = iscsi_conn_to_session(conn); + struct iscsi_endpoint *ep; + + ISCSI_DBG_TRANS_CONN(conn, "disconnect ep.\n"); + WRITE_ONCE(conn->state, ISCSI_CONN_FAILED); + + if (!conn->ep || !session->transport->ep_disconnect) + return; + + ep = conn->ep; + conn->ep = NULL; + + session->transport->unbind_conn(conn, is_active); + session->transport->ep_disconnect(ep); + ISCSI_DBG_TRANS_CONN(conn, "disconnect ep done.\n"); +} + +static void iscsi_if_disconnect_bound_ep(struct iscsi_cls_conn *conn, + struct iscsi_endpoint *ep, + bool is_active) +{ + /* Check if this was a conn error and the kernel took ownership */ + spin_lock_irq(&conn->lock); + if (!test_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) { + spin_unlock_irq(&conn->lock); + iscsi_ep_disconnect(conn, is_active); + } else { + spin_unlock_irq(&conn->lock); + ISCSI_DBG_TRANS_CONN(conn, "flush kernel conn cleanup.\n"); + mutex_unlock(&conn->ep_mutex); + + flush_work(&conn->cleanup_work); + /* + * Userspace is now done with the EP so we can release the ref + * iscsi_cleanup_conn_work_fn took. + */ + iscsi_put_endpoint(ep); + mutex_lock(&conn->ep_mutex); + } +} + static int iscsi_if_stop_conn(struct iscsi_transport *transport, struct iscsi_uevent *ev) { @@ -2238,11 +2280,24 @@ static int iscsi_if_stop_conn(struct iscsi_transport *transport, iscsi_stop_conn(conn, flag); } else { /* + * For offload, when iscsid is restarted it won't know about + * existing endpoints so it can't do a ep_disconnect. We clean + * it up here for userspace. + */ + mutex_lock(&conn->ep_mutex); + if (conn->ep) + iscsi_if_disconnect_bound_ep(conn, conn->ep, true); + mutex_unlock(&conn->ep_mutex); + + /* * Figure out if it was the kernel or userspace initiating this. */ + spin_lock_irq(&conn->lock); if (!test_and_set_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) { + spin_unlock_irq(&conn->lock); iscsi_stop_conn(conn, flag); } else { + spin_unlock_irq(&conn->lock); ISCSI_DBG_TRANS_CONN(conn, "flush kernel conn cleanup.\n"); flush_work(&conn->cleanup_work); @@ -2251,31 +2306,14 @@ static int iscsi_if_stop_conn(struct iscsi_transport *transport, * Only clear for recovery to avoid extra cleanup runs during * termination. */ + spin_lock_irq(&conn->lock); clear_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags); + spin_unlock_irq(&conn->lock); } ISCSI_DBG_TRANS_CONN(conn, "iscsi if conn stop done.\n"); return 0; } -static void iscsi_ep_disconnect(struct iscsi_cls_conn *conn, bool is_active) -{ - struct iscsi_cls_session *session = iscsi_conn_to_session(conn); - struct iscsi_endpoint *ep; - - ISCSI_DBG_TRANS_CONN(conn, "disconnect ep.\n"); - conn->state = ISCSI_CONN_FAILED; - - if (!conn->ep || !session->transport->ep_disconnect) - return; - - ep = conn->ep; - conn->ep = NULL; - - session->transport->unbind_conn(conn, is_active); - session->transport->ep_disconnect(ep); - ISCSI_DBG_TRANS_CONN(conn, "disconnect ep done.\n"); -} - static void iscsi_cleanup_conn_work_fn(struct work_struct *work) { struct iscsi_cls_conn *conn = container_of(work, struct iscsi_cls_conn, @@ -2284,18 +2322,11 @@ static void iscsi_cleanup_conn_work_fn(struct work_struct *work) mutex_lock(&conn->ep_mutex); /* - * If we are not at least bound there is nothing for us to do. Userspace - * will do a ep_disconnect call if offload is used, but will not be - * doing a stop since there is nothing to clean up, so we have to clear - * the cleanup bit here. + * Get a ref to the ep, so we don't release its ID until after + * userspace is done referencing it in iscsi_if_disconnect_bound_ep. */ - if (conn->state != ISCSI_CONN_BOUND && conn->state != ISCSI_CONN_UP) { - ISCSI_DBG_TRANS_CONN(conn, "Got error while conn is already failed. Ignoring.\n"); - clear_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags); - mutex_unlock(&conn->ep_mutex); - return; - } - + if (conn->ep) + get_device(&conn->ep->dev); iscsi_ep_disconnect(conn, false); if (system_state != SYSTEM_RUNNING) { @@ -2340,11 +2371,12 @@ iscsi_alloc_conn(struct iscsi_cls_session *session, int dd_size, uint32_t cid) conn->dd_data = &conn[1]; mutex_init(&conn->ep_mutex); + spin_lock_init(&conn->lock); INIT_LIST_HEAD(&conn->conn_list); INIT_WORK(&conn->cleanup_work, iscsi_cleanup_conn_work_fn); conn->transport = transport; conn->cid = cid; - conn->state = ISCSI_CONN_DOWN; + WRITE_ONCE(conn->state, ISCSI_CONN_DOWN); /* this is released in the dev's release function */ if (!get_device(&session->dev)) @@ -2542,9 +2574,32 @@ void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error) struct iscsi_uevent *ev; struct iscsi_internal *priv; int len = nlmsg_total_size(sizeof(*ev)); + unsigned long flags; + int state; - if (!test_and_set_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) - queue_work(iscsi_conn_cleanup_workq, &conn->cleanup_work); + spin_lock_irqsave(&conn->lock, flags); + /* + * Userspace will only do a stop call if we are at least bound. And, we + * only need to do the in kernel cleanup if in the UP state so cmds can + * be released to upper layers. If in other states just wait for + * userspace to avoid races that can leave the cleanup_work queued. + */ + state = READ_ONCE(conn->state); + switch (state) { + case ISCSI_CONN_BOUND: + case ISCSI_CONN_UP: + if (!test_and_set_bit(ISCSI_CLS_CONN_BIT_CLEANUP, + &conn->flags)) { + queue_work(iscsi_conn_cleanup_workq, + &conn->cleanup_work); + } + break; + default: + ISCSI_DBG_TRANS_CONN(conn, "Got conn error in state %d\n", + state); + break; + } + spin_unlock_irqrestore(&conn->lock, flags); priv = iscsi_if_transport_lookup(conn->transport); if (!priv) @@ -2894,7 +2949,7 @@ iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev) char *data = (char*)ev + sizeof(*ev); struct iscsi_cls_conn *conn; struct iscsi_cls_session *session; - int err = 0, value = 0; + int err = 0, value = 0, state; if (ev->u.set_param.len > PAGE_SIZE) return -EINVAL; @@ -2911,8 +2966,8 @@ iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev) session->recovery_tmo = value; break; default: - if ((conn->state == ISCSI_CONN_BOUND) || - (conn->state == ISCSI_CONN_UP)) { + state = READ_ONCE(conn->state); + if (state == ISCSI_CONN_BOUND || state == ISCSI_CONN_UP) { err = transport->set_param(conn, ev->u.set_param.param, data, ev->u.set_param.len); } else { @@ -2984,16 +3039,7 @@ static int iscsi_if_ep_disconnect(struct iscsi_transport *transport, } mutex_lock(&conn->ep_mutex); - /* Check if this was a conn error and the kernel took ownership */ - if (test_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) { - ISCSI_DBG_TRANS_CONN(conn, "flush kernel conn cleanup.\n"); - mutex_unlock(&conn->ep_mutex); - - flush_work(&conn->cleanup_work); - goto put_ep; - } - - iscsi_ep_disconnect(conn, false); + iscsi_if_disconnect_bound_ep(conn, ep, false); mutex_unlock(&conn->ep_mutex); put_ep: iscsi_put_endpoint(ep); @@ -3696,24 +3742,17 @@ static int iscsi_if_transport_conn(struct iscsi_transport *transport, return -EINVAL; mutex_lock(&conn->ep_mutex); + spin_lock_irq(&conn->lock); if (test_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) { + spin_unlock_irq(&conn->lock); mutex_unlock(&conn->ep_mutex); ev->r.retcode = -ENOTCONN; return 0; } + spin_unlock_irq(&conn->lock); switch (nlh->nlmsg_type) { case ISCSI_UEVENT_BIND_CONN: - if (conn->ep) { - /* - * For offload boot support where iscsid is restarted - * during the pivot root stage, the ep will be intact - * here when the new iscsid instance starts up and - * reconnects. - */ - iscsi_ep_disconnect(conn, true); - } - session = iscsi_session_lookup(ev->u.b_conn.sid); if (!session) { err = -EINVAL; @@ -3724,7 +3763,7 @@ static int iscsi_if_transport_conn(struct iscsi_transport *transport, ev->u.b_conn.transport_eph, ev->u.b_conn.is_leading); if (!ev->r.retcode) - conn->state = ISCSI_CONN_BOUND; + WRITE_ONCE(conn->state, ISCSI_CONN_BOUND); if (ev->r.retcode || !transport->ep_connect) break; @@ -3743,7 +3782,8 @@ static int iscsi_if_transport_conn(struct iscsi_transport *transport, case ISCSI_UEVENT_START_CONN: ev->r.retcode = transport->start_conn(conn); if (!ev->r.retcode) - conn->state = ISCSI_CONN_UP; + WRITE_ONCE(conn->state, ISCSI_CONN_UP); + break; case ISCSI_UEVENT_SEND_PDU: pdu_len = nlh->nlmsg_len - sizeof(*nlh) - sizeof(*ev); @@ -4050,10 +4090,11 @@ static ssize_t show_conn_state(struct device *dev, { struct iscsi_cls_conn *conn = iscsi_dev_to_conn(dev->parent); const char *state = "unknown"; + int conn_state = READ_ONCE(conn->state); - if (conn->state >= 0 && - conn->state < ARRAY_SIZE(connection_state_names)) - state = connection_state_names[conn->state]; + if (conn_state >= 0 && + conn_state < ARRAY_SIZE(connection_state_names)) + state = connection_state_names[conn_state]; return sysfs_emit(buf, "%s\n", state); } diff --git a/drivers/scsi/sr_ioctl.c b/drivers/scsi/sr_ioctl.c index ddd00efc4882..fbdb5124d7f7 100644 --- a/drivers/scsi/sr_ioctl.c +++ b/drivers/scsi/sr_ioctl.c @@ -41,7 +41,7 @@ static int sr_read_tochdr(struct cdrom_device_info *cdi, int result; unsigned char *buffer; - buffer = kmalloc(32, GFP_KERNEL); + buffer = kzalloc(32, GFP_KERNEL); if (!buffer) return -ENOMEM; @@ -55,10 +55,13 @@ static int sr_read_tochdr(struct cdrom_device_info *cdi, cgc.data_direction = DMA_FROM_DEVICE; result = sr_do_ioctl(cd, &cgc); + if (result) + goto err; tochdr->cdth_trk0 = buffer[2]; tochdr->cdth_trk1 = buffer[3]; +err: kfree(buffer); return result; } @@ -71,7 +74,7 @@ static int sr_read_tocentry(struct cdrom_device_info *cdi, int result; unsigned char *buffer; - buffer = kmalloc(32, GFP_KERNEL); + buffer = kzalloc(32, GFP_KERNEL); if (!buffer) return -ENOMEM; @@ -86,6 +89,8 @@ static int sr_read_tocentry(struct cdrom_device_info *cdi, cgc.data_direction = DMA_FROM_DEVICE; result = sr_do_ioctl(cd, &cgc); + if (result) + goto err; tocentry->cdte_ctrl = buffer[5] & 0xf; tocentry->cdte_adr = buffer[5] >> 4; @@ -98,6 +103,7 @@ static int sr_read_tocentry(struct cdrom_device_info *cdi, tocentry->cdte_addr.lba = (((((buffer[8] << 8) + buffer[9]) << 8) + buffer[10]) << 8) + buffer[11]; +err: kfree(buffer); return result; } @@ -384,7 +390,7 @@ int sr_get_mcn(struct cdrom_device_info *cdi, struct cdrom_mcn *mcn) { Scsi_CD *cd = cdi->handle; struct packet_command cgc; - char *buffer = kmalloc(32, GFP_KERNEL); + char *buffer = kzalloc(32, GFP_KERNEL); int result; if (!buffer) @@ -400,10 +406,13 @@ int sr_get_mcn(struct cdrom_device_info *cdi, struct cdrom_mcn *mcn) cgc.data_direction = DMA_FROM_DEVICE; cgc.timeout = IOCTL_TIMEOUT; result = sr_do_ioctl(cd, &cgc); + if (result) + goto err; memcpy(mcn->medium_catalog_number, buffer + 9, 13); mcn->medium_catalog_number[13] = 0; +err: kfree(buffer); return result; } diff --git a/drivers/soc/imx/imx8m-blk-ctrl.c b/drivers/soc/imx/imx8m-blk-ctrl.c index 122f9c884b38..ccd0577a771e 100644 --- a/drivers/soc/imx/imx8m-blk-ctrl.c +++ b/drivers/soc/imx/imx8m-blk-ctrl.c @@ -50,7 +50,7 @@ struct imx8m_blk_ctrl_domain_data { u32 mipi_phy_rst_mask; }; -#define DOMAIN_MAX_CLKS 3 +#define DOMAIN_MAX_CLKS 4 struct imx8m_blk_ctrl_domain { struct generic_pm_domain genpd; diff --git a/drivers/spi/atmel-quadspi.c b/drivers/spi/atmel-quadspi.c index 92d9610df1fd..938017a60c8e 100644 --- a/drivers/spi/atmel-quadspi.c +++ b/drivers/spi/atmel-quadspi.c @@ -277,6 +277,9 @@ static int atmel_qspi_find_mode(const struct spi_mem_op *op) static bool atmel_qspi_supports_op(struct spi_mem *mem, const struct spi_mem_op *op) { + if (!spi_mem_default_supports_op(mem, op)) + return false; + if (atmel_qspi_find_mode(op) < 0) return false; diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index 616ada891974..19686fb47bb3 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -1415,9 +1415,24 @@ static bool cqspi_supports_mem_op(struct spi_mem *mem, all_false = !op->cmd.dtr && !op->addr.dtr && !op->dummy.dtr && !op->data.dtr; - /* Mixed DTR modes not supported. */ - if (!(all_true || all_false)) + if (all_true) { + /* Right now we only support 8-8-8 DTR mode. */ + if (op->cmd.nbytes && op->cmd.buswidth != 8) + return false; + if (op->addr.nbytes && op->addr.buswidth != 8) + return false; + if (op->data.nbytes && op->data.buswidth != 8) + return false; + } else if (all_false) { + /* Only 1-1-X ops are supported without DTR */ + if (op->cmd.nbytes && op->cmd.buswidth > 1) + return false; + if (op->addr.nbytes && op->addr.buswidth > 1) + return false; + } else { + /* Mixed DTR modes are not supported. */ return false; + } return spi_mem_default_supports_op(mem, op); } diff --git a/drivers/spi/spi-intel-pci.c b/drivers/spi/spi-intel-pci.c index a5ef7a526a7f..f6eec7a869b6 100644 --- a/drivers/spi/spi-intel-pci.c +++ b/drivers/spi/spi-intel-pci.c @@ -72,6 +72,7 @@ static const struct pci_device_id intel_spi_pci_ids[] = { { PCI_VDEVICE(INTEL, 0x4da4), (unsigned long)&bxt_info }, { PCI_VDEVICE(INTEL, 0x51a4), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x54a4), (unsigned long)&cnl_info }, + { PCI_VDEVICE(INTEL, 0x7a24), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x7aa4), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0xa0a4), (unsigned long)&bxt_info }, { PCI_VDEVICE(INTEL, 0xa1a4), (unsigned long)&bxt_info }, diff --git a/drivers/spi/spi-mtk-nor.c b/drivers/spi/spi-mtk-nor.c index 94fb09696677..d167699a1a96 100644 --- a/drivers/spi/spi-mtk-nor.c +++ b/drivers/spi/spi-mtk-nor.c @@ -960,7 +960,17 @@ static int __maybe_unused mtk_nor_suspend(struct device *dev) static int __maybe_unused mtk_nor_resume(struct device *dev) { - return pm_runtime_force_resume(dev); + struct spi_controller *ctlr = dev_get_drvdata(dev); + struct mtk_nor *sp = spi_controller_get_devdata(ctlr); + int ret; + + ret = pm_runtime_force_resume(dev); + if (ret) + return ret; + + mtk_nor_init(sp); + + return 0; } static const struct dev_pm_ops mtk_nor_pm_ops = { diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index ff292b75e23f..60dafe4c581b 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -588,7 +588,7 @@ static void pscsi_destroy_device(struct se_device *dev) } static void pscsi_complete_cmd(struct se_cmd *cmd, u8 scsi_status, - unsigned char *req_sense) + unsigned char *req_sense, int valid_data) { struct pscsi_dev_virt *pdv = PSCSI_DEV(cmd->se_dev); struct scsi_device *sd = pdv->pdv_sd; @@ -681,7 +681,7 @@ after_mode_select: * back despite framework assumption that a * check condition means there is no data */ - if (sd->type == TYPE_TAPE && + if (sd->type == TYPE_TAPE && valid_data && cmd->data_direction == DMA_FROM_DEVICE) { /* * is sense data valid, fixed format, @@ -1032,6 +1032,7 @@ static void pscsi_req_done(struct request *req, blk_status_t status) struct se_cmd *cmd = req->end_io_data; struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(req); enum sam_status scsi_status = scmd->result & 0xff; + int valid_data = cmd->data_length - scmd->resid_len; u8 *cdb = cmd->priv; if (scsi_status != SAM_STAT_GOOD) { @@ -1039,12 +1040,11 @@ static void pscsi_req_done(struct request *req, blk_status_t status) " 0x%02x Result: 0x%08x\n", cmd, cdb[0], scmd->result); } - pscsi_complete_cmd(cmd, scsi_status, scmd->sense_buffer); + pscsi_complete_cmd(cmd, scsi_status, scmd->sense_buffer, valid_data); switch (host_byte(scmd->result)) { case DID_OK: - target_complete_cmd_with_length(cmd, scsi_status, - cmd->data_length - scmd->resid_len); + target_complete_cmd_with_length(cmd, scsi_status, valid_data); break; default: pr_debug("PSCSI Host Byte exception at cmd: %p CDB:" diff --git a/drivers/tee/optee/ffa_abi.c b/drivers/tee/optee/ffa_abi.c index a5eb4ef46971..c9b3b2cfb2b2 100644 --- a/drivers/tee/optee/ffa_abi.c +++ b/drivers/tee/optee/ffa_abi.c @@ -865,6 +865,7 @@ err_rhashtable_free: rhashtable_free_and_destroy(&optee->ffa.global_ids, rh_free_fn, NULL); optee_supp_uninit(&optee->supp); mutex_destroy(&optee->call_queue.mutex); + mutex_destroy(&optee->ffa.mutex); err_unreg_supp_teedev: tee_device_unregister(optee->supp_teedev); err_unreg_teedev: diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index e37691e0bf20..0e5cc948373c 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -113,8 +113,10 @@ config THERMAL_DEFAULT_GOV_USER_SPACE bool "user_space" select THERMAL_GOV_USER_SPACE help - Select this if you want to let the user space manage the - platform thermals. + The Userspace governor allows to get trip point crossed + notification from the kernel via uevents. It is recommended + to use the netlink interface instead which gives richer + information about the thermal framework events. config THERMAL_DEFAULT_GOV_POWER_ALLOCATOR bool "power_allocator" diff --git a/drivers/thermal/gov_user_space.c b/drivers/thermal/gov_user_space.c index 64a18e354a20..a62a4e90bd3f 100644 --- a/drivers/thermal/gov_user_space.c +++ b/drivers/thermal/gov_user_space.c @@ -17,8 +17,7 @@ static int user_space_bind(struct thermal_zone_device *tz) { - pr_warn_once("Userspace governor deprecated: use thermal netlink " \ - "notification instead\n"); + pr_info_once("Consider using thermal netlink events interface\n"); return 0; } diff --git a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c index 4954800b9850..d97f496bab9b 100644 --- a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c +++ b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c @@ -68,7 +68,7 @@ static int evaluate_odvp(struct int3400_thermal_priv *priv); struct odvp_attr { int odvp; struct int3400_thermal_priv *priv; - struct kobj_attribute attr; + struct device_attribute attr; }; static ssize_t data_vault_read(struct file *file, struct kobject *kobj, @@ -311,7 +311,7 @@ end: return result; } -static ssize_t odvp_show(struct kobject *kobj, struct kobj_attribute *attr, +static ssize_t odvp_show(struct device *dev, struct device_attribute *attr, char *buf) { struct odvp_attr *odvp_attr; diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c index f154bada2906..1c4aac8464a7 100644 --- a/drivers/thermal/thermal_sysfs.c +++ b/drivers/thermal/thermal_sysfs.c @@ -610,9 +610,6 @@ cur_state_store(struct device *dev, struct device_attribute *attr, unsigned long state; int result; - dev_warn_once(&cdev->device, - "Setting cooling device state is deprecated\n"); - if (sscanf(buf, "%ld\n", &state) != 1) return -EINVAL; diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index fa92f727fdf8..a38b922bcbc1 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -73,6 +73,8 @@ module_param(debug, int, 0600); */ #define MAX_MRU 1500 #define MAX_MTU 1500 +/* SOF, ADDR, CTRL, LEN1, LEN2, ..., FCS, EOF */ +#define PROT_OVERHEAD 7 #define GSM_NET_TX_TIMEOUT (HZ*10) /* @@ -219,7 +221,6 @@ struct gsm_mux { int encoding; u8 control; u8 fcs; - u8 received_fcs; u8 *txframe; /* TX framing buffer */ /* Method for the receiver side */ @@ -231,6 +232,7 @@ struct gsm_mux { int initiator; /* Did we initiate connection */ bool dead; /* Has the mux been shut down */ struct gsm_dlci *dlci[NUM_DLCI]; + int old_c_iflag; /* termios c_iflag value before attach */ bool constipated; /* Asked by remote to shut up */ spinlock_t tx_lock; @@ -271,10 +273,6 @@ static DEFINE_SPINLOCK(gsm_mux_lock); static struct tty_driver *gsm_tty_driver; -/* Save dlci open address */ -static int addr_open[256] = { 0 }; -/* Save dlci open count */ -static int addr_cnt; /* * This section of the driver logic implements the GSM encodings * both the basic and the 'advanced'. Reliable transport is not @@ -369,6 +367,7 @@ static const u8 gsm_fcs8[256] = { #define GOOD_FCS 0xCF static int gsmld_output(struct gsm_mux *gsm, u8 *data, int len); +static int gsm_modem_update(struct gsm_dlci *dlci, u8 brk); /** * gsm_fcs_add - update FCS @@ -832,7 +831,7 @@ static int gsm_dlci_data_output(struct gsm_mux *gsm, struct gsm_dlci *dlci) break; case 2: /* Unstructed with modem bits. Always one byte as we never send inline break data */ - *dp++ = gsm_encode_modem(dlci); + *dp++ = (gsm_encode_modem(dlci) << 1) | EA; break; } WARN_ON(kfifo_out_locked(&dlci->fifo, dp , len, &dlci->lock) != len); @@ -917,6 +916,66 @@ static int gsm_dlci_data_output_framed(struct gsm_mux *gsm, } /** + * gsm_dlci_modem_output - try and push modem status out of a DLCI + * @gsm: mux + * @dlci: the DLCI to pull modem status from + * @brk: break signal + * + * Push an empty frame in to the transmit queue to update the modem status + * bits and to transmit an optional break. + * + * Caller must hold the tx_lock of the mux. + */ + +static int gsm_dlci_modem_output(struct gsm_mux *gsm, struct gsm_dlci *dlci, + u8 brk) +{ + u8 *dp = NULL; + struct gsm_msg *msg; + int size = 0; + + /* for modem bits without break data */ + switch (dlci->adaption) { + case 1: /* Unstructured */ + break; + case 2: /* Unstructured with modem bits. */ + size++; + if (brk > 0) + size++; + break; + default: + pr_err("%s: unsupported adaption %d\n", __func__, + dlci->adaption); + return -EINVAL; + } + + msg = gsm_data_alloc(gsm, dlci->addr, size, gsm->ftype); + if (!msg) { + pr_err("%s: gsm_data_alloc error", __func__); + return -ENOMEM; + } + dp = msg->data; + switch (dlci->adaption) { + case 1: /* Unstructured */ + break; + case 2: /* Unstructured with modem bits. */ + if (brk == 0) { + *dp++ = (gsm_encode_modem(dlci) << 1) | EA; + } else { + *dp++ = gsm_encode_modem(dlci) << 1; + *dp++ = (brk << 4) | 2 | EA; /* Length, Break, EA */ + } + break; + default: + /* Handled above */ + break; + } + + __gsm_data_queue(dlci, msg); + return size; +} + +/** * gsm_dlci_data_sweep - look for data to send * @gsm: the GSM mux * @@ -1093,7 +1152,6 @@ static void gsm_control_modem(struct gsm_mux *gsm, const u8 *data, int clen) { unsigned int addr = 0; unsigned int modem = 0; - unsigned int brk = 0; struct gsm_dlci *dlci; int len = clen; int slen; @@ -1123,17 +1181,8 @@ static void gsm_control_modem(struct gsm_mux *gsm, const u8 *data, int clen) return; } len--; - if (len > 0) { - while (gsm_read_ea(&brk, *dp++) == 0) { - len--; - if (len == 0) - return; - } - modem <<= 7; - modem |= (brk & 0x7f); - } tty = tty_port_tty_get(&dlci->port); - gsm_process_modem(tty, dlci, modem, slen); + gsm_process_modem(tty, dlci, modem, slen - len); if (tty) { tty_wakeup(tty); tty_kref_put(tty); @@ -1193,7 +1242,6 @@ static void gsm_control_rls(struct gsm_mux *gsm, const u8 *data, int clen) } static void gsm_dlci_begin_close(struct gsm_dlci *dlci); -static void gsm_dlci_close(struct gsm_dlci *dlci); /** * gsm_control_message - DLCI 0 control processing @@ -1212,28 +1260,15 @@ static void gsm_control_message(struct gsm_mux *gsm, unsigned int command, { u8 buf[1]; unsigned long flags; - struct gsm_dlci *dlci; - int i; - int address; switch (command) { case CMD_CLD: { - if (addr_cnt > 0) { - for (i = 0; i < addr_cnt; i++) { - address = addr_open[i]; - dlci = gsm->dlci[address]; - gsm_dlci_close(dlci); - addr_open[i] = 0; - } - } + struct gsm_dlci *dlci = gsm->dlci[0]; /* Modem wishes to close down */ - dlci = gsm->dlci[0]; if (dlci) { dlci->dead = true; gsm->dead = true; - gsm_dlci_close(dlci); - addr_cnt = 0; - gsm_response(gsm, 0, UA|PF); + gsm_dlci_begin_close(dlci); } } break; @@ -1326,11 +1361,12 @@ static void gsm_control_response(struct gsm_mux *gsm, unsigned int command, static void gsm_control_transmit(struct gsm_mux *gsm, struct gsm_control *ctrl) { - struct gsm_msg *msg = gsm_data_alloc(gsm, 0, ctrl->len + 1, gsm->ftype); + struct gsm_msg *msg = gsm_data_alloc(gsm, 0, ctrl->len + 2, gsm->ftype); if (msg == NULL) return; - msg->data[0] = (ctrl->cmd << 1) | 2 | EA; /* command */ - memcpy(msg->data + 1, ctrl->data, ctrl->len); + msg->data[0] = (ctrl->cmd << 1) | CR | EA; /* command */ + msg->data[1] = (ctrl->len << 1) | EA; + memcpy(msg->data + 2, ctrl->data, ctrl->len); gsm_data_queue(gsm->dlci[0], msg); } @@ -1353,7 +1389,6 @@ static void gsm_control_retransmit(struct timer_list *t) spin_lock_irqsave(&gsm->control_lock, flags); ctrl = gsm->pending_cmd; if (ctrl) { - gsm->cretries--; if (gsm->cretries == 0) { gsm->pending_cmd = NULL; ctrl->error = -ETIMEDOUT; @@ -1362,6 +1397,7 @@ static void gsm_control_retransmit(struct timer_list *t) wake_up(&gsm->event); return; } + gsm->cretries--; gsm_control_transmit(gsm, ctrl); mod_timer(&gsm->t2_timer, jiffies + gsm->t2 * HZ / 100); } @@ -1402,7 +1438,7 @@ retry: /* If DLCI0 is in ADM mode skip retries, it won't respond */ if (gsm->dlci[0]->mode == DLCI_MODE_ADM) - gsm->cretries = 1; + gsm->cretries = 0; else gsm->cretries = gsm->n2; @@ -1450,20 +1486,22 @@ static int gsm_control_wait(struct gsm_mux *gsm, struct gsm_control *control) static void gsm_dlci_close(struct gsm_dlci *dlci) { + unsigned long flags; + del_timer(&dlci->t1); if (debug & 8) pr_debug("DLCI %d goes closed.\n", dlci->addr); dlci->state = DLCI_CLOSED; if (dlci->addr != 0) { tty_port_tty_hangup(&dlci->port, false); + spin_lock_irqsave(&dlci->lock, flags); kfifo_reset(&dlci->fifo); + spin_unlock_irqrestore(&dlci->lock, flags); /* Ensure that gsmtty_open() can return. */ tty_port_set_initialized(&dlci->port, 0); wake_up_interruptible(&dlci->port.open_wait); } else dlci->gsm->dead = true; - /* Unregister gsmtty driver,report gsmtty dev remove uevent for user */ - tty_unregister_device(gsm_tty_driver, dlci->addr); wake_up(&dlci->gsm->event); /* A DLCI 0 close is a MUX termination so we need to kick that back to userspace somehow */ @@ -1485,8 +1523,9 @@ static void gsm_dlci_open(struct gsm_dlci *dlci) dlci->state = DLCI_OPEN; if (debug & 8) pr_debug("DLCI %d goes open.\n", dlci->addr); - /* Register gsmtty driver,report gsmtty dev add uevent for user */ - tty_register_device(gsm_tty_driver, dlci->addr, NULL); + /* Send current modem state */ + if (dlci->addr) + gsm_modem_update(dlci, 0); wake_up(&dlci->gsm->event); } @@ -1623,6 +1662,7 @@ static void gsm_dlci_data(struct gsm_dlci *dlci, const u8 *data, int clen) tty = tty_port_tty_get(port); if (tty) { gsm_process_modem(tty, dlci, modem, slen); + tty_wakeup(tty); tty_kref_put(tty); } fallthrough; @@ -1793,19 +1833,7 @@ static void gsm_queue(struct gsm_mux *gsm) struct gsm_dlci *dlci; u8 cr; int address; - int i, j, k, address_tmp; - /* We have to sneak a look at the packet body to do the FCS. - A somewhat layering violation in the spec */ - if ((gsm->control & ~PF) == UI) - gsm->fcs = gsm_fcs_add_block(gsm->fcs, gsm->buf, gsm->len); - if (gsm->encoding == 0) { - /* WARNING: gsm->received_fcs is used for - gsm->encoding = 0 only. - In this case it contain the last piece of data - required to generate final CRC */ - gsm->fcs = gsm_fcs_add(gsm->fcs, gsm->received_fcs); - } if (gsm->fcs != GOOD_FCS) { gsm->bad_fcs++; if (debug & 4) @@ -1836,11 +1864,6 @@ static void gsm_queue(struct gsm_mux *gsm) else { gsm_response(gsm, address, UA|PF); gsm_dlci_open(dlci); - /* Save dlci open address */ - if (address) { - addr_open[addr_cnt] = address; - addr_cnt++; - } } break; case DISC|PF: @@ -1851,35 +1874,9 @@ static void gsm_queue(struct gsm_mux *gsm) return; } /* Real close complete */ - if (!address) { - if (addr_cnt > 0) { - for (i = 0; i < addr_cnt; i++) { - address = addr_open[i]; - dlci = gsm->dlci[address]; - gsm_dlci_close(dlci); - addr_open[i] = 0; - } - } - dlci = gsm->dlci[0]; - gsm_dlci_close(dlci); - addr_cnt = 0; - gsm_response(gsm, 0, UA|PF); - } else { - gsm_response(gsm, address, UA|PF); - gsm_dlci_close(dlci); - /* clear dlci address */ - for (j = 0; j < addr_cnt; j++) { - address_tmp = addr_open[j]; - if (address_tmp == address) { - for (k = j; k < addr_cnt; k++) - addr_open[k] = addr_open[k+1]; - addr_cnt--; - break; - } - } - } + gsm_response(gsm, address, UA|PF); + gsm_dlci_close(dlci); break; - case UA: case UA|PF: if (cr == 0 || dlci == NULL) break; @@ -1993,19 +1990,25 @@ static void gsm0_receive(struct gsm_mux *gsm, unsigned char c) break; case GSM_DATA: /* Data */ gsm->buf[gsm->count++] = c; - if (gsm->count == gsm->len) + if (gsm->count == gsm->len) { + /* Calculate final FCS for UI frames over all data */ + if ((gsm->control & ~PF) != UIH) { + gsm->fcs = gsm_fcs_add_block(gsm->fcs, gsm->buf, + gsm->count); + } gsm->state = GSM_FCS; + } break; case GSM_FCS: /* FCS follows the packet */ - gsm->received_fcs = c; - gsm_queue(gsm); + gsm->fcs = gsm_fcs_add(gsm->fcs, c); gsm->state = GSM_SSOF; break; case GSM_SSOF: - if (c == GSM0_SOF) { - gsm->state = GSM_SEARCH; - break; - } + gsm->state = GSM_SEARCH; + if (c == GSM0_SOF) + gsm_queue(gsm); + else + gsm->bad_size++; break; default: pr_debug("%s: unhandled state: %d\n", __func__, gsm->state); @@ -2023,12 +2026,35 @@ static void gsm0_receive(struct gsm_mux *gsm, unsigned char c) static void gsm1_receive(struct gsm_mux *gsm, unsigned char c) { + /* handle XON/XOFF */ + if ((c & ISO_IEC_646_MASK) == XON) { + gsm->constipated = true; + return; + } else if ((c & ISO_IEC_646_MASK) == XOFF) { + gsm->constipated = false; + /* Kick the link in case it is idling */ + gsm_data_kick(gsm, NULL); + return; + } if (c == GSM1_SOF) { - /* EOF is only valid in frame if we have got to the data state - and received at least one byte (the FCS) */ - if (gsm->state == GSM_DATA && gsm->count) { - /* Extract the FCS */ + /* EOF is only valid in frame if we have got to the data state */ + if (gsm->state == GSM_DATA) { + if (gsm->count < 1) { + /* Missing FSC */ + gsm->malformed++; + gsm->state = GSM_START; + return; + } + /* Remove the FCS from data */ gsm->count--; + if ((gsm->control & ~PF) != UIH) { + /* Calculate final FCS for UI frames over all + * data but FCS + */ + gsm->fcs = gsm_fcs_add_block(gsm->fcs, gsm->buf, + gsm->count); + } + /* Add the FCS itself to test against GOOD_FCS */ gsm->fcs = gsm_fcs_add(gsm->fcs, gsm->buf[gsm->count]); gsm->len = gsm->count; gsm_queue(gsm); @@ -2037,7 +2063,8 @@ static void gsm1_receive(struct gsm_mux *gsm, unsigned char c) } /* Any partial frame was a runt so go back to start */ if (gsm->state != GSM_START) { - gsm->malformed++; + if (gsm->state != GSM_SEARCH) + gsm->malformed++; gsm->state = GSM_START; } /* A SOF in GSM_START means we are still reading idling or @@ -2106,74 +2133,43 @@ static void gsm_error(struct gsm_mux *gsm) gsm->io_error++; } -static int gsm_disconnect(struct gsm_mux *gsm) -{ - struct gsm_dlci *dlci = gsm->dlci[0]; - struct gsm_control *gc; - - if (!dlci) - return 0; - - /* In theory disconnecting DLCI 0 is sufficient but for some - modems this is apparently not the case. */ - gc = gsm_control_send(gsm, CMD_CLD, NULL, 0); - if (gc) - gsm_control_wait(gsm, gc); - - del_timer_sync(&gsm->t2_timer); - /* Now we are sure T2 has stopped */ - - gsm_dlci_begin_close(dlci); - wait_event_interruptible(gsm->event, - dlci->state == DLCI_CLOSED); - - if (signal_pending(current)) - return -EINTR; - - return 0; -} - /** * gsm_cleanup_mux - generic GSM protocol cleanup * @gsm: our mux + * @disc: disconnect link? * * Clean up the bits of the mux which are the same for all framing * protocols. Remove the mux from the mux table, stop all the timers * and then shut down each device hanging up the channels as we go. */ -static void gsm_cleanup_mux(struct gsm_mux *gsm) +static void gsm_cleanup_mux(struct gsm_mux *gsm, bool disc) { int i; struct gsm_dlci *dlci = gsm->dlci[0]; struct gsm_msg *txq, *ntxq; gsm->dead = true; + mutex_lock(&gsm->mutex); - spin_lock(&gsm_mux_lock); - for (i = 0; i < MAX_MUX; i++) { - if (gsm_mux[i] == gsm) { - gsm_mux[i] = NULL; - break; + if (dlci) { + if (disc && dlci->state != DLCI_CLOSED) { + gsm_dlci_begin_close(dlci); + wait_event(gsm->event, dlci->state == DLCI_CLOSED); } + dlci->dead = true; } - spin_unlock(&gsm_mux_lock); - /* open failed before registering => nothing to do */ - if (i == MAX_MUX) - return; + /* Finish outstanding timers, making sure they are done */ del_timer_sync(&gsm->t2_timer); - /* Now we are sure T2 has stopped */ - if (dlci) - dlci->dead = true; - /* Free up any link layer users */ - mutex_lock(&gsm->mutex); - for (i = 0; i < NUM_DLCI; i++) + /* Free up any link layer users and finally the control channel */ + for (i = NUM_DLCI - 1; i >= 0; i--) if (gsm->dlci[i]) gsm_dlci_release(gsm->dlci[i]); mutex_unlock(&gsm->mutex); /* Now wipe the queues */ + tty_ldisc_flush(gsm->tty); list_for_each_entry_safe(txq, ntxq, &gsm->tx_list, list) kfree(txq); INIT_LIST_HEAD(&gsm->tx_list); @@ -2191,7 +2187,6 @@ static void gsm_cleanup_mux(struct gsm_mux *gsm) static int gsm_activate_mux(struct gsm_mux *gsm) { struct gsm_dlci *dlci; - int i = 0; timer_setup(&gsm->t2_timer, gsm_control_retransmit, 0); init_waitqueue_head(&gsm->event); @@ -2203,18 +2198,6 @@ static int gsm_activate_mux(struct gsm_mux *gsm) else gsm->receive = gsm1_receive; - spin_lock(&gsm_mux_lock); - for (i = 0; i < MAX_MUX; i++) { - if (gsm_mux[i] == NULL) { - gsm->num = i; - gsm_mux[i] = gsm; - break; - } - } - spin_unlock(&gsm_mux_lock); - if (i == MAX_MUX) - return -EBUSY; - dlci = gsm_dlci_alloc(gsm, 0); if (dlci == NULL) return -ENOMEM; @@ -2230,6 +2213,15 @@ static int gsm_activate_mux(struct gsm_mux *gsm) */ static void gsm_free_mux(struct gsm_mux *gsm) { + int i; + + for (i = 0; i < MAX_MUX; i++) { + if (gsm == gsm_mux[i]) { + gsm_mux[i] = NULL; + break; + } + } + mutex_destroy(&gsm->mutex); kfree(gsm->txframe); kfree(gsm->buf); kfree(gsm); @@ -2249,12 +2241,20 @@ static void gsm_free_muxr(struct kref *ref) static inline void mux_get(struct gsm_mux *gsm) { + unsigned long flags; + + spin_lock_irqsave(&gsm_mux_lock, flags); kref_get(&gsm->ref); + spin_unlock_irqrestore(&gsm_mux_lock, flags); } static inline void mux_put(struct gsm_mux *gsm) { + unsigned long flags; + + spin_lock_irqsave(&gsm_mux_lock, flags); kref_put(&gsm->ref, gsm_free_muxr); + spin_unlock_irqrestore(&gsm_mux_lock, flags); } static inline unsigned int mux_num_to_base(struct gsm_mux *gsm) @@ -2275,6 +2275,7 @@ static inline unsigned int mux_line_to_num(unsigned int line) static struct gsm_mux *gsm_alloc_mux(void) { + int i; struct gsm_mux *gsm = kzalloc(sizeof(struct gsm_mux), GFP_KERNEL); if (gsm == NULL) return NULL; @@ -2283,7 +2284,7 @@ static struct gsm_mux *gsm_alloc_mux(void) kfree(gsm); return NULL; } - gsm->txframe = kmalloc(2 * MAX_MRU + 2, GFP_KERNEL); + gsm->txframe = kmalloc(2 * (MAX_MTU + PROT_OVERHEAD - 1), GFP_KERNEL); if (gsm->txframe == NULL) { kfree(gsm->buf); kfree(gsm); @@ -2304,6 +2305,26 @@ static struct gsm_mux *gsm_alloc_mux(void) gsm->mtu = 64; gsm->dead = true; /* Avoid early tty opens */ + /* Store the instance to the mux array or abort if no space is + * available. + */ + spin_lock(&gsm_mux_lock); + for (i = 0; i < MAX_MUX; i++) { + if (!gsm_mux[i]) { + gsm_mux[i] = gsm; + gsm->num = i; + break; + } + } + spin_unlock(&gsm_mux_lock); + if (i == MAX_MUX) { + mutex_destroy(&gsm->mutex); + kfree(gsm->txframe); + kfree(gsm->buf); + kfree(gsm); + return NULL; + } + return gsm; } @@ -2339,7 +2360,7 @@ static int gsm_config(struct gsm_mux *gsm, struct gsm_config *c) /* Check the MRU/MTU range looks sane */ if (c->mru > MAX_MRU || c->mtu > MAX_MTU || c->mru < 8 || c->mtu < 8) return -EINVAL; - if (c->n2 < 3) + if (c->n2 > 255) return -EINVAL; if (c->encapsulation > 1) /* Basic, advanced, no I */ return -EINVAL; @@ -2370,19 +2391,11 @@ static int gsm_config(struct gsm_mux *gsm, struct gsm_config *c) /* * Close down what is needed, restart and initiate the new - * configuration + * configuration. On the first time there is no DLCI[0] + * and closing or cleaning up is not necessary. */ - - if (gsm->initiator && (need_close || need_restart)) { - int ret; - - ret = gsm_disconnect(gsm); - - if (ret) - return ret; - } - if (need_restart) - gsm_cleanup_mux(gsm); + if (need_close || need_restart) + gsm_cleanup_mux(gsm, true); gsm->initiator = c->initiator; gsm->mru = c->mru; @@ -2450,25 +2463,26 @@ static int gsmld_attach_gsm(struct tty_struct *tty, struct gsm_mux *gsm) int ret, i; gsm->tty = tty_kref_get(tty); + /* Turn off tty XON/XOFF handling to handle it explicitly. */ + gsm->old_c_iflag = tty->termios.c_iflag; + tty->termios.c_iflag &= (IXON | IXOFF); ret = gsm_activate_mux(gsm); if (ret != 0) tty_kref_put(gsm->tty); else { /* Don't register device 0 - this is the control channel and not a usable tty interface */ - if (gsm->initiator) { - base = mux_num_to_base(gsm); /* Base for this MUX */ - for (i = 1; i < NUM_DLCI; i++) { - struct device *dev; + base = mux_num_to_base(gsm); /* Base for this MUX */ + for (i = 1; i < NUM_DLCI; i++) { + struct device *dev; - dev = tty_register_device(gsm_tty_driver, + dev = tty_register_device(gsm_tty_driver, base + i, NULL); - if (IS_ERR(dev)) { - for (i--; i >= 1; i--) - tty_unregister_device(gsm_tty_driver, - base + i); - return PTR_ERR(dev); - } + if (IS_ERR(dev)) { + for (i--; i >= 1; i--) + tty_unregister_device(gsm_tty_driver, + base + i); + return PTR_ERR(dev); } } } @@ -2490,11 +2504,10 @@ static void gsmld_detach_gsm(struct tty_struct *tty, struct gsm_mux *gsm) int i; WARN_ON(tty != gsm->tty); - if (gsm->initiator) { - for (i = 1; i < NUM_DLCI; i++) - tty_unregister_device(gsm_tty_driver, base + i); - } - gsm_cleanup_mux(gsm); + for (i = 1; i < NUM_DLCI; i++) + tty_unregister_device(gsm_tty_driver, base + i); + /* Restore tty XON/XOFF handling. */ + gsm->tty->termios.c_iflag = gsm->old_c_iflag; tty_kref_put(gsm->tty); gsm->tty = NULL; } @@ -2559,6 +2572,12 @@ static void gsmld_close(struct tty_struct *tty) { struct gsm_mux *gsm = tty->disc_data; + /* The ldisc locks and closes the port before calling our close. This + * means we have no way to do a proper disconnect. We will not bother + * to do one. + */ + gsm_cleanup_mux(gsm, false); + gsmld_detach_gsm(tty, gsm); gsmld_flush_buffer(tty); @@ -2597,7 +2616,7 @@ static int gsmld_open(struct tty_struct *tty) ret = gsmld_attach_gsm(tty, gsm); if (ret != 0) { - gsm_cleanup_mux(gsm); + gsm_cleanup_mux(gsm, false); mux_put(gsm); } return ret; @@ -2954,26 +2973,78 @@ static struct tty_ldisc_ops tty_ldisc_packet = { #define TX_SIZE 512 -static int gsmtty_modem_update(struct gsm_dlci *dlci, u8 brk) +/** + * gsm_modem_upd_via_data - send modem bits via convergence layer + * @dlci: channel + * @brk: break signal + * + * Send an empty frame to signal mobile state changes and to transmit the + * break signal for adaption 2. + */ + +static void gsm_modem_upd_via_data(struct gsm_dlci *dlci, u8 brk) { - u8 modembits[5]; + struct gsm_mux *gsm = dlci->gsm; + unsigned long flags; + + if (dlci->state != DLCI_OPEN || dlci->adaption != 2) + return; + + spin_lock_irqsave(&gsm->tx_lock, flags); + gsm_dlci_modem_output(gsm, dlci, brk); + spin_unlock_irqrestore(&gsm->tx_lock, flags); +} + +/** + * gsm_modem_upd_via_msc - send modem bits via control frame + * @dlci: channel + * @brk: break signal + */ + +static int gsm_modem_upd_via_msc(struct gsm_dlci *dlci, u8 brk) +{ + u8 modembits[3]; struct gsm_control *ctrl; int len = 2; - if (brk) - len++; + if (dlci->gsm->encoding != 0) + return 0; - modembits[0] = len << 1 | EA; /* Data bytes */ - modembits[1] = dlci->addr << 2 | 3; /* DLCI, EA, 1 */ - modembits[2] = gsm_encode_modem(dlci) << 1 | EA; - if (brk) - modembits[3] = brk << 4 | 2 | EA; /* Valid, EA */ - ctrl = gsm_control_send(dlci->gsm, CMD_MSC, modembits, len + 1); + modembits[0] = (dlci->addr << 2) | 2 | EA; /* DLCI, Valid, EA */ + if (!brk) { + modembits[1] = (gsm_encode_modem(dlci) << 1) | EA; + } else { + modembits[1] = gsm_encode_modem(dlci) << 1; + modembits[2] = (brk << 4) | 2 | EA; /* Length, Break, EA */ + len++; + } + ctrl = gsm_control_send(dlci->gsm, CMD_MSC, modembits, len); if (ctrl == NULL) return -ENOMEM; return gsm_control_wait(dlci->gsm, ctrl); } +/** + * gsm_modem_update - send modem status line state + * @dlci: channel + * @brk: break signal + */ + +static int gsm_modem_update(struct gsm_dlci *dlci, u8 brk) +{ + if (dlci->adaption == 2) { + /* Send convergence layer type 2 empty data frame. */ + gsm_modem_upd_via_data(dlci, brk); + return 0; + } else if (dlci->gsm->encoding == 0) { + /* Send as MSC control message. */ + return gsm_modem_upd_via_msc(dlci, brk); + } + + /* Modem status lines are not supported. */ + return -EPROTONOSUPPORT; +} + static int gsm_carrier_raised(struct tty_port *port) { struct gsm_dlci *dlci = container_of(port, struct gsm_dlci, port); @@ -3006,7 +3077,7 @@ static void gsm_dtr_rts(struct tty_port *port, int onoff) modem_tx &= ~(TIOCM_DTR | TIOCM_RTS); if (modem_tx != dlci->modem_tx) { dlci->modem_tx = modem_tx; - gsmtty_modem_update(dlci, 0); + gsm_modem_update(dlci, 0); } } @@ -3155,13 +3226,17 @@ static unsigned int gsmtty_chars_in_buffer(struct tty_struct *tty) static void gsmtty_flush_buffer(struct tty_struct *tty) { struct gsm_dlci *dlci = tty->driver_data; + unsigned long flags; + if (dlci->state == DLCI_CLOSED) return; /* Caution needed: If we implement reliable transport classes then the data being transmitted can't simply be junked once it has first hit the stack. Until then we can just blow it away */ + spin_lock_irqsave(&dlci->lock, flags); kfifo_reset(&dlci->fifo); + spin_unlock_irqrestore(&dlci->lock, flags); /* Need to unhook this DLCI from the transmit queue logic */ } @@ -3193,7 +3268,7 @@ static int gsmtty_tiocmset(struct tty_struct *tty, if (modem_tx != dlci->modem_tx) { dlci->modem_tx = modem_tx; - return gsmtty_modem_update(dlci, 0); + return gsm_modem_update(dlci, 0); } return 0; } @@ -3254,7 +3329,7 @@ static void gsmtty_throttle(struct tty_struct *tty) dlci->modem_tx &= ~TIOCM_RTS; dlci->throttled = true; /* Send an MSC with RTS cleared */ - gsmtty_modem_update(dlci, 0); + gsm_modem_update(dlci, 0); } static void gsmtty_unthrottle(struct tty_struct *tty) @@ -3266,7 +3341,7 @@ static void gsmtty_unthrottle(struct tty_struct *tty) dlci->modem_tx |= TIOCM_RTS; dlci->throttled = false; /* Send an MSC with RTS set */ - gsmtty_modem_update(dlci, 0); + gsm_modem_update(dlci, 0); } static int gsmtty_break_ctl(struct tty_struct *tty, int state) @@ -3284,7 +3359,7 @@ static int gsmtty_break_ctl(struct tty_struct *tty, int state) if (encode > 0x0F) encode = 0x0F; /* Best effort */ } - return gsmtty_modem_update(dlci, encode); + return gsm_modem_update(dlci, encode); } static void gsmtty_cleanup(struct tty_struct *tty) diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index e17e97ea86fa..a293e9f107d0 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -2667,7 +2667,7 @@ enum pci_board_num_t { pbn_panacom2, pbn_panacom4, pbn_plx_romulus, - pbn_endrun_2_4000000, + pbn_endrun_2_3906250, pbn_oxsemi, pbn_oxsemi_1_3906250, pbn_oxsemi_2_3906250, @@ -3195,10 +3195,10 @@ static struct pciserial_board pci_boards[] = { * signal now many ports are available * 2 port 952 Uart support */ - [pbn_endrun_2_4000000] = { + [pbn_endrun_2_3906250] = { .flags = FL_BASE0, .num_ports = 2, - .base_baud = 4000000, + .base_baud = 3906250, .uart_offset = 0x200, .first_offset = 0x1000, }, @@ -4115,7 +4115,7 @@ static const struct pci_device_id serial_pci_tbl[] = { */ { PCI_VENDOR_ID_ENDRUN, PCI_DEVICE_ID_ENDRUN_1588, PCI_ANY_ID, PCI_ANY_ID, 0, 0, - pbn_endrun_2_4000000 }, + pbn_endrun_2_3906250 }, /* * Quatech cards. These actually have configurable clocks but for * now we just use the default. diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 318af6f13605..1fbd5bf264be 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -1675,11 +1675,11 @@ static void serial8250_start_tx(struct uart_port *port) struct uart_8250_port *up = up_to_u8250p(port); struct uart_8250_em485 *em485 = up->em485; - serial8250_rpm_get_tx(up); - if (!port->x_char && uart_circ_empty(&port->state->xmit)) return; + serial8250_rpm_get_tx(up); + if (em485 && em485->active_timer == &em485->start_tx_timer) return; @@ -3329,7 +3329,7 @@ static void serial8250_console_restore(struct uart_8250_port *up) serial8250_set_divisor(port, baud, quot, frac); serial_port_out(port, UART_LCR, up->lcr); - serial8250_out_MCR(up, UART_MCR_DTR | UART_MCR_RTS); + serial8250_out_MCR(up, up->mcr | UART_MCR_DTR | UART_MCR_RTS); } /* diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index 51ecb050ae40..4d11a3e547f9 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -1255,13 +1255,18 @@ static inline bool pl011_dma_rx_running(struct uart_amba_port *uap) static void pl011_rs485_tx_stop(struct uart_amba_port *uap) { + /* + * To be on the safe side only time out after twice as many iterations + * as fifo size. + */ + const int MAX_TX_DRAIN_ITERS = uap->port.fifosize * 2; struct uart_port *port = &uap->port; int i = 0; u32 cr; /* Wait until hardware tx queue is empty */ while (!pl011_tx_empty(port)) { - if (i == port->fifosize) { + if (i > MAX_TX_DRAIN_ITERS) { dev_warn(port->dev, "timeout while draining hardware tx queue\n"); break; @@ -2052,7 +2057,7 @@ pl011_set_termios(struct uart_port *port, struct ktermios *termios, * with the given baud rate. We use this as the poll interval when we * wait for the tx queue to empty. */ - uap->rs485_tx_drain_interval = (bits * 1000 * 1000) / baud; + uap->rs485_tx_drain_interval = DIV_ROUND_UP(bits * 1000 * 1000, baud); pl011_setup_status_masks(port, termios); diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index fd38e6ed4fda..a2100be8d554 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -1448,7 +1448,7 @@ static int imx_uart_startup(struct uart_port *port) imx_uart_writel(sport, ucr1, UCR1); ucr4 = imx_uart_readl(sport, UCR4) & ~(UCR4_OREN | UCR4_INVR); - if (!sport->dma_is_enabled) + if (!dma_is_inited) ucr4 |= UCR4_OREN; if (sport->inverted_rx) ucr4 |= UCR4_INVR; diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index e857fb61efbf..5fb201c1b563 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -1238,12 +1238,10 @@ static void sc16is7xx_shutdown(struct uart_port *port) /* Disable all interrupts */ sc16is7xx_port_write(port, SC16IS7XX_IER_REG, 0); - /* Disable TX/RX, clear auto RS485 and RTS invert */ + /* Disable TX/RX */ sc16is7xx_port_update(port, SC16IS7XX_EFCR_REG, SC16IS7XX_EFCR_RXDISABLE_BIT | - SC16IS7XX_EFCR_TXDISABLE_BIT | - SC16IS7XX_EFCR_AUTO_RS485_BIT | - SC16IS7XX_EFCR_RTS_INVERT_BIT, + SC16IS7XX_EFCR_TXDISABLE_BIT, SC16IS7XX_EFCR_RXDISABLE_BIT | SC16IS7XX_EFCR_TXDISABLE_BIT); diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-gadget.c index f9af7ebe003d..d6d515d598dc 100644 --- a/drivers/usb/cdns3/cdns3-gadget.c +++ b/drivers/usb/cdns3/cdns3-gadget.c @@ -2684,6 +2684,7 @@ int __cdns3_gadget_ep_clear_halt(struct cdns3_endpoint *priv_ep) struct usb_request *request; struct cdns3_request *priv_req; struct cdns3_trb *trb = NULL; + struct cdns3_trb trb_tmp; int ret; int val; @@ -2693,8 +2694,10 @@ int __cdns3_gadget_ep_clear_halt(struct cdns3_endpoint *priv_ep) if (request) { priv_req = to_cdns3_request(request); trb = priv_req->trb; - if (trb) + if (trb) { + trb_tmp = *trb; trb->control = trb->control ^ cpu_to_le32(TRB_CYCLE); + } } writel(EP_CMD_CSTALL | EP_CMD_EPRST, &priv_dev->regs->ep_cmd); @@ -2709,7 +2712,7 @@ int __cdns3_gadget_ep_clear_halt(struct cdns3_endpoint *priv_ep) if (request) { if (trb) - trb->control = trb->control ^ cpu_to_le32(TRB_CYCLE); + *trb = trb_tmp; cdns3_rearm_transfer(priv_ep, 1); } diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 6abb7294e919..b5b85bf80329 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -1209,12 +1209,16 @@ static int do_proc_control(struct usb_dev_state *ps, usb_unlock_device(dev); i = usbfs_start_wait_urb(urb, tmo, &actlen); + + /* Linger a bit, prior to the next control message. */ + if (dev->quirks & USB_QUIRK_DELAY_CTRL_MSG) + msleep(200); usb_lock_device(dev); snoop_urb(dev, NULL, pipe, actlen, i, COMPLETE, tbuf, actlen); if (!i && actlen) { if (copy_to_user(ctrl->data, tbuf, actlen)) { ret = -EFAULT; - goto recv_fault; + goto done; } } } else { @@ -1231,6 +1235,10 @@ static int do_proc_control(struct usb_dev_state *ps, usb_unlock_device(dev); i = usbfs_start_wait_urb(urb, tmo, &actlen); + + /* Linger a bit, prior to the next control message. */ + if (dev->quirks & USB_QUIRK_DELAY_CTRL_MSG) + msleep(200); usb_lock_device(dev); snoop_urb(dev, NULL, pipe, actlen, i, COMPLETE, NULL, 0); } @@ -1242,10 +1250,6 @@ static int do_proc_control(struct usb_dev_state *ps, } ret = (i < 0 ? i : actlen); - recv_fault: - /* Linger a bit, prior to the next control message. */ - if (dev->quirks & USB_QUIRK_DELAY_CTRL_MSG) - msleep(200); done: kfree(dr); usb_free_urb(urb); diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index d3c14b5ed4a1..97b44a68668a 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -404,6 +404,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x0b05, 0x17e0), .driver_info = USB_QUIRK_IGNORE_REMOTE_WAKEUP }, + /* Realtek Semiconductor Corp. Mass Storage Device (Multicard Reader)*/ + { USB_DEVICE(0x0bda, 0x0151), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, + /* Realtek hub in Dell WD19 (Type-C) */ { USB_DEVICE(0x0bda, 0x0487), .driver_info = USB_QUIRK_NO_LPM }, @@ -507,6 +510,9 @@ static const struct usb_device_id usb_quirk_list[] = { /* DJI CineSSD */ { USB_DEVICE(0x2ca3, 0x0031), .driver_info = USB_QUIRK_NO_LPM }, + /* VCOM device */ + { USB_DEVICE(0x4296, 0x7570), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, + /* INTEL VALUE SSD */ { USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME }, diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 1170b800acdc..d28cd1a6709b 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -274,7 +274,8 @@ int dwc3_core_soft_reset(struct dwc3 *dwc) reg = dwc3_readl(dwc->regs, DWC3_DCTL); reg |= DWC3_DCTL_CSFTRST; - dwc3_writel(dwc->regs, DWC3_DCTL, reg); + reg &= ~DWC3_DCTL_RUN_STOP; + dwc3_gadget_dctl_write_safe(dwc, reg); /* * For DWC_usb31 controller 1.90a and later, the DCTL.CSFRST bit @@ -1377,10 +1378,10 @@ static void dwc3_get_properties(struct dwc3 *dwc) u8 lpm_nyet_threshold; u8 tx_de_emphasis; u8 hird_threshold; - u8 rx_thr_num_pkt_prd; - u8 rx_max_burst_prd; - u8 tx_thr_num_pkt_prd; - u8 tx_max_burst_prd; + u8 rx_thr_num_pkt_prd = 0; + u8 rx_max_burst_prd = 0; + u8 tx_thr_num_pkt_prd = 0; + u8 tx_max_burst_prd = 0; u8 tx_fifo_resize_max_num; const char *usb_psy_name; int ret; @@ -1690,21 +1691,44 @@ static int dwc3_probe(struct platform_device *pdev) /* * Clocks are optional, but new DT platforms should support all * clocks as required by the DT-binding. + * Some devices have different clock names in legacy device trees, + * check for them to retain backwards compatibility. */ dwc->bus_clk = devm_clk_get_optional(dev, "bus_early"); if (IS_ERR(dwc->bus_clk)) return dev_err_probe(dev, PTR_ERR(dwc->bus_clk), "could not get bus clock\n"); + if (dwc->bus_clk == NULL) { + dwc->bus_clk = devm_clk_get_optional(dev, "bus_clk"); + if (IS_ERR(dwc->bus_clk)) + return dev_err_probe(dev, PTR_ERR(dwc->bus_clk), + "could not get bus clock\n"); + } + dwc->ref_clk = devm_clk_get_optional(dev, "ref"); if (IS_ERR(dwc->ref_clk)) return dev_err_probe(dev, PTR_ERR(dwc->ref_clk), "could not get ref clock\n"); + if (dwc->ref_clk == NULL) { + dwc->ref_clk = devm_clk_get_optional(dev, "ref_clk"); + if (IS_ERR(dwc->ref_clk)) + return dev_err_probe(dev, PTR_ERR(dwc->ref_clk), + "could not get ref clock\n"); + } + dwc->susp_clk = devm_clk_get_optional(dev, "suspend"); if (IS_ERR(dwc->susp_clk)) return dev_err_probe(dev, PTR_ERR(dwc->susp_clk), "could not get suspend clock\n"); + + if (dwc->susp_clk == NULL) { + dwc->susp_clk = devm_clk_get_optional(dev, "suspend_clk"); + if (IS_ERR(dwc->susp_clk)) + return dev_err_probe(dev, PTR_ERR(dwc->susp_clk), + "could not get suspend clock\n"); + } } ret = reset_control_deassert(dwc->reset); diff --git a/drivers/usb/dwc3/drd.c b/drivers/usb/dwc3/drd.c index b60b5f7b6dff..8cad9e7d3368 100644 --- a/drivers/usb/dwc3/drd.c +++ b/drivers/usb/dwc3/drd.c @@ -584,16 +584,15 @@ int dwc3_drd_init(struct dwc3 *dwc) { int ret, irq; + if (ROLE_SWITCH && + device_property_read_bool(dwc->dev, "usb-role-switch")) + return dwc3_setup_role_switch(dwc); + dwc->edev = dwc3_get_extcon(dwc); if (IS_ERR(dwc->edev)) return PTR_ERR(dwc->edev); - if (ROLE_SWITCH && - device_property_read_bool(dwc->dev, "usb-role-switch")) { - ret = dwc3_setup_role_switch(dwc); - if (ret < 0) - return ret; - } else if (dwc->edev) { + if (dwc->edev) { dwc->edev_nb.notifier_call = dwc3_drd_notifier; ret = extcon_register_notifier(dwc->edev, EXTCON_USB_HOST, &dwc->edev_nb); diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index 33f657d83246..2e19e0e4ea53 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -45,6 +45,8 @@ #define PCI_DEVICE_ID_INTEL_ADLM 0x54ee #define PCI_DEVICE_ID_INTEL_ADLS 0x7ae1 #define PCI_DEVICE_ID_INTEL_RPLS 0x7a61 +#define PCI_DEVICE_ID_INTEL_MTLP 0x7ec1 +#define PCI_DEVICE_ID_INTEL_MTL 0x7e7e #define PCI_DEVICE_ID_INTEL_TGL 0x9a15 #define PCI_DEVICE_ID_AMD_MR 0x163a @@ -456,6 +458,12 @@ static const struct pci_device_id dwc3_pci_id_table[] = { { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_RPLS), (kernel_ulong_t) &dwc3_pci_intel_swnode, }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MTLP), + (kernel_ulong_t) &dwc3_pci_intel_swnode, }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MTL), + (kernel_ulong_t) &dwc3_pci_intel_swnode, }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_TGL), (kernel_ulong_t) &dwc3_pci_intel_swnode, }, diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index ab725d2262d6..0b9c2493844a 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -3274,6 +3274,7 @@ static int dwc3_gadget_ep_cleanup_completed_request(struct dwc3_ep *dep, const struct dwc3_event_depevt *event, struct dwc3_request *req, int status) { + int request_status; int ret; if (req->request.num_mapped_sgs) @@ -3294,7 +3295,35 @@ static int dwc3_gadget_ep_cleanup_completed_request(struct dwc3_ep *dep, req->needs_extra_trb = false; } - dwc3_gadget_giveback(dep, req, status); + /* + * The event status only reflects the status of the TRB with IOC set. + * For the requests that don't set interrupt on completion, the driver + * needs to check and return the status of the completed TRBs associated + * with the request. Use the status of the last TRB of the request. + */ + if (req->request.no_interrupt) { + struct dwc3_trb *trb; + + trb = dwc3_ep_prev_trb(dep, dep->trb_dequeue); + switch (DWC3_TRB_SIZE_TRBSTS(trb->size)) { + case DWC3_TRBSTS_MISSED_ISOC: + /* Isoc endpoint only */ + request_status = -EXDEV; + break; + case DWC3_TRB_STS_XFER_IN_PROG: + /* Applicable when End Transfer with ForceRM=0 */ + case DWC3_TRBSTS_SETUP_PENDING: + /* Control endpoint only */ + case DWC3_TRBSTS_OK: + default: + request_status = 0; + break; + } + } else { + request_status = status; + } + + dwc3_gadget_giveback(dep, req, request_status); out: return ret; diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index 1fb837d9271e..84b73cb03f87 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -1438,6 +1438,8 @@ static void configfs_composite_unbind(struct usb_gadget *gadget) usb_ep_autoconfig_reset(cdev->gadget); spin_lock_irqsave(&gi->spinlock, flags); cdev->gadget = NULL; + cdev->deactivations = 0; + gadget->deactivated = false; set_gadget_data(gadget, NULL); spin_unlock_irqrestore(&gi->spinlock, flags); } diff --git a/drivers/usb/gadget/function/uvc_queue.c b/drivers/usb/gadget/function/uvc_queue.c index d852ac9e47e7..2cda982f3765 100644 --- a/drivers/usb/gadget/function/uvc_queue.c +++ b/drivers/usb/gadget/function/uvc_queue.c @@ -264,6 +264,8 @@ void uvcg_queue_cancel(struct uvc_video_queue *queue, int disconnect) buf->state = UVC_BUF_STATE_ERROR; vb2_buffer_done(&buf->buf.vb2_buf, VB2_BUF_STATE_ERROR); } + queue->buf_used = 0; + /* This must be protected by the irqlock spinlock to avoid race * conditions between uvc_queue_buffer and the disconnection event that * could result in an interruptible wait in uvc_dequeue_buffer. Do not diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c index 3d82e0b853be..684164fa9716 100644 --- a/drivers/usb/host/ehci-hcd.c +++ b/drivers/usb/host/ehci-hcd.c @@ -1103,6 +1103,26 @@ static void ehci_remove_device(struct usb_hcd *hcd, struct usb_device *udev) #ifdef CONFIG_PM +/* Clear wakeup signal locked in zhaoxin platform when device plug in. */ +static void ehci_zx_wakeup_clear(struct ehci_hcd *ehci) +{ + u32 __iomem *reg = &ehci->regs->port_status[4]; + u32 t1 = ehci_readl(ehci, reg); + + t1 &= (u32)~0xf0000; + t1 |= PORT_TEST_FORCE; + ehci_writel(ehci, t1, reg); + t1 = ehci_readl(ehci, reg); + msleep(1); + t1 &= (u32)~0xf0000; + ehci_writel(ehci, t1, reg); + ehci_readl(ehci, reg); + msleep(1); + t1 = ehci_readl(ehci, reg); + ehci_writel(ehci, t1 | PORT_CSC, reg); + ehci_readl(ehci, reg); +} + /* suspend/resume, section 4.3 */ /* These routines handle the generic parts of controller suspend/resume */ @@ -1154,6 +1174,9 @@ int ehci_resume(struct usb_hcd *hcd, bool force_reset) if (ehci->shutdown) return 0; /* Controller is dead */ + if (ehci->zx_wakeup_clear_needed) + ehci_zx_wakeup_clear(ehci); + /* * If CF is still set and reset isn't forced * then we maintained suspend power. diff --git a/drivers/usb/host/ehci-pci.c b/drivers/usb/host/ehci-pci.c index 638f03b89739..9937c5a7efc2 100644 --- a/drivers/usb/host/ehci-pci.c +++ b/drivers/usb/host/ehci-pci.c @@ -231,6 +231,10 @@ static int ehci_pci_setup(struct usb_hcd *hcd) ehci->is_aspeed = 1; } break; + case PCI_VENDOR_ID_ZHAOXIN: + if (pdev->device == 0x3104 && (pdev->revision & 0xf0) == 0x90) + ehci->zx_wakeup_clear_needed = 1; + break; } /* optional debug port, normally in the first BAR */ diff --git a/drivers/usb/host/ehci.h b/drivers/usb/host/ehci.h index fdd073cc053b..ad3f13a3eaf1 100644 --- a/drivers/usb/host/ehci.h +++ b/drivers/usb/host/ehci.h @@ -220,6 +220,7 @@ struct ehci_hcd { /* one per controller */ unsigned imx28_write_fix:1; /* For Freescale i.MX28 */ unsigned spurious_oc:1; unsigned is_aspeed:1; + unsigned zx_wakeup_clear_needed:1; /* required for usb32 quirk */ #define OHCI_CTRL_HCFS (3 << 6) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 1e7dc130c39a..f65f1ba2b592 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -1434,7 +1434,7 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, } spin_unlock_irqrestore(&xhci->lock, flags); if (!wait_for_completion_timeout(&bus_state->u3exit_done[wIndex], - msecs_to_jiffies(100))) + msecs_to_jiffies(500))) xhci_dbg(xhci, "missing U0 port change event for port %d-%d\n", hcd->self.busnum, wIndex + 1); spin_lock_irqsave(&xhci->lock, flags); diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 5c351970cdf1..d7e0e6ebf080 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -59,6 +59,7 @@ #define PCI_DEVICE_ID_INTEL_TIGER_LAKE_XHCI 0x9a13 #define PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_XHCI 0x1138 #define PCI_DEVICE_ID_INTEL_ALDER_LAKE_XHCI 0x461e +#define PCI_DEVICE_ID_INTEL_ALDER_LAKE_PCH_XHCI 0x51ed #define PCI_DEVICE_ID_AMD_RENOIR_XHCI 0x1639 #define PCI_DEVICE_ID_AMD_PROMONTORYA_4 0x43b9 @@ -266,7 +267,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == PCI_DEVICE_ID_INTEL_ICE_LAKE_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_TIGER_LAKE_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_XHCI || - pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_XHCI)) + pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_PCH_XHCI)) xhci->quirks |= XHCI_DEFAULT_PM_RUNTIME_ALLOW; if (pdev->vendor == PCI_VENDOR_ID_ETRON && diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index d0b6806275e0..f9707997969d 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -3141,6 +3141,7 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd) if (event_loop++ < TRBS_PER_SEGMENT / 2) continue; xhci_update_erst_dequeue(xhci, event_ring_deq); + event_ring_deq = xhci->event_ring->dequeue; /* ring is half-full, force isoc trbs to interrupt more often */ if (xhci->isoc_bei_interval > AVOID_BEI_INTERVAL_MIN) diff --git a/drivers/usb/host/xhci-tegra.c b/drivers/usb/host/xhci-tegra.c index c8af2cd2216d..996958a6565c 100644 --- a/drivers/usb/host/xhci-tegra.c +++ b/drivers/usb/host/xhci-tegra.c @@ -1034,13 +1034,13 @@ static int tegra_xusb_unpowergate_partitions(struct tegra_xusb *tegra) int rc; if (tegra->use_genpd) { - rc = pm_runtime_get_sync(tegra->genpd_dev_ss); + rc = pm_runtime_resume_and_get(tegra->genpd_dev_ss); if (rc < 0) { dev_err(dev, "failed to enable XUSB SS partition\n"); return rc; } - rc = pm_runtime_get_sync(tegra->genpd_dev_host); + rc = pm_runtime_resume_and_get(tegra->genpd_dev_host); if (rc < 0) { dev_err(dev, "failed to enable XUSB Host partition\n"); pm_runtime_put_sync(tegra->genpd_dev_ss); diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 642610c78f58..25b87e99b4dd 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -781,6 +781,17 @@ void xhci_shutdown(struct usb_hcd *hcd) if (xhci->quirks & XHCI_SPURIOUS_REBOOT) usb_disable_xhci_ports(to_pci_dev(hcd->self.sysdev)); + /* Don't poll the roothubs after shutdown. */ + xhci_dbg(xhci, "%s: stopping usb%d port polling.\n", + __func__, hcd->self.busnum); + clear_bit(HCD_FLAG_POLL_RH, &hcd->flags); + del_timer_sync(&hcd->rh_timer); + + if (xhci->shared_hcd) { + clear_bit(HCD_FLAG_POLL_RH, &xhci->shared_hcd->flags); + del_timer_sync(&xhci->shared_hcd->rh_timer); + } + spin_lock_irq(&xhci->lock); xhci_halt(xhci); /* Workaround for spurious wakeups at shutdown with HSW */ diff --git a/drivers/usb/misc/qcom_eud.c b/drivers/usb/misc/qcom_eud.c index f929bffdc5d1..b7f13df00764 100644 --- a/drivers/usb/misc/qcom_eud.c +++ b/drivers/usb/misc/qcom_eud.c @@ -186,16 +186,16 @@ static int eud_probe(struct platform_device *pdev) chip->dev = &pdev->dev; - ret = devm_add_action_or_reset(chip->dev, eud_role_switch_release, chip); - if (ret) - return dev_err_probe(chip->dev, ret, - "failed to add role switch release action\n"); - chip->role_sw = usb_role_switch_get(&pdev->dev); if (IS_ERR(chip->role_sw)) return dev_err_probe(chip->dev, PTR_ERR(chip->role_sw), "failed to get role switch\n"); + ret = devm_add_action_or_reset(chip->dev, eud_role_switch_release, chip); + if (ret) + return dev_err_probe(chip->dev, ret, + "failed to add role switch release action\n"); + chip->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(chip->base)) return PTR_ERR(chip->base); diff --git a/drivers/usb/misc/uss720.c b/drivers/usb/misc/uss720.c index 748139d26263..0be8efcda15d 100644 --- a/drivers/usb/misc/uss720.c +++ b/drivers/usb/misc/uss720.c @@ -71,6 +71,7 @@ static void destroy_priv(struct kref *kref) dev_dbg(&priv->usbdev->dev, "destroying priv datastructure\n"); usb_put_dev(priv->usbdev); + priv->usbdev = NULL; kfree(priv); } @@ -736,7 +737,6 @@ static int uss720_probe(struct usb_interface *intf, parport_announce_port(pp); usb_set_intfdata(intf, pp); - usb_put_dev(usbdev); return 0; probe_abort: @@ -754,7 +754,6 @@ static void uss720_disconnect(struct usb_interface *intf) usb_set_intfdata(intf, NULL); if (pp) { priv = pp->private_data; - priv->usbdev = NULL; priv->pp = NULL; dev_dbg(&intf->dev, "parport_remove_port\n"); parport_remove_port(pp); diff --git a/drivers/usb/mtu3/mtu3_dr.c b/drivers/usb/mtu3/mtu3_dr.c index a6b04831b20b..9b8aded3d95e 100644 --- a/drivers/usb/mtu3/mtu3_dr.c +++ b/drivers/usb/mtu3/mtu3_dr.c @@ -21,10 +21,8 @@ static inline struct ssusb_mtk *otg_sx_to_ssusb(struct otg_switch_mtk *otg_sx) static void toggle_opstate(struct ssusb_mtk *ssusb) { - if (!ssusb->otg_switch.is_u3_drd) { - mtu3_setbits(ssusb->mac_base, U3D_DEVICE_CONTROL, DC_SESSION); - mtu3_setbits(ssusb->mac_base, U3D_POWER_MANAGEMENT, SOFT_CONN); - } + mtu3_setbits(ssusb->mac_base, U3D_DEVICE_CONTROL, DC_SESSION); + mtu3_setbits(ssusb->mac_base, U3D_POWER_MANAGEMENT, SOFT_CONN); } /* only port0 supports dual-role mode */ diff --git a/drivers/usb/phy/phy-generic.c b/drivers/usb/phy/phy-generic.c index 661a229c105d..34b9f8140187 100644 --- a/drivers/usb/phy/phy-generic.c +++ b/drivers/usb/phy/phy-generic.c @@ -268,6 +268,13 @@ int usb_phy_gen_create_phy(struct device *dev, struct usb_phy_generic *nop) return -EPROBE_DEFER; } + nop->vbus_draw = devm_regulator_get_exclusive(dev, "vbus"); + if (PTR_ERR(nop->vbus_draw) == -ENODEV) + nop->vbus_draw = NULL; + if (IS_ERR(nop->vbus_draw)) + return dev_err_probe(dev, PTR_ERR(nop->vbus_draw), + "could not get vbus regulator\n"); + nop->dev = dev; nop->phy.dev = nop->dev; nop->phy.label = "nop-xceiv"; diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index a27f7efcec6a..c374620a486f 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -194,6 +194,8 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x16DC, 0x0015) }, /* W-IE-NE-R Plein & Baus GmbH CML Control, Monitoring and Data Logger */ { USB_DEVICE(0x17A8, 0x0001) }, /* Kamstrup Optical Eye/3-wire */ { USB_DEVICE(0x17A8, 0x0005) }, /* Kamstrup M-Bus Master MultiPort 250D */ + { USB_DEVICE(0x17A8, 0x0101) }, /* Kamstrup 868 MHz wM-Bus C-Mode Meter Reader (Int Ant) */ + { USB_DEVICE(0x17A8, 0x0102) }, /* Kamstrup 868 MHz wM-Bus C-Mode Meter Reader (Ext Ant) */ { USB_DEVICE(0x17F4, 0xAAAA) }, /* Wavesense Jazz blood glucose meter */ { USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */ { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */ diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index e7755d9cfc61..1364ce7f0abf 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -432,6 +432,8 @@ static void option_instat_callback(struct urb *urb); #define CINTERION_PRODUCT_CLS8 0x00b0 #define CINTERION_PRODUCT_MV31_MBIM 0x00b3 #define CINTERION_PRODUCT_MV31_RMNET 0x00b7 +#define CINTERION_PRODUCT_MV32_WA 0x00f1 +#define CINTERION_PRODUCT_MV32_WB 0x00f2 /* Olivetti products */ #define OLIVETTI_VENDOR_ID 0x0b3c @@ -1217,6 +1219,10 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1056, 0xff), /* Telit FD980 */ .driver_info = NCTRL(2) | RSVD(3) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1057, 0xff), /* Telit FN980 */ + .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1058, 0xff), /* Telit FN980 (PCIe) */ + .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1060, 0xff), /* Telit LN920 (rmnet) */ .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1061, 0xff), /* Telit LN920 (MBIM) */ @@ -1233,6 +1239,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(2) | RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1073, 0xff), /* Telit FN990 (ECM) */ .driver_info = NCTRL(0) | RSVD(1) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1075, 0xff), /* Telit FN990 (PCIe) */ + .driver_info = RSVD(0) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910), .driver_info = NCTRL(0) | RSVD(1) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM), @@ -1969,6 +1977,10 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(3)}, { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV31_RMNET, 0xff), .driver_info = RSVD(0)}, + { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV32_WA, 0xff), + .driver_info = RSVD(3)}, + { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV32_WB, 0xff), + .driver_info = RSVD(3)}, { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD100), .driver_info = RSVD(4) }, { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD120), diff --git a/drivers/usb/serial/whiteheat.c b/drivers/usb/serial/whiteheat.c index da65d14c9ed5..06aad0d727dd 100644 --- a/drivers/usb/serial/whiteheat.c +++ b/drivers/usb/serial/whiteheat.c @@ -584,9 +584,8 @@ static int firm_send_command(struct usb_serial_port *port, __u8 command, switch (command) { case WHITEHEAT_GET_DTR_RTS: info = usb_get_serial_port_data(port); - memcpy(&info->mcr, command_info->result_buffer, - sizeof(struct whiteheat_dr_info)); - break; + info->mcr = command_info->result_buffer[0]; + break; } } exit: diff --git a/drivers/usb/typec/Kconfig b/drivers/usb/typec/Kconfig index 8f921213b17d..ba24847fb245 100644 --- a/drivers/usb/typec/Kconfig +++ b/drivers/usb/typec/Kconfig @@ -56,6 +56,7 @@ config TYPEC_RT1719 tristate "Richtek RT1719 Sink Only Type-C controller driver" depends on USB_ROLE_SWITCH || !USB_ROLE_SWITCH depends on I2C + depends on POWER_SUPPLY select REGMAP_I2C help Say Y or M here if your system has Richtek RT1719 sink only diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index f0c2fa19f3e0..a6045aef0d04 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -949,6 +949,8 @@ static int ucsi_dr_swap(struct typec_port *port, enum typec_data_role role) role == TYPEC_HOST)) goto out_unlock; + reinit_completion(&con->complete); + command = UCSI_SET_UOR | UCSI_CONNECTOR_NUMBER(con->num); command |= UCSI_SET_UOR_ROLE(role); command |= UCSI_SET_UOR_ACCEPT_ROLE_SWAPS; @@ -956,14 +958,18 @@ static int ucsi_dr_swap(struct typec_port *port, enum typec_data_role role) if (ret < 0) goto out_unlock; + mutex_unlock(&con->lock); + if (!wait_for_completion_timeout(&con->complete, - msecs_to_jiffies(UCSI_SWAP_TIMEOUT_MS))) - ret = -ETIMEDOUT; + msecs_to_jiffies(UCSI_SWAP_TIMEOUT_MS))) + return -ETIMEDOUT; + + return 0; out_unlock: mutex_unlock(&con->lock); - return ret < 0 ? ret : 0; + return ret; } static int ucsi_pr_swap(struct typec_port *port, enum typec_role role) @@ -985,6 +991,8 @@ static int ucsi_pr_swap(struct typec_port *port, enum typec_role role) if (cur_role == role) goto out_unlock; + reinit_completion(&con->complete); + command = UCSI_SET_PDR | UCSI_CONNECTOR_NUMBER(con->num); command |= UCSI_SET_PDR_ROLE(role); command |= UCSI_SET_PDR_ACCEPT_ROLE_SWAPS; @@ -992,11 +1000,13 @@ static int ucsi_pr_swap(struct typec_port *port, enum typec_role role) if (ret < 0) goto out_unlock; + mutex_unlock(&con->lock); + if (!wait_for_completion_timeout(&con->complete, - msecs_to_jiffies(UCSI_SWAP_TIMEOUT_MS))) { - ret = -ETIMEDOUT; - goto out_unlock; - } + msecs_to_jiffies(UCSI_SWAP_TIMEOUT_MS))) + return -ETIMEDOUT; + + mutex_lock(&con->lock); /* Something has gone wrong while swapping the role */ if (UCSI_CONSTAT_PWR_OPMODE(con->status.flags) != diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index b7bb16f92ac6..06b6f3594a13 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -36,6 +36,10 @@ static bool nointxmask; static bool disable_vga; static bool disable_idle_d3; +/* List of PF's that vfio_pci_core_sriov_configure() has been called on */ +static DEFINE_MUTEX(vfio_pci_sriov_pfs_mutex); +static LIST_HEAD(vfio_pci_sriov_pfs); + static inline bool vfio_vga_disabled(void) { #ifdef CONFIG_VFIO_PCI_VGA @@ -434,47 +438,17 @@ out: } EXPORT_SYMBOL_GPL(vfio_pci_core_disable); -static struct vfio_pci_core_device *get_pf_vdev(struct vfio_pci_core_device *vdev) -{ - struct pci_dev *physfn = pci_physfn(vdev->pdev); - struct vfio_device *pf_dev; - - if (!vdev->pdev->is_virtfn) - return NULL; - - pf_dev = vfio_device_get_from_dev(&physfn->dev); - if (!pf_dev) - return NULL; - - if (pci_dev_driver(physfn) != pci_dev_driver(vdev->pdev)) { - vfio_device_put(pf_dev); - return NULL; - } - - return container_of(pf_dev, struct vfio_pci_core_device, vdev); -} - -static void vfio_pci_vf_token_user_add(struct vfio_pci_core_device *vdev, int val) -{ - struct vfio_pci_core_device *pf_vdev = get_pf_vdev(vdev); - - if (!pf_vdev) - return; - - mutex_lock(&pf_vdev->vf_token->lock); - pf_vdev->vf_token->users += val; - WARN_ON(pf_vdev->vf_token->users < 0); - mutex_unlock(&pf_vdev->vf_token->lock); - - vfio_device_put(&pf_vdev->vdev); -} - void vfio_pci_core_close_device(struct vfio_device *core_vdev) { struct vfio_pci_core_device *vdev = container_of(core_vdev, struct vfio_pci_core_device, vdev); - vfio_pci_vf_token_user_add(vdev, -1); + if (vdev->sriov_pf_core_dev) { + mutex_lock(&vdev->sriov_pf_core_dev->vf_token->lock); + WARN_ON(!vdev->sriov_pf_core_dev->vf_token->users); + vdev->sriov_pf_core_dev->vf_token->users--; + mutex_unlock(&vdev->sriov_pf_core_dev->vf_token->lock); + } vfio_spapr_pci_eeh_release(vdev->pdev); vfio_pci_core_disable(vdev); @@ -495,7 +469,12 @@ void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev) { vfio_pci_probe_mmaps(vdev); vfio_spapr_pci_eeh_open(vdev->pdev); - vfio_pci_vf_token_user_add(vdev, 1); + + if (vdev->sriov_pf_core_dev) { + mutex_lock(&vdev->sriov_pf_core_dev->vf_token->lock); + vdev->sriov_pf_core_dev->vf_token->users++; + mutex_unlock(&vdev->sriov_pf_core_dev->vf_token->lock); + } } EXPORT_SYMBOL_GPL(vfio_pci_core_finish_enable); @@ -1583,11 +1562,8 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_core_device *vdev, * * If the VF token is provided but unused, an error is generated. */ - if (!vdev->pdev->is_virtfn && !vdev->vf_token && !vf_token) - return 0; /* No VF token provided or required */ - if (vdev->pdev->is_virtfn) { - struct vfio_pci_core_device *pf_vdev = get_pf_vdev(vdev); + struct vfio_pci_core_device *pf_vdev = vdev->sriov_pf_core_dev; bool match; if (!pf_vdev) { @@ -1600,7 +1576,6 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_core_device *vdev, } if (!vf_token) { - vfio_device_put(&pf_vdev->vdev); pci_info_ratelimited(vdev->pdev, "VF token required to access device\n"); return -EACCES; @@ -1610,8 +1585,6 @@ static int vfio_pci_validate_vf_token(struct vfio_pci_core_device *vdev, match = uuid_equal(uuid, &pf_vdev->vf_token->uuid); mutex_unlock(&pf_vdev->vf_token->lock); - vfio_device_put(&pf_vdev->vdev); - if (!match) { pci_info_ratelimited(vdev->pdev, "Incorrect VF token provided for device\n"); @@ -1732,8 +1705,30 @@ static int vfio_pci_bus_notifier(struct notifier_block *nb, static int vfio_pci_vf_init(struct vfio_pci_core_device *vdev) { struct pci_dev *pdev = vdev->pdev; + struct vfio_pci_core_device *cur; + struct pci_dev *physfn; int ret; + if (pdev->is_virtfn) { + /* + * If this VF was created by our vfio_pci_core_sriov_configure() + * then we can find the PF vfio_pci_core_device now, and due to + * the locking in pci_disable_sriov() it cannot change until + * this VF device driver is removed. + */ + physfn = pci_physfn(vdev->pdev); + mutex_lock(&vfio_pci_sriov_pfs_mutex); + list_for_each_entry(cur, &vfio_pci_sriov_pfs, sriov_pfs_item) { + if (cur->pdev == physfn) { + vdev->sriov_pf_core_dev = cur; + break; + } + } + mutex_unlock(&vfio_pci_sriov_pfs_mutex); + return 0; + } + + /* Not a SRIOV PF */ if (!pdev->is_physfn) return 0; @@ -1805,6 +1800,7 @@ void vfio_pci_core_init_device(struct vfio_pci_core_device *vdev, INIT_LIST_HEAD(&vdev->ioeventfds_list); mutex_init(&vdev->vma_lock); INIT_LIST_HEAD(&vdev->vma_list); + INIT_LIST_HEAD(&vdev->sriov_pfs_item); init_rwsem(&vdev->memory_lock); } EXPORT_SYMBOL_GPL(vfio_pci_core_init_device); @@ -1896,7 +1892,7 @@ void vfio_pci_core_unregister_device(struct vfio_pci_core_device *vdev) { struct pci_dev *pdev = vdev->pdev; - pci_disable_sriov(pdev); + vfio_pci_core_sriov_configure(pdev, 0); vfio_unregister_group_dev(&vdev->vdev); @@ -1935,21 +1931,49 @@ EXPORT_SYMBOL_GPL(vfio_pci_core_aer_err_detected); int vfio_pci_core_sriov_configure(struct pci_dev *pdev, int nr_virtfn) { + struct vfio_pci_core_device *vdev; struct vfio_device *device; int ret = 0; + device_lock_assert(&pdev->dev); + device = vfio_device_get_from_dev(&pdev->dev); if (!device) return -ENODEV; - if (nr_virtfn == 0) - pci_disable_sriov(pdev); - else + vdev = container_of(device, struct vfio_pci_core_device, vdev); + + if (nr_virtfn) { + mutex_lock(&vfio_pci_sriov_pfs_mutex); + /* + * The thread that adds the vdev to the list is the only thread + * that gets to call pci_enable_sriov() and we will only allow + * it to be called once without going through + * pci_disable_sriov() + */ + if (!list_empty(&vdev->sriov_pfs_item)) { + ret = -EINVAL; + goto out_unlock; + } + list_add_tail(&vdev->sriov_pfs_item, &vfio_pci_sriov_pfs); + mutex_unlock(&vfio_pci_sriov_pfs_mutex); ret = pci_enable_sriov(pdev, nr_virtfn); + if (ret) + goto out_del; + ret = nr_virtfn; + goto out_put; + } - vfio_device_put(device); + pci_disable_sriov(pdev); - return ret < 0 ? ret : nr_virtfn; +out_del: + mutex_lock(&vfio_pci_sriov_pfs_mutex); + list_del_init(&vdev->sriov_pfs_item); +out_unlock: + mutex_unlock(&vfio_pci_sriov_pfs_mutex); +out_put: + vfio_device_put(device); + return ret; } EXPORT_SYMBOL_GPL(vfio_pci_core_sriov_configure); diff --git a/drivers/video/fbdev/arkfb.c b/drivers/video/fbdev/arkfb.c index edf169d0816e..eb3e47c58c5f 100644 --- a/drivers/video/fbdev/arkfb.c +++ b/drivers/video/fbdev/arkfb.c @@ -566,6 +566,9 @@ static int arkfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) { int rv, mem, step; + if (!var->pixclock) + return -EINVAL; + /* Find appropriate format */ rv = svga_match_format (arkfb_formats, var, NULL); if (rv < 0) diff --git a/drivers/video/fbdev/aty/aty128fb.c b/drivers/video/fbdev/aty/aty128fb.c index 6ff16d3132e5..b26c81233b6b 100644 --- a/drivers/video/fbdev/aty/aty128fb.c +++ b/drivers/video/fbdev/aty/aty128fb.c @@ -68,7 +68,6 @@ #ifdef CONFIG_PPC_PMAC #include <asm/machdep.h> #include <asm/pmac_feature.h> -#include <asm/prom.h> #include "../macmodes.h" #endif diff --git a/drivers/video/fbdev/aty/atyfb_base.c b/drivers/video/fbdev/aty/atyfb_base.c index 1aef3d6ebd88..a3e6faed7745 100644 --- a/drivers/video/fbdev/aty/atyfb_base.c +++ b/drivers/video/fbdev/aty/atyfb_base.c @@ -79,7 +79,6 @@ #ifdef __powerpc__ #include <asm/machdep.h> -#include <asm/prom.h> #include "../macmodes.h" #endif #ifdef __sparc__ diff --git a/drivers/video/fbdev/aty/radeon_pm.c b/drivers/video/fbdev/aty/radeon_pm.c index b5fbd5329652..97a5972f5b1f 100644 --- a/drivers/video/fbdev/aty/radeon_pm.c +++ b/drivers/video/fbdev/aty/radeon_pm.c @@ -22,7 +22,6 @@ #ifdef CONFIG_PPC_PMAC #include <asm/machdep.h> -#include <asm/prom.h> #include <asm/pmac_feature.h> #endif diff --git a/drivers/video/fbdev/aty/radeonfb.h b/drivers/video/fbdev/aty/radeonfb.h index 93f403cbb415..91d81b576231 100644 --- a/drivers/video/fbdev/aty/radeonfb.h +++ b/drivers/video/fbdev/aty/radeonfb.h @@ -21,7 +21,7 @@ #include <asm/io.h> -#if defined(CONFIG_PPC) || defined(CONFIG_SPARC) +#ifdef CONFIG_SPARC #include <asm/prom.h> #endif diff --git a/drivers/video/fbdev/clps711x-fb.c b/drivers/video/fbdev/clps711x-fb.c index c5d15c6db287..771ce1f76951 100644 --- a/drivers/video/fbdev/clps711x-fb.c +++ b/drivers/video/fbdev/clps711x-fb.c @@ -268,8 +268,7 @@ static int clps711x_fb_probe(struct platform_device *pdev) goto out_fb_release; } - cfb->syscon = - syscon_regmap_lookup_by_compatible("cirrus,ep7209-syscon1"); + cfb->syscon = syscon_regmap_lookup_by_phandle(np, "syscon"); if (IS_ERR(cfb->syscon)) { ret = PTR_ERR(cfb->syscon); goto out_fb_release; diff --git a/drivers/video/fbdev/controlfb.c b/drivers/video/fbdev/controlfb.c index bd59e7b11ed5..aba46118b208 100644 --- a/drivers/video/fbdev/controlfb.c +++ b/drivers/video/fbdev/controlfb.c @@ -47,9 +47,6 @@ #include <linux/nvram.h> #include <linux/adb.h> #include <linux/cuda.h> -#ifdef CONFIG_PPC_PMAC -#include <asm/prom.h> -#endif #ifdef CONFIG_BOOTX_TEXT #include <asm/btext.h> #endif diff --git a/drivers/video/fbdev/i740fb.c b/drivers/video/fbdev/i740fb.c index 52cce0db8bd3..09dd85553d4f 100644 --- a/drivers/video/fbdev/i740fb.c +++ b/drivers/video/fbdev/i740fb.c @@ -657,6 +657,9 @@ static int i740fb_decode_var(const struct fb_var_screeninfo *var, static int i740fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) { + if (!var->pixclock) + return -EINVAL; + switch (var->bits_per_pixel) { case 8: var->red.offset = var->green.offset = var->blue.offset = 0; @@ -740,7 +743,7 @@ static int i740fb_set_par(struct fb_info *info) if (i) return i; - memset(info->screen_base, 0, info->screen_size); + memset_io(info->screen_base, 0, info->screen_size); vga_protect(par); diff --git a/drivers/video/fbdev/imxfb.c b/drivers/video/fbdev/imxfb.c index 68288756ffff..a2f644c97f28 100644 --- a/drivers/video/fbdev/imxfb.c +++ b/drivers/video/fbdev/imxfb.c @@ -925,10 +925,12 @@ static int imxfb_probe(struct platform_device *pdev) sizeof(struct imx_fb_videomode), GFP_KERNEL); if (!fbi->mode) { ret = -ENOMEM; + of_node_put(display_np); goto failed_of_parse; } ret = imxfb_of_read_mode(&pdev->dev, display_np, fbi->mode); + of_node_put(display_np); if (ret) goto failed_of_parse; } diff --git a/drivers/video/fbdev/kyro/fbdev.c b/drivers/video/fbdev/kyro/fbdev.c index 25801e8e3f74..d57772f96ad2 100644 --- a/drivers/video/fbdev/kyro/fbdev.c +++ b/drivers/video/fbdev/kyro/fbdev.c @@ -494,6 +494,8 @@ static int kyrofb_set_par(struct fb_info *info) info->var.hsync_len + info->var.left_margin)) / 1000; + if (!lineclock) + return -EINVAL; /* time for a frame in ns (precision in 32bpp) */ frameclock = lineclock * (info->var.yres + diff --git a/drivers/video/fbdev/matrox/matroxfb_base.h b/drivers/video/fbdev/matrox/matroxfb_base.h index 759dee996af1..958be6805f87 100644 --- a/drivers/video/fbdev/matrox/matroxfb_base.h +++ b/drivers/video/fbdev/matrox/matroxfb_base.h @@ -47,7 +47,6 @@ #include <asm/unaligned.h> #if defined(CONFIG_PPC_PMAC) -#include <asm/prom.h> #include "../macmodes.h" #endif diff --git a/drivers/video/fbdev/mb862xx/mb862xxfbdrv.c b/drivers/video/fbdev/mb862xx/mb862xxfbdrv.c index 63721337a377..a7508f5be343 100644 --- a/drivers/video/fbdev/mb862xx/mb862xxfbdrv.c +++ b/drivers/video/fbdev/mb862xx/mb862xxfbdrv.c @@ -18,6 +18,8 @@ #include <linux/interrupt.h> #include <linux/pci.h> #if defined(CONFIG_OF) +#include <linux/of_address.h> +#include <linux/of_irq.h> #include <linux/of_platform.h> #endif #include "mb862xxfb.h" diff --git a/drivers/video/fbdev/mmp/core.c b/drivers/video/fbdev/mmp/core.c index 154127256a2c..03707461eced 100644 --- a/drivers/video/fbdev/mmp/core.c +++ b/drivers/video/fbdev/mmp/core.c @@ -127,19 +127,18 @@ EXPORT_SYMBOL_GPL(mmp_unregister_panel); */ struct mmp_path *mmp_get_path(const char *name) { - struct mmp_path *path; - int found = 0; + struct mmp_path *path = NULL, *iter; mutex_lock(&disp_lock); - list_for_each_entry(path, &path_list, node) { - if (!strcmp(name, path->name)) { - found = 1; + list_for_each_entry(iter, &path_list, node) { + if (!strcmp(name, iter->name)) { + path = iter; break; } } mutex_unlock(&disp_lock); - return found ? path : NULL; + return path; } EXPORT_SYMBOL_GPL(mmp_get_path); diff --git a/drivers/video/fbdev/neofb.c b/drivers/video/fbdev/neofb.c index 966df2a07360..28d32cbf496b 100644 --- a/drivers/video/fbdev/neofb.c +++ b/drivers/video/fbdev/neofb.c @@ -585,7 +585,7 @@ neofb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) DBG("neofb_check_var"); - if (var->pixclock && PICOS2KHZ(var->pixclock) > par->maxClock) + if (!var->pixclock || PICOS2KHZ(var->pixclock) > par->maxClock) return -EINVAL; /* Is the mode larger than the LCD panel? */ diff --git a/drivers/video/fbdev/omap/hwa742.c b/drivers/video/fbdev/omap/hwa742.c index b191bef22d98..9d9fe5c3a7a1 100644 --- a/drivers/video/fbdev/omap/hwa742.c +++ b/drivers/video/fbdev/omap/hwa742.c @@ -964,7 +964,7 @@ static int hwa742_init(struct omapfb_device *fbdev, int ext_mode, if ((r = calc_extif_timings(ext_clk, &extif_mem_div)) < 0) goto err3; hwa742.extif->set_timings(&hwa742.reg_timings); - clk_enable(hwa742.sys_ck); + clk_prepare_enable(hwa742.sys_ck); calc_hwa742_clk_rates(ext_clk, &sys_clk, &pix_clk); if ((r = calc_extif_timings(sys_clk, &extif_mem_div)) < 0) @@ -1023,7 +1023,7 @@ static int hwa742_init(struct omapfb_device *fbdev, int ext_mode, return 0; err4: - clk_disable(hwa742.sys_ck); + clk_disable_unprepare(hwa742.sys_ck); err3: hwa742.extif->cleanup(); err2: @@ -1037,7 +1037,7 @@ static void hwa742_cleanup(void) hwa742_set_update_mode(OMAPFB_UPDATE_DISABLED); hwa742.extif->cleanup(); hwa742.int_ctrl->cleanup(); - clk_disable(hwa742.sys_ck); + clk_disable_unprepare(hwa742.sys_ck); } struct lcd_ctrl hwa742_ctrl = { diff --git a/drivers/video/fbdev/omap/lcdc.c b/drivers/video/fbdev/omap/lcdc.c index 7317c9aad677..97d20dc0d1d0 100644 --- a/drivers/video/fbdev/omap/lcdc.c +++ b/drivers/video/fbdev/omap/lcdc.c @@ -711,7 +711,7 @@ static int omap_lcdc_init(struct omapfb_device *fbdev, int ext_mode, dev_err(fbdev->dev, "failed to adjust LCD rate\n"); goto fail1; } - clk_enable(lcdc.lcd_ck); + clk_prepare_enable(lcdc.lcd_ck); r = request_irq(OMAP_LCDC_IRQ, lcdc_irq_handler, 0, MODULE_NAME, fbdev); if (r) { @@ -746,7 +746,7 @@ fail4: fail3: free_irq(OMAP_LCDC_IRQ, lcdc.fbdev); fail2: - clk_disable(lcdc.lcd_ck); + clk_disable_unprepare(lcdc.lcd_ck); fail1: clk_put(lcdc.lcd_ck); fail0: @@ -760,7 +760,7 @@ static void omap_lcdc_cleanup(void) free_fbmem(); omap_free_lcd_dma(); free_irq(OMAP_LCDC_IRQ, lcdc.fbdev); - clk_disable(lcdc.lcd_ck); + clk_disable_unprepare(lcdc.lcd_ck); clk_put(lcdc.lcd_ck); } diff --git a/drivers/video/fbdev/omap/sossi.c b/drivers/video/fbdev/omap/sossi.c index 80ac67f27f0d..b9cb8b386627 100644 --- a/drivers/video/fbdev/omap/sossi.c +++ b/drivers/video/fbdev/omap/sossi.c @@ -598,7 +598,7 @@ static int sossi_init(struct omapfb_device *fbdev) l &= ~CONF_SOSSI_RESET_R; omap_writel(l, MOD_CONF_CTRL_1); - clk_enable(sossi.fck); + clk_prepare_enable(sossi.fck); l = omap_readl(ARM_IDLECT2); l &= ~(1 << 8); /* DMACK_REQ */ omap_writel(l, ARM_IDLECT2); @@ -649,7 +649,7 @@ static int sossi_init(struct omapfb_device *fbdev) return 0; err: - clk_disable(sossi.fck); + clk_disable_unprepare(sossi.fck); clk_put(sossi.fck); return r; } @@ -657,6 +657,7 @@ err: static void sossi_cleanup(void) { omap_lcdc_free_dma_callback(); + clk_unprepare(sossi.fck); clk_put(sossi.fck); iounmap(sossi.base); } diff --git a/drivers/video/fbdev/platinumfb.c b/drivers/video/fbdev/platinumfb.c index ce413a9df06e..5b9e26ea6449 100644 --- a/drivers/video/fbdev/platinumfb.c +++ b/drivers/video/fbdev/platinumfb.c @@ -30,9 +30,9 @@ #include <linux/fb.h> #include <linux/init.h> #include <linux/nvram.h> +#include <linux/of_address.h> #include <linux/of_device.h> #include <linux/of_platform.h> -#include <asm/prom.h> #include "macmodes.h" #include "platinumfb.h" diff --git a/drivers/video/fbdev/pm2fb.c b/drivers/video/fbdev/pm2fb.c index c68725eebee3..d3be2c64f1c0 100644 --- a/drivers/video/fbdev/pm2fb.c +++ b/drivers/video/fbdev/pm2fb.c @@ -1504,9 +1504,7 @@ static const struct fb_ops pm2fb_ops = { /** - * Device initialisation - * - * Initialise and allocate resource for PCI device. + * pm2fb_probe - Initialise and allocate resource for PCI device. * * @pdev: PCI device. * @id: PCI device ID. @@ -1711,9 +1709,7 @@ static int pm2fb_probe(struct pci_dev *pdev, const struct pci_device_id *id) } /** - * Device removal. - * - * Release all device resources. + * pm2fb_remove - Release all device resources. * * @pdev: PCI device to clean up. */ diff --git a/drivers/video/fbdev/pxafb.c b/drivers/video/fbdev/pxafb.c index f1551e00eb12..8ad91c251fe6 100644 --- a/drivers/video/fbdev/pxafb.c +++ b/drivers/video/fbdev/pxafb.c @@ -2256,10 +2256,10 @@ static int pxafb_probe(struct platform_device *dev) goto failed; for (i = 0; i < inf->num_modes; i++) inf->modes[i] = pdata->modes[i]; + } else { + inf = of_pxafb_of_mach_info(&dev->dev); } - if (!pdata) - inf = of_pxafb_of_mach_info(&dev->dev); if (IS_ERR_OR_NULL(inf)) goto failed; diff --git a/drivers/video/fbdev/s3fb.c b/drivers/video/fbdev/s3fb.c index 5c74253e7b2c..b93c8eb02336 100644 --- a/drivers/video/fbdev/s3fb.c +++ b/drivers/video/fbdev/s3fb.c @@ -549,6 +549,9 @@ static int s3fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) int rv, mem, step; u16 m, n, r; + if (!var->pixclock) + return -EINVAL; + /* Find appropriate format */ rv = svga_match_format (s3fb_formats, var, NULL); diff --git a/drivers/video/fbdev/sh_mobile_lcdcfb.c b/drivers/video/fbdev/sh_mobile_lcdcfb.c index aa4ebe3192ec..9a4417430b4e 100644 --- a/drivers/video/fbdev/sh_mobile_lcdcfb.c +++ b/drivers/video/fbdev/sh_mobile_lcdcfb.c @@ -531,9 +531,6 @@ static void sh_mobile_lcdc_display_off(struct sh_mobile_lcdc_chan *ch) ch->tx_dev->ops->display_off(ch->tx_dev); } -static int sh_mobile_lcdc_check_var(struct fb_var_screeninfo *var, - struct fb_info *info); - /* ----------------------------------------------------------------------------- * Format helpers */ diff --git a/drivers/video/fbdev/sis/sis_main.c b/drivers/video/fbdev/sis/sis_main.c index 742f62986b80..f28fd69d5eb7 100644 --- a/drivers/video/fbdev/sis/sis_main.c +++ b/drivers/video/fbdev/sis/sis_main.c @@ -4463,7 +4463,7 @@ static void sisfb_post_sis300(struct pci_dev *pdev) SiS_SetReg(SISCR, 0x37, 0x02); SiS_SetReg(SISPART2, 0x00, 0x1c); v4 = 0x00; v5 = 0x00; v6 = 0x10; - if(ivideo->SiS_Pr.UseROM) { + if (ivideo->SiS_Pr.UseROM && bios) { v4 = bios[0xf5]; v5 = bios[0xf6]; v6 = bios[0xf7]; diff --git a/drivers/video/fbdev/tridentfb.c b/drivers/video/fbdev/tridentfb.c index 4d20cb557ff0..319131bd72cf 100644 --- a/drivers/video/fbdev/tridentfb.c +++ b/drivers/video/fbdev/tridentfb.c @@ -996,6 +996,9 @@ static int tridentfb_check_var(struct fb_var_screeninfo *var, int ramdac = 230000; /* 230MHz for most 3D chips */ debug("enter\n"); + if (!var->pixclock) + return -EINVAL; + /* check color depth */ if (bpp == 24) bpp = var->bits_per_pixel = 32; diff --git a/drivers/video/fbdev/udlfb.c b/drivers/video/fbdev/udlfb.c index b6ec0b8e2b72..d280733f283b 100644 --- a/drivers/video/fbdev/udlfb.c +++ b/drivers/video/fbdev/udlfb.c @@ -1650,8 +1650,9 @@ static int dlfb_usb_probe(struct usb_interface *intf, const struct device_attribute *attr; struct dlfb_data *dlfb; struct fb_info *info; - int retval = -ENOMEM; + int retval; struct usb_device *usbdev = interface_to_usbdev(intf); + struct usb_endpoint_descriptor *out; /* usb initialization */ dlfb = kzalloc(sizeof(*dlfb), GFP_KERNEL); @@ -1665,6 +1666,12 @@ static int dlfb_usb_probe(struct usb_interface *intf, dlfb->udev = usb_get_dev(usbdev); usb_set_intfdata(intf, dlfb); + retval = usb_find_common_endpoints(intf->cur_altsetting, NULL, &out, NULL, NULL); + if (retval) { + dev_err(&intf->dev, "Device should have at lease 1 bulk endpoint!\n"); + goto error; + } + dev_dbg(&intf->dev, "console enable=%d\n", console); dev_dbg(&intf->dev, "fb_defio enable=%d\n", fb_defio); dev_dbg(&intf->dev, "shadow enable=%d\n", shadow); @@ -1674,6 +1681,7 @@ static int dlfb_usb_probe(struct usb_interface *intf, if (!dlfb_parse_vendor_descriptor(dlfb, intf)) { dev_err(&intf->dev, "firmware not recognized, incompatible device?\n"); + retval = -ENODEV; goto error; } @@ -1687,8 +1695,10 @@ static int dlfb_usb_probe(struct usb_interface *intf, /* allocates framebuffer driver structure, not framebuffer memory */ info = framebuffer_alloc(0, &dlfb->udev->dev); - if (!info) + if (!info) { + retval = -ENOMEM; goto error; + } dlfb->info = info; info->par = dlfb; diff --git a/drivers/video/fbdev/valkyriefb.c b/drivers/video/fbdev/valkyriefb.c index 8425afe37d7c..a6c9d4f26669 100644 --- a/drivers/video/fbdev/valkyriefb.c +++ b/drivers/video/fbdev/valkyriefb.c @@ -54,10 +54,9 @@ #include <linux/nvram.h> #include <linux/adb.h> #include <linux/cuda.h> +#include <linux/of_address.h> #ifdef CONFIG_MAC #include <asm/macintosh.h> -#else -#include <asm/prom.h> #endif #include "macmodes.h" diff --git a/drivers/video/fbdev/vt8623fb.c b/drivers/video/fbdev/vt8623fb.c index 7a959e5ba90b..a92a8c670cf0 100644 --- a/drivers/video/fbdev/vt8623fb.c +++ b/drivers/video/fbdev/vt8623fb.c @@ -321,6 +321,9 @@ static int vt8623fb_check_var(struct fb_var_screeninfo *var, struct fb_info *inf { int rv, mem, step; + if (!var->pixclock) + return -EINVAL; + /* Find appropriate format */ rv = svga_match_format (vt8623fb_formats, var, NULL); if (rv < 0) diff --git a/drivers/video/of_display_timing.c b/drivers/video/of_display_timing.c index f93b6abbe258..bebd371c6b93 100644 --- a/drivers/video/of_display_timing.c +++ b/drivers/video/of_display_timing.c @@ -199,7 +199,7 @@ struct display_timings *of_get_display_timings(const struct device_node *np) struct display_timing *dt; int r; - dt = kzalloc(sizeof(*dt), GFP_KERNEL); + dt = kmalloc(sizeof(*dt), GFP_KERNEL); if (!dt) { pr_err("%pOF: could not allocate display_timing struct\n", np); diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index dfe26fa17e95..617a7f4f07a8 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -689,29 +689,34 @@ void xen_free_ballooned_pages(unsigned int nr_pages, struct page **pages) } EXPORT_SYMBOL(xen_free_ballooned_pages); -#if defined(CONFIG_XEN_PV) && !defined(CONFIG_XEN_UNPOPULATED_ALLOC) -static void __init balloon_add_region(unsigned long start_pfn, - unsigned long pages) +static void __init balloon_add_regions(void) { +#if defined(CONFIG_XEN_PV) + unsigned long start_pfn, pages; unsigned long pfn, extra_pfn_end; + unsigned int i; - /* - * If the amount of usable memory has been limited (e.g., with - * the 'mem' command line parameter), don't add pages beyond - * this limit. - */ - extra_pfn_end = min(max_pfn, start_pfn + pages); + for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { + pages = xen_extra_mem[i].n_pfns; + if (!pages) + continue; - for (pfn = start_pfn; pfn < extra_pfn_end; pfn++) { - /* totalram_pages and totalhigh_pages do not - include the boot-time balloon extension, so - don't subtract from it. */ - balloon_append(pfn_to_page(pfn)); - } + start_pfn = xen_extra_mem[i].start_pfn; - balloon_stats.total_pages += extra_pfn_end - start_pfn; -} + /* + * If the amount of usable memory has been limited (e.g., with + * the 'mem' command line parameter), don't add pages beyond + * this limit. + */ + extra_pfn_end = min(max_pfn, start_pfn + pages); + + for (pfn = start_pfn; pfn < extra_pfn_end; pfn++) + balloon_append(pfn_to_page(pfn)); + + balloon_stats.total_pages += extra_pfn_end - start_pfn; + } #endif +} static int __init balloon_init(void) { @@ -745,20 +750,7 @@ static int __init balloon_init(void) register_sysctl_table(xen_root); #endif -#if defined(CONFIG_XEN_PV) && !defined(CONFIG_XEN_UNPOPULATED_ALLOC) - { - int i; - - /* - * Initialize the balloon with pages from the extra memory - * regions (see arch/x86/xen/setup.c). - */ - for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) - if (xen_extra_mem[i].n_pfns) - balloon_add_region(xen_extra_mem[i].start_pfn, - xen_extra_mem[i].n_pfns); - } -#endif + balloon_add_regions(); task = kthread_run(balloon_thread, NULL, "xen-balloon"); if (IS_ERR(task)) { diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c index 4849f94372a4..55acb32842a3 100644 --- a/drivers/xen/gntalloc.c +++ b/drivers/xen/gntalloc.c @@ -178,9 +178,9 @@ static void __del_gref(struct gntalloc_gref *gref) unsigned long addr; if (gref->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) { - uint8_t *tmp = kmap(gref->page); + uint8_t *tmp = kmap_local_page(gref->page); tmp[gref->notify.pgoff] = 0; - kunmap(gref->page); + kunmap_local(tmp); } if (gref->notify.flags & UNMAP_NOTIFY_SEND_EVENT) { notify_remote_via_evtchn(gref->notify.event); diff --git a/drivers/xen/unpopulated-alloc.c b/drivers/xen/unpopulated-alloc.c index a8b41057c382..a39f2d36dd9c 100644 --- a/drivers/xen/unpopulated-alloc.c +++ b/drivers/xen/unpopulated-alloc.c @@ -230,39 +230,6 @@ void xen_free_unpopulated_pages(unsigned int nr_pages, struct page **pages) } EXPORT_SYMBOL(xen_free_unpopulated_pages); -#ifdef CONFIG_XEN_PV -static int __init init(void) -{ - unsigned int i; - - if (!xen_domain()) - return -ENODEV; - - if (!xen_pv_domain()) - return 0; - - /* - * Initialize with pages from the extra memory regions (see - * arch/x86/xen/setup.c). - */ - for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { - unsigned int j; - - for (j = 0; j < xen_extra_mem[i].n_pfns; j++) { - struct page *pg = - pfn_to_page(xen_extra_mem[i].start_pfn + j); - - pg->zone_device_data = page_list; - page_list = pg; - list_count++; - } - } - - return 0; -} -subsys_initcall(init); -#endif - static int __init unpopulated_init(void) { int ret; |