From 86a570c577b7975f31c0539f6a724194892ebc43 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 24 Oct 2014 19:03:57 -0700 Subject: ASoC: fsl_asrc: Add reg_defaults for regmap to fix kernel dump Kernel dump (WARN_ON) ocurred during system boot-up inside regmap_write(): ------------[ cut here ]------------ WARNING: CPU: 0 PID: 47 at kernel/locking/lockdep.c:2744 lockdep_trace_alloc+0xe8/0x108() DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags)) Modules linked in: CPU: 0 PID: 47 Comm: kworker/u2:2 Not tainted 3.18.0-rc1-10245-gb75d289-dirty #56 Workqueue: deferwq deferred_probe_work_func Backtrace: [<80012294>] (dump_backtrace) from [<80012578>] (show_stack+0x18/0x1c) r6:8097c73c r5:8097c73c r4:00000000 r3:be33ba80 [<80012560>] (show_stack) from [<806aac48>] (dump_stack+0x8c/0xa4) [<806aabbc>] (dump_stack) from [<8002a694>] (warn_slowpath_common+0x70/0x94) r6:80062838 r5:00000009 r4:bd827b30 r3:be33ba80 [<8002a624>] (warn_slowpath_common) from [<8002a6f0>] (warn_slowpath_fmt+0x38/0x40) r8:00000004 r7:00000001 r6:000080d0 r5:60000193 r4:bd826010 [<8002a6bc>] (warn_slowpath_fmt) from [<80062838>] (lockdep_trace_alloc+0xe8/0x108) r3:80831590 r2:8082e160 [<80062750>] (lockdep_trace_alloc) from [<800ea5dc>] (kmem_cache_alloc+0x28/0x134) r5:000080d0 r4:be001f00 [<800ea5b4>] (kmem_cache_alloc) from [<8038d72c>] (regcache_rbtree_write+0x15c/0x648) r10:00000000 r9:0000001c r8:00000004 r7:00000001 r6:00000000 r5:bd819a00 r4:00000000 r3:811aea88 [<8038d5d0>] (regcache_rbtree_write) from [<8038c4d8>] (regcache_write+0x5c/0x64) r10:be3f9f88 r9:00000000 r8:00000004 r7:00000001 r6:00000000 r5:00000001 r4:bd819a00 [<8038c47c>] (regcache_write) from [<8038b0dc>] (_regmap_raw_write+0x134/0x5f4) r6:be3f9f84 r5:00000001 r4:bd819a00 r3:00000001 [<8038afa8>] (_regmap_raw_write) from [<8038b610>] (_regmap_bus_raw_write+0x74/0x94) r10:00000000 r9:00000001 r8:be3fb080 r7:bd819a00 r6:00000001 r5:00000000 r4:bd819a00 [<8038b59c>] (_regmap_bus_raw_write) from [<8038a8b4>] (_regmap_write+0x60/0x9c) r6:00000001 r5:00000000 r4:bd819a00 r3:8038b59c [<8038a854>] (_regmap_write) from [<8038ba24>] (regmap_write+0x48/0x68) r7:bd81ad80 r6:00000001 r5:00000000 r4:bd819a00 [<8038b9dc>] (regmap_write) from [<80528f30>] (fsl_asrc_dai_probe+0x34/0x104) r6:bd888628 r5:be3fb080 r4:be3b4410 r3:be3b442c ------------[ dump end ]------------ ============================================================================= 2741 /* 2742 * Oi! Can't be having __GFP_FS allocations with IRQs disabled. 2743 */ 2744 if (DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags))) 2745 return; ============================================================================= By looking at 2744 line, we can get that it's because regcache_rbtree_write() would call kmalloc() with GFP flag if it couldn't find an existing block to insert nodes while this kmalloc() call is inside a spin_lock_irq_save pair, i.e. IRQs disabled. Even though this may be a bug that should be fixed, I still try to send this patch as a quick fix (work around) since it does no harm to assign default values of every registers when using regcache. Signed-off-by: Nicolin Chen Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_asrc.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/sound/soc/fsl/fsl_asrc.c b/sound/soc/fsl/fsl_asrc.c index 3b145313f93e..653dbaab83a6 100644 --- a/sound/soc/fsl/fsl_asrc.c +++ b/sound/soc/fsl/fsl_asrc.c @@ -684,12 +684,38 @@ static bool fsl_asrc_writeable_reg(struct device *dev, unsigned int reg) } } +static struct reg_default fsl_asrc_reg[] = { + { REG_ASRCTR, 0x0000 }, { REG_ASRIER, 0x0000 }, + { REG_ASRCNCR, 0x0000 }, { REG_ASRCFG, 0x0000 }, + { REG_ASRCSR, 0x0000 }, { REG_ASRCDR1, 0x0000 }, + { REG_ASRCDR2, 0x0000 }, { REG_ASRSTR, 0x0000 }, + { REG_ASRRA, 0x0000 }, { REG_ASRRB, 0x0000 }, + { REG_ASRRC, 0x0000 }, { REG_ASRPM1, 0x0000 }, + { REG_ASRPM2, 0x0000 }, { REG_ASRPM3, 0x0000 }, + { REG_ASRPM4, 0x0000 }, { REG_ASRPM5, 0x0000 }, + { REG_ASRTFR1, 0x0000 }, { REG_ASRCCR, 0x0000 }, + { REG_ASRDIA, 0x0000 }, { REG_ASRDOA, 0x0000 }, + { REG_ASRDIB, 0x0000 }, { REG_ASRDOB, 0x0000 }, + { REG_ASRDIC, 0x0000 }, { REG_ASRDOC, 0x0000 }, + { REG_ASRIDRHA, 0x0000 }, { REG_ASRIDRLA, 0x0000 }, + { REG_ASRIDRHB, 0x0000 }, { REG_ASRIDRLB, 0x0000 }, + { REG_ASRIDRHC, 0x0000 }, { REG_ASRIDRLC, 0x0000 }, + { REG_ASR76K, 0x0A47 }, { REG_ASR56K, 0x0DF3 }, + { REG_ASRMCRA, 0x0000 }, { REG_ASRFSTA, 0x0000 }, + { REG_ASRMCRB, 0x0000 }, { REG_ASRFSTB, 0x0000 }, + { REG_ASRMCRC, 0x0000 }, { REG_ASRFSTC, 0x0000 }, + { REG_ASRMCR1A, 0x0000 }, { REG_ASRMCR1B, 0x0000 }, + { REG_ASRMCR1C, 0x0000 }, +}; + static const struct regmap_config fsl_asrc_regmap_config = { .reg_bits = 32, .reg_stride = 4, .val_bits = 32, .max_register = REG_ASRMCR1C, + .reg_defaults = fsl_asrc_reg, + .num_reg_defaults = ARRAY_SIZE(fsl_asrc_reg), .readable_reg = fsl_asrc_readable_reg, .volatile_reg = fsl_asrc_volatile_reg, .writeable_reg = fsl_asrc_writeable_reg, -- cgit v1.2.1 From b2a9a3b818db727479cc2b9c2924e4ab9a4bbb07 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Tue, 28 Oct 2014 08:59:37 +0800 Subject: ASoC: es8328-i2c: Fix i2c_device_id name field in es8328_id The convention for i2c_device_id name does not need to have company prefix. Signed-off-by: Axel Lin Signed-off-by: Mark Brown --- sound/soc/codecs/es8328-i2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/es8328-i2c.c b/sound/soc/codecs/es8328-i2c.c index aae410d122ee..2d05b5d3a6ce 100644 --- a/sound/soc/codecs/es8328-i2c.c +++ b/sound/soc/codecs/es8328-i2c.c @@ -19,7 +19,7 @@ #include "es8328.h" static const struct i2c_device_id es8328_id[] = { - { "everest,es8328", 0 }, + { "es8328", 0 }, { } }; MODULE_DEVICE_TABLE(i2c, es8328_id); -- cgit v1.2.1 From 885e7b0e181c14e4d0ddd26c688bad2b84c1ada9 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 14 Oct 2014 14:16:24 -0700 Subject: target: Don't call TFO->write_pending if data_length == 0 If an initiator sends a zero-length command (e.g. TEST UNIT READY) but sets the transfer direction in the transport layer to indicate a data-out phase, we still shouldn't try to transfer data. At best it's a NOP, and depending on the transport, we might crash on an uninitialized sg list. Reported-by: Craig Watson Signed-off-by: Roland Dreier Cc: # 3.1 Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 9ea0d5f03f7a..be877bf6f730 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -2292,7 +2292,7 @@ transport_generic_new_cmd(struct se_cmd *cmd) * and let it call back once the write buffers are ready. */ target_add_to_state_list(cmd); - if (cmd->data_direction != DMA_TO_DEVICE) { + if (cmd->data_direction != DMA_TO_DEVICE || cmd->data_length == 0) { target_execute_cmd(cmd); return 0; } -- cgit v1.2.1 From ab8edab132829b26dd13db6caca3c242cce35dc1 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Wed, 8 Oct 2014 06:19:20 +0000 Subject: vhost-scsi: Take configfs group dependency during VHOST_SCSI_SET_ENDPOINT This patch addresses a bug where individual vhost-scsi configfs endpoint groups can be removed from below while active exports to QEMU userspace still exist, resulting in an OOPs. It adds a configfs_depend_item() in vhost_scsi_set_endpoint() to obtain an explicit dependency on se_tpg->tpg_group in order to prevent individual vhost-scsi WWPN endpoints from being released via normal configfs methods while an QEMU ioctl reference still exists. Also, add matching configfs_undepend_item() in vhost_scsi_clear_endpoint() to release the dependency, once QEMU's reference to the individual group at /sys/kernel/config/target/vhost/$WWPN/$TPGT is released. (Fix up vhost_scsi_clear_endpoint() error path - DanC) Cc: Michael S. Tsirkin Cc: Paolo Bonzini Cc: Stefan Hajnoczi Cc: # 3.6+ Signed-off-by: Nicholas Bellinger --- drivers/vhost/scsi.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 69906cacd04f..a17f11850669 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1312,6 +1312,7 @@ static int vhost_scsi_set_endpoint(struct vhost_scsi *vs, struct vhost_scsi_target *t) { + struct se_portal_group *se_tpg; struct tcm_vhost_tport *tv_tport; struct tcm_vhost_tpg *tpg; struct tcm_vhost_tpg **vs_tpg; @@ -1359,6 +1360,21 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, ret = -EEXIST; goto out; } + /* + * In order to ensure individual vhost-scsi configfs + * groups cannot be removed while in use by vhost ioctl, + * go ahead and take an explicit se_tpg->tpg_group.cg_item + * dependency now. + */ + se_tpg = &tpg->se_tpg; + ret = configfs_depend_item(se_tpg->se_tpg_tfo->tf_subsys, + &se_tpg->tpg_group.cg_item); + if (ret) { + pr_warn("configfs_depend_item() failed: %d\n", ret); + kfree(vs_tpg); + mutex_unlock(&tpg->tv_tpg_mutex); + goto out; + } tpg->tv_tpg_vhost_count++; tpg->vhost_scsi = vs; vs_tpg[tpg->tport_tpgt] = tpg; @@ -1401,6 +1417,7 @@ static int vhost_scsi_clear_endpoint(struct vhost_scsi *vs, struct vhost_scsi_target *t) { + struct se_portal_group *se_tpg; struct tcm_vhost_tport *tv_tport; struct tcm_vhost_tpg *tpg; struct vhost_virtqueue *vq; @@ -1449,6 +1466,13 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs, vs->vs_tpg[target] = NULL; match = true; mutex_unlock(&tpg->tv_tpg_mutex); + /* + * Release se_tpg->tpg_group.cg_item configfs dependency now + * to allow vhost-scsi WWPN se_tpg->tpg_group shutdown to occur. + */ + se_tpg = &tpg->se_tpg; + configfs_undepend_item(se_tpg->se_tpg_tfo->tf_subsys, + &se_tpg->tpg_group.cg_item); } if (match) { for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { -- cgit v1.2.1 From f2774f430ea65fddc068865d364bb254d2816648 Mon Sep 17 00:00:00 2001 From: Steven Allen Date: Wed, 15 Oct 2014 10:59:21 -0700 Subject: iscsi-target: return the correct port in SendTargets The fact that a target is published on the any address has no bearing on which port(s) it is published. SendTargets should always send the portal's port, not the port used for discovery. Signed-off-by: Steven Allen Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index b19e4329ba00..73e58d22e325 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -3491,7 +3491,7 @@ iscsit_build_sendtargets_response(struct iscsi_cmd *cmd, len = sprintf(buf, "TargetAddress=" "%s:%hu,%hu", inaddr_any ? conn->local_ip : np->np_ip, - inaddr_any ? conn->local_port : np->np_port, + np->np_port, tpg->tpgt); len += 1; -- cgit v1.2.1 From ab477c1ff5e0a744c072404bf7db51bfe1f05b6e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Sun, 19 Oct 2014 18:05:33 +0300 Subject: srp-target: Retry when QP creation fails with ENOMEM It is not guaranteed to that srp_sq_size is supported by the HCA. So if we failed to create the QP with ENOMEM, try with a smaller srp_sq_size. Keep it up until we hit MIN_SRPT_SQ_SIZE, then fail the connection. Reported-by: Mark Lehrer Signed-off-by: Bart Van Assche Signed-off-by: Sagi Grimberg Cc: # 3.4+ Signed-off-by: Nicholas Bellinger --- drivers/infiniband/ulp/srpt/ib_srpt.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 7206547c13ce..dc829682701a 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -2092,6 +2092,7 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch) if (!qp_init) goto out; +retry: ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, ch->rq_size + srp_sq_size, 0); if (IS_ERR(ch->cq)) { @@ -2115,6 +2116,13 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch) ch->qp = ib_create_qp(sdev->pd, qp_init); if (IS_ERR(ch->qp)) { ret = PTR_ERR(ch->qp); + if (ret == -ENOMEM) { + srp_sq_size /= 2; + if (srp_sq_size >= MIN_SRPT_SQ_SIZE) { + ib_destroy_cq(ch->cq); + goto retry; + } + } printk(KERN_ERR "failed to create_qp ret= %d\n", ret); goto err_destroy_cq; } -- cgit v1.2.1 From 29f95bd76f6ec1eff88eec6a04191104a11a7f97 Mon Sep 17 00:00:00 2001 From: Jianqun Date: Wed, 29 Oct 2014 17:45:51 +0800 Subject: ASoC: rockchip-i2s: fix infinite loop in rockchip_snd_rxctrl We can get into an infinite loop if the I2S_CLR register fails to clear due to a missing break statement, so add that. Signed-off-by: Jianqun Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/rockchip/rockchip_i2s.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/soc/rockchip/rockchip_i2s.c b/sound/soc/rockchip/rockchip_i2s.c index f373e37f8305..c74ba37f862c 100644 --- a/sound/soc/rockchip/rockchip_i2s.c +++ b/sound/soc/rockchip/rockchip_i2s.c @@ -154,8 +154,10 @@ static void rockchip_snd_rxctrl(struct rk_i2s_dev *i2s, int on) while (val) { regmap_read(i2s->regmap, I2S_CLR, &val); retry--; - if (!retry) + if (!retry) { dev_warn(i2s->dev, "fail to clear\n"); + break; + } } } } -- cgit v1.2.1 From df078d291d9b50e3ccfb8f030f85de701696a5ad Mon Sep 17 00:00:00 2001 From: "Fang, Yang A" Date: Tue, 28 Oct 2014 18:36:36 -0300 Subject: ASoC: rt5645: Mark RT5645_TDM_CTRL_3 as readable amixer query fails due to it is not readable reigster Signed-off-by: Fang, Yang A Acked-by: Bard Liao Signed-off-by: Mark Brown --- sound/soc/codecs/rt5645.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index 3fb83bf09768..d16331e0b64d 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -139,6 +139,7 @@ static const struct reg_default rt5645_reg[] = { { 0x76, 0x000a }, { 0x77, 0x0c00 }, { 0x78, 0x0000 }, + { 0x79, 0x0123 }, { 0x80, 0x0000 }, { 0x81, 0x0000 }, { 0x82, 0x0000 }, @@ -334,6 +335,7 @@ static bool rt5645_readable_register(struct device *dev, unsigned int reg) case RT5645_DMIC_CTRL2: case RT5645_TDM_CTRL_1: case RT5645_TDM_CTRL_2: + case RT5645_TDM_CTRL_3: case RT5645_GLB_CLK: case RT5645_PLL_CTRL1: case RT5645_PLL_CTRL2: -- cgit v1.2.1 From c1b9b9b1ad2df6144ca3fbe6989f7bd9ea5c5562 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 28 Oct 2014 21:01:53 -0700 Subject: ASoC: fsi: remove unsupported PAUSE flag FSI doesn't support PAUSE. Remove SNDRV_PCM_INFO_PAUSE flags from snd_pcm_hardware info Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/sh/fsi.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/soc/sh/fsi.c b/sound/soc/sh/fsi.c index 66fddec9543d..88e5df474ccf 100644 --- a/sound/soc/sh/fsi.c +++ b/sound/soc/sh/fsi.c @@ -1711,8 +1711,7 @@ static const struct snd_soc_dai_ops fsi_dai_ops = { static struct snd_pcm_hardware fsi_pcm_hardware = { .info = SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_MMAP | - SNDRV_PCM_INFO_MMAP_VALID | - SNDRV_PCM_INFO_PAUSE, + SNDRV_PCM_INFO_MMAP_VALID, .buffer_bytes_max = 64 * 1024, .period_bytes_min = 32, .period_bytes_max = 8192, -- cgit v1.2.1 From 706c66213e5e623e23f521b1acbd8171af7a3549 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 28 Oct 2014 21:02:03 -0700 Subject: ASoC: rsnd: remove unsupported PAUSE flag R-Car sound doesn't support PAUSE. Remove SNDRV_PCM_INFO_PAUSE flags from snd_pcm_hardware info Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/sh/rcar/core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c index 1922ec57d10a..70042197f9e2 100644 --- a/sound/soc/sh/rcar/core.c +++ b/sound/soc/sh/rcar/core.c @@ -886,8 +886,7 @@ static int rsnd_dai_probe(struct platform_device *pdev, static struct snd_pcm_hardware rsnd_pcm_hardware = { .info = SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_MMAP | - SNDRV_PCM_INFO_MMAP_VALID | - SNDRV_PCM_INFO_PAUSE, + SNDRV_PCM_INFO_MMAP_VALID, .buffer_bytes_max = 64 * 1024, .period_bytes_min = 32, .period_bytes_max = 8192, -- cgit v1.2.1 From 1ffae3612fe53dd18b92e696ab4f29df319a508d Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Wed, 29 Oct 2014 21:46:30 +0100 Subject: ASoC: Fix snd_soc_find_dai() matching component by name Commit 14621c7e5e72 ("ASoC: Consolidate CPU and CODEC DAI lookup") consolidated the lookup of CPU DAIs and CODEC DAIs into a single function. When matching a component by name for CODEC DAIs the code previous to the patch compared the name in the DAI link table with component->name. For CPU DAIs the code compared to dev_name(component->dev). The newly introduced function ended up using the later as well. For most components dev_name(component->dev) and component->name are the same. The main notable exception are I2C devices where the driver name and the device name are concatenated to form the component name. By using dev_name(component->dev) instead of component->name the patch broke the matching of I2C CODECs by name. This patch restores the original behavior by using component->name instead of dev_name(component->dev). This will be safe even for CPU DAIs since for CPU DAIs both are the same. Fixes: 14621c7e5e72 ("ASoC: Consolidate CPU and CODEC DAI lookup") Reported-by: Dmitry Eremin-Solenikov Signed-off-by: Lars-Peter Clausen Signed-off-by: Mark Brown --- sound/soc/soc-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 4c8f8a23a0e9..b60ff56ebc0f 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -884,7 +884,7 @@ static struct snd_soc_dai *snd_soc_find_dai( list_for_each_entry(component, &component_list, list) { if (dlc->of_node && component->dev->of_node != dlc->of_node) continue; - if (dlc->name && strcmp(dev_name(component->dev), dlc->name)) + if (dlc->name && strcmp(component->name, dlc->name)) continue; list_for_each_entry(dai, &component->dai_list, list) { if (dlc->dai_name && strcmp(dai->name, dlc->dai_name)) -- cgit v1.2.1 From f57915cfa5b2b14c1cffa2e83c034f55e3f0e70d Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Wed, 22 Oct 2014 14:55:49 -0700 Subject: ib_isert: Add max_send_sge=2 minimum for control PDU responses This patch adds a max_send_sge=2 minimum in isert_conn_setup_qp() to ensure outgoing control PDU responses with tx_desc->num_sge=2 are able to function correctly. This addresses a bug with RDMA hardware using dev_attr.max_sge=3, that in the original code with the ConnectX-2 work-around would result in isert_conn->max_sge=1 being negotiated. Originally reported by Chris with ocrdma driver. Reported-by: Chris Moore Tested-by: Chris Moore Signed-off-by: Or Gerlitz Cc: # 3.10+ Signed-off-by: Nicholas Bellinger --- drivers/infiniband/ulp/isert/ib_isert.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 3effa931fce2..ef5b22b3ea83 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -115,9 +115,12 @@ isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id, attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS; /* * FIXME: Use devattr.max_sge - 2 for max_send_sge as - * work-around for RDMA_READ.. + * work-around for RDMA_READs with ConnectX-2. + * + * Also, still make sure to have at least two SGEs for + * outgoing control PDU responses. */ - attr.cap.max_send_sge = device->dev_attr.max_sge - 2; + attr.cap.max_send_sge = max(2, device->dev_attr.max_sge - 2); isert_conn->max_sge = attr.cap.max_send_sge; attr.cap.max_recv_sge = 1; -- cgit v1.2.1 From 3b726ae2de02a406cc91903f80132daee37b6f1b Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 28 Oct 2014 13:45:03 -0700 Subject: iser-target: Handle DEVICE_REMOVAL event on network portal listener correctly In this case the cm_id->context is the isert_np, and the cm_id->qp is NULL, so use that to distinct the cases. Since we don't expect any other events on this cm_id we can just return -1 for explicit termination of the cm_id by the cma layer. Signed-off-by: Sagi Grimberg Cc: # 3.10+ Signed-off-by: Nicholas Bellinger --- drivers/infiniband/ulp/isert/ib_isert.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index ef5b22b3ea83..a6c7b395c61d 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -806,14 +806,25 @@ wake_up: complete(&isert_conn->conn_wait); } -static void +static int isert_disconnected_handler(struct rdma_cm_id *cma_id, bool disconnect) { - struct isert_conn *isert_conn = (struct isert_conn *)cma_id->context; + struct isert_conn *isert_conn; + + if (!cma_id->qp) { + struct isert_np *isert_np = cma_id->context; + + isert_np->np_cm_id = NULL; + return -1; + } + + isert_conn = (struct isert_conn *)cma_id->context; isert_conn->disconnect = disconnect; INIT_WORK(&isert_conn->conn_logout_work, isert_disconnect_work); schedule_work(&isert_conn->conn_logout_work); + + return 0; } static int @@ -828,6 +839,9 @@ isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) switch (event->event) { case RDMA_CM_EVENT_CONNECT_REQUEST: ret = isert_connect_request(cma_id, event); + if (ret) + pr_err("isert_cma_handler failed RDMA_CM_EVENT: 0x%08x %d\n", + event->event, ret); break; case RDMA_CM_EVENT_ESTABLISHED: isert_connected_handler(cma_id); @@ -837,7 +851,7 @@ isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) case RDMA_CM_EVENT_DEVICE_REMOVAL: /* FALLTHRU */ disconnect = true; case RDMA_CM_EVENT_TIMEWAIT_EXIT: /* FALLTHRU */ - isert_disconnected_handler(cma_id, disconnect); + ret = isert_disconnected_handler(cma_id, disconnect); break; case RDMA_CM_EVENT_CONNECT_ERROR: default: @@ -845,12 +859,6 @@ isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) break; } - if (ret != 0) { - pr_err("isert_cma_handler failed RDMA_CM_EVENT: 0x%08x %d\n", - event->event, ret); - dump_stack(); - } - return ret; } @@ -3193,7 +3201,8 @@ isert_free_np(struct iscsi_np *np) { struct isert_np *isert_np = (struct isert_np *)np->np_context; - rdma_destroy_id(isert_np->np_cm_id); + if (isert_np->np_cm_id) + rdma_destroy_id(isert_np->np_cm_id); np->np_context = NULL; kfree(isert_np); -- cgit v1.2.1 From b6932ee35f1c1364dcea0e691b2feb912a6777e5 Mon Sep 17 00:00:00 2001 From: Steven Allen Date: Fri, 24 Oct 2014 15:18:26 -0700 Subject: target: return CONFLICT only when SA key unmatched PREEMPT (and PREEMPT AND ABORT) should return CONFLICT iff a specified SERVICE ACTION RESERVATION KEY is specified and matches no existing persistent reservation. Without this patch, a PREEMPT will return CONFLICT if either all reservations are held by the initiator (self preemption) or there is nothing to preempt. According to the spec, both of these cases should succeed. Signed-off-by: Steven Allen Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_pr.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index 8c60a1a1ae8d..9f93b8234095 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -2738,7 +2738,8 @@ core_scsi3_pro_preempt(struct se_cmd *cmd, int type, int scope, u64 res_key, struct t10_pr_registration *pr_reg, *pr_reg_tmp, *pr_reg_n, *pr_res_holder; struct t10_reservation *pr_tmpl = &dev->t10_pr; u32 pr_res_mapped_lun = 0; - int all_reg = 0, calling_it_nexus = 0, released_regs = 0; + int all_reg = 0, calling_it_nexus = 0; + bool sa_res_key_unmatched = sa_res_key != 0; int prh_type = 0, prh_scope = 0; if (!se_sess) @@ -2813,6 +2814,7 @@ core_scsi3_pro_preempt(struct se_cmd *cmd, int type, int scope, u64 res_key, if (!all_reg) { if (pr_reg->pr_res_key != sa_res_key) continue; + sa_res_key_unmatched = false; calling_it_nexus = (pr_reg_n == pr_reg) ? 1 : 0; pr_reg_nacl = pr_reg->pr_reg_nacl; @@ -2820,7 +2822,6 @@ core_scsi3_pro_preempt(struct se_cmd *cmd, int type, int scope, u64 res_key, __core_scsi3_free_registration(dev, pr_reg, (preempt_type == PREEMPT_AND_ABORT) ? &preempt_and_abort_list : NULL, calling_it_nexus); - released_regs++; } else { /* * Case for any existing all registrants type @@ -2838,6 +2839,7 @@ core_scsi3_pro_preempt(struct se_cmd *cmd, int type, int scope, u64 res_key, if ((sa_res_key) && (pr_reg->pr_res_key != sa_res_key)) continue; + sa_res_key_unmatched = false; calling_it_nexus = (pr_reg_n == pr_reg) ? 1 : 0; if (calling_it_nexus) @@ -2848,7 +2850,6 @@ core_scsi3_pro_preempt(struct se_cmd *cmd, int type, int scope, u64 res_key, __core_scsi3_free_registration(dev, pr_reg, (preempt_type == PREEMPT_AND_ABORT) ? &preempt_and_abort_list : NULL, 0); - released_regs++; } if (!calling_it_nexus) core_scsi3_ua_allocate(pr_reg_nacl, @@ -2863,7 +2864,7 @@ core_scsi3_pro_preempt(struct se_cmd *cmd, int type, int scope, u64 res_key, * registered reservation key, then the device server shall * complete the command with RESERVATION CONFLICT status. */ - if (!released_regs) { + if (sa_res_key_unmatched) { spin_unlock(&dev->dev_reservation_lock); core_scsi3_put_pr_reg(pr_reg_n); return TCM_RESERVATION_CONFLICT; -- cgit v1.2.1 From ce5686d4ed12158599d2042a6c8659254ed263ce Mon Sep 17 00:00:00 2001 From: "Peter Zijlstra (Intel)" Date: Wed, 29 Oct 2014 11:17:04 +0100 Subject: perf/x86: Fix embarrasing typo Because we're all human and typing sucks.. Fixes: 7fb0f1de49fc ("perf/x86: Fix compile warnings for intel_uncore") Reported-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: x86@kernel.org Link: http://lkml.kernel.org/n/tip-be0bftjh8yfm4uvmvtf3yi87@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ded8a6774ac9..41a503c15862 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -144,7 +144,7 @@ config INSTRUCTION_DECODER config PERF_EVENTS_INTEL_UNCORE def_bool y - depends on PERF_EVENTS && SUP_SUP_INTEL && PCI + depends on PERF_EVENTS && CPU_SUP_INTEL && PCI config OUTPUT_FORMAT string -- cgit v1.2.1 From f7b8a47da17c9ee4998f2ca2018fcc424e953c0e Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 28 Oct 2014 08:24:34 +0300 Subject: sched: Remove lockdep check in sched_move_task() sched_move_task() is the only interface to change sched_task_group: cpu_cgrp_subsys methods and autogroup_move_group() use it. Everything is synchronized by task_rq_lock(), so cpu_cgroup_attach() is ordered with other users of sched_move_task(). This means we do no need RCU here: if we've dereferenced a tg here, the .attach method hasn't been called for it yet. Thus, we should pass "true" to task_css_check() to silence lockdep warnings. Fixes: eeb61e53ea19 ("sched: Fix race between task_group and sched_task_group") Reported-by: Oleg Nesterov Reported-by: Fengguang Wu Signed-off-by: Kirill Tkhai Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1414473874.8574.2.camel@tkhai Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 240157c13ddc..6841fb46eb07 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7444,8 +7444,12 @@ void sched_move_task(struct task_struct *tsk) if (unlikely(running)) put_prev_task(rq, tsk); - tg = container_of(task_css_check(tsk, cpu_cgrp_id, - lockdep_is_held(&tsk->sighand->siglock)), + /* + * All callers are synchronized by task_rq_lock(); we do not use RCU + * which is pointless here. Thus, we pass "true" to task_css_check() + * to prevent lockdep warnings. + */ + tg = container_of(task_css_check(tsk, cpu_cgrp_id, true), struct task_group, css); tg = autogroup_task_group(tsk, tg); tsk->sched_task_group = tg; -- cgit v1.2.1 From ea9d0d771fcd32cd56070819749477d511ec9117 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 4 Nov 2014 16:52:28 +0100 Subject: ASoC: dpcm: Fix race between FE/BE updates and trigger DPCM can update the FE/BE connection states totally asynchronously from the FE's PCM state. Most of FE/BE state changes are protected by mutex, so that they won't race, but there are still some actions that are uncovered. For example, suppose to switch a BE while a FE's stream is running. This would call soc_dpcm_runtime_update(), which sets FE's runtime_update flag, then sets up and starts BEs, and clears FE's runtime_update flag again. When a device emits XRUN during this operation, the PCM core triggers snd_pcm_stop(XRUN). Since the trigger action is an atomic ops, this isn't blocked by the mutex, thus it kicks off DPCM's trigger action. It eventually updates and clears FE's runtime_update flag while soc_dpcm_runtime_update() is running concurrently, and it results in confusion. Usually, for avoiding such a race, we take a lock. There is a PCM stream lock for that purpose. However, as already mentioned, the trigger action is atomic, and we can't take the lock for the whole soc_dpcm_runtime_update() or other operations that include the lengthy jobs like hw_params or prepare. This patch provides an alternative solution. This adds a way to defer the conflicting trigger callback to be executed at the end of FE/BE state changes. For doing it, two things are introduced: - Each runtime_update state change of FEs is protected via PCM stream lock. - The FE's trigger callback checks the runtime_update flag. If it's not set, the trigger action is executed there. If set, mark the pending trigger action and returns immediately. - At the exit of runtime_update state change, it checks whether the pending trigger is present. If yes, it executes the trigger action at this point. Reported-and-tested-by: Qiao Zhou Signed-off-by: Takashi Iwai Acked-by: Liam Girdwood Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- include/sound/soc-dpcm.h | 2 ++ sound/soc/soc-pcm.c | 72 +++++++++++++++++++++++++++++++++++++----------- 2 files changed, 58 insertions(+), 16 deletions(-) diff --git a/include/sound/soc-dpcm.h b/include/sound/soc-dpcm.h index 2883a7a6f9f3..98f2ade0266e 100644 --- a/include/sound/soc-dpcm.h +++ b/include/sound/soc-dpcm.h @@ -102,6 +102,8 @@ struct snd_soc_dpcm_runtime { /* state and update */ enum snd_soc_dpcm_update runtime_update; enum snd_soc_dpcm_state state; + + int trigger_pending; /* trigger cmd + 1 if pending, 0 if not */ }; /* can this BE stop and free */ diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 002311afdeaa..57277dd79e11 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -1522,13 +1522,36 @@ static void dpcm_set_fe_runtime(struct snd_pcm_substream *substream) dpcm_init_runtime_hw(runtime, &cpu_dai_drv->capture); } +static int dpcm_fe_dai_do_trigger(struct snd_pcm_substream *substream, int cmd); + +/* Set FE's runtime_update state; the state is protected via PCM stream lock + * for avoiding the race with trigger callback. + * If the state is unset and a trigger is pending while the previous operation, + * process the pending trigger action here. + */ +static void dpcm_set_fe_update_state(struct snd_soc_pcm_runtime *fe, + int stream, enum snd_soc_dpcm_update state) +{ + struct snd_pcm_substream *substream = + snd_soc_dpcm_get_substream(fe, stream); + + snd_pcm_stream_lock_irq(substream); + if (state == SND_SOC_DPCM_UPDATE_NO && fe->dpcm[stream].trigger_pending) { + dpcm_fe_dai_do_trigger(substream, + fe->dpcm[stream].trigger_pending - 1); + fe->dpcm[stream].trigger_pending = 0; + } + fe->dpcm[stream].runtime_update = state; + snd_pcm_stream_unlock_irq(substream); +} + static int dpcm_fe_dai_startup(struct snd_pcm_substream *fe_substream) { struct snd_soc_pcm_runtime *fe = fe_substream->private_data; struct snd_pcm_runtime *runtime = fe_substream->runtime; int stream = fe_substream->stream, ret = 0; - fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_FE; + dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_FE); ret = dpcm_be_dai_startup(fe, fe_substream->stream); if (ret < 0) { @@ -1550,13 +1573,13 @@ static int dpcm_fe_dai_startup(struct snd_pcm_substream *fe_substream) dpcm_set_fe_runtime(fe_substream); snd_pcm_limit_hw_rates(runtime); - fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO; + dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_NO); return 0; unwind: dpcm_be_dai_startup_unwind(fe, fe_substream->stream); be_err: - fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO; + dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_NO); return ret; } @@ -1603,7 +1626,7 @@ static int dpcm_fe_dai_shutdown(struct snd_pcm_substream *substream) struct snd_soc_pcm_runtime *fe = substream->private_data; int stream = substream->stream; - fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_FE; + dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_FE); /* shutdown the BEs */ dpcm_be_dai_shutdown(fe, substream->stream); @@ -1617,7 +1640,7 @@ static int dpcm_fe_dai_shutdown(struct snd_pcm_substream *substream) dpcm_dapm_stream_event(fe, stream, SND_SOC_DAPM_STREAM_STOP); fe->dpcm[stream].state = SND_SOC_DPCM_STATE_CLOSE; - fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO; + dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_NO); return 0; } @@ -1665,7 +1688,7 @@ static int dpcm_fe_dai_hw_free(struct snd_pcm_substream *substream) int err, stream = substream->stream; mutex_lock_nested(&fe->card->mutex, SND_SOC_CARD_CLASS_RUNTIME); - fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_FE; + dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_FE); dev_dbg(fe->dev, "ASoC: hw_free FE %s\n", fe->dai_link->name); @@ -1680,7 +1703,7 @@ static int dpcm_fe_dai_hw_free(struct snd_pcm_substream *substream) err = dpcm_be_dai_hw_free(fe, stream); fe->dpcm[stream].state = SND_SOC_DPCM_STATE_HW_FREE; - fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO; + dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_NO); mutex_unlock(&fe->card->mutex); return 0; @@ -1773,7 +1796,7 @@ static int dpcm_fe_dai_hw_params(struct snd_pcm_substream *substream, int ret, stream = substream->stream; mutex_lock_nested(&fe->card->mutex, SND_SOC_CARD_CLASS_RUNTIME); - fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_FE; + dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_FE); memcpy(&fe->dpcm[substream->stream].hw_params, params, sizeof(struct snd_pcm_hw_params)); @@ -1796,7 +1819,7 @@ static int dpcm_fe_dai_hw_params(struct snd_pcm_substream *substream, fe->dpcm[stream].state = SND_SOC_DPCM_STATE_HW_PARAMS; out: - fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO; + dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_NO); mutex_unlock(&fe->card->mutex); return ret; } @@ -1910,7 +1933,7 @@ int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream, } EXPORT_SYMBOL_GPL(dpcm_be_dai_trigger); -static int dpcm_fe_dai_trigger(struct snd_pcm_substream *substream, int cmd) +static int dpcm_fe_dai_do_trigger(struct snd_pcm_substream *substream, int cmd) { struct snd_soc_pcm_runtime *fe = substream->private_data; int stream = substream->stream, ret; @@ -1984,6 +2007,23 @@ out: return ret; } +static int dpcm_fe_dai_trigger(struct snd_pcm_substream *substream, int cmd) +{ + struct snd_soc_pcm_runtime *fe = substream->private_data; + int stream = substream->stream; + + /* if FE's runtime_update is already set, we're in race; + * process this trigger later at exit + */ + if (fe->dpcm[stream].runtime_update != SND_SOC_DPCM_UPDATE_NO) { + fe->dpcm[stream].trigger_pending = cmd + 1; + return 0; /* delayed, assuming it's successful */ + } + + /* we're alone, let's trigger */ + return dpcm_fe_dai_do_trigger(substream, cmd); +} + int dpcm_be_dai_prepare(struct snd_soc_pcm_runtime *fe, int stream) { struct snd_soc_dpcm *dpcm; @@ -2027,7 +2067,7 @@ static int dpcm_fe_dai_prepare(struct snd_pcm_substream *substream) dev_dbg(fe->dev, "ASoC: prepare FE %s\n", fe->dai_link->name); - fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_FE; + dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_FE); /* there is no point preparing this FE if there are no BEs */ if (list_empty(&fe->dpcm[stream].be_clients)) { @@ -2054,7 +2094,7 @@ static int dpcm_fe_dai_prepare(struct snd_pcm_substream *substream) fe->dpcm[stream].state = SND_SOC_DPCM_STATE_PREPARE; out: - fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO; + dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_NO); mutex_unlock(&fe->card->mutex); return ret; @@ -2201,11 +2241,11 @@ static int dpcm_run_new_update(struct snd_soc_pcm_runtime *fe, int stream) { int ret; - fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_BE; + dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_BE); ret = dpcm_run_update_startup(fe, stream); if (ret < 0) dev_err(fe->dev, "ASoC: failed to startup some BEs\n"); - fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO; + dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_NO); return ret; } @@ -2214,11 +2254,11 @@ static int dpcm_run_old_update(struct snd_soc_pcm_runtime *fe, int stream) { int ret; - fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_BE; + dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_BE); ret = dpcm_run_update_shutdown(fe, stream); if (ret < 0) dev_err(fe->dev, "ASoC: failed to shutdown some BEs\n"); - fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO; + dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_NO); return ret; } -- cgit v1.2.1 From ece509c10985ba93ccc8c68f808a9e767250041c Mon Sep 17 00:00:00 2001 From: Dylan Reid Date: Mon, 3 Nov 2014 10:28:56 -0800 Subject: ASoC: max98090: Correct pclk divisor settings The Baytrail-based chromebooks have a 20MHz mclk, the code was setting the divisor incorrectly in this case. According to the 98090 datasheet, the divisor should be set to DIV1 for 10 <= mclk <= 20. Correct this and the surrounding clock ranges as well to match the datasheet. Signed-off-by: Dylan Reid Signed-off-by: Mark Brown --- sound/soc/codecs/max98090.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/max98090.c b/sound/soc/codecs/max98090.c index d519294f57c7..1229554f1464 100644 --- a/sound/soc/codecs/max98090.c +++ b/sound/soc/codecs/max98090.c @@ -1941,13 +1941,13 @@ static int max98090_dai_set_sysclk(struct snd_soc_dai *dai, * 0x02 (when master clk is 20MHz to 40MHz).. * 0x03 (when master clk is 40MHz to 60MHz).. */ - if ((freq >= 10000000) && (freq < 20000000)) { + if ((freq >= 10000000) && (freq <= 20000000)) { snd_soc_write(codec, M98090_REG_SYSTEM_CLOCK, M98090_PSCLK_DIV1); - } else if ((freq >= 20000000) && (freq < 40000000)) { + } else if ((freq > 20000000) && (freq <= 40000000)) { snd_soc_write(codec, M98090_REG_SYSTEM_CLOCK, M98090_PSCLK_DIV2); - } else if ((freq >= 40000000) && (freq < 60000000)) { + } else if ((freq > 40000000) && (freq <= 60000000)) { snd_soc_write(codec, M98090_REG_SYSTEM_CLOCK, M98090_PSCLK_DIV4); } else { -- cgit v1.2.1 From 62e6a3b6f4e1f9b96fa66ab1cdf2ffd8594df9e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20F=C3=A4rber?= Date: Wed, 5 Nov 2014 17:44:52 +0100 Subject: ASoC: samsung: Add MODULE_DEVICE_TABLE for Snow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This enables the snd_soc_snow module to be auto-loaded. Signed-off-by: Andreas Färber Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/samsung/snow.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/samsung/snow.c b/sound/soc/samsung/snow.c index 0acf5d0eed53..72118a77dd5b 100644 --- a/sound/soc/samsung/snow.c +++ b/sound/soc/samsung/snow.c @@ -110,6 +110,7 @@ static const struct of_device_id snow_of_match[] = { { .compatible = "google,snow-audio-max98095", }, {}, }; +MODULE_DEVICE_TABLE(of, snow_of_match); static struct platform_driver snow_driver = { .driver = { -- cgit v1.2.1 From ac87f22147098da9cf83a0b413ef7dda42277d85 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Thu, 6 Nov 2014 12:23:52 +0800 Subject: ASoC: rt5670: correct the incorrect default values The patch corrects the incorrect default register values. Signed-off-by: Bard Liao Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/codecs/rt5670.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/sound/soc/codecs/rt5670.c b/sound/soc/codecs/rt5670.c index ba9d9b4d4857..57ab19dbaaf3 100644 --- a/sound/soc/codecs/rt5670.c +++ b/sound/soc/codecs/rt5670.c @@ -100,18 +100,18 @@ static const struct reg_default rt5670_reg[] = { { 0x4c, 0x5380 }, { 0x4f, 0x0073 }, { 0x52, 0x00d3 }, - { 0x53, 0xf0f0 }, + { 0x53, 0xf000 }, { 0x61, 0x0000 }, { 0x62, 0x0001 }, { 0x63, 0x00c3 }, { 0x64, 0x0000 }, - { 0x65, 0x0000 }, + { 0x65, 0x0001 }, { 0x66, 0x0000 }, { 0x6f, 0x8000 }, { 0x70, 0x8000 }, { 0x71, 0x8000 }, { 0x72, 0x8000 }, - { 0x73, 0x1110 }, + { 0x73, 0x7770 }, { 0x74, 0x0e00 }, { 0x75, 0x1505 }, { 0x76, 0x0015 }, @@ -125,21 +125,21 @@ static const struct reg_default rt5670_reg[] = { { 0x83, 0x0000 }, { 0x84, 0x0000 }, { 0x85, 0x0000 }, - { 0x86, 0x0008 }, + { 0x86, 0x0004 }, { 0x87, 0x0000 }, { 0x88, 0x0000 }, { 0x89, 0x0000 }, { 0x8a, 0x0000 }, { 0x8b, 0x0000 }, - { 0x8c, 0x0007 }, + { 0x8c, 0x0003 }, { 0x8d, 0x0000 }, { 0x8e, 0x0004 }, { 0x8f, 0x1100 }, { 0x90, 0x0646 }, { 0x91, 0x0c06 }, { 0x93, 0x0000 }, - { 0x94, 0x0000 }, - { 0x95, 0x0000 }, + { 0x94, 0x1270 }, + { 0x95, 0x1000 }, { 0x97, 0x0000 }, { 0x98, 0x0000 }, { 0x99, 0x0000 }, @@ -150,11 +150,11 @@ static const struct reg_default rt5670_reg[] = { { 0x9e, 0x0400 }, { 0xae, 0x7000 }, { 0xaf, 0x0000 }, - { 0xb0, 0x6000 }, + { 0xb0, 0x7000 }, { 0xb1, 0x0000 }, { 0xb2, 0x0000 }, { 0xb3, 0x001f }, - { 0xb4, 0x2206 }, + { 0xb4, 0x220c }, { 0xb5, 0x1f00 }, { 0xb6, 0x0000 }, { 0xb7, 0x0000 }, @@ -171,25 +171,25 @@ static const struct reg_default rt5670_reg[] = { { 0xcf, 0x1813 }, { 0xd0, 0x0690 }, { 0xd1, 0x1c17 }, - { 0xd3, 0xb320 }, + { 0xd3, 0xa220 }, { 0xd4, 0x0000 }, { 0xd6, 0x0400 }, { 0xd9, 0x0809 }, { 0xda, 0x0000 }, { 0xdb, 0x0001 }, { 0xdc, 0x0049 }, - { 0xdd, 0x0009 }, + { 0xdd, 0x0024 }, { 0xe6, 0x8000 }, { 0xe7, 0x0000 }, - { 0xec, 0xb300 }, + { 0xec, 0xa200 }, { 0xed, 0x0000 }, - { 0xee, 0xb300 }, + { 0xee, 0xa200 }, { 0xef, 0x0000 }, { 0xf8, 0x0000 }, { 0xf9, 0x0000 }, { 0xfa, 0x8010 }, { 0xfb, 0x0033 }, - { 0xfc, 0x0080 }, + { 0xfc, 0x0100 }, }; static bool rt5670_volatile_register(struct device *dev, unsigned int reg) -- cgit v1.2.1 From 96927ac96d7658b35a4f0f3dcdb8c6b74472a3ea Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Thu, 6 Nov 2014 12:23:54 +0800 Subject: ASoC: rt5670: change dapm routes of PLL connection PLL should be powered up once filter power is on. So, "PLL1" should be connected to filters instead of DACs. Signed-off-by: Bard Liao Signed-off-by: Mark Brown --- sound/soc/codecs/rt5670.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/soc/codecs/rt5670.c b/sound/soc/codecs/rt5670.c index 57ab19dbaaf3..9bd8b4f63303 100644 --- a/sound/soc/codecs/rt5670.c +++ b/sound/soc/codecs/rt5670.c @@ -1877,6 +1877,10 @@ static const struct snd_soc_dapm_route rt5670_dapm_routes[] = { { "DAC1 MIXR", "DAC1 Switch", "DAC1 R Mux" }, { "DAC1 MIXR", NULL, "DAC Stereo1 Filter" }, + { "DAC Stereo1 Filter", NULL, "PLL1", is_sys_clk_from_pll }, + { "DAC Mono Left Filter", NULL, "PLL1", is_sys_clk_from_pll }, + { "DAC Mono Right Filter", NULL, "PLL1", is_sys_clk_from_pll }, + { "DAC MIX", NULL, "DAC1 MIXL" }, { "DAC MIX", NULL, "DAC1 MIXR" }, @@ -1926,14 +1930,10 @@ static const struct snd_soc_dapm_route rt5670_dapm_routes[] = { { "DAC L1", NULL, "DAC L1 Power" }, { "DAC L1", NULL, "Stereo DAC MIXL" }, - { "DAC L1", NULL, "PLL1", is_sys_clk_from_pll }, { "DAC R1", NULL, "DAC R1 Power" }, { "DAC R1", NULL, "Stereo DAC MIXR" }, - { "DAC R1", NULL, "PLL1", is_sys_clk_from_pll }, { "DAC L2", NULL, "Mono DAC MIXL" }, - { "DAC L2", NULL, "PLL1", is_sys_clk_from_pll }, { "DAC R2", NULL, "Mono DAC MIXR" }, - { "DAC R2", NULL, "PLL1", is_sys_clk_from_pll }, { "OUT MIXL", "BST1 Switch", "BST1" }, { "OUT MIXL", "INL Switch", "INL VOL" }, -- cgit v1.2.1 From 4f031fa9f188b2b0641ac20087d9e16bcfb4e49d Mon Sep 17 00:00:00 2001 From: Ronald Wahl Date: Thu, 6 Nov 2014 11:52:13 +0100 Subject: mac80211: Fix regression that triggers a kernel BUG with CCMP Commit 7ec7c4a9a686c608315739ab6a2b0527a240883c (mac80211: port CCMP to cryptoapi's CCM driver) introduced a regression when decrypting empty packets (data_len == 0). This will lead to backtraces like: (scatterwalk_start) from [] (scatterwalk_map_and_copy+0x2c/0xa8) (scatterwalk_map_and_copy) from [] (crypto_ccm_decrypt+0x7c/0x25c) (crypto_ccm_decrypt) from [] (ieee80211_aes_ccm_decrypt+0x160/0x170) (ieee80211_aes_ccm_decrypt) from [] (ieee80211_crypto_ccmp_decrypt+0x1ac/0x238) (ieee80211_crypto_ccmp_decrypt) from [] (ieee80211_rx_handlers+0x870/0x1d24) (ieee80211_rx_handlers) from [] (ieee80211_prepare_and_rx_handle+0x8a0/0x91c) (ieee80211_prepare_and_rx_handle) from [] (ieee80211_rx+0x568/0x730) (ieee80211_rx) from [] (__carl9170_rx+0x94c/0xa20) (__carl9170_rx) from [] (carl9170_rx_stream+0x1fc/0x320) (carl9170_rx_stream) from [] (carl9170_usb_tasklet+0x80/0xc8) (carl9170_usb_tasklet) from [] (tasklet_hi_action+0x88/0xcc) (tasklet_hi_action) from [] (__do_softirq+0xcc/0x200) (__do_softirq) from [] (irq_exit+0x80/0xe0) (irq_exit) from [] (handle_IRQ+0x64/0x80) (handle_IRQ) from [] (__irq_svc+0x40/0x4c) (__irq_svc) from [] (arch_cpu_idle+0x2c/0x34) Such packets can appear for example when using the carl9170 wireless driver because hardware sometimes generates garbage when the internal FIFO overruns. This patch adds an additional length check. Cc: stable@vger.kernel.org Fixes: 7ec7c4a9a686 ("mac80211: port CCMP to cryptoapi's CCM driver") Acked-by: Ard Biesheuvel Signed-off-by: Ronald Wahl Signed-off-by: Johannes Berg --- net/mac80211/aes_ccm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c index ec24378caaaf..09d9caaec591 100644 --- a/net/mac80211/aes_ccm.c +++ b/net/mac80211/aes_ccm.c @@ -53,6 +53,9 @@ int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, __aligned(__alignof__(struct aead_request)); struct aead_request *aead_req = (void *) aead_req_data; + if (data_len == 0) + return -EINVAL; + memset(aead_req, 0, sizeof(aead_req_data)); sg_init_one(&pt, data, data_len); -- cgit v1.2.1 From 532425a7a7738b479406496ca4ad6b3247688e44 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Thu, 6 Nov 2014 19:56:49 +0800 Subject: ARM: dts: sun6i: Re-parent ahb1_mux to pll6 as required by dma controller The dma controller requires that the ahb1 bus clock be driven by pll6 for peripheral access to work. Previously this was done in the dma controller driver, but was since removed as part of a series to unify the ahb1_mux and ahb1 clock drivers, in 14e0e28 dmaengine: sun6i: Remove obsolete clk muxing code Unfortunately the rest of that series did not make it, leaving us with broken dma on sun6i. This patch reparents ahb1_mux to pll6 using the DT assigned-clocks properties in the dma controller node. Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard --- arch/arm/boot/dts/sun6i-a31.dtsi | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/boot/dts/sun6i-a31.dtsi b/arch/arm/boot/dts/sun6i-a31.dtsi index 543f895d18d3..2e652e2339e9 100644 --- a/arch/arm/boot/dts/sun6i-a31.dtsi +++ b/arch/arm/boot/dts/sun6i-a31.dtsi @@ -361,6 +361,10 @@ clocks = <&ahb1_gates 6>; resets = <&ahb1_rst 6>; #dma-cells = <1>; + + /* DMA controller requires AHB1 clocked from PLL6 */ + assigned-clocks = <&ahb1_mux>; + assigned-clock-parents = <&pll6>; }; mmc0: mmc@01c0f000 { -- cgit v1.2.1 From a926a12b5f11007d0ba9eb2e083d86054fb29a06 Mon Sep 17 00:00:00 2001 From: Shinobu Uehara Date: Thu, 30 Oct 2014 14:57:56 +0900 Subject: ARM: shmobile: r8a7790: Fix SD3CKCR address Signed-off-by: Shinobu Uehara Acked-by: Geert Uytterhoeven Signed-off-by: Simon Horman --- arch/arm/mach-shmobile/clock-r8a7790.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-shmobile/clock-r8a7790.c b/arch/arm/mach-shmobile/clock-r8a7790.c index 126ddafad526..f62265200592 100644 --- a/arch/arm/mach-shmobile/clock-r8a7790.c +++ b/arch/arm/mach-shmobile/clock-r8a7790.c @@ -68,7 +68,7 @@ #define SDCKCR 0xE6150074 #define SD2CKCR 0xE6150078 -#define SD3CKCR 0xE615007C +#define SD3CKCR 0xE615026C #define MMC0CKCR 0xE6150240 #define MMC1CKCR 0xE6150244 #define SSPCKCR 0xE6150248 -- cgit v1.2.1 From 85eb968e84686faa507daa3c5cfdfbfefc7eda95 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 5 Nov 2014 21:59:34 +0100 Subject: ARM: shmobile: r8a7740 legacy: Add missing INTCA clock for irqpin module This clock drives the irqpin controller modules. Before, it was assumed enabled by the bootloader or reset state. By making it available to the driver, we make sure it gets enabled when needed, and allow it to be managed by system or runtime PM. Signed-off-by: Geert Uytterhoeven Signed-off-by: Simon Horman --- arch/arm/mach-shmobile/clock-r8a7740.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-shmobile/clock-r8a7740.c b/arch/arm/mach-shmobile/clock-r8a7740.c index 0794f0426e70..16a6b7ccbc8b 100644 --- a/arch/arm/mach-shmobile/clock-r8a7740.c +++ b/arch/arm/mach-shmobile/clock-r8a7740.c @@ -455,7 +455,7 @@ enum { MSTP128, MSTP127, MSTP125, MSTP116, MSTP111, MSTP100, MSTP117, - MSTP230, + MSTP230, MSTP229, MSTP222, MSTP218, MSTP217, MSTP216, MSTP214, MSTP207, MSTP206, MSTP204, MSTP203, MSTP202, MSTP201, MSTP200, @@ -479,6 +479,7 @@ static struct clk mstp_clks[MSTP_NR] = { [MSTP100] = SH_CLK_MSTP32(&div4_clks[DIV4_B], SMSTPCR1, 0, 0), /* LCDC0 */ [MSTP230] = SH_CLK_MSTP32(&div6_clks[DIV6_SUB], SMSTPCR2, 30, 0), /* SCIFA6 */ + [MSTP229] = SH_CLK_MSTP32(&div4_clks[DIV4_HP], SMSTPCR2, 29, 0), /* INTCA */ [MSTP222] = SH_CLK_MSTP32(&div6_clks[DIV6_SUB], SMSTPCR2, 22, 0), /* SCIFA7 */ [MSTP218] = SH_CLK_MSTP32(&div4_clks[DIV4_HP], SMSTPCR2, 18, 0), /* DMAC1 */ [MSTP217] = SH_CLK_MSTP32(&div4_clks[DIV4_HP], SMSTPCR2, 17, 0), /* DMAC2 */ @@ -575,6 +576,10 @@ static struct clk_lookup lookups[] = { CLKDEV_DEV_ID("sh-dma-engine.0", &mstp_clks[MSTP218]), CLKDEV_DEV_ID("sh-sci.7", &mstp_clks[MSTP222]), CLKDEV_DEV_ID("e6cd0000.serial", &mstp_clks[MSTP222]), + CLKDEV_DEV_ID("renesas_intc_irqpin.0", &mstp_clks[MSTP229]), + CLKDEV_DEV_ID("renesas_intc_irqpin.1", &mstp_clks[MSTP229]), + CLKDEV_DEV_ID("renesas_intc_irqpin.2", &mstp_clks[MSTP229]), + CLKDEV_DEV_ID("renesas_intc_irqpin.3", &mstp_clks[MSTP229]), CLKDEV_DEV_ID("sh-sci.6", &mstp_clks[MSTP230]), CLKDEV_DEV_ID("e6cc0000.serial", &mstp_clks[MSTP230]), -- cgit v1.2.1 From 4f37828d4d69a46830e0525a065da9847fc7a819 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 5 Nov 2014 11:04:33 +0100 Subject: ARM: shmobile: r8a7740 legacy: Correct IIC0 parent clock According to the datasheet, the operating clock for IIC0 is the HPP (RT Peri) clock, not the SUB (Peri) clock. Both clocks run at the same speed (50 Mhz). This is consistent with IIC0 being located in the A4R PM domain, and IIC1 in the A3SP PM domain. Signed-off-by: Geert Uytterhoeven Signed-off-by: Simon Horman --- arch/arm/mach-shmobile/clock-r8a7740.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-shmobile/clock-r8a7740.c b/arch/arm/mach-shmobile/clock-r8a7740.c index 16a6b7ccbc8b..19df9cb30495 100644 --- a/arch/arm/mach-shmobile/clock-r8a7740.c +++ b/arch/arm/mach-shmobile/clock-r8a7740.c @@ -474,7 +474,7 @@ static struct clk mstp_clks[MSTP_NR] = { [MSTP127] = SH_CLK_MSTP32(&div4_clks[DIV4_S], SMSTPCR1, 27, 0), /* CEU20 */ [MSTP125] = SH_CLK_MSTP32(&div6_clks[DIV6_SUB], SMSTPCR1, 25, 0), /* TMU0 */ [MSTP117] = SH_CLK_MSTP32(&div4_clks[DIV4_B], SMSTPCR1, 17, 0), /* LCDC1 */ - [MSTP116] = SH_CLK_MSTP32(&div6_clks[DIV6_SUB], SMSTPCR1, 16, 0), /* IIC0 */ + [MSTP116] = SH_CLK_MSTP32(&div4_clks[DIV4_HPP], SMSTPCR1, 16, 0), /* IIC0 */ [MSTP111] = SH_CLK_MSTP32(&div6_clks[DIV6_SUB], SMSTPCR1, 11, 0), /* TMU1 */ [MSTP100] = SH_CLK_MSTP32(&div4_clks[DIV4_B], SMSTPCR1, 0, 0), /* LCDC0 */ -- cgit v1.2.1 From edd7b938637701567a54306adb27cfb4345fedc5 Mon Sep 17 00:00:00 2001 From: Shinobu Uehara Date: Thu, 30 Oct 2014 14:57:57 +0900 Subject: ARM: shmobile: r8a7790: Fix SD3CKCR address to device tree Signed-off-by: Shinobu Uehara Acked-by: Geert Uytterhoeven Signed-off-by: Simon Horman --- arch/arm/boot/dts/r8a7790.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/r8a7790.dtsi b/arch/arm/boot/dts/r8a7790.dtsi index d0e17733dc1a..e20affe156c1 100644 --- a/arch/arm/boot/dts/r8a7790.dtsi +++ b/arch/arm/boot/dts/r8a7790.dtsi @@ -666,9 +666,9 @@ #clock-cells = <0>; clock-output-names = "sd2"; }; - sd3_clk: sd3_clk@e615007c { + sd3_clk: sd3_clk@e615026c { compatible = "renesas,r8a7790-div6-clock", "renesas,cpg-div6-clock"; - reg = <0 0xe615007c 0 4>; + reg = <0 0xe615026c 0 4>; clocks = <&pll1_div2_clk>; #clock-cells = <0>; clock-output-names = "sd3"; -- cgit v1.2.1 From b89ff7c3c2dee189489a5f45eb8d72e106179299 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 5 Nov 2014 11:04:34 +0100 Subject: ARM: shmobile: r8a7740 dtsi: Correct IIC0 parent clock According to the datasheet, the operating clock for IIC0 is the HPP (RT Peri) clock, not the SUB (Peri) clock. Both clocks run at the same speed (50 Mhz). This is consistent with IIC0 being located in the A4R PM domain, and IIC1 in the A3SP PM domain. Signed-off-by: Geert Uytterhoeven Signed-off-by: Simon Horman --- arch/arm/boot/dts/r8a7740.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/r8a7740.dtsi b/arch/arm/boot/dts/r8a7740.dtsi index d46c213a17ad..eed697a6bd6b 100644 --- a/arch/arm/boot/dts/r8a7740.dtsi +++ b/arch/arm/boot/dts/r8a7740.dtsi @@ -433,7 +433,7 @@ clocks = <&cpg_clocks R8A7740_CLK_S>, <&cpg_clocks R8A7740_CLK_S>, <&sub_clk>, <&cpg_clocks R8A7740_CLK_B>, - <&sub_clk>, <&sub_clk>, + <&cpg_clocks R8A7740_CLK_HPP>, <&sub_clk>, <&cpg_clocks R8A7740_CLK_B>; #clock-cells = <1>; renesas,clock-indices = < -- cgit v1.2.1 From c123588b3b193d06588dfb51f475407f835ebfb2 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 7 Nov 2014 17:53:40 +0300 Subject: sched/numa: Fix out of bounds read in sched_init_numa() On latest mm + KASan patchset I've got this: ================================================================== BUG: AddressSanitizer: out of bounds access in sched_init_smp+0x3ba/0x62c at addr ffff88006d4bee6c ============================================================================= BUG kmalloc-8 (Not tainted): kasan error ----------------------------------------------------------------------------- Disabling lock debugging due to kernel taint INFO: Allocated in alloc_vfsmnt+0xb0/0x2c0 age=75 cpu=0 pid=0 __slab_alloc+0x4b4/0x4f0 __kmalloc_track_caller+0x15f/0x1e0 kstrdup+0x44/0x90 alloc_vfsmnt+0xb0/0x2c0 vfs_kern_mount+0x35/0x190 kern_mount_data+0x25/0x50 pid_ns_prepare_proc+0x19/0x50 alloc_pid+0x5e2/0x630 copy_process.part.41+0xdf5/0x2aa0 do_fork+0xf5/0x460 kernel_thread+0x21/0x30 rest_init+0x1e/0x90 start_kernel+0x522/0x531 x86_64_start_reservations+0x2a/0x2c x86_64_start_kernel+0x15b/0x16a INFO: Slab 0xffffea0001b52f80 objects=24 used=22 fp=0xffff88006d4befc0 flags=0x100000000004080 INFO: Object 0xffff88006d4bed20 @offset=3360 fp=0xffff88006d4bee70 Bytes b4 ffff88006d4bed10: 00 00 00 00 00 00 00 00 5a 5a 5a 5a 5a 5a 5a 5a ........ZZZZZZZZ Object ffff88006d4bed20: 70 72 6f 63 00 6b 6b a5 proc.kk. Redzone ffff88006d4bed28: cc cc cc cc cc cc cc cc ........ Padding ffff88006d4bee68: 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ CPU: 0 PID: 1 Comm: swapper/0 Tainted: G B 3.18.0-rc3-mm1+ #108 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org 04/01/2014 ffff88006d4be000 0000000000000000 ffff88006d4bed20 ffff88006c86fd18 ffffffff81cd0a59 0000000000000058 ffff88006d404240 ffff88006c86fd48 ffffffff811fa3a8 ffff88006d404240 ffffea0001b52f80 ffff88006d4bed20 Call Trace: dump_stack (lib/dump_stack.c:52) print_trailer (mm/slub.c:645) object_err (mm/slub.c:652) ? sched_init_smp (kernel/sched/core.c:6552 kernel/sched/core.c:7063) kasan_report_error (mm/kasan/report.c:102 mm/kasan/report.c:178) ? kasan_poison_shadow (mm/kasan/kasan.c:48) ? kasan_unpoison_shadow (mm/kasan/kasan.c:54) ? kasan_poison_shadow (mm/kasan/kasan.c:48) ? kasan_kmalloc (mm/kasan/kasan.c:311) __asan_load4 (mm/kasan/kasan.c:371) ? sched_init_smp (kernel/sched/core.c:6552 kernel/sched/core.c:7063) sched_init_smp (kernel/sched/core.c:6552 kernel/sched/core.c:7063) kernel_init_freeable (init/main.c:869 init/main.c:997) ? finish_task_switch (kernel/sched/sched.h:1036 kernel/sched/core.c:2248) ? rest_init (init/main.c:924) kernel_init (init/main.c:929) ? rest_init (init/main.c:924) ret_from_fork (arch/x86/kernel/entry_64.S:348) ? rest_init (init/main.c:924) Read of size 4 by task swapper/0: Memory state around the buggy address: ffff88006d4beb80: fc fc fc fc fc fc fc fc fc fc 00 fc fc fc fc fc ffff88006d4bec00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88006d4bec80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88006d4bed00: fc fc fc fc 00 fc fc fc fc fc fc fc fc fc fc fc ffff88006d4bed80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff88006d4bee00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc 04 fc ^ ffff88006d4bee80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88006d4bef00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88006d4bef80: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb ffff88006d4bf000: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88006d4bf080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Zero 'level' (e.g. on non-NUMA system) causing out of bounds access in this line: sched_max_numa_distance = sched_domains_numa_distance[level - 1]; Fix this by exiting from sched_init_numa() earlier. Signed-off-by: Andrey Ryabinin Reviewed-by: Rik van Riel Fixes: 9942f79ba ("sched/numa: Export info needed for NUMA balancing on complex topologies") Cc: peterz@infradead.org Link: http://lkml.kernel.org/r/1415372020-1871-1-git-send-email-a.ryabinin@samsung.com Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 6841fb46eb07..5f12ca65c9a7 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6368,6 +6368,10 @@ static void sched_init_numa(void) if (!sched_debug()) break; } + + if (!level) + return; + /* * 'level' contains the number of unique distances, excluding the * identity distance node_distance(i,i). -- cgit v1.2.1 From 6b96686ecffcbea85dcb502e4584e4a20a2bfb29 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 7 Nov 2014 15:34:54 +0100 Subject: netfilter: nft_masq: fix uninitialized range in nft_masq_{ipv4, ipv6}_eval When transferring from the original range in nf_nat_masquerade_{ipv4,ipv6}() we copy over values from stack in from min_proto/max_proto due to uninitialized range variable in both, nft_masq_{ipv4,ipv6}_eval. As we only initialize flags at this time from nft_masq struct, just zero out the rest. Fixes: 9ba1f726bec09 ("netfilter: nf_tables: add new nft_masq expression") Signed-off-by: Daniel Borkmann Acked-by: Arturo Borrero Gonzalez Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nft_masq_ipv4.c | 1 + net/ipv6/netfilter/nft_masq_ipv6.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c index c1023c445920..665de06561cd 100644 --- a/net/ipv4/netfilter/nft_masq_ipv4.c +++ b/net/ipv4/netfilter/nft_masq_ipv4.c @@ -24,6 +24,7 @@ static void nft_masq_ipv4_eval(const struct nft_expr *expr, struct nf_nat_range range; unsigned int verdict; + memset(&range, 0, sizeof(range)); range.flags = priv->flags; verdict = nf_nat_masquerade_ipv4(pkt->skb, pkt->ops->hooknum, diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c index 8a7ac685076d..529c119cbb14 100644 --- a/net/ipv6/netfilter/nft_masq_ipv6.c +++ b/net/ipv6/netfilter/nft_masq_ipv6.c @@ -25,6 +25,7 @@ static void nft_masq_ipv6_eval(const struct nft_expr *expr, struct nf_nat_range range; unsigned int verdict; + memset(&range, 0, sizeof(range)); range.flags = priv->flags; verdict = nf_nat_masquerade_ipv6(pkt->skb, &range, pkt->out); -- cgit v1.2.1 From fb6eaf2ccca7d3580931bcb4b735101b461f38cf Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Wed, 5 Nov 2014 19:10:52 -0600 Subject: rtlwifi: Fix setting of tx descriptor for new trx flow Device RTL8192EE uses a new form of trx flow. This fix sets up the descriptors correctly. Signed-off-by: Larry Finger Signed-off-by: John W. Linville --- drivers/net/wireless/rtlwifi/pci.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/rtlwifi/pci.c b/drivers/net/wireless/rtlwifi/pci.c index 25daa8715219..116f746e7c73 100644 --- a/drivers/net/wireless/rtlwifi/pci.c +++ b/drivers/net/wireless/rtlwifi/pci.c @@ -1127,9 +1127,14 @@ static void _rtl_pci_prepare_bcn_tasklet(struct ieee80211_hw *hw) __skb_queue_tail(&ring->queue, pskb); - rtlpriv->cfg->ops->set_desc(hw, (u8 *)pdesc, true, HW_DESC_OWN, - &temp_one); - + if (rtlpriv->use_new_trx_flow) { + temp_one = 4; + rtlpriv->cfg->ops->set_desc(hw, (u8 *)pbuffer_desc, true, + HW_DESC_OWN, (u8 *)&temp_one); + } else { + rtlpriv->cfg->ops->set_desc(hw, (u8 *)pdesc, true, HW_DESC_OWN, + &temp_one); + } return; } -- cgit v1.2.1 From caea2172c23465a77556b2e1d06412b532b90235 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Wed, 5 Nov 2014 19:10:53 -0600 Subject: rtlwifi: Fix errors in descriptor manipulation There are typos in the handling of the descriptor pointers where the wrong descriptor is referenced. There is also an error in which the pointer is incremented twice. Signed-off-by: Larry Finger Signed-off-by: John W. Linville --- drivers/net/wireless/rtlwifi/pci.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/rtlwifi/pci.c b/drivers/net/wireless/rtlwifi/pci.c index 116f746e7c73..6d2b6281e22b 100644 --- a/drivers/net/wireless/rtlwifi/pci.c +++ b/drivers/net/wireless/rtlwifi/pci.c @@ -1375,9 +1375,9 @@ static void _rtl_pci_free_tx_ring(struct ieee80211_hw *hw, ring->desc = NULL; if (rtlpriv->use_new_trx_flow) { pci_free_consistent(rtlpci->pdev, - sizeof(*ring->desc) * ring->entries, + sizeof(*ring->buffer_desc) * ring->entries, ring->buffer_desc, ring->buffer_desc_dma); - ring->desc = NULL; + ring->buffer_desc = NULL; } } @@ -1548,7 +1548,6 @@ int rtl_pci_reset_trx_ring(struct ieee80211_hw *hw) true, HW_DESC_TXBUFF_ADDR), skb->len, PCI_DMA_TODEVICE); - ring->idx = (ring->idx + 1) % ring->entries; kfree_skb(skb); ring->idx = (ring->idx + 1) % ring->entries; } -- cgit v1.2.1 From d1cd5ba4ca8b41793f4e581dd1dbf46b7f2cf691 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Fri, 7 Nov 2014 10:05:12 -0600 Subject: rtlwifi: rtl8192se: Fix connection problems Changes in the vendor driver were added to rtlwifi, but some updates to rtl8192se were missed, and the driver could neither scan nor connect. There are other changes that will enhance performance, but this minimal set fix the basic functionality. Signed-off-by: Larry Finger Signed-off-by: John W. Linville --- drivers/net/wireless/rtlwifi/pci.c | 3 ++- drivers/net/wireless/rtlwifi/rtl8192se/hw.c | 7 +++++-- drivers/net/wireless/rtlwifi/rtl8192se/phy.c | 2 ++ drivers/net/wireless/rtlwifi/rtl8192se/sw.c | 16 ++++++++++++++++ 4 files changed, 25 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/rtlwifi/pci.c b/drivers/net/wireless/rtlwifi/pci.c index 6d2b6281e22b..61f5d36eca6a 100644 --- a/drivers/net/wireless/rtlwifi/pci.c +++ b/drivers/net/wireless/rtlwifi/pci.c @@ -842,7 +842,8 @@ static void _rtl_pci_rx_interrupt(struct ieee80211_hw *hw) break; } /* handle command packet here */ - if (rtlpriv->cfg->ops->rx_command_packet(hw, stats, skb)) { + if (rtlpriv->cfg->ops->rx_command_packet && + rtlpriv->cfg->ops->rx_command_packet(hw, stats, skb)) { dev_kfree_skb_any(skb); goto end; } diff --git a/drivers/net/wireless/rtlwifi/rtl8192se/hw.c b/drivers/net/wireless/rtlwifi/rtl8192se/hw.c index 00e067044c08..5761d5b49e39 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192se/hw.c +++ b/drivers/net/wireless/rtlwifi/rtl8192se/hw.c @@ -1201,6 +1201,9 @@ static int _rtl92se_set_media_status(struct ieee80211_hw *hw, } + if (type != NL80211_IFTYPE_AP && + rtlpriv->mac80211.link_state < MAC80211_LINKED) + bt_msr = rtl_read_byte(rtlpriv, MSR) & ~MSR_LINK_MASK; rtl_write_byte(rtlpriv, (MSR), bt_msr); temp = rtl_read_dword(rtlpriv, TCR); @@ -1262,6 +1265,7 @@ void rtl92se_enable_interrupt(struct ieee80211_hw *hw) rtl_write_dword(rtlpriv, INTA_MASK, rtlpci->irq_mask[0]); /* Support Bit 32-37(Assign as Bit 0-5) interrupt setting now */ rtl_write_dword(rtlpriv, INTA_MASK + 4, rtlpci->irq_mask[1] & 0x3F); + rtlpci->irq_enabled = true; } void rtl92se_disable_interrupt(struct ieee80211_hw *hw) @@ -1276,8 +1280,7 @@ void rtl92se_disable_interrupt(struct ieee80211_hw *hw) rtlpci = rtl_pcidev(rtl_pcipriv(hw)); rtl_write_dword(rtlpriv, INTA_MASK, 0); rtl_write_dword(rtlpriv, INTA_MASK + 4, 0); - - synchronize_irq(rtlpci->pdev->irq); + rtlpci->irq_enabled = false; } static u8 _rtl92s_set_sysclk(struct ieee80211_hw *hw, u8 data) diff --git a/drivers/net/wireless/rtlwifi/rtl8192se/phy.c b/drivers/net/wireless/rtlwifi/rtl8192se/phy.c index 77c5b5f35244..4b4612fe2fdb 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192se/phy.c +++ b/drivers/net/wireless/rtlwifi/rtl8192se/phy.c @@ -399,6 +399,8 @@ static bool _rtl92s_phy_sw_chnl_step_by_step(struct ieee80211_hw *hw, case 2: currentcmd = &postcommoncmd[*step]; break; + default: + return true; } if (currentcmd->cmdid == CMDID_END) { diff --git a/drivers/net/wireless/rtlwifi/rtl8192se/sw.c b/drivers/net/wireless/rtlwifi/rtl8192se/sw.c index aadba29c167a..fb003868bdef 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192se/sw.c +++ b/drivers/net/wireless/rtlwifi/rtl8192se/sw.c @@ -236,6 +236,19 @@ static void rtl92s_deinit_sw_vars(struct ieee80211_hw *hw) } } +static bool rtl92se_is_tx_desc_closed(struct ieee80211_hw *hw, u8 hw_queue, + u16 index) +{ + struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); + struct rtl8192_tx_ring *ring = &rtlpci->tx_ring[hw_queue]; + u8 *entry = (u8 *)(&ring->desc[ring->idx]); + u8 own = (u8)rtl92se_get_desc(entry, true, HW_DESC_OWN); + + if (own) + return false; + return true; +} + static struct rtl_hal_ops rtl8192se_hal_ops = { .init_sw_vars = rtl92s_init_sw_vars, .deinit_sw_vars = rtl92s_deinit_sw_vars, @@ -269,6 +282,7 @@ static struct rtl_hal_ops rtl8192se_hal_ops = { .led_control = rtl92se_led_control, .set_desc = rtl92se_set_desc, .get_desc = rtl92se_get_desc, + .is_tx_desc_closed = rtl92se_is_tx_desc_closed, .tx_polling = rtl92se_tx_polling, .enable_hw_sec = rtl92se_enable_hw_security_config, .set_key = rtl92se_set_key, @@ -306,6 +320,8 @@ static struct rtl_hal_cfg rtl92se_hal_cfg = { .maps[MAC_RCR_ACRC32] = RCR_ACRC32, .maps[MAC_RCR_ACF] = RCR_ACF, .maps[MAC_RCR_AAP] = RCR_AAP, + .maps[MAC_HIMR] = INTA_MASK, + .maps[MAC_HIMRE] = INTA_MASK + 4, .maps[EFUSE_TEST] = REG_EFUSE_TEST, .maps[EFUSE_CTRL] = REG_EFUSE_CTRL, -- cgit v1.2.1 From 9c3a6670863033df421c8e46f79f383cf3a922ee Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Sat, 8 Nov 2014 13:59:42 +0100 Subject: b43: fix NULL pointer dereference in b43_phy_copy() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit phy_read and phy_write are not set for every phy any more sine this: commit d342b95dd735014a590f9051b1ba227eb54ca8f6 Author: Rafał Miłecki Date: Thu Jul 31 21:59:43 2014 +0200 b43: don't duplicate common PHY read/write ops b43_phy_copy() accesses phy_read and phy_write directly and will fail with some phys. This patch fixes the regression by using the b43_phy_read() and b43_phy_write() functions which should be used for read and write access. This should fix this bug report: https://bugzilla.kernel.org/show_bug.cgi?id=87731 Reported-by: Volker Kempter Tested-by: Volker Kempter Signed-off-by: Hauke Mehrtens Signed-off-by: John W. Linville --- drivers/net/wireless/b43/phy_common.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/wireless/b43/phy_common.c b/drivers/net/wireless/b43/phy_common.c index 1dfc682a8055..ee27b06074e1 100644 --- a/drivers/net/wireless/b43/phy_common.c +++ b/drivers/net/wireless/b43/phy_common.c @@ -300,9 +300,7 @@ void b43_phy_write(struct b43_wldev *dev, u16 reg, u16 value) void b43_phy_copy(struct b43_wldev *dev, u16 destreg, u16 srcreg) { - assert_mac_suspended(dev); - dev->phy.ops->phy_write(dev, destreg, - dev->phy.ops->phy_read(dev, srcreg)); + b43_phy_write(dev, destreg, b43_phy_read(dev, srcreg)); } void b43_phy_mask(struct b43_wldev *dev, u16 offset, u16 mask) -- cgit v1.2.1 From 9b520d84957d63348e87c0f2cbd21d86e1e8f2f2 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 4 Nov 2014 15:54:11 +0200 Subject: iwlwifi: mvm: abort scan upon RFKILL This code existed but not for all the different FW APIs we support. Fix this. Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/mvm/scan.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/scan.c b/drivers/net/wireless/iwlwifi/mvm/scan.c index b280d5d87127..7554f7053830 100644 --- a/drivers/net/wireless/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/iwlwifi/mvm/scan.c @@ -602,16 +602,6 @@ static int iwl_mvm_cancel_regular_scan(struct iwl_mvm *mvm) SCAN_COMPLETE_NOTIFICATION }; int ret; - if (mvm->scan_status == IWL_MVM_SCAN_NONE) - return 0; - - if (iwl_mvm_is_radio_killed(mvm)) { - ieee80211_scan_completed(mvm->hw, true); - iwl_mvm_unref(mvm, IWL_MVM_REF_SCAN); - mvm->scan_status = IWL_MVM_SCAN_NONE; - return 0; - } - iwl_init_notification_wait(&mvm->notif_wait, &wait_scan_abort, scan_abort_notif, ARRAY_SIZE(scan_abort_notif), @@ -1400,6 +1390,16 @@ int iwl_mvm_unified_sched_scan_lmac(struct iwl_mvm *mvm, int iwl_mvm_cancel_scan(struct iwl_mvm *mvm) { + if (mvm->scan_status == IWL_MVM_SCAN_NONE) + return 0; + + if (iwl_mvm_is_radio_killed(mvm)) { + ieee80211_scan_completed(mvm->hw, true); + iwl_mvm_unref(mvm, IWL_MVM_REF_SCAN); + mvm->scan_status = IWL_MVM_SCAN_NONE; + return 0; + } + if (mvm->fw->ucode_capa.api[0] & IWL_UCODE_TLV_API_LMAC_SCAN) return iwl_mvm_scan_offload_stop(mvm, true); return iwl_mvm_cancel_regular_scan(mvm); -- cgit v1.2.1 From 87dd634ae72bb8f6d0dd12f1cbbc67c7da6dba3b Mon Sep 17 00:00:00 2001 From: Liad Kaufman Date: Mon, 10 Nov 2014 19:25:22 +0200 Subject: iwlwifi: pcie: fix prph dump length The length counting previously done had an error in it, causing the length down the data dumping function to be shorter than it should be, causing the end of the data to get truncated off and lost. Cc: [3.17+] Fixes: 67c65f2cf710 ("iwlwifi: dump periphery registers to fw-error-dump") Signed-off-by: Liad Kaufman Reviewed-by: Johannes Berg Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/pcie/trans.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/pcie/trans.c b/drivers/net/wireless/iwlwifi/pcie/trans.c index 160c3ebc48d0..dd2f3f8baa9d 100644 --- a/drivers/net/wireless/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/iwlwifi/pcie/trans.c @@ -1894,8 +1894,7 @@ static u32 iwl_trans_pcie_dump_prph(struct iwl_trans *trans, int reg; __le32 *val; - prph_len += sizeof(*data) + sizeof(*prph) + - num_bytes_in_chunk; + prph_len += sizeof(**data) + sizeof(*prph) + num_bytes_in_chunk; (*data)->type = cpu_to_le32(IWL_FW_ERROR_DUMP_PRPH); (*data)->len = cpu_to_le32(sizeof(*prph) + -- cgit v1.2.1 From 2196937e12b1b4ba139806d132647e1651d655df Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 10 Nov 2014 17:11:21 +0100 Subject: netfilter: ipset: small potential read beyond the end of buffer We could be reading 8 bytes into a 4 byte buffer here. It seems harmless but adding a check is the right thing to do and it silences a static checker warning. Signed-off-by: Dan Carpenter Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_core.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 86f9d76b1464..d259da3ce67a 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1863,6 +1863,12 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) if (*op < IP_SET_OP_VERSION) { /* Check the version at the beginning of operations */ struct ip_set_req_version *req_version = data; + + if (*len < sizeof(struct ip_set_req_version)) { + ret = -EINVAL; + goto done; + } + if (req_version->version != IPSET_PROTOCOL) { ret = -EPROTO; goto done; -- cgit v1.2.1 From cfd9167af14eb4ec21517a32911d460083ee3d59 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Tue, 11 Nov 2014 14:28:47 +0100 Subject: rt2x00: do not align payload on modern H/W RT2800 and newer hardware require padding between header and payload if header length is not multiple of 4. For historical reasons we also align payload to to 4 bytes boundary, but such alignment is not needed on modern H/W. Patch fixes skb_under_panic problems reported from time to time: https://bugzilla.kernel.org/show_bug.cgi?id=84911 https://bugzilla.kernel.org/show_bug.cgi?id=72471 http://marc.info/?l=linux-wireless&m=139108549530402&w=2 https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1087591 Panic happened because we eat 4 bytes of skb headroom on each (re)transmission when sending frame without the payload and the header length not being multiple of 4 (i.e. QoS header has 26 bytes). On such case because paylad_aling=2 is bigger than header_align=0 we increase header_align by 4 bytes. To prevent that we could change the check to: if (payload_length && payload_align > header_align) header_align += 4; but not aligning payload at all is more effective and alignment is not really needed by H/W (that has been tested on OpenWrt project for few years now). Reported-and-tested-by: Antti S. Lankila Debugged-by: Antti S. Lankila Reported-by: Henrik Asp Originally-From: Helmut Schaa Cc: stable@vger.kernel.org Signed-off-by: Stanislaw Gruszka Signed-off-by: John W. Linville --- drivers/net/wireless/rt2x00/rt2x00queue.c | 50 ++++++++----------------------- 1 file changed, 12 insertions(+), 38 deletions(-) diff --git a/drivers/net/wireless/rt2x00/rt2x00queue.c b/drivers/net/wireless/rt2x00/rt2x00queue.c index 8e68f87ab13c..66ff36447b94 100644 --- a/drivers/net/wireless/rt2x00/rt2x00queue.c +++ b/drivers/net/wireless/rt2x00/rt2x00queue.c @@ -158,55 +158,29 @@ void rt2x00queue_align_frame(struct sk_buff *skb) skb_trim(skb, frame_length); } -void rt2x00queue_insert_l2pad(struct sk_buff *skb, unsigned int header_length) +/* + * H/W needs L2 padding between the header and the paylod if header size + * is not 4 bytes aligned. + */ +void rt2x00queue_insert_l2pad(struct sk_buff *skb, unsigned int hdr_len) { - unsigned int payload_length = skb->len - header_length; - unsigned int header_align = ALIGN_SIZE(skb, 0); - unsigned int payload_align = ALIGN_SIZE(skb, header_length); - unsigned int l2pad = payload_length ? L2PAD_SIZE(header_length) : 0; + unsigned int l2pad = (skb->len > hdr_len) ? L2PAD_SIZE(hdr_len) : 0; - /* - * Adjust the header alignment if the payload needs to be moved more - * than the header. - */ - if (payload_align > header_align) - header_align += 4; - - /* There is nothing to do if no alignment is needed */ - if (!header_align) + if (!l2pad) return; - /* Reserve the amount of space needed in front of the frame */ - skb_push(skb, header_align); - - /* - * Move the header. - */ - memmove(skb->data, skb->data + header_align, header_length); - - /* Move the payload, if present and if required */ - if (payload_length && payload_align) - memmove(skb->data + header_length + l2pad, - skb->data + header_length + l2pad + payload_align, - payload_length); - - /* Trim the skb to the correct size */ - skb_trim(skb, header_length + l2pad + payload_length); + skb_push(skb, l2pad); + memmove(skb->data, skb->data + l2pad, hdr_len); } -void rt2x00queue_remove_l2pad(struct sk_buff *skb, unsigned int header_length) +void rt2x00queue_remove_l2pad(struct sk_buff *skb, unsigned int hdr_len) { - /* - * L2 padding is only present if the skb contains more than just the - * IEEE 802.11 header. - */ - unsigned int l2pad = (skb->len > header_length) ? - L2PAD_SIZE(header_length) : 0; + unsigned int l2pad = (skb->len > hdr_len) ? L2PAD_SIZE(hdr_len) : 0; if (!l2pad) return; - memmove(skb->data + l2pad, skb->data, header_length); + memmove(skb->data + l2pad, skb->data, hdr_len); skb_pull(skb, l2pad); } -- cgit v1.2.1 From 0cd75b19899fd86b51a6480fb8c00dcd85a54591 Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Tue, 11 Nov 2014 13:58:44 +0100 Subject: brcmfmac: fix conversion of channel width 20MHZ_NOHT The function chandef_to_chanspec() failed when converting a chandef with bandwidth set to NL80211_CHAN_WIDTH_20_NOHT. This was reported by user running the device in AP mode. ------------[ cut here ]------------ WARNING: CPU: 0 PID: 304 at drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c:381 chandef_to_chanspec.isra.11+0x158/0x184() Modules linked in: CPU: 0 PID: 304 Comm: hostapd Not tainted 3.16.0-rc7-abb+g64aa90f #8 [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (warn_slowpath_common+0x6c/0x8c) [] (warn_slowpath_common) from [] (warn_slowpath_null+0x1c/0x24) [] (warn_slowpath_null) from [] (chandef_to_chanspec.isra.11+0x158/0x184) [] (chandef_to_chanspec.isra.11) from [] (brcmf_cfg80211_start_ap+0x1e4/0x614) [] (brcmf_cfg80211_start_ap) from [] (nl80211_start_ap+0x288/0x414) [] (nl80211_start_ap) from [] (genl_rcv_msg+0x21c/0x38c) [] (genl_rcv_msg) from [] (netlink_rcv_skb+0xac/0xc0) [] (netlink_rcv_skb) from [] (genl_rcv+0x20/0x34) [] (genl_rcv) from [] (netlink_unicast+0x150/0x20c) [] (netlink_unicast) from [] (netlink_sendmsg+0x2b8/0x398) [] (netlink_sendmsg) from [] (sock_sendmsg+0x84/0xa8) [] (sock_sendmsg) from [] (___sys_sendmsg.part.29+0x268/0x278) [] (___sys_sendmsg.part.29) from [] (__sys_sendmsg+0x4c/0x7c) [] (__sys_sendmsg) from [] (ret_fast_syscall+0x0/0x44) ---[ end trace 965ee2158c9905a2 ]--- Cc: stable@vger.kernel.org # v3.17 Reported-by: Pontus Fuchs Reviewed-by: Hante Meuleman Reviewed-by: Daniel (Deognyoun) Kim Reviewed-by: Franky (Zhenhui) Lin Reviewed-by: Pieter-Paul Giesberts Signed-off-by: Arend van Spriel Signed-off-by: John W. Linville --- drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c b/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c index 28fa25b509db..39b45c038a93 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c @@ -299,6 +299,7 @@ static u16 chandef_to_chanspec(struct brcmu_d11inf *d11inf, primary_offset = ch->center_freq1 - ch->chan->center_freq; switch (ch->width) { case NL80211_CHAN_WIDTH_20: + case NL80211_CHAN_WIDTH_20_NOHT: ch_inf.bw = BRCMU_CHAN_BW_20; WARN_ON(primary_offset != 0); break; @@ -323,6 +324,10 @@ static u16 chandef_to_chanspec(struct brcmu_d11inf *d11inf, ch_inf.sb = BRCMU_CHAN_SB_LU; } break; + case NL80211_CHAN_WIDTH_80P80: + case NL80211_CHAN_WIDTH_160: + case NL80211_CHAN_WIDTH_5: + case NL80211_CHAN_WIDTH_10: default: WARN_ON_ONCE(1); } @@ -333,6 +338,7 @@ static u16 chandef_to_chanspec(struct brcmu_d11inf *d11inf, case IEEE80211_BAND_5GHZ: ch_inf.band = BRCMU_CHAN_BAND_5G; break; + case IEEE80211_BAND_60GHZ: default: WARN_ON_ONCE(1); } -- cgit v1.2.1 From 4e6ce4dc7ce71d0886908d55129d5d6482a27ff9 Mon Sep 17 00:00:00 2001 From: Miaoqing Pan Date: Thu, 6 Nov 2014 10:52:23 +0530 Subject: ath9k: Fix RTC_DERIVED_CLK usage Based on the reference clock, which could be 25MHz or 40MHz, AR_RTC_DERIVED_CLK is programmed differently for AR9340 and AR9550. But, when a chip reset is done, processing the initvals sets the register back to the default value. Fix this by moving the code in ath9k_hw_init_pll() to ar9003_hw_override_ini(). Also, do this override for AR9531. Cc: stable@vger.kernel.org Signed-off-by: Miaoqing Pan Signed-off-by: Sujith Manoharan Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/ar9003_phy.c | 13 +++++++++++++ drivers/net/wireless/ath/ath9k/hw.c | 13 ------------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/ar9003_phy.c b/drivers/net/wireless/ath/ath9k/ar9003_phy.c index 697c4ae90af0..1e8ea5e4d4ca 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_phy.c +++ b/drivers/net/wireless/ath/ath9k/ar9003_phy.c @@ -664,6 +664,19 @@ static void ar9003_hw_override_ini(struct ath_hw *ah) ah->enabled_cals |= TX_CL_CAL; else ah->enabled_cals &= ~TX_CL_CAL; + + if (AR_SREV_9340(ah) || AR_SREV_9531(ah) || AR_SREV_9550(ah)) { + if (ah->is_clk_25mhz) { + REG_WRITE(ah, AR_RTC_DERIVED_CLK, 0x17c << 1); + REG_WRITE(ah, AR_SLP32_MODE, 0x0010f3d7); + REG_WRITE(ah, AR_SLP32_INC, 0x0001e7ae); + } else { + REG_WRITE(ah, AR_RTC_DERIVED_CLK, 0x261 << 1); + REG_WRITE(ah, AR_SLP32_MODE, 0x0010f400); + REG_WRITE(ah, AR_SLP32_INC, 0x0001e800); + } + udelay(100); + } } static void ar9003_hw_prog_ini(struct ath_hw *ah, diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c index 8be4b1453394..2ad605760e21 100644 --- a/drivers/net/wireless/ath/ath9k/hw.c +++ b/drivers/net/wireless/ath/ath9k/hw.c @@ -861,19 +861,6 @@ static void ath9k_hw_init_pll(struct ath_hw *ah, udelay(RTC_PLL_SETTLE_DELAY); REG_WRITE(ah, AR_RTC_SLEEP_CLK, AR_RTC_FORCE_DERIVED_CLK); - - if (AR_SREV_9340(ah) || AR_SREV_9550(ah)) { - if (ah->is_clk_25mhz) { - REG_WRITE(ah, AR_RTC_DERIVED_CLK, 0x17c << 1); - REG_WRITE(ah, AR_SLP32_MODE, 0x0010f3d7); - REG_WRITE(ah, AR_SLP32_INC, 0x0001e7ae); - } else { - REG_WRITE(ah, AR_RTC_DERIVED_CLK, 0x261 << 1); - REG_WRITE(ah, AR_SLP32_MODE, 0x0010f400); - REG_WRITE(ah, AR_SLP32_INC, 0x0001e800); - } - udelay(100); - } } static void ath9k_hw_init_interrupt_masks(struct ath_hw *ah, -- cgit v1.2.1 From b207422bb92f17f4f892a9b8737d44b37fece25b Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 6 Nov 2014 12:52:06 +0100 Subject: ARM: shmobile: kzm9g legacy: Set i2c clks_per_count to 2 On sh73a0/kzm9g-legacy, probing of the i2c masters fails with: i2c-sh_mobile i2c-sh_mobile.0: timing values out of range: L/H=0x208/0x1bf sh_mobile: probe of i2c-sh_mobile.0 failed with error -22 According to the datasheet, the transfer rate is derived from the HP clock (which runs at 104 MHz) divided by two. Hence i2c_sh_mobile_platform_data.clks_per_count should be set to two. Now probing succeeds, and i2c works: i2c-sh_mobile i2c-sh_mobile.0: I2C adapter 0 with bus speed 100000 Hz (L/H=0x104/0xe0) Signed-off-by: Geert Uytterhoeven Reviewed-by: Wolfram Sang Signed-off-by: Simon Horman --- arch/arm/mach-shmobile/setup-sh73a0.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/arch/arm/mach-shmobile/setup-sh73a0.c b/arch/arm/mach-shmobile/setup-sh73a0.c index b7bd8e509668..328657d011d5 100644 --- a/arch/arm/mach-shmobile/setup-sh73a0.c +++ b/arch/arm/mach-shmobile/setup-sh73a0.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -192,11 +193,18 @@ static struct resource i2c4_resources[] = { }, }; +static struct i2c_sh_mobile_platform_data i2c_platform_data = { + .clks_per_count = 2, +}; + static struct platform_device i2c0_device = { .name = "i2c-sh_mobile", .id = 0, .resource = i2c0_resources, .num_resources = ARRAY_SIZE(i2c0_resources), + .dev = { + .platform_data = &i2c_platform_data, + }, }; static struct platform_device i2c1_device = { @@ -204,6 +212,9 @@ static struct platform_device i2c1_device = { .id = 1, .resource = i2c1_resources, .num_resources = ARRAY_SIZE(i2c1_resources), + .dev = { + .platform_data = &i2c_platform_data, + }, }; static struct platform_device i2c2_device = { @@ -211,6 +222,9 @@ static struct platform_device i2c2_device = { .id = 2, .resource = i2c2_resources, .num_resources = ARRAY_SIZE(i2c2_resources), + .dev = { + .platform_data = &i2c_platform_data, + }, }; static struct platform_device i2c3_device = { @@ -218,6 +232,9 @@ static struct platform_device i2c3_device = { .id = 3, .resource = i2c3_resources, .num_resources = ARRAY_SIZE(i2c3_resources), + .dev = { + .platform_data = &i2c_platform_data, + }, }; static struct platform_device i2c4_device = { @@ -225,6 +242,9 @@ static struct platform_device i2c4_device = { .id = 4, .resource = i2c4_resources, .num_resources = ARRAY_SIZE(i2c4_resources), + .dev = { + .platform_data = &i2c_platform_data, + }, }; static const struct sh_dmae_slave_config sh73a0_dmae_slaves[] = { -- cgit v1.2.1 From 50656d9df63d69ce399c8be62d4473b039dac36a Mon Sep 17 00:00:00 2001 From: Calvin Owens Date: Tue, 4 Nov 2014 16:37:40 -0800 Subject: ipvs: Keep skb->sk when allocating headroom on tunnel xmit ip_vs_prepare_tunneled_skb() ignores ->sk when allocating a new skb, either unconditionally setting ->sk to NULL or allowing the uninitialized ->sk from a newly allocated skb to leak through to the caller. This patch properly copies ->sk and increments its reference count. Signed-off-by: Calvin Owens Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_xmit.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 437a3663ad03..bd90bf8107da 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -846,6 +846,8 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af, new_skb = skb_realloc_headroom(skb, max_headroom); if (!new_skb) goto error; + if (skb->sk) + skb_set_owner_w(new_skb, skb->sk); consume_skb(skb); skb = new_skb; } -- cgit v1.2.1 From 1f9cd915b64bb95f7b41667b4bf8b22f0a0a557b Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Tue, 11 Nov 2014 19:35:52 +0100 Subject: dmaengine: sun6i: Fix memcpy operation The prep_memcpy call was not setting any meaningful burst and width because it was relying on the dma_slave_config was not set already. Rework the needed conversion functions, and hardcode the width and burst to use. Signed-off-by: Maxime Ripard Cc: stable@vger.kernel.org Signed-off-by: Vinod Koul --- drivers/dma/sun6i-dma.c | 61 ++++++++++++++++++++++++------------------------- 1 file changed, 30 insertions(+), 31 deletions(-) diff --git a/drivers/dma/sun6i-dma.c b/drivers/dma/sun6i-dma.c index 3aa10b328254..91292f5513ff 100644 --- a/drivers/dma/sun6i-dma.c +++ b/drivers/dma/sun6i-dma.c @@ -230,30 +230,25 @@ static inline void sun6i_dma_dump_chan_regs(struct sun6i_dma_dev *sdev, readl(pchan->base + DMA_CHAN_CUR_PARA)); } -static inline int convert_burst(u32 maxburst, u8 *burst) +static inline s8 convert_burst(u32 maxburst) { switch (maxburst) { case 1: - *burst = 0; - break; + return 0; case 8: - *burst = 2; - break; + return 2; default: return -EINVAL; } - - return 0; } -static inline int convert_buswidth(enum dma_slave_buswidth addr_width, u8 *width) +static inline s8 convert_buswidth(enum dma_slave_buswidth addr_width) { if ((addr_width < DMA_SLAVE_BUSWIDTH_1_BYTE) || (addr_width > DMA_SLAVE_BUSWIDTH_4_BYTES)) return -EINVAL; - *width = addr_width >> 1; - return 0; + return addr_width >> 1; } static void *sun6i_dma_lli_add(struct sun6i_dma_lli *prev, @@ -284,26 +279,25 @@ static inline int sun6i_dma_cfg_lli(struct sun6i_dma_lli *lli, struct dma_slave_config *config) { u8 src_width, dst_width, src_burst, dst_burst; - int ret; if (!config) return -EINVAL; - ret = convert_burst(config->src_maxburst, &src_burst); - if (ret) - return ret; + src_burst = convert_burst(config->src_maxburst); + if (src_burst) + return src_burst; - ret = convert_burst(config->dst_maxburst, &dst_burst); - if (ret) - return ret; + dst_burst = convert_burst(config->dst_maxburst); + if (dst_burst) + return dst_burst; - ret = convert_buswidth(config->src_addr_width, &src_width); - if (ret) - return ret; + src_width = convert_buswidth(config->src_addr_width); + if (src_width) + return src_width; - ret = convert_buswidth(config->dst_addr_width, &dst_width); - if (ret) - return ret; + dst_width = convert_buswidth(config->dst_addr_width); + if (dst_width) + return dst_width; lli->cfg = DMA_CHAN_CFG_SRC_BURST(src_burst) | DMA_CHAN_CFG_SRC_WIDTH(src_width) | @@ -542,11 +536,10 @@ static struct dma_async_tx_descriptor *sun6i_dma_prep_dma_memcpy( { struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device); struct sun6i_vchan *vchan = to_sun6i_vchan(chan); - struct dma_slave_config *sconfig = &vchan->cfg; struct sun6i_dma_lli *v_lli; struct sun6i_desc *txd; dma_addr_t p_lli; - int ret; + s8 burst, width; dev_dbg(chan2dev(chan), "%s; chan: %d, dest: %pad, src: %pad, len: %zu. flags: 0x%08lx\n", @@ -565,14 +558,21 @@ static struct dma_async_tx_descriptor *sun6i_dma_prep_dma_memcpy( goto err_txd_free; } - ret = sun6i_dma_cfg_lli(v_lli, src, dest, len, sconfig); - if (ret) - goto err_dma_free; + v_lli->src = src; + v_lli->dst = dest; + v_lli->len = len; + v_lli->para = NORMAL_WAIT; + burst = convert_burst(8); + width = convert_buswidth(DMA_SLAVE_BUSWIDTH_4_BYTES); v_lli->cfg |= DMA_CHAN_CFG_SRC_DRQ(DRQ_SDRAM) | DMA_CHAN_CFG_DST_DRQ(DRQ_SDRAM) | DMA_CHAN_CFG_DST_LINEAR_MODE | - DMA_CHAN_CFG_SRC_LINEAR_MODE; + DMA_CHAN_CFG_SRC_LINEAR_MODE | + DMA_CHAN_CFG_SRC_BURST(burst) | + DMA_CHAN_CFG_SRC_WIDTH(width) | + DMA_CHAN_CFG_DST_BURST(burst) | + DMA_CHAN_CFG_DST_WIDTH(width); sun6i_dma_lli_add(NULL, v_lli, p_lli, txd); @@ -580,8 +580,6 @@ static struct dma_async_tx_descriptor *sun6i_dma_prep_dma_memcpy( return vchan_tx_prep(&vchan->vc, &txd->vd, flags); -err_dma_free: - dma_pool_free(sdev->pool, v_lli, p_lli); err_txd_free: kfree(txd); return NULL; @@ -915,6 +913,7 @@ static int sun6i_dma_probe(struct platform_device *pdev) sdc->slave.device_prep_dma_memcpy = sun6i_dma_prep_dma_memcpy; sdc->slave.device_control = sun6i_dma_control; sdc->slave.chancnt = NR_MAX_VCHANS; + sdc->slave.copy_align = 4; sdc->slave.dev = &pdev->dev; -- cgit v1.2.1 From 2daf1b4d18e3add229d1a3b5c554331d99ac6c7e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 7 Nov 2014 18:48:33 +0100 Subject: netfilter: nft_compat: use current net namespace Instead of init_net when using xtables over nftables compat. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_compat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 9d6d6f60a80f..b92f129beade 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -117,7 +117,7 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par, struct xt_target *target, void *info, union nft_entry *entry, u8 proto, bool inv) { - par->net = &init_net; + par->net = ctx->net; par->table = ctx->table->name; switch (ctx->afi->family) { case AF_INET: @@ -324,7 +324,7 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx, struct xt_match *match, void *info, union nft_entry *entry, u8 proto, bool inv) { - par->net = &init_net; + par->net = ctx->net; par->table = ctx->table->name; switch (ctx->afi->family) { case AF_INET: -- cgit v1.2.1 From c918687f5e3962375a19de6ded3c1be85ebdbcd6 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 10 Nov 2014 20:53:55 +0100 Subject: netfilter: nft_compat: relax chain type validation Check for nat chain dependency only, which is the one that can actually crash the kernel. Don't care if mangle, filter and security specific match and targets are used out of their scope, they are harmless. This restores iptables-compat with mangle specific match/target when used out of the OUTPUT chain, that are actually emulated through filter chains, which broke when performing strict validation. Fixes: f3f5dde ("netfilter: nft_compat: validate chain type in match/target") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_compat.c | 32 ++------------------------------ 1 file changed, 2 insertions(+), 30 deletions(-) diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index b92f129beade..70dc96516305 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -21,45 +21,17 @@ #include #include -static const struct { - const char *name; - u8 type; -} table_to_chaintype[] = { - { "filter", NFT_CHAIN_T_DEFAULT }, - { "raw", NFT_CHAIN_T_DEFAULT }, - { "security", NFT_CHAIN_T_DEFAULT }, - { "mangle", NFT_CHAIN_T_ROUTE }, - { "nat", NFT_CHAIN_T_NAT }, - { }, -}; - -static int nft_compat_table_to_chaintype(const char *table) -{ - int i; - - for (i = 0; table_to_chaintype[i].name != NULL; i++) { - if (strcmp(table_to_chaintype[i].name, table) == 0) - return table_to_chaintype[i].type; - } - - return -1; -} - static int nft_compat_chain_validate_dependency(const char *tablename, const struct nft_chain *chain) { - enum nft_chain_type type; const struct nft_base_chain *basechain; if (!tablename || !(chain->flags & NFT_BASE_CHAIN)) return 0; - type = nft_compat_table_to_chaintype(tablename); - if (type < 0) - return -EINVAL; - basechain = nft_base_chain(chain); - if (basechain->type->type != type) + if (strcmp(tablename, "nat") == 0 && + basechain->type->type != NFT_CHAIN_T_NAT) return -EINVAL; return 0; -- cgit v1.2.1 From afefb6f928ed42d5db452ee9251ce6de62673c67 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 10 Nov 2014 19:08:21 +0100 Subject: netfilter: nft_compat: use the match->table to validate dependencies Instead of the match->name, which is of course not relevant. Fixes: f3f5dde ("netfilter: nft_compat: validate chain type in match/target") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_compat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 70dc96516305..265e190f2218 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -346,7 +346,7 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, union nft_entry e = {}; int ret; - ret = nft_compat_chain_validate_dependency(match->name, ctx->chain); + ret = nft_compat_chain_validate_dependency(match->table, ctx->chain); if (ret < 0) goto err; @@ -420,7 +420,7 @@ static int nft_match_validate(const struct nft_ctx *ctx, if (!(hook_mask & match->hooks)) return -EINVAL; - ret = nft_compat_chain_validate_dependency(match->name, + ret = nft_compat_chain_validate_dependency(match->table, ctx->chain); if (ret < 0) return ret; -- cgit v1.2.1 From b326dd37b94e29bf6a15940f4fa66aa21a678ab1 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 10 Nov 2014 21:14:12 +0100 Subject: netfilter: nf_tables: restore synchronous object release from commit/abort The existing xtables matches and targets, when used from nft_compat, may sleep from the destroy path, ie. when removing rules. Since the objects are released via call_rcu from softirq context, this results in lockdep splats and possible lockups that may be hard to reproduce. Patrick also indicated that delayed object release via call_rcu can cause us problems in the ordering of event notifications when anonymous sets are in place. So, this patch restores the synchronous object release from the commit and abort paths. This includes a call to synchronize_rcu() to make sure that no packets are walking on the objects that are going to be released. This is slowier though, but it's simple and it resolves the aforementioned problems. This is a partial revert of c7c32e7 ("netfilter: nf_tables: defer all object release via rcu") that was introduced in 3.16 to speed up interaction with userspace. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 -- net/netfilter/nf_tables_api.c | 24 ++++++++---------------- 2 files changed, 8 insertions(+), 18 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 845c596bf594..3ae969e3acf0 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -396,14 +396,12 @@ struct nft_rule { /** * struct nft_trans - nf_tables object update in transaction * - * @rcu_head: rcu head to defer release of transaction data * @list: used internally * @msg_type: message type * @ctx: transaction context * @data: internal information related to the transaction */ struct nft_trans { - struct rcu_head rcu_head; struct list_head list; int msg_type; struct nft_ctx ctx; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 11ab4b078f3b..66e8425dbfe7 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3484,13 +3484,8 @@ static void nft_chain_commit_update(struct nft_trans *trans) } } -/* Schedule objects for release via rcu to make sure no packets are accesing - * removed rules. - */ -static void nf_tables_commit_release_rcu(struct rcu_head *rt) +static void nf_tables_commit_release(struct nft_trans *trans) { - struct nft_trans *trans = container_of(rt, struct nft_trans, rcu_head); - switch (trans->msg_type) { case NFT_MSG_DELTABLE: nf_tables_table_destroy(&trans->ctx); @@ -3612,10 +3607,11 @@ static int nf_tables_commit(struct sk_buff *skb) } } + synchronize_rcu(); + list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { list_del(&trans->list); - trans->ctx.nla = NULL; - call_rcu(&trans->rcu_head, nf_tables_commit_release_rcu); + nf_tables_commit_release(trans); } nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN); @@ -3623,13 +3619,8 @@ static int nf_tables_commit(struct sk_buff *skb) return 0; } -/* Schedule objects for release via rcu to make sure no packets are accesing - * aborted rules. - */ -static void nf_tables_abort_release_rcu(struct rcu_head *rt) +static void nf_tables_abort_release(struct nft_trans *trans) { - struct nft_trans *trans = container_of(rt, struct nft_trans, rcu_head); - switch (trans->msg_type) { case NFT_MSG_NEWTABLE: nf_tables_table_destroy(&trans->ctx); @@ -3725,11 +3716,12 @@ static int nf_tables_abort(struct sk_buff *skb) } } + synchronize_rcu(); + list_for_each_entry_safe_reverse(trans, next, &net->nft.commit_list, list) { list_del(&trans->list); - trans->ctx.nla = NULL; - call_rcu(&trans->rcu_head, nf_tables_abort_release_rcu); + nf_tables_abort_release(trans); } return 0; -- cgit v1.2.1 From 2cb1e0259f50c7be88eb277c7442fa74a3ce7c57 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Wed, 12 Nov 2014 15:40:44 +0100 Subject: ASoC: cs42l51: re-hook of_match_table pointer In commit a1253ef6d3fa ("ASoC: cs42l51: split i2c from codec driver"), the I2C part of the CS42L51 was moved to a separate file, but the definition of the of_device_id array was left in the driver file itself, no longer connected to the platform_driver structure using the .of_match_table pointer. This commit exports the of_device_id array in cs42l51, and uses it as .of_match_able in cs42l51-i2c.c. This solution was suggested by Brian Austin. Fixes: a1253ef6d3fa ("ASoC: cs42l51: split i2c from codec driver") Signed-off-by: Thomas Petazzoni Acked-by: Brian Austin Signed-off-by: Mark Brown Cc: --- sound/soc/codecs/cs42l51-i2c.c | 1 + sound/soc/codecs/cs42l51.c | 4 +++- sound/soc/codecs/cs42l51.h | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/cs42l51-i2c.c b/sound/soc/codecs/cs42l51-i2c.c index cee51ae177c1..c40428f25ba5 100644 --- a/sound/soc/codecs/cs42l51-i2c.c +++ b/sound/soc/codecs/cs42l51-i2c.c @@ -46,6 +46,7 @@ static struct i2c_driver cs42l51_i2c_driver = { .driver = { .name = "cs42l51", .owner = THIS_MODULE, + .of_match_table = cs42l51_of_match, }, .probe = cs42l51_i2c_probe, .remove = cs42l51_i2c_remove, diff --git a/sound/soc/codecs/cs42l51.c b/sound/soc/codecs/cs42l51.c index 09488d97de60..669c38fc3034 100644 --- a/sound/soc/codecs/cs42l51.c +++ b/sound/soc/codecs/cs42l51.c @@ -558,11 +558,13 @@ error: } EXPORT_SYMBOL_GPL(cs42l51_probe); -static const struct of_device_id cs42l51_of_match[] = { +const struct of_device_id cs42l51_of_match[] = { { .compatible = "cirrus,cs42l51", }, { } }; MODULE_DEVICE_TABLE(of, cs42l51_of_match); +EXPORT_SYMBOL_GPL(cs42l51_of_match); + MODULE_AUTHOR("Arnaud Patard "); MODULE_DESCRIPTION("Cirrus Logic CS42L51 ALSA SoC Codec Driver"); MODULE_LICENSE("GPL"); diff --git a/sound/soc/codecs/cs42l51.h b/sound/soc/codecs/cs42l51.h index 8c55bf384bc6..0ca805492ac4 100644 --- a/sound/soc/codecs/cs42l51.h +++ b/sound/soc/codecs/cs42l51.h @@ -22,6 +22,7 @@ struct device; extern const struct regmap_config cs42l51_regmap; int cs42l51_probe(struct device *dev, struct regmap *regmap); +extern const struct of_device_id cs42l51_of_match[]; #define CS42L51_CHIP_ID 0x1B #define CS42L51_CHIP_REV_A 0x00 -- cgit v1.2.1 From 121a2f6d5f09d929fc663349ba34e248e8d07391 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Mon, 3 Nov 2014 23:20:04 +0100 Subject: ARM: tegra: Add serial port labels to Tegra124 DT These labels will be used to provide deterministic numbering of consoles in a later patch. Signed-off-by: Lucas Stach [treding@nvidia.com: drop aliases, reword commit message] Signed-off-by: Thierry Reding --- arch/arm/boot/dts/tegra124.dtsi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/tegra124.dtsi b/arch/arm/boot/dts/tegra124.dtsi index 478c555ebd96..df2b06b29985 100644 --- a/arch/arm/boot/dts/tegra124.dtsi +++ b/arch/arm/boot/dts/tegra124.dtsi @@ -286,7 +286,7 @@ * the APB DMA based serial driver, the comptible is * "nvidia,tegra124-hsuart", "nvidia,tegra30-hsuart". */ - serial@0,70006000 { + uarta: serial@0,70006000 { compatible = "nvidia,tegra124-uart", "nvidia,tegra20-uart"; reg = <0x0 0x70006000 0x0 0x40>; reg-shift = <2>; @@ -299,7 +299,7 @@ status = "disabled"; }; - serial@0,70006040 { + uartb: serial@0,70006040 { compatible = "nvidia,tegra124-uart", "nvidia,tegra20-uart"; reg = <0x0 0x70006040 0x0 0x40>; reg-shift = <2>; @@ -312,7 +312,7 @@ status = "disabled"; }; - serial@0,70006200 { + uartc: serial@0,70006200 { compatible = "nvidia,tegra124-uart", "nvidia,tegra20-uart"; reg = <0x0 0x70006200 0x0 0x40>; reg-shift = <2>; @@ -325,7 +325,7 @@ status = "disabled"; }; - serial@0,70006300 { + uartd: serial@0,70006300 { compatible = "nvidia,tegra124-uart", "nvidia,tegra20-uart"; reg = <0x0 0x70006300 0x0 0x40>; reg-shift = <2>; -- cgit v1.2.1 From c4574aa00e7c144ae4d1bfc2388433d9eb82e4d3 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Tue, 11 Nov 2014 12:49:30 -0800 Subject: ARM: dts: tegra: move serial aliases to per-board There are general changes pending to make the /aliases/serial* entries number the serial ports on the system. On Tegra, so far the ports have been just numbered dynamically as they are configured so that makes them change. To avoid this, add specific aliases per board to keep the old numbers. This allows us to change the numbering by default on future SoCs while keeping the numbering on existing boards. Signed-off-by: Olof Johansson Signed-off-by: Thierry Reding --- arch/arm/boot/dts/tegra114-dalmore.dts | 1 + arch/arm/boot/dts/tegra114-roth.dts | 4 ++++ arch/arm/boot/dts/tegra114-tn7.dts | 4 ++++ arch/arm/boot/dts/tegra114.dtsi | 7 ------- arch/arm/boot/dts/tegra124-jetson-tk1.dts | 1 + arch/arm/boot/dts/tegra124-nyan-big.dts | 1 + arch/arm/boot/dts/tegra124-venice2.dts | 1 + arch/arm/boot/dts/tegra20-harmony.dts | 1 + arch/arm/boot/dts/tegra20-iris-512.dts | 5 +++++ arch/arm/boot/dts/tegra20-medcom-wide.dts | 4 ++++ arch/arm/boot/dts/tegra20-paz00.dts | 2 ++ arch/arm/boot/dts/tegra20-seaboard.dts | 1 + arch/arm/boot/dts/tegra20-tamonten.dtsi | 1 + arch/arm/boot/dts/tegra20-trimslice.dts | 1 + arch/arm/boot/dts/tegra20-ventana.dts | 1 + arch/arm/boot/dts/tegra20-whistler.dts | 1 + arch/arm/boot/dts/tegra20.dtsi | 8 -------- arch/arm/boot/dts/tegra30-apalis-eval.dts | 4 ++++ arch/arm/boot/dts/tegra30-beaver.dts | 1 + arch/arm/boot/dts/tegra30-cardhu.dtsi | 2 ++ arch/arm/boot/dts/tegra30-colibri-eval-v3.dts | 3 +++ arch/arm/boot/dts/tegra30.dtsi | 8 -------- 22 files changed, 39 insertions(+), 23 deletions(-) diff --git a/arch/arm/boot/dts/tegra114-dalmore.dts b/arch/arm/boot/dts/tegra114-dalmore.dts index 5c21d216515a..8b7aa0dcdc6e 100644 --- a/arch/arm/boot/dts/tegra114-dalmore.dts +++ b/arch/arm/boot/dts/tegra114-dalmore.dts @@ -15,6 +15,7 @@ aliases { rtc0 = "/i2c@7000d000/tps65913@58"; rtc1 = "/rtc@7000e000"; + serial0 = &uartd; }; memory { diff --git a/arch/arm/boot/dts/tegra114-roth.dts b/arch/arm/boot/dts/tegra114-roth.dts index c7c6825f11fb..a80f1e24a113 100644 --- a/arch/arm/boot/dts/tegra114-roth.dts +++ b/arch/arm/boot/dts/tegra114-roth.dts @@ -15,6 +15,10 @@ linux,initrd-end = <0x82800000>; }; + aliases { + serial0 = &uartd; + }; + firmware { trusted-foundations { compatible = "tlm,trusted-foundations"; diff --git a/arch/arm/boot/dts/tegra114-tn7.dts b/arch/arm/boot/dts/tegra114-tn7.dts index 963662145635..2301c6601d02 100644 --- a/arch/arm/boot/dts/tegra114-tn7.dts +++ b/arch/arm/boot/dts/tegra114-tn7.dts @@ -15,6 +15,10 @@ linux,initrd-end = <0x82800000>; }; + aliases { + serial0 = &uartd; + }; + firmware { trusted-foundations { compatible = "tlm,trusted-foundations"; diff --git a/arch/arm/boot/dts/tegra114.dtsi b/arch/arm/boot/dts/tegra114.dtsi index 2ca9c1807f72..222f3b3f4dd5 100644 --- a/arch/arm/boot/dts/tegra114.dtsi +++ b/arch/arm/boot/dts/tegra114.dtsi @@ -9,13 +9,6 @@ compatible = "nvidia,tegra114"; interrupt-parent = <&gic>; - aliases { - serial0 = &uarta; - serial1 = &uartb; - serial2 = &uartc; - serial3 = &uartd; - }; - host1x@50000000 { compatible = "nvidia,tegra114-host1x", "simple-bus"; reg = <0x50000000 0x00028000>; diff --git a/arch/arm/boot/dts/tegra124-jetson-tk1.dts b/arch/arm/boot/dts/tegra124-jetson-tk1.dts index 029c9a021541..51b373ff1065 100644 --- a/arch/arm/boot/dts/tegra124-jetson-tk1.dts +++ b/arch/arm/boot/dts/tegra124-jetson-tk1.dts @@ -10,6 +10,7 @@ aliases { rtc0 = "/i2c@0,7000d000/pmic@40"; rtc1 = "/rtc@0,7000e000"; + serial0 = &uartd; }; memory { diff --git a/arch/arm/boot/dts/tegra124-nyan-big.dts b/arch/arm/boot/dts/tegra124-nyan-big.dts index 7d0784ce4c74..53181d310247 100644 --- a/arch/arm/boot/dts/tegra124-nyan-big.dts +++ b/arch/arm/boot/dts/tegra124-nyan-big.dts @@ -10,6 +10,7 @@ aliases { rtc0 = "/i2c@0,7000d000/pmic@40"; rtc1 = "/rtc@0,7000e000"; + serial0 = &uarta; }; memory { diff --git a/arch/arm/boot/dts/tegra124-venice2.dts b/arch/arm/boot/dts/tegra124-venice2.dts index 13008858e967..5c3f7813360d 100644 --- a/arch/arm/boot/dts/tegra124-venice2.dts +++ b/arch/arm/boot/dts/tegra124-venice2.dts @@ -10,6 +10,7 @@ aliases { rtc0 = "/i2c@0,7000d000/pmic@40"; rtc1 = "/rtc@0,7000e000"; + serial0 = &uarta; }; memory { diff --git a/arch/arm/boot/dts/tegra20-harmony.dts b/arch/arm/boot/dts/tegra20-harmony.dts index a37279af687c..b926a07b9443 100644 --- a/arch/arm/boot/dts/tegra20-harmony.dts +++ b/arch/arm/boot/dts/tegra20-harmony.dts @@ -10,6 +10,7 @@ aliases { rtc0 = "/i2c@7000d000/tps6586x@34"; rtc1 = "/rtc@7000e000"; + serial0 = &uartd; }; memory { diff --git a/arch/arm/boot/dts/tegra20-iris-512.dts b/arch/arm/boot/dts/tegra20-iris-512.dts index 8cfb83f42e1f..1dd7d7bfdfcc 100644 --- a/arch/arm/boot/dts/tegra20-iris-512.dts +++ b/arch/arm/boot/dts/tegra20-iris-512.dts @@ -6,6 +6,11 @@ model = "Toradex Colibri T20 512MB on Iris"; compatible = "toradex,iris", "toradex,colibri_t20-512", "nvidia,tegra20"; + aliases { + serial0 = &uarta; + serial1 = &uartd; + }; + host1x@50000000 { hdmi@54280000 { status = "okay"; diff --git a/arch/arm/boot/dts/tegra20-medcom-wide.dts b/arch/arm/boot/dts/tegra20-medcom-wide.dts index 1b7c56b33aca..9b87526ab0b7 100644 --- a/arch/arm/boot/dts/tegra20-medcom-wide.dts +++ b/arch/arm/boot/dts/tegra20-medcom-wide.dts @@ -6,6 +6,10 @@ model = "Avionic Design Medcom-Wide board"; compatible = "ad,medcom-wide", "ad,tamonten", "nvidia,tegra20"; + aliases { + serial0 = &uartd; + }; + pwm@7000a000 { status = "okay"; }; diff --git a/arch/arm/boot/dts/tegra20-paz00.dts b/arch/arm/boot/dts/tegra20-paz00.dts index d4438e30de45..ed7e1009326c 100644 --- a/arch/arm/boot/dts/tegra20-paz00.dts +++ b/arch/arm/boot/dts/tegra20-paz00.dts @@ -10,6 +10,8 @@ aliases { rtc0 = "/i2c@7000d000/tps6586x@34"; rtc1 = "/rtc@7000e000"; + serial0 = &uarta; + serial1 = &uartc; }; memory { diff --git a/arch/arm/boot/dts/tegra20-seaboard.dts b/arch/arm/boot/dts/tegra20-seaboard.dts index a1d4bf9895d7..ea282c7c0ca5 100644 --- a/arch/arm/boot/dts/tegra20-seaboard.dts +++ b/arch/arm/boot/dts/tegra20-seaboard.dts @@ -10,6 +10,7 @@ aliases { rtc0 = "/i2c@7000d000/tps6586x@34"; rtc1 = "/rtc@7000e000"; + serial0 = &uartd; }; memory { diff --git a/arch/arm/boot/dts/tegra20-tamonten.dtsi b/arch/arm/boot/dts/tegra20-tamonten.dtsi index 80e7d386ce34..13d4e6185275 100644 --- a/arch/arm/boot/dts/tegra20-tamonten.dtsi +++ b/arch/arm/boot/dts/tegra20-tamonten.dtsi @@ -7,6 +7,7 @@ aliases { rtc0 = "/i2c@7000d000/tps6586x@34"; rtc1 = "/rtc@7000e000"; + serial0 = &uartd; }; memory { diff --git a/arch/arm/boot/dts/tegra20-trimslice.dts b/arch/arm/boot/dts/tegra20-trimslice.dts index 5ad87979ab13..d99af4ef9c64 100644 --- a/arch/arm/boot/dts/tegra20-trimslice.dts +++ b/arch/arm/boot/dts/tegra20-trimslice.dts @@ -10,6 +10,7 @@ aliases { rtc0 = "/i2c@7000c500/rtc@56"; rtc1 = "/rtc@7000e000"; + serial0 = &uarta; }; memory { diff --git a/arch/arm/boot/dts/tegra20-ventana.dts b/arch/arm/boot/dts/tegra20-ventana.dts index ca8484cccddc..04c58e9ca490 100644 --- a/arch/arm/boot/dts/tegra20-ventana.dts +++ b/arch/arm/boot/dts/tegra20-ventana.dts @@ -10,6 +10,7 @@ aliases { rtc0 = "/i2c@7000d000/tps6586x@34"; rtc1 = "/rtc@7000e000"; + serial0 = &uartd; }; memory { diff --git a/arch/arm/boot/dts/tegra20-whistler.dts b/arch/arm/boot/dts/tegra20-whistler.dts index 1843725785c9..340d81108df1 100644 --- a/arch/arm/boot/dts/tegra20-whistler.dts +++ b/arch/arm/boot/dts/tegra20-whistler.dts @@ -10,6 +10,7 @@ aliases { rtc0 = "/i2c@7000d000/max8907@3c"; rtc1 = "/rtc@7000e000"; + serial0 = &uarta; }; memory { diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi index 3b374c49d04d..8acf5d85c99d 100644 --- a/arch/arm/boot/dts/tegra20.dtsi +++ b/arch/arm/boot/dts/tegra20.dtsi @@ -9,14 +9,6 @@ compatible = "nvidia,tegra20"; interrupt-parent = <&intc>; - aliases { - serial0 = &uarta; - serial1 = &uartb; - serial2 = &uartc; - serial3 = &uartd; - serial4 = &uarte; - }; - host1x@50000000 { compatible = "nvidia,tegra20-host1x", "simple-bus"; reg = <0x50000000 0x00024000>; diff --git a/arch/arm/boot/dts/tegra30-apalis-eval.dts b/arch/arm/boot/dts/tegra30-apalis-eval.dts index 45d40f024585..6236bdecb48b 100644 --- a/arch/arm/boot/dts/tegra30-apalis-eval.dts +++ b/arch/arm/boot/dts/tegra30-apalis-eval.dts @@ -11,6 +11,10 @@ rtc0 = "/i2c@7000c000/rtc@68"; rtc1 = "/i2c@7000d000/tps65911@2d"; rtc2 = "/rtc@7000e000"; + serial0 = &uarta; + serial1 = &uartb; + serial2 = &uartc; + serial3 = &uartd; }; pcie-controller@00003000 { diff --git a/arch/arm/boot/dts/tegra30-beaver.dts b/arch/arm/boot/dts/tegra30-beaver.dts index cee8f2246fdb..6b157eeabcc5 100644 --- a/arch/arm/boot/dts/tegra30-beaver.dts +++ b/arch/arm/boot/dts/tegra30-beaver.dts @@ -9,6 +9,7 @@ aliases { rtc0 = "/i2c@7000d000/tps65911@2d"; rtc1 = "/rtc@7000e000"; + serial0 = &uarta; }; memory { diff --git a/arch/arm/boot/dts/tegra30-cardhu.dtsi b/arch/arm/boot/dts/tegra30-cardhu.dtsi index 206379546244..a1b682ea01bd 100644 --- a/arch/arm/boot/dts/tegra30-cardhu.dtsi +++ b/arch/arm/boot/dts/tegra30-cardhu.dtsi @@ -30,6 +30,8 @@ aliases { rtc0 = "/i2c@7000d000/tps65911@2d"; rtc1 = "/rtc@7000e000"; + serial0 = &uarta; + serial1 = &uartc; }; memory { diff --git a/arch/arm/boot/dts/tegra30-colibri-eval-v3.dts b/arch/arm/boot/dts/tegra30-colibri-eval-v3.dts index 7793abd5bef1..4d3ddc585641 100644 --- a/arch/arm/boot/dts/tegra30-colibri-eval-v3.dts +++ b/arch/arm/boot/dts/tegra30-colibri-eval-v3.dts @@ -10,6 +10,9 @@ rtc0 = "/i2c@7000c000/rtc@68"; rtc1 = "/i2c@7000d000/tps65911@2d"; rtc2 = "/rtc@7000e000"; + serial0 = &uarta; + serial1 = &uartb; + serial2 = &uartd; }; host1x@50000000 { diff --git a/arch/arm/boot/dts/tegra30.dtsi b/arch/arm/boot/dts/tegra30.dtsi index aa6ccea13d30..b270b9e3d455 100644 --- a/arch/arm/boot/dts/tegra30.dtsi +++ b/arch/arm/boot/dts/tegra30.dtsi @@ -9,14 +9,6 @@ compatible = "nvidia,tegra30"; interrupt-parent = <&intc>; - aliases { - serial0 = &uarta; - serial1 = &uartb; - serial2 = &uartc; - serial3 = &uartd; - serial4 = &uarte; - }; - pcie-controller@00003000 { compatible = "nvidia,tegra30-pcie"; device_type = "pci"; -- cgit v1.2.1 From edbde56a25f484d46c9e75563d3b310bde1c185e Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 13 Nov 2014 13:59:02 +0900 Subject: ARM: tegra: Remove eMMC vmmc property for roth/tn7 This property was wrong and broke eMMC since commit 52221610d ("mmc: sdhci: Improve external VDD regulator support"). Align the eMMC properties to those of other Tegra boards. Signed-off-by: Alexandre Courbot Signed-off-by: Thierry Reding --- arch/arm/boot/dts/tegra114-roth.dts | 1 - arch/arm/boot/dts/tegra114-tn7.dts | 1 - 2 files changed, 2 deletions(-) diff --git a/arch/arm/boot/dts/tegra114-roth.dts b/arch/arm/boot/dts/tegra114-roth.dts index a80f1e24a113..54c79c5a9fbc 100644 --- a/arch/arm/boot/dts/tegra114-roth.dts +++ b/arch/arm/boot/dts/tegra114-roth.dts @@ -975,7 +975,6 @@ sdhci@78000600 { status = "okay"; bus-width = <8>; - vmmc-supply = <&vdd_1v8>; non-removable; }; diff --git a/arch/arm/boot/dts/tegra114-tn7.dts b/arch/arm/boot/dts/tegra114-tn7.dts index 2301c6601d02..f91c2c9b2f94 100644 --- a/arch/arm/boot/dts/tegra114-tn7.dts +++ b/arch/arm/boot/dts/tegra114-tn7.dts @@ -244,7 +244,6 @@ sdhci@78000600 { status = "okay"; bus-width = <8>; - vmmc-supply = <&vdd_1v8>; non-removable; }; -- cgit v1.2.1 From 221b9bf42b26a22e6904d20f35c53aec2e73a646 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 13 Nov 2014 13:59:03 +0900 Subject: ARM: tegra: roth: Fix SD card VDD_IO regulator vddio_sdmmc3 is a vdd_io, and thus should be under the vqmmc-supply property, not vmmc-supply. Signed-off-by: Alexandre Courbot Signed-off-by: Thierry Reding --- arch/arm/boot/dts/tegra114-roth.dts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/tegra114-roth.dts b/arch/arm/boot/dts/tegra114-roth.dts index 54c79c5a9fbc..38acf78d7815 100644 --- a/arch/arm/boot/dts/tegra114-roth.dts +++ b/arch/arm/boot/dts/tegra114-roth.dts @@ -920,8 +920,6 @@ regulator-name = "vddio-sdmmc3"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <3300000>; - regulator-always-on; - regulator-boot-on; }; ldousb { @@ -966,7 +964,7 @@ sdhci@78000400 { status = "okay"; bus-width = <4>; - vmmc-supply = <&vddio_sdmmc3>; + vqmmc-supply = <&vddio_sdmmc3>; cd-gpios = <&gpio TEGRA_GPIO(V, 2) GPIO_ACTIVE_LOW>; power-gpios = <&gpio TEGRA_GPIO(H, 0) GPIO_ACTIVE_HIGH>; }; -- cgit v1.2.1 From 336b5be2c56f3cf2252f5a39d344ef67cf9bdf7a Mon Sep 17 00:00:00 2001 From: Duc Dang Date: Thu, 6 Nov 2014 17:14:18 -0800 Subject: PCI: xgene: Assign resources to bus before adding new devices The X-Gene PCIe driver assumes pci_scan_root_bus() assigns resources as proposed in [1]. But we dropped patch [1] because it would break some architectures, which means the X-Gene PCIe driver is currently broken. Add calls to scan the bus, assign resources, and add devices in the X-Gene driver to fix this. [bhelgaas: changelog] [1] http://lkml.kernel.org/r/1412000971-9242-11-git-send-email-Liviu.Dudau@arm.com Signed-off-by: Duc Dang Signed-off-by: Tanmay Inamdar Signed-off-by: Bjorn Helgaas --- drivers/pci/host/pci-xgene.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/pci/host/pci-xgene.c b/drivers/pci/host/pci-xgene.c index 9ecabfa8c634..2988fe136c1e 100644 --- a/drivers/pci/host/pci-xgene.c +++ b/drivers/pci/host/pci-xgene.c @@ -631,10 +631,15 @@ static int xgene_pcie_probe_bridge(struct platform_device *pdev) if (ret) return ret; - bus = pci_scan_root_bus(&pdev->dev, 0, &xgene_pcie_ops, port, &res); + bus = pci_create_root_bus(&pdev->dev, 0, + &xgene_pcie_ops, port, &res); if (!bus) return -ENOMEM; + pci_scan_child_bus(bus); + pci_assign_unassigned_bus_resources(bus); + pci_bus_add_devices(bus); + platform_set_drvdata(pdev, port); return 0; } -- cgit v1.2.1 From 2801f7252d5be3f4f3886c2a5890284232801afe Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Thu, 6 Nov 2014 16:23:20 +0100 Subject: PCI: Add missing DT binding for "linux,pci-domain" property 41e5c0f81d3e ("of/pci: Add pci_get_new_domain_nr() and of_get_pci_domain_nr()") added parsing of the "linux,pci-domain" property, but didn't add the binding documentation. Since this property will be supported by a number of host bridge drivers, add it to the common PCI binding doc. Fixes: 41e5c0f81d3e ("of/pci: Add pci_get_new_domain_nr() and of_get_pci_domain_nr()") Signed-off-by: Lucas Stach Signed-off-by: Bjorn Helgaas Acked-by: Liviu Dudau Acked-by: Rob Herring --- Documentation/devicetree/bindings/pci/pci.txt | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Documentation/devicetree/bindings/pci/pci.txt b/Documentation/devicetree/bindings/pci/pci.txt index 41aeed38926d..f8fbe9af7b2f 100644 --- a/Documentation/devicetree/bindings/pci/pci.txt +++ b/Documentation/devicetree/bindings/pci/pci.txt @@ -7,3 +7,14 @@ And for the interrupt mapping part: Open Firmware Recommended Practice: Interrupt Mapping http://www.openfirmware.org/1275/practice/imap/imap0_9d.pdf + +Additionally to the properties specified in the above standards a host bridge +driver implementation may support the following properties: + +- linux,pci-domain: + If present this property assigns a fixed PCI domain number to a host bridge, + otherwise an unstable (across boots) unique number will be assigned. + It is required to either not set this property at all or set it for all + host bridges in the system, otherwise potentially conflicting domain numbers + may be assigned to root buses behind different host bridges. The domain + number for each host bridge in the system must be unique. -- cgit v1.2.1 From 7a1562d4f2d01721ad07c3a326db7512077ceea9 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 11 Nov 2014 12:09:46 -0800 Subject: PCI: Apply _HPX Link Control settings to all devices with a link Previously we applied _HPX type 2 record Link Control register settings only to bridges with a subordinate bus. But it's better to apply them to all devices with a link because if the subordinate bus has not been allocated yet, we won't apply settings to the device. Use pcie_cap_has_lnkctl() to determine whether the device has a Link Control register instead of looking at dev->subordinate. [bhelgaas: changelog] Fixes: 6cd33649fa83 ("PCI: Add pci_configure_device() during enumeration") Signed-off-by: Yinghai Lu Signed-off-by: Bjorn Helgaas --- drivers/pci/access.c | 2 +- drivers/pci/pci.h | 2 ++ drivers/pci/probe.c | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/pci/access.c b/drivers/pci/access.c index d292d7cb3417..49dd766852ba 100644 --- a/drivers/pci/access.c +++ b/drivers/pci/access.c @@ -444,7 +444,7 @@ static inline int pcie_cap_version(const struct pci_dev *dev) return pcie_caps_reg(dev) & PCI_EXP_FLAGS_VERS; } -static inline bool pcie_cap_has_lnkctl(const struct pci_dev *dev) +bool pcie_cap_has_lnkctl(const struct pci_dev *dev) { int type = pci_pcie_type(dev); diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 0601890db22d..4a3902d8e6fe 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -6,6 +6,8 @@ extern const unsigned char pcie_link_speed[]; +bool pcie_cap_has_lnkctl(const struct pci_dev *dev); + /* Functions internal to the PCI core code */ int pci_create_sysfs_dev_files(struct pci_dev *pdev); diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 5ed99309c758..6244b1834dfe 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1323,7 +1323,7 @@ static void program_hpp_type2(struct pci_dev *dev, struct hpp_type2 *hpp) ~hpp->pci_exp_devctl_and, hpp->pci_exp_devctl_or); /* Initialize Link Control Register */ - if (dev->subordinate) + if (pcie_cap_has_lnkctl(dev)) pcie_capability_clear_and_set_word(dev, PCI_EXP_LNKCTL, ~hpp->pci_exp_lnkctl_and, hpp->pci_exp_lnkctl_or); -- cgit v1.2.1 From c251ea7bd7a04f1f2575467e0de76e803cf59149 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Fri, 14 Nov 2014 02:14:47 -0200 Subject: ASoC: sgtl5000: Fix SMALL_POP bit definition On a mx28evk with a sgtl5000 codec we notice a loud 'click' sound to happen 5 seconds after the end of a playback. The SMALL_POP bit should fix this, but its definition is incorrect: according to the sgtl5000 manual it is bit 0 of CHIP_REF_CTRL register, not bit 1. Fix the definition accordingly and enable the bit as intended per the code comment. After applying this change, no loud 'click' sound is heard after playback Signed-off-by: Fabio Estevam Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/codecs/sgtl5000.c | 3 +-- sound/soc/codecs/sgtl5000.h | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c index 6bb77d76561b..dab9b15304af 100644 --- a/sound/soc/codecs/sgtl5000.c +++ b/sound/soc/codecs/sgtl5000.c @@ -1299,8 +1299,7 @@ static int sgtl5000_probe(struct snd_soc_codec *codec) /* enable small pop, introduce 400ms delay in turning off */ snd_soc_update_bits(codec, SGTL5000_CHIP_REF_CTRL, - SGTL5000_SMALL_POP, - SGTL5000_SMALL_POP); + SGTL5000_SMALL_POP, 1); /* disable short cut detector */ snd_soc_write(codec, SGTL5000_CHIP_SHORT_CTRL, 0); diff --git a/sound/soc/codecs/sgtl5000.h b/sound/soc/codecs/sgtl5000.h index 2f8c88931f69..bd7a344bf8c5 100644 --- a/sound/soc/codecs/sgtl5000.h +++ b/sound/soc/codecs/sgtl5000.h @@ -275,7 +275,7 @@ #define SGTL5000_BIAS_CTRL_MASK 0x000e #define SGTL5000_BIAS_CTRL_SHIFT 1 #define SGTL5000_BIAS_CTRL_WIDTH 3 -#define SGTL5000_SMALL_POP 0x0001 +#define SGTL5000_SMALL_POP 0 /* * SGTL5000_CHIP_MIC_CTRL -- cgit v1.2.1 From 5195c14c8b27cc0b18220ddbf0e5ad3328a04187 Mon Sep 17 00:00:00 2001 From: bill bonaparte Date: Thu, 6 Nov 2014 14:36:48 +0100 Subject: netfilter: conntrack: fix race in __nf_conntrack_confirm against get_next_corpse After removal of the central spinlock nf_conntrack_lock, in commit 93bb0ceb75be2 ("netfilter: conntrack: remove central spinlock nf_conntrack_lock"), it is possible to race against get_next_corpse(). The race is against the get_next_corpse() cleanup on the "unconfirmed" list (a per-cpu list with seperate locking), which set the DYING bit. Fix this race, in __nf_conntrack_confirm(), by removing the CT from unconfirmed list before checking the DYING bit. In case race occured, re-add the CT to the dying list. While at this, fix coding style of the comment that has been updated. Fixes: 93bb0ceb75be2 ("netfilter: conntrack: remove central spinlock nf_conntrack_lock") Reported-by: bill bonaparte Signed-off-by: bill bonaparte Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 5016a6929085..2c699757bccf 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -611,12 +611,16 @@ __nf_conntrack_confirm(struct sk_buff *skb) */ NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); pr_debug("Confirming conntrack %p\n", ct); - /* We have to check the DYING flag inside the lock to prevent - a race against nf_ct_get_next_corpse() possibly called from - user context, else we insert an already 'dead' hash, blocking - further use of that particular connection -JM */ + + /* We have to check the DYING flag after unlink to prevent + * a race against nf_ct_get_next_corpse() possibly called from + * user context, else we insert an already 'dead' hash, blocking + * further use of that particular connection -JM. + */ + nf_ct_del_from_dying_or_unconfirmed_list(ct); if (unlikely(nf_ct_is_dying(ct))) { + nf_ct_add_to_dying_list(ct); nf_conntrack_double_unlock(hash, reply_hash); local_bh_enable(); return NF_ACCEPT; @@ -636,8 +640,6 @@ __nf_conntrack_confirm(struct sk_buff *skb) zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) goto out; - nf_ct_del_from_dying_or_unconfirmed_list(ct); - /* Timer relative to confirmation time, not original setting time, otherwise we'd get timer wrap in weird delay cases. */ -- cgit v1.2.1 From 84bc88688e3f6ef843aa8803dbcd90168bb89faf Mon Sep 17 00:00:00 2001 From: Vincent BENAYOUN Date: Thu, 13 Nov 2014 13:47:26 +0100 Subject: inetdevice: fixed signed integer overflow There could be a signed overflow in the following code. The expression, (32-logmask) is comprised between 0 and 31 included. It may be equal to 31. In such a case the left shift will produce a signed integer overflow. According to the C99 Standard, this is an undefined behavior. A simple fix is to replace the signed int 1 with the unsigned int 1U. Signed-off-by: Vincent BENAYOUN Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 0068708161ff..0a21fbefdfbe 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -242,7 +242,7 @@ static inline void in_dev_put(struct in_device *idev) static __inline__ __be32 inet_make_mask(int logmask) { if (logmask) - return htonl(~((1<<(32-logmask))-1)); + return htonl(~((1U<<(32-logmask))-1)); return 0; } -- cgit v1.2.1 From 23e62de33d179e229e4c1dfd93f90a3c7355c519 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Thu, 13 Nov 2014 16:38:12 -0800 Subject: net: Add vxlan_gso_check() helper Most NICs that report NETIF_F_GSO_UDP_TUNNEL support VXLAN, and not other UDP-based encapsulation protocols where the format and size of the header differs. This patch implements a generic ndo_gso_check() for VXLAN which will only advertise GSO support when the skb looks like it contains VXLAN (or no UDP tunnelling at all). Implementation shamelessly stolen from Tom Herbert: http://thread.gmane.org/gmane.linux.network/332428/focus=333111 Signed-off-by: Joe Stringer Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 13 +++++++++++++ include/net/vxlan.h | 2 ++ 2 files changed, 15 insertions(+) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index fa9dc45b75a6..6b658638b456 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1571,6 +1571,19 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb) return false; } +bool vxlan_gso_check(struct sk_buff *skb) +{ + if ((skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) && + (skb->inner_protocol_type != ENCAP_TYPE_ETHER || + skb->inner_protocol != htons(ETH_P_TEB) || + (skb_inner_mac_header(skb) - skb_transport_header(skb) != + sizeof(struct udphdr) + sizeof(struct vxlanhdr)))) + return false; + + return true; +} +EXPORT_SYMBOL_GPL(vxlan_gso_check); + #if IS_ENABLED(CONFIG_IPV6) static int vxlan6_xmit_skb(struct vxlan_sock *vs, struct dst_entry *dst, struct sk_buff *skb, diff --git a/include/net/vxlan.h b/include/net/vxlan.h index d5f59f3fc35d..afadf8e53f20 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -45,6 +45,8 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, __be32 vni, bool xnet); +bool vxlan_gso_check(struct sk_buff *skb); + /* IP header + UDP + VXLAN + Ethernet header */ #define VXLAN_HEADROOM (20 + 8 + 8 + 14) /* IPv6 header + UDP + VXLAN + Ethernet header */ -- cgit v1.2.1 From 725d548f14362f10c9235ee4b32dc22e7c55e50c Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Thu, 13 Nov 2014 16:38:13 -0800 Subject: be2net: Implement ndo_gso_check() Use vxlan_gso_check() to advertise offload support for this NIC. Signed-off-by: Joe Stringer Acked-by: Sathya Perla Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 9a18e7930b31..3e8475cae4f9 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -4421,6 +4421,11 @@ static void be_del_vxlan_port(struct net_device *netdev, sa_family_t sa_family, "Disabled VxLAN offloads for UDP port %d\n", be16_to_cpu(port)); } + +static bool be_gso_check(struct sk_buff *skb, struct net_device *dev) +{ + return vxlan_gso_check(skb); +} #endif static const struct net_device_ops be_netdev_ops = { @@ -4450,6 +4455,7 @@ static const struct net_device_ops be_netdev_ops = { #ifdef CONFIG_BE2NET_VXLAN .ndo_add_vxlan_port = be_add_vxlan_port, .ndo_del_vxlan_port = be_del_vxlan_port, + .ndo_gso_check = be_gso_check, #endif }; -- cgit v1.2.1 From 956bdab2e439881e151b7a1779e7f12a3778b68d Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Thu, 13 Nov 2014 16:38:14 -0800 Subject: net/mlx4_en: Implement ndo_gso_check() Use vxlan_gso_check() to advertise offload support for this NIC. Signed-off-by: Joe Stringer Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 02266e3de514..c5fcc56ec06b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2355,6 +2355,11 @@ static void mlx4_en_del_vxlan_port(struct net_device *dev, queue_work(priv->mdev->workqueue, &priv->vxlan_del_task); } + +static bool mlx4_en_gso_check(struct sk_buff *skb, struct net_device *dev) +{ + return vxlan_gso_check(skb); +} #endif static const struct net_device_ops mlx4_netdev_ops = { @@ -2386,6 +2391,7 @@ static const struct net_device_ops mlx4_netdev_ops = { #ifdef CONFIG_MLX4_EN_VXLAN .ndo_add_vxlan_port = mlx4_en_add_vxlan_port, .ndo_del_vxlan_port = mlx4_en_del_vxlan_port, + .ndo_gso_check = mlx4_en_gso_check, #endif }; -- cgit v1.2.1 From 795a05c1c2a012ab2fd08f1523aa3c529049d291 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Thu, 13 Nov 2014 16:38:15 -0800 Subject: qlcnic: Implement ndo_gso_check() Use vxlan_gso_check() to advertise offload support for this NIC. Signed-off-by: Joe Stringer Acked-by: Shahed Shaikh Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index f5e29f7bdae3..a913b3ad2f89 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -503,6 +503,11 @@ static void qlcnic_del_vxlan_port(struct net_device *netdev, adapter->flags |= QLCNIC_DEL_VXLAN_PORT; } + +static bool qlcnic_gso_check(struct sk_buff *skb, struct net_device *dev) +{ + return vxlan_gso_check(skb); +} #endif static const struct net_device_ops qlcnic_netdev_ops = { @@ -526,6 +531,7 @@ static const struct net_device_ops qlcnic_netdev_ops = { #ifdef CONFIG_QLCNIC_VXLAN .ndo_add_vxlan_port = qlcnic_add_vxlan_port, .ndo_del_vxlan_port = qlcnic_del_vxlan_port, + .ndo_gso_check = qlcnic_gso_check, #endif #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = qlcnic_poll_controller, -- cgit v1.2.1 From ab64f16ff2e83371927c57a0380fd3c0fee5c1c1 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Tue, 11 Nov 2014 13:40:49 -0800 Subject: openvswitch: Fix memory leak. Need to free memory in case of sample action error. Introduced by commit 651887b0c22cffcfce7eb9c ("openvswitch: Sample action without side effects"). Signed-off-by: Pravin B Shelar --- net/openvswitch/actions.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 006886dbee36..00e447a17f64 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -722,8 +722,6 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, case OVS_ACTION_ATTR_SAMPLE: err = sample(dp, skb, key, a); - if (unlikely(err)) /* skb already freed. */ - return err; break; } -- cgit v1.2.1 From 856447d0209c2214d806b30bd3b0d873db5998bd Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Tue, 11 Nov 2014 14:32:20 -0800 Subject: openvswitch: Fix checksum calculation when modifying ICMPv6 packets. The checksum of ICMPv6 packets uses the IP pseudoheader as part of the calculation, unlike ICMP in IPv4. This was not implemented, which means that modifying the IP addresses of an ICMPv6 packet would cause the checksum to no longer be correct as the psuedoheader did not match. Introduced by commit 3fdbd1ce11e5 ("openvswitch: add ipv6 'set' action"). Reported-by: Neal Shrader Signed-off-by: Jesse Gross Signed-off-by: Pravin B Shelar --- net/openvswitch/actions.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 00e447a17f64..8c4229b11c34 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -246,11 +246,11 @@ static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto, { int transport_len = skb->len - skb_transport_offset(skb); - if (l4_proto == IPPROTO_TCP) { + if (l4_proto == NEXTHDR_TCP) { if (likely(transport_len >= sizeof(struct tcphdr))) inet_proto_csum_replace16(&tcp_hdr(skb)->check, skb, addr, new_addr, 1); - } else if (l4_proto == IPPROTO_UDP) { + } else if (l4_proto == NEXTHDR_UDP) { if (likely(transport_len >= sizeof(struct udphdr))) { struct udphdr *uh = udp_hdr(skb); @@ -261,6 +261,10 @@ static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto, uh->check = CSUM_MANGLED_0; } } + } else if (l4_proto == NEXTHDR_ICMP) { + if (likely(transport_len >= sizeof(struct icmp6hdr))) + inet_proto_csum_replace16(&icmp6_hdr(skb)->icmp6_cksum, + skb, addr, new_addr, 1); } } -- cgit v1.2.1 From 19e7a3df7261c9b7ebced8163c383712d5b6ac6b Mon Sep 17 00:00:00 2001 From: Daniele Di Proietto Date: Tue, 11 Nov 2014 14:51:22 -0800 Subject: openvswitch: Fix NDP flow mask validation match_validate() enforce that a mask matching on NDP attributes has also an exact match on ICMPv6 type. The ICMPv6 type, which is 8-bit wide, is stored in the 'tp.src' field of 'struct sw_flow_key', which is 16-bit wide. Therefore, an exact match on ICMPv6 type should only check the first 8 bits. This commit fixes a bug that prevented flows with an exact match on NDP field from being installed Introduced by commit 03f0d916aa03 ("openvswitch: Mega flow implementation"). Signed-off-by: Daniele Di Proietto Signed-off-by: Pravin B Shelar --- net/openvswitch/flow_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 939bcb32100f..dda040e693a3 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -145,7 +145,7 @@ static bool match_validate(const struct sw_flow_match *match, if (match->key->eth.type == htons(ETH_P_ARP) || match->key->eth.type == htons(ETH_P_RARP)) { key_expected |= 1 << OVS_KEY_ATTR_ARP; - if (match->mask && (match->mask->key.eth.type == htons(0xffff))) + if (match->mask && (match->mask->key.tp.src == htons(0xff))) mask_allowed |= 1 << OVS_KEY_ATTR_ARP; } -- cgit v1.2.1 From 8ec609d8b561468691b60347ff594bd443ea58c0 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Tue, 11 Nov 2014 15:55:16 -0800 Subject: openvswitch: Convert dp rcu read operation to locked operations dp read operations depends on ovs_dp_cmd_fill_info(). This API needs to looup vport to find dp name, but vport lookup can fail. Therefore to keep vport reference alive we need to take ovs lock. Introduced by commit 6093ae9abac1 ("openvswitch: Minimize dp and vport critical sections"). Signed-off-by: Pravin B Shelar Acked-by: Andy Zhou --- net/openvswitch/datapath.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index e6d7255183eb..f9e556b56086 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1265,7 +1265,7 @@ static size_t ovs_dp_cmd_msg_size(void) return msgsize; } -/* Called with ovs_mutex or RCU read lock. */ +/* Called with ovs_mutex. */ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, u32 portid, u32 seq, u32 flags, u8 cmd) { @@ -1555,7 +1555,7 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info) if (!reply) return -ENOMEM; - rcu_read_lock(); + ovs_lock(); dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); if (IS_ERR(dp)) { err = PTR_ERR(dp); @@ -1564,12 +1564,12 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info) err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, info->snd_seq, 0, OVS_DP_CMD_NEW); BUG_ON(err < 0); - rcu_read_unlock(); + ovs_unlock(); return genlmsg_reply(reply, info); err_unlock_free: - rcu_read_unlock(); + ovs_unlock(); kfree_skb(reply); return err; } @@ -1581,8 +1581,8 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) int skip = cb->args[0]; int i = 0; - rcu_read_lock(); - list_for_each_entry_rcu(dp, &ovs_net->dps, list_node) { + ovs_lock(); + list_for_each_entry(dp, &ovs_net->dps, list_node) { if (i >= skip && ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, @@ -1590,7 +1590,7 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) break; i++; } - rcu_read_unlock(); + ovs_unlock(); cb->args[0] = i; -- cgit v1.2.1 From fecaef85f7188ad1822210e2c7a7625c9a32a8e4 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Tue, 11 Nov 2014 14:36:30 -0800 Subject: openvswitch: Validate IPv6 flow key and mask values. Reject flow label key and mask values with invalid bits set. Introduced by commit 3fdbd1ce11e5 ("openvswitch: add ipv6 'set' action"). Signed-off-by: Jarno Rajahalme Acked-by: Jesse Gross Signed-off-by: Pravin B Shelar --- net/openvswitch/flow_netlink.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index dda040e693a3..fa4ec2e4a78b 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -689,6 +689,13 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX); return -EINVAL; } + + if (ipv6_key->ipv6_label & htonl(0xFFF00000)) { + OVS_NLERR("IPv6 flow label %x is out of range (max=%x).\n", + ntohl(ipv6_key->ipv6_label), (1 << 20) - 1); + return -EINVAL; + } + SW_FLOW_KEY_PUT(match, ipv6.label, ipv6_key->ipv6_label, is_mask); SW_FLOW_KEY_PUT(match, ip.proto, -- cgit v1.2.1 From 226424eee809251ec23bd4b09d8efba09c10fc3c Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 5 Nov 2014 16:11:44 +0000 Subject: perf: Fix corruption of sibling list with hotplug When a CPU hotplugged out, we call perf_remove_from_context() (via perf_event_exit_cpu()) to rip each CPU-bound event out of its PMU's cpu context, but leave siblings grouped together. Freeing of these events is left to the mercy of the usual refcounting. When a CPU-bound event's refcount drops to zero we cross-call to __perf_remove_from_context() to clean it up, detaching grouped siblings. This works when the relevant CPU is online, but will fail if the CPU is currently offline, and we won't detach the event from its siblings before freeing the event, leaving the sibling list corrupt. If the sibling list is later walked (e.g. because the CPU cam online again before a remaining sibling's refcount drops to zero), we will walk the now corrupted siblings list, potentially dereferencing garbage values. Given that the events should never be scheduled again (as we removed them from their context), we can simply detatch siblings when the CPU goes down in the first place. If the CPU comes back online, the redundant call to __perf_remove_from_context() is safe. Reported-by: Drew Richardson Signed-off-by: Mark Rutland Signed-off-by: Peter Zijlstra (Intel) Cc: vincent.weaver@maine.edu Cc: Vince Weaver Cc: Will Deacon Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1415203904-25308-2-git-send-email-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 2b02c9fda790..1cd5eef1fcdd 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1562,8 +1562,10 @@ static void perf_remove_from_context(struct perf_event *event, bool detach_group if (!task) { /* - * Per cpu events are removed via an smp call and - * the removal is always successful. + * Per cpu events are removed via an smp call. The removal can + * fail if the CPU is currently offline, but in that case we + * already called __perf_remove_from_context from + * perf_event_exit_cpu. */ cpu_function_call(event->cpu, __perf_remove_from_context, &re); return; @@ -8117,7 +8119,7 @@ static void perf_pmu_rotate_stop(struct pmu *pmu) static void __perf_event_exit_context(void *__info) { - struct remove_event re = { .detach_group = false }; + struct remove_event re = { .detach_group = true }; struct perf_event_context *ctx = __info; perf_pmu_rotate_stop(ctx->pmu); -- cgit v1.2.1 From 41a134a5830a5e1396723ace0a63000780d6e267 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 3 Nov 2014 17:00:27 -0800 Subject: perf/x86/intel/uncore: Fix IRP uncore register offsets on Haswell EP The counter register offsets for the IRP box PMU for Haswell-EP were incorrect. The offsets actually changed over IvyBridge EP. Fix them to the correct values. For this we need to fork the read function from the IVB and use an own counter array. Tested-by: patrick.lu@intel.com Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Link: http://lkml.kernel.org/r/1415062828-19759-3-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c index adf138eac85c..0af1c932fa0f 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c @@ -2025,13 +2025,27 @@ static struct intel_uncore_type hswep_uncore_imc = { SNBEP_UNCORE_PCI_COMMON_INIT(), }; +static unsigned hswep_uncore_irp_ctrs[] = {0xa0, 0xa8, 0xb0, 0xb8}; + +static u64 hswep_uncore_irp_read_counter(struct intel_uncore_box *box, struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + u64 count = 0; + + pci_read_config_dword(pdev, hswep_uncore_irp_ctrs[hwc->idx], (u32 *)&count); + pci_read_config_dword(pdev, hswep_uncore_irp_ctrs[hwc->idx] + 4, (u32 *)&count + 1); + + return count; +} + static struct intel_uncore_ops hswep_uncore_irp_ops = { .init_box = snbep_uncore_pci_init_box, .disable_box = snbep_uncore_pci_disable_box, .enable_box = snbep_uncore_pci_enable_box, .disable_event = ivbep_uncore_irp_disable_event, .enable_event = ivbep_uncore_irp_enable_event, - .read_counter = ivbep_uncore_irp_read_counter, + .read_counter = hswep_uncore_irp_read_counter, }; static struct intel_uncore_type hswep_uncore_irp = { -- cgit v1.2.1 From 6e84a8d7ac3ba246ef44e313e92bc16a1da1b04a Mon Sep 17 00:00:00 2001 From: Jurgen Kramer Date: Sat, 15 Nov 2014 14:01:21 +0100 Subject: ALSA: usb-audio: Add ctrl message delay quirk for Marantz/Denon devices This patch adds a USB control message delay quirk for a few specific Marantz/Denon devices. Without the delay the DACs will not work properly and produces the following type of messages: Nov 15 10:09:21 orwell kernel: [ 91.342880] usb 3-13: clock source 41 is not valid, cannot use Nov 15 10:09:21 orwell kernel: [ 91.343775] usb 3-13: clock source 41 is not valid, cannot use There are likely other Marantz/Denon devices using the same USB module which exhibit the same problems. But as this cannot be verified I limited the patch to the devices I could test. The following two devices are covered by this path: - Marantz SA-14S1 - Marantz HD-DAC1 Signed-off-by: Jurgen Kramer Cc: Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index d2aa45a8d895..a5941f80fc5b 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1146,6 +1146,20 @@ void snd_usb_ctl_msg_quirk(struct usb_device *dev, unsigned int pipe, if ((le16_to_cpu(dev->descriptor.idVendor) == 0x23ba) && (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS) mdelay(20); + + /* Marantz/Denon devices with USB DAC functionality need a delay + * after each class compliant request + */ + if ((le16_to_cpu(dev->descriptor.idVendor) == 0x154e) && + (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS) { + + switch (le16_to_cpu(dev->descriptor.idProduct)) { + case 0x3005: /* Marantz HD-DAC1 */ + case 0x3006: /* Marantz SA-14S1 */ + mdelay(20); + break; + } + } } /* -- cgit v1.2.1 From 68055915c1c22489f9658bd2b7391bb11b2cf4e4 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 3 Nov 2014 17:00:28 -0800 Subject: perf/x86/intel/uncore: Fix boot crash on SBOX PMU on Haswell-EP There were several reports that on some systems writing the SBOX0 PMU initialization MSR would #GP at boot. This did not happen on all systems -- my two test systems booted fine. Writing the three initialization bits bit-by-bit seems to avoid the problem. So add a special callback to do just that. This replaces an earlier patch that disabled the SBOX. Reported-by: Alexei Starovoitov Reported-and-Tested-by: Patrick Lu Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Link: http://lkml.kernel.org/r/1415062828-19759-4-git-send-email-andi@firstfloor.org [ Fixed a whitespace error and added attribution tags that were left out inexplicably. ] Signed-off-by: Ingo Molnar --- .../x86/kernel/cpu/perf_event_intel_uncore_snbep.c | 33 ++++++++++++++++++++-- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c index 0af1c932fa0f..f9ed429d6e4f 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c @@ -486,14 +486,17 @@ static struct attribute_group snbep_uncore_qpi_format_group = { .attrs = snbep_uncore_qpi_formats_attr, }; -#define SNBEP_UNCORE_MSR_OPS_COMMON_INIT() \ - .init_box = snbep_uncore_msr_init_box, \ +#define __SNBEP_UNCORE_MSR_OPS_COMMON_INIT() \ .disable_box = snbep_uncore_msr_disable_box, \ .enable_box = snbep_uncore_msr_enable_box, \ .disable_event = snbep_uncore_msr_disable_event, \ .enable_event = snbep_uncore_msr_enable_event, \ .read_counter = uncore_msr_read_counter +#define SNBEP_UNCORE_MSR_OPS_COMMON_INIT() \ + __SNBEP_UNCORE_MSR_OPS_COMMON_INIT(), \ + .init_box = snbep_uncore_msr_init_box \ + static struct intel_uncore_ops snbep_uncore_msr_ops = { SNBEP_UNCORE_MSR_OPS_COMMON_INIT(), }; @@ -1919,6 +1922,30 @@ static struct intel_uncore_type hswep_uncore_cbox = { .format_group = &hswep_uncore_cbox_format_group, }; +/* + * Write SBOX Initialization register bit by bit to avoid spurious #GPs + */ +static void hswep_uncore_sbox_msr_init_box(struct intel_uncore_box *box) +{ + unsigned msr = uncore_msr_box_ctl(box); + + if (msr) { + u64 init = SNBEP_PMON_BOX_CTL_INT; + u64 flags = 0; + int i; + + for_each_set_bit(i, (unsigned long *)&init, 64) { + flags |= (1ULL << i); + wrmsrl(msr, flags); + } + } +} + +static struct intel_uncore_ops hswep_uncore_sbox_msr_ops = { + __SNBEP_UNCORE_MSR_OPS_COMMON_INIT(), + .init_box = hswep_uncore_sbox_msr_init_box +}; + static struct attribute *hswep_uncore_sbox_formats_attr[] = { &format_attr_event.attr, &format_attr_umask.attr, @@ -1944,7 +1971,7 @@ static struct intel_uncore_type hswep_uncore_sbox = { .event_mask = HSWEP_S_MSR_PMON_RAW_EVENT_MASK, .box_ctl = HSWEP_S0_MSR_PMON_BOX_CTL, .msr_offset = HSWEP_SBOX_MSR_OFFSET, - .ops = &snbep_uncore_msr_ops, + .ops = &hswep_uncore_sbox_msr_ops, .format_group = &hswep_uncore_sbox_format_group, }; -- cgit v1.2.1 From 00b4d9a14125f1e51874def2b9de6092e007412d Mon Sep 17 00:00:00 2001 From: Maxime COQUELIN Date: Thu, 6 Nov 2014 10:54:19 +0100 Subject: bitops: Fix shift overflow in GENMASK macros On some 32 bits architectures, including x86, GENMASK(31, 0) returns 0 instead of the expected ~0UL. This is the same on some 64 bits architectures with GENMASK_ULL(63, 0). This is due to an overflow in the shift operand, 1 << 32 for GENMASK, 1 << 64 for GENMASK_ULL. Reported-by: Eric Paire Suggested-by: Rasmus Villemoes Signed-off-by: Maxime Coquelin Signed-off-by: Peter Zijlstra (Intel) Cc: # v3.13+ Cc: linux@rasmusvillemoes.dk Cc: gong.chen@linux.intel.com Cc: John Sullivan Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Theodore Ts'o Fixes: 10ef6b0dffe4 ("bitops: Introduce a more generic BITMASK macro") Link: http://lkml.kernel.org/r/1415267659-10563-1-git-send-email-maxime.coquelin@st.com Signed-off-by: Ingo Molnar --- include/linux/bitops.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/include/linux/bitops.h b/include/linux/bitops.h index be5fd38bd5a0..5d858e02997f 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -18,8 +18,11 @@ * position @h. For example * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000. */ -#define GENMASK(h, l) (((U32_C(1) << ((h) - (l) + 1)) - 1) << (l)) -#define GENMASK_ULL(h, l) (((U64_C(1) << ((h) - (l) + 1)) - 1) << (l)) +#define GENMASK(h, l) \ + (((~0UL) << (l)) & (~0UL >> (BITS_PER_LONG - 1 - (h)))) + +#define GENMASK_ULL(h, l) \ + (((~0ULL) << (l)) & (~0ULL >> (BITS_PER_LONG_LONG - 1 - (h)))) extern unsigned int __sw_hweight8(unsigned int w); extern unsigned int __sw_hweight16(unsigned int w); -- cgit v1.2.1 From 7af683350cb0ddd0e9d3819b4eb7abe9e2d3e709 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 10 Nov 2014 10:54:35 +0100 Subject: sched/numa: Avoid selecting oneself as swap target Because the whole numa task selection stuff runs with preemption enabled (its long and expensive) we can end up migrating and selecting oneself as a swap target. This doesn't really work out well -- we end up trying to acquire the same lock twice for the swap migrate -- so avoid this. Reported-and-Tested-by: Sasha Levin Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20141110100328.GF29390@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 34baa60f8a7b..3af3d1e7df9b 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1179,6 +1179,13 @@ static void task_numa_compare(struct task_numa_env *env, cur = NULL; raw_spin_unlock_irq(&dst_rq->lock); + /* + * Because we have preemption enabled we can get migrated around and + * end try selecting ourselves (current == env->p) as a swap candidate. + */ + if (cur == env->p) + goto unlock; + /* * "imp" is the fault differential for the source task between the * source and destination node. Calculate the total differential for -- cgit v1.2.1 From 23cfa361f3e54a3e184a5e126bbbdd95f984881a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 12 Nov 2014 12:37:37 +0100 Subject: sched/cputime: Fix cpu_timer_sample_group() double accounting While looking over the cpu-timer code I found that we appear to add the delta for the calling task twice, through: cpu_timer_sample_group() thread_group_cputimer() thread_group_cputime() times->sum_exec_runtime += task_sched_runtime(); *sample = cputime.sum_exec_runtime + task_delta_exec(); Which would make the sample run ahead, making the sleep short. Signed-off-by: Peter Zijlstra (Intel) Cc: KOSAKI Motohiro Cc: Oleg Nesterov Cc: Stanislaw Gruszka Cc: Christoph Lameter Cc: Frederic Weisbecker Cc: Linus Torvalds Cc: Rik van Riel Cc: Tejun Heo Link: http://lkml.kernel.org/r/20141112113737.GI10476@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- include/linux/kernel_stat.h | 5 ----- kernel/sched/core.c | 13 ------------- kernel/time/posix-cpu-timers.c | 2 +- 3 files changed, 1 insertion(+), 19 deletions(-) diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 8422b4ed6882..b9376cd5a187 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -77,11 +77,6 @@ static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu) return kstat_cpu(cpu).irqs_sum; } -/* - * Lock/unlock the current runqueue - to extract task statistics: - */ -extern unsigned long long task_delta_exec(struct task_struct *); - extern void account_user_time(struct task_struct *, cputime_t, cputime_t); extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t); extern void account_steal_time(cputime_t); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 5f12ca65c9a7..797a6c84c48d 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2499,19 +2499,6 @@ static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq) return ns; } -unsigned long long task_delta_exec(struct task_struct *p) -{ - unsigned long flags; - struct rq *rq; - u64 ns = 0; - - rq = task_rq_lock(p, &flags); - ns = do_task_delta_exec(p, rq); - task_rq_unlock(rq, p, &flags); - - return ns; -} - /* * Return accounted runtime for the task. * In case the task is currently running, return the runtime plus current's diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 492b986195d5..a16b67859e2a 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -553,7 +553,7 @@ static int cpu_timer_sample_group(const clockid_t which_clock, *sample = cputime_to_expires(cputime.utime); break; case CPUCLOCK_SCHED: - *sample = cputime.sum_exec_runtime + task_delta_exec(p); + *sample = cputime.sum_exec_runtime; break; } return 0; -- cgit v1.2.1 From 6e998916dfe327e785e7c2447959b2c1a3ea4930 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Wed, 12 Nov 2014 16:58:44 +0100 Subject: sched/cputime: Fix clock_nanosleep()/clock_gettime() inconsistency Commit d670ec13178d0 "posix-cpu-timers: Cure SMP wobbles" fixes one glibc test case in cost of breaking another one. After that commit, calling clock_nanosleep(TIMER_ABSTIME, X) and then clock_gettime(&Y) can result of Y time being smaller than X time. Reproducer/tester can be found further below, it can be compiled and ran by: gcc -o tst-cpuclock2 tst-cpuclock2.c -pthread while ./tst-cpuclock2 ; do : ; done This reproducer, when running on a buggy kernel, will complain about "clock_gettime difference too small". Issue happens because on start in thread_group_cputimer() we initialize sum_exec_runtime of cputimer with threads runtime not yet accounted and then add the threads runtime to running cputimer again on scheduler tick, making it's sum_exec_runtime bigger than actual threads runtime. KOSAKI Motohiro posted a fix for this problem, but that patch was never applied: https://lkml.org/lkml/2013/5/26/191 . This patch takes different approach to cure the problem. It calls update_curr() when cputimer starts, that assure we will have updated stats of running threads and on the next schedule tick we will account only the runtime that elapsed from cputimer start. That also assure we have consistent state between cpu times of individual threads and cpu time of the process consisted by those threads. Full reproducer (tst-cpuclock2.c): #define _GNU_SOURCE #include #include #include #include #include #include #include /* Parameters for the Linux kernel ABI for CPU clocks. */ #define CPUCLOCK_SCHED 2 #define MAKE_PROCESS_CPUCLOCK(pid, clock) \ ((~(clockid_t) (pid) << 3) | (clockid_t) (clock)) static pthread_barrier_t barrier; /* Help advance the clock. */ static void *chew_cpu(void *arg) { pthread_barrier_wait(&barrier); while (1) ; return NULL; } /* Don't use the glibc wrapper. */ static int do_nanosleep(int flags, const struct timespec *req) { clockid_t clock_id = MAKE_PROCESS_CPUCLOCK(0, CPUCLOCK_SCHED); return syscall(SYS_clock_nanosleep, clock_id, flags, req, NULL); } static int64_t tsdiff(const struct timespec *before, const struct timespec *after) { int64_t before_i = before->tv_sec * 1000000000ULL + before->tv_nsec; int64_t after_i = after->tv_sec * 1000000000ULL + after->tv_nsec; return after_i - before_i; } int main(void) { int result = 0; pthread_t th; pthread_barrier_init(&barrier, NULL, 2); if (pthread_create(&th, NULL, chew_cpu, NULL) != 0) { perror("pthread_create"); return 1; } pthread_barrier_wait(&barrier); /* The test. */ struct timespec before, after, sleeptimeabs; int64_t sleepdiff, diffabs; const struct timespec sleeptime = {.tv_sec = 0,.tv_nsec = 100000000 }; /* The relative nanosleep. Not sure why this is needed, but its presence seems to make it easier to reproduce the problem. */ if (do_nanosleep(0, &sleeptime) != 0) { perror("clock_nanosleep"); return 1; } /* Get the current time. */ if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &before) < 0) { perror("clock_gettime[2]"); return 1; } /* Compute the absolute sleep time based on the current time. */ uint64_t nsec = before.tv_nsec + sleeptime.tv_nsec; sleeptimeabs.tv_sec = before.tv_sec + nsec / 1000000000; sleeptimeabs.tv_nsec = nsec % 1000000000; /* Sleep for the computed time. */ if (do_nanosleep(TIMER_ABSTIME, &sleeptimeabs) != 0) { perror("absolute clock_nanosleep"); return 1; } /* Get the time after the sleep. */ if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &after) < 0) { perror("clock_gettime[3]"); return 1; } /* The time after sleep should always be equal to or after the absolute sleep time passed to clock_nanosleep. */ sleepdiff = tsdiff(&sleeptimeabs, &after); if (sleepdiff < 0) { printf("absolute clock_nanosleep woke too early: %" PRId64 "\n", sleepdiff); result = 1; printf("Before %llu.%09llu\n", before.tv_sec, before.tv_nsec); printf("After %llu.%09llu\n", after.tv_sec, after.tv_nsec); printf("Sleep %llu.%09llu\n", sleeptimeabs.tv_sec, sleeptimeabs.tv_nsec); } /* The difference between the timestamps taken before and after the clock_nanosleep call should be equal to or more than the duration of the sleep. */ diffabs = tsdiff(&before, &after); if (diffabs < sleeptime.tv_nsec) { printf("clock_gettime difference too small: %" PRId64 "\n", diffabs); result = 1; } pthread_cancel(th); return result; } Signed-off-by: Stanislaw Gruszka Signed-off-by: Peter Zijlstra (Intel) Cc: Rik van Riel Cc: Frederic Weisbecker Cc: KOSAKI Motohiro Cc: Oleg Nesterov Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20141112155843.GA24803@redhat.com Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 38 +++++++++++--------------------------- kernel/sched/deadline.c | 2 ++ kernel/sched/fair.c | 7 +++++++ kernel/sched/rt.c | 2 ++ kernel/sched/sched.h | 2 ++ 5 files changed, 24 insertions(+), 27 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 797a6c84c48d..24beb9bb4c3e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2474,31 +2474,6 @@ DEFINE_PER_CPU(struct kernel_cpustat, kernel_cpustat); EXPORT_PER_CPU_SYMBOL(kstat); EXPORT_PER_CPU_SYMBOL(kernel_cpustat); -/* - * Return any ns on the sched_clock that have not yet been accounted in - * @p in case that task is currently running. - * - * Called with task_rq_lock() held on @rq. - */ -static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq) -{ - u64 ns = 0; - - /* - * Must be ->curr _and_ ->on_rq. If dequeued, we would - * project cycles that may never be accounted to this - * thread, breaking clock_gettime(). - */ - if (task_current(rq, p) && task_on_rq_queued(p)) { - update_rq_clock(rq); - ns = rq_clock_task(rq) - p->se.exec_start; - if ((s64)ns < 0) - ns = 0; - } - - return ns; -} - /* * Return accounted runtime for the task. * In case the task is currently running, return the runtime plus current's @@ -2508,7 +2483,7 @@ unsigned long long task_sched_runtime(struct task_struct *p) { unsigned long flags; struct rq *rq; - u64 ns = 0; + u64 ns; #if defined(CONFIG_64BIT) && defined(CONFIG_SMP) /* @@ -2527,7 +2502,16 @@ unsigned long long task_sched_runtime(struct task_struct *p) #endif rq = task_rq_lock(p, &flags); - ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq); + /* + * Must be ->curr _and_ ->on_rq. If dequeued, we would + * project cycles that may never be accounted to this + * thread, breaking clock_gettime(). + */ + if (task_current(rq, p) && task_on_rq_queued(p)) { + update_rq_clock(rq); + p->sched_class->update_curr(rq); + } + ns = p->se.sum_exec_runtime; task_rq_unlock(rq, p, &flags); return ns; diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 5285332392d5..28fa9d9e9201 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1701,4 +1701,6 @@ const struct sched_class dl_sched_class = { .prio_changed = prio_changed_dl, .switched_from = switched_from_dl, .switched_to = switched_to_dl, + + .update_curr = update_curr_dl, }; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 3af3d1e7df9b..ef2b104b254c 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -726,6 +726,11 @@ static void update_curr(struct cfs_rq *cfs_rq) account_cfs_rq_runtime(cfs_rq, delta_exec); } +static void update_curr_fair(struct rq *rq) +{ + update_curr(cfs_rq_of(&rq->curr->se)); +} + static inline void update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se) { @@ -7956,6 +7961,8 @@ const struct sched_class fair_sched_class = { .get_rr_interval = get_rr_interval_fair, + .update_curr = update_curr_fair, + #ifdef CONFIG_FAIR_GROUP_SCHED .task_move_group = task_move_group_fair, #endif diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index d024e6ce30ba..20bca398084a 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -2128,6 +2128,8 @@ const struct sched_class rt_sched_class = { .prio_changed = prio_changed_rt, .switched_to = switched_to_rt, + + .update_curr = update_curr_rt, }; #ifdef CONFIG_SCHED_DEBUG diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 24156c8434d1..2df8ef067cc5 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1135,6 +1135,8 @@ struct sched_class { unsigned int (*get_rr_interval) (struct rq *rq, struct task_struct *task); + void (*update_curr) (struct rq *rq); + #ifdef CONFIG_FAIR_GROUP_SCHED void (*task_move_group) (struct task_struct *p, int on_rq); #endif -- cgit v1.2.1 From 2cd3949f702692cf4c5d05b463f19cd706a92dd3 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Tue, 11 Nov 2014 14:01:33 -0800 Subject: x86: Require exact match for 'noxsave' command line option We have some very similarly named command-line options: arch/x86/kernel/cpu/common.c:__setup("noxsave", x86_xsave_setup); arch/x86/kernel/cpu/common.c:__setup("noxsaveopt", x86_xsaveopt_setup); arch/x86/kernel/cpu/common.c:__setup("noxsaves", x86_xsaves_setup); __setup() is designed to match options that take arguments, like "foo=bar" where you would have: __setup("foo", x86_foo_func...); The problem is that "noxsave" actually _matches_ "noxsaves" in the same way that "foo" matches "foo=bar". If you boot an old kernel that does not know about "noxsaves" with "noxsaves" on the command line, it will interpret the argument as "noxsave", which is not what you want at all. This makes the "noxsave" handler only return success when it finds an *exact* match. [ tglx: We really need to make __setup() more robust. ] Signed-off-by: Dave Hansen Cc: Dave Hansen Cc: Fenghua Yu Cc: x86@kernel.org Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20141111220133.FE053984@viggo.jf.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/common.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 4b4f78c9ba19..cfa9b5b2c27a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -146,6 +146,8 @@ EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); static int __init x86_xsave_setup(char *s) { + if (strlen(s)) + return 0; setup_clear_cpu_cap(X86_FEATURE_XSAVE); setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); setup_clear_cpu_cap(X86_FEATURE_XSAVES); -- cgit v1.2.1 From 49dd18ba4615eaa72f15c9087dea1c2ab4744cf5 Mon Sep 17 00:00:00 2001 From: Panu Matilainen Date: Fri, 14 Nov 2014 13:14:32 +0200 Subject: ipv4: Fix incorrect error code when adding an unreachable route Trying to add an unreachable route incorrectly returns -ESRCH if if custom FIB rules are present: [root@localhost ~]# ip route add 74.125.31.199 dev eth0 via 1.2.3.4 RTNETLINK answers: Network is unreachable [root@localhost ~]# ip rule add to 55.66.77.88 table 200 [root@localhost ~]# ip route add 74.125.31.199 dev eth0 via 1.2.3.4 RTNETLINK answers: No such process [root@localhost ~]# Commit 83886b6b636173b206f475929e58fac75c6f2446 ("[NET]: Change "not found" return value for rule lookup") changed fib_rules_lookup() to use -ESRCH as a "not found" code internally, but for user space it should be translated into -ENETUNREACH. Handle the translation centrally in ipv4-specific fib_lookup(), leaving the DECnet case alone. On a related note, commit b7a71b51ee37d919e4098cd961d59a883fd272d8 ("ipv4: removed redundant conditional") removed a similar translation from ip_route_input_slow() prematurely AIUI. Fixes: b7a71b51ee37 ("ipv4: removed redundant conditional") Signed-off-by: Panu Matilainen Signed-off-by: David S. Miller --- net/ipv4/fib_rules.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index f2e15738534d..8f7bd56955b0 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -62,6 +62,10 @@ int __fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res) else res->tclassid = 0; #endif + + if (err == -ESRCH) + err = -ENETUNREACH; + return err; } EXPORT_SYMBOL_GPL(__fib_lookup); -- cgit v1.2.1 From 9f458945080f9e618641ff3ef04e60be0895d7e4 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 14 Nov 2014 15:16:47 +0100 Subject: reciprocal_div: objects with exported symbols should be obj-y rather than lib-y Otherwise the exported symbols might be discarded because of no users in vmlinux. Reported-by: Jim Davis Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- lib/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/Makefile b/lib/Makefile index 7512dc978f18..0211d2bd5e17 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -10,7 +10,7 @@ endif lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o dump_stack.o timerqueue.o\ idr.o int_sqrt.o extable.o \ - sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \ + sha1.o md5.o irq_regs.o argv_split.o \ proportions.o flex_proportions.o ratelimit.o show_mem.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ earlycpio.o @@ -26,7 +26,7 @@ obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o clz_ctz.o \ bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \ - percpu-refcount.o percpu_ida.o hash.o rhashtable.o + percpu-refcount.o percpu_ida.o hash.o rhashtable.o reciprocal_div.o obj-y += string_helpers.o obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o obj-y += kstrtox.o -- cgit v1.2.1 From 35717d8d6fc6fc50692273d6667a0a575c26aa93 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Fri, 14 Nov 2014 15:42:52 +0100 Subject: drivers: net: cpsw: Fix TX_IN_SEL offset The TX_IN_SEL offset for the CPSW_PORT/TX_IN_CTL register was incorrect. This caused the Dual MAC mode to never get set when it should. It also caused possible unintentional setting of a bit in the CPSW_PORT/TX_BLKS_REM register. The purpose of setting the Dual MAC mode for this register is to: "... allow packets from both ethernet ports to be written into the FIFO without one port starving the other port." - AM335x ARM TRM Signed-off-by: John Ogness Reviewed-by: Mugunthan V N Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index d8794488f80a..c560f9aeb55d 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -129,9 +129,9 @@ do { \ #define CPSW_VLAN_AWARE BIT(1) #define CPSW_ALE_VLAN_AWARE 1 -#define CPSW_FIFO_NORMAL_MODE (0 << 15) -#define CPSW_FIFO_DUAL_MAC_MODE (1 << 15) -#define CPSW_FIFO_RATE_LIMIT_MODE (2 << 15) +#define CPSW_FIFO_NORMAL_MODE (0 << 16) +#define CPSW_FIFO_DUAL_MAC_MODE (1 << 16) +#define CPSW_FIFO_RATE_LIMIT_MODE (2 << 16) #define CPSW_INTPACEEN (0x3f << 16) #define CPSW_INTPRESCALE_MASK (0x7FF << 0) -- cgit v1.2.1 From 8c2dd54485ccee7fc4086611e188478584758c8d Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sat, 15 Nov 2014 02:11:59 +0300 Subject: ieee802154: fix error handling in ieee802154fake_probe() In case of any failure ieee802154fake_probe() just calls unregister_netdev(). But it does not look safe to unregister netdevice before it was registered. The patch implements straightforward resource deallocation in case of failure in ieee802154fake_probe(). Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Signed-off-by: David S. Miller --- drivers/net/ieee802154/fakehard.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/ieee802154/fakehard.c b/drivers/net/ieee802154/fakehard.c index 9ce854f43917..6cbc56ad9ff4 100644 --- a/drivers/net/ieee802154/fakehard.c +++ b/drivers/net/ieee802154/fakehard.c @@ -377,17 +377,20 @@ static int ieee802154fake_probe(struct platform_device *pdev) err = wpan_phy_register(phy); if (err) - goto out; + goto err_phy_reg; err = register_netdev(dev); - if (err < 0) - goto out; + if (err) + goto err_netdev_reg; dev_info(&pdev->dev, "Added ieee802154 HardMAC hardware\n"); return 0; -out: - unregister_netdev(dev); +err_netdev_reg: + wpan_phy_unregister(phy); +err_phy_reg: + free_netdev(dev); + wpan_phy_free(phy); return err; } -- cgit v1.2.1 From 52cff74eef5dd7bdab759300e7d1ca36eba18254 Mon Sep 17 00:00:00 2001 From: Anish Bhatt Date: Fri, 14 Nov 2014 16:38:31 -0800 Subject: dcbnl : Disable software interrupts before taking dcb_lock Solves possible lockup issues that can be seen from firmware DCB agents calling into the DCB app api. DCB firmware event queues can be tied in with NAPI so that dcb events are generated in softIRQ context. This can results in calls to dcb_*app() functions which try to take the dcb_lock. If the the event triggers while we also have the dcb_lock because lldpad or some other agent happened to be issuing a get/set command we could see a cpu lockup. This code was not originally written with firmware agents in mind, hence grabbing dcb_lock from softIRQ context was not considered. Signed-off-by: Anish Bhatt Signed-off-by: David S. Miller --- net/dcb/dcbnl.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index ca11d283bbeb..93ea80196f0e 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1080,13 +1080,13 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev) if (!app) return -EMSGSIZE; - spin_lock(&dcb_lock); + spin_lock_bh(&dcb_lock); list_for_each_entry(itr, &dcb_app_list, list) { if (itr->ifindex == netdev->ifindex) { err = nla_put(skb, DCB_ATTR_IEEE_APP, sizeof(itr->app), &itr->app); if (err) { - spin_unlock(&dcb_lock); + spin_unlock_bh(&dcb_lock); return -EMSGSIZE; } } @@ -1097,7 +1097,7 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev) else dcbx = -EOPNOTSUPP; - spin_unlock(&dcb_lock); + spin_unlock_bh(&dcb_lock); nla_nest_end(skb, app); /* get peer info if available */ @@ -1234,7 +1234,7 @@ static int dcbnl_cee_fill(struct sk_buff *skb, struct net_device *netdev) } /* local app */ - spin_lock(&dcb_lock); + spin_lock_bh(&dcb_lock); app = nla_nest_start(skb, DCB_ATTR_CEE_APP_TABLE); if (!app) goto dcb_unlock; @@ -1271,7 +1271,7 @@ static int dcbnl_cee_fill(struct sk_buff *skb, struct net_device *netdev) else dcbx = -EOPNOTSUPP; - spin_unlock(&dcb_lock); + spin_unlock_bh(&dcb_lock); /* features flags */ if (ops->getfeatcfg) { @@ -1326,7 +1326,7 @@ static int dcbnl_cee_fill(struct sk_buff *skb, struct net_device *netdev) return 0; dcb_unlock: - spin_unlock(&dcb_lock); + spin_unlock_bh(&dcb_lock); nla_put_failure: return err; } @@ -1762,10 +1762,10 @@ u8 dcb_getapp(struct net_device *dev, struct dcb_app *app) struct dcb_app_type *itr; u8 prio = 0; - spin_lock(&dcb_lock); + spin_lock_bh(&dcb_lock); if ((itr = dcb_app_lookup(app, dev->ifindex, 0))) prio = itr->app.priority; - spin_unlock(&dcb_lock); + spin_unlock_bh(&dcb_lock); return prio; } @@ -1789,7 +1789,7 @@ int dcb_setapp(struct net_device *dev, struct dcb_app *new) if (dev->dcbnl_ops->getdcbx) event.dcbx = dev->dcbnl_ops->getdcbx(dev); - spin_lock(&dcb_lock); + spin_lock_bh(&dcb_lock); /* Search for existing match and replace */ if ((itr = dcb_app_lookup(new, dev->ifindex, 0))) { if (new->priority) @@ -1804,7 +1804,7 @@ int dcb_setapp(struct net_device *dev, struct dcb_app *new) if (new->priority) err = dcb_app_add(new, dev->ifindex); out: - spin_unlock(&dcb_lock); + spin_unlock_bh(&dcb_lock); if (!err) call_dcbevent_notifiers(DCB_APP_EVENT, &event); return err; @@ -1823,10 +1823,10 @@ u8 dcb_ieee_getapp_mask(struct net_device *dev, struct dcb_app *app) struct dcb_app_type *itr; u8 prio = 0; - spin_lock(&dcb_lock); + spin_lock_bh(&dcb_lock); if ((itr = dcb_app_lookup(app, dev->ifindex, 0))) prio |= 1 << itr->app.priority; - spin_unlock(&dcb_lock); + spin_unlock_bh(&dcb_lock); return prio; } @@ -1850,7 +1850,7 @@ int dcb_ieee_setapp(struct net_device *dev, struct dcb_app *new) if (dev->dcbnl_ops->getdcbx) event.dcbx = dev->dcbnl_ops->getdcbx(dev); - spin_lock(&dcb_lock); + spin_lock_bh(&dcb_lock); /* Search for existing match and abort if found */ if (dcb_app_lookup(new, dev->ifindex, new->priority)) { err = -EEXIST; @@ -1859,7 +1859,7 @@ int dcb_ieee_setapp(struct net_device *dev, struct dcb_app *new) err = dcb_app_add(new, dev->ifindex); out: - spin_unlock(&dcb_lock); + spin_unlock_bh(&dcb_lock); if (!err) call_dcbevent_notifiers(DCB_APP_EVENT, &event); return err; @@ -1882,7 +1882,7 @@ int dcb_ieee_delapp(struct net_device *dev, struct dcb_app *del) if (dev->dcbnl_ops->getdcbx) event.dcbx = dev->dcbnl_ops->getdcbx(dev); - spin_lock(&dcb_lock); + spin_lock_bh(&dcb_lock); /* Search for existing match and remove it. */ if ((itr = dcb_app_lookup(del, dev->ifindex, del->priority))) { list_del(&itr->list); @@ -1890,7 +1890,7 @@ int dcb_ieee_delapp(struct net_device *dev, struct dcb_app *del) err = 0; } - spin_unlock(&dcb_lock); + spin_unlock_bh(&dcb_lock); if (!err) call_dcbevent_notifiers(DCB_APP_EVENT, &event); return err; @@ -1902,12 +1902,12 @@ static void dcb_flushapp(void) struct dcb_app_type *app; struct dcb_app_type *tmp; - spin_lock(&dcb_lock); + spin_lock_bh(&dcb_lock); list_for_each_entry_safe(app, tmp, &dcb_app_list, list) { list_del(&app->list); kfree(app); } - spin_unlock(&dcb_lock); + spin_unlock_bh(&dcb_lock); } static int __init dcbnl_init(void) -- cgit v1.2.1 From bb2bdeb83fb125c95e47fc7eca2a3e8f868e2a74 Mon Sep 17 00:00:00 2001 From: Martin Hauke Date: Sun, 16 Nov 2014 19:55:25 +0100 Subject: qmi_wwan: Add support for HP lt4112 LTE/HSPA+ Gobi 4G Modem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Added the USB VID/PID for the HP lt4112 LTE/HSPA+ Gobi 4G Modem (Huawei me906e) Signed-off-by: Martin Hauke Acked-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 22756db53dca..b8a82b86f909 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -780,6 +780,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x413c, 0x81a4, 8)}, /* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */ {QMI_FIXED_INTF(0x413c, 0x81a8, 8)}, /* Dell Wireless 5808 Gobi(TM) 4G LTE Mobile Broadband Card */ {QMI_FIXED_INTF(0x413c, 0x81a9, 8)}, /* Dell Wireless 5808e Gobi(TM) 4G LTE Mobile Broadband Card */ + {QMI_FIXED_INTF(0x03f0, 0x581d, 4)}, /* HP lt4112 LTE/HSPA+ Gobi 4G Module (Huawei me906e) */ /* 4. Gobi 1000 devices */ {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */ -- cgit v1.2.1 From feb91a02ccb09661507f170b2a444aec94f307f9 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 5 Nov 2014 20:27:38 +0100 Subject: ipv6: mld: fix add_grhead skb_over_panic for devs with large MTUs It has been reported that generating an MLD listener report on devices with large MTUs (e.g. 9000) and a high number of IPv6 addresses can trigger a skb_over_panic(): skbuff: skb_over_panic: text:ffffffff80612a5d len:3776 put:20 head:ffff88046d751000 data:ffff88046d751010 tail:0xed0 end:0xec0 dev:port1 ------------[ cut here ]------------ kernel BUG at net/core/skbuff.c:100! invalid opcode: 0000 [#1] SMP Modules linked in: ixgbe(O) CPU: 3 PID: 0 Comm: swapper/3 Tainted: G O 3.14.23+ #4 [...] Call Trace: [] ? skb_put+0x3a/0x3b [] ? add_grhead+0x45/0x8e [] ? add_grec+0x394/0x3d4 [] ? mld_ifc_timer_expire+0x195/0x20d [] ? mld_dad_timer_expire+0x45/0x45 [] ? call_timer_fn.isra.29+0x12/0x68 [] ? run_timer_softirq+0x163/0x182 [] ? __do_softirq+0xe0/0x21d [] ? irq_exit+0x4e/0xd3 [] ? smp_apic_timer_interrupt+0x3b/0x46 [] ? apic_timer_interrupt+0x6a/0x70 mld_newpack() skb allocations are usually requested with dev->mtu in size, since commit 72e09ad107e7 ("ipv6: avoid high order allocations") we have changed the limit in order to be less likely to fail. However, in MLD/IGMP code, we have some rather ugly AVAILABLE(skb) macros, which determine if we may end up doing an skb_put() for adding another record. To avoid possible fragmentation, we check the skb's tailroom as skb->dev->mtu - skb->len, which is a wrong assumption as the actual max allocation size can be much smaller. The IGMP case doesn't have this issue as commit 57e1ab6eaddc ("igmp: refine skb allocations") stores the allocation size in the cb[]. Set a reserved_tailroom to make it fit into the MTU and use skb_availroom() helper instead. This also allows to get rid of igmp_skb_size(). Reported-by: Wei Liu Fixes: 72e09ad107e7 ("ipv6: avoid high order allocations") Signed-off-by: Daniel Borkmann Cc: Eric Dumazet Cc: Hannes Frederic Sowa Cc: David L Stevens Acked-by: Eric Dumazet Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 11 +++++------ net/ipv6/mcast.c | 9 +++++---- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index fb70e3ecc3e4..bb15d0e03d4f 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -318,9 +318,7 @@ igmp_scount(struct ip_mc_list *pmc, int type, int gdeleted, int sdeleted) return scount; } -#define igmp_skb_size(skb) (*(unsigned int *)((skb)->cb)) - -static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) +static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu) { struct sk_buff *skb; struct rtable *rt; @@ -330,6 +328,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) struct flowi4 fl4; int hlen = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; + unsigned int size = mtu; while (1) { skb = alloc_skb(size + hlen + tlen, @@ -341,7 +340,6 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) return NULL; } skb->priority = TC_PRIO_CONTROL; - igmp_skb_size(skb) = size; rt = ip_route_output_ports(net, &fl4, NULL, IGMPV3_ALL_MCR, 0, 0, 0, @@ -354,6 +352,8 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) skb_dst_set(skb, &rt->dst); skb->dev = dev; + skb->reserved_tailroom = skb_end_offset(skb) - + min(mtu, skb_end_offset(skb)); skb_reserve(skb, hlen); skb_reset_network_header(skb); @@ -423,8 +423,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc, return skb; } -#define AVAILABLE(skb) ((skb) ? ((skb)->dev ? igmp_skb_size(skb) - (skb)->len : \ - skb_tailroom(skb)) : 0) +#define AVAILABLE(skb) ((skb) ? skb_availroom(skb) : 0) static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, int type, int gdeleted, int sdeleted) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 9648de2b6745..ed2c4e400b46 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1550,7 +1550,7 @@ static void ip6_mc_hdr(struct sock *sk, struct sk_buff *skb, hdr->daddr = *daddr; } -static struct sk_buff *mld_newpack(struct inet6_dev *idev, int size) +static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu) { struct net_device *dev = idev->dev; struct net *net = dev_net(dev); @@ -1561,13 +1561,13 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, int size) const struct in6_addr *saddr; int hlen = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; + unsigned int size = mtu + hlen + tlen; int err; u8 ra[8] = { IPPROTO_ICMPV6, 0, IPV6_TLV_ROUTERALERT, 2, 0, 0, IPV6_TLV_PADN, 0 }; /* we assume size > sizeof(ra) here */ - size += hlen + tlen; /* limit our allocations to order-0 page */ size = min_t(int, size, SKB_MAX_ORDER(0, 0)); skb = sock_alloc_send_skb(sk, size, 1, &err); @@ -1576,6 +1576,8 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, int size) return NULL; skb->priority = TC_PRIO_CONTROL; + skb->reserved_tailroom = skb_end_offset(skb) - + min(mtu, skb_end_offset(skb)); skb_reserve(skb, hlen); if (__ipv6_get_lladdr(idev, &addr_buf, IFA_F_TENTATIVE)) { @@ -1690,8 +1692,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc, return skb; } -#define AVAILABLE(skb) ((skb) ? ((skb)->dev ? (skb)->dev->mtu - (skb)->len : \ - skb_tailroom(skb)) : 0) +#define AVAILABLE(skb) ((skb) ? skb_availroom(skb) : 0) static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, int type, int gdeleted, int sdeleted, int crsend) -- cgit v1.2.1 From 137bd11090d89b3a3ef4bdb7a6cf964ffc797517 Mon Sep 17 00:00:00 2001 From: Jon Medhurst Date: Fri, 7 Nov 2014 18:05:17 +0000 Subject: dmaengine: pl330: Align DMA memcpy operations to MFIFO width The algorithm used for programming the DMA Controller doesn't take into consideration the requirements of transfers that are not aligned to the bus width. This failure may result in DMA transferring one too few MFIFO entries (so too few bytes are copied) or the DMA trying to write one too many MFIFO entries and hanging because this is never provided. See "MFIFO Usage Overview" chapter in the the TRM for "CoreLink DMA Controller DMA-330", Revision r1p1. We work around these shortcomings by making sure we pick a burst size and length which ensures no bursts straddle an MFIFO entry. Signed-off-by: Jon Medhurst [squashed linker error "undefined reference to `__aeabi_uldivmod] Reported-by: kbuild test robot Acked-by: Arnd Bergmann Signed-off-by: Vinod Koul --- drivers/dma/pl330.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index 4839bfa74a10..81505420bde4 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -2459,16 +2459,25 @@ pl330_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dst, /* Select max possible burst size */ burst = pl330->pcfg.data_bus_width / 8; - while (burst > 1) { - if (!(len % burst)) - break; + /* + * Make sure we use a burst size that aligns with all the memcpy + * parameters because our DMA programming algorithm doesn't cope with + * transfers which straddle an entry in the DMA device's MFIFO. + */ + while ((src | dst | len) & (burst - 1)) burst /= 2; - } desc->rqcfg.brst_size = 0; while (burst != (1 << desc->rqcfg.brst_size)) desc->rqcfg.brst_size++; + /* + * If burst size is smaller than bus width then make sure we only + * transfer one at a time to avoid a burst stradling an MFIFO entry. + */ + if (desc->rqcfg.brst_size * 8 < pl330->pcfg.data_bus_width) + desc->rqcfg.brst_len = 1; + desc->rqcfg.brst_len = get_burst_len(desc, len); desc->txd.flags = flags; -- cgit v1.2.1 From c27f95568d1ed2573fc9c8848d741da63ca9a34e Mon Sep 17 00:00:00 2001 From: Jon Medhurst Date: Fri, 7 Nov 2014 18:05:18 +0000 Subject: dmaengine: pl330: Limit MFIFO usage for memcpy to avoid exhausting entries The MFIFO is shared by all channels so restrict each memcpy to it's fair share. This is being over cautious, but without a global view of DMA channel usage on a system it's not possible to come up with a more optimum safe limit. Signed-off-by: Jon Medhurst Signed-off-by: Vinod Koul --- drivers/dma/pl330.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index 81505420bde4..e13b51a46502 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -2336,7 +2336,7 @@ static inline int get_burst_len(struct dma_pl330_desc *desc, size_t len) int burst_len; burst_len = pl330->pcfg.data_bus_width / 8; - burst_len *= pl330->pcfg.data_buf_dep; + burst_len *= pl330->pcfg.data_buf_dep / pl330->pcfg.num_chan; burst_len >>= desc->rqcfg.brst_size; /* src/dst_burst_len can't be more than 16 */ -- cgit v1.2.1 From 1f0a5cbf61a54504236bbbe2c98b58e85f90e650 Mon Sep 17 00:00:00 2001 From: Liviu Dudau Date: Thu, 6 Nov 2014 17:20:12 +0000 Subject: dmaengine: Fix allocation size for PL330 data buffer depth. The datasheet for PL330 says that the data buffer value in the CRD register is 10bits wide. However, the value stored is "minus one", which the driver corrects for. Maximum value that the data buffer depth can have is 1024 lines, which requires 11 bits for storage. While making updates I found printing the peripheral ID as a hex value to be more useful as the datasheet shows the values that way. Signed-off-by: Liviu Dudau Signed-off-by: Vinod Koul --- drivers/dma/pl330.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index e13b51a46502..19a99743cf52 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -271,7 +271,7 @@ struct pl330_config { #define DMAC_MODE_NS (1 << 0) unsigned int mode; unsigned int data_bus_width:10; /* In number of bits */ - unsigned int data_buf_dep:10; + unsigned int data_buf_dep:11; unsigned int num_chan:4; unsigned int num_peri:6; u32 peri_ns; @@ -2741,7 +2741,7 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id) dev_info(&adev->dev, - "Loaded driver for PL330 DMAC-%d\n", adev->periphid); + "Loaded driver for PL330 DMAC-%x\n", adev->periphid); dev_info(&adev->dev, "\tDBUFF-%ux%ubytes Num_Chans-%u Num_Peri-%u Num_Events-%u\n", pcfg->data_buf_dep, pcfg->data_bus_width / 8, pcfg->num_chan, -- cgit v1.2.1 From 2208d655a91f9879bd9a39ff9df05dd668b3512c Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 14 Nov 2014 09:25:29 +0100 Subject: drm/i915: drop WaSetupGtModeTdRowDispatch:snb This reverts the regressing commit 6547fbdbfff62c99e4f7b4f985ff8b3454f33b0f Author: Daniel Vetter Date: Fri Dec 14 23:38:29 2012 +0100 drm/i915: Implement WaSetupGtModeTdRowDispatch that causes GPU hangs immediately on boot. Reported-by: Leo Wolf Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=79996 Cc: stable@vger.kernel.org (v3.8+) Signed-off-by: Daniel Vetter [Jani: amended the commit message slightly.] Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_pm.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index c27b6140bfd1..ad2fd605f76b 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5469,11 +5469,6 @@ static void gen6_init_clock_gating(struct drm_device *dev) I915_WRITE(_3D_CHICKEN, _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB)); - /* WaSetupGtModeTdRowDispatch:snb */ - if (IS_SNB_GT1(dev)) - I915_WRITE(GEN6_GT_MODE, - _MASKED_BIT_ENABLE(GEN6_TD_FOUR_ROW_DISPATCH_DISABLE)); - /* WaDisable_RenderCache_OperationalFlush:snb */ I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); -- cgit v1.2.1 From 0485c9dc24ec0939b42ca5104c0373297506b555 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 14 Nov 2014 10:09:49 +0100 Subject: drm/i915: Kick fbdev before vgacon It's magic, but it seems to work. This fixes a regression introduced in commit 1bb9e632a0aeee1121e652ee4dc80e5e6f14bcd2 Author: Daniel Vetter Date: Tue Jul 8 10:02:43 2014 +0200 drm/i915: Only unbind vgacon, not other console drivers My best guess is that the vga fbdev driver falls over if we rip out parts of vgacon. Hooray. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=82439 Cc: stable@vger.kernel.org (v3.16+) Reported-and-tested-by: Lv Zheng Signed-off-by: Daniel Vetter Acked-by: Chris Wilson Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_dma.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 1403b01e8216..318ade9bb5af 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1670,15 +1670,17 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) goto out_regs; if (drm_core_check_feature(dev, DRIVER_MODESET)) { - ret = i915_kick_out_vgacon(dev_priv); + /* WARNING: Apparently we must kick fbdev drivers before vgacon, + * otherwise the vga fbdev driver falls over. */ + ret = i915_kick_out_firmware_fb(dev_priv); if (ret) { - DRM_ERROR("failed to remove conflicting VGA console\n"); + DRM_ERROR("failed to remove conflicting framebuffer drivers\n"); goto out_gtt; } - ret = i915_kick_out_firmware_fb(dev_priv); + ret = i915_kick_out_vgacon(dev_priv); if (ret) { - DRM_ERROR("failed to remove conflicting framebuffer drivers\n"); + DRM_ERROR("failed to remove conflicting VGA console\n"); goto out_gtt; } } -- cgit v1.2.1 From 97840cb67ff5ac8add836684f011fd838518d698 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 14 Nov 2014 18:14:33 +0100 Subject: netfilter: nfnetlink: fix insufficient validation in nfnetlink_bind Make sure the netlink group exists, otherwise you can trigger an out of bound array memory access from the netlink_bind() path. This splat can only be triggered only by superuser. [ 180.203600] UBSan: Undefined behaviour in ../net/netfilter/nfnetlink.c:467:28 [ 180.204249] index 9 is out of range for type 'int [9]' [ 180.204697] CPU: 0 PID: 1771 Comm: trinity-main Not tainted 3.18.0-rc4-mm1+ #122 [ 180.205365] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org +04/01/2014 [ 180.206498] 0000000000000018 0000000000000000 0000000000000009 ffff88007bdf7da8 [ 180.207220] ffffffff82b0ef5f 0000000000000092 ffffffff845ae2e0 ffff88007bdf7db8 [ 180.207887] ffffffff8199e489 ffff88007bdf7e18 ffffffff8199ea22 0000003900000000 [ 180.208639] Call Trace: [ 180.208857] dump_stack (lib/dump_stack.c:52) [ 180.209370] ubsan_epilogue (lib/ubsan.c:174) [ 180.209849] __ubsan_handle_out_of_bounds (lib/ubsan.c:400) [ 180.210512] nfnetlink_bind (net/netfilter/nfnetlink.c:467) [ 180.210986] netlink_bind (net/netlink/af_netlink.c:1483) [ 180.211495] SYSC_bind (net/socket.c:1541) Moreover, define the missing nf_tables and nf_acct multicast groups too. Reported-by: Andrey Ryabinin Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 6c5a915cfa75..13c2e17bbe27 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -47,6 +47,8 @@ static const int nfnl_group2type[NFNLGRP_MAX+1] = { [NFNLGRP_CONNTRACK_EXP_NEW] = NFNL_SUBSYS_CTNETLINK_EXP, [NFNLGRP_CONNTRACK_EXP_UPDATE] = NFNL_SUBSYS_CTNETLINK_EXP, [NFNLGRP_CONNTRACK_EXP_DESTROY] = NFNL_SUBSYS_CTNETLINK_EXP, + [NFNLGRP_NFTABLES] = NFNL_SUBSYS_NFTABLES, + [NFNLGRP_ACCT_QUOTA] = NFNL_SUBSYS_ACCT, }; void nfnl_lock(__u8 subsys_id) @@ -464,7 +466,12 @@ static void nfnetlink_rcv(struct sk_buff *skb) static int nfnetlink_bind(int group) { const struct nfnetlink_subsystem *ss; - int type = nfnl_group2type[group]; + int type; + + if (group <= NFNLGRP_NONE || group > NFNLGRP_MAX) + return -EINVAL; + + type = nfnl_group2type[group]; rcu_read_lock(); ss = nfnetlink_get_subsys(type); @@ -514,6 +521,9 @@ static int __init nfnetlink_init(void) { int i; + for (i = NFNLGRP_NONE + 1; i <= NFNLGRP_MAX; i++) + BUG_ON(nfnl_group2type[i] == NFNL_SUBSYS_NONE); + for (i=0; i Date: Mon, 17 Nov 2014 12:20:28 +0100 Subject: bridge: fix netfilter/NF_BR_LOCAL_OUT for own, locally generated queries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ebtables on the OUTPUT chain (NF_BR_LOCAL_OUT) would not work as expected for both locally generated IGMP and MLD queries. The IP header specific filter options are off by 14 Bytes for netfilter (actual output on interfaces is fine). NF_HOOK() expects the skb->data to point to the IP header, not the ethernet one (while dev_queue_xmit() does not). Luckily there is an br_dev_queue_push_xmit() helper function already - let's just use that. Introduced by eb1d16414339a6e113d89e2cca2556005d7ce919 ("bridge: Add core IGMP snooping support") Ebtables example: $ ebtables -I OUTPUT -p IPv6 -o eth1 --logical-out br0 \ --log --log-level 6 --log-ip6 --log-prefix="~EBT: " -j DROP before (broken): ~EBT: IN= OUT=eth1 MAC source = 02:04:64:a4:39:c2 \ MAC dest = 33:33:00:00:00:01 proto = 0x86dd IPv6 \ SRC=64a4:39c2:86dd:6000:0000:0020:0001:fe80 IPv6 \ DST=0000:0000:0000:0004:64ff:fea4:39c2:ff02, \ IPv6 priority=0x3, Next Header=2 after (working): ~EBT: IN= OUT=eth1 MAC source = 02:04:64:a4:39:c2 \ MAC dest = 33:33:00:00:00:01 proto = 0x86dd IPv6 \ SRC=fe80:0000:0000:0000:0004:64ff:fea4:39c2 IPv6 \ DST=ff02:0000:0000:0000:0000:0000:0000:0001, \ IPv6 priority=0x0, Next Header=0 Signed-off-by: Linus Lüssing Acked-by: Herbert Xu Signed-off-by: Pablo Neira Ayuso --- net/bridge/br_multicast.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 648d79ccf462..c465876c7861 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -813,10 +813,9 @@ static void __br_multicast_send_query(struct net_bridge *br, return; if (port) { - __skb_push(skb, sizeof(struct ethhdr)); skb->dev = port->dev; NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, - dev_queue_xmit); + br_dev_queue_push_xmit); } else { br_multicast_select_own_querier(br, ip, skb); netif_rx(skb); -- cgit v1.2.1 From a358a0ef861dae6f8330fb034aaa43adae71ebc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johan=20Rast=C3=A9n?= Date: Mon, 17 Nov 2014 08:39:33 +0100 Subject: ALSA: usb-audio: Set the Control Selector to SU_SELECTOR_CONTROL for UAC2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Specified in section 5.2.5.6.1 of the USB Audio Class 2.0 definition. Solves the following error for C-Media 6632A (Asus Xonar U7): [ 8219.676164] cannot get ctl value: req = 0x81, wValue = 0x0, wIndex = 0x1400, type = 3 Signed-off-by: Johan Rastén Signed-off-by: Takashi Iwai --- sound/usb/mixer.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 2e4a9dbc51fa..6e354d326858 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -2033,10 +2033,11 @@ static int parse_audio_selector_unit(struct mixer_build *state, int unitid, cval->res = 1; cval->initialized = 1; - if (desc->bDescriptorSubtype == UAC2_CLOCK_SELECTOR) - cval->control = UAC2_CX_CLOCK_SELECTOR; - else + if (state->mixer->protocol == UAC_VERSION_1) cval->control = 0; + else /* UAC_VERSION_2 */ + cval->control = (desc->bDescriptorSubtype == UAC2_CLOCK_SELECTOR) ? + UAC2_CX_CLOCK_SELECTOR : UAC2_SU_SELECTOR; namelist = kmalloc(sizeof(char *) * desc->bNrInPins, GFP_KERNEL); if (!namelist) { -- cgit v1.2.1 From 9da7a5a9fdeeb76b2243f6b473363a7e6147ab6f Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Mon, 17 Nov 2014 10:48:21 +0000 Subject: ASoC: wm_adsp: Avoid attempt to free buffers that might still be in use We should not free any buffers associated with writing out coefficients to the DSP until all the async writes have completed. This patch updates the out of memory path when allocating a new buffer to include a call to regmap_async_complete. Reported-by: JS Park Signed-off-by: Charles Keepax Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/codecs/wm_adsp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index f412a9911a75..67124783558a 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -1355,6 +1355,7 @@ static int wm_adsp_load_coeff(struct wm_adsp *dsp) file, blocks, pos - firmware->size); out_fw: + regmap_async_complete(regmap); release_firmware(firmware); wm_adsp_buf_free(&buf_list); out: -- cgit v1.2.1 From daad1660285a967b5da363e8de264e86b4a496a0 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Tue, 4 Nov 2014 15:22:50 -0800 Subject: ath9k: fix regression in bssidmask calculation The commit that went into 3.17: ath9k: Summarize hw state per channel context Group and set hw state (opmode, primary_sta, beacon conf) per channel context instead of whole list of vifs. This would allow each channel context to run in different mode (STA/AP). Signed-off-by: Felix Fietkau Signed-off-by: Rajkumar Manoharan Signed-off-by: John W. Linville broke multi-vif configuration due to not properly calculating the bssid mask. The test case that caught this was: create wlan0 and sta0-4 (6 total), not sure how much that matters. associate all 6 (works fine) disconnect 5 of them, leaving sta0 up Start trying to bring up the other 5 one at a time. It will fail, with iw events looking like this (in these logs, several sta are trying to come up, but symptom is the same with just one) The patch causing the regression made quite a few changes, but the part I think caused this particular problem was not recalculating the bssid mask when adding and removing interfaces. Re-adding those calls fixes my test case. Fix bad comment as well. Signed-off-by: Ben Greear Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/main.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 30c66dfcd7a0..4f18a6be0c7d 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -974,9 +974,8 @@ void ath9k_calculate_iter_data(struct ath_softc *sc, struct ath_vif *avp; /* - * Pick the MAC address of the first interface as the new hardware - * MAC address. The hardware will use it together with the BSSID mask - * when matching addresses. + * The hardware will use primary station addr together with the + * BSSID mask when matching addresses. */ memset(iter_data, 0, sizeof(*iter_data)); memset(&iter_data->mask, 0xff, ETH_ALEN); @@ -1205,6 +1204,8 @@ static int ath9k_add_interface(struct ieee80211_hw *hw, list_add_tail(&avp->list, &avp->chanctx->vifs); } + ath9k_calculate_summary_state(sc, avp->chanctx); + ath9k_assign_hw_queues(hw, vif); an->sc = sc; @@ -1274,6 +1275,8 @@ static void ath9k_remove_interface(struct ieee80211_hw *hw, ath_tx_node_cleanup(sc, &avp->mcast_node); + ath9k_calculate_summary_state(sc, avp->chanctx); + mutex_unlock(&sc->mutex); } -- cgit v1.2.1 From 8180bd47b043507568056f74f69b6a5abea26514 Mon Sep 17 00:00:00 2001 From: Mathy Vanhoef Date: Wed, 12 Nov 2014 21:33:34 -0500 Subject: brcmfmac: kill URB when request timed out Kill the submitted URB in brcmf_usb_dl_cmd if the request timed out. This assures the URB is never submitted twice. It also prevents a possible use-after-free of the URB transfer buffer if a timeout occurs. Signed-off-by: Mathy Vanhoef Acked-by: Arend van Spriel Signed-off-by: John W. Linville --- drivers/net/wireless/brcm80211/brcmfmac/usb.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/brcm80211/brcmfmac/usb.c index dc135915470d..875d1142c8b0 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/usb.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/usb.c @@ -669,10 +669,12 @@ static int brcmf_usb_dl_cmd(struct brcmf_usbdev_info *devinfo, u8 cmd, goto finalize; } - if (!brcmf_usb_ioctl_resp_wait(devinfo)) + if (!brcmf_usb_ioctl_resp_wait(devinfo)) { + usb_kill_urb(devinfo->ctl_urb); ret = -ETIMEDOUT; - else + } else { memcpy(buffer, tmpbuf, buflen); + } finalize: kfree(tmpbuf); -- cgit v1.2.1 From 4c69f05eaa428e37890daf88b86a567ce615570b Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 14 Nov 2014 14:12:21 -0800 Subject: brcmfmac: fix error handling of irq_of_parse_and_map Return value of irq_of_parse_and_map() is unsigned int, with 0 indicating failure, so testing for negative result never works. Signed-off-by: Dmitry Torokhov Cc: stable@vger.kernel.org # v3.17 Acked-by: Arend van Spriel Signed-off-by: John W. Linville --- drivers/net/wireless/brcm80211/brcmfmac/of.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/brcm80211/brcmfmac/of.c b/drivers/net/wireless/brcm80211/brcmfmac/of.c index f05f5270fec1..927bffd5be64 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/of.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/of.c @@ -40,8 +40,8 @@ void brcmf_of_probe(struct brcmf_sdio_dev *sdiodev) return; irq = irq_of_parse_and_map(np, 0); - if (irq < 0) { - brcmf_err("interrupt could not be mapped: err=%d\n", irq); + if (!irq) { + brcmf_err("interrupt could not be mapped\n"); devm_kfree(dev, sdiodev->pdata); return; } -- cgit v1.2.1 From d7ce4377494adfaf8afb15ecf4f07d399bbf13d9 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Fri, 14 Nov 2014 13:51:22 +0800 Subject: powerpc/fsl_msi: mark the msi cascade handler IRQF_NO_THREAD The commit 543c043cbae7 ("powerpc/fsl_msi: change the irq handler from chained to normal") changes the msi cascade handler from chained to normal. Since cascade handler must run in hard interrupt context, this will cause kernel panic if we force threading of all the interrupt handler via kernel command parameter 'threadirqs'. So mark the irq handler IRQF_NO_THREAD explicitly. Signed-off-by: Kevin Hao Signed-off-by: Scott Wood --- arch/powerpc/sysdev/fsl_msi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index de40b48b460e..da08ed088157 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -361,7 +361,7 @@ static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev, cascade_data->virq = virt_msir; msi->cascade_array[irq_index] = cascade_data; - ret = request_irq(virt_msir, fsl_msi_cascade, 0, + ret = request_irq(virt_msir, fsl_msi_cascade, IRQF_NO_THREAD, "fsl-msi-cascade", cascade_data); if (ret) { dev_err(&dev->dev, "failed to request_irq(%d), ret = %d\n", -- cgit v1.2.1 From 4a83d42ae2041d5b76f1a0662bc3a5a85e4eb0d1 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Tue, 18 Nov 2014 17:57:40 +0800 Subject: ALSA: hda - move DELL_WMI_MIC_MUTE_LED to the tail in the quirk chain We have one more Dell machine needs DELL_WMI_MIC_MUTE_LED quirk, but the machine uses alc293 instead of alc255. So if DELL_WMI_MIC_MUTE_LED still chain ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, the machine can't use this quirk. To change this situation, let the DELL_WMI_MIC_MUTE_LED to be a standalone quirk, and let other quirks chain it. After this change, this quirk can be chained to any existing quirks, and as a result, it is possible that this quirk is applied to a non-Dell machine or a Dell machine without mic mute led on it, but it is still safe since alc_fixup_dell_wmi() will return an error in these situations. And remove the quirk for machine with subsystem id 0x6010 and 0x601f, these two machines will fall back to the quirk ALC255_FIXUP_DELL1_MIC_NO_PRESENCE-->ALC255_FIXUP_HEADSET_MODE--> ALC255_FIXUP_DELL_WMI_MIC_MUTE_LED through pin_fixup_tbl[]. BugLink: https://bugs.launchpad.net/bugs/1381856 Reported-and-tested-by: Po-Hsu Lin Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 172395465e8a..9da5798c370a 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4709,6 +4709,8 @@ static const struct hda_fixup alc269_fixups[] = { [ALC255_FIXUP_HEADSET_MODE] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_headset_mode_alc255, + .chained = true, + .chain_id = ALC255_FIXUP_DELL_WMI_MIC_MUTE_LED }, [ALC255_FIXUP_HEADSET_MODE_NO_HP_MIC] = { .type = HDA_FIXUP_FUNC, @@ -4744,8 +4746,6 @@ static const struct hda_fixup alc269_fixups[] = { [ALC255_FIXUP_DELL_WMI_MIC_MUTE_LED] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_dell_wmi, - .chained_before = true, - .chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE }, [ALC282_FIXUP_ASPIRE_V5_PINS] = { .type = HDA_FIXUP_PINS, @@ -4783,10 +4783,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x05f4, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x05f5, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x05f6, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), - SND_PCI_QUIRK(0x1028, 0x0610, "Dell", ALC255_FIXUP_DELL_WMI_MIC_MUTE_LED), SND_PCI_QUIRK(0x1028, 0x0615, "Dell Vostro 5470", ALC290_FIXUP_SUBWOOFER_HSJACK), SND_PCI_QUIRK(0x1028, 0x0616, "Dell Vostro 5470", ALC290_FIXUP_SUBWOOFER_HSJACK), - SND_PCI_QUIRK(0x1028, 0x061f, "Dell", ALC255_FIXUP_DELL_WMI_MIC_MUTE_LED), SND_PCI_QUIRK(0x1028, 0x0638, "Dell Inspiron 5439", ALC290_FIXUP_MONO_SPEAKERS_HSJACK), SND_PCI_QUIRK(0x1028, 0x064a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x064b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), -- cgit v1.2.1 From 6676f3081f7e3dae64e05b87d47a041b782f898a Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Tue, 18 Nov 2014 17:57:41 +0800 Subject: ALSA: hda - fix the mic mute led problem for Latitude E5550 The microphone mute led on the Latitude E5550 can't work. We need to apply DELL_WMI_MIC_MUTE_LED quirk to this machine. The machine uses alc293 codec and already applied the quirk ALC293_FIXUP_DELL1_MIC_NO_PRESENCE through pin_fixup_tbl[]. Here we just let DELL_WMI_MIC_MUTE_LED be chained to ALC269_FIXUP_HEADSET_MODE, then the machine will have these quirks ALC293_FIXUP_DELL1_MIC_NO_PRESENCE--> ALC269_FIXUP_HEADSET_MODE-->ALC255_FIXUP_DELL_WMI_MIC_MUTE_LED. BugLink: https://bugs.launchpad.net/bugs/1381856 Reported-and-tested-by: Po-Hsu Lin Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 9da5798c370a..8fea1b86df25 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4520,6 +4520,8 @@ static const struct hda_fixup alc269_fixups[] = { [ALC269_FIXUP_HEADSET_MODE] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_headset_mode, + .chained = true, + .chain_id = ALC255_FIXUP_DELL_WMI_MIC_MUTE_LED }, [ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC] = { .type = HDA_FIXUP_FUNC, -- cgit v1.2.1 From 5247a589c24022ab34e780039cc8000c48f2035e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20K=C3=B6rper?= Date: Fri, 31 Oct 2014 07:33:54 +0100 Subject: can: dev: avoid calling kfree_skb() from interrupt context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ikfree_skb() is Called in can_free_echo_skb(), which might be called from (TX Error) interrupt, which triggers the folloing warning: [ 1153.360705] ------------[ cut here ]------------ [ 1153.360715] WARNING: CPU: 0 PID: 31 at net/core/skbuff.c:563 skb_release_head_state+0xb9/0xd0() [ 1153.360772] Call Trace: [ 1153.360778] [] dump_stack+0x41/0x52 [ 1153.360782] [] warn_slowpath_common+0x7e/0xa0 [ 1153.360784] [] ? skb_release_head_state+0xb9/0xd0 [ 1153.360786] [] ? skb_release_head_state+0xb9/0xd0 [ 1153.360788] [] warn_slowpath_null+0x22/0x30 [ 1153.360791] [] skb_release_head_state+0xb9/0xd0 [ 1153.360793] [] skb_release_all+0x10/0x30 [ 1153.360795] [] kfree_skb+0x36/0x80 [ 1153.360799] [] ? can_free_echo_skb+0x28/0x40 [can_dev] [ 1153.360802] [] can_free_echo_skb+0x28/0x40 [can_dev] [ 1153.360805] [] esd_pci402_interrupt+0x34c/0x57a [esd402] [ 1153.360809] [] handle_irq_event_percpu+0x35/0x180 [ 1153.360811] [] ? handle_irq_event_percpu+0xa3/0x180 [ 1153.360813] [] handle_irq_event+0x31/0x50 [ 1153.360816] [] handle_fasteoi_irq+0x6f/0x120 [ 1153.360818] [] ? handle_edge_irq+0x110/0x110 [ 1153.360822] [] handle_irq+0x71/0x90 [ 1153.360823] [] do_IRQ+0x3c/0xd0 [ 1153.360829] [] common_interrupt+0x2c/0x34 [ 1153.360834] [] ? finish_task_switch+0x47/0xf0 [ 1153.360836] [] __schedule+0x35b/0x7e0 [ 1153.360839] [] ? console_unlock+0x2c4/0x4d0 [ 1153.360842] [] ? n_tty_receive_buf_common+0x890/0x890 [ 1153.360845] [] ? process_one_work+0x196/0x370 [ 1153.360847] [] schedule+0x23/0x60 [ 1153.360849] [] worker_thread+0x161/0x460 [ 1153.360852] [] ? __wake_up_locked+0x1f/0x30 [ 1153.360854] [] ? rescuer_thread+0x2f0/0x2f0 [ 1153.360856] [] kthread+0xa1/0xc0 [ 1153.360859] [] ret_from_kernel_thread+0x21/0x30 [ 1153.360861] [] ? kthread_create_on_node+0x110/0x110 [ 1153.360863] ---[ end trace 5ff83639cbb74b35 ]--- This patch replaces the kfree_skb() by dev_kfree_skb_any(). Signed-off-by: Thomas Körper Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index 02492d241e4c..509d5e063d8c 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -382,7 +382,7 @@ void can_free_echo_skb(struct net_device *dev, unsigned int idx) BUG_ON(idx >= priv->echo_skb_max); if (priv->echo_skb[idx]) { - kfree_skb(priv->echo_skb[idx]); + dev_kfree_skb_any(priv->echo_skb[idx]); priv->echo_skb[idx] = NULL; } } -- cgit v1.2.1 From 67b5909edccfe3ea3b85b1d96284d2c53e3fd47c Mon Sep 17 00:00:00 2001 From: Roman Fietze Date: Mon, 20 Oct 2014 10:32:42 +0200 Subject: can: dev: fix typo CIA -> CiA, CAN in Automation This patch fixes a typo in CAN's dev.c: CIA -> CiA which stands for CAN in Automation. Signed-off-by: Roman Fietze Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index 509d5e063d8c..2cfe5012e4e5 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -110,7 +110,7 @@ static int can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt, long rate; u64 v64; - /* Use CIA recommended sample points */ + /* Use CiA recommended sample points */ if (bt->sample_point) { sampl_pt = bt->sample_point; } else { -- cgit v1.2.1 From 98e69016a11b2b9398bea668442193b3b362cd43 Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Fri, 7 Nov 2014 16:45:12 +0800 Subject: can: dev: add can_is_canfd_skb() API The CAN device drivers can use can_is_canfd_skb() to check if the frame to send is on CAN FD mode or normal CAN mode. Acked-by: Oliver Hartkopp Signed-off-by: Dong Aisheng Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 6992afc6ba7f..b37ea95bc348 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -99,6 +99,12 @@ inval_skb: return 1; } +static inline bool can_is_canfd_skb(const struct sk_buff *skb) +{ + /* the CAN specific type of skb is identified by its data length */ + return skb->len == CANFD_MTU; +} + /* get data length from can_dlc with sanitized can_dlc */ u8 can_dlc2len(u8 can_dlc); -- cgit v1.2.1 From efbd50d2f62fc1f69a3dcd153e63ba28cc8eb27f Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sat, 11 Oct 2014 00:31:07 +0400 Subject: can: esd_usb2: fix memory leak on disconnect It seems struct esd_usb2 dev is not deallocated on disconnect. The patch adds the missing deallocation. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Acked-by: Matthias Fuchs Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/esd_usb2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c index b7c9e8b11460..7a90075529c3 100644 --- a/drivers/net/can/usb/esd_usb2.c +++ b/drivers/net/can/usb/esd_usb2.c @@ -1143,6 +1143,7 @@ static void esd_usb2_disconnect(struct usb_interface *intf) } } unlink_all_urbs(dev); + kfree(dev); } } -- cgit v1.2.1 From 4e2061b1e1f6b8e698b36a6518b6f8c15edc4547 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Tue, 18 Nov 2014 19:17:06 +0530 Subject: can: remove unused variable these variable were only assigned some values, but then never reused again. so they are safe to be removed. Signed-off-by: Sudip Mukherjee Signed-off-by: Marc Kleine-Budde --- drivers/net/can/sja1000/kvaser_pci.c | 5 +---- drivers/net/can/usb/ems_usb.c | 3 +-- drivers/net/can/usb/esd_usb2.c | 2 -- 3 files changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/net/can/sja1000/kvaser_pci.c b/drivers/net/can/sja1000/kvaser_pci.c index 8ff3424d5147..15c00faeec61 100644 --- a/drivers/net/can/sja1000/kvaser_pci.c +++ b/drivers/net/can/sja1000/kvaser_pci.c @@ -214,7 +214,7 @@ static int kvaser_pci_add_chan(struct pci_dev *pdev, int channel, struct net_device *dev; struct sja1000_priv *priv; struct kvaser_pci *board; - int err, init_step; + int err; dev = alloc_sja1000dev(sizeof(struct kvaser_pci)); if (dev == NULL) @@ -235,7 +235,6 @@ static int kvaser_pci_add_chan(struct pci_dev *pdev, int channel, if (channel == 0) { board->xilinx_ver = ioread8(board->res_addr + XILINX_VERINT) >> 4; - init_step = 2; /* Assert PTADR# - we're in passive mode so the other bits are not important */ @@ -264,8 +263,6 @@ static int kvaser_pci_add_chan(struct pci_dev *pdev, int channel, priv->irq_flags = IRQF_SHARED; dev->irq = pdev->irq; - init_step = 4; - dev_info(&pdev->dev, "reg_base=%p conf_addr=%p irq=%d\n", priv->reg_base, board->conf_addr, dev->irq); diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c index 00f2534dde73..29d3f0938eb8 100644 --- a/drivers/net/can/usb/ems_usb.c +++ b/drivers/net/can/usb/ems_usb.c @@ -434,10 +434,9 @@ static void ems_usb_read_bulk_callback(struct urb *urb) if (urb->actual_length > CPC_HEADER_SIZE) { struct ems_cpc_msg *msg; u8 *ibuf = urb->transfer_buffer; - u8 msg_count, again, start; + u8 msg_count, start; msg_count = ibuf[0] & ~0x80; - again = ibuf[0] & 0x80; start = CPC_HEADER_SIZE; diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c index 7a90075529c3..c063a54ab8dd 100644 --- a/drivers/net/can/usb/esd_usb2.c +++ b/drivers/net/can/usb/esd_usb2.c @@ -464,7 +464,6 @@ static void esd_usb2_write_bulk_callback(struct urb *urb) { struct esd_tx_urb_context *context = urb->context; struct esd_usb2_net_priv *priv; - struct esd_usb2 *dev; struct net_device *netdev; size_t size = sizeof(struct esd_usb2_msg); @@ -472,7 +471,6 @@ static void esd_usb2_write_bulk_callback(struct urb *urb) priv = context->priv; netdev = priv->netdev; - dev = priv->usb2; /* free up our allocated buffer */ usb_free_coherent(urb->dev, size, -- cgit v1.2.1 From fb3ec7ba5a665c280f7299a36460449038fc1083 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Tue, 18 Nov 2014 19:17:07 +0530 Subject: can: xilinx_can: fix comparison of unsigned variable The variable err was of the type u32. It was being compared with < 0, and being an unsigned variable the comparison would have been always false. Moreover, err was getting the return value from set_reset_mode() and xcan_set_bittiming(), and both are returning int. Signed-off-by: Sudip Mukherjee Reviewed-by: Michal Simek Signed-off-by: Marc Kleine-Budde --- drivers/net/can/xilinx_can.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index 5e8b5609c067..47b2f801d127 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -300,7 +300,8 @@ static int xcan_set_bittiming(struct net_device *ndev) static int xcan_chip_start(struct net_device *ndev) { struct xcan_priv *priv = netdev_priv(ndev); - u32 err, reg_msr, reg_sr_mask; + u32 reg_msr, reg_sr_mask; + int err; unsigned long timeout; /* Check if it is in reset mode */ -- cgit v1.2.1 From 92593a035ee6a81f50b54ab34b23b1c1319ee413 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Tue, 18 Nov 2014 13:16:13 +0100 Subject: can: xilinx_can: add .ndo_change_mtu function Use common can_change_mtu function. Signed-off-by: Marc Kleine-Budde --- drivers/net/can/xilinx_can.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index 47b2f801d127..8a998e3884ce 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -962,6 +962,7 @@ static const struct net_device_ops xcan_netdev_ops = { .ndo_open = xcan_open, .ndo_stop = xcan_close, .ndo_start_xmit = xcan_start_xmit, + .ndo_change_mtu = can_change_mtu, }; /** -- cgit v1.2.1 From ca976d6af4e50899f4e840e8017ec29d8fe6b50e Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Tue, 18 Nov 2014 13:16:13 +0100 Subject: can: rcar_can: add .ndo_change_mtu function Use common can_change_mtu function. Signed-off-by: Marc Kleine-Budde --- drivers/net/can/rcar_can.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/rcar_can.c b/drivers/net/can/rcar_can.c index 1abe133d1594..9718248e55f1 100644 --- a/drivers/net/can/rcar_can.c +++ b/drivers/net/can/rcar_can.c @@ -628,6 +628,7 @@ static const struct net_device_ops rcar_can_netdev_ops = { .ndo_open = rcar_can_open, .ndo_stop = rcar_can_close, .ndo_start_xmit = rcar_can_start_xmit, + .ndo_change_mtu = can_change_mtu, }; static void rcar_can_rx_pkt(struct rcar_can_priv *priv) -- cgit v1.2.1 From f2a306c29d024193b1272cd014108882f7887a9e Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Fri, 26 Sep 2014 00:26:27 +0200 Subject: devicetree: bindings: add sandisk to the vendor prefixes Add sandisk to the list of vendors. This prefix should be used also for companies absorbed by Sandisk, like M-Systems. Signed-off-by: Robert Jarzmik Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/vendor-prefixes.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index 723999d73744..3de80303de83 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -127,6 +127,7 @@ renesas Renesas Electronics Corporation ricoh Ricoh Co. Ltd. rockchip Fuzhou Rockchip Electronics Co., Ltd samsung Samsung Semiconductor +sandisk Sandisk Corporation sbs Smart Battery System schindler Schindler seagate Seagate Technology PLC -- cgit v1.2.1 From fb86b97300d930b57471068720c52bfa8622eab7 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 18 Nov 2014 10:46:57 +0100 Subject: x86, microcode: Update BSPs microcode on resume In the situation when we apply early microcode but do *not* apply late microcode, we fail to update the BSP's microcode on resume because we haven't initialized the uci->mc microcode pointer. So, in order to alleviate that, we go and dig out the stashed microcode patch during early boot. It is basically the same thing that is done on the APs early during boot so do that too here. Tested-by: alex.schnaidt@gmail.com Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=88001 Cc: Henrique de Moraes Holschuh Cc: Fenghua Yu Cc: # v3.9 Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/20141118094657.GA6635@pd.tnic Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/microcode/core.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index dd9d6190b08d..2ce9051174e6 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -465,6 +465,14 @@ static void mc_bp_resume(void) if (uci->valid && uci->mc) microcode_ops->apply_microcode(cpu); + else if (!uci->mc) + /* + * We might resume and not have applied late microcode but still + * have a newer patch stashed from the early loader. We don't + * have it in uci->mc so we have to load it the same way we're + * applying patches early on the APs. + */ + load_ucode_ap(); } static struct syscore_ops mc_syscore_ops = { -- cgit v1.2.1 From 45e2a9d4701d8c624d4a4bcdd1084eae31e92f58 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 14 Nov 2014 11:47:37 -0800 Subject: x86, mm: Set NX across entire PMD at boot When setting up permissions on kernel memory at boot, the end of the PMD that was split from bss remained executable. It should be NX like the rest. This performs a PMD alignment instead of a PAGE alignment to get the correct span of memory. Before: ---[ High Kernel Mapping ]--- ... 0xffffffff8202d000-0xffffffff82200000 1868K RW GLB NX pte 0xffffffff82200000-0xffffffff82c00000 10M RW PSE GLB NX pmd 0xffffffff82c00000-0xffffffff82df5000 2004K RW GLB NX pte 0xffffffff82df5000-0xffffffff82e00000 44K RW GLB x pte 0xffffffff82e00000-0xffffffffc0000000 978M pmd After: ---[ High Kernel Mapping ]--- ... 0xffffffff8202d000-0xffffffff82200000 1868K RW GLB NX pte 0xffffffff82200000-0xffffffff82e00000 12M RW PSE GLB NX pmd 0xffffffff82e00000-0xffffffffc0000000 978M pmd [ tglx: Changed it to roundup(_brk_end, PMD_SIZE) and added a comment. We really should unmap the reminder along with the holes caused by init,initdata etc. but thats a different issue ] Signed-off-by: Kees Cook Cc: Andy Lutomirski Cc: Toshi Kani Cc: Yasuaki Ishimatsu Cc: David Vrabel Cc: Wang Nan Cc: Yinghai Lu Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20141114194737.GA3091@www.outflux.net Signed-off-by: Thomas Gleixner --- arch/x86/mm/init_64.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 4cb8763868fc..4e5dfec750fc 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1123,7 +1123,7 @@ void mark_rodata_ro(void) unsigned long end = (unsigned long) &__end_rodata_hpage_align; unsigned long text_end = PFN_ALIGN(&__stop___ex_table); unsigned long rodata_end = PFN_ALIGN(&__end_rodata); - unsigned long all_end = PFN_ALIGN(&_end); + unsigned long all_end; printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", (end - start) >> 10); @@ -1134,7 +1134,16 @@ void mark_rodata_ro(void) /* * The rodata/data/bss/brk section (but not the kernel text!) * should also be not-executable. + * + * We align all_end to PMD_SIZE because the existing mapping + * is a full PMD. If we would align _brk_end to PAGE_SIZE we + * split the PMD and the reminder between _brk_end and the end + * of the PMD will remain mapped executable. + * + * Any PMD which was setup after the one which covers _brk_end + * has been zapped already via cleanup_highmem(). */ + all_end = roundup((unsigned long)_brk_end, PMD_SIZE); set_memory_nx(rodata_start, (all_end - rodata_start) >> PAGE_SHIFT); rodata_test(); -- cgit v1.2.1 From 70b61e362187b5fccac206506d402f3424e3e749 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 17 Nov 2014 16:16:04 -0800 Subject: x86, kaslr: Handle Gold linker for finding bss/brk When building with the Gold linker, the .bss and .brk areas of vmlinux are shown as consecutive instead of having the same file offset. Allow for either state, as long as things add up correctly. Fixes: e6023367d779 ("x86, kaslr: Prevent .bss from overlaping initrd") Reported-by: Markus Trippelsdorf Signed-off-by: Kees Cook Cc: Junjie Mao Link: http://lkml.kernel.org/r/20141118001604.GA25045@www.outflux.net Cc: stable@vger.kernel.org Signed-off-by: Thomas Gleixner --- arch/x86/tools/calc_run_size.pl | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/tools/calc_run_size.pl b/arch/x86/tools/calc_run_size.pl index 0b0b124d3ece..23210baade2d 100644 --- a/arch/x86/tools/calc_run_size.pl +++ b/arch/x86/tools/calc_run_size.pl @@ -19,7 +19,16 @@ while (<>) { if ($file_offset == 0) { $file_offset = $offset; } elsif ($file_offset != $offset) { - die ".bss and .brk lack common file offset\n"; + # BFD linker shows the same file offset in ELF. + # Gold linker shows them as consecutive. + next if ($file_offset + $mem_size == $offset + $size); + + printf STDERR "file_offset: 0x%lx\n", $file_offset; + printf STDERR "mem_size: 0x%lx\n", $mem_size; + printf STDERR "offset: 0x%lx\n", $offset; + printf STDERR "size: 0x%lx\n", $size; + + die ".bss and .brk are non-contiguous\n"; } } } -- cgit v1.2.1 From 27b3383a1432127bfcf9f8a63bf184ff4d866141 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 22 Oct 2014 11:49:01 +0200 Subject: of: Spelling s/stucture/structure/ Signed-off-by: Geert Uytterhoeven Cc: Grant Likely Cc: Rob Herring Signed-off-by: Rob Herring --- drivers/of/dynamic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index f297891d8529..d4994177dec2 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -247,7 +247,7 @@ void of_node_release(struct kobject *kobj) * @allocflags: Allocation flags (typically pass GFP_KERNEL) * * Copy a property by dynamically allocating the memory of both the - * property stucture and the property name & contents. The property's + * property structure and the property name & contents. The property's * flags have the OF_DYNAMIC bit set so that we can differentiate between * dynamically allocated properties and not. * Returns the newly allocated property or NULL on out of memory error. -- cgit v1.2.1 From 66865de4314caca30598244b86817e774c188afa Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Sat, 1 Nov 2014 17:35:31 -0600 Subject: of/irq: Drop obsolete 'interrupts' vs 'interrupts-extended' text a9ecdc0fdc54 ("of/irq: Fix lookup to use 'interrupts-extended' property first") updated the description to say that: - Both 'interrupts' and 'interrupts-extended' may be present - Software should prefer 'interrupts-extended' - Software that doesn't comprehend 'interrupts-extended' may use 'interrupts' But there is still a paragraph at the end that prohibits having both and says 'interrupts' should be preferred. Remove the contradictory text. Fixes: a9ecdc0fdc54 ("of/irq: Fix lookup to use 'interrupts-extended' property first") Signed-off-by: Bjorn Helgaas CC: stable@vger.kernel.org # v3.13+ Acked-by: Brian Norris Acked-by: Mark Rutland Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/interrupt-controller/interrupts.txt | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Documentation/devicetree/bindings/interrupt-controller/interrupts.txt b/Documentation/devicetree/bindings/interrupt-controller/interrupts.txt index ce6a1a072028..8a3c40829899 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/interrupts.txt +++ b/Documentation/devicetree/bindings/interrupt-controller/interrupts.txt @@ -30,10 +30,6 @@ should only be used when a device has multiple interrupt parents. Example: interrupts-extended = <&intc1 5 1>, <&intc2 1 0>; -A device node may contain either "interrupts" or "interrupts-extended", but not -both. If both properties are present, then the operating system should log an -error and use only the data in "interrupts". - 2) Interrupt controller nodes ----------------------------- -- cgit v1.2.1 From ab74d00a39f70e1bc34a01322bb59f3750ca7a8c Mon Sep 17 00:00:00 2001 From: Kevin Cernekee Date: Sun, 9 Nov 2014 00:55:47 -0800 Subject: of: Fix crash if an earlycon driver is not found __earlycon_of_table_sentinel.compatible is a char[128], not a pointer, so it will never be NULL. Checking it against NULL causes the match loop to run past the end of the array, and eventually match a bogus entry, under the following conditions: - Kernel command line specifies "earlycon" with no parameters - DT has a stdout-path pointing to a UART node - The UART driver doesn't use OF_EARLYCON_DECLARE (or maybe the console driver is compiled out) Fix this by checking to see if match->compatible is a non-empty string. Signed-off-by: Kevin Cernekee Cc: # 3.16+ Signed-off-by: Rob Herring --- drivers/of/fdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index d1ffca8b34ea..30e97bcc4f88 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -773,7 +773,7 @@ int __init early_init_dt_scan_chosen_serial(void) if (offset < 0) return -ENODEV; - while (match->compatible) { + while (match->compatible[0]) { unsigned long addr; if (fdt_node_check_compatible(fdt, offset, match->compatible)) { match++; -- cgit v1.2.1 From 9b6eab07588c2de102423fe99c875fc4bfda2508 Mon Sep 17 00:00:00 2001 From: Antony Pavlov Date: Sun, 9 Nov 2014 01:37:34 +0300 Subject: devicetree: vendor-prefixes.txt: fix whitespace Signed-off-by: Antony Pavlov Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/vendor-prefixes.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index 3de80303de83..f9c7e77d218a 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -139,7 +139,7 @@ silergy Silergy Corp. sirf SiRF Technology, Inc. sitronix Sitronix Technology Corporation smsc Standard Microsystems Corporation -snps Synopsys, Inc. +snps Synopsys, Inc. solidrun SolidRun sony Sony Corporation spansion Spansion Inc. -- cgit v1.2.1 From 746c9e9f92dde2789908e51a354ba90a1962a2eb Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 14 Nov 2014 17:55:03 +1100 Subject: of/base: Fix PowerPC address parsing hack We have a historical hack that treats missing ranges properties as the equivalent of an empty one. This is needed for ancient PowerMac "bad" device-trees, and shouldn't be enabled for any other PowerPC platform, otherwise we get some nasty layout of devices in sysfs or even duplication when a set of otherwise identically named devices is created multiple times under a different parent node with no ranges property. This fix is needed for the PowerNV i2c busses to be exposed properly and will fix a number of other embedded cases. Signed-off-by: Benjamin Herrenschmidt CC: Acked-by: Grant Likely Signed-off-by: Rob Herring --- drivers/of/address.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/of/address.c b/drivers/of/address.c index afdb78299f61..06af494184d6 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -450,6 +450,21 @@ static struct of_bus *of_match_bus(struct device_node *np) return NULL; } +static int of_empty_ranges_quirk(void) +{ + if (IS_ENABLED(CONFIG_PPC)) { + /* To save cycles, we cache the result */ + static int quirk_state = -1; + + if (quirk_state < 0) + quirk_state = + of_machine_is_compatible("Power Macintosh") || + of_machine_is_compatible("MacRISC"); + return quirk_state; + } + return false; +} + static int of_translate_one(struct device_node *parent, struct of_bus *bus, struct of_bus *pbus, __be32 *addr, int na, int ns, int pna, const char *rprop) @@ -475,12 +490,10 @@ static int of_translate_one(struct device_node *parent, struct of_bus *bus, * This code is only enabled on powerpc. --gcl */ ranges = of_get_property(parent, rprop, &rlen); -#if !defined(CONFIG_PPC) - if (ranges == NULL) { + if (ranges == NULL && !of_empty_ranges_quirk()) { pr_err("OF: no ranges; cannot translate\n"); return 1; } -#endif /* !defined(CONFIG_PPC) */ if (ranges == NULL || rlen == 0) { offset = of_read_number(addr, na); memset(addr, 0, pna * 4); -- cgit v1.2.1 From f9cb89b63db8cb2755a5179843a0643cc284f1ef Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Wed, 14 May 2014 11:24:43 +0200 Subject: of: Add vendor prefix for Chips&Media, Inc. Chips&Media is a developer of Video Codec IP cores. Signed-off-by: Philipp Zabel [robh: fix-up alphabetical ordering] Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/vendor-prefixes.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index f9c7e77d218a..a0655781d87d 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -34,6 +34,7 @@ chipidea Chipidea, Inc chrp Common Hardware Reference Platform chunghwa Chunghwa Picture Tubes Ltd. cirrus Cirrus Logic, Inc. +cnm Chips&Media, Inc. cortina Cortina Systems, Inc. crystalfontz Crystalfontz America, Inc. dallas Maxim Integrated Products (formerly Dallas Semiconductor) -- cgit v1.2.1 From 5641c09226f401ee054e48521707fb185380e8d3 Mon Sep 17 00:00:00 2001 From: bpqw Date: Wed, 12 Nov 2014 14:26:42 +0000 Subject: devicetree: bindings: Add vendor prefix for Micron Technology, Inc. This patch is used to add vendor prefix for Micron Technology, Inc. in the vendor-prefixes.txt file. Micron Technology, Inc. is an American multinational corporation based in Boise, Idaho, best known for producing many forms of semiconductor devices. This includes DRAM, SDRAM, flash memory, eMMC and SSDs. Signed-off-by: Bean Huo [robh: cleanup commit msg formatting and company name] Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/vendor-prefixes.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index a0655781d87d..a344ec2713a5 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -93,6 +93,7 @@ maxim Maxim Integrated Products mediatek MediaTek Inc. micrel Micrel Inc. microchip Microchip Technology Inc. +micron Micron Technology Inc. mitsubishi Mitsubishi Electric Corporation mosaixtech Mosaix Technologies, Inc. moxa Moxa -- cgit v1.2.1 From a0e27f51ba8a04125c22a95c4d3e98297a7191de Mon Sep 17 00:00:00 2001 From: Soren Brinkmann Date: Thu, 6 Nov 2014 07:38:51 -0800 Subject: documentation: pinctrl bindings: Fix trivial typo 'abitrary' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A misspelled 'arbitrary' propagated to quite a few locations in the DT binding documentation for pin-controllers. Fixing by: git grep abitrary | cut -f1 -d: | xargs sed -i 's/abitrary/arbitrary/' Reported-by: Andreas Färber Signed-off-by: Soren Brinkmann Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/pinctrl/img,tz1090-pdc-pinctrl.txt | 2 +- Documentation/devicetree/bindings/pinctrl/img,tz1090-pinctrl.txt | 2 +- Documentation/devicetree/bindings/pinctrl/lantiq,falcon-pinumx.txt | 2 +- Documentation/devicetree/bindings/pinctrl/lantiq,xway-pinumx.txt | 2 +- Documentation/devicetree/bindings/pinctrl/nvidia,tegra20-pinmux.txt | 2 +- Documentation/devicetree/bindings/pinctrl/pinctrl-sirf.txt | 2 +- Documentation/devicetree/bindings/pinctrl/pinctrl_spear.txt | 2 +- Documentation/devicetree/bindings/pinctrl/qcom,apq8064-pinctrl.txt | 2 +- Documentation/devicetree/bindings/pinctrl/qcom,apq8084-pinctrl.txt | 2 +- Documentation/devicetree/bindings/pinctrl/qcom,ipq8064-pinctrl.txt | 2 +- Documentation/devicetree/bindings/pinctrl/qcom,msm8960-pinctrl.txt | 2 +- Documentation/devicetree/bindings/pinctrl/qcom,msm8974-pinctrl.txt | 2 +- 12 files changed, 12 insertions(+), 12 deletions(-) diff --git a/Documentation/devicetree/bindings/pinctrl/img,tz1090-pdc-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/img,tz1090-pdc-pinctrl.txt index a186181c402b..51b943cc9770 100644 --- a/Documentation/devicetree/bindings/pinctrl/img,tz1090-pdc-pinctrl.txt +++ b/Documentation/devicetree/bindings/pinctrl/img,tz1090-pdc-pinctrl.txt @@ -9,7 +9,7 @@ Please refer to pinctrl-bindings.txt in this directory for details of the common pinctrl bindings used by client devices, including the meaning of the phrase "pin configuration node". -TZ1090-PDC's pin configuration nodes act as a container for an abitrary number +TZ1090-PDC's pin configuration nodes act as a container for an arbitrary number of subnodes. Each of these subnodes represents some desired configuration for a pin, a group, or a list of pins or groups. This configuration can include the mux function to select on those pin(s)/group(s), and various pin configuration diff --git a/Documentation/devicetree/bindings/pinctrl/img,tz1090-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/img,tz1090-pinctrl.txt index 4b27c99f7f9d..49d0e6050940 100644 --- a/Documentation/devicetree/bindings/pinctrl/img,tz1090-pinctrl.txt +++ b/Documentation/devicetree/bindings/pinctrl/img,tz1090-pinctrl.txt @@ -9,7 +9,7 @@ Please refer to pinctrl-bindings.txt in this directory for details of the common pinctrl bindings used by client devices, including the meaning of the phrase "pin configuration node". -TZ1090's pin configuration nodes act as a container for an abitrary number of +TZ1090's pin configuration nodes act as a container for an arbitrary number of subnodes. Each of these subnodes represents some desired configuration for a pin, a group, or a list of pins or groups. This configuration can include the mux function to select on those pin(s)/group(s), and various pin configuration diff --git a/Documentation/devicetree/bindings/pinctrl/lantiq,falcon-pinumx.txt b/Documentation/devicetree/bindings/pinctrl/lantiq,falcon-pinumx.txt index daa768956069..ac4da9fe07bd 100644 --- a/Documentation/devicetree/bindings/pinctrl/lantiq,falcon-pinumx.txt +++ b/Documentation/devicetree/bindings/pinctrl/lantiq,falcon-pinumx.txt @@ -9,7 +9,7 @@ Please refer to pinctrl-bindings.txt in this directory for details of the common pinctrl bindings used by client devices, including the meaning of the phrase "pin configuration node". -Lantiq's pin configuration nodes act as a container for an abitrary number of +Lantiq's pin configuration nodes act as a container for an arbitrary number of subnodes. Each of these subnodes represents some desired configuration for a pin, a group, or a list of pins or groups. This configuration can include the mux function to select on those group(s), and two pin configuration parameters: diff --git a/Documentation/devicetree/bindings/pinctrl/lantiq,xway-pinumx.txt b/Documentation/devicetree/bindings/pinctrl/lantiq,xway-pinumx.txt index b5469db1d7ad..e89b4677567d 100644 --- a/Documentation/devicetree/bindings/pinctrl/lantiq,xway-pinumx.txt +++ b/Documentation/devicetree/bindings/pinctrl/lantiq,xway-pinumx.txt @@ -9,7 +9,7 @@ Please refer to pinctrl-bindings.txt in this directory for details of the common pinctrl bindings used by client devices, including the meaning of the phrase "pin configuration node". -Lantiq's pin configuration nodes act as a container for an abitrary number of +Lantiq's pin configuration nodes act as a container for an arbitrary number of subnodes. Each of these subnodes represents some desired configuration for a pin, a group, or a list of pins or groups. This configuration can include the mux function to select on those group(s), and two pin configuration parameters: diff --git a/Documentation/devicetree/bindings/pinctrl/nvidia,tegra20-pinmux.txt b/Documentation/devicetree/bindings/pinctrl/nvidia,tegra20-pinmux.txt index 61e73cde9ae9..3c8ce28baad6 100644 --- a/Documentation/devicetree/bindings/pinctrl/nvidia,tegra20-pinmux.txt +++ b/Documentation/devicetree/bindings/pinctrl/nvidia,tegra20-pinmux.txt @@ -9,7 +9,7 @@ Please refer to pinctrl-bindings.txt in this directory for details of the common pinctrl bindings used by client devices, including the meaning of the phrase "pin configuration node". -Tegra's pin configuration nodes act as a container for an abitrary number of +Tegra's pin configuration nodes act as a container for an arbitrary number of subnodes. Each of these subnodes represents some desired configuration for a pin, a group, or a list of pins or groups. This configuration can include the mux function to select on those pin(s)/group(s), and various pin configuration diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl-sirf.txt b/Documentation/devicetree/bindings/pinctrl/pinctrl-sirf.txt index c596a6ad3285..5f55be59d914 100644 --- a/Documentation/devicetree/bindings/pinctrl/pinctrl-sirf.txt +++ b/Documentation/devicetree/bindings/pinctrl/pinctrl-sirf.txt @@ -13,7 +13,7 @@ Optional properties: Please refer to pinctrl-bindings.txt in this directory for details of the common pinctrl bindings used by client devices. -SiRFprimaII's pinmux nodes act as a container for an abitrary number of subnodes. +SiRFprimaII's pinmux nodes act as a container for an arbitrary number of subnodes. Each of these subnodes represents some desired configuration for a group of pins. Required subnode-properties: diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl_spear.txt b/Documentation/devicetree/bindings/pinctrl/pinctrl_spear.txt index b4480d5c3aca..458615596946 100644 --- a/Documentation/devicetree/bindings/pinctrl/pinctrl_spear.txt +++ b/Documentation/devicetree/bindings/pinctrl/pinctrl_spear.txt @@ -32,7 +32,7 @@ Required properties: Please refer to pinctrl-bindings.txt in this directory for details of the common pinctrl bindings used by client devices. -SPEAr's pinmux nodes act as a container for an abitrary number of subnodes. Each +SPEAr's pinmux nodes act as a container for an arbitrary number of subnodes. Each of these subnodes represents muxing for a pin, a group, or a list of pins or groups. diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,apq8064-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/qcom,apq8064-pinctrl.txt index 2fb90b37aa09..a7bde64798c7 100644 --- a/Documentation/devicetree/bindings/pinctrl/qcom,apq8064-pinctrl.txt +++ b/Documentation/devicetree/bindings/pinctrl/qcom,apq8064-pinctrl.txt @@ -18,7 +18,7 @@ Please refer to pinctrl-bindings.txt in this directory for details of the common pinctrl bindings used by client devices, including the meaning of the phrase "pin configuration node". -Qualcomm's pin configuration nodes act as a container for an abitrary number of +Qualcomm's pin configuration nodes act as a container for an arbitrary number of subnodes. Each of these subnodes represents some desired configuration for a pin, a group, or a list of pins or groups. This configuration can include the mux function to select on those pin(s)/group(s), and various pin configuration diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,apq8084-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/qcom,apq8084-pinctrl.txt index ffafa1990a30..c4ea61ac56f2 100644 --- a/Documentation/devicetree/bindings/pinctrl/qcom,apq8084-pinctrl.txt +++ b/Documentation/devicetree/bindings/pinctrl/qcom,apq8084-pinctrl.txt @@ -47,7 +47,7 @@ Please refer to pinctrl-bindings.txt in this directory for details of the common pinctrl bindings used by client devices, including the meaning of the phrase "pin configuration node". -The pin configuration nodes act as a container for an abitrary number of +The pin configuration nodes act as a container for an arbitrary number of subnodes. Each of these subnodes represents some desired configuration for a pin, a group, or a list of pins or groups. This configuration can include the mux function to select on those pin(s)/group(s), and various pin configuration diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,ipq8064-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/qcom,ipq8064-pinctrl.txt index e33e4dcdce79..6e88e91feb11 100644 --- a/Documentation/devicetree/bindings/pinctrl/qcom,ipq8064-pinctrl.txt +++ b/Documentation/devicetree/bindings/pinctrl/qcom,ipq8064-pinctrl.txt @@ -18,7 +18,7 @@ Please refer to pinctrl-bindings.txt in this directory for details of the common pinctrl bindings used by client devices, including the meaning of the phrase "pin configuration node". -Qualcomm's pin configuration nodes act as a container for an abitrary number of +Qualcomm's pin configuration nodes act as a container for an arbitrary number of subnodes. Each of these subnodes represents some desired configuration for a pin, a group, or a list of pins or groups. This configuration can include the mux function to select on those pin(s)/group(s), and various pin configuration diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,msm8960-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/qcom,msm8960-pinctrl.txt index 93b7de91b9f6..eb8d8aa41f20 100644 --- a/Documentation/devicetree/bindings/pinctrl/qcom,msm8960-pinctrl.txt +++ b/Documentation/devicetree/bindings/pinctrl/qcom,msm8960-pinctrl.txt @@ -47,7 +47,7 @@ Please refer to pinctrl-bindings.txt in this directory for details of the common pinctrl bindings used by client devices, including the meaning of the phrase "pin configuration node". -The pin configuration nodes act as a container for an abitrary number of +The pin configuration nodes act as a container for an arbitrary number of subnodes. Each of these subnodes represents some desired configuration for a pin, a group, or a list of pins or groups. This configuration can include the mux function to select on those pin(s)/group(s), and various pin configuration diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,msm8974-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/qcom,msm8974-pinctrl.txt index d2ea80dc43eb..e4d6a9d20f7d 100644 --- a/Documentation/devicetree/bindings/pinctrl/qcom,msm8974-pinctrl.txt +++ b/Documentation/devicetree/bindings/pinctrl/qcom,msm8974-pinctrl.txt @@ -18,7 +18,7 @@ Please refer to pinctrl-bindings.txt in this directory for details of the common pinctrl bindings used by client devices, including the meaning of the phrase "pin configuration node". -Qualcomm's pin configuration nodes act as a container for an abitrary number of +Qualcomm's pin configuration nodes act as a container for an arbitrary number of subnodes. Each of these subnodes represents some desired configuration for a pin, a group, or a list of pins or groups. This configuration can include the mux function to select on those pin(s)/group(s), and various pin configuration -- cgit v1.2.1 From 50212b425d0ef8b606b316826b56abbb48680758 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Tue, 18 Nov 2014 13:16:13 +0100 Subject: can: gs_usb: add .ndo_change_mtu function Use common can_change_mtu function. Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/gs_usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index 04b0f84612f0..009acc8641fc 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -718,6 +718,7 @@ static const struct net_device_ops gs_usb_netdev_ops = { .ndo_open = gs_can_open, .ndo_stop = gs_can_close, .ndo_start_xmit = gs_can_start_xmit, + .ndo_change_mtu = can_change_mtu, }; static struct gs_can *gs_make_candev(unsigned int channel, struct usb_interface *intf) -- cgit v1.2.1 From d6fdb38b4f2c4090e176aa55fe73dd2f45cfa4a6 Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Wed, 29 Oct 2014 18:45:23 +0800 Subject: can: m_can: add .ndo_change_mtu function Use common can_change_mtu function. Signed-off-by: Dong Aisheng Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 10d571eaed85..e61886bf1bb3 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -992,6 +992,7 @@ static const struct net_device_ops m_can_netdev_ops = { .ndo_open = m_can_open, .ndo_stop = m_can_close, .ndo_start_xmit = m_can_start_xmit, + .ndo_change_mtu = can_change_mtu, }; static int register_m_can_dev(struct net_device *dev) -- cgit v1.2.1 From efe22286e05751e9913a8002fefdb45cdb7361ad Mon Sep 17 00:00:00 2001 From: David Cohen Date: Wed, 15 Oct 2014 14:41:50 -0700 Subject: can: m_can: add CONFIG_HAS_IOMEM dependence m_can uses io memory which makes it not compilable on architectures without HAS_IOMEM such as UML: drivers/built-in.o: In function `m_can_plat_probe': m_can.c:(.text+0x218cc5): undefined reference to `devm_ioremap_resource' m_can.c:(.text+0x218df9): undefined reference to `devm_ioremap' Signed-off-by: David Cohen Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/m_can/Kconfig b/drivers/net/can/m_can/Kconfig index fca5482c09ac..04f20dd39007 100644 --- a/drivers/net/can/m_can/Kconfig +++ b/drivers/net/can/m_can/Kconfig @@ -1,4 +1,5 @@ config CAN_M_CAN + depends on HAS_IOMEM tristate "Bosch M_CAN devices" ---help--- Say Y here if you want to support for Bosch M_CAN controller. -- cgit v1.2.1 From 962845da54afe2fc7ebf64c2d8bb5aa65a826b2f Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Fri, 7 Nov 2014 16:45:14 +0800 Subject: can: m_can: add missing message RAM initialization The M_CAN message RAM is usually equipped with a parity or ECC functionality. But RAM cells suffer a hardware reset and can therefore hold arbitrary content at startup - including parity and/or ECC bits. To prevent the M_CAN controller detecting checksum errors when reading potentially uninitialized TX message RAM content to transmit CAN frames the TX message RAM has to be written with (any kind of) initial data. Signed-off-by: Dong Aisheng Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index e61886bf1bb3..268ad5064c47 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1010,7 +1010,7 @@ static int m_can_of_parse_mram(struct platform_device *pdev, struct resource *res; void __iomem *addr; u32 out_val[MRAM_CFG_LEN]; - int ret; + int i, start, end, ret; /* message ram could be shared */ res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "message_ram"); @@ -1061,6 +1061,15 @@ static int m_can_of_parse_mram(struct platform_device *pdev, priv->mcfg[MRAM_TXE].off, priv->mcfg[MRAM_TXE].num, priv->mcfg[MRAM_TXB].off, priv->mcfg[MRAM_TXB].num); + /* initialize the entire Message RAM in use to avoid possible + * ECC/parity checksum errors when reading an uninitialized buffer + */ + start = priv->mcfg[MRAM_SIDF].off; + end = priv->mcfg[MRAM_TXB].off + + priv->mcfg[MRAM_TXB].num * TXB_ELEMENT_SIZE; + for (i = start; i < end; i += 4) + writel(0x0, priv->mram_base + i); + return 0; } -- cgit v1.2.1 From f6a99649526370c7a88b86736c02f2a74b5b20e7 Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Wed, 29 Oct 2014 18:45:21 +0800 Subject: can: m_can: fix possible sleep in napi poll The m_can_get_berr_counter function can sleep and it may be called in napi poll function. Rework it to fix the following warning. root@imx6qdlsolo:~# cangen can0 -f -L 12 -D 112233445566778899001122 [ 1846.017565] m_can 20e8000.can can0: entered error warning state [ 1846.023551] ------------[ cut here ]------------ [ 1846.028216] WARNING: CPU: 0 PID: 560 at kernel/locking/mutex.c:867 mutex_trylock+0x218/0x23c() [ 1846.036889] DEBUG_LOCKS_WARN_ON(in_interrupt()) [ 1846.041263] Modules linked in: [ 1846.044594] CPU: 0 PID: 560 Comm: cangen Not tainted 3.17.0-rc4-next-20140915-00010-g032d018-dirty #477 [ 1846.054033] Backtrace: [ 1846.056557] [<80012448>] (dump_backtrace) from [<80012728>] (show_stack+0x18/0x1c) [ 1846.064180] r6:809a07ec r5:809a07ec r4:00000000 r3:00000000 [ 1846.069966] [<80012710>] (show_stack) from [<806c9ee0>] (dump_stack+0x8c/0xa4) [ 1846.077264] [<806c9e54>] (dump_stack) from [<8002aa78>] (warn_slowpath_common+0x70/0x94) [ 1846.085403] r6:806cd1b0 r5:00000009 r4:be1d5c20 r3:be07b0c0 [ 1846.091204] [<8002aa08>] (warn_slowpath_common) from [<8002aad4>] (warn_slowpath_fmt+0x38/0x40) [ 1846.099951] r8:8119106c r7:80515aa4 r6:be027000 r5:00000001 r4:809d1df4 [ 1846.106830] [<8002aaa0>] (warn_slowpath_fmt) from [<806cd1b0>] (mutex_trylock+0x218/0x23c) [ 1846.115141] r3:80851c88 r2:8084fb74 [ 1846.118804] [<806ccf98>] (mutex_trylock) from [<80515aa4>] (clk_prepare_lock+0x14/0xf4) [ 1846.126859] r8:00000040 r7:be1d5cec r6:be027000 r5:be255800 r4:be027000 [ 1846.133737] [<80515a90>] (clk_prepare_lock) from [<80517660>] (clk_prepare+0x14/0x2c) [ 1846.141583] r5:be255800 r4:be027000 [ 1846.145272] [<8051764c>] (clk_prepare) from [<8041ff14>] (m_can_get_berr_counter+0x20/0xd4) [ 1846.153672] r4:be255800 r3:be07b0c0 [ 1846.157325] [<8041fef4>] (m_can_get_berr_counter) from [<80420428>] (m_can_poll+0x310/0x8fc) [ 1846.165809] r7:bd4dc540 r6:00000744 r5:11300000 r4:be255800 [ 1846.171590] [<80420118>] (m_can_poll) from [<8056a468>] (net_rx_action+0xcc/0x1b4) [ 1846.179204] r10:00000101 r9:be255ebc r8:00000040 r7:be7c3208 r6:8097c100 r5:be7c3200 [ 1846.187192] r4:0000012c [ 1846.189779] [<8056a39c>] (net_rx_action) from [<8002deec>] (__do_softirq+0xfc/0x2c4) [ 1846.197568] r10:00000101 r9:8097c088 r8:00000003 r7:8097c080 r6:40000001 r5:8097c08c [ 1846.205559] r4:00000020 [ 1846.208144] [<8002ddf0>] (__do_softirq) from [<8002e194>] (do_softirq+0x7c/0x88) [ 1846.215588] r10:00000000 r9:bd516a60 r8:be18ce00 r7:00000000 r6:be255800 r5:8056c0ec [ 1846.223578] r4:60000093 [ 1846.226163] [<8002e118>] (do_softirq) from [<8002e288>] (__local_bh_enable_ip+0xe8/0x10c) [ 1846.234386] r4:00000200 r3:be1d4000 [ 1846.238036] [<8002e1a0>] (__local_bh_enable_ip) from [<8056c108>] (__dev_queue_xmit+0x314/0x6b0) [ 1846.246868] r6:be255800 r5:bd516a00 r4:00000000 r3:be07b0c0 [ 1846.252645] [<8056bdf4>] (__dev_queue_xmit) from [<8056c4b8>] (dev_queue_xmit+0x14/0x18) Signed-off-by: Dong Aisheng Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 268ad5064c47..214160b4c314 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -481,11 +481,23 @@ static int m_can_handle_lec_err(struct net_device *dev, return 1; } +static int __m_can_get_berr_counter(const struct net_device *dev, + struct can_berr_counter *bec) +{ + struct m_can_priv *priv = netdev_priv(dev); + unsigned int ecr; + + ecr = m_can_read(priv, M_CAN_ECR); + bec->rxerr = (ecr & ECR_REC_MASK) >> ECR_REC_SHIFT; + bec->txerr = ecr & ECR_TEC_MASK; + + return 0; +} + static int m_can_get_berr_counter(const struct net_device *dev, struct can_berr_counter *bec) { struct m_can_priv *priv = netdev_priv(dev); - unsigned int ecr; int err; err = clk_prepare_enable(priv->hclk); @@ -498,9 +510,7 @@ static int m_can_get_berr_counter(const struct net_device *dev, return err; } - ecr = m_can_read(priv, M_CAN_ECR); - bec->rxerr = (ecr & ECR_REC_MASK) >> ECR_REC_SHIFT; - bec->txerr = ecr & ECR_TEC_MASK; + __m_can_get_berr_counter(dev, bec); clk_disable_unprepare(priv->cclk); clk_disable_unprepare(priv->hclk); @@ -544,7 +554,7 @@ static int m_can_handle_state_change(struct net_device *dev, if (unlikely(!skb)) return 0; - m_can_get_berr_counter(dev, &bec); + __m_can_get_berr_counter(dev, &bec); switch (new_state) { case CAN_STATE_ERROR_ACTIVE: -- cgit v1.2.1 From 921f168109d029e3b59459c3a2754f0e2a70fad3 Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Tue, 18 Nov 2014 19:00:54 +0800 Subject: can: m_can: fix not set can_dlc for remote frame The original code missed to set the cf->can_dlc in the RTR case, so add it. Signed-off-by: Dong Aisheng Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 214160b4c314..98f7e0ea7f6a 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -330,7 +330,7 @@ static void m_can_read_fifo(const struct net_device *dev, struct can_frame *cf, u32 rxfs) { struct m_can_priv *priv = netdev_priv(dev); - u32 id, fgi; + u32 id, fgi, dlc; /* calculate the fifo get index for where to read data */ fgi = (rxfs & RXFS_FGI_MASK) >> RXFS_FGI_OFF; @@ -340,11 +340,12 @@ static void m_can_read_fifo(const struct net_device *dev, struct can_frame *cf, else cf->can_id = (id >> 18) & CAN_SFF_MASK; + dlc = m_can_fifo_read(priv, fgi, M_CAN_FIFO_DLC); + cf->can_dlc = get_can_dlc((dlc >> 16) & 0x0F); + if (id & RX_BUF_RTR) { cf->can_id |= CAN_RTR_FLAG; } else { - id = m_can_fifo_read(priv, fgi, M_CAN_FIFO_DLC); - cf->can_dlc = get_can_dlc((id >> 16) & 0x0F); *(u32 *)(cf->data + 0) = m_can_fifo_read(priv, fgi, M_CAN_FIFO_DATA(0)); *(u32 *)(cf->data + 4) = m_can_fifo_read(priv, fgi, -- cgit v1.2.1 From 7660f633070986ab4ee41c063a90e140d5781e13 Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Wed, 29 Oct 2014 18:45:24 +0800 Subject: can: m_can: add missing delay after setting CCCR_INIT bit The spec mentions there may be a delay until the value written to INIT can be read back due to the synchronization mechanism between the two clock domains. But it does not indicate the exact clock cycles needed. The 5us delay is a test value and seems ok. Without the delay, CCCR.CCE bit may fail to be set and then the initialization fail sometimes when do repeatly up and down. Signed-off-by: Dong Aisheng Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 98f7e0ea7f6a..3ad7d88720b7 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -296,6 +296,7 @@ static inline void m_can_config_endisable(const struct m_can_priv *priv, if (enable) { /* enable m_can configuration */ m_can_write(priv, M_CAN_CCCR, cccr | CCCR_INIT); + udelay(5); /* CCCR.CCE can only be set/reset while CCCR.INIT = '1' */ m_can_write(priv, M_CAN_CCCR, cccr | CCCR_INIT | CCCR_CCE); } else { -- cgit v1.2.1 From a93f5cae67b366e4ce7e9eb336e2885309fd6612 Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Wed, 29 Oct 2014 18:45:22 +0800 Subject: can: m_can: fix incorrect error messages Fix a few error messages. Signed-off-by: Dong Aisheng Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 3ad7d88720b7..2f61676b3a97 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -608,14 +608,14 @@ static int m_can_handle_state_errors(struct net_device *dev, u32 psr) if ((psr & PSR_EP) && (priv->can.state != CAN_STATE_ERROR_PASSIVE)) { - netdev_dbg(dev, "entered error warning state\n"); + netdev_dbg(dev, "entered error passive state\n"); work_done += m_can_handle_state_change(dev, CAN_STATE_ERROR_PASSIVE); } if ((psr & PSR_BO) && (priv->can.state != CAN_STATE_BUS_OFF)) { - netdev_dbg(dev, "entered error warning state\n"); + netdev_dbg(dev, "entered error bus off state\n"); work_done += m_can_handle_state_change(dev, CAN_STATE_BUS_OFF); } @@ -627,7 +627,7 @@ static void m_can_handle_other_err(struct net_device *dev, u32 irqstatus) { if (irqstatus & IR_WDI) netdev_err(dev, "Message RAM Watchdog event due to missing READY\n"); - if (irqstatus & IR_BEU) + if (irqstatus & IR_ELO) netdev_err(dev, "Error Logging Overflow\n"); if (irqstatus & IR_BEU) netdev_err(dev, "Bit Error Uncorrected\n"); -- cgit v1.2.1 From 80646733f11c2e9de3b6339f7e635047e6087280 Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Tue, 18 Nov 2014 19:00:55 +0800 Subject: can: m_can: update to support CAN FD features Bosch M_CAN is CAN FD capable device. This patch implements the CAN FD features include up to 64 bytes payload and bitrate switch function. 1) Change the Rx FIFO and Tx Buffer to 64 bytes for support CAN FD up to 64 bytes payload. It's backward compatible with old 8 bytes normal CAN frame. 2) Allocate can frame or canfd frame based on EDL bit 3) Bitrate Switch function is disabled by default and will be enabled according to CANFD_BRS bit in cf->flags. Acked-by: Oliver Hartkopp Signed-off-by: Dong Aisheng Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 177 ++++++++++++++++++++++++++++++++---------- 1 file changed, 134 insertions(+), 43 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 2f61676b3a97..d7bc462aafdc 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -105,14 +105,36 @@ enum m_can_mram_cfg { MRAM_CFG_NUM, }; +/* Fast Bit Timing & Prescaler Register (FBTP) */ +#define FBTR_FBRP_MASK 0x1f +#define FBTR_FBRP_SHIFT 16 +#define FBTR_FTSEG1_SHIFT 8 +#define FBTR_FTSEG1_MASK (0xf << FBTR_FTSEG1_SHIFT) +#define FBTR_FTSEG2_SHIFT 4 +#define FBTR_FTSEG2_MASK (0x7 << FBTR_FTSEG2_SHIFT) +#define FBTR_FSJW_SHIFT 0 +#define FBTR_FSJW_MASK 0x3 + /* Test Register (TEST) */ #define TEST_LBCK BIT(4) /* CC Control Register(CCCR) */ -#define CCCR_TEST BIT(7) -#define CCCR_MON BIT(5) -#define CCCR_CCE BIT(1) -#define CCCR_INIT BIT(0) +#define CCCR_TEST BIT(7) +#define CCCR_CMR_MASK 0x3 +#define CCCR_CMR_SHIFT 10 +#define CCCR_CMR_CANFD 0x1 +#define CCCR_CMR_CANFD_BRS 0x2 +#define CCCR_CMR_CAN 0x3 +#define CCCR_CME_MASK 0x3 +#define CCCR_CME_SHIFT 8 +#define CCCR_CME_CAN 0 +#define CCCR_CME_CANFD 0x1 +#define CCCR_CME_CANFD_BRS 0x2 +#define CCCR_TEST BIT(7) +#define CCCR_MON BIT(5) +#define CCCR_CCE BIT(1) +#define CCCR_INIT BIT(0) +#define CCCR_CANFD 0x10 /* Bit Timing & Prescaler Register (BTP) */ #define BTR_BRP_MASK 0x3ff @@ -204,6 +226,7 @@ enum m_can_mram_cfg { /* Rx Buffer / FIFO Element Size Configuration (RXESC) */ #define M_CAN_RXESC_8BYTES 0x0 +#define M_CAN_RXESC_64BYTES 0x777 /* Tx Buffer Configuration(TXBC) */ #define TXBC_NDTB_OFF 16 @@ -211,6 +234,7 @@ enum m_can_mram_cfg { /* Tx Buffer Element Size Configuration(TXESC) */ #define TXESC_TBDS_8BYTES 0x0 +#define TXESC_TBDS_64BYTES 0x7 /* Tx Event FIFO Con.guration (TXEFC) */ #define TXEFC_EFS_OFF 16 @@ -219,11 +243,11 @@ enum m_can_mram_cfg { /* Message RAM Configuration (in bytes) */ #define SIDF_ELEMENT_SIZE 4 #define XIDF_ELEMENT_SIZE 8 -#define RXF0_ELEMENT_SIZE 16 -#define RXF1_ELEMENT_SIZE 16 +#define RXF0_ELEMENT_SIZE 72 +#define RXF1_ELEMENT_SIZE 72 #define RXB_ELEMENT_SIZE 16 #define TXE_ELEMENT_SIZE 8 -#define TXB_ELEMENT_SIZE 16 +#define TXB_ELEMENT_SIZE 72 /* Message RAM Elements */ #define M_CAN_FIFO_ID 0x0 @@ -231,11 +255,17 @@ enum m_can_mram_cfg { #define M_CAN_FIFO_DATA(n) (0x8 + ((n) << 2)) /* Rx Buffer Element */ +/* R0 */ #define RX_BUF_ESI BIT(31) #define RX_BUF_XTD BIT(30) #define RX_BUF_RTR BIT(29) +/* R1 */ +#define RX_BUF_ANMF BIT(31) +#define RX_BUF_EDL BIT(21) +#define RX_BUF_BRS BIT(20) /* Tx Buffer Element */ +/* R0 */ #define TX_BUF_XTD BIT(30) #define TX_BUF_RTR BIT(29) @@ -327,42 +357,67 @@ static inline void m_can_disable_all_interrupts(const struct m_can_priv *priv) m_can_write(priv, M_CAN_ILE, 0x0); } -static void m_can_read_fifo(const struct net_device *dev, struct can_frame *cf, - u32 rxfs) +static void m_can_read_fifo(struct net_device *dev, u32 rxfs) { + struct net_device_stats *stats = &dev->stats; struct m_can_priv *priv = netdev_priv(dev); + struct canfd_frame *cf; + struct sk_buff *skb; u32 id, fgi, dlc; + int i; /* calculate the fifo get index for where to read data */ fgi = (rxfs & RXFS_FGI_MASK) >> RXFS_FGI_OFF; + dlc = m_can_fifo_read(priv, fgi, M_CAN_FIFO_DLC); + if (dlc & RX_BUF_EDL) + skb = alloc_canfd_skb(dev, &cf); + else + skb = alloc_can_skb(dev, (struct can_frame **)&cf); + if (!skb) { + stats->rx_dropped++; + return; + } + + if (dlc & RX_BUF_EDL) + cf->len = can_dlc2len((dlc >> 16) & 0x0F); + else + cf->len = get_can_dlc((dlc >> 16) & 0x0F); + id = m_can_fifo_read(priv, fgi, M_CAN_FIFO_ID); if (id & RX_BUF_XTD) cf->can_id = (id & CAN_EFF_MASK) | CAN_EFF_FLAG; else cf->can_id = (id >> 18) & CAN_SFF_MASK; - dlc = m_can_fifo_read(priv, fgi, M_CAN_FIFO_DLC); - cf->can_dlc = get_can_dlc((dlc >> 16) & 0x0F); + if (id & RX_BUF_ESI) { + cf->flags |= CANFD_ESI; + netdev_dbg(dev, "ESI Error\n"); + } - if (id & RX_BUF_RTR) { + if (!(dlc & RX_BUF_EDL) && (id & RX_BUF_RTR)) { cf->can_id |= CAN_RTR_FLAG; } else { - *(u32 *)(cf->data + 0) = m_can_fifo_read(priv, fgi, - M_CAN_FIFO_DATA(0)); - *(u32 *)(cf->data + 4) = m_can_fifo_read(priv, fgi, - M_CAN_FIFO_DATA(1)); + if (dlc & RX_BUF_BRS) + cf->flags |= CANFD_BRS; + + for (i = 0; i < cf->len; i += 4) + *(u32 *)(cf->data + i) = + m_can_fifo_read(priv, fgi, + M_CAN_FIFO_DATA(i / 4)); } /* acknowledge rx fifo 0 */ m_can_write(priv, M_CAN_RXF0A, fgi); + + stats->rx_packets++; + stats->rx_bytes += cf->len; + + netif_receive_skb(skb); } static int m_can_do_rx_poll(struct net_device *dev, int quota) { struct m_can_priv *priv = netdev_priv(dev); - struct net_device_stats *stats = &dev->stats; - struct sk_buff *skb; - struct can_frame *frame; u32 pkts = 0; u32 rxfs; @@ -376,18 +431,7 @@ static int m_can_do_rx_poll(struct net_device *dev, int quota) if (rxfs & RXFS_RFL) netdev_warn(dev, "Rx FIFO 0 Message Lost\n"); - skb = alloc_can_skb(dev, &frame); - if (!skb) { - stats->rx_dropped++; - return pkts; - } - - m_can_read_fifo(dev, frame, rxfs); - - stats->rx_packets++; - stats->rx_bytes += frame->can_dlc; - - netif_receive_skb(skb); + m_can_read_fifo(dev, rxfs); quota--; pkts++; @@ -745,10 +789,23 @@ static const struct can_bittiming_const m_can_bittiming_const = { .brp_inc = 1, }; +static const struct can_bittiming_const m_can_data_bittiming_const = { + .name = KBUILD_MODNAME, + .tseg1_min = 2, /* Time segment 1 = prop_seg + phase_seg1 */ + .tseg1_max = 16, + .tseg2_min = 1, /* Time segment 2 = phase_seg2 */ + .tseg2_max = 8, + .sjw_max = 4, + .brp_min = 1, + .brp_max = 32, + .brp_inc = 1, +}; + static int m_can_set_bittiming(struct net_device *dev) { struct m_can_priv *priv = netdev_priv(dev); const struct can_bittiming *bt = &priv->can.bittiming; + const struct can_bittiming *dbt = &priv->can.data_bittiming; u16 brp, sjw, tseg1, tseg2; u32 reg_btp; @@ -759,7 +816,17 @@ static int m_can_set_bittiming(struct net_device *dev) reg_btp = (brp << BTR_BRP_SHIFT) | (sjw << BTR_SJW_SHIFT) | (tseg1 << BTR_TSEG1_SHIFT) | (tseg2 << BTR_TSEG2_SHIFT); m_can_write(priv, M_CAN_BTP, reg_btp); - netdev_dbg(dev, "setting BTP 0x%x\n", reg_btp); + + if (priv->can.ctrlmode & CAN_CTRLMODE_FD) { + brp = dbt->brp - 1; + sjw = dbt->sjw - 1; + tseg1 = dbt->prop_seg + dbt->phase_seg1 - 1; + tseg2 = dbt->phase_seg2 - 1; + reg_btp = (brp << FBTR_FBRP_SHIFT) | (sjw << FBTR_FSJW_SHIFT) | + (tseg1 << FBTR_FTSEG1_SHIFT) | + (tseg2 << FBTR_FTSEG2_SHIFT); + m_can_write(priv, M_CAN_FBTP, reg_btp); + } return 0; } @@ -779,8 +846,8 @@ static void m_can_chip_config(struct net_device *dev) m_can_config_endisable(priv, true); - /* RX Buffer/FIFO Element Size 8 bytes data field */ - m_can_write(priv, M_CAN_RXESC, M_CAN_RXESC_8BYTES); + /* RX Buffer/FIFO Element Size 64 bytes data field */ + m_can_write(priv, M_CAN_RXESC, M_CAN_RXESC_64BYTES); /* Accept Non-matching Frames Into FIFO 0 */ m_can_write(priv, M_CAN_GFC, 0x0); @@ -789,8 +856,8 @@ static void m_can_chip_config(struct net_device *dev) m_can_write(priv, M_CAN_TXBC, (1 << TXBC_NDTB_OFF) | priv->mcfg[MRAM_TXB].off); - /* only support 8 bytes firstly */ - m_can_write(priv, M_CAN_TXESC, TXESC_TBDS_8BYTES); + /* support 64 bytes payload */ + m_can_write(priv, M_CAN_TXESC, TXESC_TBDS_64BYTES); m_can_write(priv, M_CAN_TXEFC, (1 << TXEFC_EFS_OFF) | priv->mcfg[MRAM_TXE].off); @@ -805,7 +872,8 @@ static void m_can_chip_config(struct net_device *dev) RXFC_FWM_1 | priv->mcfg[MRAM_RXF1].off); cccr = m_can_read(priv, M_CAN_CCCR); - cccr &= ~(CCCR_TEST | CCCR_MON); + cccr &= ~(CCCR_TEST | CCCR_MON | (CCCR_CMR_MASK << CCCR_CMR_SHIFT) | + (CCCR_CME_MASK << CCCR_CME_SHIFT)); test = m_can_read(priv, M_CAN_TEST); test &= ~TEST_LBCK; @@ -817,6 +885,9 @@ static void m_can_chip_config(struct net_device *dev) test |= TEST_LBCK; } + if (priv->can.ctrlmode & CAN_CTRLMODE_FD) + cccr |= CCCR_CME_CANFD_BRS << CCCR_CME_SHIFT; + m_can_write(priv, M_CAN_CCCR, cccr); m_can_write(priv, M_CAN_TEST, test); @@ -881,11 +952,13 @@ static struct net_device *alloc_m_can_dev(void) priv->dev = dev; priv->can.bittiming_const = &m_can_bittiming_const; + priv->can.data_bittiming_const = &m_can_data_bittiming_const; priv->can.do_set_mode = m_can_set_mode; priv->can.do_get_berr_counter = m_can_get_berr_counter; priv->can.ctrlmode_supported = CAN_CTRLMODE_LOOPBACK | CAN_CTRLMODE_LISTENONLY | - CAN_CTRLMODE_BERR_REPORTING; + CAN_CTRLMODE_BERR_REPORTING | + CAN_CTRLMODE_FD; return dev; } @@ -968,8 +1041,9 @@ static netdev_tx_t m_can_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct m_can_priv *priv = netdev_priv(dev); - struct can_frame *cf = (struct can_frame *)skb->data; - u32 id; + struct canfd_frame *cf = (struct canfd_frame *)skb->data; + u32 id, cccr; + int i; if (can_dropped_invalid_skb(dev, skb)) return NETDEV_TX_OK; @@ -988,11 +1062,28 @@ static netdev_tx_t m_can_start_xmit(struct sk_buff *skb, /* message ram configuration */ m_can_fifo_write(priv, 0, M_CAN_FIFO_ID, id); - m_can_fifo_write(priv, 0, M_CAN_FIFO_DLC, cf->can_dlc << 16); - m_can_fifo_write(priv, 0, M_CAN_FIFO_DATA(0), *(u32 *)(cf->data + 0)); - m_can_fifo_write(priv, 0, M_CAN_FIFO_DATA(1), *(u32 *)(cf->data + 4)); + m_can_fifo_write(priv, 0, M_CAN_FIFO_DLC, can_len2dlc(cf->len) << 16); + + for (i = 0; i < cf->len; i += 4) + m_can_fifo_write(priv, 0, M_CAN_FIFO_DATA(i / 4), + *(u32 *)(cf->data + i)); + can_put_echo_skb(skb, dev, 0); + if (priv->can.ctrlmode & CAN_CTRLMODE_FD) { + cccr = m_can_read(priv, M_CAN_CCCR); + cccr &= ~(CCCR_CMR_MASK << CCCR_CMR_SHIFT); + if (can_is_canfd_skb(skb)) { + if (cf->flags & CANFD_BRS) + cccr |= CCCR_CMR_CANFD_BRS << CCCR_CMR_SHIFT; + else + cccr |= CCCR_CMR_CANFD << CCCR_CMR_SHIFT; + } else { + cccr |= CCCR_CMR_CAN << CCCR_CMR_SHIFT; + } + m_can_write(priv, M_CAN_CCCR, cccr); + } + /* enable first TX buffer to start transfer */ m_can_write(priv, M_CAN_TXBTIE, 0x1); m_can_write(priv, M_CAN_TXBAR, 0x1); -- cgit v1.2.1 From 11bf7828a59880427403e13dcff8228d67e9e0f7 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Mon, 17 Nov 2014 16:24:54 -0800 Subject: vxlan: Inline vxlan_gso_check(). Suggested-by: Or Gerlitz Signed-off-by: Joe Stringer Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 19 ------------------- include/net/vxlan.h | 18 +++++++++++++++++- 2 files changed, 17 insertions(+), 20 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 6b658638b456..e1e335c339e3 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -67,12 +67,6 @@ #define VXLAN_FLAGS 0x08000000 /* struct vxlanhdr.vx_flags required value. */ -/* VXLAN protocol header */ -struct vxlanhdr { - __be32 vx_flags; - __be32 vx_vni; -}; - /* UDP port for VXLAN traffic. * The IANA assigned port is 4789, but the Linux default is 8472 * for compatibility with early adopters. @@ -1571,19 +1565,6 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb) return false; } -bool vxlan_gso_check(struct sk_buff *skb) -{ - if ((skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) && - (skb->inner_protocol_type != ENCAP_TYPE_ETHER || - skb->inner_protocol != htons(ETH_P_TEB) || - (skb_inner_mac_header(skb) - skb_transport_header(skb) != - sizeof(struct udphdr) + sizeof(struct vxlanhdr)))) - return false; - - return true; -} -EXPORT_SYMBOL_GPL(vxlan_gso_check); - #if IS_ENABLED(CONFIG_IPV6) static int vxlan6_xmit_skb(struct vxlan_sock *vs, struct dst_entry *dst, struct sk_buff *skb, diff --git a/include/net/vxlan.h b/include/net/vxlan.h index afadf8e53f20..57cccd0052e5 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -8,6 +8,12 @@ #define VNI_HASH_BITS 10 #define VNI_HASH_SIZE (1<gso_type & SKB_GSO_UDP_TUNNEL) && + (skb->inner_protocol_type != ENCAP_TYPE_ETHER || + skb->inner_protocol != htons(ETH_P_TEB) || + (skb_inner_mac_header(skb) - skb_transport_header(skb) != + sizeof(struct udphdr) + sizeof(struct vxlanhdr)))) + return false; + + return true; +} /* IP header + UDP + VXLAN + Ethernet header */ #define VXLAN_HEADROOM (20 + 8 + 8 + 14) -- cgit v1.2.1 From 280ba51d60be6f4ca3347eaa60783314f38df72e Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 18 Nov 2014 22:35:31 +0100 Subject: mac80211: minstrel_ht: fix a crash in rate sorting The commit 5935839ad73583781b8bbe8d91412f6826e218a4 "mac80211: improve minstrel_ht rate sorting by throughput & probability" introduced a crash on rate sorting that occurs when the rate added to the sorting array is faster than all the previous rates. Due to an off-by-one error, it reads the rate index from tp_list[-1], which contains uninitialized stack garbage, and then uses the resulting index for accessing the group rate stats, leading to a crash if the garbage value is big enough. Cc: Thomas Huehn Reported-by: Jouni Malinen Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/rc80211_minstrel_ht.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index df90ce2db00c..408fd8ab4eef 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -252,19 +252,16 @@ minstrel_ht_sort_best_tp_rates(struct minstrel_ht_sta *mi, u8 index, cur_thr = mi->groups[cur_group].rates[cur_idx].cur_tp; cur_prob = mi->groups[cur_group].rates[cur_idx].probability; - tmp_group = tp_list[j - 1] / MCS_GROUP_RATES; - tmp_idx = tp_list[j - 1] % MCS_GROUP_RATES; - tmp_thr = mi->groups[tmp_group].rates[tmp_idx].cur_tp; - tmp_prob = mi->groups[tmp_group].rates[tmp_idx].probability; - - while (j > 0 && (cur_thr > tmp_thr || - (cur_thr == tmp_thr && cur_prob > tmp_prob))) { - j--; + do { tmp_group = tp_list[j - 1] / MCS_GROUP_RATES; tmp_idx = tp_list[j - 1] % MCS_GROUP_RATES; tmp_thr = mi->groups[tmp_group].rates[tmp_idx].cur_tp; tmp_prob = mi->groups[tmp_group].rates[tmp_idx].probability; - } + if (cur_thr < tmp_thr || + (cur_thr == tmp_thr && cur_prob <= tmp_prob)) + break; + j--; + } while (j > 0); if (j < MAX_THR_RATES - 1) { memmove(&tp_list[j + 1], &tp_list[j], (sizeof(*tp_list) * -- cgit v1.2.1 From 6bab4a8a1888729f17f4923cc5867e4674f66333 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Tue, 18 Nov 2014 23:59:33 +0100 Subject: clockevent: sun4i: Fix race condition in the probe code The interrupts were activated and the handler registered before the clockevent was registered in the probe function. The interrupt handler, however, was making the assumption that the clockevent device was registered. That could cause a null pointer dereference if the timer interrupt was firing during this narrow window. Fix that by moving the clockevent registration before the interrupt is enabled. Reported-by: Roman Byshko Signed-off-by: Maxime Ripard Cc: stable@vger.kernel.org Signed-off-by: Daniel Lezcano --- drivers/clocksource/sun4i_timer.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/clocksource/sun4i_timer.c b/drivers/clocksource/sun4i_timer.c index efb17c3ee120..f4a9c0058b4d 100644 --- a/drivers/clocksource/sun4i_timer.c +++ b/drivers/clocksource/sun4i_timer.c @@ -182,6 +182,12 @@ static void __init sun4i_timer_init(struct device_node *node) /* Make sure timer is stopped before playing with interrupts */ sun4i_clkevt_time_stop(0); + sun4i_clockevent.cpumask = cpu_possible_mask; + sun4i_clockevent.irq = irq; + + clockevents_config_and_register(&sun4i_clockevent, rate, + TIMER_SYNC_TICKS, 0xffffffff); + ret = setup_irq(irq, &sun4i_timer_irq); if (ret) pr_warn("failed to setup irq %d\n", irq); @@ -189,12 +195,6 @@ static void __init sun4i_timer_init(struct device_node *node) /* Enable timer0 interrupt */ val = readl(timer_base + TIMER_IRQ_EN_REG); writel(val | TIMER_IRQ_EN(0), timer_base + TIMER_IRQ_EN_REG); - - sun4i_clockevent.cpumask = cpu_possible_mask; - sun4i_clockevent.irq = irq; - - clockevents_config_and_register(&sun4i_clockevent, rate, - TIMER_SYNC_TICKS, 0xffffffff); } CLOCKSOURCE_OF_DECLARE(sun4i, "allwinner,sun4i-a10-timer", sun4i_timer_init); -- cgit v1.2.1 From 21255dad9dffa4407cab866f5561cb9568f7f7d8 Mon Sep 17 00:00:00 2001 From: James Cowgill Date: Thu, 13 Nov 2014 11:08:06 +0000 Subject: MIPS: Loongson3: Fix __node_distances undefined error export the __node_distances symbol in the loongson3 numa code to fix the build error: Building modules, stage 2. MODPOST 221 modules ERROR: "__node_distances" [drivers/block/nvme.ko] undefined! scripts/Makefile.modpost:90: recipe for target '__modpost' failed when building the kernel with: CONFIG_CPU_LOONGSON3=y CONFIG_NUMA=y CONFIG_BLK_DEV_NVME=m Signed-off-by: James Cowgill Cc: # v3.17+ Reviewed-by: James Hogan Reviewed-by: Huacai Chen Cc: linux-mips@linux-mips.org Cc: Wei Yongjun Patchwork: https://patchwork.linux-mips.org/patch/8444/ Signed-off-by: Ralf Baechle --- arch/mips/loongson/loongson-3/numa.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/loongson/loongson-3/numa.c b/arch/mips/loongson/loongson-3/numa.c index 37ed184398c6..42323bcc5d28 100644 --- a/arch/mips/loongson/loongson-3/numa.c +++ b/arch/mips/loongson/loongson-3/numa.c @@ -33,6 +33,7 @@ static struct node_data prealloc__node_data[MAX_NUMNODES]; unsigned char __node_distances[MAX_NUMNODES][MAX_NUMNODES]; +EXPORT_SYMBOL(__node_distances); struct node_data *__node_data[MAX_NUMNODES]; EXPORT_SYMBOL(__node_data); -- cgit v1.2.1 From 5829b0ecc584d15ae4eeabe69f2ab554bdec4689 Mon Sep 17 00:00:00 2001 From: James Cowgill Date: Thu, 13 Nov 2014 11:08:07 +0000 Subject: MIPS: IP27: Fix __node_distances undefined error export the __node_distances symbol in the ip27 memory code to fix the build error: Building modules, stage 2. MODPOST 311 modules ERROR: "__node_distances" [drivers/block/nvme.ko] undefined! scripts/Makefile.modpost:90: recipe for target '__modpost' failed when building the kernel with: CONFIG_SGI_IP27=y CONFIG_BLK_DEV_NVME=m Signed-off-by: James Cowgill Cc: # v3.15+ Reviewed-by: James Hogan Signed-off-by: Ralf Baechle --- arch/mips/sgi-ip27/ip27-memory.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index a95c00f5fb96..a304bcc37e4f 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -107,6 +107,7 @@ static void router_recurse(klrou_t *router_a, klrou_t *router_b, int depth) } unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES]; +EXPORT_SYMBOL(__node_distances); static int __init compute_node_distance(nasid_t nasid_a, nasid_t nasid_b) { -- cgit v1.2.1 From cc94ea31150f83d1f70ad854c920dd6b739c1628 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 4 Nov 2014 14:13:22 +0800 Subject: MIPS: Loongson: Fix the write-combine CCA value setting All Loongson-2/3 processors support _CACHE_UNCACHED_ACCELERATED, not only Loongson-3A. Signed-off-by: Huacai Chen Cc: John Crispin Cc: Steven J. Hill Cc: linux-mips@linux-mips.org Cc: Fuxin Zhang Cc: Zhangjin Wu Patchwork: https://patchwork.linux-mips.org/patch/8319/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/cpu-probe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index 94c4a0c0a577..08dc945f7608 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -765,7 +765,6 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) break; case PRID_REV_LOONGSON3A: c->cputype = CPU_LOONGSON3; - c->writecombine = _CACHE_UNCACHED_ACCELERATED; __cpu_name[cpu] = "ICT Loongson-3"; set_elf_platform(cpu, "loongson3a"); break; @@ -782,6 +781,7 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) MIPS_CPU_FPU | MIPS_CPU_LLSC | MIPS_CPU_32FPR; c->tlbsize = 64; + c->writecombine = _CACHE_UNCACHED_ACCELERATED; break; case PRID_IMP_LOONGSON_32: /* Loongson-1 */ decode_configs(c); -- cgit v1.2.1 From 7352c8b13dd9a848b0c5d6209d62761afb164dcb Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 4 Nov 2014 14:13:23 +0800 Subject: MIPS: Loongson: Set Loongson-3's ISA level to MIPS64R1 In CPU manual Loongson-3 is MIPS64R2 compatible, but during tests we found that its EI/DI instructions have problems. So we just set the ISA level to MIPS64R1. Signed-off-by: Huacai Chen Cc: John Crispin Cc: Steven J. Hill Cc: linux-mips@linux-mips.org Cc: Fuxin Zhang Cc: Zhangjin Wu Patchwork: https://patchwork.linux-mips.org/patch/8320/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/mach-loongson/cpu-feature-overrides.h | 2 -- arch/mips/kernel/cpu-probe.c | 5 ++++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/mips/include/asm/mach-loongson/cpu-feature-overrides.h b/arch/mips/include/asm/mach-loongson/cpu-feature-overrides.h index 7d28f95b0512..6d69332f21ec 100644 --- a/arch/mips/include/asm/mach-loongson/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-loongson/cpu-feature-overrides.h @@ -41,10 +41,8 @@ #define cpu_has_mcheck 0 #define cpu_has_mdmx 0 #define cpu_has_mips16 0 -#define cpu_has_mips32r1 0 #define cpu_has_mips32r2 0 #define cpu_has_mips3d 0 -#define cpu_has_mips64r1 0 #define cpu_has_mips64r2 0 #define cpu_has_mipsmt 0 #define cpu_has_prefetch 0 diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index 08dc945f7608..d5a4f380b019 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -757,26 +757,29 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) c->cputype = CPU_LOONGSON2; __cpu_name[cpu] = "ICT Loongson-2"; set_elf_platform(cpu, "loongson2e"); + set_isa(c, MIPS_CPU_ISA_III); break; case PRID_REV_LOONGSON2F: c->cputype = CPU_LOONGSON2; __cpu_name[cpu] = "ICT Loongson-2"; set_elf_platform(cpu, "loongson2f"); + set_isa(c, MIPS_CPU_ISA_III); break; case PRID_REV_LOONGSON3A: c->cputype = CPU_LOONGSON3; __cpu_name[cpu] = "ICT Loongson-3"; set_elf_platform(cpu, "loongson3a"); + set_isa(c, MIPS_CPU_ISA_M64R1); break; case PRID_REV_LOONGSON3B_R1: case PRID_REV_LOONGSON3B_R2: c->cputype = CPU_LOONGSON3; __cpu_name[cpu] = "ICT Loongson-3"; set_elf_platform(cpu, "loongson3b"); + set_isa(c, MIPS_CPU_ISA_M64R1); break; } - set_isa(c, MIPS_CPU_ISA_III); c->options = R4K_OPTS | MIPS_CPU_FPU | MIPS_CPU_LLSC | MIPS_CPU_32FPR; -- cgit v1.2.1 From bbaf113a481b6ce32444c125807ad3618643ce57 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Fri, 17 Oct 2014 18:10:24 +0300 Subject: MIPS: oprofile: Fix backtrace on 64-bit kernel Fix incorrect cast that always results in wrong address for the new frame on 64-bit kernels. Signed-off-by: Aaro Koskinen Cc: stable@vger.kernel.org Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/8110/ Signed-off-by: Ralf Baechle --- arch/mips/oprofile/backtrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/oprofile/backtrace.c b/arch/mips/oprofile/backtrace.c index 6854ed5097d2..83a1dfd8f0e3 100644 --- a/arch/mips/oprofile/backtrace.c +++ b/arch/mips/oprofile/backtrace.c @@ -92,7 +92,7 @@ static inline int unwind_user_frame(struct stackframe *old_frame, /* This marks the end of the previous function, which means we overran. */ break; - stack_size = (unsigned) stack_adjustment; + stack_size = (unsigned long) stack_adjustment; } else if (is_ra_save_ins(&ip)) { int ra_slot = ip.i_format.simmediate; if (ra_slot < 0) -- cgit v1.2.1 From 58563817cfed0432e9a54476d5fc6c3aeba475e4 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Mon, 17 Nov 2014 09:30:23 +0000 Subject: MIPS: asm: uaccess: Add v1 register to clobber list on EVA When EVA is turned on and prefetching is being used in memcpy.S, the v1 register is being used as a helper register to the PREFE instruction. However, v1 ($3) was not in the clobber list, which means that the compiler did not preserve it across function calls, and that could corrupt the value of the register leading to all sorts of userland crashes. We fix this problem by using the DADDI_SCRATCH macro to define the clobbered register when CONFIG_EVA && CONFIG_CPU_HAS_PREFETCH are enabled. Signed-off-by: Markos Chandras Cc: # v3.15+ Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/8510/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/uaccess.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/mips/include/asm/uaccess.h b/arch/mips/include/asm/uaccess.h index a10951090234..b9ab717e3619 100644 --- a/arch/mips/include/asm/uaccess.h +++ b/arch/mips/include/asm/uaccess.h @@ -773,10 +773,11 @@ extern void __put_user_unaligned_unknown(void); "jal\t" #destination "\n\t" #endif -#ifndef CONFIG_CPU_DADDI_WORKAROUNDS -#define DADDI_SCRATCH "$0" -#else +#if defined(CONFIG_CPU_DADDI_WORKAROUNDS) || (defined(CONFIG_EVA) && \ + defined(CONFIG_CPU_HAS_PREFETCH)) #define DADDI_SCRATCH "$3" +#else +#define DADDI_SCRATCH "$0" #endif extern size_t __copy_user(void *__to, const void *__from, size_t __n); -- cgit v1.2.1 From 6a8dff6ab16c903b0d8ef5fbf21543f39bf5d675 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Mon, 17 Nov 2014 09:31:07 +0000 Subject: MIPS: tlb-r4k: Add missing HTW stop/start sequences HTW needs to stop and start again whenever the EntryHI register changes otherwise an inflight HTW operation might use the new EntryHI register for updating an old entry and that could lead to crashes or even a machine check exception. We fix this by ensuring the HTW has stop whenever the EntryHI register is about to change Signed-off-by: Markos Chandras Cc: # v3.17+ Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/8511/ Signed-off-by: Ralf Baechle --- arch/mips/mm/tlb-r4k.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c index fa6ebd4bc9e9..c3917e251f59 100644 --- a/arch/mips/mm/tlb-r4k.c +++ b/arch/mips/mm/tlb-r4k.c @@ -299,6 +299,7 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte) local_irq_save(flags); + htw_stop(); pid = read_c0_entryhi() & ASID_MASK; address &= (PAGE_MASK << 1); write_c0_entryhi(address | pid); @@ -346,6 +347,7 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte) tlb_write_indexed(); } tlbw_use_hazard(); + htw_start(); flush_itlb_vm(vma); local_irq_restore(flags); } @@ -422,6 +424,7 @@ __init int add_temporary_entry(unsigned long entrylo0, unsigned long entrylo1, local_irq_save(flags); /* Save old context and create impossible VPN2 value */ + htw_stop(); old_ctx = read_c0_entryhi(); old_pagemask = read_c0_pagemask(); wired = read_c0_wired(); @@ -443,6 +446,7 @@ __init int add_temporary_entry(unsigned long entrylo0, unsigned long entrylo1, write_c0_entryhi(old_ctx); write_c0_pagemask(old_pagemask); + htw_start(); out: local_irq_restore(flags); return ret; -- cgit v1.2.1 From 51b1029d9966060c6ad02030e6f251425b4f06c1 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Mon, 17 Nov 2014 09:32:38 +0000 Subject: MIPS: lib: memcpy: Restore NOP on delay slot before returning to caller Commit cf62a8b8134dd3 ("MIPS: lib: memcpy: Use macro to build the copy_user code") switched to a macro in order to build the memcpy symbols in preparation for the EVA support. However, this commit also removed the NOP instruction after the 'jr ra' when returning back to the caller. This had no visible side-effects since the next instruction was a load to the t0 register which was already in the clobbered list, but it may have undesired effects in the future if some other code is introduced in between the .Ldone and the .Ll_exc_copy labels. Signed-off-by: Markos Chandras Cc: # v3.15+ Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/8512/ Signed-off-by: Ralf Baechle --- arch/mips/lib/memcpy.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S index c17ef80cf65a..5d3238af9b5c 100644 --- a/arch/mips/lib/memcpy.S +++ b/arch/mips/lib/memcpy.S @@ -503,6 +503,7 @@ STOREB(t0, NBYTES-2(dst), .Ls_exc_p1\@) .Ldone\@: jr ra + nop .if __memcpy == 1 END(memcpy) .set __memcpy, 0 -- cgit v1.2.1 From 640465bda58c7078725201be7430c31a349121e9 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Tue, 18 Nov 2014 18:47:13 +0100 Subject: MIPS: Zero variable read by get_user / __get_user in case of an error. This wasn't happening in all cases. Signed-off-by: Ralf Baechle --- arch/mips/include/asm/uaccess.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/mips/include/asm/uaccess.h b/arch/mips/include/asm/uaccess.h index b9ab717e3619..133678ab4eb8 100644 --- a/arch/mips/include/asm/uaccess.h +++ b/arch/mips/include/asm/uaccess.h @@ -301,7 +301,8 @@ do { \ __get_kernel_common((x), size, __gu_ptr); \ else \ __get_user_common((x), size, __gu_ptr); \ - } \ + } else \ + (x) = 0; \ \ __gu_err; \ }) @@ -316,6 +317,7 @@ do { \ " .insn \n" \ " .section .fixup,\"ax\" \n" \ "3: li %0, %4 \n" \ + " move %1, $0 \n" \ " j 2b \n" \ " .previous \n" \ " .section __ex_table,\"a\" \n" \ @@ -630,6 +632,7 @@ do { \ " .insn \n" \ " .section .fixup,\"ax\" \n" \ "3: li %0, %4 \n" \ + " move %1, $0 \n" \ " j 2b \n" \ " .previous \n" \ " .section __ex_table,\"a\" \n" \ -- cgit v1.2.1 From 99436f7d69045800ffd1d66912f85d37150c7e2b Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 17 Nov 2014 16:09:54 +0000 Subject: MIPS: jump_label.c: Correct the span of the J instruction Correct the check for the span of the 256MB segment addressable by the J instruction according to this instruction's semantics. The calculation of the jump target is applied to the address of the delay-slot instruction that immediately follows. Adjust the check accordingly by adding 4 to `e->code' that holds the address of the J instruction itself. Signed-off-by: Maciej W. Rozycki Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/8515/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/jump_label.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/kernel/jump_label.c b/arch/mips/kernel/jump_label.c index 6001610cfe55..f65a843e883b 100644 --- a/arch/mips/kernel/jump_label.c +++ b/arch/mips/kernel/jump_label.c @@ -27,8 +27,8 @@ void arch_jump_label_transform(struct jump_entry *e, union mips_instruction *insn_p = (union mips_instruction *)(unsigned long)e->code; - /* Jump only works within a 256MB aligned region. */ - BUG_ON((e->target & ~J_RANGE_MASK) != (e->code & ~J_RANGE_MASK)); + /* Jump only works within a 256MB aligned region of its delay slot. */ + BUG_ON((e->target & ~J_RANGE_MASK) != ((e->code + 4) & ~J_RANGE_MASK)); /* Target must have 4 byte alignment. */ BUG_ON((e->target & 3) != 0); -- cgit v1.2.1 From 935c2dbec4d6d3163ee8e7409996904a734ad89a Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 17 Nov 2014 16:10:32 +0000 Subject: MIPS: jump_label.c: Handle the microMIPS J instruction encoding Implement the microMIPS encoding of the J instruction for the purpose of the static keys feature, fixing a crash early on in bootstrap as the kernel is unhappy seeing the ISA bit set in jump table entries. Make sure the ISA bit correctly reflects the instruction encoding chosen for the kernel, 0 for the standard MIPS and 1 for the microMIPS encoding. Also make sure the instruction to patch is a 32-bit NOP in the microMIPS mode as by default the 16-bit short encoding is assumed Signed-off-by: Maciej W. Rozycki Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/8516/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/jump_label.h | 8 +++++++- arch/mips/kernel/jump_label.c | 40 +++++++++++++++++++++++++++++--------- 2 files changed, 38 insertions(+), 10 deletions(-) diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h index e194f957ca8c..fdbff44e5482 100644 --- a/arch/mips/include/asm/jump_label.h +++ b/arch/mips/include/asm/jump_label.h @@ -20,9 +20,15 @@ #define WORD_INSN ".word" #endif +#ifdef CONFIG_CPU_MICROMIPS +#define NOP_INSN "nop32" +#else +#define NOP_INSN "nop" +#endif + static __always_inline bool arch_static_branch(struct static_key *key) { - asm_volatile_goto("1:\tnop\n\t" + asm_volatile_goto("1:\t" NOP_INSN "\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" WORD_INSN " 1b, %l[l_yes], %0\n\t" diff --git a/arch/mips/kernel/jump_label.c b/arch/mips/kernel/jump_label.c index f65a843e883b..dda800e9e731 100644 --- a/arch/mips/kernel/jump_label.c +++ b/arch/mips/kernel/jump_label.c @@ -18,31 +18,53 @@ #ifdef HAVE_JUMP_LABEL -#define J_RANGE_MASK ((1ul << 28) - 1) +/* + * Define parameters for the standard MIPS and the microMIPS jump + * instruction encoding respectively: + * + * - the ISA bit of the target, either 0 or 1 respectively, + * + * - the amount the jump target address is shifted right to fit in the + * immediate field of the machine instruction, either 2 or 1, + * + * - the mask determining the size of the jump region relative to the + * delay-slot instruction, either 256MB or 128MB, + * + * - the jump target alignment, either 4 or 2 bytes. + */ +#define J_ISA_BIT IS_ENABLED(CONFIG_CPU_MICROMIPS) +#define J_RANGE_SHIFT (2 - J_ISA_BIT) +#define J_RANGE_MASK ((1ul << (26 + J_RANGE_SHIFT)) - 1) +#define J_ALIGN_MASK ((1ul << J_RANGE_SHIFT) - 1) void arch_jump_label_transform(struct jump_entry *e, enum jump_label_type type) { + union mips_instruction *insn_p; union mips_instruction insn; - union mips_instruction *insn_p = - (union mips_instruction *)(unsigned long)e->code; - /* Jump only works within a 256MB aligned region of its delay slot. */ + insn_p = (union mips_instruction *)msk_isa16_mode(e->code); + + /* Jump only works within an aligned region its delay slot is in. */ BUG_ON((e->target & ~J_RANGE_MASK) != ((e->code + 4) & ~J_RANGE_MASK)); - /* Target must have 4 byte alignment. */ - BUG_ON((e->target & 3) != 0); + /* Target must have the right alignment and ISA must be preserved. */ + BUG_ON((e->target & J_ALIGN_MASK) != J_ISA_BIT); if (type == JUMP_LABEL_ENABLE) { - insn.j_format.opcode = j_op; - insn.j_format.target = (e->target & J_RANGE_MASK) >> 2; + insn.j_format.opcode = J_ISA_BIT ? mm_j32_op : j_op; + insn.j_format.target = e->target >> J_RANGE_SHIFT; } else { insn.word = 0; /* nop */ } get_online_cpus(); mutex_lock(&text_mutex); - *insn_p = insn; + if (IS_ENABLED(CONFIG_CPU_MICROMIPS)) { + insn_p->halfword[0] = insn.word >> 16; + insn_p->halfword[1] = insn.word; + } else + *insn_p = insn; flush_icache_range((unsigned long)insn_p, (unsigned long)insn_p + sizeof(*insn_p)); -- cgit v1.2.1 From 49e41938f8a53d01d1c5d133a038acb9650008f9 Mon Sep 17 00:00:00 2001 From: Tyler Baker Date: Tue, 18 Nov 2014 11:10:53 -0800 Subject: ARM: multi_v7_defconfig: fix failure setting CPU voltage by enabling dependent I2C controller This patch fixes a long standing issue introduced during the 3.16 merge window. Shortly after the merge, exynos5250-based arndale boards began to produce the following errors: kern.err kernel: exynos-cpufreq exynos-cpufreq: failed to set cpu voltage kern.err kernel: cpufreq: __target_index: Failed to change cpu frequency: -22 Further analysis revealed that the S5M8767 voltage regulator used on the exynos5250-based arndale board utilizes the S3C2410 I2C controller. If the S3C2410 I2C controller driver is not enabled, the S5M8767 voltage regulator fails to probe. Therefore a dependency exists between these two drivers. In the exynos_defconfig both CONFIG_REGULATOR_S5M8767 and CONFIG_I2C_S3C2410 options are enabled, and no errors are produced. However, in the multi_v7_defconfig only the CONFIG_REGULATOR_S5M8767 option is enabled and the errors are present. So let's enable the CONFIG_I2C_S3C2410 option in the multi_v7_defconfig to allow the S5M8767 voltage regulator to probe. Signed-off-by: Tyler Baker Acked-by: Kukjin Kim Signed-off-by: Kevin Hilman --- arch/arm/configs/multi_v7_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index 3487046d8a78..9d7a32f93fcf 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -217,6 +217,7 @@ CONFIG_I2C_CADENCE=y CONFIG_I2C_DESIGNWARE_PLATFORM=y CONFIG_I2C_EXYNOS5=y CONFIG_I2C_MV64XXX=y +CONFIG_I2C_S3C2410=y CONFIG_I2C_SIRF=y CONFIG_I2C_TEGRA=y CONFIG_I2C_ST=y -- cgit v1.2.1 From c1a2086e2d8c4eb4e8630ba752e911ec180dec67 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Wed, 19 Nov 2014 16:22:32 +0000 Subject: of/selftest: Fix off-by-one error in removal path The removal path for selftest data has an off by one error that causes the code to dereference beyond the end of the nodes[] array on the first pass through. The old code only worked by chance on a lot of platforms, but the bug was recently exposed on aarch64. The fix is simple. Decrement the node count before dereferencing, not after. Reported-by: Kevin Hilman Cc: Rob Herring Cc: Gaurav Minocha Cc: # v3.17+ --- drivers/of/selftest.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/of/selftest.c b/drivers/of/selftest.c index 11b873c54a77..e6c14dc400e9 100644 --- a/drivers/of/selftest.c +++ b/drivers/of/selftest.c @@ -896,7 +896,7 @@ static void selftest_data_remove(void) return; } - while (last_node_index >= 0) { + while (last_node_index-- > 0) { if (nodes[last_node_index]) { np = of_find_node_by_path(nodes[last_node_index]->full_name); if (strcmp(np->full_name, "/aliases") != 0) { @@ -908,7 +908,6 @@ static void selftest_data_remove(void) } } } - last_node_index--; } } -- cgit v1.2.1 From 4a7795d35e252f38298980530e01e21867f8f856 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Wed, 19 Nov 2014 15:50:34 +0800 Subject: vfs: fix reference leak in d_prune_aliases() In "d_prune_alias(): just lock the parent and call __dentry_kill()" the old dget + d_drop + dput has been replaced with lock_parent + __dentry_kill; unfortunately, dput() does more than just killing dentry - it also drops the reference to parent. New variant leaks that reference and needs dput(parent) after killing the child off. Signed-off-by: Yan, Zheng Signed-off-by: Al Viro --- fs/dcache.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/dcache.c b/fs/dcache.c index 3ffef7f4e5cd..5bc72b07fde2 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -778,6 +778,7 @@ restart: struct dentry *parent = lock_parent(dentry); if (likely(!dentry->d_lockref.count)) { __dentry_kill(dentry); + dput(parent); goto restart; } if (parent) -- cgit v1.2.1 From 7ca2f234404e738cc807ed87c43f9932513bc8c6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 19 Nov 2014 15:11:24 +0100 Subject: isofs: avoid unused function warning With the isofs_hash() function removed, isofs_hash_ms() is the only user of isofs_hash_common(), but it's defined inside of an #ifdef, which triggers this gcc warning in ARM axm55xx_defconfig starting with v3.18-rc3: fs/isofs/inode.c:177:1: warning: 'isofs_hash_common' defined but not used [-Wunused-function] This patch moves the function inside of the same #ifdef section to avoid that warning, which seems the best compromise of a relatively harmless patch for a late -rc. Signed-off-by: Arnd Bergmann Fixes: b0afd8e5db7b ("isofs: don't bother with ->d_op for normal case") Signed-off-by: Al Viro --- fs/isofs/inode.c | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index fe839b915116..d67a16f2a45d 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -170,27 +170,6 @@ struct iso9660_options{ s32 sbsector; }; -/* - * Compute the hash for the isofs name corresponding to the dentry. - */ -static int -isofs_hash_common(struct qstr *qstr, int ms) -{ - const char *name; - int len; - - len = qstr->len; - name = qstr->name; - if (ms) { - while (len && name[len-1] == '.') - len--; - } - - qstr->hash = full_name_hash(name, len); - - return 0; -} - /* * Compute the hash for the isofs name corresponding to the dentry. */ @@ -263,6 +242,27 @@ isofs_dentry_cmpi(const struct dentry *parent, const struct dentry *dentry, } #ifdef CONFIG_JOLIET +/* + * Compute the hash for the isofs name corresponding to the dentry. + */ +static int +isofs_hash_common(struct qstr *qstr, int ms) +{ + const char *name; + int len; + + len = qstr->len; + name = qstr->name; + if (ms) { + while (len && name[len-1] == '.') + len--; + } + + qstr->hash = full_name_hash(name, len); + + return 0; +} + static int isofs_hash_ms(const struct dentry *dentry, struct qstr *qstr) { -- cgit v1.2.1 From f82c458a2c3ffb94b431fc6ad791a79df1b3713e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 19 Nov 2014 10:25:09 -0800 Subject: btrfs: fix lockups from btrfs_clear_path_blocking The fair reader/writer locks mean that btrfs_clear_path_blocking needs to strictly follow lock ordering rules even when we already have blocking locks on a given path. Before we can clear a blocking lock on the path, we need to make sure all of the locks have been converted to blocking. This will remove lock inversions against anyone spinning in write_lock() against the buffers we're trying to get read locks on. These inversions didn't exist before the fair read/writer locks, but now we need to be more careful. We papered over this deadlock in the past by changing btrfs_try_read_lock() to be a true trylock against both the spinlock and the blocking lock. This was slower, and not sufficient to fix all the deadlocks. This patch adds a btrfs_tree_read_lock_atomic(), which basically means get the spinlock but trylock on the blocking lock. Signed-off-by: Chris Mason Signed-off-by: Josef Bacik Reported-by: Patrick Schmid cc: stable@vger.kernel.org #v3.15+ --- fs/btrfs/ctree.c | 14 ++------------ fs/btrfs/locking.c | 24 +++++++++++++++++++++--- fs/btrfs/locking.h | 2 ++ 3 files changed, 25 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 19bc6162fb8e..150822ee0a0b 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -80,13 +80,6 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p, { int i; -#ifdef CONFIG_DEBUG_LOCK_ALLOC - /* lockdep really cares that we take all of these spinlocks - * in the right order. If any of the locks in the path are not - * currently blocking, it is going to complain. So, make really - * really sure by forcing the path to blocking before we clear - * the path blocking. - */ if (held) { btrfs_set_lock_blocking_rw(held, held_rw); if (held_rw == BTRFS_WRITE_LOCK) @@ -95,7 +88,6 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p, held_rw = BTRFS_READ_LOCK_BLOCKING; } btrfs_set_path_blocking(p); -#endif for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) { if (p->nodes[i] && p->locks[i]) { @@ -107,10 +99,8 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p, } } -#ifdef CONFIG_DEBUG_LOCK_ALLOC if (held) btrfs_clear_lock_blocking_rw(held, held_rw); -#endif } /* this also releases the path */ @@ -2893,7 +2883,7 @@ cow_done: } p->locks[level] = BTRFS_WRITE_LOCK; } else { - err = btrfs_try_tree_read_lock(b); + err = btrfs_tree_read_lock_atomic(b); if (!err) { btrfs_set_path_blocking(p); btrfs_tree_read_lock(b); @@ -3025,7 +3015,7 @@ again: } level = btrfs_header_level(b); - err = btrfs_try_tree_read_lock(b); + err = btrfs_tree_read_lock_atomic(b); if (!err) { btrfs_set_path_blocking(p); btrfs_tree_read_lock(b); diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 5665d2149249..f8229ef1b46d 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -127,6 +127,26 @@ again: atomic_inc(&eb->spinning_readers); } +/* + * take a spinning read lock. + * returns 1 if we get the read lock and 0 if we don't + * this won't wait for blocking writers + */ +int btrfs_tree_read_lock_atomic(struct extent_buffer *eb) +{ + if (atomic_read(&eb->blocking_writers)) + return 0; + + read_lock(&eb->lock); + if (atomic_read(&eb->blocking_writers)) { + read_unlock(&eb->lock); + return 0; + } + atomic_inc(&eb->read_locks); + atomic_inc(&eb->spinning_readers); + return 1; +} + /* * returns 1 if we get the read lock and 0 if we don't * this won't wait for blocking writers @@ -158,9 +178,7 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb) atomic_read(&eb->blocking_readers)) return 0; - if (!write_trylock(&eb->lock)) - return 0; - + write_lock(&eb->lock); if (atomic_read(&eb->blocking_writers) || atomic_read(&eb->blocking_readers)) { write_unlock(&eb->lock); diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h index b81e0e9a4894..c44a9d5f5362 100644 --- a/fs/btrfs/locking.h +++ b/fs/btrfs/locking.h @@ -35,6 +35,8 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw); void btrfs_assert_tree_locked(struct extent_buffer *eb); int btrfs_try_tree_read_lock(struct extent_buffer *eb); int btrfs_try_tree_write_lock(struct extent_buffer *eb); +int btrfs_tree_read_lock_atomic(struct extent_buffer *eb); + static inline void btrfs_tree_unlock_rw(struct extent_buffer *eb, int rw) { -- cgit v1.2.1 From b8e4500f42fe4464a33a887579147050bed8fcef Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 18 Nov 2014 15:14:44 +0100 Subject: bonding: fix curr_active_slave/carrier with loadbalance arp monitoring Since commit 6fde8f037e60 ("bonding: fix locking in bond_loadbalance_arp_mon()") we can have a stale bond carrier state and stale curr_active_slave when using arp monitoring in loadbalance modes. The reason is that in bond_loadbalance_arp_mon() we can't have do_failover == true but slave_state_changed == false, whenever do_failover is true then slave_state_changed is also true. Then the following piece from bond_loadbalance_arp_mon(): if (slave_state_changed) { bond_slave_state_change(bond); if (BOND_MODE(bond) == BOND_MODE_XOR) bond_update_slave_arr(bond, NULL); } else if (do_failover) { block_netpoll_tx(); bond_select_active_slave(bond); unblock_netpoll_tx(); } will execute only the first branch, always and regardless of do_failover. Since these two events aren't related in such way, we need to decouple and consider them separately. For example this issue could lead to the following result: Bonding Mode: load balancing (round-robin) *MII Status: down* MII Polling Interval (ms): 0 Up Delay (ms): 0 Down Delay (ms): 0 ARP Polling Interval (ms): 100 ARP IP target/s (n.n.n.n form): 192.168.9.2 Slave Interface: ens12 *MII Status: up* Speed: 10000 Mbps Duplex: full Link Failure Count: 2 Permanent HW addr: 00:0f:53:01:42:2c Slave queue ID: 0 Slave Interface: eth1 *MII Status: up* Speed: Unknown Duplex: Unknown Link Failure Count: 70 Permanent HW addr: 52:54:00:2f:0f:8e Slave queue ID: 0 Since some interfaces are up, then the status of the bond should also be up, but it will never change unless something invokes bond_set_carrier() (i.e. enslave, bond_select_active_slave etc). Now, if I force the calling of bond_select_active_slave via for example changing primary_reselect (it can change in any mode), then the MII status goes to "up" because it calls bond_select_active_slave() which should've been done from bond_loadbalance_arp_mon() itself. CC: Veaceslav Falico CC: Jay Vosburgh CC: Andy Gospodarek CC: Ding Tianhong Fixes: 6fde8f037e60 ("bonding: fix locking in bond_loadbalance_arp_mon()") Signed-off-by: Nikolay Aleksandrov Acked-by: Veaceslav Falico Acked-by: Andy Gospodarek Acked-by: Ding Tianhong Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index c9ac06cfe6b7..a5115fb7cf33 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2471,7 +2471,8 @@ static void bond_loadbalance_arp_mon(struct work_struct *work) bond_slave_state_change(bond); if (BOND_MODE(bond) == BOND_MODE_XOR) bond_update_slave_arr(bond, NULL); - } else if (do_failover) { + } + if (do_failover) { block_netpoll_tx(); bond_select_active_slave(bond); unblock_netpoll_tx(); -- cgit v1.2.1 From 9737c6ab7afbc950e997ef80cba2c40dbbd16ea4 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 18 Nov 2014 17:51:27 +0200 Subject: net/mlx4_en: Add VXLAN ndo calls to the PF net device ops too This is currently missing, which results in a crash when one attempts to set VXLAN tunnel over the mlx4_en when acting as PF. [ 2408.785472] BUG: unable to handle kernel NULL pointer dereference at (null) [...] [ 2408.994104] Call Trace: [ 2408.996584] [] ? vxlan_get_rx_port+0xd6/0x103 [vxlan] [ 2409.003316] [] ? vxlan_lowerdev_event+0xf2/0xf2 [vxlan] [ 2409.010225] [] mlx4_en_start_port+0x862/0x96a [mlx4_en] [ 2409.017132] [] mlx4_en_open+0x17f/0x1b8 [mlx4_en] While here, make sure to invoke vxlan_get_rx_port() only when VXLAN offloads are actually enabled and not when they are only supported. Reported-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index c5fcc56ec06b..4d69e382b4e5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1693,7 +1693,7 @@ int mlx4_en_start_port(struct net_device *dev) mlx4_set_stats_bitmap(mdev->dev, &priv->stats_bitmap); #ifdef CONFIG_MLX4_EN_VXLAN - if (priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS) + if (priv->mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) vxlan_get_rx_port(dev); #endif priv->port_up = true; @@ -2422,6 +2422,11 @@ static const struct net_device_ops mlx4_netdev_ops_master = { .ndo_rx_flow_steer = mlx4_en_filter_rfs, #endif .ndo_get_phys_port_id = mlx4_en_get_phys_port_id, +#ifdef CONFIG_MLX4_EN_VXLAN + .ndo_add_vxlan_port = mlx4_en_add_vxlan_port, + .ndo_del_vxlan_port = mlx4_en_del_vxlan_port, + .ndo_gso_check = mlx4_en_gso_check, +#endif }; int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, -- cgit v1.2.1 From 7fc986d8a9727e5d40da3c2c1c343da6142e82a9 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 19 Nov 2014 14:30:32 -0700 Subject: PCI: Support 64-bit bridge windows if we have 64-bit dma_addr_t Aaron reported that a 32-bit x86 kernel with Physical Address Extension (PAE) support complains about bridge prefetchable memory windows above 4GB: pci_bus 0000:00: root bus resource [mem 0x380000000000-0x383fffffffff] ... pci 0000:03:00.0: reg 0x10: [mem 0x383fffc00000-0x383fffdfffff 64bit pref] pci 0000:03:00.0: reg 0x20: [mem 0x383fffe04000-0x383fffe07fff 64bit pref] pci 0000:03:00.1: reg 0x10: [mem 0x383fffa00000-0x383fffbfffff 64bit pref] pci 0000:03:00.1: reg 0x20: [mem 0x383fffe00000-0x383fffe03fff 64bit pref] pci 0000:00:02.2: PCI bridge to [bus 03-04] pci 0000:00:02.2: bridge window [io 0x1000-0x1fff] pci 0000:00:02.2: bridge window [mem 0x91900000-0x91cfffff] pci 0000:00:02.2: can't handle 64-bit address space for bridge In this kernel, unsigned long is 32 bits and dma_addr_t is 64 bits. Previously we used "unsigned long" to hold the bridge window address. But this is a bus address, so we should use dma_addr_t instead. Use dma_addr_t to hold the bridge window base and limit. The question of whether the CPU can actually *address* the window is separate and depends on what the physical address space of the CPU is and whether the host bridge does any address translation. [bhelgaas: fix "shift count > width of type", changelog, stable tag] Fixes: d56dbf5bab8c ("PCI: Allocate 64-bit BARs above 4G when possible") Link: https://bugzilla.kernel.org/show_bug.cgi?id=88131 Reported-by: Aaron Ma Tested-by: Aaron Ma Signed-off-by: Yinghai Lu Signed-off-by: Bjorn Helgaas CC: stable@vger.kernel.org # v3.14+ --- drivers/pci/probe.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 6244b1834dfe..c8ca98c2b480 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -407,15 +407,16 @@ static void pci_read_bridge_mmio_pref(struct pci_bus *child) { struct pci_dev *dev = child->self; u16 mem_base_lo, mem_limit_lo; - unsigned long base, limit; + u64 base64, limit64; + dma_addr_t base, limit; struct pci_bus_region region; struct resource *res; res = child->resource[2]; pci_read_config_word(dev, PCI_PREF_MEMORY_BASE, &mem_base_lo); pci_read_config_word(dev, PCI_PREF_MEMORY_LIMIT, &mem_limit_lo); - base = ((unsigned long) mem_base_lo & PCI_PREF_RANGE_MASK) << 16; - limit = ((unsigned long) mem_limit_lo & PCI_PREF_RANGE_MASK) << 16; + base64 = (mem_base_lo & PCI_PREF_RANGE_MASK) << 16; + limit64 = (mem_limit_lo & PCI_PREF_RANGE_MASK) << 16; if ((mem_base_lo & PCI_PREF_RANGE_TYPE_MASK) == PCI_PREF_RANGE_TYPE_64) { u32 mem_base_hi, mem_limit_hi; @@ -429,17 +430,20 @@ static void pci_read_bridge_mmio_pref(struct pci_bus *child) * this, just assume they are not being used. */ if (mem_base_hi <= mem_limit_hi) { -#if BITS_PER_LONG == 64 - base |= ((unsigned long) mem_base_hi) << 32; - limit |= ((unsigned long) mem_limit_hi) << 32; -#else - if (mem_base_hi || mem_limit_hi) { - dev_err(&dev->dev, "can't handle 64-bit address space for bridge\n"); - return; - } -#endif + base64 |= (u64) mem_base_hi << 32; + limit64 |= (u64) mem_limit_hi << 32; } } + + base = (dma_addr_t) base64; + limit = (dma_addr_t) limit64; + + if (base != base64) { + dev_err(&dev->dev, "can't handle bridge window above 4GB (bus address %#010llx)\n", + (unsigned long long) base64); + return; + } + if (base <= limit) { res->flags = (mem_base_lo & PCI_PREF_RANGE_TYPE_MASK) | IORESOURCE_MEM | IORESOURCE_PREFETCH; -- cgit v1.2.1 From ffb1388a364d135810337182d6800a0c7ee44f48 Mon Sep 17 00:00:00 2001 From: Duan Jiong Date: Wed, 19 Nov 2014 09:35:39 +0800 Subject: ipv6: delete protocol and unregister rtnetlink when cleanup pim6_protocol was added when initiation, but it not deleted. Similarly, unregister RTNL_FAMILY_IP6MR rtnetlink. Signed-off-by: Duan Jiong Reviewed-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 0171f08325c3..1a01d79b8698 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1439,6 +1439,10 @@ reg_pernet_fail: void ip6_mr_cleanup(void) { + rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE); +#ifdef CONFIG_IPV6_PIMSM_V2 + inet6_del_protocol(&pim6_protocol, IPPROTO_PIM); +#endif unregister_netdevice_notifier(&ip6_mr_notifier); unregister_pernet_subsys(&ip6mr_net_ops); kmem_cache_destroy(mrt_cachep); -- cgit v1.2.1 From ee7255ada313a6db99be47ce174b0bfb8295a041 Mon Sep 17 00:00:00 2001 From: Anish Bhatt Date: Tue, 18 Nov 2014 19:09:51 -0800 Subject: cxgb4i : Don't block unload/cxgb4 unload when remote closes TCP connection cxgb4i was returning wrong error and not releasing module reference if remote end abruptly closed TCP connection. This prevents the cxgb4 network module from being unloaded, further affecting other network drivers dependent on cxgb4 Sending to net as this affects all cxgb4 based network drivers. Signed-off-by: Anish Bhatt Signed-off-by: David S. Miller --- drivers/scsi/cxgbi/cxgb4i/cxgb4i.c | 2 ++ drivers/scsi/cxgbi/libcxgbi.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c index 3e0a0d315f72..f48f40ce9f87 100644 --- a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c +++ b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c @@ -828,6 +828,8 @@ static void do_act_open_rpl(struct cxgbi_device *cdev, struct sk_buff *skb) if (status == CPL_ERR_RTX_NEG_ADVICE) goto rel_skb; + module_put(THIS_MODULE); + if (status && status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST && status != CPL_ERR_ARP_MISS) diff --git a/drivers/scsi/cxgbi/libcxgbi.c b/drivers/scsi/cxgbi/libcxgbi.c index 674d498b46ab..9d63853c5fce 100644 --- a/drivers/scsi/cxgbi/libcxgbi.c +++ b/drivers/scsi/cxgbi/libcxgbi.c @@ -816,7 +816,7 @@ static void cxgbi_inform_iscsi_conn_closing(struct cxgbi_sock *csk) read_lock_bh(&csk->callback_lock); if (csk->user_data) iscsi_conn_failure(csk->user_data, - ISCSI_ERR_CONN_FAILED); + ISCSI_ERR_TCP_CONN_CLOSE); read_unlock_bh(&csk->callback_lock); } } -- cgit v1.2.1 From 78579b7c7eb45f0e7ec5e9437087ed21749f9a9c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 19 Nov 2014 01:44:11 +0100 Subject: ACPI / PM: Ignore wakeup setting if the ACPI companion can't wake up As reported by Dmitry, on some Chromebooks there are devices with corresponding ACPI objects and with unusual system wakeup configuration. Namely, they technically are wakeup-capable, but the wakeup is handled via a platform-specific out-of-band mechanism and the ACPI PM layer has no information on the wakeup capability. As a result, device_may_wakeup(dev) called from acpi_dev_suspend_late() returns 'true' for those devices, but the wakeup.flags.valid flag is unset for the corresponding ACPI device objects, so acpi_device_wakeup() reproducibly fails for them causing acpi_dev_suspend_late() to return an error code. The entire system suspend is then aborted and the machines in question cannot suspend at all. Address the problem by ignoring the device_may_wakeup(dev) return value in acpi_dev_suspend_late() if the ACPI companion of the device being handled has wakeup.flags.valid unset (in which case it is clear that the wakeup is supposed to be handled by other means). This fixes a regression introduced by commit a76e9bd89ae7 (i2c: attach/detach I2C client device to the ACPI power domain) as the affected systems could suspend and resume successfully before that commit. Fixes: a76e9bd89ae7 (i2c: attach/detach I2C client device to the ACPI power domain) Reported-by: Dmitry Torokhov Reviewed-by: Dmitry Torokhov Cc: 3.13+ # 3.13+ Signed-off-by: Rafael J. Wysocki --- drivers/acpi/device_pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index 143ec6ea1468..7db193160766 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -878,7 +878,7 @@ int acpi_dev_suspend_late(struct device *dev) return 0; target_state = acpi_target_system_state(); - wakeup = device_may_wakeup(dev); + wakeup = device_may_wakeup(dev) && acpi_device_can_wakeup(adev); error = acpi_device_wakeup(adev, target_state, wakeup); if (wakeup && error) return error; -- cgit v1.2.1 From b1a5ad006b34ded9dc7ec64988deba1b3ecad367 Mon Sep 17 00:00:00 2001 From: Chris Moore Date: Tue, 4 Nov 2014 16:28:29 +0000 Subject: IB/isert: Adjust CQ size to HW limits isert has an issue of trying to create a CQ with more CQEs than are supported by the hardware, that currently results in failures during isert_device creation during first session login. This is the isert version of the patch that Minh Tran submitted for iser, and is simple a workaround required to function with existing ocrdma hardware. Signed-off-by: Chris Moore Reviewied-by: Sagi Grimberg Cc: # 3.10+ Signed-off-by: Nicholas Bellinger --- drivers/infiniband/ulp/isert/ib_isert.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index a6c7b395c61d..10641b7816f4 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -228,12 +228,16 @@ isert_create_device_ib_res(struct isert_device *device) struct isert_cq_desc *cq_desc; struct ib_device_attr *dev_attr; int ret = 0, i, j; + int max_rx_cqe, max_tx_cqe; dev_attr = &device->dev_attr; ret = isert_query_device(ib_dev, dev_attr); if (ret) return ret; + max_rx_cqe = min(ISER_MAX_RX_CQ_LEN, dev_attr->max_cqe); + max_tx_cqe = min(ISER_MAX_TX_CQ_LEN, dev_attr->max_cqe); + /* asign function handlers */ if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS && dev_attr->device_cap_flags & IB_DEVICE_SIGNATURE_HANDOVER) { @@ -275,7 +279,7 @@ isert_create_device_ib_res(struct isert_device *device) isert_cq_rx_callback, isert_cq_event_callback, (void *)&cq_desc[i], - ISER_MAX_RX_CQ_LEN, i); + max_rx_cqe, i); if (IS_ERR(device->dev_rx_cq[i])) { ret = PTR_ERR(device->dev_rx_cq[i]); device->dev_rx_cq[i] = NULL; @@ -287,7 +291,7 @@ isert_create_device_ib_res(struct isert_device *device) isert_cq_tx_callback, isert_cq_event_callback, (void *)&cq_desc[i], - ISER_MAX_TX_CQ_LEN, i); + max_tx_cqe, i); if (IS_ERR(device->dev_tx_cq[i])) { ret = PTR_ERR(device->dev_tx_cq[i]); device->dev_tx_cq[i] = NULL; -- cgit v1.2.1 From 788ec2fc2ca295a2d929986e95231214ecd8d142 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Wed, 19 Nov 2014 17:13:44 +0000 Subject: of/selftest: Fix testing when /aliases is missing The /aliases node isn't always present in the device tree, but the unittest code assumes that /aliases is there. Add a check when inserting the testcase data to see if of_aliases needs to be updated, and undo the settings when the nodes are removed. Signed-off-by: Grant Likely Cc: Rob Herring Cc: Gaurav Minocha Cc: --- drivers/of/selftest.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/of/selftest.c b/drivers/of/selftest.c index e6c14dc400e9..e2d79afa9dc6 100644 --- a/drivers/of/selftest.c +++ b/drivers/of/selftest.c @@ -899,7 +899,11 @@ static void selftest_data_remove(void) while (last_node_index-- > 0) { if (nodes[last_node_index]) { np = of_find_node_by_path(nodes[last_node_index]->full_name); - if (strcmp(np->full_name, "/aliases") != 0) { + if (np == nodes[last_node_index]) { + if (of_aliases == np) { + of_node_put(of_aliases); + of_aliases = NULL; + } detach_node_and_children(np); } else { for_each_property_of_node(np, prop) { @@ -920,6 +924,8 @@ static int __init of_selftest(void) res = selftest_data_add(); if (res) return res; + if (!of_aliases) + of_aliases = of_find_node_by_path("/aliases"); np = of_find_node_by_path("/testcase-data/phandle-tests/consumer-a"); if (!np) { -- cgit v1.2.1 From ef94b1864d1ed5be54376404bb23d22ed0481feb Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 20 Nov 2014 16:39:59 +0100 Subject: ovl: rename filesystem type to "overlay" Some distributions carry an "old" format of overlayfs while mainline has a "new" format. The distros will possibly want to keep the old overlayfs alongside the new for compatibility reasons. To make it possible to differentiate the two versions change the name of the new one from "overlayfs" to "overlay". Signed-off-by: Miklos Szeredi Reported-by: Serge Hallyn Cc: Andy Whitcroft --- Documentation/filesystems/overlayfs.txt | 2 +- MAINTAINERS | 2 +- fs/Makefile | 2 +- fs/overlayfs/Kconfig | 2 +- fs/overlayfs/Makefile | 4 ++-- fs/overlayfs/super.c | 6 +++--- 6 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt index 530850a72735..a27c950ece61 100644 --- a/Documentation/filesystems/overlayfs.txt +++ b/Documentation/filesystems/overlayfs.txt @@ -64,7 +64,7 @@ is formed. At mount time, the two directories given as mount options "lowerdir" and "upperdir" are combined into a merged directory: - mount -t overlayfs overlayfs -olowerdir=/lower,upperdir=/upper,\ + mount -t overlay overlay -olowerdir=/lower,upperdir=/upper,\ workdir=/work /merged The "workdir" needs to be an empty directory on the same filesystem diff --git a/MAINTAINERS b/MAINTAINERS index c444907ccd69..af1e738e8772 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6888,7 +6888,7 @@ F: drivers/scsi/osd/ F: include/scsi/osd_* F: fs/exofs/ -OVERLAYFS FILESYSTEM +OVERLAY FILESYSTEM M: Miklos Szeredi L: linux-fsdevel@vger.kernel.org S: Supported diff --git a/fs/Makefile b/fs/Makefile index 34a1b9dea6dd..da0bbb456d3f 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -104,7 +104,7 @@ obj-$(CONFIG_QNX6FS_FS) += qnx6/ obj-$(CONFIG_AUTOFS4_FS) += autofs4/ obj-$(CONFIG_ADFS_FS) += adfs/ obj-$(CONFIG_FUSE_FS) += fuse/ -obj-$(CONFIG_OVERLAYFS_FS) += overlayfs/ +obj-$(CONFIG_OVERLAY_FS) += overlayfs/ obj-$(CONFIG_UDF_FS) += udf/ obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/ obj-$(CONFIG_OMFS_FS) += omfs/ diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig index e60125976873..34355818a2e0 100644 --- a/fs/overlayfs/Kconfig +++ b/fs/overlayfs/Kconfig @@ -1,4 +1,4 @@ -config OVERLAYFS_FS +config OVERLAY_FS tristate "Overlay filesystem support" help An overlay filesystem combines two filesystems - an 'upper' filesystem diff --git a/fs/overlayfs/Makefile b/fs/overlayfs/Makefile index 8f91889480d0..900daed3e91d 100644 --- a/fs/overlayfs/Makefile +++ b/fs/overlayfs/Makefile @@ -2,6 +2,6 @@ # Makefile for the overlay filesystem. # -obj-$(CONFIG_OVERLAYFS_FS) += overlayfs.o +obj-$(CONFIG_OVERLAY_FS) += overlay.o -overlayfs-objs := super.o inode.o dir.o readdir.o copy_up.o +overlay-objs := super.o inode.o dir.o readdir.o copy_up.o diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 08b704cebfc4..b92bd1829cf7 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -24,7 +24,7 @@ MODULE_AUTHOR("Miklos Szeredi "); MODULE_DESCRIPTION("Overlay filesystem"); MODULE_LICENSE("GPL"); -#define OVERLAYFS_SUPER_MAGIC 0x794c764f +#define OVERLAYFS_SUPER_MAGIC 0x794c7630 struct ovl_config { char *lowerdir; @@ -776,11 +776,11 @@ static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags, static struct file_system_type ovl_fs_type = { .owner = THIS_MODULE, - .name = "overlayfs", + .name = "overlay", .mount = ovl_mount, .kill_sb = kill_anon_super, }; -MODULE_ALIAS_FS("overlayfs"); +MODULE_ALIAS_FS("overlay"); static int __init ovl_init(void) { -- cgit v1.2.1 From a105d685a8483985a01776411de191a726b48132 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 20 Nov 2014 16:39:59 +0100 Subject: ovl: fix remove/copy-up race ovl_remove_and_whiteout() needs to check if upper dentry exists or not after having locked upper parent directory. Previously we used a "type" value computed before locking the upper parent directory, which is susceptible to racing with copy-up. There's a similar check in ovl_check_empty_and_clear(). This one is not actually racy, since copy-up doesn't change the "emptyness" property of a directory. Add a comment to this effect, and check the existence of upper dentry locally to make the code cleaner. Signed-off-by: Miklos Szeredi --- fs/overlayfs/dir.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 15cd91ad9940..8ffc4b980f1b 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -284,8 +284,7 @@ out: return ERR_PTR(err); } -static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry, - enum ovl_path_type type) +static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry) { int err; struct dentry *ret = NULL; @@ -294,8 +293,17 @@ static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry, err = ovl_check_empty_dir(dentry, &list); if (err) ret = ERR_PTR(err); - else if (type == OVL_PATH_MERGE) - ret = ovl_clear_empty(dentry, &list); + else { + /* + * If no upperdentry then skip clearing whiteouts. + * + * Can race with copy-up, since we don't hold the upperdir + * mutex. Doesn't matter, since copy-up can't create a + * non-empty directory from an empty one. + */ + if (ovl_dentry_upper(dentry)) + ret = ovl_clear_empty(dentry, &list); + } ovl_cache_free(&list); @@ -487,8 +495,7 @@ out: return err; } -static int ovl_remove_and_whiteout(struct dentry *dentry, - enum ovl_path_type type, bool is_dir) +static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir) { struct dentry *workdir = ovl_workdir(dentry); struct inode *wdir = workdir->d_inode; @@ -500,7 +507,7 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, int err; if (is_dir) { - opaquedir = ovl_check_empty_and_clear(dentry, type); + opaquedir = ovl_check_empty_and_clear(dentry); err = PTR_ERR(opaquedir); if (IS_ERR(opaquedir)) goto out; @@ -515,9 +522,10 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, if (IS_ERR(whiteout)) goto out_unlock; - if (type == OVL_PATH_LOWER) { + upper = ovl_dentry_upper(dentry); + if (!upper) { upper = lookup_one_len(dentry->d_name.name, upperdir, - dentry->d_name.len); + dentry->d_name.len); err = PTR_ERR(upper); if (IS_ERR(upper)) goto kill_whiteout; @@ -529,7 +537,6 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, } else { int flags = 0; - upper = ovl_dentry_upper(dentry); if (opaquedir) upper = opaquedir; err = -ESTALE; @@ -648,7 +655,7 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir) cap_raise(override_cred->cap_effective, CAP_CHOWN); old_cred = override_creds(override_cred); - err = ovl_remove_and_whiteout(dentry, type, is_dir); + err = ovl_remove_and_whiteout(dentry, is_dir); revert_creds(old_cred); put_cred(override_cred); @@ -781,7 +788,7 @@ static int ovl_rename2(struct inode *olddir, struct dentry *old, } if (overwrite && (new_type == OVL_PATH_LOWER || new_type == OVL_PATH_MERGE) && new_is_dir) { - opaquedir = ovl_check_empty_and_clear(new, new_type); + opaquedir = ovl_check_empty_and_clear(new); err = PTR_ERR(opaquedir); if (IS_ERR(opaquedir)) { opaquedir = NULL; -- cgit v1.2.1 From 521484639ec19a6f1ed56de6993feb255f5f676c Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 20 Nov 2014 16:40:00 +0100 Subject: ovl: fix race in private xattr checks Xattr operations can race with copy up. This does not matter as long as we consistently fiter out "trunsted.overlay.opaque" attribute on upper directories. Previously we checked parent against OVL_PATH_MERGE. This is too general, and prone to race with copy-up. I.e. we found the parent to be on the lower layer but ovl_dentry_real() would return the copied-up dentry, possibly with the "opaque" attribute. So instead use ovl_path_real() and decide to filter the attributes based on the actual type of the dentry we'll use. Signed-off-by: Miklos Szeredi --- fs/overlayfs/inode.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index af2d18c9fcee..07d74b24913b 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -235,26 +235,36 @@ out: return err; } +static bool ovl_need_xattr_filter(struct dentry *dentry, + enum ovl_path_type type) +{ + return type == OVL_PATH_UPPER && S_ISDIR(dentry->d_inode->i_mode); +} + ssize_t ovl_getxattr(struct dentry *dentry, const char *name, void *value, size_t size) { - if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE && - ovl_is_private_xattr(name)) + struct path realpath; + enum ovl_path_type type = ovl_path_real(dentry, &realpath); + + if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name)) return -ENODATA; - return vfs_getxattr(ovl_dentry_real(dentry), name, value, size); + return vfs_getxattr(realpath.dentry, name, value, size); } ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) { + struct path realpath; + enum ovl_path_type type = ovl_path_real(dentry, &realpath); ssize_t res; int off; - res = vfs_listxattr(ovl_dentry_real(dentry), list, size); + res = vfs_listxattr(realpath.dentry, list, size); if (res <= 0 || size == 0) return res; - if (ovl_path_type(dentry->d_parent) != OVL_PATH_MERGE) + if (!ovl_need_xattr_filter(dentry, type)) return res; /* filter out private xattrs */ @@ -279,17 +289,16 @@ int ovl_removexattr(struct dentry *dentry, const char *name) { int err; struct path realpath; - enum ovl_path_type type; + enum ovl_path_type type = ovl_path_real(dentry, &realpath); err = ovl_want_write(dentry); if (err) goto out; - if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE && - ovl_is_private_xattr(name)) + err = -ENODATA; + if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name)) goto out_drop_write; - type = ovl_path_real(dentry, &realpath); if (type == OVL_PATH_LOWER) { err = vfs_getxattr(realpath.dentry, name, NULL, 0); if (err < 0) -- cgit v1.2.1 From 91c77947133f7aef851b625701e182d3f99d14a9 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 20 Nov 2014 16:40:00 +0100 Subject: ovl: allow filenames with comma Allow option separator (comma) to be escaped with backslash. Signed-off-by: Miklos Szeredi --- fs/overlayfs/super.c | 48 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index b92bd1829cf7..eee7a62e1c0e 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -462,11 +462,34 @@ static const match_table_t ovl_tokens = { {OPT_ERR, NULL} }; +static char *ovl_next_opt(char **s) +{ + char *sbegin = *s; + char *p; + + if (sbegin == NULL) + return NULL; + + for (p = sbegin; *p; p++) { + if (*p == '\\') { + p++; + if (!*p) + break; + } else if (*p == ',') { + *p = '\0'; + *s = p + 1; + return sbegin; + } + } + *s = NULL; + return sbegin; +} + static int ovl_parse_opt(char *opt, struct ovl_config *config) { char *p; - while ((p = strsep(&opt, ",")) != NULL) { + while ((p = ovl_next_opt(&opt)) != NULL) { int token; substring_t args[MAX_OPT_ARGS]; @@ -554,15 +577,34 @@ out_dput: goto out_unlock; } +static void ovl_unescape(char *s) +{ + char *d = s; + + for (;; s++, d++) { + if (*s == '\\') + s++; + *d = *s; + if (!*s) + break; + } +} + static int ovl_mount_dir(const char *name, struct path *path) { int err; + char *tmp = kstrdup(name, GFP_KERNEL); + + if (!tmp) + return -ENOMEM; - err = kern_path(name, LOOKUP_FOLLOW, path); + ovl_unescape(tmp); + err = kern_path(tmp, LOOKUP_FOLLOW, path); if (err) { - pr_err("overlayfs: failed to resolve '%s': %i\n", name, err); + pr_err("overlayfs: failed to resolve '%s': %i\n", tmp, err); err = -EINVAL; } + kfree(tmp); return err; } -- cgit v1.2.1 From 71d509280f7e92eb60ae6b7c78c20afafff060c7 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 20 Nov 2014 16:40:01 +0100 Subject: ovl: use lockless_dereference() for upperdentry Don't open code lockless_dereference() in ovl_upperdentry_dereference(). Signed-off-by: Miklos Szeredi --- fs/overlayfs/super.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index eee7a62e1c0e..f16d318b71f8 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -84,12 +84,7 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry) static struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe) { - struct dentry *upperdentry = ACCESS_ONCE(oe->__upperdentry); - /* - * Make sure to order reads to upperdentry wrt ovl_dentry_update() - */ - smp_read_barrier_depends(); - return upperdentry; + return lockless_dereference(oe->__upperdentry); } void ovl_path_upper(struct dentry *dentry, struct path *path) -- cgit v1.2.1 From c9f00fdb9ab3999cb2fb582ad82a5db9e70c82f5 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 20 Nov 2014 16:40:01 +0100 Subject: ovl: pass dentry into ovl_dir_read_merged() Pass dentry into ovl_dir_read_merged() insted of upperpath and lowerpath. This cleans up callers and paves the way for multi-layer directory reads. Signed-off-by: Miklos Szeredi --- fs/overlayfs/readdir.c | 35 ++++++++++++++--------------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 2a7ef4f8e2a6..7299e962f334 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -274,11 +274,11 @@ static int ovl_dir_mark_whiteouts(struct dentry *dir, return 0; } -static inline int ovl_dir_read_merged(struct path *upperpath, - struct path *lowerpath, - struct list_head *list) +static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list) { int err; + struct path lowerpath; + struct path upperpath; struct ovl_readdir_data rdd = { .ctx.actor = ovl_fill_merge, .list = list, @@ -286,25 +286,28 @@ static inline int ovl_dir_read_merged(struct path *upperpath, .is_merge = false, }; - if (upperpath->dentry) { - err = ovl_dir_read(upperpath, &rdd); + ovl_path_lower(dentry, &lowerpath); + ovl_path_upper(dentry, &upperpath); + + if (upperpath.dentry) { + err = ovl_dir_read(&upperpath, &rdd); if (err) goto out; - if (lowerpath->dentry) { - err = ovl_dir_mark_whiteouts(upperpath->dentry, &rdd); + if (lowerpath.dentry) { + err = ovl_dir_mark_whiteouts(upperpath.dentry, &rdd); if (err) goto out; } } - if (lowerpath->dentry) { + if (lowerpath.dentry) { /* * Insert lowerpath entries before upperpath ones, this allows * offsets to be reasonably constant */ list_add(&rdd.middle, rdd.list); rdd.is_merge = true; - err = ovl_dir_read(lowerpath, &rdd); + err = ovl_dir_read(&lowerpath, &rdd); list_del(&rdd.middle); } out: @@ -329,8 +332,6 @@ static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos) static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry) { int res; - struct path lowerpath; - struct path upperpath; struct ovl_dir_cache *cache; cache = ovl_dir_cache(dentry); @@ -347,10 +348,7 @@ static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry) cache->refcount = 1; INIT_LIST_HEAD(&cache->entries); - ovl_path_lower(dentry, &lowerpath); - ovl_path_upper(dentry, &upperpath); - - res = ovl_dir_read_merged(&upperpath, &lowerpath, &cache->entries); + res = ovl_dir_read_merged(dentry, &cache->entries); if (res) { ovl_cache_free(&cache->entries); kfree(cache); @@ -538,14 +536,9 @@ const struct file_operations ovl_dir_operations = { int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list) { int err; - struct path lowerpath; - struct path upperpath; struct ovl_cache_entry *p; - ovl_path_upper(dentry, &upperpath); - ovl_path_lower(dentry, &lowerpath); - - err = ovl_dir_read_merged(&upperpath, &lowerpath, list); + err = ovl_dir_read_merged(dentry, list); if (err) return err; -- cgit v1.2.1 From 1d113735ecf21de74a04c3b58fa106ac2e64ca0d Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 20 Nov 2014 16:40:01 +0100 Subject: ovl: update MAINTAINERS There's a union/overlay specific mailing list now. Also add a git tree. Signed-off-by: Miklos Szeredi --- MAINTAINERS | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index af1e738e8772..0ff630de8a6d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6890,9 +6890,10 @@ F: fs/exofs/ OVERLAY FILESYSTEM M: Miklos Szeredi -L: linux-fsdevel@vger.kernel.org +L: linux-unionfs@vger.kernel.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs.git S: Supported -F: fs/overlayfs/* +F: fs/overlayfs/ F: Documentation/filesystems/overlayfs.txt P54 WIRELESS DRIVER -- cgit v1.2.1 From 7676895f4736421ebafc48de5078e25ea69e88ee Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 20 Nov 2014 16:40:02 +0100 Subject: ovl: ovl_dir_fsync() cleanup Check against !OVL_PATH_LOWER instead of OVL_PATH_MERGE. For a copied up directory the two are currently equivalent. Signed-off-by: Miklos Szeredi --- fs/overlayfs/readdir.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 7299e962f334..ab1e3dcbed95 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -450,10 +450,10 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, /* * Need to check if we started out being a lower dir, but got copied up */ - if (!od->is_upper && ovl_path_type(dentry) == OVL_PATH_MERGE) { + if (!od->is_upper && ovl_path_type(dentry) != OVL_PATH_LOWER) { struct inode *inode = file_inode(file); - realfile =lockless_dereference(od->upperfile); + realfile = lockless_dereference(od->upperfile); if (!realfile) { struct path upperpath; -- cgit v1.2.1 From b7bc596ebbe0cddc97d76ef9309f64471bbf13eb Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 19 Nov 2014 13:12:54 -0500 Subject: drm/radeon: disable native backlight control on pre-r6xx asics (v2) Just use the acpi interface. That's what windows uses on this generation and it's the only thing that seems to work reliably on these generation parts. You can still force the native backlight interface by setting radeon.backlight=1 Bug: https://bugzilla.kernel.org/show_bug.cgi?id=88501 v2: merge into above if/else block Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/radeon_encoders.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_encoders.c b/drivers/gpu/drm/radeon/radeon_encoders.c index 9a19e52cc655..6b670b0bc47b 100644 --- a/drivers/gpu/drm/radeon/radeon_encoders.c +++ b/drivers/gpu/drm/radeon/radeon_encoders.c @@ -179,6 +179,9 @@ static void radeon_encoder_add_backlight(struct radeon_encoder *radeon_encoder, (rdev->pdev->subsystem_vendor == 0x1734) && (rdev->pdev->subsystem_device == 0x1107)) use_bl = false; + /* disable native backlight control on older asics */ + else if (rdev->family < CHIP_R600) + use_bl = false; else use_bl = true; } -- cgit v1.2.1 From 28731d5818ae25b92d1fb82fe0ac196e97102c1b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 12 Nov 2014 19:17:02 -0500 Subject: drm/radeon: fix endian swapping in vbios fetch for tdp table Value needs to be swapped on BE. Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/r600_dpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/r600_dpm.c b/drivers/gpu/drm/radeon/r600_dpm.c index f6309bd23e01..b5c73df8e202 100644 --- a/drivers/gpu/drm/radeon/r600_dpm.c +++ b/drivers/gpu/drm/radeon/r600_dpm.c @@ -1256,7 +1256,7 @@ int r600_parse_extended_power_table(struct radeon_device *rdev) (mode_info->atom_context->bios + data_offset + le16_to_cpu(ext_hdr->usPowerTuneTableOffset)); rdev->pm.dpm.dyn_state.cac_tdp_table->maximum_power_delivery_limit = - ppt->usMaximumPowerDeliveryLimit; + le16_to_cpu(ppt->usMaximumPowerDeliveryLimit); pt = &ppt->power_tune_table; } else { ATOM_PPLIB_POWERTUNE_Table *ppt = (ATOM_PPLIB_POWERTUNE_Table *) -- cgit v1.2.1 From a1d69c60c44134f64945bbf6a6dfda22eaf4a214 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 19 Nov 2014 22:13:10 +0100 Subject: brcmfmac: don't include linux/unaligned/access_ok.h This is a specific implementation, is the multiplexer that has the arch-specific knowledge of which of the implementations needs to be used, so include that. This issue was revealed by kbuild testing when was added in resulting in redefinition of get_unaligned_be16 (and probably others). Cc: stable@vger.kernel.org # v3.17 Reported-by: Fengguang Wu Signed-off-by: Johannes Berg Signed-off-by: Arend van Spriel Signed-off-by: John W. Linville --- drivers/net/wireless/brcm80211/brcmfmac/pcie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/brcm80211/brcmfmac/pcie.c index 8c0632ec9f7a..16fef3382019 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/pcie.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/pcie.c @@ -19,10 +19,10 @@ #include #include #include -#include #include #include #include +#include #include #include -- cgit v1.2.1 From b5e212a3051b65e426a513901d9c7001681c7215 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 19 Nov 2014 13:56:19 -0800 Subject: x86, syscall: Fix _TIF_NOHZ handling in syscall_trace_enter_phase1 TIF_NOHZ is 19 (i.e. _TIF_SYSCALL_TRACE | _TIF_NOTIFY_RESUME | _TIF_SINGLESTEP), not (1<<19). This code is involved in Dave's trinity lockup, but I don't see why it would cause any of the problems he's seeing, except inadvertently by causing a different path through entry_64.S's syscall handling. Signed-off-by: Andy Lutomirski Cc: Don Zickus Cc: Peter Zijlstra Cc: Dave Jones Cc: Linus Torvalds Link: http://lkml.kernel.org/r/a6cd3b60a3f53afb6e1c8081b0ec30ff19003dd7.1416434075.git.luto@amacapital.net Signed-off-by: Thomas Gleixner --- arch/x86/kernel/ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 749b0e423419..e510618b2e91 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1484,7 +1484,7 @@ unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch) */ if (work & _TIF_NOHZ) { user_exit(); - work &= ~TIF_NOHZ; + work &= ~_TIF_NOHZ; } #ifdef CONFIG_SECCOMP -- cgit v1.2.1 From a5f6fc28d6e6cc379c6839f21820e62262419584 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 19 Nov 2014 18:05:26 +0100 Subject: pptp: fix stack info leak in pptp_getname() pptp_getname() only partially initializes the stack variable sa, particularly only fills the pptp part of the sa_addr union. The code thereby discloses 16 bytes of kernel stack memory via getsockname(). Fix this by memset(0)'ing the union before. Cc: Dmitry Kozlov Signed-off-by: Mathias Krause Signed-off-by: David S. Miller --- drivers/net/ppp/pptp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index 1aff970be33e..1dc628ffce2b 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c @@ -506,7 +506,9 @@ static int pptp_getname(struct socket *sock, struct sockaddr *uaddr, int len = sizeof(struct sockaddr_pppox); struct sockaddr_pppox sp; - sp.sa_family = AF_PPPOX; + memset(&sp.sa_addr, 0, sizeof(sp.sa_addr)); + + sp.sa_family = AF_PPPOX; sp.sa_protocol = PX_PROTO_PPTP; sp.sa_addr.pptp = pppox_sk(sock->sk)->proto.pptp.src_addr; -- cgit v1.2.1 From d3052bb5d306b29c1e7d9e5998c5ac4ca1ff0ca9 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Wed, 19 Nov 2014 13:54:49 -0800 Subject: openvswitch: Don't validate IPv6 label masks. When userspace doesn't provide a mask, OVS datapath generates a fully unwildcarded mask for the flow by copying the flow and setting all bits in all fields. For IPv6 label, this creates a mask that matches on the upper 12 bits, causing the following error: openvswitch: netlink: Invalid IPv6 flow label value (value=ffffffff, max=fffff) This patch ignores the label validation check for masks, avoiding this error. Signed-off-by: Joe Stringer Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/flow_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index fa4ec2e4a78b..089b195c064a 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -690,7 +690,7 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, return -EINVAL; } - if (ipv6_key->ipv6_label & htonl(0xFFF00000)) { + if (!is_mask && ipv6_key->ipv6_label & htonl(0xFFF00000)) { OVS_NLERR("IPv6 flow label %x is out of range (max=%x).\n", ntohl(ipv6_key->ipv6_label), (1 << 20) - 1); return -EINVAL; -- cgit v1.2.1 From 01462405f0c093b2f8dfddafcadcda6c9e4c5cdf Mon Sep 17 00:00:00 2001 From: Jiri Bohac Date: Wed, 19 Nov 2014 23:05:49 +0100 Subject: ipx: fix locking regression in ipx_sendmsg and ipx_recvmsg This fixes an old regression introduced by commit b0d0d915 (ipx: remove the BKL). When a recvmsg syscall blocks waiting for new data, no data can be sent on the same socket with sendmsg because ipx_recvmsg() sleeps with the socket locked. This breaks mars-nwe (NetWare emulator): - the ncpserv process reads the request using recvmsg - ncpserv forks and spawns nwconn - ncpserv calls a (blocking) recvmsg and waits for new requests - nwconn deadlocks in sendmsg on the same socket Commit b0d0d915 has simply replaced BKL locking with lock_sock/release_sock. Unlike now, BKL got unlocked while sleeping, so a blocking recvmsg did not block a concurrent sendmsg. Only keep the socket locked while actually working with the socket data and release it prior to calling skb_recv_datagram(). Signed-off-by: Jiri Bohac Reviewed-by: Arnd Bergmann Signed-off-by: David S. Miller --- net/ipx/af_ipx.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 91729b807c7d..1b095ca37aa4 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1764,6 +1764,7 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock, struct ipxhdr *ipx = NULL; struct sk_buff *skb; int copied, rc; + bool locked = true; lock_sock(sk); /* put the autobinding in */ @@ -1790,6 +1791,8 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock, if (sock_flag(sk, SOCK_ZAPPED)) goto out; + release_sock(sk); + locked = false; skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &rc); if (!skb) { @@ -1826,7 +1829,8 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock, out_free: skb_free_datagram(sk, skb); out: - release_sock(sk); + if (locked) + release_sock(sk); return rc; } -- cgit v1.2.1 From 17544e2ad78fa0bbff6fcdbf09426d04ce95ed1e Mon Sep 17 00:00:00 2001 From: Anish Bhatt Date: Thu, 20 Nov 2014 17:11:46 -0800 Subject: cxgb4 : Fix DCB priority groups being returned in wrong order Peer priority groups were being reversed, but this was missed in the previous fix sent out for this issue. v2 : Previous patch was doing extra unnecessary work, result is the same. Please ignore previous patch Fixes : ee7bc3cdc270 ('cxgb4 : dcb open-lldp interop fixes') Signed-off-by: Anish Bhatt Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c index cca604994003..4fe33606f372 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c @@ -1082,7 +1082,7 @@ static int cxgb4_cee_peer_getpg(struct net_device *dev, struct cee_pg *pg) pgid = be32_to_cpu(pcmd.u.dcb.pgid.pgid); for (i = 0; i < CXGB4_MAX_PRIORITY; i++) - pg->prio_pg[i] = (pgid >> (i * 4)) & 0xF; + pg->prio_pg[7 - i] = (pgid >> (i * 4)) & 0xF; INIT_PORT_DCB_READ_PEER_CMD(pcmd, pi->port_id); pcmd.u.dcb.pgrate.type = FW_PORT_DCB_TYPE_PGRATE; -- cgit v1.2.1 From 892d6eb1245b771987afb8667a65344e568d3439 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 20 Nov 2014 17:03:05 +0800 Subject: virtio-net: validate features during probe We currently trigger BUG when VIRTIO_NET_F_CTRL_VQ is not set but one of features depending on it is. That's not a friendly way to report errors to hypervisors. Let's check, and fail probe instead. Cc: Rusty Russell Cc: Cornelia Huck Cc: Wanlong Gao Signed-off-by: Michael S. Tsirkin Signed-off-by: Jason Wang Acked-by: Cornelia Huck Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index ec2a8b41ed41..b0bc8ead47de 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1673,6 +1673,40 @@ static const struct attribute_group virtio_net_mrg_rx_group = { }; #endif +static bool virtnet_fail_on_feature(struct virtio_device *vdev, + unsigned int fbit, + const char *fname, const char *dname) +{ + if (!virtio_has_feature(vdev, fbit)) + return false; + + dev_err(&vdev->dev, "device advertises feature %s but not %s", + fname, dname); + + return true; +} + +#define VIRTNET_FAIL_ON(vdev, fbit, dbit) \ + virtnet_fail_on_feature(vdev, fbit, #fbit, dbit) + +static bool virtnet_validate_features(struct virtio_device *vdev) +{ + if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) && + (VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_RX, + "VIRTIO_NET_F_CTRL_VQ") || + VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_VLAN, + "VIRTIO_NET_F_CTRL_VQ") || + VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE, + "VIRTIO_NET_F_CTRL_VQ") || + VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_MQ, "VIRTIO_NET_F_CTRL_VQ") || + VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR, + "VIRTIO_NET_F_CTRL_VQ"))) { + return false; + } + + return true; +} + static int virtnet_probe(struct virtio_device *vdev) { int i, err; @@ -1680,6 +1714,9 @@ static int virtnet_probe(struct virtio_device *vdev) struct virtnet_info *vi; u16 max_queue_pairs; + if (!virtnet_validate_features(vdev)) + return -EINVAL; + /* Find if host supports multiqueue virtio_net device */ err = virtio_cread_feature(vdev, VIRTIO_NET_F_MQ, struct virtio_net_config, -- cgit v1.2.1 From e7820e39b7d19b9fe1928fc19de9361b44150ca6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 21 Nov 2014 11:47:16 -0800 Subject: net: Revert "net: avoid one atomic operation in skb_clone()" Not sure what I was thinking, but doing anything after releasing a refcount is suicidal or/and embarrassing. By the time we set skb->fclone to SKB_FCLONE_FREE, another cpu could have released last reference and freed whole skb. We potentially corrupt memory or trap if CONFIG_DEBUG_PAGEALLOC is set. Reported-by: Chris Mason Fixes: ce1a4ea3f1258 ("net: avoid one atomic operation in skb_clone()") Signed-off-by: Eric Dumazet Cc: Sabrina Dubroca Signed-off-by: David S. Miller --- net/core/skbuff.c | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index c16615bfb61e..32e31c299631 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -552,20 +552,13 @@ static void kfree_skbmem(struct sk_buff *skb) case SKB_FCLONE_CLONE: fclones = container_of(skb, struct sk_buff_fclones, skb2); - /* Warning : We must perform the atomic_dec_and_test() before - * setting skb->fclone back to SKB_FCLONE_FREE, otherwise - * skb_clone() could set clone_ref to 2 before our decrement. - * Anyway, if we are going to free the structure, no need to - * rewrite skb->fclone. + /* The clone portion is available for + * fast-cloning again. */ - if (atomic_dec_and_test(&fclones->fclone_ref)) { + skb->fclone = SKB_FCLONE_FREE; + + if (atomic_dec_and_test(&fclones->fclone_ref)) kmem_cache_free(skbuff_fclone_cache, fclones); - } else { - /* The clone portion is available for - * fast-cloning again. - */ - skb->fclone = SKB_FCLONE_FREE; - } break; } } @@ -887,11 +880,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) if (skb->fclone == SKB_FCLONE_ORIG && n->fclone == SKB_FCLONE_FREE) { n->fclone = SKB_FCLONE_CLONE; - /* As our fastclone was free, clone_ref must be 1 at this point. - * We could use atomic_inc() here, but it is faster - * to set the final value. - */ - atomic_set(&fclones->fclone_ref, 2); + atomic_inc(&fclones->fclone_ref); } else { if (skb_pfmemalloc(skb)) gfp_mask |= __GFP_MEMALLOC; -- cgit v1.2.1 From 0c228e833c88e3aa029250f5db77d5968c5ce5b5 Mon Sep 17 00:00:00 2001 From: Calvin Owens Date: Thu, 20 Nov 2014 15:09:53 -0800 Subject: tcp: Restore RFC5961-compliant behavior for SYN packets Commit c3ae62af8e755 ("tcp: should drop incoming frames without ACK flag set") was created to mitigate a security vulnerability in which a local attacker is able to inject data into locally-opened sockets by using TCP protocol statistics in procfs to quickly find the correct sequence number. This broke the RFC5961 requirement to send a challenge ACK in response to spurious RST packets, which was subsequently fixed by commit 7b514a886ba50 ("tcp: accept RST without ACK flag"). Unfortunately, the RFC5961 requirement that spurious SYN packets be handled in a similar manner remains broken. RFC5961 section 4 states that: ... the handling of the SYN in the synchronized state SHOULD be performed as follows: 1) If the SYN bit is set, irrespective of the sequence number, TCP MUST send an ACK (also referred to as challenge ACK) to the remote peer: After sending the acknowledgment, TCP MUST drop the unacceptable segment and stop processing further. By sending an ACK, the remote peer is challenged to confirm the loss of the previous connection and the request to start a new connection. A legitimate peer, after restart, would not have a TCB in the synchronized state. Thus, when the ACK arrives, the peer should send a RST segment back with the sequence number derived from the ACK field that caused the RST. This RST will confirm that the remote peer has indeed closed the previous connection. Upon receipt of a valid RST, the local TCP endpoint MUST terminate its connection. The local TCP endpoint should then rely on SYN retransmission from the remote end to re-establish the connection. This patch lets SYN packets through the discard added in c3ae62af8e755, so that spurious SYN packets are properly dealt with as per the RFC. The challenge ACK is sent unconditionally and is rate-limited, so the original vulnerability is not reintroduced by this patch. Signed-off-by: Calvin Owens Acked-by: Eric Dumazet Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 88fa2d160685..d107ee246a1d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5231,7 +5231,7 @@ slow_path: if (len < (th->doff << 2) || tcp_checksum_complete_user(sk, skb)) goto csum_error; - if (!th->ack && !th->rst) + if (!th->ack && !th->rst && !th->syn) goto discard; /* @@ -5650,7 +5650,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, goto discard; } - if (!th->ack && !th->rst) + if (!th->ack && !th->rst && !th->syn) goto discard; if (!tcp_validate_incoming(sk, skb, th, 0)) -- cgit v1.2.1 From 4aab3b5b3ccf94fc907e66233e6ca4d8675759a6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 22 Nov 2014 09:22:42 -0500 Subject: percpu-ref: fix DEAD flag contamination of percpu pointer While decoupling ATOMIC and DEAD flags, f47ad4578461 ("percpu_ref: decouple switching to percpu mode and reinit") updated __ref_is_percpu() so that it only tests ATOMIC flag to determine whether the ref is in percpu mode or not; however, while DEAD implies ATOMIC, the two flags are set separately during percpu_ref_kill() and if __ref_is_percpu() races percpu_ref_kill(), it may see DEAD w/o ATOMIC. Because __ref_is_percpu() returns @ref->percpu_count_ptr value verbatim as the percpu pointer after testing ATOMIC, the pointer may now be contaminated with the DEAD flag. This can be fixed by clearing the flag bits before returning the pointer which was the fix proposed by Shaohua; however, as DEAD implies ATOMIC, we can just test for both flags at once and avoid the explicit masking. Update __ref_is_percpu() so that it tests that both ATOMIC and DEAD are clear before returning @ref->percpu_count_ptr as the percpu pointer. Signed-off-by: Tejun Heo Reported-and-Reviewed-by: Shaohua Li Link: http://lkml.kernel.org/r/995deb699f5b873c45d667df4add3b06f73c2c25.1416638887.git.shli@kernel.org Fixes: f47ad4578461 ("percpu_ref: decouple switching to percpu mode and reinit") --- include/linux/percpu-refcount.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index d5c89e0dd0e6..51ce60c35f4c 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -133,7 +133,13 @@ static inline bool __ref_is_percpu(struct percpu_ref *ref, /* paired with smp_store_release() in percpu_ref_reinit() */ smp_read_barrier_depends(); - if (unlikely(percpu_ptr & __PERCPU_REF_ATOMIC)) + /* + * Theoretically, the following could test just ATOMIC; however, + * then we'd have to mask off DEAD separately as DEAD may be + * visible without ATOMIC if we race with percpu_ref_kill(). DEAD + * implies ATOMIC anyway. Test them together. + */ + if (unlikely(percpu_ptr & __PERCPU_REF_ATOMIC_DEAD)) return false; *percpu_countp = (unsigned long __percpu *)percpu_ptr; -- cgit v1.2.1 From af726f21ed8af2cdaa4e93098dc211521218ae65 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 22 Nov 2014 18:00:31 -0800 Subject: x86_64, traps: Fix the espfix64 #DF fixup and rewrite it in C There's nothing special enough about the espfix64 double fault fixup to justify writing it in assembly. Move it to C. This also fixes a bug: if the double fault came from an IST stack, the old asm code would return to a partially uninitialized stack frame. Fixes: 3891a04aafd668686239349ea58f3314ea2af86b Signed-off-by: Andy Lutomirski Reviewed-by: Thomas Gleixner Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- arch/x86/kernel/entry_64.S | 34 ++-------------------------------- arch/x86/kernel/traps.c | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 32 deletions(-) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index df088bb03fb3..a4dc8de7c4be 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -828,6 +828,7 @@ ENTRY(native_iret) jnz native_irq_return_ldt #endif +.global native_irq_return_iret native_irq_return_iret: iretq _ASM_EXTABLE(native_irq_return_iret, bad_iret) @@ -922,37 +923,6 @@ ENTRY(retint_kernel) CFI_ENDPROC END(common_interrupt) - /* - * If IRET takes a fault on the espfix stack, then we - * end up promoting it to a doublefault. In that case, - * modify the stack to make it look like we just entered - * the #GP handler from user space, similar to bad_iret. - */ -#ifdef CONFIG_X86_ESPFIX64 - ALIGN -__do_double_fault: - XCPT_FRAME 1 RDI+8 - movq RSP(%rdi),%rax /* Trap on the espfix stack? */ - sarq $PGDIR_SHIFT,%rax - cmpl $ESPFIX_PGD_ENTRY,%eax - jne do_double_fault /* No, just deliver the fault */ - cmpl $__KERNEL_CS,CS(%rdi) - jne do_double_fault - movq RIP(%rdi),%rax - cmpq $native_irq_return_iret,%rax - jne do_double_fault /* This shouldn't happen... */ - movq PER_CPU_VAR(kernel_stack),%rax - subq $(6*8-KERNEL_STACK_OFFSET),%rax /* Reset to original stack */ - movq %rax,RSP(%rdi) - movq $0,(%rax) /* Missing (lost) #GP error code */ - movq $general_protection,RIP(%rdi) - retq - CFI_ENDPROC -END(__do_double_fault) -#else -# define __do_double_fault do_double_fault -#endif - /* * APIC interrupts. */ @@ -1124,7 +1094,7 @@ idtentry overflow do_overflow has_error_code=0 idtentry bounds do_bounds has_error_code=0 idtentry invalid_op do_invalid_op has_error_code=0 idtentry device_not_available do_device_not_available has_error_code=0 -idtentry double_fault __do_double_fault has_error_code=1 paranoid=1 +idtentry double_fault do_double_fault has_error_code=1 paranoid=1 idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0 idtentry invalid_TSS do_invalid_TSS has_error_code=1 idtentry segment_not_present do_segment_not_present has_error_code=1 diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 0d0e922fafc1..819662746e23 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -259,6 +259,30 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) static const char str[] = "double fault"; struct task_struct *tsk = current; +#ifdef CONFIG_X86_ESPFIX64 + extern unsigned char native_irq_return_iret[]; + + /* + * If IRET takes a non-IST fault on the espfix64 stack, then we + * end up promoting it to a doublefault. In that case, modify + * the stack to make it look like we just entered the #GP + * handler from user space, similar to bad_iret. + */ + if (((long)regs->sp >> PGDIR_SHIFT) == ESPFIX_PGD_ENTRY && + regs->cs == __KERNEL_CS && + regs->ip == (unsigned long)native_irq_return_iret) + { + struct pt_regs *normal_regs = task_pt_regs(current); + + /* Fake a #GP(0) from userspace. */ + memmove(&normal_regs->ip, (void *)regs->sp, 5*8); + normal_regs->orig_ax = 0; /* Missing (lost) #GP error code */ + regs->ip = (unsigned long)general_protection; + regs->sp = (unsigned long)&normal_regs->orig_ax; + return; + } +#endif + exception_enter(); /* Return not checked because double check cannot be ignored */ notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV); -- cgit v1.2.1 From 6f442be2fb22be02cafa606f1769fa1e6f894441 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 22 Nov 2014 18:00:32 -0800 Subject: x86_64, traps: Stop using IST for #SS On a 32-bit kernel, this has no effect, since there are no IST stacks. On a 64-bit kernel, #SS can only happen in user code, on a failed iret to user space, a canonical violation on access via RSP or RBP, or a genuine stack segment violation in 32-bit kernel code. The first two cases don't need IST, and the latter two cases are unlikely fatal bugs, and promoting them to double faults would be fine. This fixes a bug in which the espfix64 code mishandles a stack segment violation. This saves 4k of memory per CPU and a tiny bit of code. Signed-off-by: Andy Lutomirski Reviewed-by: Thomas Gleixner Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- arch/x86/include/asm/page_32_types.h | 1 - arch/x86/include/asm/page_64_types.h | 11 +++++------ arch/x86/include/asm/traps.h | 1 + arch/x86/kernel/dumpstack_64.c | 1 - arch/x86/kernel/entry_64.S | 2 +- arch/x86/kernel/traps.c | 18 +----------------- 6 files changed, 8 insertions(+), 26 deletions(-) diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h index f48b17df4224..3a52ee0e726d 100644 --- a/arch/x86/include/asm/page_32_types.h +++ b/arch/x86/include/asm/page_32_types.h @@ -20,7 +20,6 @@ #define THREAD_SIZE_ORDER 1 #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) -#define STACKFAULT_STACK 0 #define DOUBLEFAULT_STACK 1 #define NMI_STACK 0 #define DEBUG_STACK 0 diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 678205195ae1..75450b2c7be4 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -14,12 +14,11 @@ #define IRQ_STACK_ORDER 2 #define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER) -#define STACKFAULT_STACK 1 -#define DOUBLEFAULT_STACK 2 -#define NMI_STACK 3 -#define DEBUG_STACK 4 -#define MCE_STACK 5 -#define N_EXCEPTION_STACKS 5 /* hw limit: 7 */ +#define DOUBLEFAULT_STACK 1 +#define NMI_STACK 2 +#define DEBUG_STACK 3 +#define MCE_STACK 4 +#define N_EXCEPTION_STACKS 4 /* hw limit: 7 */ #define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) #define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index bc8352e7010a..707adc6549d8 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -39,6 +39,7 @@ asmlinkage void simd_coprocessor_error(void); #ifdef CONFIG_TRACING asmlinkage void trace_page_fault(void); +#define trace_stack_segment stack_segment #define trace_divide_error divide_error #define trace_bounds bounds #define trace_invalid_op invalid_op diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 1abcb50b48ae..ff86f19b5758 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -24,7 +24,6 @@ static char x86_stack_ids[][8] = { [ DEBUG_STACK-1 ] = "#DB", [ NMI_STACK-1 ] = "NMI", [ DOUBLEFAULT_STACK-1 ] = "#DF", - [ STACKFAULT_STACK-1 ] = "#SS", [ MCE_STACK-1 ] = "#MC", #if DEBUG_STKSZ > EXCEPTION_STKSZ [ N_EXCEPTION_STACKS ... diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index a4dc8de7c4be..49a0c1781253 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1259,7 +1259,7 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \ idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK -idtentry stack_segment do_stack_segment has_error_code=1 paranoid=1 +idtentry stack_segment do_stack_segment has_error_code=1 #ifdef CONFIG_XEN idtentry xen_debug do_debug has_error_code=0 idtentry xen_int3 do_int3 has_error_code=0 diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 819662746e23..48035e9cdde9 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -233,27 +233,11 @@ DO_ERROR(X86_TRAP_UD, SIGILL, "invalid opcode", invalid_op) DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun",coprocessor_segment_overrun) DO_ERROR(X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS) DO_ERROR(X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present) -#ifdef CONFIG_X86_32 DO_ERROR(X86_TRAP_SS, SIGBUS, "stack segment", stack_segment) -#endif DO_ERROR(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check) #ifdef CONFIG_X86_64 /* Runs on IST stack */ -dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code) -{ - enum ctx_state prev_state; - - prev_state = exception_enter(); - if (notify_die(DIE_TRAP, "stack segment", regs, error_code, - X86_TRAP_SS, SIGBUS) != NOTIFY_STOP) { - preempt_conditional_sti(regs); - do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL); - preempt_conditional_cli(regs); - } - exception_exit(prev_state); -} - dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) { static const char str[] = "double fault"; @@ -802,7 +786,7 @@ void __init trap_init(void) set_intr_gate(X86_TRAP_OLD_MF, coprocessor_segment_overrun); set_intr_gate(X86_TRAP_TS, invalid_TSS); set_intr_gate(X86_TRAP_NP, segment_not_present); - set_intr_gate_ist(X86_TRAP_SS, &stack_segment, STACKFAULT_STACK); + set_intr_gate(X86_TRAP_SS, stack_segment); set_intr_gate(X86_TRAP_GP, general_protection); set_intr_gate(X86_TRAP_SPURIOUS, spurious_interrupt_bug); set_intr_gate(X86_TRAP_MF, coprocessor_error); -- cgit v1.2.1 From b645af2d5905c4e32399005b867987919cbfc3ae Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 22 Nov 2014 18:00:33 -0800 Subject: x86_64, traps: Rework bad_iret It's possible for iretq to userspace to fail. This can happen because of a bad CS, SS, or RIP. Historically, we've handled it by fixing up an exception from iretq to land at bad_iret, which pretends that the failed iret frame was really the hardware part of #GP(0) from userspace. To make this work, there's an extra fixup to fudge the gs base into a usable state. This is suboptimal because it loses the original exception. It's also buggy because there's no guarantee that we were on the kernel stack to begin with. For example, if the failing iret happened on return from an NMI, then we'll end up executing general_protection on the NMI stack. This is bad for several reasons, the most immediate of which is that general_protection, as a non-paranoid idtentry, will try to deliver signals and/or schedule from the wrong stack. This patch throws out bad_iret entirely. As a replacement, it augments the existing swapgs fudge into a full-blown iret fixup, mostly written in C. It's should be clearer and more correct. Signed-off-by: Andy Lutomirski Reviewed-by: Thomas Gleixner Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- arch/x86/kernel/entry_64.S | 45 +++++++++++++++++++-------------------------- arch/x86/kernel/traps.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 26 deletions(-) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 49a0c1781253..c0226ab54106 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -830,8 +830,13 @@ ENTRY(native_iret) .global native_irq_return_iret native_irq_return_iret: + /* + * This may fault. Non-paranoid faults on return to userspace are + * handled by fixup_bad_iret. These include #SS, #GP, and #NP. + * Double-faults due to espfix64 are handled in do_double_fault. + * Other faults here are fatal. + */ iretq - _ASM_EXTABLE(native_irq_return_iret, bad_iret) #ifdef CONFIG_X86_ESPFIX64 native_irq_return_ldt: @@ -859,25 +864,6 @@ native_irq_return_ldt: jmp native_irq_return_iret #endif - .section .fixup,"ax" -bad_iret: - /* - * The iret traps when the %cs or %ss being restored is bogus. - * We've lost the original trap vector and error code. - * #GPF is the most likely one to get for an invalid selector. - * So pretend we completed the iret and took the #GPF in user mode. - * - * We are now running with the kernel GS after exception recovery. - * But error_entry expects us to have user GS to match the user %cs, - * so swap back. - */ - pushq $0 - - SWAPGS - jmp general_protection - - .previous - /* edi: workmask, edx: work */ retint_careful: CFI_RESTORE_STATE @@ -1369,17 +1355,16 @@ error_sti: /* * There are two places in the kernel that can potentially fault with - * usergs. Handle them here. The exception handlers after iret run with - * kernel gs again, so don't set the user space flag. B stepping K8s - * sometimes report an truncated RIP for IRET exceptions returning to - * compat mode. Check for these here too. + * usergs. Handle them here. B stepping K8s sometimes report a + * truncated RIP for IRET exceptions returning to compat mode. Check + * for these here too. */ error_kernelspace: CFI_REL_OFFSET rcx, RCX+8 incl %ebx leaq native_irq_return_iret(%rip),%rcx cmpq %rcx,RIP+8(%rsp) - je error_swapgs + je error_bad_iret movl %ecx,%eax /* zero extend */ cmpq %rax,RIP+8(%rsp) je bstep_iret @@ -1390,7 +1375,15 @@ error_kernelspace: bstep_iret: /* Fix truncated RIP */ movq %rcx,RIP+8(%rsp) - jmp error_swapgs + /* fall through */ + +error_bad_iret: + SWAPGS + mov %rsp,%rdi + call fixup_bad_iret + mov %rax,%rsp + decl %ebx /* Return to usergs */ + jmp error_sti CFI_ENDPROC END(error_entry) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 48035e9cdde9..de801f22128a 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -407,6 +407,35 @@ asmlinkage __visible struct pt_regs *sync_regs(struct pt_regs *eregs) return regs; } NOKPROBE_SYMBOL(sync_regs); + +struct bad_iret_stack { + void *error_entry_ret; + struct pt_regs regs; +}; + +asmlinkage __visible +struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s) +{ + /* + * This is called from entry_64.S early in handling a fault + * caused by a bad iret to user mode. To handle the fault + * correctly, we want move our stack frame to task_pt_regs + * and we want to pretend that the exception came from the + * iret target. + */ + struct bad_iret_stack *new_stack = + container_of(task_pt_regs(current), + struct bad_iret_stack, regs); + + /* Copy the IRET target to the new stack. */ + memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8); + + /* Copy the remainder of the stack from the current stack. */ + memmove(new_stack, s, offsetof(struct bad_iret_stack, regs.ip)); + + BUG_ON(!user_mode_vm(&new_stack->regs)); + return new_stack; +} #endif /* -- cgit v1.2.1 From 90e362f4a75d0911ca75e5cd95591a6cf1f169dc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 23 Nov 2014 23:04:52 +0100 Subject: sched: Provide update_curr callbacks for stop/idle scheduling classes Chris bisected a NULL pointer deference in task_sched_runtime() to commit 6e998916dfe3 'sched/cputime: Fix clock_nanosleep()/clock_gettime() inconsistency'. Chris observed crashes in atop or other /proc walking programs when he started fork bombs on his machine. He assumed that this is a new exit race, but that does not make any sense when looking at that commit. What's interesting is that, the commit provides update_curr callbacks for all scheduling classes except stop_task and idle_task. While nothing can ever hit that via the clock_nanosleep() and clock_gettime() interfaces, which have been the target of the commit in question, the author obviously forgot that there are other code paths which invoke task_sched_runtime() do_task_stat(() thread_group_cputime_adjusted() thread_group_cputime() task_cputime() task_sched_runtime() if (task_current(rq, p) && task_on_rq_queued(p)) { update_rq_clock(rq); up->sched_class->update_curr(rq); } If the stats are read for a stomp machine task, aka 'migration/N' and that task is current on its cpu, this will happily call the NULL pointer of stop_task->update_curr. Ooops. Chris observation that this happens faster when he runs the fork bomb makes sense as the fork bomb will kick migration threads more often so the probability to hit the issue will increase. Add the missing update_curr callbacks to the scheduler classes stop_task and idle_task. While idle tasks cannot be monitored via /proc we have other means to hit the idle case. Fixes: 6e998916dfe3 'sched/cputime: Fix clock_nanosleep()/clock_gettime() inconsistency' Reported-by: Chris Mason Reported-and-tested-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Ingo Molnar Cc: Stanislaw Gruszka Cc: Peter Zijlstra Signed-off-by: Linus Torvalds --- kernel/sched/idle_task.c | 5 +++++ kernel/sched/stop_task.c | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c index 67ad4e7f506a..c65dac8c97cd 100644 --- a/kernel/sched/idle_task.c +++ b/kernel/sched/idle_task.c @@ -75,6 +75,10 @@ static unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task return 0; } +static void update_curr_idle(struct rq *rq) +{ +} + /* * Simple, special scheduling class for the per-CPU idle tasks: */ @@ -101,4 +105,5 @@ const struct sched_class idle_sched_class = { .prio_changed = prio_changed_idle, .switched_to = switched_to_idle, + .update_curr = update_curr_idle, }; diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c index 67426e529f59..79ffec45a6ac 100644 --- a/kernel/sched/stop_task.c +++ b/kernel/sched/stop_task.c @@ -102,6 +102,10 @@ get_rr_interval_stop(struct rq *rq, struct task_struct *task) return 0; } +static void update_curr_stop(struct rq *rq) +{ +} + /* * Simple, special scheduling class for the per-CPU stop tasks: */ @@ -128,4 +132,5 @@ const struct sched_class stop_sched_class = { .prio_changed = prio_changed_stop, .switched_to = switched_to_stop, + .update_curr = update_curr_stop, }; -- cgit v1.2.1 From 82975bc6a6df743b9a01810fb32cb65d0ec5d60b Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 21 Nov 2014 13:26:07 -0800 Subject: uprobes, x86: Fix _TIF_UPROBE vs _TIF_NOTIFY_RESUME x86 call do_notify_resume on paranoid returns if TIF_UPROBE is set but not on non-paranoid returns. I suspect that this is a mistake and that the code only works because int3 is paranoid. Setting _TIF_NOTIFY_RESUME in the uprobe code was probably a workaround for the x86 bug. With that bug fixed, we can remove _TIF_NOTIFY_RESUME from the uprobes code. Reported-by: Oleg Nesterov Acked-by: Srikar Dronamraju Acked-by: Borislav Petkov Signed-off-by: Andy Lutomirski Signed-off-by: Linus Torvalds --- arch/x86/include/asm/thread_info.h | 2 +- kernel/events/uprobes.c | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 854053889d4d..547e344a6dc6 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -141,7 +141,7 @@ struct thread_info { /* Only used for 64 bit */ #define _TIF_DO_NOTIFY_MASK \ (_TIF_SIGPENDING | _TIF_MCE_NOTIFY | _TIF_NOTIFY_RESUME | \ - _TIF_USER_RETURN_NOTIFY) + _TIF_USER_RETURN_NOTIFY | _TIF_UPROBE) /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 1d0af8a2c646..ed8f2cde34c5 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1640,7 +1640,6 @@ bool uprobe_deny_signal(void) if (__fatal_signal_pending(t) || arch_uprobe_xol_was_trapped(t)) { utask->state = UTASK_SSTEP_TRAPPED; set_tsk_thread_flag(t, TIF_UPROBE); - set_tsk_thread_flag(t, TIF_NOTIFY_RESUME); } } -- cgit v1.2.1 From 5d01410fe4d92081f349b013a2e7a95429e4f2c9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 23 Nov 2014 15:25:20 -0800 Subject: Linux 3.18-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 00d618bbe8e7..2fd5c4e5c139 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 18 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Diseased Newt # *DOCUMENTATION* -- cgit v1.2.1 From f90523d1aa3c414031094ff2284d47791f0c0a05 Mon Sep 17 00:00:00 2001 From: Zhao Lei Date: Thu, 13 Nov 2014 11:45:39 +0800 Subject: Btrfs: remove noused bbio_ret in __btrfs_map_block in condition bbio_ret in this condition is always !NULL because previous code already have a check-and-skip: 4908 if (!bbio_ret) 4909 goto out; Signed-off-by: Zhao Lei Signed-off-by: Miao Xie Reviewed-by: David Sterba --- fs/btrfs/volumes.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index d47289c715c8..20160aa44655 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -5161,8 +5161,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, BTRFS_BLOCK_GROUP_RAID6)) { u64 tmp; - if (bbio_ret && ((rw & REQ_WRITE) || mirror_num > 1) - && raid_map_ret) { + if (raid_map_ret && ((rw & REQ_WRITE) || mirror_num > 1)) { int i, rot; /* push stripe_nr back to the start of the full stripe */ -- cgit v1.2.1 From 6de65650758e819d3dfdc621010dcd6117e8d186 Mon Sep 17 00:00:00 2001 From: Zhao Lei Date: Thu, 13 Nov 2014 11:45:40 +0800 Subject: Btrfs: remove unnecessary code of stripe_index assignment in __btrfs_map_block stripe_index's value was set again in latter line: stripe_index = 0; Signed-off-by: Zhao Lei Signed-off-by: Miao Xie Reviewed-by: David Sterba --- fs/btrfs/volumes.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 20160aa44655..6f5b302a08cf 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -5166,9 +5166,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, /* push stripe_nr back to the start of the full stripe */ stripe_nr = raid56_full_stripe_start; - do_div(stripe_nr, stripe_len); - - stripe_index = do_div(stripe_nr, nr_data_stripes(map)); + do_div(stripe_nr, stripe_len * nr_data_stripes(map)); /* RAID[56] write or recovery. Return all stripes */ num_stripes = map->num_stripes; -- cgit v1.2.1 From b89e1b012c7f81123344058d5f245b844464d30c Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 15 Oct 2014 11:18:44 +0800 Subject: Btrfs, raid56: don't change bbio and raid_map Because we will reuse bbio and raid_map during the scrub later, it is better that we don't change any variant of bbio and don't free it at the end of IO request. So we introduced similar variants into the raid bio, and don't access those bbio's variants any more. Signed-off-by: Miao Xie --- fs/btrfs/raid56.c | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 6a41631cb959..c54b0e64c590 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -58,7 +58,6 @@ */ #define RBIO_CACHE_READY_BIT 3 - #define RBIO_CACHE_SIZE 1024 struct btrfs_raid_bio { @@ -146,6 +145,10 @@ struct btrfs_raid_bio { atomic_t refs; + + atomic_t stripes_pending; + + atomic_t error; /* * these are two arrays of pointers. We allocate the * rbio big enough to hold them both and setup their @@ -858,13 +861,13 @@ static void raid_write_end_io(struct bio *bio, int err) bio_put(bio); - if (!atomic_dec_and_test(&rbio->bbio->stripes_pending)) + if (!atomic_dec_and_test(&rbio->stripes_pending)) return; err = 0; /* OK, we have read all the stripes we need to. */ - if (atomic_read(&rbio->bbio->error) > rbio->bbio->max_errors) + if (atomic_read(&rbio->error) > rbio->bbio->max_errors) err = -EIO; rbio_orig_end_io(rbio, err, 0); @@ -949,6 +952,8 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root, rbio->faila = -1; rbio->failb = -1; atomic_set(&rbio->refs, 1); + atomic_set(&rbio->error, 0); + atomic_set(&rbio->stripes_pending, 0); /* * the stripe_pages and bio_pages array point to the extra @@ -1169,7 +1174,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio) set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags); spin_unlock_irq(&rbio->bio_list_lock); - atomic_set(&rbio->bbio->error, 0); + atomic_set(&rbio->error, 0); /* * now that we've set rmw_locked, run through the @@ -1245,8 +1250,8 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio) } } - atomic_set(&bbio->stripes_pending, bio_list_size(&bio_list)); - BUG_ON(atomic_read(&bbio->stripes_pending) == 0); + atomic_set(&rbio->stripes_pending, bio_list_size(&bio_list)); + BUG_ON(atomic_read(&rbio->stripes_pending) == 0); while (1) { bio = bio_list_pop(&bio_list); @@ -1331,11 +1336,11 @@ static int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed) if (rbio->faila == -1) { /* first failure on this rbio */ rbio->faila = failed; - atomic_inc(&rbio->bbio->error); + atomic_inc(&rbio->error); } else if (rbio->failb == -1) { /* second failure on this rbio */ rbio->failb = failed; - atomic_inc(&rbio->bbio->error); + atomic_inc(&rbio->error); } else { ret = -EIO; } @@ -1394,11 +1399,11 @@ static void raid_rmw_end_io(struct bio *bio, int err) bio_put(bio); - if (!atomic_dec_and_test(&rbio->bbio->stripes_pending)) + if (!atomic_dec_and_test(&rbio->stripes_pending)) return; err = 0; - if (atomic_read(&rbio->bbio->error) > rbio->bbio->max_errors) + if (atomic_read(&rbio->error) > rbio->bbio->max_errors) goto cleanup; /* @@ -1439,7 +1444,6 @@ static void async_read_rebuild(struct btrfs_raid_bio *rbio) static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio) { int bios_to_read = 0; - struct btrfs_bio *bbio = rbio->bbio; struct bio_list bio_list; int ret; int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE); @@ -1455,7 +1459,7 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio) index_rbio_pages(rbio); - atomic_set(&rbio->bbio->error, 0); + atomic_set(&rbio->error, 0); /* * build a list of bios to read all the missing parts of this * stripe @@ -1503,7 +1507,7 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio) * the bbio may be freed once we submit the last bio. Make sure * not to touch it after that */ - atomic_set(&bbio->stripes_pending, bios_to_read); + atomic_set(&rbio->stripes_pending, bios_to_read); while (1) { bio = bio_list_pop(&bio_list); if (!bio) @@ -1917,10 +1921,10 @@ static void raid_recover_end_io(struct bio *bio, int err) set_bio_pages_uptodate(bio); bio_put(bio); - if (!atomic_dec_and_test(&rbio->bbio->stripes_pending)) + if (!atomic_dec_and_test(&rbio->stripes_pending)) return; - if (atomic_read(&rbio->bbio->error) > rbio->bbio->max_errors) + if (atomic_read(&rbio->error) > rbio->bbio->max_errors) rbio_orig_end_io(rbio, -EIO, 0); else __raid_recover_end_io(rbio); @@ -1951,7 +1955,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio) if (ret) goto cleanup; - atomic_set(&rbio->bbio->error, 0); + atomic_set(&rbio->error, 0); /* * read everything that hasn't failed. Thanks to the @@ -1960,7 +1964,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio) */ for (stripe = 0; stripe < bbio->num_stripes; stripe++) { if (rbio->faila == stripe || rbio->failb == stripe) { - atomic_inc(&rbio->bbio->error); + atomic_inc(&rbio->error); continue; } @@ -1990,7 +1994,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio) * were up to date, or we might have no bios to read because * the devices were gone. */ - if (atomic_read(&rbio->bbio->error) <= rbio->bbio->max_errors) { + if (atomic_read(&rbio->error) <= rbio->bbio->max_errors) { __raid_recover_end_io(rbio); goto out; } else { @@ -2002,7 +2006,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio) * the bbio may be freed once we submit the last bio. Make sure * not to touch it after that */ - atomic_set(&bbio->stripes_pending, bios_to_read); + atomic_set(&rbio->stripes_pending, bios_to_read); while (1) { bio = bio_list_pop(&bio_list); if (!bio) -- cgit v1.2.1 From af8e2d1df9848b39dd86b1e696bf8781d2020a88 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 23 Oct 2014 14:42:50 +0800 Subject: Btrfs, scrub: repair the common data on RAID5/6 if it is corrupted This patch implement the RAID5/6 common data repair function, the implementation is similar to the scrub on the other RAID such as RAID1, the differentia is that we don't read the data from the mirror, we use the data repair function of RAID5/6. Signed-off-by: Miao Xie --- fs/btrfs/raid56.c | 52 ++++++++++---- fs/btrfs/raid56.h | 2 +- fs/btrfs/scrub.c | 194 ++++++++++++++++++++++++++++++++++++++++++++++++----- fs/btrfs/volumes.c | 16 ++++- fs/btrfs/volumes.h | 4 ++ 5 files changed, 235 insertions(+), 33 deletions(-) diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index c54b0e64c590..95053a903474 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -58,6 +58,15 @@ */ #define RBIO_CACHE_READY_BIT 3 +/* + * bbio and raid_map is managed by the caller, so we shouldn't free + * them here. And besides that, all rbios with this flag should not + * be cached, because we need raid_map to check the rbios' stripe + * is the same or not, but it is very likely that the caller has + * free raid_map, so don't cache those rbios. + */ +#define RBIO_HOLD_BBIO_MAP_BIT 4 + #define RBIO_CACHE_SIZE 1024 struct btrfs_raid_bio { @@ -799,6 +808,21 @@ done_nolock: remove_rbio_from_cache(rbio); } +static inline void +__free_bbio_and_raid_map(struct btrfs_bio *bbio, u64 *raid_map, int need) +{ + if (need) { + kfree(raid_map); + kfree(bbio); + } +} + +static inline void free_bbio_and_raid_map(struct btrfs_raid_bio *rbio) +{ + __free_bbio_and_raid_map(rbio->bbio, rbio->raid_map, + !test_bit(RBIO_HOLD_BBIO_MAP_BIT, &rbio->flags)); +} + static void __free_raid_bio(struct btrfs_raid_bio *rbio) { int i; @@ -817,8 +841,9 @@ static void __free_raid_bio(struct btrfs_raid_bio *rbio) rbio->stripe_pages[i] = NULL; } } - kfree(rbio->raid_map); - kfree(rbio->bbio); + + free_bbio_and_raid_map(rbio); + kfree(rbio); } @@ -933,11 +958,8 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root, rbio = kzalloc(sizeof(*rbio) + num_pages * sizeof(struct page *) * 2, GFP_NOFS); - if (!rbio) { - kfree(raid_map); - kfree(bbio); + if (!rbio) return ERR_PTR(-ENOMEM); - } bio_list_init(&rbio->bio_list); INIT_LIST_HEAD(&rbio->plug_list); @@ -1692,8 +1714,10 @@ int raid56_parity_write(struct btrfs_root *root, struct bio *bio, struct blk_plug_cb *cb; rbio = alloc_rbio(root, bbio, raid_map, stripe_len); - if (IS_ERR(rbio)) + if (IS_ERR(rbio)) { + __free_bbio_and_raid_map(bbio, raid_map, 1); return PTR_ERR(rbio); + } bio_list_add(&rbio->bio_list, bio); rbio->bio_list_bytes = bio->bi_iter.bi_size; @@ -1888,7 +1912,8 @@ cleanup: cleanup_io: if (rbio->read_rebuild) { - if (err == 0) + if (err == 0 && + !test_bit(RBIO_HOLD_BBIO_MAP_BIT, &rbio->flags)) cache_rbio_pages(rbio); else clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags); @@ -2038,15 +2063,19 @@ cleanup: */ int raid56_parity_recover(struct btrfs_root *root, struct bio *bio, struct btrfs_bio *bbio, u64 *raid_map, - u64 stripe_len, int mirror_num) + u64 stripe_len, int mirror_num, int hold_bbio) { struct btrfs_raid_bio *rbio; int ret; rbio = alloc_rbio(root, bbio, raid_map, stripe_len); - if (IS_ERR(rbio)) + if (IS_ERR(rbio)) { + __free_bbio_and_raid_map(bbio, raid_map, !hold_bbio); return PTR_ERR(rbio); + } + if (hold_bbio) + set_bit(RBIO_HOLD_BBIO_MAP_BIT, &rbio->flags); rbio->read_rebuild = 1; bio_list_add(&rbio->bio_list, bio); rbio->bio_list_bytes = bio->bi_iter.bi_size; @@ -2054,8 +2083,7 @@ int raid56_parity_recover(struct btrfs_root *root, struct bio *bio, rbio->faila = find_logical_bio_stripe(rbio, bio); if (rbio->faila == -1) { BUG(); - kfree(raid_map); - kfree(bbio); + __free_bbio_and_raid_map(bbio, raid_map, !hold_bbio); kfree(rbio); return -EIO; } diff --git a/fs/btrfs/raid56.h b/fs/btrfs/raid56.h index ea5d73bfdfbe..b310e8c830d1 100644 --- a/fs/btrfs/raid56.h +++ b/fs/btrfs/raid56.h @@ -41,7 +41,7 @@ static inline int nr_data_stripes(struct map_lookup *map) int raid56_parity_recover(struct btrfs_root *root, struct bio *bio, struct btrfs_bio *bbio, u64 *raid_map, - u64 stripe_len, int mirror_num); + u64 stripe_len, int mirror_num, int hold_bbio); int raid56_parity_write(struct btrfs_root *root, struct bio *bio, struct btrfs_bio *bbio, u64 *raid_map, u64 stripe_len); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index efa083113827..ca4b9eb8b5da 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -63,6 +63,13 @@ struct scrub_ctx; */ #define SCRUB_MAX_PAGES_PER_BLOCK 16 /* 64k per node/leaf/sector */ +struct scrub_recover { + atomic_t refs; + struct btrfs_bio *bbio; + u64 *raid_map; + u64 map_length; +}; + struct scrub_page { struct scrub_block *sblock; struct page *page; @@ -79,6 +86,8 @@ struct scrub_page { unsigned int io_error:1; }; u8 csum[BTRFS_CSUM_SIZE]; + + struct scrub_recover *recover; }; struct scrub_bio { @@ -196,7 +205,7 @@ static int scrub_setup_recheck_block(struct scrub_ctx *sctx, static void scrub_recheck_block(struct btrfs_fs_info *fs_info, struct scrub_block *sblock, int is_metadata, int have_csum, u8 *csum, u64 generation, - u16 csum_size); + u16 csum_size, int retry_failed_mirror); static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, struct scrub_block *sblock, int is_metadata, int have_csum, @@ -790,6 +799,20 @@ out: scrub_pending_trans_workers_dec(sctx); } +static inline void scrub_get_recover(struct scrub_recover *recover) +{ + atomic_inc(&recover->refs); +} + +static inline void scrub_put_recover(struct scrub_recover *recover) +{ + if (atomic_dec_and_test(&recover->refs)) { + kfree(recover->bbio); + kfree(recover->raid_map); + kfree(recover); + } +} + /* * scrub_handle_errored_block gets called when either verification of the * pages failed or the bio failed to read, e.g. with EIO. In the latter @@ -906,7 +929,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) /* build and submit the bios for the failed mirror, check checksums */ scrub_recheck_block(fs_info, sblock_bad, is_metadata, have_csum, - csum, generation, sctx->csum_size); + csum, generation, sctx->csum_size, 1); if (!sblock_bad->header_error && !sblock_bad->checksum_error && sblock_bad->no_io_error_seen) { @@ -1019,7 +1042,7 @@ nodatasum_case: /* build and submit the bios, check checksums */ scrub_recheck_block(fs_info, sblock_other, is_metadata, have_csum, csum, generation, - sctx->csum_size); + sctx->csum_size, 0); if (!sblock_other->header_error && !sblock_other->checksum_error && @@ -1169,7 +1192,7 @@ nodatasum_case: */ scrub_recheck_block(fs_info, sblock_bad, is_metadata, have_csum, csum, - generation, sctx->csum_size); + generation, sctx->csum_size, 1); if (!sblock_bad->header_error && !sblock_bad->checksum_error && sblock_bad->no_io_error_seen) @@ -1201,11 +1224,18 @@ out: mirror_index++) { struct scrub_block *sblock = sblocks_for_recheck + mirror_index; + struct scrub_recover *recover; int page_index; for (page_index = 0; page_index < sblock->page_count; page_index++) { sblock->pagev[page_index]->sblock = NULL; + recover = sblock->pagev[page_index]->recover; + if (recover) { + scrub_put_recover(recover); + sblock->pagev[page_index]->recover = + NULL; + } scrub_page_put(sblock->pagev[page_index]); } } @@ -1215,14 +1245,63 @@ out: return 0; } +static inline int scrub_nr_raid_mirrors(struct btrfs_bio *bbio, u64 *raid_map) +{ + if (raid_map) { + if (raid_map[bbio->num_stripes - 1] == RAID6_Q_STRIPE) + return 3; + else + return 2; + } else { + return (int)bbio->num_stripes; + } +} + +static inline void scrub_stripe_index_and_offset(u64 logical, u64 *raid_map, + u64 mapped_length, + int nstripes, int mirror, + int *stripe_index, + u64 *stripe_offset) +{ + int i; + + if (raid_map) { + /* RAID5/6 */ + for (i = 0; i < nstripes; i++) { + if (raid_map[i] == RAID6_Q_STRIPE || + raid_map[i] == RAID5_P_STRIPE) + continue; + + if (logical >= raid_map[i] && + logical < raid_map[i] + mapped_length) + break; + } + + *stripe_index = i; + *stripe_offset = logical - raid_map[i]; + } else { + /* The other RAID type */ + *stripe_index = mirror; + *stripe_offset = 0; + } +} + static int scrub_setup_recheck_block(struct scrub_ctx *sctx, struct btrfs_fs_info *fs_info, struct scrub_block *original_sblock, u64 length, u64 logical, struct scrub_block *sblocks_for_recheck) { + struct scrub_recover *recover; + struct btrfs_bio *bbio; + u64 *raid_map; + u64 sublen; + u64 mapped_length; + u64 stripe_offset; + int stripe_index; int page_index; int mirror_index; + int nmirrors; int ret; /* @@ -1233,23 +1312,39 @@ static int scrub_setup_recheck_block(struct scrub_ctx *sctx, page_index = 0; while (length > 0) { - u64 sublen = min_t(u64, length, PAGE_SIZE); - u64 mapped_length = sublen; - struct btrfs_bio *bbio = NULL; + sublen = min_t(u64, length, PAGE_SIZE); + mapped_length = sublen; + bbio = NULL; + raid_map = NULL; /* * with a length of PAGE_SIZE, each returned stripe * represents one mirror */ - ret = btrfs_map_block(fs_info, REQ_GET_READ_MIRRORS, logical, - &mapped_length, &bbio, 0); + ret = btrfs_map_sblock(fs_info, REQ_GET_READ_MIRRORS, logical, + &mapped_length, &bbio, 0, &raid_map); if (ret || !bbio || mapped_length < sublen) { kfree(bbio); + kfree(raid_map); return -EIO; } + recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS); + if (!recover) { + kfree(bbio); + kfree(raid_map); + return -ENOMEM; + } + + atomic_set(&recover->refs, 1); + recover->bbio = bbio; + recover->raid_map = raid_map; + recover->map_length = mapped_length; + BUG_ON(page_index >= SCRUB_PAGES_PER_RD_BIO); - for (mirror_index = 0; mirror_index < (int)bbio->num_stripes; + + nmirrors = scrub_nr_raid_mirrors(bbio, raid_map); + for (mirror_index = 0; mirror_index < nmirrors; mirror_index++) { struct scrub_block *sblock; struct scrub_page *page; @@ -1265,26 +1360,38 @@ leave_nomem: spin_lock(&sctx->stat_lock); sctx->stat.malloc_errors++; spin_unlock(&sctx->stat_lock); - kfree(bbio); + scrub_put_recover(recover); return -ENOMEM; } scrub_page_get(page); sblock->pagev[page_index] = page; page->logical = logical; - page->physical = bbio->stripes[mirror_index].physical; + + scrub_stripe_index_and_offset(logical, raid_map, + mapped_length, + bbio->num_stripes, + mirror_index, + &stripe_index, + &stripe_offset); + page->physical = bbio->stripes[stripe_index].physical + + stripe_offset; + page->dev = bbio->stripes[stripe_index].dev; + BUG_ON(page_index >= original_sblock->page_count); page->physical_for_dev_replace = original_sblock->pagev[page_index]-> physical_for_dev_replace; /* for missing devices, dev->bdev is NULL */ - page->dev = bbio->stripes[mirror_index].dev; page->mirror_num = mirror_index + 1; sblock->page_count++; page->page = alloc_page(GFP_NOFS); if (!page->page) goto leave_nomem; + + scrub_get_recover(recover); + page->recover = recover; } - kfree(bbio); + scrub_put_recover(recover); length -= sublen; logical += sublen; page_index++; @@ -1293,6 +1400,51 @@ leave_nomem: return 0; } +struct scrub_bio_ret { + struct completion event; + int error; +}; + +static void scrub_bio_wait_endio(struct bio *bio, int error) +{ + struct scrub_bio_ret *ret = bio->bi_private; + + ret->error = error; + complete(&ret->event); +} + +static inline int scrub_is_page_on_raid56(struct scrub_page *page) +{ + return page->recover && page->recover->raid_map; +} + +static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info, + struct bio *bio, + struct scrub_page *page) +{ + struct scrub_bio_ret done; + int ret; + + init_completion(&done.event); + done.error = 0; + bio->bi_iter.bi_sector = page->logical >> 9; + bio->bi_private = &done; + bio->bi_end_io = scrub_bio_wait_endio; + + ret = raid56_parity_recover(fs_info->fs_root, bio, page->recover->bbio, + page->recover->raid_map, + page->recover->map_length, + page->mirror_num, 1); + if (ret) + return ret; + + wait_for_completion(&done.event); + if (done.error) + return -EIO; + + return 0; +} + /* * this function will check the on disk data for checksum errors, header * errors and read I/O errors. If any I/O errors happen, the exact pages @@ -1303,7 +1455,7 @@ leave_nomem: static void scrub_recheck_block(struct btrfs_fs_info *fs_info, struct scrub_block *sblock, int is_metadata, int have_csum, u8 *csum, u64 generation, - u16 csum_size) + u16 csum_size, int retry_failed_mirror) { int page_num; @@ -1329,11 +1481,17 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info, continue; } bio->bi_bdev = page->dev->bdev; - bio->bi_iter.bi_sector = page->physical >> 9; bio_add_page(bio, page->page, PAGE_SIZE, 0); - if (btrfsic_submit_bio_wait(READ, bio)) - sblock->no_io_error_seen = 0; + if (!retry_failed_mirror && scrub_is_page_on_raid56(page)) { + if (scrub_submit_raid56_bio_wait(fs_info, bio, page)) + sblock->no_io_error_seen = 0; + } else { + bio->bi_iter.bi_sector = page->physical >> 9; + + if (btrfsic_submit_bio_wait(READ, bio)) + sblock->no_io_error_seen = 0; + } bio_put(bio); } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 6f5b302a08cf..217c42ea90b0 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -5161,7 +5161,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, BTRFS_BLOCK_GROUP_RAID6)) { u64 tmp; - if (raid_map_ret && ((rw & REQ_WRITE) || mirror_num > 1)) { + if (raid_map_ret && + ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) || + mirror_num > 1)) { int i, rot; /* push stripe_nr back to the start of the full stripe */ @@ -5440,6 +5442,16 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, mirror_num, NULL); } +/* For Scrub/replace */ +int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int rw, + u64 logical, u64 *length, + struct btrfs_bio **bbio_ret, int mirror_num, + u64 **raid_map_ret) +{ + return __btrfs_map_block(fs_info, rw, logical, length, bbio_ret, + mirror_num, raid_map_ret); +} + int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, u64 chunk_start, u64 physical, u64 devid, u64 **logical, int *naddrs, int *stripe_len) @@ -5809,7 +5821,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, } else { ret = raid56_parity_recover(root, bio, bbio, raid_map, map_length, - mirror_num); + mirror_num, 0); } /* * FIXME, replace dosen't support raid56 yet, please fix diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 08980fa23039..01094bb804c7 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -393,6 +393,10 @@ int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, u64 logical, u64 *length, struct btrfs_bio **bbio_ret, int mirror_num); +int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int rw, + u64 logical, u64 *length, + struct btrfs_bio **bbio_ret, int mirror_num, + u64 **raid_map_ret); int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, u64 chunk_start, u64 physical, u64 devid, u64 **logical, int *naddrs, int *stripe_len); -- cgit v1.2.1 From 1b94b5567e9c70ad3b24bd5e576a422246875c2a Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 6 Nov 2014 16:14:21 +0800 Subject: Btrfs, raid56: use a variant to record the operation type We will introduce new operation type later, if we still use integer variant as bool variant to record the operation type, we would add new variant and increase the size of raid bio structure. It is not good, by this patch, we define different number for different operation, and we can just use a variant to record the operation type. Signed-off-by: Miao Xie --- fs/btrfs/raid56.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 95053a903474..0600bf69199e 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -69,6 +69,11 @@ #define RBIO_CACHE_SIZE 1024 +enum btrfs_rbio_ops { + BTRFS_RBIO_WRITE = 0, + BTRFS_RBIO_READ_REBUILD = 1, +}; + struct btrfs_raid_bio { struct btrfs_fs_info *fs_info; struct btrfs_bio *bbio; @@ -131,7 +136,7 @@ struct btrfs_raid_bio { * differently from a parity rebuild as part of * rmw */ - int read_rebuild; + enum btrfs_rbio_ops operation; /* first bad stripe */ int faila; @@ -154,7 +159,6 @@ struct btrfs_raid_bio { atomic_t refs; - atomic_t stripes_pending; atomic_t error; @@ -590,8 +594,7 @@ static int rbio_can_merge(struct btrfs_raid_bio *last, return 0; /* reads can't merge with writes */ - if (last->read_rebuild != - cur->read_rebuild) { + if (last->operation != cur->operation) { return 0; } @@ -784,9 +787,9 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio) spin_unlock(&rbio->bio_list_lock); spin_unlock_irqrestore(&h->lock, flags); - if (next->read_rebuild) + if (next->operation == BTRFS_RBIO_READ_REBUILD) async_read_rebuild(next); - else { + else if (next->operation == BTRFS_RBIO_WRITE){ steal_rbio(rbio, next); async_rmw_stripe(next); } @@ -1720,6 +1723,7 @@ int raid56_parity_write(struct btrfs_root *root, struct bio *bio, } bio_list_add(&rbio->bio_list, bio); rbio->bio_list_bytes = bio->bi_iter.bi_size; + rbio->operation = BTRFS_RBIO_WRITE; /* * don't plug on full rbios, just get them out the door @@ -1768,7 +1772,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio) faila = rbio->faila; failb = rbio->failb; - if (rbio->read_rebuild) { + if (rbio->operation == BTRFS_RBIO_READ_REBUILD) { spin_lock_irq(&rbio->bio_list_lock); set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags); spin_unlock_irq(&rbio->bio_list_lock); @@ -1785,7 +1789,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio) * if we're rebuilding a read, we have to use * pages from the bio list */ - if (rbio->read_rebuild && + if (rbio->operation == BTRFS_RBIO_READ_REBUILD && (stripe == faila || stripe == failb)) { page = page_in_rbio(rbio, stripe, pagenr, 0); } else { @@ -1878,7 +1882,7 @@ pstripe: * know they can be trusted. If this was a read reconstruction, * other endio functions will fiddle the uptodate bits */ - if (!rbio->read_rebuild) { + if (rbio->operation == BTRFS_RBIO_WRITE) { for (i = 0; i < nr_pages; i++) { if (faila != -1) { page = rbio_stripe_page(rbio, faila, i); @@ -1895,7 +1899,7 @@ pstripe: * if we're rebuilding a read, we have to use * pages from the bio list */ - if (rbio->read_rebuild && + if (rbio->operation == BTRFS_RBIO_READ_REBUILD && (stripe == faila || stripe == failb)) { page = page_in_rbio(rbio, stripe, pagenr, 0); } else { @@ -1910,8 +1914,7 @@ cleanup: kfree(pointers); cleanup_io: - - if (rbio->read_rebuild) { + if (rbio->operation == BTRFS_RBIO_READ_REBUILD) { if (err == 0 && !test_bit(RBIO_HOLD_BBIO_MAP_BIT, &rbio->flags)) cache_rbio_pages(rbio); @@ -2050,7 +2053,7 @@ out: return 0; cleanup: - if (rbio->read_rebuild) + if (rbio->operation == BTRFS_RBIO_READ_REBUILD) rbio_orig_end_io(rbio, -EIO, 0); return -EIO; } @@ -2076,7 +2079,7 @@ int raid56_parity_recover(struct btrfs_root *root, struct bio *bio, if (hold_bbio) set_bit(RBIO_HOLD_BBIO_MAP_BIT, &rbio->flags); - rbio->read_rebuild = 1; + rbio->operation = BTRFS_RBIO_READ_REBUILD; bio_list_add(&rbio->bio_list, bio); rbio->bio_list_bytes = bio->bi_iter.bi_size; -- cgit v1.2.1 From 5a6ac9eacb49143cbad3bbfda72263101cb1f3df Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 6 Nov 2014 17:20:58 +0800 Subject: Btrfs, raid56: support parity scrub on raid56 The implementation is: - Read and check all the data with checksum in the same stripe. All the data which has checksum is COW data, and we are sure that it is not changed though we don't lock the stripe. because the space of that data just can be reclaimed after the current transction is committed, and then the fs can use it to store the other data, but when doing scrub, we hold the current transaction, that is that data can not be recovered, it is safe that read and check it out of the stripe lock. - Lock the stripe - Read out all the data without checksum and parity The data without checksum and the parity may be changed if we don't lock the stripe, so we need read it in the stripe lock context. - Check the parity - Re-calculate the new parity and write back it if the old parity is not right - Unlock the stripe If we can not read out the data or the data we read is corrupted, we will try to repair it. If the repair fails. we will mark the horizontal sub-stripe(pages on the same horizontal) as corrupted sub-stripe, and we will skip the parity check and repair of that horizontal sub-stripe. And in order to skip the horizontal sub-stripe that has no data, we introduce a bitmap. If there is some data on the horizontal sub-stripe, we will the relative bit to 1, and when we check and repair the parity, we will skip those horizontal sub-stripes that the relative bits is 0. Signed-off-by: Miao Xie --- fs/btrfs/raid56.c | 514 ++++++++++++++++++++++++++++++++++++++++++++- fs/btrfs/raid56.h | 12 ++ fs/btrfs/scrub.c | 609 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 1115 insertions(+), 20 deletions(-) diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 0600bf69199e..b85d68f721b8 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -72,6 +72,7 @@ enum btrfs_rbio_ops { BTRFS_RBIO_WRITE = 0, BTRFS_RBIO_READ_REBUILD = 1, + BTRFS_RBIO_PARITY_SCRUB = 2, }; struct btrfs_raid_bio { @@ -130,6 +131,7 @@ struct btrfs_raid_bio { /* number of data stripes (no p/q) */ int nr_data; + int stripe_npages; /* * set if we're doing a parity rebuild * for a read from higher up, which is handled @@ -144,6 +146,7 @@ struct btrfs_raid_bio { /* second bad stripe (for raid6 use) */ int failb; + int scrubp; /* * number of pages needed to represent the full * stripe @@ -178,6 +181,11 @@ struct btrfs_raid_bio { * here for faster lookup */ struct page **bio_pages; + + /* + * bitmap to record which horizontal stripe has data + */ + unsigned long *dbitmap; }; static int __raid56_parity_recover(struct btrfs_raid_bio *rbio); @@ -192,6 +200,10 @@ static void __free_raid_bio(struct btrfs_raid_bio *rbio); static void index_rbio_pages(struct btrfs_raid_bio *rbio); static int alloc_rbio_pages(struct btrfs_raid_bio *rbio); +static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, + int need_check); +static void async_scrub_parity(struct btrfs_raid_bio *rbio); + /* * the stripe hash table is used for locking, and to collect * bios in hopes of making a full stripe @@ -593,10 +605,20 @@ static int rbio_can_merge(struct btrfs_raid_bio *last, cur->raid_map[0]) return 0; - /* reads can't merge with writes */ - if (last->operation != cur->operation) { + /* we can't merge with different operations */ + if (last->operation != cur->operation) + return 0; + /* + * We've need read the full stripe from the drive. + * check and repair the parity and write the new results. + * + * We're not allowed to add any new bios to the + * bio list here, anyone else that wants to + * change this stripe needs to do their own rmw. + */ + if (last->operation == BTRFS_RBIO_PARITY_SCRUB || + cur->operation == BTRFS_RBIO_PARITY_SCRUB) return 0; - } return 1; } @@ -789,9 +811,12 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio) if (next->operation == BTRFS_RBIO_READ_REBUILD) async_read_rebuild(next); - else if (next->operation == BTRFS_RBIO_WRITE){ + else if (next->operation == BTRFS_RBIO_WRITE) { steal_rbio(rbio, next); async_rmw_stripe(next); + } else if (next->operation == BTRFS_RBIO_PARITY_SCRUB) { + steal_rbio(rbio, next); + async_scrub_parity(next); } goto done_nolock; @@ -957,9 +982,11 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root, struct btrfs_raid_bio *rbio; int nr_data = 0; int num_pages = rbio_nr_pages(stripe_len, bbio->num_stripes); + int stripe_npages = DIV_ROUND_UP(stripe_len, PAGE_SIZE); void *p; - rbio = kzalloc(sizeof(*rbio) + num_pages * sizeof(struct page *) * 2, + rbio = kzalloc(sizeof(*rbio) + num_pages * sizeof(struct page *) * 2 + + DIV_ROUND_UP(stripe_npages, BITS_PER_LONG / 8), GFP_NOFS); if (!rbio) return ERR_PTR(-ENOMEM); @@ -974,6 +1001,7 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root, rbio->fs_info = root->fs_info; rbio->stripe_len = stripe_len; rbio->nr_pages = num_pages; + rbio->stripe_npages = stripe_npages; rbio->faila = -1; rbio->failb = -1; atomic_set(&rbio->refs, 1); @@ -987,6 +1015,7 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root, p = rbio + 1; rbio->stripe_pages = p; rbio->bio_pages = p + sizeof(struct page *) * num_pages; + rbio->dbitmap = p + sizeof(struct page *) * num_pages * 2; if (raid_map[bbio->num_stripes - 1] == RAID6_Q_STRIPE) nr_data = bbio->num_stripes - 2; @@ -1781,6 +1810,14 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio) index_rbio_pages(rbio); for (pagenr = 0; pagenr < nr_pages; pagenr++) { + /* + * Now we just use bitmap to mark the horizontal stripes in + * which we have data when doing parity scrub. + */ + if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB && + !test_bit(pagenr, rbio->dbitmap)) + continue; + /* setup our array of pointers with pages * from each stripe */ @@ -1925,7 +1962,13 @@ cleanup_io: } else if (err == 0) { rbio->faila = -1; rbio->failb = -1; - finish_rmw(rbio); + + if (rbio->operation == BTRFS_RBIO_WRITE) + finish_rmw(rbio); + else if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) + finish_parity_scrub(rbio, 0); + else + BUG(); } else { rbio_orig_end_io(rbio, err, 0); } @@ -2133,3 +2176,462 @@ static void read_rebuild_work(struct btrfs_work *work) rbio = container_of(work, struct btrfs_raid_bio, work); __raid56_parity_recover(rbio); } + +/* + * The following code is used to scrub/replace the parity stripe + * + * Note: We need make sure all the pages that add into the scrub/replace + * raid bio are correct and not be changed during the scrub/replace. That + * is those pages just hold metadata or file data with checksum. + */ + +struct btrfs_raid_bio * +raid56_parity_alloc_scrub_rbio(struct btrfs_root *root, struct bio *bio, + struct btrfs_bio *bbio, u64 *raid_map, + u64 stripe_len, struct btrfs_device *scrub_dev, + unsigned long *dbitmap, int stripe_nsectors) +{ + struct btrfs_raid_bio *rbio; + int i; + + rbio = alloc_rbio(root, bbio, raid_map, stripe_len); + if (IS_ERR(rbio)) + return NULL; + bio_list_add(&rbio->bio_list, bio); + /* + * This is a special bio which is used to hold the completion handler + * and make the scrub rbio is similar to the other types + */ + ASSERT(!bio->bi_iter.bi_size); + rbio->operation = BTRFS_RBIO_PARITY_SCRUB; + + for (i = 0; i < bbio->num_stripes; i++) { + if (bbio->stripes[i].dev == scrub_dev) { + rbio->scrubp = i; + break; + } + } + + /* Now we just support the sectorsize equals to page size */ + ASSERT(root->sectorsize == PAGE_SIZE); + ASSERT(rbio->stripe_npages == stripe_nsectors); + bitmap_copy(rbio->dbitmap, dbitmap, stripe_nsectors); + + return rbio; +} + +void raid56_parity_add_scrub_pages(struct btrfs_raid_bio *rbio, + struct page *page, u64 logical) +{ + int stripe_offset; + int index; + + ASSERT(logical >= rbio->raid_map[0]); + ASSERT(logical + PAGE_SIZE <= rbio->raid_map[0] + + rbio->stripe_len * rbio->nr_data); + stripe_offset = (int)(logical - rbio->raid_map[0]); + index = stripe_offset >> PAGE_CACHE_SHIFT; + rbio->bio_pages[index] = page; +} + +/* + * We just scrub the parity that we have correct data on the same horizontal, + * so we needn't allocate all pages for all the stripes. + */ +static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio) +{ + int i; + int bit; + int index; + struct page *page; + + for_each_set_bit(bit, rbio->dbitmap, rbio->stripe_npages) { + for (i = 0; i < rbio->bbio->num_stripes; i++) { + index = i * rbio->stripe_npages + bit; + if (rbio->stripe_pages[index]) + continue; + + page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); + if (!page) + return -ENOMEM; + rbio->stripe_pages[index] = page; + ClearPageUptodate(page); + } + } + return 0; +} + +/* + * end io function used by finish_rmw. When we finally + * get here, we've written a full stripe + */ +static void raid_write_parity_end_io(struct bio *bio, int err) +{ + struct btrfs_raid_bio *rbio = bio->bi_private; + + if (err) + fail_bio_stripe(rbio, bio); + + bio_put(bio); + + if (!atomic_dec_and_test(&rbio->stripes_pending)) + return; + + err = 0; + + if (atomic_read(&rbio->error)) + err = -EIO; + + rbio_orig_end_io(rbio, err, 0); +} + +static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, + int need_check) +{ + struct btrfs_bio *bbio = rbio->bbio; + void *pointers[bbio->num_stripes]; + int nr_data = rbio->nr_data; + int stripe; + int pagenr; + int p_stripe = -1; + int q_stripe = -1; + struct page *p_page = NULL; + struct page *q_page = NULL; + struct bio_list bio_list; + struct bio *bio; + int ret; + + bio_list_init(&bio_list); + + if (bbio->num_stripes - rbio->nr_data == 1) { + p_stripe = bbio->num_stripes - 1; + } else if (bbio->num_stripes - rbio->nr_data == 2) { + p_stripe = bbio->num_stripes - 2; + q_stripe = bbio->num_stripes - 1; + } else { + BUG(); + } + + /* + * Because the higher layers(scrubber) are unlikely to + * use this area of the disk again soon, so don't cache + * it. + */ + clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags); + + if (!need_check) + goto writeback; + + p_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); + if (!p_page) + goto cleanup; + SetPageUptodate(p_page); + + if (q_stripe != -1) { + q_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); + if (!q_page) { + __free_page(p_page); + goto cleanup; + } + SetPageUptodate(q_page); + } + + atomic_set(&rbio->error, 0); + + for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) { + struct page *p; + void *parity; + /* first collect one page from each data stripe */ + for (stripe = 0; stripe < nr_data; stripe++) { + p = page_in_rbio(rbio, stripe, pagenr, 0); + pointers[stripe] = kmap(p); + } + + /* then add the parity stripe */ + pointers[stripe++] = kmap(p_page); + + if (q_stripe != -1) { + + /* + * raid6, add the qstripe and call the + * library function to fill in our p/q + */ + pointers[stripe++] = kmap(q_page); + + raid6_call.gen_syndrome(bbio->num_stripes, PAGE_SIZE, + pointers); + } else { + /* raid5 */ + memcpy(pointers[nr_data], pointers[0], PAGE_SIZE); + run_xor(pointers + 1, nr_data - 1, PAGE_CACHE_SIZE); + } + + /* Check scrubbing pairty and repair it */ + p = rbio_stripe_page(rbio, rbio->scrubp, pagenr); + parity = kmap(p); + if (memcmp(parity, pointers[rbio->scrubp], PAGE_CACHE_SIZE)) + memcpy(parity, pointers[rbio->scrubp], PAGE_CACHE_SIZE); + else + /* Parity is right, needn't writeback */ + bitmap_clear(rbio->dbitmap, pagenr, 1); + kunmap(p); + + for (stripe = 0; stripe < bbio->num_stripes; stripe++) + kunmap(page_in_rbio(rbio, stripe, pagenr, 0)); + } + + __free_page(p_page); + if (q_page) + __free_page(q_page); + +writeback: + /* + * time to start writing. Make bios for everything from the + * higher layers (the bio_list in our rbio) and our p/q. Ignore + * everything else. + */ + for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) { + struct page *page; + + page = rbio_stripe_page(rbio, rbio->scrubp, pagenr); + ret = rbio_add_io_page(rbio, &bio_list, + page, rbio->scrubp, pagenr, rbio->stripe_len); + if (ret) + goto cleanup; + } + + nr_data = bio_list_size(&bio_list); + if (!nr_data) { + /* Every parity is right */ + rbio_orig_end_io(rbio, 0, 0); + return; + } + + atomic_set(&rbio->stripes_pending, nr_data); + + while (1) { + bio = bio_list_pop(&bio_list); + if (!bio) + break; + + bio->bi_private = rbio; + bio->bi_end_io = raid_write_parity_end_io; + BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags)); + submit_bio(WRITE, bio); + } + return; + +cleanup: + rbio_orig_end_io(rbio, -EIO, 0); +} + +static inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe) +{ + if (stripe >= 0 && stripe < rbio->nr_data) + return 1; + return 0; +} + +/* + * While we're doing the parity check and repair, we could have errors + * in reading pages off the disk. This checks for errors and if we're + * not able to read the page it'll trigger parity reconstruction. The + * parity scrub will be finished after we've reconstructed the failed + * stripes + */ +static void validate_rbio_for_parity_scrub(struct btrfs_raid_bio *rbio) +{ + if (atomic_read(&rbio->error) > rbio->bbio->max_errors) + goto cleanup; + + if (rbio->faila >= 0 || rbio->failb >= 0) { + int dfail = 0, failp = -1; + + if (is_data_stripe(rbio, rbio->faila)) + dfail++; + else if (is_parity_stripe(rbio->faila)) + failp = rbio->faila; + + if (is_data_stripe(rbio, rbio->failb)) + dfail++; + else if (is_parity_stripe(rbio->failb)) + failp = rbio->failb; + + /* + * Because we can not use a scrubbing parity to repair + * the data, so the capability of the repair is declined. + * (In the case of RAID5, we can not repair anything) + */ + if (dfail > rbio->bbio->max_errors - 1) + goto cleanup; + + /* + * If all data is good, only parity is correctly, just + * repair the parity. + */ + if (dfail == 0) { + finish_parity_scrub(rbio, 0); + return; + } + + /* + * Here means we got one corrupted data stripe and one + * corrupted parity on RAID6, if the corrupted parity + * is scrubbing parity, luckly, use the other one to repair + * the data, or we can not repair the data stripe. + */ + if (failp != rbio->scrubp) + goto cleanup; + + __raid_recover_end_io(rbio); + } else { + finish_parity_scrub(rbio, 1); + } + return; + +cleanup: + rbio_orig_end_io(rbio, -EIO, 0); +} + +/* + * end io for the read phase of the rmw cycle. All the bios here are physical + * stripe bios we've read from the disk so we can recalculate the parity of the + * stripe. + * + * This will usually kick off finish_rmw once all the bios are read in, but it + * may trigger parity reconstruction if we had any errors along the way + */ +static void raid56_parity_scrub_end_io(struct bio *bio, int err) +{ + struct btrfs_raid_bio *rbio = bio->bi_private; + + if (err) + fail_bio_stripe(rbio, bio); + else + set_bio_pages_uptodate(bio); + + bio_put(bio); + + if (!atomic_dec_and_test(&rbio->stripes_pending)) + return; + + /* + * this will normally call finish_rmw to start our write + * but if there are any failed stripes we'll reconstruct + * from parity first + */ + validate_rbio_for_parity_scrub(rbio); +} + +static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio) +{ + int bios_to_read = 0; + struct btrfs_bio *bbio = rbio->bbio; + struct bio_list bio_list; + int ret; + int pagenr; + int stripe; + struct bio *bio; + + ret = alloc_rbio_essential_pages(rbio); + if (ret) + goto cleanup; + + bio_list_init(&bio_list); + + atomic_set(&rbio->error, 0); + /* + * build a list of bios to read all the missing parts of this + * stripe + */ + for (stripe = 0; stripe < bbio->num_stripes; stripe++) { + for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) { + struct page *page; + /* + * we want to find all the pages missing from + * the rbio and read them from the disk. If + * page_in_rbio finds a page in the bio list + * we don't need to read it off the stripe. + */ + page = page_in_rbio(rbio, stripe, pagenr, 1); + if (page) + continue; + + page = rbio_stripe_page(rbio, stripe, pagenr); + /* + * the bio cache may have handed us an uptodate + * page. If so, be happy and use it + */ + if (PageUptodate(page)) + continue; + + ret = rbio_add_io_page(rbio, &bio_list, page, + stripe, pagenr, rbio->stripe_len); + if (ret) + goto cleanup; + } + } + + bios_to_read = bio_list_size(&bio_list); + if (!bios_to_read) { + /* + * this can happen if others have merged with + * us, it means there is nothing left to read. + * But if there are missing devices it may not be + * safe to do the full stripe write yet. + */ + goto finish; + } + + /* + * the bbio may be freed once we submit the last bio. Make sure + * not to touch it after that + */ + atomic_set(&rbio->stripes_pending, bios_to_read); + while (1) { + bio = bio_list_pop(&bio_list); + if (!bio) + break; + + bio->bi_private = rbio; + bio->bi_end_io = raid56_parity_scrub_end_io; + + btrfs_bio_wq_end_io(rbio->fs_info, bio, + BTRFS_WQ_ENDIO_RAID56); + + BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags)); + submit_bio(READ, bio); + } + /* the actual write will happen once the reads are done */ + return; + +cleanup: + rbio_orig_end_io(rbio, -EIO, 0); + return; + +finish: + validate_rbio_for_parity_scrub(rbio); +} + +static void scrub_parity_work(struct btrfs_work *work) +{ + struct btrfs_raid_bio *rbio; + + rbio = container_of(work, struct btrfs_raid_bio, work); + raid56_parity_scrub_stripe(rbio); +} + +static void async_scrub_parity(struct btrfs_raid_bio *rbio) +{ + btrfs_init_work(&rbio->work, btrfs_rmw_helper, + scrub_parity_work, NULL, NULL); + + btrfs_queue_work(rbio->fs_info->rmw_workers, + &rbio->work); +} + +void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio) +{ + if (!lock_stripe_add(rbio)) + async_scrub_parity(rbio); +} diff --git a/fs/btrfs/raid56.h b/fs/btrfs/raid56.h index b310e8c830d1..3d4ddb3d861d 100644 --- a/fs/btrfs/raid56.h +++ b/fs/btrfs/raid56.h @@ -39,6 +39,9 @@ static inline int nr_data_stripes(struct map_lookup *map) #define is_parity_stripe(x) (((x) == RAID5_P_STRIPE) || \ ((x) == RAID6_Q_STRIPE)) +struct btrfs_raid_bio; +struct btrfs_device; + int raid56_parity_recover(struct btrfs_root *root, struct bio *bio, struct btrfs_bio *bbio, u64 *raid_map, u64 stripe_len, int mirror_num, int hold_bbio); @@ -46,6 +49,15 @@ int raid56_parity_write(struct btrfs_root *root, struct bio *bio, struct btrfs_bio *bbio, u64 *raid_map, u64 stripe_len); +struct btrfs_raid_bio * +raid56_parity_alloc_scrub_rbio(struct btrfs_root *root, struct bio *bio, + struct btrfs_bio *bbio, u64 *raid_map, + u64 stripe_len, struct btrfs_device *scrub_dev, + unsigned long *dbitmap, int stripe_nsectors); +void raid56_parity_add_scrub_pages(struct btrfs_raid_bio *rbio, + struct page *page, u64 logical); +void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio); + int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info); void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info); #endif diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index ca4b9eb8b5da..7f95afcf9fd3 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -74,6 +74,7 @@ struct scrub_page { struct scrub_block *sblock; struct page *page; struct btrfs_device *dev; + struct list_head list; u64 flags; /* extent flags */ u64 generation; u64 logical; @@ -114,14 +115,52 @@ struct scrub_block { atomic_t outstanding_pages; atomic_t ref_count; /* free mem on transition to zero */ struct scrub_ctx *sctx; + struct scrub_parity *sparity; struct { unsigned int header_error:1; unsigned int checksum_error:1; unsigned int no_io_error_seen:1; unsigned int generation_error:1; /* also sets header_error */ + + /* The following is for the data used to check parity */ + /* It is for the data with checksum */ + unsigned int data_corrected:1; }; }; +/* Used for the chunks with parity stripe such RAID5/6 */ +struct scrub_parity { + struct scrub_ctx *sctx; + + struct btrfs_device *scrub_dev; + + u64 logic_start; + + u64 logic_end; + + int nsectors; + + int stripe_len; + + atomic_t ref_count; + + struct list_head spages; + + /* Work of parity check and repair */ + struct btrfs_work work; + + /* Mark the parity blocks which have data */ + unsigned long *dbitmap; + + /* + * Mark the parity blocks which have data, but errors happen when + * read data or check data + */ + unsigned long *ebitmap; + + unsigned long bitmap[0]; +}; + struct scrub_wr_ctx { struct scrub_bio *wr_curr_bio; struct btrfs_device *tgtdev; @@ -227,6 +266,8 @@ static void scrub_block_get(struct scrub_block *sblock); static void scrub_block_put(struct scrub_block *sblock); static void scrub_page_get(struct scrub_page *spage); static void scrub_page_put(struct scrub_page *spage); +static void scrub_parity_get(struct scrub_parity *sparity); +static void scrub_parity_put(struct scrub_parity *sparity); static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx, struct scrub_page *spage); static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len, @@ -943,6 +984,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) */ spin_lock(&sctx->stat_lock); sctx->stat.unverified_errors++; + sblock_to_check->data_corrected = 1; spin_unlock(&sctx->stat_lock); if (sctx->is_dev_replace) @@ -1203,6 +1245,7 @@ nodatasum_case: corrected_error: spin_lock(&sctx->stat_lock); sctx->stat.corrected_errors++; + sblock_to_check->data_corrected = 1; spin_unlock(&sctx->stat_lock); printk_ratelimited_in_rcu(KERN_ERR "BTRFS: fixed up error at logical %llu on dev %s\n", @@ -1644,6 +1687,13 @@ static void scrub_write_block_to_dev_replace(struct scrub_block *sblock) { int page_num; + /* + * This block is used for the check of the parity on the source device, + * so the data needn't be written into the destination device. + */ + if (sblock->sparity) + return; + for (page_num = 0; page_num < sblock->page_count; page_num++) { int ret; @@ -2025,6 +2075,9 @@ static void scrub_block_put(struct scrub_block *sblock) if (atomic_dec_and_test(&sblock->ref_count)) { int i; + if (sblock->sparity) + scrub_parity_put(sblock->sparity); + for (i = 0; i < sblock->page_count; i++) scrub_page_put(sblock->pagev[i]); kfree(sblock); @@ -2282,9 +2335,51 @@ static void scrub_bio_end_io_worker(struct btrfs_work *work) scrub_pending_bio_dec(sctx); } +static inline void __scrub_mark_bitmap(struct scrub_parity *sparity, + unsigned long *bitmap, + u64 start, u64 len) +{ + int offset; + int nsectors; + int sectorsize = sparity->sctx->dev_root->sectorsize; + + if (len >= sparity->stripe_len) { + bitmap_set(bitmap, 0, sparity->nsectors); + return; + } + + start -= sparity->logic_start; + offset = (int)do_div(start, sparity->stripe_len); + offset /= sectorsize; + nsectors = (int)len / sectorsize; + + if (offset + nsectors <= sparity->nsectors) { + bitmap_set(bitmap, offset, nsectors); + return; + } + + bitmap_set(bitmap, offset, sparity->nsectors - offset); + bitmap_set(bitmap, 0, nsectors - (sparity->nsectors - offset)); +} + +static inline void scrub_parity_mark_sectors_error(struct scrub_parity *sparity, + u64 start, u64 len) +{ + __scrub_mark_bitmap(sparity, sparity->ebitmap, start, len); +} + +static inline void scrub_parity_mark_sectors_data(struct scrub_parity *sparity, + u64 start, u64 len) +{ + __scrub_mark_bitmap(sparity, sparity->dbitmap, start, len); +} + static void scrub_block_complete(struct scrub_block *sblock) { + int corrupted = 0; + if (!sblock->no_io_error_seen) { + corrupted = 1; scrub_handle_errored_block(sblock); } else { /* @@ -2292,9 +2387,19 @@ static void scrub_block_complete(struct scrub_block *sblock) * dev replace case, otherwise write here in dev replace * case. */ - if (!scrub_checksum(sblock) && sblock->sctx->is_dev_replace) + corrupted = scrub_checksum(sblock); + if (!corrupted && sblock->sctx->is_dev_replace) scrub_write_block_to_dev_replace(sblock); } + + if (sblock->sparity && corrupted && !sblock->data_corrected) { + u64 start = sblock->pagev[0]->logical; + u64 end = sblock->pagev[sblock->page_count - 1]->logical + + PAGE_SIZE; + + scrub_parity_mark_sectors_error(sblock->sparity, + start, end - start); + } } static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u64 len, @@ -2386,6 +2491,132 @@ behind_scrub_pages: return 0; } +static int scrub_pages_for_parity(struct scrub_parity *sparity, + u64 logical, u64 len, + u64 physical, struct btrfs_device *dev, + u64 flags, u64 gen, int mirror_num, u8 *csum) +{ + struct scrub_ctx *sctx = sparity->sctx; + struct scrub_block *sblock; + int index; + + sblock = kzalloc(sizeof(*sblock), GFP_NOFS); + if (!sblock) { + spin_lock(&sctx->stat_lock); + sctx->stat.malloc_errors++; + spin_unlock(&sctx->stat_lock); + return -ENOMEM; + } + + /* one ref inside this function, plus one for each page added to + * a bio later on */ + atomic_set(&sblock->ref_count, 1); + sblock->sctx = sctx; + sblock->no_io_error_seen = 1; + sblock->sparity = sparity; + scrub_parity_get(sparity); + + for (index = 0; len > 0; index++) { + struct scrub_page *spage; + u64 l = min_t(u64, len, PAGE_SIZE); + + spage = kzalloc(sizeof(*spage), GFP_NOFS); + if (!spage) { +leave_nomem: + spin_lock(&sctx->stat_lock); + sctx->stat.malloc_errors++; + spin_unlock(&sctx->stat_lock); + scrub_block_put(sblock); + return -ENOMEM; + } + BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK); + /* For scrub block */ + scrub_page_get(spage); + sblock->pagev[index] = spage; + /* For scrub parity */ + scrub_page_get(spage); + list_add_tail(&spage->list, &sparity->spages); + spage->sblock = sblock; + spage->dev = dev; + spage->flags = flags; + spage->generation = gen; + spage->logical = logical; + spage->physical = physical; + spage->mirror_num = mirror_num; + if (csum) { + spage->have_csum = 1; + memcpy(spage->csum, csum, sctx->csum_size); + } else { + spage->have_csum = 0; + } + sblock->page_count++; + spage->page = alloc_page(GFP_NOFS); + if (!spage->page) + goto leave_nomem; + len -= l; + logical += l; + physical += l; + } + + WARN_ON(sblock->page_count == 0); + for (index = 0; index < sblock->page_count; index++) { + struct scrub_page *spage = sblock->pagev[index]; + int ret; + + ret = scrub_add_page_to_rd_bio(sctx, spage); + if (ret) { + scrub_block_put(sblock); + return ret; + } + } + + /* last one frees, either here or in bio completion for last page */ + scrub_block_put(sblock); + return 0; +} + +static int scrub_extent_for_parity(struct scrub_parity *sparity, + u64 logical, u64 len, + u64 physical, struct btrfs_device *dev, + u64 flags, u64 gen, int mirror_num) +{ + struct scrub_ctx *sctx = sparity->sctx; + int ret; + u8 csum[BTRFS_CSUM_SIZE]; + u32 blocksize; + + if (flags & BTRFS_EXTENT_FLAG_DATA) { + blocksize = sctx->sectorsize; + } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { + blocksize = sctx->nodesize; + } else { + blocksize = sctx->sectorsize; + WARN_ON(1); + } + + while (len) { + u64 l = min_t(u64, len, blocksize); + int have_csum = 0; + + if (flags & BTRFS_EXTENT_FLAG_DATA) { + /* push csums to sbio */ + have_csum = scrub_find_csum(sctx, logical, l, csum); + if (have_csum == 0) + goto skip; + } + ret = scrub_pages_for_parity(sparity, logical, l, physical, dev, + flags, gen, mirror_num, + have_csum ? csum : NULL); +skip: + if (ret) + return ret; + len -= l; + logical += l; + physical += l; + } + return 0; +} + /* * Given a physical address, this will calculate it's * logical offset. if this is a parity stripe, it will return @@ -2394,7 +2625,8 @@ behind_scrub_pages: * return 0 if it is a data stripe, 1 means parity stripe. */ static int get_raid56_logic_offset(u64 physical, int num, - struct map_lookup *map, u64 *offset) + struct map_lookup *map, u64 *offset, + u64 *stripe_start) { int i; int j = 0; @@ -2405,6 +2637,9 @@ static int get_raid56_logic_offset(u64 physical, int num, last_offset = (physical - map->stripes[num].physical) * nr_data_stripes(map); + if (stripe_start) + *stripe_start = last_offset; + *offset = last_offset; for (i = 0; i < nr_data_stripes(map); i++) { *offset = last_offset + i * map->stripe_len; @@ -2427,13 +2662,330 @@ static int get_raid56_logic_offset(u64 physical, int num, return 1; } +static void scrub_free_parity(struct scrub_parity *sparity) +{ + struct scrub_ctx *sctx = sparity->sctx; + struct scrub_page *curr, *next; + int nbits; + + nbits = bitmap_weight(sparity->ebitmap, sparity->nsectors); + if (nbits) { + spin_lock(&sctx->stat_lock); + sctx->stat.read_errors += nbits; + sctx->stat.uncorrectable_errors += nbits; + spin_unlock(&sctx->stat_lock); + } + + list_for_each_entry_safe(curr, next, &sparity->spages, list) { + list_del_init(&curr->list); + scrub_page_put(curr); + } + + kfree(sparity); +} + +static void scrub_parity_bio_endio(struct bio *bio, int error) +{ + struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private; + struct scrub_ctx *sctx = sparity->sctx; + + if (error) + bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap, + sparity->nsectors); + + scrub_free_parity(sparity); + scrub_pending_bio_dec(sctx); + bio_put(bio); +} + +static void scrub_parity_check_and_repair(struct scrub_parity *sparity) +{ + struct scrub_ctx *sctx = sparity->sctx; + struct bio *bio; + struct btrfs_raid_bio *rbio; + struct scrub_page *spage; + struct btrfs_bio *bbio = NULL; + u64 *raid_map = NULL; + u64 length; + int ret; + + if (!bitmap_andnot(sparity->dbitmap, sparity->dbitmap, sparity->ebitmap, + sparity->nsectors)) + goto out; + + length = sparity->logic_end - sparity->logic_start + 1; + ret = btrfs_map_sblock(sctx->dev_root->fs_info, REQ_GET_READ_MIRRORS, + sparity->logic_start, + &length, &bbio, 0, &raid_map); + if (ret || !bbio || !raid_map) + goto bbio_out; + + bio = btrfs_io_bio_alloc(GFP_NOFS, 0); + if (!bio) + goto bbio_out; + + bio->bi_iter.bi_sector = sparity->logic_start >> 9; + bio->bi_private = sparity; + bio->bi_end_io = scrub_parity_bio_endio; + + rbio = raid56_parity_alloc_scrub_rbio(sctx->dev_root, bio, bbio, + raid_map, length, + sparity->scrub_dev, + sparity->dbitmap, + sparity->nsectors); + if (!rbio) + goto rbio_out; + + list_for_each_entry(spage, &sparity->spages, list) + raid56_parity_add_scrub_pages(rbio, spage->page, + spage->logical); + + scrub_pending_bio_inc(sctx); + raid56_parity_submit_scrub_rbio(rbio); + return; + +rbio_out: + bio_put(bio); +bbio_out: + kfree(bbio); + kfree(raid_map); + bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap, + sparity->nsectors); + spin_lock(&sctx->stat_lock); + sctx->stat.malloc_errors++; + spin_unlock(&sctx->stat_lock); +out: + scrub_free_parity(sparity); +} + +static inline int scrub_calc_parity_bitmap_len(int nsectors) +{ + return DIV_ROUND_UP(nsectors, BITS_PER_LONG) * (BITS_PER_LONG / 8); +} + +static void scrub_parity_get(struct scrub_parity *sparity) +{ + atomic_inc(&sparity->ref_count); +} + +static void scrub_parity_put(struct scrub_parity *sparity) +{ + if (!atomic_dec_and_test(&sparity->ref_count)) + return; + + scrub_parity_check_and_repair(sparity); +} + +static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx, + struct map_lookup *map, + struct btrfs_device *sdev, + struct btrfs_path *path, + u64 logic_start, + u64 logic_end) +{ + struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info; + struct btrfs_root *root = fs_info->extent_root; + struct btrfs_root *csum_root = fs_info->csum_root; + struct btrfs_extent_item *extent; + u64 flags; + int ret; + int slot; + struct extent_buffer *l; + struct btrfs_key key; + u64 generation; + u64 extent_logical; + u64 extent_physical; + u64 extent_len; + struct btrfs_device *extent_dev; + struct scrub_parity *sparity; + int nsectors; + int bitmap_len; + int extent_mirror_num; + int stop_loop = 0; + + nsectors = map->stripe_len / root->sectorsize; + bitmap_len = scrub_calc_parity_bitmap_len(nsectors); + sparity = kzalloc(sizeof(struct scrub_parity) + 2 * bitmap_len, + GFP_NOFS); + if (!sparity) { + spin_lock(&sctx->stat_lock); + sctx->stat.malloc_errors++; + spin_unlock(&sctx->stat_lock); + return -ENOMEM; + } + + sparity->stripe_len = map->stripe_len; + sparity->nsectors = nsectors; + sparity->sctx = sctx; + sparity->scrub_dev = sdev; + sparity->logic_start = logic_start; + sparity->logic_end = logic_end; + atomic_set(&sparity->ref_count, 1); + INIT_LIST_HEAD(&sparity->spages); + sparity->dbitmap = sparity->bitmap; + sparity->ebitmap = (void *)sparity->bitmap + bitmap_len; + + ret = 0; + while (logic_start < logic_end) { + if (btrfs_fs_incompat(fs_info, SKINNY_METADATA)) + key.type = BTRFS_METADATA_ITEM_KEY; + else + key.type = BTRFS_EXTENT_ITEM_KEY; + key.objectid = logic_start; + key.offset = (u64)-1; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto out; + + if (ret > 0) { + ret = btrfs_previous_extent_item(root, path, 0); + if (ret < 0) + goto out; + if (ret > 0) { + btrfs_release_path(path); + ret = btrfs_search_slot(NULL, root, &key, + path, 0, 0); + if (ret < 0) + goto out; + } + } + + stop_loop = 0; + while (1) { + u64 bytes; + + l = path->nodes[0]; + slot = path->slots[0]; + if (slot >= btrfs_header_nritems(l)) { + ret = btrfs_next_leaf(root, path); + if (ret == 0) + continue; + if (ret < 0) + goto out; + + stop_loop = 1; + break; + } + btrfs_item_key_to_cpu(l, &key, slot); + + if (key.type == BTRFS_METADATA_ITEM_KEY) + bytes = root->nodesize; + else + bytes = key.offset; + + if (key.objectid + bytes <= logic_start) + goto next; + + if (key.type != BTRFS_EXTENT_ITEM_KEY && + key.type != BTRFS_METADATA_ITEM_KEY) + goto next; + + if (key.objectid > logic_end) { + stop_loop = 1; + break; + } + + while (key.objectid >= logic_start + map->stripe_len) + logic_start += map->stripe_len; + + extent = btrfs_item_ptr(l, slot, + struct btrfs_extent_item); + flags = btrfs_extent_flags(l, extent); + generation = btrfs_extent_generation(l, extent); + + if (key.objectid < logic_start && + (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) { + btrfs_err(fs_info, + "scrub: tree block %llu spanning stripes, ignored. logical=%llu", + key.objectid, logic_start); + goto next; + } +again: + extent_logical = key.objectid; + extent_len = bytes; + + if (extent_logical < logic_start) { + extent_len -= logic_start - extent_logical; + extent_logical = logic_start; + } + + if (extent_logical + extent_len > + logic_start + map->stripe_len) + extent_len = logic_start + map->stripe_len - + extent_logical; + + scrub_parity_mark_sectors_data(sparity, extent_logical, + extent_len); + + scrub_remap_extent(fs_info, extent_logical, + extent_len, &extent_physical, + &extent_dev, + &extent_mirror_num); + + ret = btrfs_lookup_csums_range(csum_root, + extent_logical, + extent_logical + extent_len - 1, + &sctx->csum_list, 1); + if (ret) + goto out; + + ret = scrub_extent_for_parity(sparity, extent_logical, + extent_len, + extent_physical, + extent_dev, flags, + generation, + extent_mirror_num); + if (ret) + goto out; + + scrub_free_csums(sctx); + if (extent_logical + extent_len < + key.objectid + bytes) { + logic_start += map->stripe_len; + + if (logic_start >= logic_end) { + stop_loop = 1; + break; + } + + if (logic_start < key.objectid + bytes) { + cond_resched(); + goto again; + } + } +next: + path->slots[0]++; + } + + btrfs_release_path(path); + + if (stop_loop) + break; + + logic_start += map->stripe_len; + } +out: + if (ret < 0) + scrub_parity_mark_sectors_error(sparity, logic_start, + logic_end - logic_start + 1); + scrub_parity_put(sparity); + scrub_submit(sctx); + mutex_lock(&sctx->wr_ctx.wr_lock); + scrub_wr_submit(sctx); + mutex_unlock(&sctx->wr_ctx.wr_lock); + + btrfs_release_path(path); + return ret < 0 ? ret : 0; +} + static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, struct map_lookup *map, struct btrfs_device *scrub_dev, int num, u64 base, u64 length, int is_dev_replace) { - struct btrfs_path *path; + struct btrfs_path *path, *ppath; struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info; struct btrfs_root *root = fs_info->extent_root; struct btrfs_root *csum_root = fs_info->csum_root; @@ -2460,6 +3012,8 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, u64 extent_logical; u64 extent_physical; u64 extent_len; + u64 stripe_logical; + u64 stripe_end; struct btrfs_device *extent_dev; int extent_mirror_num; int stop_loop = 0; @@ -2485,7 +3039,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, mirror_num = num % map->num_stripes + 1; } else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6)) { - get_raid56_logic_offset(physical, num, map, &offset); + get_raid56_logic_offset(physical, num, map, &offset, NULL); increment = map->stripe_len * nr_data_stripes(map); mirror_num = 1; } else { @@ -2497,6 +3051,12 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, if (!path) return -ENOMEM; + ppath = btrfs_alloc_path(); + if (!ppath) { + btrfs_free_path(ppath); + return -ENOMEM; + } + /* * work on commit root. The related disk blocks are static as * long as COW is applied. This means, it is save to rewrite @@ -2515,7 +3075,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6)) { get_raid56_logic_offset(physical_end, num, - map, &logic_end); + map, &logic_end, NULL); logic_end += base; } else { logic_end = logical + increment * nstripes; @@ -2562,10 +3122,18 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6)) { ret = get_raid56_logic_offset(physical, num, - map, &logical); + map, &logical, &stripe_logical); logical += base; - if (ret) + if (ret) { + stripe_logical += base; + stripe_end = stripe_logical + increment - 1; + ret = scrub_raid56_parity(sctx, map, scrub_dev, + ppath, stripe_logical, + stripe_end); + if (ret) + goto out; goto skip; + } } /* * canceled? @@ -2716,13 +3284,25 @@ again: * loop until we find next data stripe * or we have finished all stripes. */ - do { - physical += map->stripe_len; - ret = get_raid56_logic_offset( - physical, num, - map, &logical); - logical += base; - } while (physical < physical_end && ret); +loop: + physical += map->stripe_len; + ret = get_raid56_logic_offset(physical, + num, map, &logical, + &stripe_logical); + logical += base; + + if (ret && physical < physical_end) { + stripe_logical += base; + stripe_end = stripe_logical + + increment - 1; + ret = scrub_raid56_parity(sctx, + map, scrub_dev, ppath, + stripe_logical, + stripe_end); + if (ret) + goto out; + goto loop; + } } else { physical += map->stripe_len; logical += increment; @@ -2763,6 +3343,7 @@ out: blk_finish_plug(&plug); btrfs_free_path(path); + btrfs_free_path(ppath); return ret < 0 ? ret : 0; } -- cgit v1.2.1 From 2c8cdd6ee4e7f637b0486c6798117e7859dee586 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Fri, 14 Nov 2014 16:06:25 +0800 Subject: Btrfs, replace: write dirty pages into the replace target device The implementation is simple: - In order to avoid changing the code logic of btrfs_map_bio and RAID56, we add the stripes of the replace target devices at the end of the stripe array in btrfs bio, and we sort those target device stripes in the array. And we keep the number of the target device stripes in the btrfs bio. - Except write operation on RAID56, all the other operation don't take the target device stripes into account. - When we do write operation, we read the data from the common devices and calculate the parity. Then write the dirty data and new parity out, at this time, we will find the relative replace target stripes and wirte the relative data into it. Note: The function that copying old data on the source device to the target device was implemented in the past, it is similar to the other RAID type. Signed-off-by: Miao Xie --- fs/btrfs/raid56.c | 104 +++++++++++++++++++++++++++++++++-------------------- fs/btrfs/volumes.c | 26 ++++++++++++-- fs/btrfs/volumes.h | 10 ++++-- 3 files changed, 97 insertions(+), 43 deletions(-) diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index b85d68f721b8..89a8486c34b3 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -131,6 +131,8 @@ struct btrfs_raid_bio { /* number of data stripes (no p/q) */ int nr_data; + int real_stripes; + int stripe_npages; /* * set if we're doing a parity rebuild @@ -638,7 +640,7 @@ static struct page *rbio_pstripe_page(struct btrfs_raid_bio *rbio, int index) */ static struct page *rbio_qstripe_page(struct btrfs_raid_bio *rbio, int index) { - if (rbio->nr_data + 1 == rbio->bbio->num_stripes) + if (rbio->nr_data + 1 == rbio->real_stripes) return NULL; index += ((rbio->nr_data + 1) * rbio->stripe_len) >> @@ -981,7 +983,8 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root, { struct btrfs_raid_bio *rbio; int nr_data = 0; - int num_pages = rbio_nr_pages(stripe_len, bbio->num_stripes); + int real_stripes = bbio->num_stripes - bbio->num_tgtdevs; + int num_pages = rbio_nr_pages(stripe_len, real_stripes); int stripe_npages = DIV_ROUND_UP(stripe_len, PAGE_SIZE); void *p; @@ -1001,6 +1004,7 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root, rbio->fs_info = root->fs_info; rbio->stripe_len = stripe_len; rbio->nr_pages = num_pages; + rbio->real_stripes = real_stripes; rbio->stripe_npages = stripe_npages; rbio->faila = -1; rbio->failb = -1; @@ -1017,10 +1021,10 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root, rbio->bio_pages = p + sizeof(struct page *) * num_pages; rbio->dbitmap = p + sizeof(struct page *) * num_pages * 2; - if (raid_map[bbio->num_stripes - 1] == RAID6_Q_STRIPE) - nr_data = bbio->num_stripes - 2; + if (raid_map[real_stripes - 1] == RAID6_Q_STRIPE) + nr_data = real_stripes - 2; else - nr_data = bbio->num_stripes - 1; + nr_data = real_stripes - 1; rbio->nr_data = nr_data; return rbio; @@ -1132,7 +1136,7 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio, static void validate_rbio_for_rmw(struct btrfs_raid_bio *rbio) { if (rbio->faila >= 0 || rbio->failb >= 0) { - BUG_ON(rbio->faila == rbio->bbio->num_stripes - 1); + BUG_ON(rbio->faila == rbio->real_stripes - 1); __raid56_parity_recover(rbio); } else { finish_rmw(rbio); @@ -1193,7 +1197,7 @@ static void index_rbio_pages(struct btrfs_raid_bio *rbio) static noinline void finish_rmw(struct btrfs_raid_bio *rbio) { struct btrfs_bio *bbio = rbio->bbio; - void *pointers[bbio->num_stripes]; + void *pointers[rbio->real_stripes]; int stripe_len = rbio->stripe_len; int nr_data = rbio->nr_data; int stripe; @@ -1207,11 +1211,11 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio) bio_list_init(&bio_list); - if (bbio->num_stripes - rbio->nr_data == 1) { - p_stripe = bbio->num_stripes - 1; - } else if (bbio->num_stripes - rbio->nr_data == 2) { - p_stripe = bbio->num_stripes - 2; - q_stripe = bbio->num_stripes - 1; + if (rbio->real_stripes - rbio->nr_data == 1) { + p_stripe = rbio->real_stripes - 1; + } else if (rbio->real_stripes - rbio->nr_data == 2) { + p_stripe = rbio->real_stripes - 2; + q_stripe = rbio->real_stripes - 1; } else { BUG(); } @@ -1268,7 +1272,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio) SetPageUptodate(p); pointers[stripe++] = kmap(p); - raid6_call.gen_syndrome(bbio->num_stripes, PAGE_SIZE, + raid6_call.gen_syndrome(rbio->real_stripes, PAGE_SIZE, pointers); } else { /* raid5 */ @@ -1277,7 +1281,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio) } - for (stripe = 0; stripe < bbio->num_stripes; stripe++) + for (stripe = 0; stripe < rbio->real_stripes; stripe++) kunmap(page_in_rbio(rbio, stripe, pagenr, 0)); } @@ -1286,7 +1290,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio) * higher layers (the bio_list in our rbio) and our p/q. Ignore * everything else. */ - for (stripe = 0; stripe < bbio->num_stripes; stripe++) { + for (stripe = 0; stripe < rbio->real_stripes; stripe++) { for (pagenr = 0; pagenr < pages_per_stripe; pagenr++) { struct page *page; if (stripe < rbio->nr_data) { @@ -1304,6 +1308,32 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio) } } + if (likely(!bbio->num_tgtdevs)) + goto write_data; + + for (stripe = 0; stripe < rbio->real_stripes; stripe++) { + if (!bbio->tgtdev_map[stripe]) + continue; + + for (pagenr = 0; pagenr < pages_per_stripe; pagenr++) { + struct page *page; + if (stripe < rbio->nr_data) { + page = page_in_rbio(rbio, stripe, pagenr, 1); + if (!page) + continue; + } else { + page = rbio_stripe_page(rbio, stripe, pagenr); + } + + ret = rbio_add_io_page(rbio, &bio_list, page, + rbio->bbio->tgtdev_map[stripe], + pagenr, rbio->stripe_len); + if (ret) + goto cleanup; + } + } + +write_data: atomic_set(&rbio->stripes_pending, bio_list_size(&bio_list)); BUG_ON(atomic_read(&rbio->stripes_pending) == 0); @@ -1342,7 +1372,8 @@ static int find_bio_stripe(struct btrfs_raid_bio *rbio, stripe = &rbio->bbio->stripes[i]; stripe_start = stripe->physical; if (physical >= stripe_start && - physical < stripe_start + rbio->stripe_len) { + physical < stripe_start + rbio->stripe_len && + bio->bi_bdev == stripe->dev->bdev) { return i; } } @@ -1791,7 +1822,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio) int err; int i; - pointers = kzalloc(rbio->bbio->num_stripes * sizeof(void *), + pointers = kzalloc(rbio->real_stripes * sizeof(void *), GFP_NOFS); if (!pointers) { err = -ENOMEM; @@ -1821,7 +1852,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio) /* setup our array of pointers with pages * from each stripe */ - for (stripe = 0; stripe < rbio->bbio->num_stripes; stripe++) { + for (stripe = 0; stripe < rbio->real_stripes; stripe++) { /* * if we're rebuilding a read, we have to use * pages from the bio list @@ -1836,7 +1867,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio) } /* all raid6 handling here */ - if (rbio->raid_map[rbio->bbio->num_stripes - 1] == + if (rbio->raid_map[rbio->real_stripes - 1] == RAID6_Q_STRIPE) { /* @@ -1886,10 +1917,10 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio) } if (rbio->raid_map[failb] == RAID5_P_STRIPE) { - raid6_datap_recov(rbio->bbio->num_stripes, + raid6_datap_recov(rbio->real_stripes, PAGE_SIZE, faila, pointers); } else { - raid6_2data_recov(rbio->bbio->num_stripes, + raid6_2data_recov(rbio->real_stripes, PAGE_SIZE, faila, failb, pointers); } @@ -1931,7 +1962,7 @@ pstripe: } } } - for (stripe = 0; stripe < rbio->bbio->num_stripes; stripe++) { + for (stripe = 0; stripe < rbio->real_stripes; stripe++) { /* * if we're rebuilding a read, we have to use * pages from the bio list @@ -2012,7 +2043,6 @@ static void raid_recover_end_io(struct bio *bio, int err) static int __raid56_parity_recover(struct btrfs_raid_bio *rbio) { int bios_to_read = 0; - struct btrfs_bio *bbio = rbio->bbio; struct bio_list bio_list; int ret; int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE); @@ -2033,7 +2063,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio) * stripe cache, it is possible that some or all of these * pages are going to be uptodate. */ - for (stripe = 0; stripe < bbio->num_stripes; stripe++) { + for (stripe = 0; stripe < rbio->real_stripes; stripe++) { if (rbio->faila == stripe || rbio->failb == stripe) { atomic_inc(&rbio->error); continue; @@ -2139,7 +2169,7 @@ int raid56_parity_recover(struct btrfs_root *root, struct bio *bio, * asking for mirror 3 */ if (mirror_num == 3) - rbio->failb = bbio->num_stripes - 2; + rbio->failb = rbio->real_stripes - 2; ret = lock_stripe_add(rbio); @@ -2205,7 +2235,7 @@ raid56_parity_alloc_scrub_rbio(struct btrfs_root *root, struct bio *bio, ASSERT(!bio->bi_iter.bi_size); rbio->operation = BTRFS_RBIO_PARITY_SCRUB; - for (i = 0; i < bbio->num_stripes; i++) { + for (i = 0; i < rbio->real_stripes; i++) { if (bbio->stripes[i].dev == scrub_dev) { rbio->scrubp = i; break; @@ -2246,7 +2276,7 @@ static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio) struct page *page; for_each_set_bit(bit, rbio->dbitmap, rbio->stripe_npages) { - for (i = 0; i < rbio->bbio->num_stripes; i++) { + for (i = 0; i < rbio->real_stripes; i++) { index = i * rbio->stripe_npages + bit; if (rbio->stripe_pages[index]) continue; @@ -2288,8 +2318,7 @@ static void raid_write_parity_end_io(struct bio *bio, int err) static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check) { - struct btrfs_bio *bbio = rbio->bbio; - void *pointers[bbio->num_stripes]; + void *pointers[rbio->real_stripes]; int nr_data = rbio->nr_data; int stripe; int pagenr; @@ -2303,11 +2332,11 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, bio_list_init(&bio_list); - if (bbio->num_stripes - rbio->nr_data == 1) { - p_stripe = bbio->num_stripes - 1; - } else if (bbio->num_stripes - rbio->nr_data == 2) { - p_stripe = bbio->num_stripes - 2; - q_stripe = bbio->num_stripes - 1; + if (rbio->real_stripes - rbio->nr_data == 1) { + p_stripe = rbio->real_stripes - 1; + } else if (rbio->real_stripes - rbio->nr_data == 2) { + p_stripe = rbio->real_stripes - 2; + q_stripe = rbio->real_stripes - 1; } else { BUG(); } @@ -2358,7 +2387,7 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, */ pointers[stripe++] = kmap(q_page); - raid6_call.gen_syndrome(bbio->num_stripes, PAGE_SIZE, + raid6_call.gen_syndrome(rbio->real_stripes, PAGE_SIZE, pointers); } else { /* raid5 */ @@ -2376,7 +2405,7 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, bitmap_clear(rbio->dbitmap, pagenr, 1); kunmap(p); - for (stripe = 0; stripe < bbio->num_stripes; stripe++) + for (stripe = 0; stripe < rbio->real_stripes; stripe++) kunmap(page_in_rbio(rbio, stripe, pagenr, 0)); } @@ -2526,7 +2555,6 @@ static void raid56_parity_scrub_end_io(struct bio *bio, int err) static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio) { int bios_to_read = 0; - struct btrfs_bio *bbio = rbio->bbio; struct bio_list bio_list; int ret; int pagenr; @@ -2544,7 +2572,7 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio) * build a list of bios to read all the missing parts of this * stripe */ - for (stripe = 0; stripe < bbio->num_stripes; stripe++) { + for (stripe = 0; stripe < rbio->real_stripes; stripe++) { for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) { struct page *page; /* diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 217c42ea90b0..6d8a5e8d8c39 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4881,13 +4881,15 @@ static inline int parity_smaller(u64 a, u64 b) static void sort_parity_stripes(struct btrfs_bio *bbio, u64 *raid_map) { struct btrfs_bio_stripe s; + int real_stripes = bbio->num_stripes - bbio->num_tgtdevs; int i; u64 l; int again = 1; + int m; while (again) { again = 0; - for (i = 0; i < bbio->num_stripes - 1; i++) { + for (i = 0; i < real_stripes - 1; i++) { if (parity_smaller(raid_map[i], raid_map[i+1])) { s = bbio->stripes[i]; l = raid_map[i]; @@ -4895,6 +4897,14 @@ static void sort_parity_stripes(struct btrfs_bio *bbio, u64 *raid_map) raid_map[i] = raid_map[i+1]; bbio->stripes[i+1] = s; raid_map[i+1] = l; + + if (bbio->tgtdev_map) { + m = bbio->tgtdev_map[i]; + bbio->tgtdev_map[i] = + bbio->tgtdev_map[i + 1]; + bbio->tgtdev_map[i + 1] = m; + } + again = 1; } } @@ -4923,6 +4933,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, int ret = 0; int num_stripes; int max_errors = 0; + int tgtdev_indexes = 0; struct btrfs_bio *bbio = NULL; struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; int dev_replace_is_ongoing = 0; @@ -5234,14 +5245,19 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, num_alloc_stripes <<= 1; if (rw & REQ_GET_READ_MIRRORS) num_alloc_stripes++; + tgtdev_indexes = num_stripes; } - bbio = kzalloc(btrfs_bio_size(num_alloc_stripes), GFP_NOFS); + + bbio = kzalloc(btrfs_bio_size(num_alloc_stripes, tgtdev_indexes), + GFP_NOFS); if (!bbio) { kfree(raid_map); ret = -ENOMEM; goto out; } atomic_set(&bbio->error, 0); + if (dev_replace_is_ongoing) + bbio->tgtdev_map = (int *)(bbio->stripes + num_alloc_stripes); if (rw & REQ_DISCARD) { int factor = 0; @@ -5326,6 +5342,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) max_errors = btrfs_chunk_max_errors(map); + tgtdev_indexes = 0; if (dev_replace_is_ongoing && (rw & (REQ_WRITE | REQ_DISCARD)) && dev_replace->tgtdev != NULL) { int index_where_to_add; @@ -5354,8 +5371,10 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, new->physical = old->physical; new->length = old->length; new->dev = dev_replace->tgtdev; + bbio->tgtdev_map[i] = index_where_to_add; index_where_to_add++; max_errors++; + tgtdev_indexes++; } } num_stripes = index_where_to_add; @@ -5401,7 +5420,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, tgtdev_stripe->length = bbio->stripes[index_srcdev].length; tgtdev_stripe->dev = dev_replace->tgtdev; + bbio->tgtdev_map[index_srcdev] = num_stripes; + tgtdev_indexes++; num_stripes++; } } @@ -5411,6 +5432,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, bbio->num_stripes = num_stripes; bbio->max_errors = max_errors; bbio->mirror_num = mirror_num; + bbio->num_tgtdevs = tgtdev_indexes; /* * this is the case that REQ_READ && dev_replace_is_ongoing && diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 01094bb804c7..70be2571cedf 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -292,7 +292,7 @@ struct btrfs_bio_stripe { struct btrfs_bio; typedef void (btrfs_bio_end_io_t) (struct btrfs_bio *bio, int err); -#define BTRFS_BIO_ORIG_BIO_SUBMITTED 0x1 +#define BTRFS_BIO_ORIG_BIO_SUBMITTED (1 << 0) struct btrfs_bio { atomic_t stripes_pending; @@ -305,6 +305,8 @@ struct btrfs_bio { int max_errors; int num_stripes; int mirror_num; + int num_tgtdevs; + int *tgtdev_map; struct btrfs_bio_stripe stripes[]; }; @@ -387,8 +389,10 @@ struct btrfs_balance_control { int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, u64 end, u64 *length); -#define btrfs_bio_size(n) (sizeof(struct btrfs_bio) + \ - (sizeof(struct btrfs_bio_stripe) * (n))) +#define btrfs_bio_size(total_stripes, real_stripes) \ + (sizeof(struct btrfs_bio) + \ + (sizeof(struct btrfs_bio_stripe) * (total_stripes)) + \ + (sizeof(int) * (real_stripes))) int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, u64 logical, u64 *length, -- cgit v1.2.1 From 7603597690147a16b5cc77047d7570fa22a22673 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Fri, 14 Nov 2014 17:45:42 +0800 Subject: Btrfs, replace: write raid56 parity into the replace target device This function reused the code of parity scrub, and we just write the right parity or corrected parity into the target device before the parity scrub end. Signed-off-by: Miao Xie --- fs/btrfs/raid56.c | 23 +++++++++++++++++++++++ fs/btrfs/scrub.c | 2 +- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 89a8486c34b3..5ece565bc5f0 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -2318,7 +2318,9 @@ static void raid_write_parity_end_io(struct bio *bio, int err) static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check) { + struct btrfs_bio *bbio = rbio->bbio; void *pointers[rbio->real_stripes]; + DECLARE_BITMAP(pbitmap, rbio->stripe_npages); int nr_data = rbio->nr_data; int stripe; int pagenr; @@ -2328,6 +2330,7 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, struct page *q_page = NULL; struct bio_list bio_list; struct bio *bio; + int is_replace = 0; int ret; bio_list_init(&bio_list); @@ -2341,6 +2344,11 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, BUG(); } + if (bbio->num_tgtdevs && bbio->tgtdev_map[rbio->scrubp]) { + is_replace = 1; + bitmap_copy(pbitmap, rbio->dbitmap, rbio->stripe_npages); + } + /* * Because the higher layers(scrubber) are unlikely to * use this area of the disk again soon, so don't cache @@ -2429,6 +2437,21 @@ writeback: goto cleanup; } + if (!is_replace) + goto submit_write; + + for_each_set_bit(pagenr, pbitmap, rbio->stripe_npages) { + struct page *page; + + page = rbio_stripe_page(rbio, rbio->scrubp, pagenr); + ret = rbio_add_io_page(rbio, &bio_list, page, + bbio->tgtdev_map[rbio->scrubp], + pagenr, rbio->stripe_len); + if (ret) + goto cleanup; + } + +submit_write: nr_data = bio_list_size(&bio_list); if (!nr_data) { /* Every parity is right */ diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 7f95afcf9fd3..0ae837fd676d 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -2714,7 +2714,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity) goto out; length = sparity->logic_end - sparity->logic_start + 1; - ret = btrfs_map_sblock(sctx->dev_root->fs_info, REQ_GET_READ_MIRRORS, + ret = btrfs_map_sblock(sctx->dev_root->fs_info, WRITE, sparity->logic_start, &length, &bbio, 0, &raid_map); if (ret || !bbio || !raid_map) -- cgit v1.2.1 From 4245215d6a8dba1a51c50533b6667919687c0b89 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Tue, 25 Nov 2014 16:39:28 +0800 Subject: Btrfs, raid56: fix use-after-free problem in the final device replace procedure on raid56 The commit c404e0dc (Btrfs: fix use-after-free in the finishing procedure of the device replace) fixed a use-after-free problem which happened when removing the source device at the end of device replace, but at that time, btrfs didn't support device replace on raid56, so we didn't fix the problem on the raid56 profile. Currently, we implemented device replace for raid56, so we need kick that problem out before we enable that function for raid56. The fix method is very simple, we just increase the bio per-cpu counter before we submit a raid56 io, and decrease the counter when the raid56 io ends. Signed-off-by: Miao Xie --- fs/btrfs/ctree.h | 7 ++++++- fs/btrfs/dev-replace.c | 4 ++-- fs/btrfs/raid56.c | 41 ++++++++++++++++++++++++++++++++--------- fs/btrfs/raid56.h | 4 ++-- fs/btrfs/scrub.c | 2 +- fs/btrfs/volumes.c | 7 ++----- 6 files changed, 45 insertions(+), 20 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index fe69edda11fb..470e3177a7e8 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -4097,7 +4097,12 @@ int btrfs_scrub_progress(struct btrfs_root *root, u64 devid, /* dev-replace.c */ void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info); void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info); -void btrfs_bio_counter_dec(struct btrfs_fs_info *fs_info); +void btrfs_bio_counter_sub(struct btrfs_fs_info *fs_info, s64 amount); + +static inline void btrfs_bio_counter_dec(struct btrfs_fs_info *fs_info) +{ + btrfs_bio_counter_sub(fs_info, 1); +} /* reada.c */ struct reada_control { diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 6f662b34ba0e..fa27b4e3b6c8 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -920,9 +920,9 @@ void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info) percpu_counter_inc(&fs_info->bio_counter); } -void btrfs_bio_counter_dec(struct btrfs_fs_info *fs_info) +void btrfs_bio_counter_sub(struct btrfs_fs_info *fs_info, s64 amount) { - percpu_counter_dec(&fs_info->bio_counter); + percpu_counter_sub(&fs_info->bio_counter, amount); if (waitqueue_active(&fs_info->replace_wait)) wake_up(&fs_info->replace_wait); diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 5ece565bc5f0..8ab2a17bbba8 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -162,6 +162,8 @@ struct btrfs_raid_bio { */ int bio_list_bytes; + int generic_bio_cnt; + atomic_t refs; atomic_t stripes_pending; @@ -354,6 +356,7 @@ static void merge_rbio(struct btrfs_raid_bio *dest, { bio_list_merge(&dest->bio_list, &victim->bio_list); dest->bio_list_bytes += victim->bio_list_bytes; + dest->generic_bio_cnt += victim->generic_bio_cnt; bio_list_init(&victim->bio_list); } @@ -891,6 +894,10 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err, int uptodate) { struct bio *cur = bio_list_get(&rbio->bio_list); struct bio *next; + + if (rbio->generic_bio_cnt) + btrfs_bio_counter_sub(rbio->fs_info, rbio->generic_bio_cnt); + free_raid_bio(rbio); while (cur) { @@ -1775,6 +1782,7 @@ int raid56_parity_write(struct btrfs_root *root, struct bio *bio, struct btrfs_raid_bio *rbio; struct btrfs_plug_cb *plug = NULL; struct blk_plug_cb *cb; + int ret; rbio = alloc_rbio(root, bbio, raid_map, stripe_len); if (IS_ERR(rbio)) { @@ -1785,12 +1793,19 @@ int raid56_parity_write(struct btrfs_root *root, struct bio *bio, rbio->bio_list_bytes = bio->bi_iter.bi_size; rbio->operation = BTRFS_RBIO_WRITE; + btrfs_bio_counter_inc_noblocked(root->fs_info); + rbio->generic_bio_cnt = 1; + /* * don't plug on full rbios, just get them out the door * as quickly as we can */ - if (rbio_is_full(rbio)) - return full_stripe_write(rbio); + if (rbio_is_full(rbio)) { + ret = full_stripe_write(rbio); + if (ret) + btrfs_bio_counter_dec(root->fs_info); + return ret; + } cb = blk_check_plugged(btrfs_raid_unplug, root->fs_info, sizeof(*plug)); @@ -1801,10 +1816,13 @@ int raid56_parity_write(struct btrfs_root *root, struct bio *bio, INIT_LIST_HEAD(&plug->rbio_list); } list_add_tail(&rbio->plug_list, &plug->rbio_list); + ret = 0; } else { - return __raid56_parity_write(rbio); + ret = __raid56_parity_write(rbio); + if (ret) + btrfs_bio_counter_dec(root->fs_info); } - return 0; + return ret; } /* @@ -2139,19 +2157,17 @@ cleanup: */ int raid56_parity_recover(struct btrfs_root *root, struct bio *bio, struct btrfs_bio *bbio, u64 *raid_map, - u64 stripe_len, int mirror_num, int hold_bbio) + u64 stripe_len, int mirror_num, int generic_io) { struct btrfs_raid_bio *rbio; int ret; rbio = alloc_rbio(root, bbio, raid_map, stripe_len); if (IS_ERR(rbio)) { - __free_bbio_and_raid_map(bbio, raid_map, !hold_bbio); + __free_bbio_and_raid_map(bbio, raid_map, generic_io); return PTR_ERR(rbio); } - if (hold_bbio) - set_bit(RBIO_HOLD_BBIO_MAP_BIT, &rbio->flags); rbio->operation = BTRFS_RBIO_READ_REBUILD; bio_list_add(&rbio->bio_list, bio); rbio->bio_list_bytes = bio->bi_iter.bi_size; @@ -2159,11 +2175,18 @@ int raid56_parity_recover(struct btrfs_root *root, struct bio *bio, rbio->faila = find_logical_bio_stripe(rbio, bio); if (rbio->faila == -1) { BUG(); - __free_bbio_and_raid_map(bbio, raid_map, !hold_bbio); + __free_bbio_and_raid_map(bbio, raid_map, generic_io); kfree(rbio); return -EIO; } + if (generic_io) { + btrfs_bio_counter_inc_noblocked(root->fs_info); + rbio->generic_bio_cnt = 1; + } else { + set_bit(RBIO_HOLD_BBIO_MAP_BIT, &rbio->flags); + } + /* * reconstruct from the q stripe if they are * asking for mirror 3 diff --git a/fs/btrfs/raid56.h b/fs/btrfs/raid56.h index 3d4ddb3d861d..31d4a157b5e3 100644 --- a/fs/btrfs/raid56.h +++ b/fs/btrfs/raid56.h @@ -43,8 +43,8 @@ struct btrfs_raid_bio; struct btrfs_device; int raid56_parity_recover(struct btrfs_root *root, struct bio *bio, - struct btrfs_bio *bbio, u64 *raid_map, - u64 stripe_len, int mirror_num, int hold_bbio); + struct btrfs_bio *bbio, u64 *raid_map, + u64 stripe_len, int mirror_num, int generic_io); int raid56_parity_write(struct btrfs_root *root, struct bio *bio, struct btrfs_bio *bbio, u64 *raid_map, u64 stripe_len); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 0ae837fd676d..27f2e16cd259 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1477,7 +1477,7 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info, ret = raid56_parity_recover(fs_info->fs_root, bio, page->recover->bbio, page->recover->raid_map, page->recover->map_length, - page->mirror_num, 1); + page->mirror_num, 0); if (ret) return ret; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 6d8a5e8d8c39..cbb766577f31 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -5843,12 +5843,9 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, } else { ret = raid56_parity_recover(root, bio, bbio, raid_map, map_length, - mirror_num, 0); + mirror_num, 1); } - /* - * FIXME, replace dosen't support raid56 yet, please fix - * it in the future. - */ + btrfs_bio_counter_dec(root->fs_info); return ret; } -- cgit v1.2.1 From 5d3edd8f44aac94de7b16f4c54290e24f5e8c532 Mon Sep 17 00:00:00 2001 From: Zhao Lei Date: Thu, 13 Nov 2014 11:45:38 +0800 Subject: Btrfs, replace: enable dev-replace for raid56 Signed-off-by: Zhao Lei Signed-off-by: Miao Xie --- fs/btrfs/dev-replace.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index fa27b4e3b6c8..0bf41f8b1e23 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -316,11 +316,6 @@ int btrfs_dev_replace_start(struct btrfs_root *root, struct btrfs_device *tgt_device = NULL; struct btrfs_device *src_device = NULL; - if (btrfs_fs_incompat(fs_info, RAID56)) { - btrfs_warn(fs_info, "dev_replace cannot yet handle RAID5/RAID6"); - return -EOPNOTSUPP; - } - switch (args->start.cont_reading_from_srcdev_mode) { case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS: case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID: -- cgit v1.2.1