From 86a570c577b7975f31c0539f6a724194892ebc43 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 24 Oct 2014 19:03:57 -0700 Subject: [PATCH 001/208] ASoC: fsl_asrc: Add reg_defaults for regmap to fix kernel dump Kernel dump (WARN_ON) ocurred during system boot-up inside regmap_write(): ------------[ cut here ]------------ WARNING: CPU: 0 PID: 47 at kernel/locking/lockdep.c:2744 lockdep_trace_alloc+0xe8/0x108() DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags)) Modules linked in: CPU: 0 PID: 47 Comm: kworker/u2:2 Not tainted 3.18.0-rc1-10245-gb75d289-dirty #56 Workqueue: deferwq deferred_probe_work_func Backtrace: [<80012294>] (dump_backtrace) from [<80012578>] (show_stack+0x18/0x1c) r6:8097c73c r5:8097c73c r4:00000000 r3:be33ba80 [<80012560>] (show_stack) from [<806aac48>] (dump_stack+0x8c/0xa4) [<806aabbc>] (dump_stack) from [<8002a694>] (warn_slowpath_common+0x70/0x94) r6:80062838 r5:00000009 r4:bd827b30 r3:be33ba80 [<8002a624>] (warn_slowpath_common) from [<8002a6f0>] (warn_slowpath_fmt+0x38/0x40) r8:00000004 r7:00000001 r6:000080d0 r5:60000193 r4:bd826010 [<8002a6bc>] (warn_slowpath_fmt) from [<80062838>] (lockdep_trace_alloc+0xe8/0x108) r3:80831590 r2:8082e160 [<80062750>] (lockdep_trace_alloc) from [<800ea5dc>] (kmem_cache_alloc+0x28/0x134) r5:000080d0 r4:be001f00 [<800ea5b4>] (kmem_cache_alloc) from [<8038d72c>] (regcache_rbtree_write+0x15c/0x648) r10:00000000 r9:0000001c r8:00000004 r7:00000001 r6:00000000 r5:bd819a00 r4:00000000 r3:811aea88 [<8038d5d0>] (regcache_rbtree_write) from [<8038c4d8>] (regcache_write+0x5c/0x64) r10:be3f9f88 r9:00000000 r8:00000004 r7:00000001 r6:00000000 r5:00000001 r4:bd819a00 [<8038c47c>] (regcache_write) from [<8038b0dc>] (_regmap_raw_write+0x134/0x5f4) r6:be3f9f84 r5:00000001 r4:bd819a00 r3:00000001 [<8038afa8>] (_regmap_raw_write) from [<8038b610>] (_regmap_bus_raw_write+0x74/0x94) r10:00000000 r9:00000001 r8:be3fb080 r7:bd819a00 r6:00000001 r5:00000000 r4:bd819a00 [<8038b59c>] (_regmap_bus_raw_write) from [<8038a8b4>] (_regmap_write+0x60/0x9c) r6:00000001 r5:00000000 r4:bd819a00 r3:8038b59c [<8038a854>] (_regmap_write) from [<8038ba24>] (regmap_write+0x48/0x68) r7:bd81ad80 r6:00000001 r5:00000000 r4:bd819a00 [<8038b9dc>] (regmap_write) from [<80528f30>] (fsl_asrc_dai_probe+0x34/0x104) r6:bd888628 r5:be3fb080 r4:be3b4410 r3:be3b442c ------------[ dump end ]------------ ============================================================================= 2741 /* 2742 * Oi! Can't be having __GFP_FS allocations with IRQs disabled. 2743 */ 2744 if (DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags))) 2745 return; ============================================================================= By looking at 2744 line, we can get that it's because regcache_rbtree_write() would call kmalloc() with GFP flag if it couldn't find an existing block to insert nodes while this kmalloc() call is inside a spin_lock_irq_save pair, i.e. IRQs disabled. Even though this may be a bug that should be fixed, I still try to send this patch as a quick fix (work around) since it does no harm to assign default values of every registers when using regcache. Signed-off-by: Nicolin Chen Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_asrc.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/sound/soc/fsl/fsl_asrc.c b/sound/soc/fsl/fsl_asrc.c index 3b145313f93e..653dbaab83a6 100644 --- a/sound/soc/fsl/fsl_asrc.c +++ b/sound/soc/fsl/fsl_asrc.c @@ -684,12 +684,38 @@ static bool fsl_asrc_writeable_reg(struct device *dev, unsigned int reg) } } +static struct reg_default fsl_asrc_reg[] = { + { REG_ASRCTR, 0x0000 }, { REG_ASRIER, 0x0000 }, + { REG_ASRCNCR, 0x0000 }, { REG_ASRCFG, 0x0000 }, + { REG_ASRCSR, 0x0000 }, { REG_ASRCDR1, 0x0000 }, + { REG_ASRCDR2, 0x0000 }, { REG_ASRSTR, 0x0000 }, + { REG_ASRRA, 0x0000 }, { REG_ASRRB, 0x0000 }, + { REG_ASRRC, 0x0000 }, { REG_ASRPM1, 0x0000 }, + { REG_ASRPM2, 0x0000 }, { REG_ASRPM3, 0x0000 }, + { REG_ASRPM4, 0x0000 }, { REG_ASRPM5, 0x0000 }, + { REG_ASRTFR1, 0x0000 }, { REG_ASRCCR, 0x0000 }, + { REG_ASRDIA, 0x0000 }, { REG_ASRDOA, 0x0000 }, + { REG_ASRDIB, 0x0000 }, { REG_ASRDOB, 0x0000 }, + { REG_ASRDIC, 0x0000 }, { REG_ASRDOC, 0x0000 }, + { REG_ASRIDRHA, 0x0000 }, { REG_ASRIDRLA, 0x0000 }, + { REG_ASRIDRHB, 0x0000 }, { REG_ASRIDRLB, 0x0000 }, + { REG_ASRIDRHC, 0x0000 }, { REG_ASRIDRLC, 0x0000 }, + { REG_ASR76K, 0x0A47 }, { REG_ASR56K, 0x0DF3 }, + { REG_ASRMCRA, 0x0000 }, { REG_ASRFSTA, 0x0000 }, + { REG_ASRMCRB, 0x0000 }, { REG_ASRFSTB, 0x0000 }, + { REG_ASRMCRC, 0x0000 }, { REG_ASRFSTC, 0x0000 }, + { REG_ASRMCR1A, 0x0000 }, { REG_ASRMCR1B, 0x0000 }, + { REG_ASRMCR1C, 0x0000 }, +}; + static const struct regmap_config fsl_asrc_regmap_config = { .reg_bits = 32, .reg_stride = 4, .val_bits = 32, .max_register = REG_ASRMCR1C, + .reg_defaults = fsl_asrc_reg, + .num_reg_defaults = ARRAY_SIZE(fsl_asrc_reg), .readable_reg = fsl_asrc_readable_reg, .volatile_reg = fsl_asrc_volatile_reg, .writeable_reg = fsl_asrc_writeable_reg, From b2a9a3b818db727479cc2b9c2924e4ab9a4bbb07 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Tue, 28 Oct 2014 08:59:37 +0800 Subject: [PATCH 002/208] ASoC: es8328-i2c: Fix i2c_device_id name field in es8328_id The convention for i2c_device_id name does not need to have company prefix. Signed-off-by: Axel Lin Signed-off-by: Mark Brown --- sound/soc/codecs/es8328-i2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/es8328-i2c.c b/sound/soc/codecs/es8328-i2c.c index aae410d122ee..2d05b5d3a6ce 100644 --- a/sound/soc/codecs/es8328-i2c.c +++ b/sound/soc/codecs/es8328-i2c.c @@ -19,7 +19,7 @@ #include "es8328.h" static const struct i2c_device_id es8328_id[] = { - { "everest,es8328", 0 }, + { "es8328", 0 }, { } }; MODULE_DEVICE_TABLE(i2c, es8328_id); From 885e7b0e181c14e4d0ddd26c688bad2b84c1ada9 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 14 Oct 2014 14:16:24 -0700 Subject: [PATCH 003/208] target: Don't call TFO->write_pending if data_length == 0 If an initiator sends a zero-length command (e.g. TEST UNIT READY) but sets the transfer direction in the transport layer to indicate a data-out phase, we still shouldn't try to transfer data. At best it's a NOP, and depending on the transport, we might crash on an uninitialized sg list. Reported-by: Craig Watson Signed-off-by: Roland Dreier Cc: # 3.1 Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 9ea0d5f03f7a..be877bf6f730 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -2292,7 +2292,7 @@ transport_generic_new_cmd(struct se_cmd *cmd) * and let it call back once the write buffers are ready. */ target_add_to_state_list(cmd); - if (cmd->data_direction != DMA_TO_DEVICE) { + if (cmd->data_direction != DMA_TO_DEVICE || cmd->data_length == 0) { target_execute_cmd(cmd); return 0; } From ab8edab132829b26dd13db6caca3c242cce35dc1 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Wed, 8 Oct 2014 06:19:20 +0000 Subject: [PATCH 004/208] vhost-scsi: Take configfs group dependency during VHOST_SCSI_SET_ENDPOINT This patch addresses a bug where individual vhost-scsi configfs endpoint groups can be removed from below while active exports to QEMU userspace still exist, resulting in an OOPs. It adds a configfs_depend_item() in vhost_scsi_set_endpoint() to obtain an explicit dependency on se_tpg->tpg_group in order to prevent individual vhost-scsi WWPN endpoints from being released via normal configfs methods while an QEMU ioctl reference still exists. Also, add matching configfs_undepend_item() in vhost_scsi_clear_endpoint() to release the dependency, once QEMU's reference to the individual group at /sys/kernel/config/target/vhost/$WWPN/$TPGT is released. (Fix up vhost_scsi_clear_endpoint() error path - DanC) Cc: Michael S. Tsirkin Cc: Paolo Bonzini Cc: Stefan Hajnoczi Cc: # 3.6+ Signed-off-by: Nicholas Bellinger --- drivers/vhost/scsi.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 69906cacd04f..a17f11850669 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1312,6 +1312,7 @@ static int vhost_scsi_set_endpoint(struct vhost_scsi *vs, struct vhost_scsi_target *t) { + struct se_portal_group *se_tpg; struct tcm_vhost_tport *tv_tport; struct tcm_vhost_tpg *tpg; struct tcm_vhost_tpg **vs_tpg; @@ -1359,6 +1360,21 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, ret = -EEXIST; goto out; } + /* + * In order to ensure individual vhost-scsi configfs + * groups cannot be removed while in use by vhost ioctl, + * go ahead and take an explicit se_tpg->tpg_group.cg_item + * dependency now. + */ + se_tpg = &tpg->se_tpg; + ret = configfs_depend_item(se_tpg->se_tpg_tfo->tf_subsys, + &se_tpg->tpg_group.cg_item); + if (ret) { + pr_warn("configfs_depend_item() failed: %d\n", ret); + kfree(vs_tpg); + mutex_unlock(&tpg->tv_tpg_mutex); + goto out; + } tpg->tv_tpg_vhost_count++; tpg->vhost_scsi = vs; vs_tpg[tpg->tport_tpgt] = tpg; @@ -1401,6 +1417,7 @@ static int vhost_scsi_clear_endpoint(struct vhost_scsi *vs, struct vhost_scsi_target *t) { + struct se_portal_group *se_tpg; struct tcm_vhost_tport *tv_tport; struct tcm_vhost_tpg *tpg; struct vhost_virtqueue *vq; @@ -1449,6 +1466,13 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs, vs->vs_tpg[target] = NULL; match = true; mutex_unlock(&tpg->tv_tpg_mutex); + /* + * Release se_tpg->tpg_group.cg_item configfs dependency now + * to allow vhost-scsi WWPN se_tpg->tpg_group shutdown to occur. + */ + se_tpg = &tpg->se_tpg; + configfs_undepend_item(se_tpg->se_tpg_tfo->tf_subsys, + &se_tpg->tpg_group.cg_item); } if (match) { for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { From f2774f430ea65fddc068865d364bb254d2816648 Mon Sep 17 00:00:00 2001 From: Steven Allen Date: Wed, 15 Oct 2014 10:59:21 -0700 Subject: [PATCH 005/208] iscsi-target: return the correct port in SendTargets The fact that a target is published on the any address has no bearing on which port(s) it is published. SendTargets should always send the portal's port, not the port used for discovery. Signed-off-by: Steven Allen Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index b19e4329ba00..73e58d22e325 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -3491,7 +3491,7 @@ iscsit_build_sendtargets_response(struct iscsi_cmd *cmd, len = sprintf(buf, "TargetAddress=" "%s:%hu,%hu", inaddr_any ? conn->local_ip : np->np_ip, - inaddr_any ? conn->local_port : np->np_port, + np->np_port, tpg->tpgt); len += 1; From ab477c1ff5e0a744c072404bf7db51bfe1f05b6e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Sun, 19 Oct 2014 18:05:33 +0300 Subject: [PATCH 006/208] srp-target: Retry when QP creation fails with ENOMEM It is not guaranteed to that srp_sq_size is supported by the HCA. So if we failed to create the QP with ENOMEM, try with a smaller srp_sq_size. Keep it up until we hit MIN_SRPT_SQ_SIZE, then fail the connection. Reported-by: Mark Lehrer Signed-off-by: Bart Van Assche Signed-off-by: Sagi Grimberg Cc: # 3.4+ Signed-off-by: Nicholas Bellinger --- drivers/infiniband/ulp/srpt/ib_srpt.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 7206547c13ce..dc829682701a 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -2092,6 +2092,7 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch) if (!qp_init) goto out; +retry: ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, ch->rq_size + srp_sq_size, 0); if (IS_ERR(ch->cq)) { @@ -2115,6 +2116,13 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch) ch->qp = ib_create_qp(sdev->pd, qp_init); if (IS_ERR(ch->qp)) { ret = PTR_ERR(ch->qp); + if (ret == -ENOMEM) { + srp_sq_size /= 2; + if (srp_sq_size >= MIN_SRPT_SQ_SIZE) { + ib_destroy_cq(ch->cq); + goto retry; + } + } printk(KERN_ERR "failed to create_qp ret= %d\n", ret); goto err_destroy_cq; } From 29f95bd76f6ec1eff88eec6a04191104a11a7f97 Mon Sep 17 00:00:00 2001 From: Jianqun Date: Wed, 29 Oct 2014 17:45:51 +0800 Subject: [PATCH 007/208] ASoC: rockchip-i2s: fix infinite loop in rockchip_snd_rxctrl We can get into an infinite loop if the I2S_CLR register fails to clear due to a missing break statement, so add that. Signed-off-by: Jianqun Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/rockchip/rockchip_i2s.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/soc/rockchip/rockchip_i2s.c b/sound/soc/rockchip/rockchip_i2s.c index f373e37f8305..c74ba37f862c 100644 --- a/sound/soc/rockchip/rockchip_i2s.c +++ b/sound/soc/rockchip/rockchip_i2s.c @@ -154,8 +154,10 @@ static void rockchip_snd_rxctrl(struct rk_i2s_dev *i2s, int on) while (val) { regmap_read(i2s->regmap, I2S_CLR, &val); retry--; - if (!retry) + if (!retry) { dev_warn(i2s->dev, "fail to clear\n"); + break; + } } } } From df078d291d9b50e3ccfb8f030f85de701696a5ad Mon Sep 17 00:00:00 2001 From: "Fang, Yang A" Date: Tue, 28 Oct 2014 18:36:36 -0300 Subject: [PATCH 008/208] ASoC: rt5645: Mark RT5645_TDM_CTRL_3 as readable amixer query fails due to it is not readable reigster Signed-off-by: Fang, Yang A Acked-by: Bard Liao Signed-off-by: Mark Brown --- sound/soc/codecs/rt5645.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index 3fb83bf09768..d16331e0b64d 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -139,6 +139,7 @@ static const struct reg_default rt5645_reg[] = { { 0x76, 0x000a }, { 0x77, 0x0c00 }, { 0x78, 0x0000 }, + { 0x79, 0x0123 }, { 0x80, 0x0000 }, { 0x81, 0x0000 }, { 0x82, 0x0000 }, @@ -334,6 +335,7 @@ static bool rt5645_readable_register(struct device *dev, unsigned int reg) case RT5645_DMIC_CTRL2: case RT5645_TDM_CTRL_1: case RT5645_TDM_CTRL_2: + case RT5645_TDM_CTRL_3: case RT5645_GLB_CLK: case RT5645_PLL_CTRL1: case RT5645_PLL_CTRL2: From c1b9b9b1ad2df6144ca3fbe6989f7bd9ea5c5562 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 28 Oct 2014 21:01:53 -0700 Subject: [PATCH 009/208] ASoC: fsi: remove unsupported PAUSE flag FSI doesn't support PAUSE. Remove SNDRV_PCM_INFO_PAUSE flags from snd_pcm_hardware info Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/sh/fsi.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/soc/sh/fsi.c b/sound/soc/sh/fsi.c index 66fddec9543d..88e5df474ccf 100644 --- a/sound/soc/sh/fsi.c +++ b/sound/soc/sh/fsi.c @@ -1711,8 +1711,7 @@ static const struct snd_soc_dai_ops fsi_dai_ops = { static struct snd_pcm_hardware fsi_pcm_hardware = { .info = SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_MMAP | - SNDRV_PCM_INFO_MMAP_VALID | - SNDRV_PCM_INFO_PAUSE, + SNDRV_PCM_INFO_MMAP_VALID, .buffer_bytes_max = 64 * 1024, .period_bytes_min = 32, .period_bytes_max = 8192, From 706c66213e5e623e23f521b1acbd8171af7a3549 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 28 Oct 2014 21:02:03 -0700 Subject: [PATCH 010/208] ASoC: rsnd: remove unsupported PAUSE flag R-Car sound doesn't support PAUSE. Remove SNDRV_PCM_INFO_PAUSE flags from snd_pcm_hardware info Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/sh/rcar/core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c index 1922ec57d10a..70042197f9e2 100644 --- a/sound/soc/sh/rcar/core.c +++ b/sound/soc/sh/rcar/core.c @@ -886,8 +886,7 @@ static int rsnd_dai_probe(struct platform_device *pdev, static struct snd_pcm_hardware rsnd_pcm_hardware = { .info = SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_MMAP | - SNDRV_PCM_INFO_MMAP_VALID | - SNDRV_PCM_INFO_PAUSE, + SNDRV_PCM_INFO_MMAP_VALID, .buffer_bytes_max = 64 * 1024, .period_bytes_min = 32, .period_bytes_max = 8192, From 1ffae3612fe53dd18b92e696ab4f29df319a508d Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Wed, 29 Oct 2014 21:46:30 +0100 Subject: [PATCH 011/208] ASoC: Fix snd_soc_find_dai() matching component by name Commit 14621c7e5e72 ("ASoC: Consolidate CPU and CODEC DAI lookup") consolidated the lookup of CPU DAIs and CODEC DAIs into a single function. When matching a component by name for CODEC DAIs the code previous to the patch compared the name in the DAI link table with component->name. For CPU DAIs the code compared to dev_name(component->dev). The newly introduced function ended up using the later as well. For most components dev_name(component->dev) and component->name are the same. The main notable exception are I2C devices where the driver name and the device name are concatenated to form the component name. By using dev_name(component->dev) instead of component->name the patch broke the matching of I2C CODECs by name. This patch restores the original behavior by using component->name instead of dev_name(component->dev). This will be safe even for CPU DAIs since for CPU DAIs both are the same. Fixes: 14621c7e5e72 ("ASoC: Consolidate CPU and CODEC DAI lookup") Reported-by: Dmitry Eremin-Solenikov Signed-off-by: Lars-Peter Clausen Signed-off-by: Mark Brown --- sound/soc/soc-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 4c8f8a23a0e9..b60ff56ebc0f 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -884,7 +884,7 @@ static struct snd_soc_dai *snd_soc_find_dai( list_for_each_entry(component, &component_list, list) { if (dlc->of_node && component->dev->of_node != dlc->of_node) continue; - if (dlc->name && strcmp(dev_name(component->dev), dlc->name)) + if (dlc->name && strcmp(component->name, dlc->name)) continue; list_for_each_entry(dai, &component->dai_list, list) { if (dlc->dai_name && strcmp(dai->name, dlc->dai_name)) From f57915cfa5b2b14c1cffa2e83c034f55e3f0e70d Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Wed, 22 Oct 2014 14:55:49 -0700 Subject: [PATCH 012/208] ib_isert: Add max_send_sge=2 minimum for control PDU responses This patch adds a max_send_sge=2 minimum in isert_conn_setup_qp() to ensure outgoing control PDU responses with tx_desc->num_sge=2 are able to function correctly. This addresses a bug with RDMA hardware using dev_attr.max_sge=3, that in the original code with the ConnectX-2 work-around would result in isert_conn->max_sge=1 being negotiated. Originally reported by Chris with ocrdma driver. Reported-by: Chris Moore Tested-by: Chris Moore Signed-off-by: Or Gerlitz Cc: # 3.10+ Signed-off-by: Nicholas Bellinger --- drivers/infiniband/ulp/isert/ib_isert.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 3effa931fce2..ef5b22b3ea83 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -115,9 +115,12 @@ isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id, attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS; /* * FIXME: Use devattr.max_sge - 2 for max_send_sge as - * work-around for RDMA_READ.. + * work-around for RDMA_READs with ConnectX-2. + * + * Also, still make sure to have at least two SGEs for + * outgoing control PDU responses. */ - attr.cap.max_send_sge = device->dev_attr.max_sge - 2; + attr.cap.max_send_sge = max(2, device->dev_attr.max_sge - 2); isert_conn->max_sge = attr.cap.max_send_sge; attr.cap.max_recv_sge = 1; From 3b726ae2de02a406cc91903f80132daee37b6f1b Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 28 Oct 2014 13:45:03 -0700 Subject: [PATCH 013/208] iser-target: Handle DEVICE_REMOVAL event on network portal listener correctly In this case the cm_id->context is the isert_np, and the cm_id->qp is NULL, so use that to distinct the cases. Since we don't expect any other events on this cm_id we can just return -1 for explicit termination of the cm_id by the cma layer. Signed-off-by: Sagi Grimberg Cc: # 3.10+ Signed-off-by: Nicholas Bellinger --- drivers/infiniband/ulp/isert/ib_isert.c | 29 ++++++++++++++++--------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index ef5b22b3ea83..a6c7b395c61d 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -806,14 +806,25 @@ wake_up: complete(&isert_conn->conn_wait); } -static void +static int isert_disconnected_handler(struct rdma_cm_id *cma_id, bool disconnect) { - struct isert_conn *isert_conn = (struct isert_conn *)cma_id->context; + struct isert_conn *isert_conn; + + if (!cma_id->qp) { + struct isert_np *isert_np = cma_id->context; + + isert_np->np_cm_id = NULL; + return -1; + } + + isert_conn = (struct isert_conn *)cma_id->context; isert_conn->disconnect = disconnect; INIT_WORK(&isert_conn->conn_logout_work, isert_disconnect_work); schedule_work(&isert_conn->conn_logout_work); + + return 0; } static int @@ -828,6 +839,9 @@ isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) switch (event->event) { case RDMA_CM_EVENT_CONNECT_REQUEST: ret = isert_connect_request(cma_id, event); + if (ret) + pr_err("isert_cma_handler failed RDMA_CM_EVENT: 0x%08x %d\n", + event->event, ret); break; case RDMA_CM_EVENT_ESTABLISHED: isert_connected_handler(cma_id); @@ -837,7 +851,7 @@ isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) case RDMA_CM_EVENT_DEVICE_REMOVAL: /* FALLTHRU */ disconnect = true; case RDMA_CM_EVENT_TIMEWAIT_EXIT: /* FALLTHRU */ - isert_disconnected_handler(cma_id, disconnect); + ret = isert_disconnected_handler(cma_id, disconnect); break; case RDMA_CM_EVENT_CONNECT_ERROR: default: @@ -845,12 +859,6 @@ isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) break; } - if (ret != 0) { - pr_err("isert_cma_handler failed RDMA_CM_EVENT: 0x%08x %d\n", - event->event, ret); - dump_stack(); - } - return ret; } @@ -3193,7 +3201,8 @@ isert_free_np(struct iscsi_np *np) { struct isert_np *isert_np = (struct isert_np *)np->np_context; - rdma_destroy_id(isert_np->np_cm_id); + if (isert_np->np_cm_id) + rdma_destroy_id(isert_np->np_cm_id); np->np_context = NULL; kfree(isert_np); From b6932ee35f1c1364dcea0e691b2feb912a6777e5 Mon Sep 17 00:00:00 2001 From: Steven Allen Date: Fri, 24 Oct 2014 15:18:26 -0700 Subject: [PATCH 014/208] target: return CONFLICT only when SA key unmatched PREEMPT (and PREEMPT AND ABORT) should return CONFLICT iff a specified SERVICE ACTION RESERVATION KEY is specified and matches no existing persistent reservation. Without this patch, a PREEMPT will return CONFLICT if either all reservations are held by the initiator (self preemption) or there is nothing to preempt. According to the spec, both of these cases should succeed. Signed-off-by: Steven Allen Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_pr.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index 8c60a1a1ae8d..9f93b8234095 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -2738,7 +2738,8 @@ core_scsi3_pro_preempt(struct se_cmd *cmd, int type, int scope, u64 res_key, struct t10_pr_registration *pr_reg, *pr_reg_tmp, *pr_reg_n, *pr_res_holder; struct t10_reservation *pr_tmpl = &dev->t10_pr; u32 pr_res_mapped_lun = 0; - int all_reg = 0, calling_it_nexus = 0, released_regs = 0; + int all_reg = 0, calling_it_nexus = 0; + bool sa_res_key_unmatched = sa_res_key != 0; int prh_type = 0, prh_scope = 0; if (!se_sess) @@ -2813,6 +2814,7 @@ core_scsi3_pro_preempt(struct se_cmd *cmd, int type, int scope, u64 res_key, if (!all_reg) { if (pr_reg->pr_res_key != sa_res_key) continue; + sa_res_key_unmatched = false; calling_it_nexus = (pr_reg_n == pr_reg) ? 1 : 0; pr_reg_nacl = pr_reg->pr_reg_nacl; @@ -2820,7 +2822,6 @@ core_scsi3_pro_preempt(struct se_cmd *cmd, int type, int scope, u64 res_key, __core_scsi3_free_registration(dev, pr_reg, (preempt_type == PREEMPT_AND_ABORT) ? &preempt_and_abort_list : NULL, calling_it_nexus); - released_regs++; } else { /* * Case for any existing all registrants type @@ -2838,6 +2839,7 @@ core_scsi3_pro_preempt(struct se_cmd *cmd, int type, int scope, u64 res_key, if ((sa_res_key) && (pr_reg->pr_res_key != sa_res_key)) continue; + sa_res_key_unmatched = false; calling_it_nexus = (pr_reg_n == pr_reg) ? 1 : 0; if (calling_it_nexus) @@ -2848,7 +2850,6 @@ core_scsi3_pro_preempt(struct se_cmd *cmd, int type, int scope, u64 res_key, __core_scsi3_free_registration(dev, pr_reg, (preempt_type == PREEMPT_AND_ABORT) ? &preempt_and_abort_list : NULL, 0); - released_regs++; } if (!calling_it_nexus) core_scsi3_ua_allocate(pr_reg_nacl, @@ -2863,7 +2864,7 @@ core_scsi3_pro_preempt(struct se_cmd *cmd, int type, int scope, u64 res_key, * registered reservation key, then the device server shall * complete the command with RESERVATION CONFLICT status. */ - if (!released_regs) { + if (sa_res_key_unmatched) { spin_unlock(&dev->dev_reservation_lock); core_scsi3_put_pr_reg(pr_reg_n); return TCM_RESERVATION_CONFLICT; From ce5686d4ed12158599d2042a6c8659254ed263ce Mon Sep 17 00:00:00 2001 From: "Peter Zijlstra (Intel)" Date: Wed, 29 Oct 2014 11:17:04 +0100 Subject: [PATCH 015/208] perf/x86: Fix embarrasing typo Because we're all human and typing sucks.. Fixes: 7fb0f1de49fc ("perf/x86: Fix compile warnings for intel_uncore") Reported-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: x86@kernel.org Link: http://lkml.kernel.org/n/tip-be0bftjh8yfm4uvmvtf3yi87@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ded8a6774ac9..41a503c15862 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -144,7 +144,7 @@ config INSTRUCTION_DECODER config PERF_EVENTS_INTEL_UNCORE def_bool y - depends on PERF_EVENTS && SUP_SUP_INTEL && PCI + depends on PERF_EVENTS && CPU_SUP_INTEL && PCI config OUTPUT_FORMAT string From f7b8a47da17c9ee4998f2ca2018fcc424e953c0e Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 28 Oct 2014 08:24:34 +0300 Subject: [PATCH 016/208] sched: Remove lockdep check in sched_move_task() sched_move_task() is the only interface to change sched_task_group: cpu_cgrp_subsys methods and autogroup_move_group() use it. Everything is synchronized by task_rq_lock(), so cpu_cgroup_attach() is ordered with other users of sched_move_task(). This means we do no need RCU here: if we've dereferenced a tg here, the .attach method hasn't been called for it yet. Thus, we should pass "true" to task_css_check() to silence lockdep warnings. Fixes: eeb61e53ea19 ("sched: Fix race between task_group and sched_task_group") Reported-by: Oleg Nesterov Reported-by: Fengguang Wu Signed-off-by: Kirill Tkhai Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1414473874.8574.2.camel@tkhai Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 240157c13ddc..6841fb46eb07 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7444,8 +7444,12 @@ void sched_move_task(struct task_struct *tsk) if (unlikely(running)) put_prev_task(rq, tsk); - tg = container_of(task_css_check(tsk, cpu_cgrp_id, - lockdep_is_held(&tsk->sighand->siglock)), + /* + * All callers are synchronized by task_rq_lock(); we do not use RCU + * which is pointless here. Thus, we pass "true" to task_css_check() + * to prevent lockdep warnings. + */ + tg = container_of(task_css_check(tsk, cpu_cgrp_id, true), struct task_group, css); tg = autogroup_task_group(tsk, tg); tsk->sched_task_group = tg; From ea9d0d771fcd32cd56070819749477d511ec9117 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 4 Nov 2014 16:52:28 +0100 Subject: [PATCH 017/208] ASoC: dpcm: Fix race between FE/BE updates and trigger DPCM can update the FE/BE connection states totally asynchronously from the FE's PCM state. Most of FE/BE state changes are protected by mutex, so that they won't race, but there are still some actions that are uncovered. For example, suppose to switch a BE while a FE's stream is running. This would call soc_dpcm_runtime_update(), which sets FE's runtime_update flag, then sets up and starts BEs, and clears FE's runtime_update flag again. When a device emits XRUN during this operation, the PCM core triggers snd_pcm_stop(XRUN). Since the trigger action is an atomic ops, this isn't blocked by the mutex, thus it kicks off DPCM's trigger action. It eventually updates and clears FE's runtime_update flag while soc_dpcm_runtime_update() is running concurrently, and it results in confusion. Usually, for avoiding such a race, we take a lock. There is a PCM stream lock for that purpose. However, as already mentioned, the trigger action is atomic, and we can't take the lock for the whole soc_dpcm_runtime_update() or other operations that include the lengthy jobs like hw_params or prepare. This patch provides an alternative solution. This adds a way to defer the conflicting trigger callback to be executed at the end of FE/BE state changes. For doing it, two things are introduced: - Each runtime_update state change of FEs is protected via PCM stream lock. - The FE's trigger callback checks the runtime_update flag. If it's not set, the trigger action is executed there. If set, mark the pending trigger action and returns immediately. - At the exit of runtime_update state change, it checks whether the pending trigger is present. If yes, it executes the trigger action at this point. Reported-and-tested-by: Qiao Zhou Signed-off-by: Takashi Iwai Acked-by: Liam Girdwood Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- include/sound/soc-dpcm.h | 2 ++ sound/soc/soc-pcm.c | 72 +++++++++++++++++++++++++++++++--------- 2 files changed, 58 insertions(+), 16 deletions(-) diff --git a/include/sound/soc-dpcm.h b/include/sound/soc-dpcm.h index 2883a7a6f9f3..98f2ade0266e 100644 --- a/include/sound/soc-dpcm.h +++ b/include/sound/soc-dpcm.h @@ -102,6 +102,8 @@ struct snd_soc_dpcm_runtime { /* state and update */ enum snd_soc_dpcm_update runtime_update; enum snd_soc_dpcm_state state; + + int trigger_pending; /* trigger cmd + 1 if pending, 0 if not */ }; /* can this BE stop and free */ diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 002311afdeaa..57277dd79e11 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -1522,13 +1522,36 @@ static void dpcm_set_fe_runtime(struct snd_pcm_substream *substream) dpcm_init_runtime_hw(runtime, &cpu_dai_drv->capture); } +static int dpcm_fe_dai_do_trigger(struct snd_pcm_substream *substream, int cmd); + +/* Set FE's runtime_update state; the state is protected via PCM stream lock + * for avoiding the race with trigger callback. + * If the state is unset and a trigger is pending while the previous operation, + * process the pending trigger action here. + */ +static void dpcm_set_fe_update_state(struct snd_soc_pcm_runtime *fe, + int stream, enum snd_soc_dpcm_update state) +{ + struct snd_pcm_substream *substream = + snd_soc_dpcm_get_substream(fe, stream); + + snd_pcm_stream_lock_irq(substream); + if (state == SND_SOC_DPCM_UPDATE_NO && fe->dpcm[stream].trigger_pending) { + dpcm_fe_dai_do_trigger(substream, + fe->dpcm[stream].trigger_pending - 1); + fe->dpcm[stream].trigger_pending = 0; + } + fe->dpcm[stream].runtime_update = state; + snd_pcm_stream_unlock_irq(substream); +} + static int dpcm_fe_dai_startup(struct snd_pcm_substream *fe_substream) { struct snd_soc_pcm_runtime *fe = fe_substream->private_data; struct snd_pcm_runtime *runtime = fe_substream->runtime; int stream = fe_substream->stream, ret = 0; - fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_FE; + dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_FE); ret = dpcm_be_dai_startup(fe, fe_substream->stream); if (ret < 0) { @@ -1550,13 +1573,13 @@ static int dpcm_fe_dai_startup(struct snd_pcm_substream *fe_substream) dpcm_set_fe_runtime(fe_substream); snd_pcm_limit_hw_rates(runtime); - fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO; + dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_NO); return 0; unwind: dpcm_be_dai_startup_unwind(fe, fe_substream->stream); be_err: - fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO; + dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_NO); return ret; } @@ -1603,7 +1626,7 @@ static int dpcm_fe_dai_shutdown(struct snd_pcm_substream *substream) struct snd_soc_pcm_runtime *fe = substream->private_data; int stream = substream->stream; - fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_FE; + dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_FE); /* shutdown the BEs */ dpcm_be_dai_shutdown(fe, substream->stream); @@ -1617,7 +1640,7 @@ static int dpcm_fe_dai_shutdown(struct snd_pcm_substream *substream) dpcm_dapm_stream_event(fe, stream, SND_SOC_DAPM_STREAM_STOP); fe->dpcm[stream].state = SND_SOC_DPCM_STATE_CLOSE; - fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO; + dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_NO); return 0; } @@ -1665,7 +1688,7 @@ static int dpcm_fe_dai_hw_free(struct snd_pcm_substream *substream) int err, stream = substream->stream; mutex_lock_nested(&fe->card->mutex, SND_SOC_CARD_CLASS_RUNTIME); - fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_FE; + dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_FE); dev_dbg(fe->dev, "ASoC: hw_free FE %s\n", fe->dai_link->name); @@ -1680,7 +1703,7 @@ static int dpcm_fe_dai_hw_free(struct snd_pcm_substream *substream) err = dpcm_be_dai_hw_free(fe, stream); fe->dpcm[stream].state = SND_SOC_DPCM_STATE_HW_FREE; - fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO; + dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_NO); mutex_unlock(&fe->card->mutex); return 0; @@ -1773,7 +1796,7 @@ static int dpcm_fe_dai_hw_params(struct snd_pcm_substream *substream, int ret, stream = substream->stream; mutex_lock_nested(&fe->card->mutex, SND_SOC_CARD_CLASS_RUNTIME); - fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_FE; + dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_FE); memcpy(&fe->dpcm[substream->stream].hw_params, params, sizeof(struct snd_pcm_hw_params)); @@ -1796,7 +1819,7 @@ static int dpcm_fe_dai_hw_params(struct snd_pcm_substream *substream, fe->dpcm[stream].state = SND_SOC_DPCM_STATE_HW_PARAMS; out: - fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO; + dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_NO); mutex_unlock(&fe->card->mutex); return ret; } @@ -1910,7 +1933,7 @@ int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream, } EXPORT_SYMBOL_GPL(dpcm_be_dai_trigger); -static int dpcm_fe_dai_trigger(struct snd_pcm_substream *substream, int cmd) +static int dpcm_fe_dai_do_trigger(struct snd_pcm_substream *substream, int cmd) { struct snd_soc_pcm_runtime *fe = substream->private_data; int stream = substream->stream, ret; @@ -1984,6 +2007,23 @@ out: return ret; } +static int dpcm_fe_dai_trigger(struct snd_pcm_substream *substream, int cmd) +{ + struct snd_soc_pcm_runtime *fe = substream->private_data; + int stream = substream->stream; + + /* if FE's runtime_update is already set, we're in race; + * process this trigger later at exit + */ + if (fe->dpcm[stream].runtime_update != SND_SOC_DPCM_UPDATE_NO) { + fe->dpcm[stream].trigger_pending = cmd + 1; + return 0; /* delayed, assuming it's successful */ + } + + /* we're alone, let's trigger */ + return dpcm_fe_dai_do_trigger(substream, cmd); +} + int dpcm_be_dai_prepare(struct snd_soc_pcm_runtime *fe, int stream) { struct snd_soc_dpcm *dpcm; @@ -2027,7 +2067,7 @@ static int dpcm_fe_dai_prepare(struct snd_pcm_substream *substream) dev_dbg(fe->dev, "ASoC: prepare FE %s\n", fe->dai_link->name); - fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_FE; + dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_FE); /* there is no point preparing this FE if there are no BEs */ if (list_empty(&fe->dpcm[stream].be_clients)) { @@ -2054,7 +2094,7 @@ static int dpcm_fe_dai_prepare(struct snd_pcm_substream *substream) fe->dpcm[stream].state = SND_SOC_DPCM_STATE_PREPARE; out: - fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO; + dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_NO); mutex_unlock(&fe->card->mutex); return ret; @@ -2201,11 +2241,11 @@ static int dpcm_run_new_update(struct snd_soc_pcm_runtime *fe, int stream) { int ret; - fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_BE; + dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_BE); ret = dpcm_run_update_startup(fe, stream); if (ret < 0) dev_err(fe->dev, "ASoC: failed to startup some BEs\n"); - fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO; + dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_NO); return ret; } @@ -2214,11 +2254,11 @@ static int dpcm_run_old_update(struct snd_soc_pcm_runtime *fe, int stream) { int ret; - fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_BE; + dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_BE); ret = dpcm_run_update_shutdown(fe, stream); if (ret < 0) dev_err(fe->dev, "ASoC: failed to shutdown some BEs\n"); - fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO; + dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_NO); return ret; } From ece509c10985ba93ccc8c68f808a9e767250041c Mon Sep 17 00:00:00 2001 From: Dylan Reid Date: Mon, 3 Nov 2014 10:28:56 -0800 Subject: [PATCH 018/208] ASoC: max98090: Correct pclk divisor settings The Baytrail-based chromebooks have a 20MHz mclk, the code was setting the divisor incorrectly in this case. According to the 98090 datasheet, the divisor should be set to DIV1 for 10 <= mclk <= 20. Correct this and the surrounding clock ranges as well to match the datasheet. Signed-off-by: Dylan Reid Signed-off-by: Mark Brown --- sound/soc/codecs/max98090.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/max98090.c b/sound/soc/codecs/max98090.c index d519294f57c7..1229554f1464 100644 --- a/sound/soc/codecs/max98090.c +++ b/sound/soc/codecs/max98090.c @@ -1941,13 +1941,13 @@ static int max98090_dai_set_sysclk(struct snd_soc_dai *dai, * 0x02 (when master clk is 20MHz to 40MHz).. * 0x03 (when master clk is 40MHz to 60MHz).. */ - if ((freq >= 10000000) && (freq < 20000000)) { + if ((freq >= 10000000) && (freq <= 20000000)) { snd_soc_write(codec, M98090_REG_SYSTEM_CLOCK, M98090_PSCLK_DIV1); - } else if ((freq >= 20000000) && (freq < 40000000)) { + } else if ((freq > 20000000) && (freq <= 40000000)) { snd_soc_write(codec, M98090_REG_SYSTEM_CLOCK, M98090_PSCLK_DIV2); - } else if ((freq >= 40000000) && (freq < 60000000)) { + } else if ((freq > 40000000) && (freq <= 60000000)) { snd_soc_write(codec, M98090_REG_SYSTEM_CLOCK, M98090_PSCLK_DIV4); } else { From 62e6a3b6f4e1f9b96fa66ab1cdf2ffd8594df9e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20F=C3=A4rber?= Date: Wed, 5 Nov 2014 17:44:52 +0100 Subject: [PATCH 019/208] ASoC: samsung: Add MODULE_DEVICE_TABLE for Snow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This enables the snd_soc_snow module to be auto-loaded. Signed-off-by: Andreas Färber Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/samsung/snow.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/samsung/snow.c b/sound/soc/samsung/snow.c index 0acf5d0eed53..72118a77dd5b 100644 --- a/sound/soc/samsung/snow.c +++ b/sound/soc/samsung/snow.c @@ -110,6 +110,7 @@ static const struct of_device_id snow_of_match[] = { { .compatible = "google,snow-audio-max98095", }, {}, }; +MODULE_DEVICE_TABLE(of, snow_of_match); static struct platform_driver snow_driver = { .driver = { From ac87f22147098da9cf83a0b413ef7dda42277d85 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Thu, 6 Nov 2014 12:23:52 +0800 Subject: [PATCH 020/208] ASoC: rt5670: correct the incorrect default values The patch corrects the incorrect default register values. Signed-off-by: Bard Liao Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/codecs/rt5670.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/sound/soc/codecs/rt5670.c b/sound/soc/codecs/rt5670.c index ba9d9b4d4857..57ab19dbaaf3 100644 --- a/sound/soc/codecs/rt5670.c +++ b/sound/soc/codecs/rt5670.c @@ -100,18 +100,18 @@ static const struct reg_default rt5670_reg[] = { { 0x4c, 0x5380 }, { 0x4f, 0x0073 }, { 0x52, 0x00d3 }, - { 0x53, 0xf0f0 }, + { 0x53, 0xf000 }, { 0x61, 0x0000 }, { 0x62, 0x0001 }, { 0x63, 0x00c3 }, { 0x64, 0x0000 }, - { 0x65, 0x0000 }, + { 0x65, 0x0001 }, { 0x66, 0x0000 }, { 0x6f, 0x8000 }, { 0x70, 0x8000 }, { 0x71, 0x8000 }, { 0x72, 0x8000 }, - { 0x73, 0x1110 }, + { 0x73, 0x7770 }, { 0x74, 0x0e00 }, { 0x75, 0x1505 }, { 0x76, 0x0015 }, @@ -125,21 +125,21 @@ static const struct reg_default rt5670_reg[] = { { 0x83, 0x0000 }, { 0x84, 0x0000 }, { 0x85, 0x0000 }, - { 0x86, 0x0008 }, + { 0x86, 0x0004 }, { 0x87, 0x0000 }, { 0x88, 0x0000 }, { 0x89, 0x0000 }, { 0x8a, 0x0000 }, { 0x8b, 0x0000 }, - { 0x8c, 0x0007 }, + { 0x8c, 0x0003 }, { 0x8d, 0x0000 }, { 0x8e, 0x0004 }, { 0x8f, 0x1100 }, { 0x90, 0x0646 }, { 0x91, 0x0c06 }, { 0x93, 0x0000 }, - { 0x94, 0x0000 }, - { 0x95, 0x0000 }, + { 0x94, 0x1270 }, + { 0x95, 0x1000 }, { 0x97, 0x0000 }, { 0x98, 0x0000 }, { 0x99, 0x0000 }, @@ -150,11 +150,11 @@ static const struct reg_default rt5670_reg[] = { { 0x9e, 0x0400 }, { 0xae, 0x7000 }, { 0xaf, 0x0000 }, - { 0xb0, 0x6000 }, + { 0xb0, 0x7000 }, { 0xb1, 0x0000 }, { 0xb2, 0x0000 }, { 0xb3, 0x001f }, - { 0xb4, 0x2206 }, + { 0xb4, 0x220c }, { 0xb5, 0x1f00 }, { 0xb6, 0x0000 }, { 0xb7, 0x0000 }, @@ -171,25 +171,25 @@ static const struct reg_default rt5670_reg[] = { { 0xcf, 0x1813 }, { 0xd0, 0x0690 }, { 0xd1, 0x1c17 }, - { 0xd3, 0xb320 }, + { 0xd3, 0xa220 }, { 0xd4, 0x0000 }, { 0xd6, 0x0400 }, { 0xd9, 0x0809 }, { 0xda, 0x0000 }, { 0xdb, 0x0001 }, { 0xdc, 0x0049 }, - { 0xdd, 0x0009 }, + { 0xdd, 0x0024 }, { 0xe6, 0x8000 }, { 0xe7, 0x0000 }, - { 0xec, 0xb300 }, + { 0xec, 0xa200 }, { 0xed, 0x0000 }, - { 0xee, 0xb300 }, + { 0xee, 0xa200 }, { 0xef, 0x0000 }, { 0xf8, 0x0000 }, { 0xf9, 0x0000 }, { 0xfa, 0x8010 }, { 0xfb, 0x0033 }, - { 0xfc, 0x0080 }, + { 0xfc, 0x0100 }, }; static bool rt5670_volatile_register(struct device *dev, unsigned int reg) From 96927ac96d7658b35a4f0f3dcdb8c6b74472a3ea Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Thu, 6 Nov 2014 12:23:54 +0800 Subject: [PATCH 021/208] ASoC: rt5670: change dapm routes of PLL connection PLL should be powered up once filter power is on. So, "PLL1" should be connected to filters instead of DACs. Signed-off-by: Bard Liao Signed-off-by: Mark Brown --- sound/soc/codecs/rt5670.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/soc/codecs/rt5670.c b/sound/soc/codecs/rt5670.c index 57ab19dbaaf3..9bd8b4f63303 100644 --- a/sound/soc/codecs/rt5670.c +++ b/sound/soc/codecs/rt5670.c @@ -1877,6 +1877,10 @@ static const struct snd_soc_dapm_route rt5670_dapm_routes[] = { { "DAC1 MIXR", "DAC1 Switch", "DAC1 R Mux" }, { "DAC1 MIXR", NULL, "DAC Stereo1 Filter" }, + { "DAC Stereo1 Filter", NULL, "PLL1", is_sys_clk_from_pll }, + { "DAC Mono Left Filter", NULL, "PLL1", is_sys_clk_from_pll }, + { "DAC Mono Right Filter", NULL, "PLL1", is_sys_clk_from_pll }, + { "DAC MIX", NULL, "DAC1 MIXL" }, { "DAC MIX", NULL, "DAC1 MIXR" }, @@ -1926,14 +1930,10 @@ static const struct snd_soc_dapm_route rt5670_dapm_routes[] = { { "DAC L1", NULL, "DAC L1 Power" }, { "DAC L1", NULL, "Stereo DAC MIXL" }, - { "DAC L1", NULL, "PLL1", is_sys_clk_from_pll }, { "DAC R1", NULL, "DAC R1 Power" }, { "DAC R1", NULL, "Stereo DAC MIXR" }, - { "DAC R1", NULL, "PLL1", is_sys_clk_from_pll }, { "DAC L2", NULL, "Mono DAC MIXL" }, - { "DAC L2", NULL, "PLL1", is_sys_clk_from_pll }, { "DAC R2", NULL, "Mono DAC MIXR" }, - { "DAC R2", NULL, "PLL1", is_sys_clk_from_pll }, { "OUT MIXL", "BST1 Switch", "BST1" }, { "OUT MIXL", "INL Switch", "INL VOL" }, From 4f031fa9f188b2b0641ac20087d9e16bcfb4e49d Mon Sep 17 00:00:00 2001 From: Ronald Wahl Date: Thu, 6 Nov 2014 11:52:13 +0100 Subject: [PATCH 022/208] mac80211: Fix regression that triggers a kernel BUG with CCMP Commit 7ec7c4a9a686c608315739ab6a2b0527a240883c (mac80211: port CCMP to cryptoapi's CCM driver) introduced a regression when decrypting empty packets (data_len == 0). This will lead to backtraces like: (scatterwalk_start) from [] (scatterwalk_map_and_copy+0x2c/0xa8) (scatterwalk_map_and_copy) from [] (crypto_ccm_decrypt+0x7c/0x25c) (crypto_ccm_decrypt) from [] (ieee80211_aes_ccm_decrypt+0x160/0x170) (ieee80211_aes_ccm_decrypt) from [] (ieee80211_crypto_ccmp_decrypt+0x1ac/0x238) (ieee80211_crypto_ccmp_decrypt) from [] (ieee80211_rx_handlers+0x870/0x1d24) (ieee80211_rx_handlers) from [] (ieee80211_prepare_and_rx_handle+0x8a0/0x91c) (ieee80211_prepare_and_rx_handle) from [] (ieee80211_rx+0x568/0x730) (ieee80211_rx) from [] (__carl9170_rx+0x94c/0xa20) (__carl9170_rx) from [] (carl9170_rx_stream+0x1fc/0x320) (carl9170_rx_stream) from [] (carl9170_usb_tasklet+0x80/0xc8) (carl9170_usb_tasklet) from [] (tasklet_hi_action+0x88/0xcc) (tasklet_hi_action) from [] (__do_softirq+0xcc/0x200) (__do_softirq) from [] (irq_exit+0x80/0xe0) (irq_exit) from [] (handle_IRQ+0x64/0x80) (handle_IRQ) from [] (__irq_svc+0x40/0x4c) (__irq_svc) from [] (arch_cpu_idle+0x2c/0x34) Such packets can appear for example when using the carl9170 wireless driver because hardware sometimes generates garbage when the internal FIFO overruns. This patch adds an additional length check. Cc: stable@vger.kernel.org Fixes: 7ec7c4a9a686 ("mac80211: port CCMP to cryptoapi's CCM driver") Acked-by: Ard Biesheuvel Signed-off-by: Ronald Wahl Signed-off-by: Johannes Berg --- net/mac80211/aes_ccm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c index ec24378caaaf..09d9caaec591 100644 --- a/net/mac80211/aes_ccm.c +++ b/net/mac80211/aes_ccm.c @@ -53,6 +53,9 @@ int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, __aligned(__alignof__(struct aead_request)); struct aead_request *aead_req = (void *) aead_req_data; + if (data_len == 0) + return -EINVAL; + memset(aead_req, 0, sizeof(aead_req_data)); sg_init_one(&pt, data, data_len); From 532425a7a7738b479406496ca4ad6b3247688e44 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Thu, 6 Nov 2014 19:56:49 +0800 Subject: [PATCH 023/208] ARM: dts: sun6i: Re-parent ahb1_mux to pll6 as required by dma controller The dma controller requires that the ahb1 bus clock be driven by pll6 for peripheral access to work. Previously this was done in the dma controller driver, but was since removed as part of a series to unify the ahb1_mux and ahb1 clock drivers, in 14e0e28 dmaengine: sun6i: Remove obsolete clk muxing code Unfortunately the rest of that series did not make it, leaving us with broken dma on sun6i. This patch reparents ahb1_mux to pll6 using the DT assigned-clocks properties in the dma controller node. Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard --- arch/arm/boot/dts/sun6i-a31.dtsi | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/boot/dts/sun6i-a31.dtsi b/arch/arm/boot/dts/sun6i-a31.dtsi index 543f895d18d3..2e652e2339e9 100644 --- a/arch/arm/boot/dts/sun6i-a31.dtsi +++ b/arch/arm/boot/dts/sun6i-a31.dtsi @@ -361,6 +361,10 @@ clocks = <&ahb1_gates 6>; resets = <&ahb1_rst 6>; #dma-cells = <1>; + + /* DMA controller requires AHB1 clocked from PLL6 */ + assigned-clocks = <&ahb1_mux>; + assigned-clock-parents = <&pll6>; }; mmc0: mmc@01c0f000 { From a926a12b5f11007d0ba9eb2e083d86054fb29a06 Mon Sep 17 00:00:00 2001 From: Shinobu Uehara Date: Thu, 30 Oct 2014 14:57:56 +0900 Subject: [PATCH 024/208] ARM: shmobile: r8a7790: Fix SD3CKCR address Signed-off-by: Shinobu Uehara Acked-by: Geert Uytterhoeven Signed-off-by: Simon Horman --- arch/arm/mach-shmobile/clock-r8a7790.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-shmobile/clock-r8a7790.c b/arch/arm/mach-shmobile/clock-r8a7790.c index 126ddafad526..f62265200592 100644 --- a/arch/arm/mach-shmobile/clock-r8a7790.c +++ b/arch/arm/mach-shmobile/clock-r8a7790.c @@ -68,7 +68,7 @@ #define SDCKCR 0xE6150074 #define SD2CKCR 0xE6150078 -#define SD3CKCR 0xE615007C +#define SD3CKCR 0xE615026C #define MMC0CKCR 0xE6150240 #define MMC1CKCR 0xE6150244 #define SSPCKCR 0xE6150248 From 85eb968e84686faa507daa3c5cfdfbfefc7eda95 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 5 Nov 2014 21:59:34 +0100 Subject: [PATCH 025/208] ARM: shmobile: r8a7740 legacy: Add missing INTCA clock for irqpin module This clock drives the irqpin controller modules. Before, it was assumed enabled by the bootloader or reset state. By making it available to the driver, we make sure it gets enabled when needed, and allow it to be managed by system or runtime PM. Signed-off-by: Geert Uytterhoeven Signed-off-by: Simon Horman --- arch/arm/mach-shmobile/clock-r8a7740.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-shmobile/clock-r8a7740.c b/arch/arm/mach-shmobile/clock-r8a7740.c index 0794f0426e70..16a6b7ccbc8b 100644 --- a/arch/arm/mach-shmobile/clock-r8a7740.c +++ b/arch/arm/mach-shmobile/clock-r8a7740.c @@ -455,7 +455,7 @@ enum { MSTP128, MSTP127, MSTP125, MSTP116, MSTP111, MSTP100, MSTP117, - MSTP230, + MSTP230, MSTP229, MSTP222, MSTP218, MSTP217, MSTP216, MSTP214, MSTP207, MSTP206, MSTP204, MSTP203, MSTP202, MSTP201, MSTP200, @@ -479,6 +479,7 @@ static struct clk mstp_clks[MSTP_NR] = { [MSTP100] = SH_CLK_MSTP32(&div4_clks[DIV4_B], SMSTPCR1, 0, 0), /* LCDC0 */ [MSTP230] = SH_CLK_MSTP32(&div6_clks[DIV6_SUB], SMSTPCR2, 30, 0), /* SCIFA6 */ + [MSTP229] = SH_CLK_MSTP32(&div4_clks[DIV4_HP], SMSTPCR2, 29, 0), /* INTCA */ [MSTP222] = SH_CLK_MSTP32(&div6_clks[DIV6_SUB], SMSTPCR2, 22, 0), /* SCIFA7 */ [MSTP218] = SH_CLK_MSTP32(&div4_clks[DIV4_HP], SMSTPCR2, 18, 0), /* DMAC1 */ [MSTP217] = SH_CLK_MSTP32(&div4_clks[DIV4_HP], SMSTPCR2, 17, 0), /* DMAC2 */ @@ -575,6 +576,10 @@ static struct clk_lookup lookups[] = { CLKDEV_DEV_ID("sh-dma-engine.0", &mstp_clks[MSTP218]), CLKDEV_DEV_ID("sh-sci.7", &mstp_clks[MSTP222]), CLKDEV_DEV_ID("e6cd0000.serial", &mstp_clks[MSTP222]), + CLKDEV_DEV_ID("renesas_intc_irqpin.0", &mstp_clks[MSTP229]), + CLKDEV_DEV_ID("renesas_intc_irqpin.1", &mstp_clks[MSTP229]), + CLKDEV_DEV_ID("renesas_intc_irqpin.2", &mstp_clks[MSTP229]), + CLKDEV_DEV_ID("renesas_intc_irqpin.3", &mstp_clks[MSTP229]), CLKDEV_DEV_ID("sh-sci.6", &mstp_clks[MSTP230]), CLKDEV_DEV_ID("e6cc0000.serial", &mstp_clks[MSTP230]), From 4f37828d4d69a46830e0525a065da9847fc7a819 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 5 Nov 2014 11:04:33 +0100 Subject: [PATCH 026/208] ARM: shmobile: r8a7740 legacy: Correct IIC0 parent clock According to the datasheet, the operating clock for IIC0 is the HPP (RT Peri) clock, not the SUB (Peri) clock. Both clocks run at the same speed (50 Mhz). This is consistent with IIC0 being located in the A4R PM domain, and IIC1 in the A3SP PM domain. Signed-off-by: Geert Uytterhoeven Signed-off-by: Simon Horman --- arch/arm/mach-shmobile/clock-r8a7740.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-shmobile/clock-r8a7740.c b/arch/arm/mach-shmobile/clock-r8a7740.c index 16a6b7ccbc8b..19df9cb30495 100644 --- a/arch/arm/mach-shmobile/clock-r8a7740.c +++ b/arch/arm/mach-shmobile/clock-r8a7740.c @@ -474,7 +474,7 @@ static struct clk mstp_clks[MSTP_NR] = { [MSTP127] = SH_CLK_MSTP32(&div4_clks[DIV4_S], SMSTPCR1, 27, 0), /* CEU20 */ [MSTP125] = SH_CLK_MSTP32(&div6_clks[DIV6_SUB], SMSTPCR1, 25, 0), /* TMU0 */ [MSTP117] = SH_CLK_MSTP32(&div4_clks[DIV4_B], SMSTPCR1, 17, 0), /* LCDC1 */ - [MSTP116] = SH_CLK_MSTP32(&div6_clks[DIV6_SUB], SMSTPCR1, 16, 0), /* IIC0 */ + [MSTP116] = SH_CLK_MSTP32(&div4_clks[DIV4_HPP], SMSTPCR1, 16, 0), /* IIC0 */ [MSTP111] = SH_CLK_MSTP32(&div6_clks[DIV6_SUB], SMSTPCR1, 11, 0), /* TMU1 */ [MSTP100] = SH_CLK_MSTP32(&div4_clks[DIV4_B], SMSTPCR1, 0, 0), /* LCDC0 */ From edd7b938637701567a54306adb27cfb4345fedc5 Mon Sep 17 00:00:00 2001 From: Shinobu Uehara Date: Thu, 30 Oct 2014 14:57:57 +0900 Subject: [PATCH 027/208] ARM: shmobile: r8a7790: Fix SD3CKCR address to device tree Signed-off-by: Shinobu Uehara Acked-by: Geert Uytterhoeven Signed-off-by: Simon Horman --- arch/arm/boot/dts/r8a7790.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/r8a7790.dtsi b/arch/arm/boot/dts/r8a7790.dtsi index d0e17733dc1a..e20affe156c1 100644 --- a/arch/arm/boot/dts/r8a7790.dtsi +++ b/arch/arm/boot/dts/r8a7790.dtsi @@ -666,9 +666,9 @@ #clock-cells = <0>; clock-output-names = "sd2"; }; - sd3_clk: sd3_clk@e615007c { + sd3_clk: sd3_clk@e615026c { compatible = "renesas,r8a7790-div6-clock", "renesas,cpg-div6-clock"; - reg = <0 0xe615007c 0 4>; + reg = <0 0xe615026c 0 4>; clocks = <&pll1_div2_clk>; #clock-cells = <0>; clock-output-names = "sd3"; From b89ff7c3c2dee189489a5f45eb8d72e106179299 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 5 Nov 2014 11:04:34 +0100 Subject: [PATCH 028/208] ARM: shmobile: r8a7740 dtsi: Correct IIC0 parent clock According to the datasheet, the operating clock for IIC0 is the HPP (RT Peri) clock, not the SUB (Peri) clock. Both clocks run at the same speed (50 Mhz). This is consistent with IIC0 being located in the A4R PM domain, and IIC1 in the A3SP PM domain. Signed-off-by: Geert Uytterhoeven Signed-off-by: Simon Horman --- arch/arm/boot/dts/r8a7740.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/r8a7740.dtsi b/arch/arm/boot/dts/r8a7740.dtsi index d46c213a17ad..eed697a6bd6b 100644 --- a/arch/arm/boot/dts/r8a7740.dtsi +++ b/arch/arm/boot/dts/r8a7740.dtsi @@ -433,7 +433,7 @@ clocks = <&cpg_clocks R8A7740_CLK_S>, <&cpg_clocks R8A7740_CLK_S>, <&sub_clk>, <&cpg_clocks R8A7740_CLK_B>, - <&sub_clk>, <&sub_clk>, + <&cpg_clocks R8A7740_CLK_HPP>, <&sub_clk>, <&cpg_clocks R8A7740_CLK_B>; #clock-cells = <1>; renesas,clock-indices = < From c123588b3b193d06588dfb51f475407f835ebfb2 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 7 Nov 2014 17:53:40 +0300 Subject: [PATCH 029/208] sched/numa: Fix out of bounds read in sched_init_numa() On latest mm + KASan patchset I've got this: ================================================================== BUG: AddressSanitizer: out of bounds access in sched_init_smp+0x3ba/0x62c at addr ffff88006d4bee6c ============================================================================= BUG kmalloc-8 (Not tainted): kasan error ----------------------------------------------------------------------------- Disabling lock debugging due to kernel taint INFO: Allocated in alloc_vfsmnt+0xb0/0x2c0 age=75 cpu=0 pid=0 __slab_alloc+0x4b4/0x4f0 __kmalloc_track_caller+0x15f/0x1e0 kstrdup+0x44/0x90 alloc_vfsmnt+0xb0/0x2c0 vfs_kern_mount+0x35/0x190 kern_mount_data+0x25/0x50 pid_ns_prepare_proc+0x19/0x50 alloc_pid+0x5e2/0x630 copy_process.part.41+0xdf5/0x2aa0 do_fork+0xf5/0x460 kernel_thread+0x21/0x30 rest_init+0x1e/0x90 start_kernel+0x522/0x531 x86_64_start_reservations+0x2a/0x2c x86_64_start_kernel+0x15b/0x16a INFO: Slab 0xffffea0001b52f80 objects=24 used=22 fp=0xffff88006d4befc0 flags=0x100000000004080 INFO: Object 0xffff88006d4bed20 @offset=3360 fp=0xffff88006d4bee70 Bytes b4 ffff88006d4bed10: 00 00 00 00 00 00 00 00 5a 5a 5a 5a 5a 5a 5a 5a ........ZZZZZZZZ Object ffff88006d4bed20: 70 72 6f 63 00 6b 6b a5 proc.kk. Redzone ffff88006d4bed28: cc cc cc cc cc cc cc cc ........ Padding ffff88006d4bee68: 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ CPU: 0 PID: 1 Comm: swapper/0 Tainted: G B 3.18.0-rc3-mm1+ #108 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org 04/01/2014 ffff88006d4be000 0000000000000000 ffff88006d4bed20 ffff88006c86fd18 ffffffff81cd0a59 0000000000000058 ffff88006d404240 ffff88006c86fd48 ffffffff811fa3a8 ffff88006d404240 ffffea0001b52f80 ffff88006d4bed20 Call Trace: dump_stack (lib/dump_stack.c:52) print_trailer (mm/slub.c:645) object_err (mm/slub.c:652) ? sched_init_smp (kernel/sched/core.c:6552 kernel/sched/core.c:7063) kasan_report_error (mm/kasan/report.c:102 mm/kasan/report.c:178) ? kasan_poison_shadow (mm/kasan/kasan.c:48) ? kasan_unpoison_shadow (mm/kasan/kasan.c:54) ? kasan_poison_shadow (mm/kasan/kasan.c:48) ? kasan_kmalloc (mm/kasan/kasan.c:311) __asan_load4 (mm/kasan/kasan.c:371) ? sched_init_smp (kernel/sched/core.c:6552 kernel/sched/core.c:7063) sched_init_smp (kernel/sched/core.c:6552 kernel/sched/core.c:7063) kernel_init_freeable (init/main.c:869 init/main.c:997) ? finish_task_switch (kernel/sched/sched.h:1036 kernel/sched/core.c:2248) ? rest_init (init/main.c:924) kernel_init (init/main.c:929) ? rest_init (init/main.c:924) ret_from_fork (arch/x86/kernel/entry_64.S:348) ? rest_init (init/main.c:924) Read of size 4 by task swapper/0: Memory state around the buggy address: ffff88006d4beb80: fc fc fc fc fc fc fc fc fc fc 00 fc fc fc fc fc ffff88006d4bec00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88006d4bec80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88006d4bed00: fc fc fc fc 00 fc fc fc fc fc fc fc fc fc fc fc ffff88006d4bed80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff88006d4bee00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc 04 fc ^ ffff88006d4bee80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88006d4bef00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88006d4bef80: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb ffff88006d4bf000: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88006d4bf080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Zero 'level' (e.g. on non-NUMA system) causing out of bounds access in this line: sched_max_numa_distance = sched_domains_numa_distance[level - 1]; Fix this by exiting from sched_init_numa() earlier. Signed-off-by: Andrey Ryabinin Reviewed-by: Rik van Riel Fixes: 9942f79ba ("sched/numa: Export info needed for NUMA balancing on complex topologies") Cc: peterz@infradead.org Link: http://lkml.kernel.org/r/1415372020-1871-1-git-send-email-a.ryabinin@samsung.com Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 6841fb46eb07..5f12ca65c9a7 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6368,6 +6368,10 @@ static void sched_init_numa(void) if (!sched_debug()) break; } + + if (!level) + return; + /* * 'level' contains the number of unique distances, excluding the * identity distance node_distance(i,i). From 6b96686ecffcbea85dcb502e4584e4a20a2bfb29 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 7 Nov 2014 15:34:54 +0100 Subject: [PATCH 030/208] netfilter: nft_masq: fix uninitialized range in nft_masq_{ipv4, ipv6}_eval When transferring from the original range in nf_nat_masquerade_{ipv4,ipv6}() we copy over values from stack in from min_proto/max_proto due to uninitialized range variable in both, nft_masq_{ipv4,ipv6}_eval. As we only initialize flags at this time from nft_masq struct, just zero out the rest. Fixes: 9ba1f726bec09 ("netfilter: nf_tables: add new nft_masq expression") Signed-off-by: Daniel Borkmann Acked-by: Arturo Borrero Gonzalez Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nft_masq_ipv4.c | 1 + net/ipv6/netfilter/nft_masq_ipv6.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c index c1023c445920..665de06561cd 100644 --- a/net/ipv4/netfilter/nft_masq_ipv4.c +++ b/net/ipv4/netfilter/nft_masq_ipv4.c @@ -24,6 +24,7 @@ static void nft_masq_ipv4_eval(const struct nft_expr *expr, struct nf_nat_range range; unsigned int verdict; + memset(&range, 0, sizeof(range)); range.flags = priv->flags; verdict = nf_nat_masquerade_ipv4(pkt->skb, pkt->ops->hooknum, diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c index 8a7ac685076d..529c119cbb14 100644 --- a/net/ipv6/netfilter/nft_masq_ipv6.c +++ b/net/ipv6/netfilter/nft_masq_ipv6.c @@ -25,6 +25,7 @@ static void nft_masq_ipv6_eval(const struct nft_expr *expr, struct nf_nat_range range; unsigned int verdict; + memset(&range, 0, sizeof(range)); range.flags = priv->flags; verdict = nf_nat_masquerade_ipv6(pkt->skb, &range, pkt->out); From fb6eaf2ccca7d3580931bcb4b735101b461f38cf Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Wed, 5 Nov 2014 19:10:52 -0600 Subject: [PATCH 031/208] rtlwifi: Fix setting of tx descriptor for new trx flow Device RTL8192EE uses a new form of trx flow. This fix sets up the descriptors correctly. Signed-off-by: Larry Finger Signed-off-by: John W. Linville --- drivers/net/wireless/rtlwifi/pci.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/rtlwifi/pci.c b/drivers/net/wireless/rtlwifi/pci.c index 25daa8715219..116f746e7c73 100644 --- a/drivers/net/wireless/rtlwifi/pci.c +++ b/drivers/net/wireless/rtlwifi/pci.c @@ -1127,9 +1127,14 @@ static void _rtl_pci_prepare_bcn_tasklet(struct ieee80211_hw *hw) __skb_queue_tail(&ring->queue, pskb); - rtlpriv->cfg->ops->set_desc(hw, (u8 *)pdesc, true, HW_DESC_OWN, - &temp_one); - + if (rtlpriv->use_new_trx_flow) { + temp_one = 4; + rtlpriv->cfg->ops->set_desc(hw, (u8 *)pbuffer_desc, true, + HW_DESC_OWN, (u8 *)&temp_one); + } else { + rtlpriv->cfg->ops->set_desc(hw, (u8 *)pdesc, true, HW_DESC_OWN, + &temp_one); + } return; } From caea2172c23465a77556b2e1d06412b532b90235 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Wed, 5 Nov 2014 19:10:53 -0600 Subject: [PATCH 032/208] rtlwifi: Fix errors in descriptor manipulation There are typos in the handling of the descriptor pointers where the wrong descriptor is referenced. There is also an error in which the pointer is incremented twice. Signed-off-by: Larry Finger Signed-off-by: John W. Linville --- drivers/net/wireless/rtlwifi/pci.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/rtlwifi/pci.c b/drivers/net/wireless/rtlwifi/pci.c index 116f746e7c73..6d2b6281e22b 100644 --- a/drivers/net/wireless/rtlwifi/pci.c +++ b/drivers/net/wireless/rtlwifi/pci.c @@ -1375,9 +1375,9 @@ static void _rtl_pci_free_tx_ring(struct ieee80211_hw *hw, ring->desc = NULL; if (rtlpriv->use_new_trx_flow) { pci_free_consistent(rtlpci->pdev, - sizeof(*ring->desc) * ring->entries, + sizeof(*ring->buffer_desc) * ring->entries, ring->buffer_desc, ring->buffer_desc_dma); - ring->desc = NULL; + ring->buffer_desc = NULL; } } @@ -1548,7 +1548,6 @@ int rtl_pci_reset_trx_ring(struct ieee80211_hw *hw) true, HW_DESC_TXBUFF_ADDR), skb->len, PCI_DMA_TODEVICE); - ring->idx = (ring->idx + 1) % ring->entries; kfree_skb(skb); ring->idx = (ring->idx + 1) % ring->entries; } From d1cd5ba4ca8b41793f4e581dd1dbf46b7f2cf691 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Fri, 7 Nov 2014 10:05:12 -0600 Subject: [PATCH 033/208] rtlwifi: rtl8192se: Fix connection problems Changes in the vendor driver were added to rtlwifi, but some updates to rtl8192se were missed, and the driver could neither scan nor connect. There are other changes that will enhance performance, but this minimal set fix the basic functionality. Signed-off-by: Larry Finger Signed-off-by: John W. Linville --- drivers/net/wireless/rtlwifi/pci.c | 3 ++- drivers/net/wireless/rtlwifi/rtl8192se/hw.c | 7 +++++-- drivers/net/wireless/rtlwifi/rtl8192se/phy.c | 2 ++ drivers/net/wireless/rtlwifi/rtl8192se/sw.c | 16 ++++++++++++++++ 4 files changed, 25 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/rtlwifi/pci.c b/drivers/net/wireless/rtlwifi/pci.c index 6d2b6281e22b..61f5d36eca6a 100644 --- a/drivers/net/wireless/rtlwifi/pci.c +++ b/drivers/net/wireless/rtlwifi/pci.c @@ -842,7 +842,8 @@ static void _rtl_pci_rx_interrupt(struct ieee80211_hw *hw) break; } /* handle command packet here */ - if (rtlpriv->cfg->ops->rx_command_packet(hw, stats, skb)) { + if (rtlpriv->cfg->ops->rx_command_packet && + rtlpriv->cfg->ops->rx_command_packet(hw, stats, skb)) { dev_kfree_skb_any(skb); goto end; } diff --git a/drivers/net/wireless/rtlwifi/rtl8192se/hw.c b/drivers/net/wireless/rtlwifi/rtl8192se/hw.c index 00e067044c08..5761d5b49e39 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192se/hw.c +++ b/drivers/net/wireless/rtlwifi/rtl8192se/hw.c @@ -1201,6 +1201,9 @@ static int _rtl92se_set_media_status(struct ieee80211_hw *hw, } + if (type != NL80211_IFTYPE_AP && + rtlpriv->mac80211.link_state < MAC80211_LINKED) + bt_msr = rtl_read_byte(rtlpriv, MSR) & ~MSR_LINK_MASK; rtl_write_byte(rtlpriv, (MSR), bt_msr); temp = rtl_read_dword(rtlpriv, TCR); @@ -1262,6 +1265,7 @@ void rtl92se_enable_interrupt(struct ieee80211_hw *hw) rtl_write_dword(rtlpriv, INTA_MASK, rtlpci->irq_mask[0]); /* Support Bit 32-37(Assign as Bit 0-5) interrupt setting now */ rtl_write_dword(rtlpriv, INTA_MASK + 4, rtlpci->irq_mask[1] & 0x3F); + rtlpci->irq_enabled = true; } void rtl92se_disable_interrupt(struct ieee80211_hw *hw) @@ -1276,8 +1280,7 @@ void rtl92se_disable_interrupt(struct ieee80211_hw *hw) rtlpci = rtl_pcidev(rtl_pcipriv(hw)); rtl_write_dword(rtlpriv, INTA_MASK, 0); rtl_write_dword(rtlpriv, INTA_MASK + 4, 0); - - synchronize_irq(rtlpci->pdev->irq); + rtlpci->irq_enabled = false; } static u8 _rtl92s_set_sysclk(struct ieee80211_hw *hw, u8 data) diff --git a/drivers/net/wireless/rtlwifi/rtl8192se/phy.c b/drivers/net/wireless/rtlwifi/rtl8192se/phy.c index 77c5b5f35244..4b4612fe2fdb 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192se/phy.c +++ b/drivers/net/wireless/rtlwifi/rtl8192se/phy.c @@ -399,6 +399,8 @@ static bool _rtl92s_phy_sw_chnl_step_by_step(struct ieee80211_hw *hw, case 2: currentcmd = &postcommoncmd[*step]; break; + default: + return true; } if (currentcmd->cmdid == CMDID_END) { diff --git a/drivers/net/wireless/rtlwifi/rtl8192se/sw.c b/drivers/net/wireless/rtlwifi/rtl8192se/sw.c index aadba29c167a..fb003868bdef 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192se/sw.c +++ b/drivers/net/wireless/rtlwifi/rtl8192se/sw.c @@ -236,6 +236,19 @@ static void rtl92s_deinit_sw_vars(struct ieee80211_hw *hw) } } +static bool rtl92se_is_tx_desc_closed(struct ieee80211_hw *hw, u8 hw_queue, + u16 index) +{ + struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); + struct rtl8192_tx_ring *ring = &rtlpci->tx_ring[hw_queue]; + u8 *entry = (u8 *)(&ring->desc[ring->idx]); + u8 own = (u8)rtl92se_get_desc(entry, true, HW_DESC_OWN); + + if (own) + return false; + return true; +} + static struct rtl_hal_ops rtl8192se_hal_ops = { .init_sw_vars = rtl92s_init_sw_vars, .deinit_sw_vars = rtl92s_deinit_sw_vars, @@ -269,6 +282,7 @@ static struct rtl_hal_ops rtl8192se_hal_ops = { .led_control = rtl92se_led_control, .set_desc = rtl92se_set_desc, .get_desc = rtl92se_get_desc, + .is_tx_desc_closed = rtl92se_is_tx_desc_closed, .tx_polling = rtl92se_tx_polling, .enable_hw_sec = rtl92se_enable_hw_security_config, .set_key = rtl92se_set_key, @@ -306,6 +320,8 @@ static struct rtl_hal_cfg rtl92se_hal_cfg = { .maps[MAC_RCR_ACRC32] = RCR_ACRC32, .maps[MAC_RCR_ACF] = RCR_ACF, .maps[MAC_RCR_AAP] = RCR_AAP, + .maps[MAC_HIMR] = INTA_MASK, + .maps[MAC_HIMRE] = INTA_MASK + 4, .maps[EFUSE_TEST] = REG_EFUSE_TEST, .maps[EFUSE_CTRL] = REG_EFUSE_CTRL, From 9c3a6670863033df421c8e46f79f383cf3a922ee Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Sat, 8 Nov 2014 13:59:42 +0100 Subject: [PATCH 034/208] b43: fix NULL pointer dereference in b43_phy_copy() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit phy_read and phy_write are not set for every phy any more sine this: commit d342b95dd735014a590f9051b1ba227eb54ca8f6 Author: Rafał Miłecki Date: Thu Jul 31 21:59:43 2014 +0200 b43: don't duplicate common PHY read/write ops b43_phy_copy() accesses phy_read and phy_write directly and will fail with some phys. This patch fixes the regression by using the b43_phy_read() and b43_phy_write() functions which should be used for read and write access. This should fix this bug report: https://bugzilla.kernel.org/show_bug.cgi?id=87731 Reported-by: Volker Kempter Tested-by: Volker Kempter Signed-off-by: Hauke Mehrtens Signed-off-by: John W. Linville --- drivers/net/wireless/b43/phy_common.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/wireless/b43/phy_common.c b/drivers/net/wireless/b43/phy_common.c index 1dfc682a8055..ee27b06074e1 100644 --- a/drivers/net/wireless/b43/phy_common.c +++ b/drivers/net/wireless/b43/phy_common.c @@ -300,9 +300,7 @@ void b43_phy_write(struct b43_wldev *dev, u16 reg, u16 value) void b43_phy_copy(struct b43_wldev *dev, u16 destreg, u16 srcreg) { - assert_mac_suspended(dev); - dev->phy.ops->phy_write(dev, destreg, - dev->phy.ops->phy_read(dev, srcreg)); + b43_phy_write(dev, destreg, b43_phy_read(dev, srcreg)); } void b43_phy_mask(struct b43_wldev *dev, u16 offset, u16 mask) From 9b520d84957d63348e87c0f2cbd21d86e1e8f2f2 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 4 Nov 2014 15:54:11 +0200 Subject: [PATCH 035/208] iwlwifi: mvm: abort scan upon RFKILL This code existed but not for all the different FW APIs we support. Fix this. Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/mvm/scan.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/scan.c b/drivers/net/wireless/iwlwifi/mvm/scan.c index b280d5d87127..7554f7053830 100644 --- a/drivers/net/wireless/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/iwlwifi/mvm/scan.c @@ -602,16 +602,6 @@ static int iwl_mvm_cancel_regular_scan(struct iwl_mvm *mvm) SCAN_COMPLETE_NOTIFICATION }; int ret; - if (mvm->scan_status == IWL_MVM_SCAN_NONE) - return 0; - - if (iwl_mvm_is_radio_killed(mvm)) { - ieee80211_scan_completed(mvm->hw, true); - iwl_mvm_unref(mvm, IWL_MVM_REF_SCAN); - mvm->scan_status = IWL_MVM_SCAN_NONE; - return 0; - } - iwl_init_notification_wait(&mvm->notif_wait, &wait_scan_abort, scan_abort_notif, ARRAY_SIZE(scan_abort_notif), @@ -1400,6 +1390,16 @@ int iwl_mvm_unified_sched_scan_lmac(struct iwl_mvm *mvm, int iwl_mvm_cancel_scan(struct iwl_mvm *mvm) { + if (mvm->scan_status == IWL_MVM_SCAN_NONE) + return 0; + + if (iwl_mvm_is_radio_killed(mvm)) { + ieee80211_scan_completed(mvm->hw, true); + iwl_mvm_unref(mvm, IWL_MVM_REF_SCAN); + mvm->scan_status = IWL_MVM_SCAN_NONE; + return 0; + } + if (mvm->fw->ucode_capa.api[0] & IWL_UCODE_TLV_API_LMAC_SCAN) return iwl_mvm_scan_offload_stop(mvm, true); return iwl_mvm_cancel_regular_scan(mvm); From 87dd634ae72bb8f6d0dd12f1cbbc67c7da6dba3b Mon Sep 17 00:00:00 2001 From: Liad Kaufman Date: Mon, 10 Nov 2014 19:25:22 +0200 Subject: [PATCH 036/208] iwlwifi: pcie: fix prph dump length The length counting previously done had an error in it, causing the length down the data dumping function to be shorter than it should be, causing the end of the data to get truncated off and lost. Cc: [3.17+] Fixes: 67c65f2cf710 ("iwlwifi: dump periphery registers to fw-error-dump") Signed-off-by: Liad Kaufman Reviewed-by: Johannes Berg Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/pcie/trans.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/pcie/trans.c b/drivers/net/wireless/iwlwifi/pcie/trans.c index 160c3ebc48d0..dd2f3f8baa9d 100644 --- a/drivers/net/wireless/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/iwlwifi/pcie/trans.c @@ -1894,8 +1894,7 @@ static u32 iwl_trans_pcie_dump_prph(struct iwl_trans *trans, int reg; __le32 *val; - prph_len += sizeof(*data) + sizeof(*prph) + - num_bytes_in_chunk; + prph_len += sizeof(**data) + sizeof(*prph) + num_bytes_in_chunk; (*data)->type = cpu_to_le32(IWL_FW_ERROR_DUMP_PRPH); (*data)->len = cpu_to_le32(sizeof(*prph) + From 2196937e12b1b4ba139806d132647e1651d655df Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 10 Nov 2014 17:11:21 +0100 Subject: [PATCH 037/208] netfilter: ipset: small potential read beyond the end of buffer We could be reading 8 bytes into a 4 byte buffer here. It seems harmless but adding a check is the right thing to do and it silences a static checker warning. Signed-off-by: Dan Carpenter Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_core.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 86f9d76b1464..d259da3ce67a 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1863,6 +1863,12 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) if (*op < IP_SET_OP_VERSION) { /* Check the version at the beginning of operations */ struct ip_set_req_version *req_version = data; + + if (*len < sizeof(struct ip_set_req_version)) { + ret = -EINVAL; + goto done; + } + if (req_version->version != IPSET_PROTOCOL) { ret = -EPROTO; goto done; From cfd9167af14eb4ec21517a32911d460083ee3d59 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Tue, 11 Nov 2014 14:28:47 +0100 Subject: [PATCH 038/208] rt2x00: do not align payload on modern H/W RT2800 and newer hardware require padding between header and payload if header length is not multiple of 4. For historical reasons we also align payload to to 4 bytes boundary, but such alignment is not needed on modern H/W. Patch fixes skb_under_panic problems reported from time to time: https://bugzilla.kernel.org/show_bug.cgi?id=84911 https://bugzilla.kernel.org/show_bug.cgi?id=72471 http://marc.info/?l=linux-wireless&m=139108549530402&w=2 https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1087591 Panic happened because we eat 4 bytes of skb headroom on each (re)transmission when sending frame without the payload and the header length not being multiple of 4 (i.e. QoS header has 26 bytes). On such case because paylad_aling=2 is bigger than header_align=0 we increase header_align by 4 bytes. To prevent that we could change the check to: if (payload_length && payload_align > header_align) header_align += 4; but not aligning payload at all is more effective and alignment is not really needed by H/W (that has been tested on OpenWrt project for few years now). Reported-and-tested-by: Antti S. Lankila Debugged-by: Antti S. Lankila Reported-by: Henrik Asp Originally-From: Helmut Schaa Cc: stable@vger.kernel.org Signed-off-by: Stanislaw Gruszka Signed-off-by: John W. Linville --- drivers/net/wireless/rt2x00/rt2x00queue.c | 62 +++++++---------------- 1 file changed, 18 insertions(+), 44 deletions(-) diff --git a/drivers/net/wireless/rt2x00/rt2x00queue.c b/drivers/net/wireless/rt2x00/rt2x00queue.c index 8e68f87ab13c..66ff36447b94 100644 --- a/drivers/net/wireless/rt2x00/rt2x00queue.c +++ b/drivers/net/wireless/rt2x00/rt2x00queue.c @@ -158,55 +158,29 @@ void rt2x00queue_align_frame(struct sk_buff *skb) skb_trim(skb, frame_length); } -void rt2x00queue_insert_l2pad(struct sk_buff *skb, unsigned int header_length) +/* + * H/W needs L2 padding between the header and the paylod if header size + * is not 4 bytes aligned. + */ +void rt2x00queue_insert_l2pad(struct sk_buff *skb, unsigned int hdr_len) { - unsigned int payload_length = skb->len - header_length; - unsigned int header_align = ALIGN_SIZE(skb, 0); - unsigned int payload_align = ALIGN_SIZE(skb, header_length); - unsigned int l2pad = payload_length ? L2PAD_SIZE(header_length) : 0; - - /* - * Adjust the header alignment if the payload needs to be moved more - * than the header. - */ - if (payload_align > header_align) - header_align += 4; - - /* There is nothing to do if no alignment is needed */ - if (!header_align) - return; - - /* Reserve the amount of space needed in front of the frame */ - skb_push(skb, header_align); - - /* - * Move the header. - */ - memmove(skb->data, skb->data + header_align, header_length); - - /* Move the payload, if present and if required */ - if (payload_length && payload_align) - memmove(skb->data + header_length + l2pad, - skb->data + header_length + l2pad + payload_align, - payload_length); - - /* Trim the skb to the correct size */ - skb_trim(skb, header_length + l2pad + payload_length); -} - -void rt2x00queue_remove_l2pad(struct sk_buff *skb, unsigned int header_length) -{ - /* - * L2 padding is only present if the skb contains more than just the - * IEEE 802.11 header. - */ - unsigned int l2pad = (skb->len > header_length) ? - L2PAD_SIZE(header_length) : 0; + unsigned int l2pad = (skb->len > hdr_len) ? L2PAD_SIZE(hdr_len) : 0; if (!l2pad) return; - memmove(skb->data + l2pad, skb->data, header_length); + skb_push(skb, l2pad); + memmove(skb->data, skb->data + l2pad, hdr_len); +} + +void rt2x00queue_remove_l2pad(struct sk_buff *skb, unsigned int hdr_len) +{ + unsigned int l2pad = (skb->len > hdr_len) ? L2PAD_SIZE(hdr_len) : 0; + + if (!l2pad) + return; + + memmove(skb->data + l2pad, skb->data, hdr_len); skb_pull(skb, l2pad); } From 0cd75b19899fd86b51a6480fb8c00dcd85a54591 Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Tue, 11 Nov 2014 13:58:44 +0100 Subject: [PATCH 039/208] brcmfmac: fix conversion of channel width 20MHZ_NOHT The function chandef_to_chanspec() failed when converting a chandef with bandwidth set to NL80211_CHAN_WIDTH_20_NOHT. This was reported by user running the device in AP mode. ------------[ cut here ]------------ WARNING: CPU: 0 PID: 304 at drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c:381 chandef_to_chanspec.isra.11+0x158/0x184() Modules linked in: CPU: 0 PID: 304 Comm: hostapd Not tainted 3.16.0-rc7-abb+g64aa90f #8 [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (warn_slowpath_common+0x6c/0x8c) [] (warn_slowpath_common) from [] (warn_slowpath_null+0x1c/0x24) [] (warn_slowpath_null) from [] (chandef_to_chanspec.isra.11+0x158/0x184) [] (chandef_to_chanspec.isra.11) from [] (brcmf_cfg80211_start_ap+0x1e4/0x614) [] (brcmf_cfg80211_start_ap) from [] (nl80211_start_ap+0x288/0x414) [] (nl80211_start_ap) from [] (genl_rcv_msg+0x21c/0x38c) [] (genl_rcv_msg) from [] (netlink_rcv_skb+0xac/0xc0) [] (netlink_rcv_skb) from [] (genl_rcv+0x20/0x34) [] (genl_rcv) from [] (netlink_unicast+0x150/0x20c) [] (netlink_unicast) from [] (netlink_sendmsg+0x2b8/0x398) [] (netlink_sendmsg) from [] (sock_sendmsg+0x84/0xa8) [] (sock_sendmsg) from [] (___sys_sendmsg.part.29+0x268/0x278) [] (___sys_sendmsg.part.29) from [] (__sys_sendmsg+0x4c/0x7c) [] (__sys_sendmsg) from [] (ret_fast_syscall+0x0/0x44) ---[ end trace 965ee2158c9905a2 ]--- Cc: stable@vger.kernel.org # v3.17 Reported-by: Pontus Fuchs Reviewed-by: Hante Meuleman Reviewed-by: Daniel (Deognyoun) Kim Reviewed-by: Franky (Zhenhui) Lin Reviewed-by: Pieter-Paul Giesberts Signed-off-by: Arend van Spriel Signed-off-by: John W. Linville --- drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c b/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c index 28fa25b509db..39b45c038a93 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c @@ -299,6 +299,7 @@ static u16 chandef_to_chanspec(struct brcmu_d11inf *d11inf, primary_offset = ch->center_freq1 - ch->chan->center_freq; switch (ch->width) { case NL80211_CHAN_WIDTH_20: + case NL80211_CHAN_WIDTH_20_NOHT: ch_inf.bw = BRCMU_CHAN_BW_20; WARN_ON(primary_offset != 0); break; @@ -323,6 +324,10 @@ static u16 chandef_to_chanspec(struct brcmu_d11inf *d11inf, ch_inf.sb = BRCMU_CHAN_SB_LU; } break; + case NL80211_CHAN_WIDTH_80P80: + case NL80211_CHAN_WIDTH_160: + case NL80211_CHAN_WIDTH_5: + case NL80211_CHAN_WIDTH_10: default: WARN_ON_ONCE(1); } @@ -333,6 +338,7 @@ static u16 chandef_to_chanspec(struct brcmu_d11inf *d11inf, case IEEE80211_BAND_5GHZ: ch_inf.band = BRCMU_CHAN_BAND_5G; break; + case IEEE80211_BAND_60GHZ: default: WARN_ON_ONCE(1); } From 4e6ce4dc7ce71d0886908d55129d5d6482a27ff9 Mon Sep 17 00:00:00 2001 From: Miaoqing Pan Date: Thu, 6 Nov 2014 10:52:23 +0530 Subject: [PATCH 040/208] ath9k: Fix RTC_DERIVED_CLK usage Based on the reference clock, which could be 25MHz or 40MHz, AR_RTC_DERIVED_CLK is programmed differently for AR9340 and AR9550. But, when a chip reset is done, processing the initvals sets the register back to the default value. Fix this by moving the code in ath9k_hw_init_pll() to ar9003_hw_override_ini(). Also, do this override for AR9531. Cc: stable@vger.kernel.org Signed-off-by: Miaoqing Pan Signed-off-by: Sujith Manoharan Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/ar9003_phy.c | 13 +++++++++++++ drivers/net/wireless/ath/ath9k/hw.c | 13 ------------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/ar9003_phy.c b/drivers/net/wireless/ath/ath9k/ar9003_phy.c index 697c4ae90af0..1e8ea5e4d4ca 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_phy.c +++ b/drivers/net/wireless/ath/ath9k/ar9003_phy.c @@ -664,6 +664,19 @@ static void ar9003_hw_override_ini(struct ath_hw *ah) ah->enabled_cals |= TX_CL_CAL; else ah->enabled_cals &= ~TX_CL_CAL; + + if (AR_SREV_9340(ah) || AR_SREV_9531(ah) || AR_SREV_9550(ah)) { + if (ah->is_clk_25mhz) { + REG_WRITE(ah, AR_RTC_DERIVED_CLK, 0x17c << 1); + REG_WRITE(ah, AR_SLP32_MODE, 0x0010f3d7); + REG_WRITE(ah, AR_SLP32_INC, 0x0001e7ae); + } else { + REG_WRITE(ah, AR_RTC_DERIVED_CLK, 0x261 << 1); + REG_WRITE(ah, AR_SLP32_MODE, 0x0010f400); + REG_WRITE(ah, AR_SLP32_INC, 0x0001e800); + } + udelay(100); + } } static void ar9003_hw_prog_ini(struct ath_hw *ah, diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c index 8be4b1453394..2ad605760e21 100644 --- a/drivers/net/wireless/ath/ath9k/hw.c +++ b/drivers/net/wireless/ath/ath9k/hw.c @@ -861,19 +861,6 @@ static void ath9k_hw_init_pll(struct ath_hw *ah, udelay(RTC_PLL_SETTLE_DELAY); REG_WRITE(ah, AR_RTC_SLEEP_CLK, AR_RTC_FORCE_DERIVED_CLK); - - if (AR_SREV_9340(ah) || AR_SREV_9550(ah)) { - if (ah->is_clk_25mhz) { - REG_WRITE(ah, AR_RTC_DERIVED_CLK, 0x17c << 1); - REG_WRITE(ah, AR_SLP32_MODE, 0x0010f3d7); - REG_WRITE(ah, AR_SLP32_INC, 0x0001e7ae); - } else { - REG_WRITE(ah, AR_RTC_DERIVED_CLK, 0x261 << 1); - REG_WRITE(ah, AR_SLP32_MODE, 0x0010f400); - REG_WRITE(ah, AR_SLP32_INC, 0x0001e800); - } - udelay(100); - } } static void ath9k_hw_init_interrupt_masks(struct ath_hw *ah, From b207422bb92f17f4f892a9b8737d44b37fece25b Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 6 Nov 2014 12:52:06 +0100 Subject: [PATCH 041/208] ARM: shmobile: kzm9g legacy: Set i2c clks_per_count to 2 On sh73a0/kzm9g-legacy, probing of the i2c masters fails with: i2c-sh_mobile i2c-sh_mobile.0: timing values out of range: L/H=0x208/0x1bf sh_mobile: probe of i2c-sh_mobile.0 failed with error -22 According to the datasheet, the transfer rate is derived from the HP clock (which runs at 104 MHz) divided by two. Hence i2c_sh_mobile_platform_data.clks_per_count should be set to two. Now probing succeeds, and i2c works: i2c-sh_mobile i2c-sh_mobile.0: I2C adapter 0 with bus speed 100000 Hz (L/H=0x104/0xe0) Signed-off-by: Geert Uytterhoeven Reviewed-by: Wolfram Sang Signed-off-by: Simon Horman --- arch/arm/mach-shmobile/setup-sh73a0.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/arch/arm/mach-shmobile/setup-sh73a0.c b/arch/arm/mach-shmobile/setup-sh73a0.c index b7bd8e509668..328657d011d5 100644 --- a/arch/arm/mach-shmobile/setup-sh73a0.c +++ b/arch/arm/mach-shmobile/setup-sh73a0.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -192,11 +193,18 @@ static struct resource i2c4_resources[] = { }, }; +static struct i2c_sh_mobile_platform_data i2c_platform_data = { + .clks_per_count = 2, +}; + static struct platform_device i2c0_device = { .name = "i2c-sh_mobile", .id = 0, .resource = i2c0_resources, .num_resources = ARRAY_SIZE(i2c0_resources), + .dev = { + .platform_data = &i2c_platform_data, + }, }; static struct platform_device i2c1_device = { @@ -204,6 +212,9 @@ static struct platform_device i2c1_device = { .id = 1, .resource = i2c1_resources, .num_resources = ARRAY_SIZE(i2c1_resources), + .dev = { + .platform_data = &i2c_platform_data, + }, }; static struct platform_device i2c2_device = { @@ -211,6 +222,9 @@ static struct platform_device i2c2_device = { .id = 2, .resource = i2c2_resources, .num_resources = ARRAY_SIZE(i2c2_resources), + .dev = { + .platform_data = &i2c_platform_data, + }, }; static struct platform_device i2c3_device = { @@ -218,6 +232,9 @@ static struct platform_device i2c3_device = { .id = 3, .resource = i2c3_resources, .num_resources = ARRAY_SIZE(i2c3_resources), + .dev = { + .platform_data = &i2c_platform_data, + }, }; static struct platform_device i2c4_device = { @@ -225,6 +242,9 @@ static struct platform_device i2c4_device = { .id = 4, .resource = i2c4_resources, .num_resources = ARRAY_SIZE(i2c4_resources), + .dev = { + .platform_data = &i2c_platform_data, + }, }; static const struct sh_dmae_slave_config sh73a0_dmae_slaves[] = { From 50656d9df63d69ce399c8be62d4473b039dac36a Mon Sep 17 00:00:00 2001 From: Calvin Owens Date: Tue, 4 Nov 2014 16:37:40 -0800 Subject: [PATCH 042/208] ipvs: Keep skb->sk when allocating headroom on tunnel xmit ip_vs_prepare_tunneled_skb() ignores ->sk when allocating a new skb, either unconditionally setting ->sk to NULL or allowing the uninitialized ->sk from a newly allocated skb to leak through to the caller. This patch properly copies ->sk and increments its reference count. Signed-off-by: Calvin Owens Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_xmit.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 437a3663ad03..bd90bf8107da 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -846,6 +846,8 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af, new_skb = skb_realloc_headroom(skb, max_headroom); if (!new_skb) goto error; + if (skb->sk) + skb_set_owner_w(new_skb, skb->sk); consume_skb(skb); skb = new_skb; } From 1f9cd915b64bb95f7b41667b4bf8b22f0a0a557b Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Tue, 11 Nov 2014 19:35:52 +0100 Subject: [PATCH 043/208] dmaengine: sun6i: Fix memcpy operation The prep_memcpy call was not setting any meaningful burst and width because it was relying on the dma_slave_config was not set already. Rework the needed conversion functions, and hardcode the width and burst to use. Signed-off-by: Maxime Ripard Cc: stable@vger.kernel.org Signed-off-by: Vinod Koul --- drivers/dma/sun6i-dma.c | 61 ++++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 31 deletions(-) diff --git a/drivers/dma/sun6i-dma.c b/drivers/dma/sun6i-dma.c index 3aa10b328254..91292f5513ff 100644 --- a/drivers/dma/sun6i-dma.c +++ b/drivers/dma/sun6i-dma.c @@ -230,30 +230,25 @@ static inline void sun6i_dma_dump_chan_regs(struct sun6i_dma_dev *sdev, readl(pchan->base + DMA_CHAN_CUR_PARA)); } -static inline int convert_burst(u32 maxburst, u8 *burst) +static inline s8 convert_burst(u32 maxburst) { switch (maxburst) { case 1: - *burst = 0; - break; + return 0; case 8: - *burst = 2; - break; + return 2; default: return -EINVAL; } - - return 0; } -static inline int convert_buswidth(enum dma_slave_buswidth addr_width, u8 *width) +static inline s8 convert_buswidth(enum dma_slave_buswidth addr_width) { if ((addr_width < DMA_SLAVE_BUSWIDTH_1_BYTE) || (addr_width > DMA_SLAVE_BUSWIDTH_4_BYTES)) return -EINVAL; - *width = addr_width >> 1; - return 0; + return addr_width >> 1; } static void *sun6i_dma_lli_add(struct sun6i_dma_lli *prev, @@ -284,26 +279,25 @@ static inline int sun6i_dma_cfg_lli(struct sun6i_dma_lli *lli, struct dma_slave_config *config) { u8 src_width, dst_width, src_burst, dst_burst; - int ret; if (!config) return -EINVAL; - ret = convert_burst(config->src_maxburst, &src_burst); - if (ret) - return ret; + src_burst = convert_burst(config->src_maxburst); + if (src_burst) + return src_burst; - ret = convert_burst(config->dst_maxburst, &dst_burst); - if (ret) - return ret; + dst_burst = convert_burst(config->dst_maxburst); + if (dst_burst) + return dst_burst; - ret = convert_buswidth(config->src_addr_width, &src_width); - if (ret) - return ret; + src_width = convert_buswidth(config->src_addr_width); + if (src_width) + return src_width; - ret = convert_buswidth(config->dst_addr_width, &dst_width); - if (ret) - return ret; + dst_width = convert_buswidth(config->dst_addr_width); + if (dst_width) + return dst_width; lli->cfg = DMA_CHAN_CFG_SRC_BURST(src_burst) | DMA_CHAN_CFG_SRC_WIDTH(src_width) | @@ -542,11 +536,10 @@ static struct dma_async_tx_descriptor *sun6i_dma_prep_dma_memcpy( { struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device); struct sun6i_vchan *vchan = to_sun6i_vchan(chan); - struct dma_slave_config *sconfig = &vchan->cfg; struct sun6i_dma_lli *v_lli; struct sun6i_desc *txd; dma_addr_t p_lli; - int ret; + s8 burst, width; dev_dbg(chan2dev(chan), "%s; chan: %d, dest: %pad, src: %pad, len: %zu. flags: 0x%08lx\n", @@ -565,14 +558,21 @@ static struct dma_async_tx_descriptor *sun6i_dma_prep_dma_memcpy( goto err_txd_free; } - ret = sun6i_dma_cfg_lli(v_lli, src, dest, len, sconfig); - if (ret) - goto err_dma_free; + v_lli->src = src; + v_lli->dst = dest; + v_lli->len = len; + v_lli->para = NORMAL_WAIT; + burst = convert_burst(8); + width = convert_buswidth(DMA_SLAVE_BUSWIDTH_4_BYTES); v_lli->cfg |= DMA_CHAN_CFG_SRC_DRQ(DRQ_SDRAM) | DMA_CHAN_CFG_DST_DRQ(DRQ_SDRAM) | DMA_CHAN_CFG_DST_LINEAR_MODE | - DMA_CHAN_CFG_SRC_LINEAR_MODE; + DMA_CHAN_CFG_SRC_LINEAR_MODE | + DMA_CHAN_CFG_SRC_BURST(burst) | + DMA_CHAN_CFG_SRC_WIDTH(width) | + DMA_CHAN_CFG_DST_BURST(burst) | + DMA_CHAN_CFG_DST_WIDTH(width); sun6i_dma_lli_add(NULL, v_lli, p_lli, txd); @@ -580,8 +580,6 @@ static struct dma_async_tx_descriptor *sun6i_dma_prep_dma_memcpy( return vchan_tx_prep(&vchan->vc, &txd->vd, flags); -err_dma_free: - dma_pool_free(sdev->pool, v_lli, p_lli); err_txd_free: kfree(txd); return NULL; @@ -915,6 +913,7 @@ static int sun6i_dma_probe(struct platform_device *pdev) sdc->slave.device_prep_dma_memcpy = sun6i_dma_prep_dma_memcpy; sdc->slave.device_control = sun6i_dma_control; sdc->slave.chancnt = NR_MAX_VCHANS; + sdc->slave.copy_align = 4; sdc->slave.dev = &pdev->dev; From 2daf1b4d18e3add229d1a3b5c554331d99ac6c7e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 7 Nov 2014 18:48:33 +0100 Subject: [PATCH 044/208] netfilter: nft_compat: use current net namespace Instead of init_net when using xtables over nftables compat. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_compat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 9d6d6f60a80f..b92f129beade 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -117,7 +117,7 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par, struct xt_target *target, void *info, union nft_entry *entry, u8 proto, bool inv) { - par->net = &init_net; + par->net = ctx->net; par->table = ctx->table->name; switch (ctx->afi->family) { case AF_INET: @@ -324,7 +324,7 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx, struct xt_match *match, void *info, union nft_entry *entry, u8 proto, bool inv) { - par->net = &init_net; + par->net = ctx->net; par->table = ctx->table->name; switch (ctx->afi->family) { case AF_INET: From c918687f5e3962375a19de6ded3c1be85ebdbcd6 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 10 Nov 2014 20:53:55 +0100 Subject: [PATCH 045/208] netfilter: nft_compat: relax chain type validation Check for nat chain dependency only, which is the one that can actually crash the kernel. Don't care if mangle, filter and security specific match and targets are used out of their scope, they are harmless. This restores iptables-compat with mangle specific match/target when used out of the OUTPUT chain, that are actually emulated through filter chains, which broke when performing strict validation. Fixes: f3f5dde ("netfilter: nft_compat: validate chain type in match/target") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_compat.c | 32 ++------------------------------ 1 file changed, 2 insertions(+), 30 deletions(-) diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index b92f129beade..70dc96516305 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -21,45 +21,17 @@ #include #include -static const struct { - const char *name; - u8 type; -} table_to_chaintype[] = { - { "filter", NFT_CHAIN_T_DEFAULT }, - { "raw", NFT_CHAIN_T_DEFAULT }, - { "security", NFT_CHAIN_T_DEFAULT }, - { "mangle", NFT_CHAIN_T_ROUTE }, - { "nat", NFT_CHAIN_T_NAT }, - { }, -}; - -static int nft_compat_table_to_chaintype(const char *table) -{ - int i; - - for (i = 0; table_to_chaintype[i].name != NULL; i++) { - if (strcmp(table_to_chaintype[i].name, table) == 0) - return table_to_chaintype[i].type; - } - - return -1; -} - static int nft_compat_chain_validate_dependency(const char *tablename, const struct nft_chain *chain) { - enum nft_chain_type type; const struct nft_base_chain *basechain; if (!tablename || !(chain->flags & NFT_BASE_CHAIN)) return 0; - type = nft_compat_table_to_chaintype(tablename); - if (type < 0) - return -EINVAL; - basechain = nft_base_chain(chain); - if (basechain->type->type != type) + if (strcmp(tablename, "nat") == 0 && + basechain->type->type != NFT_CHAIN_T_NAT) return -EINVAL; return 0; From afefb6f928ed42d5db452ee9251ce6de62673c67 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 10 Nov 2014 19:08:21 +0100 Subject: [PATCH 046/208] netfilter: nft_compat: use the match->table to validate dependencies Instead of the match->name, which is of course not relevant. Fixes: f3f5dde ("netfilter: nft_compat: validate chain type in match/target") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_compat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 70dc96516305..265e190f2218 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -346,7 +346,7 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, union nft_entry e = {}; int ret; - ret = nft_compat_chain_validate_dependency(match->name, ctx->chain); + ret = nft_compat_chain_validate_dependency(match->table, ctx->chain); if (ret < 0) goto err; @@ -420,7 +420,7 @@ static int nft_match_validate(const struct nft_ctx *ctx, if (!(hook_mask & match->hooks)) return -EINVAL; - ret = nft_compat_chain_validate_dependency(match->name, + ret = nft_compat_chain_validate_dependency(match->table, ctx->chain); if (ret < 0) return ret; From b326dd37b94e29bf6a15940f4fa66aa21a678ab1 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 10 Nov 2014 21:14:12 +0100 Subject: [PATCH 047/208] netfilter: nf_tables: restore synchronous object release from commit/abort The existing xtables matches and targets, when used from nft_compat, may sleep from the destroy path, ie. when removing rules. Since the objects are released via call_rcu from softirq context, this results in lockdep splats and possible lockups that may be hard to reproduce. Patrick also indicated that delayed object release via call_rcu can cause us problems in the ordering of event notifications when anonymous sets are in place. So, this patch restores the synchronous object release from the commit and abort paths. This includes a call to synchronize_rcu() to make sure that no packets are walking on the objects that are going to be released. This is slowier though, but it's simple and it resolves the aforementioned problems. This is a partial revert of c7c32e7 ("netfilter: nf_tables: defer all object release via rcu") that was introduced in 3.16 to speed up interaction with userspace. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 -- net/netfilter/nf_tables_api.c | 24 ++++++++---------------- 2 files changed, 8 insertions(+), 18 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 845c596bf594..3ae969e3acf0 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -396,14 +396,12 @@ struct nft_rule { /** * struct nft_trans - nf_tables object update in transaction * - * @rcu_head: rcu head to defer release of transaction data * @list: used internally * @msg_type: message type * @ctx: transaction context * @data: internal information related to the transaction */ struct nft_trans { - struct rcu_head rcu_head; struct list_head list; int msg_type; struct nft_ctx ctx; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 11ab4b078f3b..66e8425dbfe7 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3484,13 +3484,8 @@ static void nft_chain_commit_update(struct nft_trans *trans) } } -/* Schedule objects for release via rcu to make sure no packets are accesing - * removed rules. - */ -static void nf_tables_commit_release_rcu(struct rcu_head *rt) +static void nf_tables_commit_release(struct nft_trans *trans) { - struct nft_trans *trans = container_of(rt, struct nft_trans, rcu_head); - switch (trans->msg_type) { case NFT_MSG_DELTABLE: nf_tables_table_destroy(&trans->ctx); @@ -3612,10 +3607,11 @@ static int nf_tables_commit(struct sk_buff *skb) } } + synchronize_rcu(); + list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { list_del(&trans->list); - trans->ctx.nla = NULL; - call_rcu(&trans->rcu_head, nf_tables_commit_release_rcu); + nf_tables_commit_release(trans); } nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN); @@ -3623,13 +3619,8 @@ static int nf_tables_commit(struct sk_buff *skb) return 0; } -/* Schedule objects for release via rcu to make sure no packets are accesing - * aborted rules. - */ -static void nf_tables_abort_release_rcu(struct rcu_head *rt) +static void nf_tables_abort_release(struct nft_trans *trans) { - struct nft_trans *trans = container_of(rt, struct nft_trans, rcu_head); - switch (trans->msg_type) { case NFT_MSG_NEWTABLE: nf_tables_table_destroy(&trans->ctx); @@ -3725,11 +3716,12 @@ static int nf_tables_abort(struct sk_buff *skb) } } + synchronize_rcu(); + list_for_each_entry_safe_reverse(trans, next, &net->nft.commit_list, list) { list_del(&trans->list); - trans->ctx.nla = NULL; - call_rcu(&trans->rcu_head, nf_tables_abort_release_rcu); + nf_tables_abort_release(trans); } return 0; From 2cb1e0259f50c7be88eb277c7442fa74a3ce7c57 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Wed, 12 Nov 2014 15:40:44 +0100 Subject: [PATCH 048/208] ASoC: cs42l51: re-hook of_match_table pointer In commit a1253ef6d3fa ("ASoC: cs42l51: split i2c from codec driver"), the I2C part of the CS42L51 was moved to a separate file, but the definition of the of_device_id array was left in the driver file itself, no longer connected to the platform_driver structure using the .of_match_table pointer. This commit exports the of_device_id array in cs42l51, and uses it as .of_match_able in cs42l51-i2c.c. This solution was suggested by Brian Austin. Fixes: a1253ef6d3fa ("ASoC: cs42l51: split i2c from codec driver") Signed-off-by: Thomas Petazzoni Acked-by: Brian Austin Signed-off-by: Mark Brown Cc: --- sound/soc/codecs/cs42l51-i2c.c | 1 + sound/soc/codecs/cs42l51.c | 4 +++- sound/soc/codecs/cs42l51.h | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/cs42l51-i2c.c b/sound/soc/codecs/cs42l51-i2c.c index cee51ae177c1..c40428f25ba5 100644 --- a/sound/soc/codecs/cs42l51-i2c.c +++ b/sound/soc/codecs/cs42l51-i2c.c @@ -46,6 +46,7 @@ static struct i2c_driver cs42l51_i2c_driver = { .driver = { .name = "cs42l51", .owner = THIS_MODULE, + .of_match_table = cs42l51_of_match, }, .probe = cs42l51_i2c_probe, .remove = cs42l51_i2c_remove, diff --git a/sound/soc/codecs/cs42l51.c b/sound/soc/codecs/cs42l51.c index 09488d97de60..669c38fc3034 100644 --- a/sound/soc/codecs/cs42l51.c +++ b/sound/soc/codecs/cs42l51.c @@ -558,11 +558,13 @@ error: } EXPORT_SYMBOL_GPL(cs42l51_probe); -static const struct of_device_id cs42l51_of_match[] = { +const struct of_device_id cs42l51_of_match[] = { { .compatible = "cirrus,cs42l51", }, { } }; MODULE_DEVICE_TABLE(of, cs42l51_of_match); +EXPORT_SYMBOL_GPL(cs42l51_of_match); + MODULE_AUTHOR("Arnaud Patard "); MODULE_DESCRIPTION("Cirrus Logic CS42L51 ALSA SoC Codec Driver"); MODULE_LICENSE("GPL"); diff --git a/sound/soc/codecs/cs42l51.h b/sound/soc/codecs/cs42l51.h index 8c55bf384bc6..0ca805492ac4 100644 --- a/sound/soc/codecs/cs42l51.h +++ b/sound/soc/codecs/cs42l51.h @@ -22,6 +22,7 @@ struct device; extern const struct regmap_config cs42l51_regmap; int cs42l51_probe(struct device *dev, struct regmap *regmap); +extern const struct of_device_id cs42l51_of_match[]; #define CS42L51_CHIP_ID 0x1B #define CS42L51_CHIP_REV_A 0x00 From 121a2f6d5f09d929fc663349ba34e248e8d07391 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Mon, 3 Nov 2014 23:20:04 +0100 Subject: [PATCH 049/208] ARM: tegra: Add serial port labels to Tegra124 DT These labels will be used to provide deterministic numbering of consoles in a later patch. Signed-off-by: Lucas Stach [treding@nvidia.com: drop aliases, reword commit message] Signed-off-by: Thierry Reding --- arch/arm/boot/dts/tegra124.dtsi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/tegra124.dtsi b/arch/arm/boot/dts/tegra124.dtsi index 478c555ebd96..df2b06b29985 100644 --- a/arch/arm/boot/dts/tegra124.dtsi +++ b/arch/arm/boot/dts/tegra124.dtsi @@ -286,7 +286,7 @@ * the APB DMA based serial driver, the comptible is * "nvidia,tegra124-hsuart", "nvidia,tegra30-hsuart". */ - serial@0,70006000 { + uarta: serial@0,70006000 { compatible = "nvidia,tegra124-uart", "nvidia,tegra20-uart"; reg = <0x0 0x70006000 0x0 0x40>; reg-shift = <2>; @@ -299,7 +299,7 @@ status = "disabled"; }; - serial@0,70006040 { + uartb: serial@0,70006040 { compatible = "nvidia,tegra124-uart", "nvidia,tegra20-uart"; reg = <0x0 0x70006040 0x0 0x40>; reg-shift = <2>; @@ -312,7 +312,7 @@ status = "disabled"; }; - serial@0,70006200 { + uartc: serial@0,70006200 { compatible = "nvidia,tegra124-uart", "nvidia,tegra20-uart"; reg = <0x0 0x70006200 0x0 0x40>; reg-shift = <2>; @@ -325,7 +325,7 @@ status = "disabled"; }; - serial@0,70006300 { + uartd: serial@0,70006300 { compatible = "nvidia,tegra124-uart", "nvidia,tegra20-uart"; reg = <0x0 0x70006300 0x0 0x40>; reg-shift = <2>; From c4574aa00e7c144ae4d1bfc2388433d9eb82e4d3 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Tue, 11 Nov 2014 12:49:30 -0800 Subject: [PATCH 050/208] ARM: dts: tegra: move serial aliases to per-board There are general changes pending to make the /aliases/serial* entries number the serial ports on the system. On Tegra, so far the ports have been just numbered dynamically as they are configured so that makes them change. To avoid this, add specific aliases per board to keep the old numbers. This allows us to change the numbering by default on future SoCs while keeping the numbering on existing boards. Signed-off-by: Olof Johansson Signed-off-by: Thierry Reding --- arch/arm/boot/dts/tegra114-dalmore.dts | 1 + arch/arm/boot/dts/tegra114-roth.dts | 4 ++++ arch/arm/boot/dts/tegra114-tn7.dts | 4 ++++ arch/arm/boot/dts/tegra114.dtsi | 7 ------- arch/arm/boot/dts/tegra124-jetson-tk1.dts | 1 + arch/arm/boot/dts/tegra124-nyan-big.dts | 1 + arch/arm/boot/dts/tegra124-venice2.dts | 1 + arch/arm/boot/dts/tegra20-harmony.dts | 1 + arch/arm/boot/dts/tegra20-iris-512.dts | 5 +++++ arch/arm/boot/dts/tegra20-medcom-wide.dts | 4 ++++ arch/arm/boot/dts/tegra20-paz00.dts | 2 ++ arch/arm/boot/dts/tegra20-seaboard.dts | 1 + arch/arm/boot/dts/tegra20-tamonten.dtsi | 1 + arch/arm/boot/dts/tegra20-trimslice.dts | 1 + arch/arm/boot/dts/tegra20-ventana.dts | 1 + arch/arm/boot/dts/tegra20-whistler.dts | 1 + arch/arm/boot/dts/tegra20.dtsi | 8 -------- arch/arm/boot/dts/tegra30-apalis-eval.dts | 4 ++++ arch/arm/boot/dts/tegra30-beaver.dts | 1 + arch/arm/boot/dts/tegra30-cardhu.dtsi | 2 ++ arch/arm/boot/dts/tegra30-colibri-eval-v3.dts | 3 +++ arch/arm/boot/dts/tegra30.dtsi | 8 -------- 22 files changed, 39 insertions(+), 23 deletions(-) diff --git a/arch/arm/boot/dts/tegra114-dalmore.dts b/arch/arm/boot/dts/tegra114-dalmore.dts index 5c21d216515a..8b7aa0dcdc6e 100644 --- a/arch/arm/boot/dts/tegra114-dalmore.dts +++ b/arch/arm/boot/dts/tegra114-dalmore.dts @@ -15,6 +15,7 @@ aliases { rtc0 = "/i2c@7000d000/tps65913@58"; rtc1 = "/rtc@7000e000"; + serial0 = &uartd; }; memory { diff --git a/arch/arm/boot/dts/tegra114-roth.dts b/arch/arm/boot/dts/tegra114-roth.dts index c7c6825f11fb..a80f1e24a113 100644 --- a/arch/arm/boot/dts/tegra114-roth.dts +++ b/arch/arm/boot/dts/tegra114-roth.dts @@ -15,6 +15,10 @@ linux,initrd-end = <0x82800000>; }; + aliases { + serial0 = &uartd; + }; + firmware { trusted-foundations { compatible = "tlm,trusted-foundations"; diff --git a/arch/arm/boot/dts/tegra114-tn7.dts b/arch/arm/boot/dts/tegra114-tn7.dts index 963662145635..2301c6601d02 100644 --- a/arch/arm/boot/dts/tegra114-tn7.dts +++ b/arch/arm/boot/dts/tegra114-tn7.dts @@ -15,6 +15,10 @@ linux,initrd-end = <0x82800000>; }; + aliases { + serial0 = &uartd; + }; + firmware { trusted-foundations { compatible = "tlm,trusted-foundations"; diff --git a/arch/arm/boot/dts/tegra114.dtsi b/arch/arm/boot/dts/tegra114.dtsi index 2ca9c1807f72..222f3b3f4dd5 100644 --- a/arch/arm/boot/dts/tegra114.dtsi +++ b/arch/arm/boot/dts/tegra114.dtsi @@ -9,13 +9,6 @@ compatible = "nvidia,tegra114"; interrupt-parent = <&gic>; - aliases { - serial0 = &uarta; - serial1 = &uartb; - serial2 = &uartc; - serial3 = &uartd; - }; - host1x@50000000 { compatible = "nvidia,tegra114-host1x", "simple-bus"; reg = <0x50000000 0x00028000>; diff --git a/arch/arm/boot/dts/tegra124-jetson-tk1.dts b/arch/arm/boot/dts/tegra124-jetson-tk1.dts index 029c9a021541..51b373ff1065 100644 --- a/arch/arm/boot/dts/tegra124-jetson-tk1.dts +++ b/arch/arm/boot/dts/tegra124-jetson-tk1.dts @@ -10,6 +10,7 @@ aliases { rtc0 = "/i2c@0,7000d000/pmic@40"; rtc1 = "/rtc@0,7000e000"; + serial0 = &uartd; }; memory { diff --git a/arch/arm/boot/dts/tegra124-nyan-big.dts b/arch/arm/boot/dts/tegra124-nyan-big.dts index 7d0784ce4c74..53181d310247 100644 --- a/arch/arm/boot/dts/tegra124-nyan-big.dts +++ b/arch/arm/boot/dts/tegra124-nyan-big.dts @@ -10,6 +10,7 @@ aliases { rtc0 = "/i2c@0,7000d000/pmic@40"; rtc1 = "/rtc@0,7000e000"; + serial0 = &uarta; }; memory { diff --git a/arch/arm/boot/dts/tegra124-venice2.dts b/arch/arm/boot/dts/tegra124-venice2.dts index 13008858e967..5c3f7813360d 100644 --- a/arch/arm/boot/dts/tegra124-venice2.dts +++ b/arch/arm/boot/dts/tegra124-venice2.dts @@ -10,6 +10,7 @@ aliases { rtc0 = "/i2c@0,7000d000/pmic@40"; rtc1 = "/rtc@0,7000e000"; + serial0 = &uarta; }; memory { diff --git a/arch/arm/boot/dts/tegra20-harmony.dts b/arch/arm/boot/dts/tegra20-harmony.dts index a37279af687c..b926a07b9443 100644 --- a/arch/arm/boot/dts/tegra20-harmony.dts +++ b/arch/arm/boot/dts/tegra20-harmony.dts @@ -10,6 +10,7 @@ aliases { rtc0 = "/i2c@7000d000/tps6586x@34"; rtc1 = "/rtc@7000e000"; + serial0 = &uartd; }; memory { diff --git a/arch/arm/boot/dts/tegra20-iris-512.dts b/arch/arm/boot/dts/tegra20-iris-512.dts index 8cfb83f42e1f..1dd7d7bfdfcc 100644 --- a/arch/arm/boot/dts/tegra20-iris-512.dts +++ b/arch/arm/boot/dts/tegra20-iris-512.dts @@ -6,6 +6,11 @@ model = "Toradex Colibri T20 512MB on Iris"; compatible = "toradex,iris", "toradex,colibri_t20-512", "nvidia,tegra20"; + aliases { + serial0 = &uarta; + serial1 = &uartd; + }; + host1x@50000000 { hdmi@54280000 { status = "okay"; diff --git a/arch/arm/boot/dts/tegra20-medcom-wide.dts b/arch/arm/boot/dts/tegra20-medcom-wide.dts index 1b7c56b33aca..9b87526ab0b7 100644 --- a/arch/arm/boot/dts/tegra20-medcom-wide.dts +++ b/arch/arm/boot/dts/tegra20-medcom-wide.dts @@ -6,6 +6,10 @@ model = "Avionic Design Medcom-Wide board"; compatible = "ad,medcom-wide", "ad,tamonten", "nvidia,tegra20"; + aliases { + serial0 = &uartd; + }; + pwm@7000a000 { status = "okay"; }; diff --git a/arch/arm/boot/dts/tegra20-paz00.dts b/arch/arm/boot/dts/tegra20-paz00.dts index d4438e30de45..ed7e1009326c 100644 --- a/arch/arm/boot/dts/tegra20-paz00.dts +++ b/arch/arm/boot/dts/tegra20-paz00.dts @@ -10,6 +10,8 @@ aliases { rtc0 = "/i2c@7000d000/tps6586x@34"; rtc1 = "/rtc@7000e000"; + serial0 = &uarta; + serial1 = &uartc; }; memory { diff --git a/arch/arm/boot/dts/tegra20-seaboard.dts b/arch/arm/boot/dts/tegra20-seaboard.dts index a1d4bf9895d7..ea282c7c0ca5 100644 --- a/arch/arm/boot/dts/tegra20-seaboard.dts +++ b/arch/arm/boot/dts/tegra20-seaboard.dts @@ -10,6 +10,7 @@ aliases { rtc0 = "/i2c@7000d000/tps6586x@34"; rtc1 = "/rtc@7000e000"; + serial0 = &uartd; }; memory { diff --git a/arch/arm/boot/dts/tegra20-tamonten.dtsi b/arch/arm/boot/dts/tegra20-tamonten.dtsi index 80e7d386ce34..13d4e6185275 100644 --- a/arch/arm/boot/dts/tegra20-tamonten.dtsi +++ b/arch/arm/boot/dts/tegra20-tamonten.dtsi @@ -7,6 +7,7 @@ aliases { rtc0 = "/i2c@7000d000/tps6586x@34"; rtc1 = "/rtc@7000e000"; + serial0 = &uartd; }; memory { diff --git a/arch/arm/boot/dts/tegra20-trimslice.dts b/arch/arm/boot/dts/tegra20-trimslice.dts index 5ad87979ab13..d99af4ef9c64 100644 --- a/arch/arm/boot/dts/tegra20-trimslice.dts +++ b/arch/arm/boot/dts/tegra20-trimslice.dts @@ -10,6 +10,7 @@ aliases { rtc0 = "/i2c@7000c500/rtc@56"; rtc1 = "/rtc@7000e000"; + serial0 = &uarta; }; memory { diff --git a/arch/arm/boot/dts/tegra20-ventana.dts b/arch/arm/boot/dts/tegra20-ventana.dts index ca8484cccddc..04c58e9ca490 100644 --- a/arch/arm/boot/dts/tegra20-ventana.dts +++ b/arch/arm/boot/dts/tegra20-ventana.dts @@ -10,6 +10,7 @@ aliases { rtc0 = "/i2c@7000d000/tps6586x@34"; rtc1 = "/rtc@7000e000"; + serial0 = &uartd; }; memory { diff --git a/arch/arm/boot/dts/tegra20-whistler.dts b/arch/arm/boot/dts/tegra20-whistler.dts index 1843725785c9..340d81108df1 100644 --- a/arch/arm/boot/dts/tegra20-whistler.dts +++ b/arch/arm/boot/dts/tegra20-whistler.dts @@ -10,6 +10,7 @@ aliases { rtc0 = "/i2c@7000d000/max8907@3c"; rtc1 = "/rtc@7000e000"; + serial0 = &uarta; }; memory { diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi index 3b374c49d04d..8acf5d85c99d 100644 --- a/arch/arm/boot/dts/tegra20.dtsi +++ b/arch/arm/boot/dts/tegra20.dtsi @@ -9,14 +9,6 @@ compatible = "nvidia,tegra20"; interrupt-parent = <&intc>; - aliases { - serial0 = &uarta; - serial1 = &uartb; - serial2 = &uartc; - serial3 = &uartd; - serial4 = &uarte; - }; - host1x@50000000 { compatible = "nvidia,tegra20-host1x", "simple-bus"; reg = <0x50000000 0x00024000>; diff --git a/arch/arm/boot/dts/tegra30-apalis-eval.dts b/arch/arm/boot/dts/tegra30-apalis-eval.dts index 45d40f024585..6236bdecb48b 100644 --- a/arch/arm/boot/dts/tegra30-apalis-eval.dts +++ b/arch/arm/boot/dts/tegra30-apalis-eval.dts @@ -11,6 +11,10 @@ rtc0 = "/i2c@7000c000/rtc@68"; rtc1 = "/i2c@7000d000/tps65911@2d"; rtc2 = "/rtc@7000e000"; + serial0 = &uarta; + serial1 = &uartb; + serial2 = &uartc; + serial3 = &uartd; }; pcie-controller@00003000 { diff --git a/arch/arm/boot/dts/tegra30-beaver.dts b/arch/arm/boot/dts/tegra30-beaver.dts index cee8f2246fdb..6b157eeabcc5 100644 --- a/arch/arm/boot/dts/tegra30-beaver.dts +++ b/arch/arm/boot/dts/tegra30-beaver.dts @@ -9,6 +9,7 @@ aliases { rtc0 = "/i2c@7000d000/tps65911@2d"; rtc1 = "/rtc@7000e000"; + serial0 = &uarta; }; memory { diff --git a/arch/arm/boot/dts/tegra30-cardhu.dtsi b/arch/arm/boot/dts/tegra30-cardhu.dtsi index 206379546244..a1b682ea01bd 100644 --- a/arch/arm/boot/dts/tegra30-cardhu.dtsi +++ b/arch/arm/boot/dts/tegra30-cardhu.dtsi @@ -30,6 +30,8 @@ aliases { rtc0 = "/i2c@7000d000/tps65911@2d"; rtc1 = "/rtc@7000e000"; + serial0 = &uarta; + serial1 = &uartc; }; memory { diff --git a/arch/arm/boot/dts/tegra30-colibri-eval-v3.dts b/arch/arm/boot/dts/tegra30-colibri-eval-v3.dts index 7793abd5bef1..4d3ddc585641 100644 --- a/arch/arm/boot/dts/tegra30-colibri-eval-v3.dts +++ b/arch/arm/boot/dts/tegra30-colibri-eval-v3.dts @@ -10,6 +10,9 @@ rtc0 = "/i2c@7000c000/rtc@68"; rtc1 = "/i2c@7000d000/tps65911@2d"; rtc2 = "/rtc@7000e000"; + serial0 = &uarta; + serial1 = &uartb; + serial2 = &uartd; }; host1x@50000000 { diff --git a/arch/arm/boot/dts/tegra30.dtsi b/arch/arm/boot/dts/tegra30.dtsi index aa6ccea13d30..b270b9e3d455 100644 --- a/arch/arm/boot/dts/tegra30.dtsi +++ b/arch/arm/boot/dts/tegra30.dtsi @@ -9,14 +9,6 @@ compatible = "nvidia,tegra30"; interrupt-parent = <&intc>; - aliases { - serial0 = &uarta; - serial1 = &uartb; - serial2 = &uartc; - serial3 = &uartd; - serial4 = &uarte; - }; - pcie-controller@00003000 { compatible = "nvidia,tegra30-pcie"; device_type = "pci"; From edbde56a25f484d46c9e75563d3b310bde1c185e Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 13 Nov 2014 13:59:02 +0900 Subject: [PATCH 051/208] ARM: tegra: Remove eMMC vmmc property for roth/tn7 This property was wrong and broke eMMC since commit 52221610d ("mmc: sdhci: Improve external VDD regulator support"). Align the eMMC properties to those of other Tegra boards. Signed-off-by: Alexandre Courbot Signed-off-by: Thierry Reding --- arch/arm/boot/dts/tegra114-roth.dts | 1 - arch/arm/boot/dts/tegra114-tn7.dts | 1 - 2 files changed, 2 deletions(-) diff --git a/arch/arm/boot/dts/tegra114-roth.dts b/arch/arm/boot/dts/tegra114-roth.dts index a80f1e24a113..54c79c5a9fbc 100644 --- a/arch/arm/boot/dts/tegra114-roth.dts +++ b/arch/arm/boot/dts/tegra114-roth.dts @@ -975,7 +975,6 @@ sdhci@78000600 { status = "okay"; bus-width = <8>; - vmmc-supply = <&vdd_1v8>; non-removable; }; diff --git a/arch/arm/boot/dts/tegra114-tn7.dts b/arch/arm/boot/dts/tegra114-tn7.dts index 2301c6601d02..f91c2c9b2f94 100644 --- a/arch/arm/boot/dts/tegra114-tn7.dts +++ b/arch/arm/boot/dts/tegra114-tn7.dts @@ -244,7 +244,6 @@ sdhci@78000600 { status = "okay"; bus-width = <8>; - vmmc-supply = <&vdd_1v8>; non-removable; }; From 221b9bf42b26a22e6904d20f35c53aec2e73a646 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 13 Nov 2014 13:59:03 +0900 Subject: [PATCH 052/208] ARM: tegra: roth: Fix SD card VDD_IO regulator vddio_sdmmc3 is a vdd_io, and thus should be under the vqmmc-supply property, not vmmc-supply. Signed-off-by: Alexandre Courbot Signed-off-by: Thierry Reding --- arch/arm/boot/dts/tegra114-roth.dts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/tegra114-roth.dts b/arch/arm/boot/dts/tegra114-roth.dts index 54c79c5a9fbc..38acf78d7815 100644 --- a/arch/arm/boot/dts/tegra114-roth.dts +++ b/arch/arm/boot/dts/tegra114-roth.dts @@ -920,8 +920,6 @@ regulator-name = "vddio-sdmmc3"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <3300000>; - regulator-always-on; - regulator-boot-on; }; ldousb { @@ -966,7 +964,7 @@ sdhci@78000400 { status = "okay"; bus-width = <4>; - vmmc-supply = <&vddio_sdmmc3>; + vqmmc-supply = <&vddio_sdmmc3>; cd-gpios = <&gpio TEGRA_GPIO(V, 2) GPIO_ACTIVE_LOW>; power-gpios = <&gpio TEGRA_GPIO(H, 0) GPIO_ACTIVE_HIGH>; }; From 336b5be2c56f3cf2252f5a39d344ef67cf9bdf7a Mon Sep 17 00:00:00 2001 From: Duc Dang Date: Thu, 6 Nov 2014 17:14:18 -0800 Subject: [PATCH 053/208] PCI: xgene: Assign resources to bus before adding new devices The X-Gene PCIe driver assumes pci_scan_root_bus() assigns resources as proposed in [1]. But we dropped patch [1] because it would break some architectures, which means the X-Gene PCIe driver is currently broken. Add calls to scan the bus, assign resources, and add devices in the X-Gene driver to fix this. [bhelgaas: changelog] [1] http://lkml.kernel.org/r/1412000971-9242-11-git-send-email-Liviu.Dudau@arm.com Signed-off-by: Duc Dang Signed-off-by: Tanmay Inamdar Signed-off-by: Bjorn Helgaas --- drivers/pci/host/pci-xgene.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/pci/host/pci-xgene.c b/drivers/pci/host/pci-xgene.c index 9ecabfa8c634..2988fe136c1e 100644 --- a/drivers/pci/host/pci-xgene.c +++ b/drivers/pci/host/pci-xgene.c @@ -631,10 +631,15 @@ static int xgene_pcie_probe_bridge(struct platform_device *pdev) if (ret) return ret; - bus = pci_scan_root_bus(&pdev->dev, 0, &xgene_pcie_ops, port, &res); + bus = pci_create_root_bus(&pdev->dev, 0, + &xgene_pcie_ops, port, &res); if (!bus) return -ENOMEM; + pci_scan_child_bus(bus); + pci_assign_unassigned_bus_resources(bus); + pci_bus_add_devices(bus); + platform_set_drvdata(pdev, port); return 0; } From 2801f7252d5be3f4f3886c2a5890284232801afe Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Thu, 6 Nov 2014 16:23:20 +0100 Subject: [PATCH 054/208] PCI: Add missing DT binding for "linux,pci-domain" property 41e5c0f81d3e ("of/pci: Add pci_get_new_domain_nr() and of_get_pci_domain_nr()") added parsing of the "linux,pci-domain" property, but didn't add the binding documentation. Since this property will be supported by a number of host bridge drivers, add it to the common PCI binding doc. Fixes: 41e5c0f81d3e ("of/pci: Add pci_get_new_domain_nr() and of_get_pci_domain_nr()") Signed-off-by: Lucas Stach Signed-off-by: Bjorn Helgaas Acked-by: Liviu Dudau Acked-by: Rob Herring --- Documentation/devicetree/bindings/pci/pci.txt | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Documentation/devicetree/bindings/pci/pci.txt b/Documentation/devicetree/bindings/pci/pci.txt index 41aeed38926d..f8fbe9af7b2f 100644 --- a/Documentation/devicetree/bindings/pci/pci.txt +++ b/Documentation/devicetree/bindings/pci/pci.txt @@ -7,3 +7,14 @@ And for the interrupt mapping part: Open Firmware Recommended Practice: Interrupt Mapping http://www.openfirmware.org/1275/practice/imap/imap0_9d.pdf + +Additionally to the properties specified in the above standards a host bridge +driver implementation may support the following properties: + +- linux,pci-domain: + If present this property assigns a fixed PCI domain number to a host bridge, + otherwise an unstable (across boots) unique number will be assigned. + It is required to either not set this property at all or set it for all + host bridges in the system, otherwise potentially conflicting domain numbers + may be assigned to root buses behind different host bridges. The domain + number for each host bridge in the system must be unique. From 7a1562d4f2d01721ad07c3a326db7512077ceea9 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 11 Nov 2014 12:09:46 -0800 Subject: [PATCH 055/208] PCI: Apply _HPX Link Control settings to all devices with a link Previously we applied _HPX type 2 record Link Control register settings only to bridges with a subordinate bus. But it's better to apply them to all devices with a link because if the subordinate bus has not been allocated yet, we won't apply settings to the device. Use pcie_cap_has_lnkctl() to determine whether the device has a Link Control register instead of looking at dev->subordinate. [bhelgaas: changelog] Fixes: 6cd33649fa83 ("PCI: Add pci_configure_device() during enumeration") Signed-off-by: Yinghai Lu Signed-off-by: Bjorn Helgaas --- drivers/pci/access.c | 2 +- drivers/pci/pci.h | 2 ++ drivers/pci/probe.c | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/pci/access.c b/drivers/pci/access.c index d292d7cb3417..49dd766852ba 100644 --- a/drivers/pci/access.c +++ b/drivers/pci/access.c @@ -444,7 +444,7 @@ static inline int pcie_cap_version(const struct pci_dev *dev) return pcie_caps_reg(dev) & PCI_EXP_FLAGS_VERS; } -static inline bool pcie_cap_has_lnkctl(const struct pci_dev *dev) +bool pcie_cap_has_lnkctl(const struct pci_dev *dev) { int type = pci_pcie_type(dev); diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 0601890db22d..4a3902d8e6fe 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -6,6 +6,8 @@ extern const unsigned char pcie_link_speed[]; +bool pcie_cap_has_lnkctl(const struct pci_dev *dev); + /* Functions internal to the PCI core code */ int pci_create_sysfs_dev_files(struct pci_dev *pdev); diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 5ed99309c758..6244b1834dfe 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1323,7 +1323,7 @@ static void program_hpp_type2(struct pci_dev *dev, struct hpp_type2 *hpp) ~hpp->pci_exp_devctl_and, hpp->pci_exp_devctl_or); /* Initialize Link Control Register */ - if (dev->subordinate) + if (pcie_cap_has_lnkctl(dev)) pcie_capability_clear_and_set_word(dev, PCI_EXP_LNKCTL, ~hpp->pci_exp_lnkctl_and, hpp->pci_exp_lnkctl_or); From c251ea7bd7a04f1f2575467e0de76e803cf59149 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Fri, 14 Nov 2014 02:14:47 -0200 Subject: [PATCH 056/208] ASoC: sgtl5000: Fix SMALL_POP bit definition On a mx28evk with a sgtl5000 codec we notice a loud 'click' sound to happen 5 seconds after the end of a playback. The SMALL_POP bit should fix this, but its definition is incorrect: according to the sgtl5000 manual it is bit 0 of CHIP_REF_CTRL register, not bit 1. Fix the definition accordingly and enable the bit as intended per the code comment. After applying this change, no loud 'click' sound is heard after playback Signed-off-by: Fabio Estevam Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/codecs/sgtl5000.c | 3 +-- sound/soc/codecs/sgtl5000.h | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c index 6bb77d76561b..dab9b15304af 100644 --- a/sound/soc/codecs/sgtl5000.c +++ b/sound/soc/codecs/sgtl5000.c @@ -1299,8 +1299,7 @@ static int sgtl5000_probe(struct snd_soc_codec *codec) /* enable small pop, introduce 400ms delay in turning off */ snd_soc_update_bits(codec, SGTL5000_CHIP_REF_CTRL, - SGTL5000_SMALL_POP, - SGTL5000_SMALL_POP); + SGTL5000_SMALL_POP, 1); /* disable short cut detector */ snd_soc_write(codec, SGTL5000_CHIP_SHORT_CTRL, 0); diff --git a/sound/soc/codecs/sgtl5000.h b/sound/soc/codecs/sgtl5000.h index 2f8c88931f69..bd7a344bf8c5 100644 --- a/sound/soc/codecs/sgtl5000.h +++ b/sound/soc/codecs/sgtl5000.h @@ -275,7 +275,7 @@ #define SGTL5000_BIAS_CTRL_MASK 0x000e #define SGTL5000_BIAS_CTRL_SHIFT 1 #define SGTL5000_BIAS_CTRL_WIDTH 3 -#define SGTL5000_SMALL_POP 0x0001 +#define SGTL5000_SMALL_POP 0 /* * SGTL5000_CHIP_MIC_CTRL From 5195c14c8b27cc0b18220ddbf0e5ad3328a04187 Mon Sep 17 00:00:00 2001 From: bill bonaparte Date: Thu, 6 Nov 2014 14:36:48 +0100 Subject: [PATCH 057/208] netfilter: conntrack: fix race in __nf_conntrack_confirm against get_next_corpse After removal of the central spinlock nf_conntrack_lock, in commit 93bb0ceb75be2 ("netfilter: conntrack: remove central spinlock nf_conntrack_lock"), it is possible to race against get_next_corpse(). The race is against the get_next_corpse() cleanup on the "unconfirmed" list (a per-cpu list with seperate locking), which set the DYING bit. Fix this race, in __nf_conntrack_confirm(), by removing the CT from unconfirmed list before checking the DYING bit. In case race occured, re-add the CT to the dying list. While at this, fix coding style of the comment that has been updated. Fixes: 93bb0ceb75be2 ("netfilter: conntrack: remove central spinlock nf_conntrack_lock") Reported-by: bill bonaparte Signed-off-by: bill bonaparte Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 5016a6929085..2c699757bccf 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -611,12 +611,16 @@ __nf_conntrack_confirm(struct sk_buff *skb) */ NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); pr_debug("Confirming conntrack %p\n", ct); - /* We have to check the DYING flag inside the lock to prevent - a race against nf_ct_get_next_corpse() possibly called from - user context, else we insert an already 'dead' hash, blocking - further use of that particular connection -JM */ + + /* We have to check the DYING flag after unlink to prevent + * a race against nf_ct_get_next_corpse() possibly called from + * user context, else we insert an already 'dead' hash, blocking + * further use of that particular connection -JM. + */ + nf_ct_del_from_dying_or_unconfirmed_list(ct); if (unlikely(nf_ct_is_dying(ct))) { + nf_ct_add_to_dying_list(ct); nf_conntrack_double_unlock(hash, reply_hash); local_bh_enable(); return NF_ACCEPT; @@ -636,8 +640,6 @@ __nf_conntrack_confirm(struct sk_buff *skb) zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) goto out; - nf_ct_del_from_dying_or_unconfirmed_list(ct); - /* Timer relative to confirmation time, not original setting time, otherwise we'd get timer wrap in weird delay cases. */ From 84bc88688e3f6ef843aa8803dbcd90168bb89faf Mon Sep 17 00:00:00 2001 From: Vincent BENAYOUN Date: Thu, 13 Nov 2014 13:47:26 +0100 Subject: [PATCH 058/208] inetdevice: fixed signed integer overflow There could be a signed overflow in the following code. The expression, (32-logmask) is comprised between 0 and 31 included. It may be equal to 31. In such a case the left shift will produce a signed integer overflow. According to the C99 Standard, this is an undefined behavior. A simple fix is to replace the signed int 1 with the unsigned int 1U. Signed-off-by: Vincent BENAYOUN Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 0068708161ff..0a21fbefdfbe 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -242,7 +242,7 @@ static inline void in_dev_put(struct in_device *idev) static __inline__ __be32 inet_make_mask(int logmask) { if (logmask) - return htonl(~((1<<(32-logmask))-1)); + return htonl(~((1U<<(32-logmask))-1)); return 0; } From 23e62de33d179e229e4c1dfd93f90a3c7355c519 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Thu, 13 Nov 2014 16:38:12 -0800 Subject: [PATCH 059/208] net: Add vxlan_gso_check() helper Most NICs that report NETIF_F_GSO_UDP_TUNNEL support VXLAN, and not other UDP-based encapsulation protocols where the format and size of the header differs. This patch implements a generic ndo_gso_check() for VXLAN which will only advertise GSO support when the skb looks like it contains VXLAN (or no UDP tunnelling at all). Implementation shamelessly stolen from Tom Herbert: http://thread.gmane.org/gmane.linux.network/332428/focus=333111 Signed-off-by: Joe Stringer Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 13 +++++++++++++ include/net/vxlan.h | 2 ++ 2 files changed, 15 insertions(+) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index fa9dc45b75a6..6b658638b456 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1571,6 +1571,19 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb) return false; } +bool vxlan_gso_check(struct sk_buff *skb) +{ + if ((skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) && + (skb->inner_protocol_type != ENCAP_TYPE_ETHER || + skb->inner_protocol != htons(ETH_P_TEB) || + (skb_inner_mac_header(skb) - skb_transport_header(skb) != + sizeof(struct udphdr) + sizeof(struct vxlanhdr)))) + return false; + + return true; +} +EXPORT_SYMBOL_GPL(vxlan_gso_check); + #if IS_ENABLED(CONFIG_IPV6) static int vxlan6_xmit_skb(struct vxlan_sock *vs, struct dst_entry *dst, struct sk_buff *skb, diff --git a/include/net/vxlan.h b/include/net/vxlan.h index d5f59f3fc35d..afadf8e53f20 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -45,6 +45,8 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, __be32 vni, bool xnet); +bool vxlan_gso_check(struct sk_buff *skb); + /* IP header + UDP + VXLAN + Ethernet header */ #define VXLAN_HEADROOM (20 + 8 + 8 + 14) /* IPv6 header + UDP + VXLAN + Ethernet header */ From 725d548f14362f10c9235ee4b32dc22e7c55e50c Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Thu, 13 Nov 2014 16:38:13 -0800 Subject: [PATCH 060/208] be2net: Implement ndo_gso_check() Use vxlan_gso_check() to advertise offload support for this NIC. Signed-off-by: Joe Stringer Acked-by: Sathya Perla Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 9a18e7930b31..3e8475cae4f9 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -4421,6 +4421,11 @@ static void be_del_vxlan_port(struct net_device *netdev, sa_family_t sa_family, "Disabled VxLAN offloads for UDP port %d\n", be16_to_cpu(port)); } + +static bool be_gso_check(struct sk_buff *skb, struct net_device *dev) +{ + return vxlan_gso_check(skb); +} #endif static const struct net_device_ops be_netdev_ops = { @@ -4450,6 +4455,7 @@ static const struct net_device_ops be_netdev_ops = { #ifdef CONFIG_BE2NET_VXLAN .ndo_add_vxlan_port = be_add_vxlan_port, .ndo_del_vxlan_port = be_del_vxlan_port, + .ndo_gso_check = be_gso_check, #endif }; From 956bdab2e439881e151b7a1779e7f12a3778b68d Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Thu, 13 Nov 2014 16:38:14 -0800 Subject: [PATCH 061/208] net/mlx4_en: Implement ndo_gso_check() Use vxlan_gso_check() to advertise offload support for this NIC. Signed-off-by: Joe Stringer Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 02266e3de514..c5fcc56ec06b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2355,6 +2355,11 @@ static void mlx4_en_del_vxlan_port(struct net_device *dev, queue_work(priv->mdev->workqueue, &priv->vxlan_del_task); } + +static bool mlx4_en_gso_check(struct sk_buff *skb, struct net_device *dev) +{ + return vxlan_gso_check(skb); +} #endif static const struct net_device_ops mlx4_netdev_ops = { @@ -2386,6 +2391,7 @@ static const struct net_device_ops mlx4_netdev_ops = { #ifdef CONFIG_MLX4_EN_VXLAN .ndo_add_vxlan_port = mlx4_en_add_vxlan_port, .ndo_del_vxlan_port = mlx4_en_del_vxlan_port, + .ndo_gso_check = mlx4_en_gso_check, #endif }; From 795a05c1c2a012ab2fd08f1523aa3c529049d291 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Thu, 13 Nov 2014 16:38:15 -0800 Subject: [PATCH 062/208] qlcnic: Implement ndo_gso_check() Use vxlan_gso_check() to advertise offload support for this NIC. Signed-off-by: Joe Stringer Acked-by: Shahed Shaikh Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index f5e29f7bdae3..a913b3ad2f89 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -503,6 +503,11 @@ static void qlcnic_del_vxlan_port(struct net_device *netdev, adapter->flags |= QLCNIC_DEL_VXLAN_PORT; } + +static bool qlcnic_gso_check(struct sk_buff *skb, struct net_device *dev) +{ + return vxlan_gso_check(skb); +} #endif static const struct net_device_ops qlcnic_netdev_ops = { @@ -526,6 +531,7 @@ static const struct net_device_ops qlcnic_netdev_ops = { #ifdef CONFIG_QLCNIC_VXLAN .ndo_add_vxlan_port = qlcnic_add_vxlan_port, .ndo_del_vxlan_port = qlcnic_del_vxlan_port, + .ndo_gso_check = qlcnic_gso_check, #endif #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = qlcnic_poll_controller, From ab64f16ff2e83371927c57a0380fd3c0fee5c1c1 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Tue, 11 Nov 2014 13:40:49 -0800 Subject: [PATCH 063/208] openvswitch: Fix memory leak. Need to free memory in case of sample action error. Introduced by commit 651887b0c22cffcfce7eb9c ("openvswitch: Sample action without side effects"). Signed-off-by: Pravin B Shelar --- net/openvswitch/actions.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 006886dbee36..00e447a17f64 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -722,8 +722,6 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, case OVS_ACTION_ATTR_SAMPLE: err = sample(dp, skb, key, a); - if (unlikely(err)) /* skb already freed. */ - return err; break; } From 856447d0209c2214d806b30bd3b0d873db5998bd Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Tue, 11 Nov 2014 14:32:20 -0800 Subject: [PATCH 064/208] openvswitch: Fix checksum calculation when modifying ICMPv6 packets. The checksum of ICMPv6 packets uses the IP pseudoheader as part of the calculation, unlike ICMP in IPv4. This was not implemented, which means that modifying the IP addresses of an ICMPv6 packet would cause the checksum to no longer be correct as the psuedoheader did not match. Introduced by commit 3fdbd1ce11e5 ("openvswitch: add ipv6 'set' action"). Reported-by: Neal Shrader Signed-off-by: Jesse Gross Signed-off-by: Pravin B Shelar --- net/openvswitch/actions.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 00e447a17f64..8c4229b11c34 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -246,11 +246,11 @@ static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto, { int transport_len = skb->len - skb_transport_offset(skb); - if (l4_proto == IPPROTO_TCP) { + if (l4_proto == NEXTHDR_TCP) { if (likely(transport_len >= sizeof(struct tcphdr))) inet_proto_csum_replace16(&tcp_hdr(skb)->check, skb, addr, new_addr, 1); - } else if (l4_proto == IPPROTO_UDP) { + } else if (l4_proto == NEXTHDR_UDP) { if (likely(transport_len >= sizeof(struct udphdr))) { struct udphdr *uh = udp_hdr(skb); @@ -261,6 +261,10 @@ static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto, uh->check = CSUM_MANGLED_0; } } + } else if (l4_proto == NEXTHDR_ICMP) { + if (likely(transport_len >= sizeof(struct icmp6hdr))) + inet_proto_csum_replace16(&icmp6_hdr(skb)->icmp6_cksum, + skb, addr, new_addr, 1); } } From 19e7a3df7261c9b7ebced8163c383712d5b6ac6b Mon Sep 17 00:00:00 2001 From: Daniele Di Proietto Date: Tue, 11 Nov 2014 14:51:22 -0800 Subject: [PATCH 065/208] openvswitch: Fix NDP flow mask validation match_validate() enforce that a mask matching on NDP attributes has also an exact match on ICMPv6 type. The ICMPv6 type, which is 8-bit wide, is stored in the 'tp.src' field of 'struct sw_flow_key', which is 16-bit wide. Therefore, an exact match on ICMPv6 type should only check the first 8 bits. This commit fixes a bug that prevented flows with an exact match on NDP field from being installed Introduced by commit 03f0d916aa03 ("openvswitch: Mega flow implementation"). Signed-off-by: Daniele Di Proietto Signed-off-by: Pravin B Shelar --- net/openvswitch/flow_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 939bcb32100f..dda040e693a3 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -145,7 +145,7 @@ static bool match_validate(const struct sw_flow_match *match, if (match->key->eth.type == htons(ETH_P_ARP) || match->key->eth.type == htons(ETH_P_RARP)) { key_expected |= 1 << OVS_KEY_ATTR_ARP; - if (match->mask && (match->mask->key.eth.type == htons(0xffff))) + if (match->mask && (match->mask->key.tp.src == htons(0xff))) mask_allowed |= 1 << OVS_KEY_ATTR_ARP; } From 8ec609d8b561468691b60347ff594bd443ea58c0 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Tue, 11 Nov 2014 15:55:16 -0800 Subject: [PATCH 066/208] openvswitch: Convert dp rcu read operation to locked operations dp read operations depends on ovs_dp_cmd_fill_info(). This API needs to looup vport to find dp name, but vport lookup can fail. Therefore to keep vport reference alive we need to take ovs lock. Introduced by commit 6093ae9abac1 ("openvswitch: Minimize dp and vport critical sections"). Signed-off-by: Pravin B Shelar Acked-by: Andy Zhou --- net/openvswitch/datapath.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index e6d7255183eb..f9e556b56086 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1265,7 +1265,7 @@ static size_t ovs_dp_cmd_msg_size(void) return msgsize; } -/* Called with ovs_mutex or RCU read lock. */ +/* Called with ovs_mutex. */ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, u32 portid, u32 seq, u32 flags, u8 cmd) { @@ -1555,7 +1555,7 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info) if (!reply) return -ENOMEM; - rcu_read_lock(); + ovs_lock(); dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); if (IS_ERR(dp)) { err = PTR_ERR(dp); @@ -1564,12 +1564,12 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info) err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, info->snd_seq, 0, OVS_DP_CMD_NEW); BUG_ON(err < 0); - rcu_read_unlock(); + ovs_unlock(); return genlmsg_reply(reply, info); err_unlock_free: - rcu_read_unlock(); + ovs_unlock(); kfree_skb(reply); return err; } @@ -1581,8 +1581,8 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) int skip = cb->args[0]; int i = 0; - rcu_read_lock(); - list_for_each_entry_rcu(dp, &ovs_net->dps, list_node) { + ovs_lock(); + list_for_each_entry(dp, &ovs_net->dps, list_node) { if (i >= skip && ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, @@ -1590,7 +1590,7 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) break; i++; } - rcu_read_unlock(); + ovs_unlock(); cb->args[0] = i; From fecaef85f7188ad1822210e2c7a7625c9a32a8e4 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Tue, 11 Nov 2014 14:36:30 -0800 Subject: [PATCH 067/208] openvswitch: Validate IPv6 flow key and mask values. Reject flow label key and mask values with invalid bits set. Introduced by commit 3fdbd1ce11e5 ("openvswitch: add ipv6 'set' action"). Signed-off-by: Jarno Rajahalme Acked-by: Jesse Gross Signed-off-by: Pravin B Shelar --- net/openvswitch/flow_netlink.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index dda040e693a3..fa4ec2e4a78b 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -689,6 +689,13 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX); return -EINVAL; } + + if (ipv6_key->ipv6_label & htonl(0xFFF00000)) { + OVS_NLERR("IPv6 flow label %x is out of range (max=%x).\n", + ntohl(ipv6_key->ipv6_label), (1 << 20) - 1); + return -EINVAL; + } + SW_FLOW_KEY_PUT(match, ipv6.label, ipv6_key->ipv6_label, is_mask); SW_FLOW_KEY_PUT(match, ip.proto, From 226424eee809251ec23bd4b09d8efba09c10fc3c Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 5 Nov 2014 16:11:44 +0000 Subject: [PATCH 068/208] perf: Fix corruption of sibling list with hotplug When a CPU hotplugged out, we call perf_remove_from_context() (via perf_event_exit_cpu()) to rip each CPU-bound event out of its PMU's cpu context, but leave siblings grouped together. Freeing of these events is left to the mercy of the usual refcounting. When a CPU-bound event's refcount drops to zero we cross-call to __perf_remove_from_context() to clean it up, detaching grouped siblings. This works when the relevant CPU is online, but will fail if the CPU is currently offline, and we won't detach the event from its siblings before freeing the event, leaving the sibling list corrupt. If the sibling list is later walked (e.g. because the CPU cam online again before a remaining sibling's refcount drops to zero), we will walk the now corrupted siblings list, potentially dereferencing garbage values. Given that the events should never be scheduled again (as we removed them from their context), we can simply detatch siblings when the CPU goes down in the first place. If the CPU comes back online, the redundant call to __perf_remove_from_context() is safe. Reported-by: Drew Richardson Signed-off-by: Mark Rutland Signed-off-by: Peter Zijlstra (Intel) Cc: vincent.weaver@maine.edu Cc: Vince Weaver Cc: Will Deacon Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1415203904-25308-2-git-send-email-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 2b02c9fda790..1cd5eef1fcdd 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1562,8 +1562,10 @@ static void perf_remove_from_context(struct perf_event *event, bool detach_group if (!task) { /* - * Per cpu events are removed via an smp call and - * the removal is always successful. + * Per cpu events are removed via an smp call. The removal can + * fail if the CPU is currently offline, but in that case we + * already called __perf_remove_from_context from + * perf_event_exit_cpu. */ cpu_function_call(event->cpu, __perf_remove_from_context, &re); return; @@ -8117,7 +8119,7 @@ static void perf_pmu_rotate_stop(struct pmu *pmu) static void __perf_event_exit_context(void *__info) { - struct remove_event re = { .detach_group = false }; + struct remove_event re = { .detach_group = true }; struct perf_event_context *ctx = __info; perf_pmu_rotate_stop(ctx->pmu); From 41a134a5830a5e1396723ace0a63000780d6e267 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 3 Nov 2014 17:00:27 -0800 Subject: [PATCH 069/208] perf/x86/intel/uncore: Fix IRP uncore register offsets on Haswell EP The counter register offsets for the IRP box PMU for Haswell-EP were incorrect. The offsets actually changed over IvyBridge EP. Fix them to the correct values. For this we need to fork the read function from the IVB and use an own counter array. Tested-by: patrick.lu@intel.com Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Link: http://lkml.kernel.org/r/1415062828-19759-3-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar --- .../kernel/cpu/perf_event_intel_uncore_snbep.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c index adf138eac85c..0af1c932fa0f 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c @@ -2025,13 +2025,27 @@ static struct intel_uncore_type hswep_uncore_imc = { SNBEP_UNCORE_PCI_COMMON_INIT(), }; +static unsigned hswep_uncore_irp_ctrs[] = {0xa0, 0xa8, 0xb0, 0xb8}; + +static u64 hswep_uncore_irp_read_counter(struct intel_uncore_box *box, struct perf_event *event) +{ + struct pci_dev *pdev = box->pci_dev; + struct hw_perf_event *hwc = &event->hw; + u64 count = 0; + + pci_read_config_dword(pdev, hswep_uncore_irp_ctrs[hwc->idx], (u32 *)&count); + pci_read_config_dword(pdev, hswep_uncore_irp_ctrs[hwc->idx] + 4, (u32 *)&count + 1); + + return count; +} + static struct intel_uncore_ops hswep_uncore_irp_ops = { .init_box = snbep_uncore_pci_init_box, .disable_box = snbep_uncore_pci_disable_box, .enable_box = snbep_uncore_pci_enable_box, .disable_event = ivbep_uncore_irp_disable_event, .enable_event = ivbep_uncore_irp_enable_event, - .read_counter = ivbep_uncore_irp_read_counter, + .read_counter = hswep_uncore_irp_read_counter, }; static struct intel_uncore_type hswep_uncore_irp = { From 6e84a8d7ac3ba246ef44e313e92bc16a1da1b04a Mon Sep 17 00:00:00 2001 From: Jurgen Kramer Date: Sat, 15 Nov 2014 14:01:21 +0100 Subject: [PATCH 070/208] ALSA: usb-audio: Add ctrl message delay quirk for Marantz/Denon devices This patch adds a USB control message delay quirk for a few specific Marantz/Denon devices. Without the delay the DACs will not work properly and produces the following type of messages: Nov 15 10:09:21 orwell kernel: [ 91.342880] usb 3-13: clock source 41 is not valid, cannot use Nov 15 10:09:21 orwell kernel: [ 91.343775] usb 3-13: clock source 41 is not valid, cannot use There are likely other Marantz/Denon devices using the same USB module which exhibit the same problems. But as this cannot be verified I limited the patch to the devices I could test. The following two devices are covered by this path: - Marantz SA-14S1 - Marantz HD-DAC1 Signed-off-by: Jurgen Kramer Cc: Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index d2aa45a8d895..a5941f80fc5b 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1146,6 +1146,20 @@ void snd_usb_ctl_msg_quirk(struct usb_device *dev, unsigned int pipe, if ((le16_to_cpu(dev->descriptor.idVendor) == 0x23ba) && (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS) mdelay(20); + + /* Marantz/Denon devices with USB DAC functionality need a delay + * after each class compliant request + */ + if ((le16_to_cpu(dev->descriptor.idVendor) == 0x154e) && + (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS) { + + switch (le16_to_cpu(dev->descriptor.idProduct)) { + case 0x3005: /* Marantz HD-DAC1 */ + case 0x3006: /* Marantz SA-14S1 */ + mdelay(20); + break; + } + } } /* From 68055915c1c22489f9658bd2b7391bb11b2cf4e4 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 3 Nov 2014 17:00:28 -0800 Subject: [PATCH 071/208] perf/x86/intel/uncore: Fix boot crash on SBOX PMU on Haswell-EP There were several reports that on some systems writing the SBOX0 PMU initialization MSR would #GP at boot. This did not happen on all systems -- my two test systems booted fine. Writing the three initialization bits bit-by-bit seems to avoid the problem. So add a special callback to do just that. This replaces an earlier patch that disabled the SBOX. Reported-by: Alexei Starovoitov Reported-and-Tested-by: Patrick Lu Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Link: http://lkml.kernel.org/r/1415062828-19759-4-git-send-email-andi@firstfloor.org [ Fixed a whitespace error and added attribution tags that were left out inexplicably. ] Signed-off-by: Ingo Molnar --- .../cpu/perf_event_intel_uncore_snbep.c | 33 +++++++++++++++++-- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c index 0af1c932fa0f..f9ed429d6e4f 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c @@ -486,14 +486,17 @@ static struct attribute_group snbep_uncore_qpi_format_group = { .attrs = snbep_uncore_qpi_formats_attr, }; -#define SNBEP_UNCORE_MSR_OPS_COMMON_INIT() \ - .init_box = snbep_uncore_msr_init_box, \ +#define __SNBEP_UNCORE_MSR_OPS_COMMON_INIT() \ .disable_box = snbep_uncore_msr_disable_box, \ .enable_box = snbep_uncore_msr_enable_box, \ .disable_event = snbep_uncore_msr_disable_event, \ .enable_event = snbep_uncore_msr_enable_event, \ .read_counter = uncore_msr_read_counter +#define SNBEP_UNCORE_MSR_OPS_COMMON_INIT() \ + __SNBEP_UNCORE_MSR_OPS_COMMON_INIT(), \ + .init_box = snbep_uncore_msr_init_box \ + static struct intel_uncore_ops snbep_uncore_msr_ops = { SNBEP_UNCORE_MSR_OPS_COMMON_INIT(), }; @@ -1919,6 +1922,30 @@ static struct intel_uncore_type hswep_uncore_cbox = { .format_group = &hswep_uncore_cbox_format_group, }; +/* + * Write SBOX Initialization register bit by bit to avoid spurious #GPs + */ +static void hswep_uncore_sbox_msr_init_box(struct intel_uncore_box *box) +{ + unsigned msr = uncore_msr_box_ctl(box); + + if (msr) { + u64 init = SNBEP_PMON_BOX_CTL_INT; + u64 flags = 0; + int i; + + for_each_set_bit(i, (unsigned long *)&init, 64) { + flags |= (1ULL << i); + wrmsrl(msr, flags); + } + } +} + +static struct intel_uncore_ops hswep_uncore_sbox_msr_ops = { + __SNBEP_UNCORE_MSR_OPS_COMMON_INIT(), + .init_box = hswep_uncore_sbox_msr_init_box +}; + static struct attribute *hswep_uncore_sbox_formats_attr[] = { &format_attr_event.attr, &format_attr_umask.attr, @@ -1944,7 +1971,7 @@ static struct intel_uncore_type hswep_uncore_sbox = { .event_mask = HSWEP_S_MSR_PMON_RAW_EVENT_MASK, .box_ctl = HSWEP_S0_MSR_PMON_BOX_CTL, .msr_offset = HSWEP_SBOX_MSR_OFFSET, - .ops = &snbep_uncore_msr_ops, + .ops = &hswep_uncore_sbox_msr_ops, .format_group = &hswep_uncore_sbox_format_group, }; From 00b4d9a14125f1e51874def2b9de6092e007412d Mon Sep 17 00:00:00 2001 From: Maxime COQUELIN Date: Thu, 6 Nov 2014 10:54:19 +0100 Subject: [PATCH 072/208] bitops: Fix shift overflow in GENMASK macros On some 32 bits architectures, including x86, GENMASK(31, 0) returns 0 instead of the expected ~0UL. This is the same on some 64 bits architectures with GENMASK_ULL(63, 0). This is due to an overflow in the shift operand, 1 << 32 for GENMASK, 1 << 64 for GENMASK_ULL. Reported-by: Eric Paire Suggested-by: Rasmus Villemoes Signed-off-by: Maxime Coquelin Signed-off-by: Peter Zijlstra (Intel) Cc: # v3.13+ Cc: linux@rasmusvillemoes.dk Cc: gong.chen@linux.intel.com Cc: John Sullivan Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Theodore Ts'o Fixes: 10ef6b0dffe4 ("bitops: Introduce a more generic BITMASK macro") Link: http://lkml.kernel.org/r/1415267659-10563-1-git-send-email-maxime.coquelin@st.com Signed-off-by: Ingo Molnar --- include/linux/bitops.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/include/linux/bitops.h b/include/linux/bitops.h index be5fd38bd5a0..5d858e02997f 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -18,8 +18,11 @@ * position @h. For example * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000. */ -#define GENMASK(h, l) (((U32_C(1) << ((h) - (l) + 1)) - 1) << (l)) -#define GENMASK_ULL(h, l) (((U64_C(1) << ((h) - (l) + 1)) - 1) << (l)) +#define GENMASK(h, l) \ + (((~0UL) << (l)) & (~0UL >> (BITS_PER_LONG - 1 - (h)))) + +#define GENMASK_ULL(h, l) \ + (((~0ULL) << (l)) & (~0ULL >> (BITS_PER_LONG_LONG - 1 - (h)))) extern unsigned int __sw_hweight8(unsigned int w); extern unsigned int __sw_hweight16(unsigned int w); From 7af683350cb0ddd0e9d3819b4eb7abe9e2d3e709 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 10 Nov 2014 10:54:35 +0100 Subject: [PATCH 073/208] sched/numa: Avoid selecting oneself as swap target Because the whole numa task selection stuff runs with preemption enabled (its long and expensive) we can end up migrating and selecting oneself as a swap target. This doesn't really work out well -- we end up trying to acquire the same lock twice for the swap migrate -- so avoid this. Reported-and-Tested-by: Sasha Levin Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20141110100328.GF29390@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 34baa60f8a7b..3af3d1e7df9b 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1179,6 +1179,13 @@ static void task_numa_compare(struct task_numa_env *env, cur = NULL; raw_spin_unlock_irq(&dst_rq->lock); + /* + * Because we have preemption enabled we can get migrated around and + * end try selecting ourselves (current == env->p) as a swap candidate. + */ + if (cur == env->p) + goto unlock; + /* * "imp" is the fault differential for the source task between the * source and destination node. Calculate the total differential for From 23cfa361f3e54a3e184a5e126bbbdd95f984881a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 12 Nov 2014 12:37:37 +0100 Subject: [PATCH 074/208] sched/cputime: Fix cpu_timer_sample_group() double accounting While looking over the cpu-timer code I found that we appear to add the delta for the calling task twice, through: cpu_timer_sample_group() thread_group_cputimer() thread_group_cputime() times->sum_exec_runtime += task_sched_runtime(); *sample = cputime.sum_exec_runtime + task_delta_exec(); Which would make the sample run ahead, making the sleep short. Signed-off-by: Peter Zijlstra (Intel) Cc: KOSAKI Motohiro Cc: Oleg Nesterov Cc: Stanislaw Gruszka Cc: Christoph Lameter Cc: Frederic Weisbecker Cc: Linus Torvalds Cc: Rik van Riel Cc: Tejun Heo Link: http://lkml.kernel.org/r/20141112113737.GI10476@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- include/linux/kernel_stat.h | 5 ----- kernel/sched/core.c | 13 ------------- kernel/time/posix-cpu-timers.c | 2 +- 3 files changed, 1 insertion(+), 19 deletions(-) diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 8422b4ed6882..b9376cd5a187 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -77,11 +77,6 @@ static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu) return kstat_cpu(cpu).irqs_sum; } -/* - * Lock/unlock the current runqueue - to extract task statistics: - */ -extern unsigned long long task_delta_exec(struct task_struct *); - extern void account_user_time(struct task_struct *, cputime_t, cputime_t); extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t); extern void account_steal_time(cputime_t); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 5f12ca65c9a7..797a6c84c48d 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2499,19 +2499,6 @@ static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq) return ns; } -unsigned long long task_delta_exec(struct task_struct *p) -{ - unsigned long flags; - struct rq *rq; - u64 ns = 0; - - rq = task_rq_lock(p, &flags); - ns = do_task_delta_exec(p, rq); - task_rq_unlock(rq, p, &flags); - - return ns; -} - /* * Return accounted runtime for the task. * In case the task is currently running, return the runtime plus current's diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 492b986195d5..a16b67859e2a 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -553,7 +553,7 @@ static int cpu_timer_sample_group(const clockid_t which_clock, *sample = cputime_to_expires(cputime.utime); break; case CPUCLOCK_SCHED: - *sample = cputime.sum_exec_runtime + task_delta_exec(p); + *sample = cputime.sum_exec_runtime; break; } return 0; From 6e998916dfe327e785e7c2447959b2c1a3ea4930 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Wed, 12 Nov 2014 16:58:44 +0100 Subject: [PATCH 075/208] sched/cputime: Fix clock_nanosleep()/clock_gettime() inconsistency Commit d670ec13178d0 "posix-cpu-timers: Cure SMP wobbles" fixes one glibc test case in cost of breaking another one. After that commit, calling clock_nanosleep(TIMER_ABSTIME, X) and then clock_gettime(&Y) can result of Y time being smaller than X time. Reproducer/tester can be found further below, it can be compiled and ran by: gcc -o tst-cpuclock2 tst-cpuclock2.c -pthread while ./tst-cpuclock2 ; do : ; done This reproducer, when running on a buggy kernel, will complain about "clock_gettime difference too small". Issue happens because on start in thread_group_cputimer() we initialize sum_exec_runtime of cputimer with threads runtime not yet accounted and then add the threads runtime to running cputimer again on scheduler tick, making it's sum_exec_runtime bigger than actual threads runtime. KOSAKI Motohiro posted a fix for this problem, but that patch was never applied: https://lkml.org/lkml/2013/5/26/191 . This patch takes different approach to cure the problem. It calls update_curr() when cputimer starts, that assure we will have updated stats of running threads and on the next schedule tick we will account only the runtime that elapsed from cputimer start. That also assure we have consistent state between cpu times of individual threads and cpu time of the process consisted by those threads. Full reproducer (tst-cpuclock2.c): #define _GNU_SOURCE #include #include #include #include #include #include #include /* Parameters for the Linux kernel ABI for CPU clocks. */ #define CPUCLOCK_SCHED 2 #define MAKE_PROCESS_CPUCLOCK(pid, clock) \ ((~(clockid_t) (pid) << 3) | (clockid_t) (clock)) static pthread_barrier_t barrier; /* Help advance the clock. */ static void *chew_cpu(void *arg) { pthread_barrier_wait(&barrier); while (1) ; return NULL; } /* Don't use the glibc wrapper. */ static int do_nanosleep(int flags, const struct timespec *req) { clockid_t clock_id = MAKE_PROCESS_CPUCLOCK(0, CPUCLOCK_SCHED); return syscall(SYS_clock_nanosleep, clock_id, flags, req, NULL); } static int64_t tsdiff(const struct timespec *before, const struct timespec *after) { int64_t before_i = before->tv_sec * 1000000000ULL + before->tv_nsec; int64_t after_i = after->tv_sec * 1000000000ULL + after->tv_nsec; return after_i - before_i; } int main(void) { int result = 0; pthread_t th; pthread_barrier_init(&barrier, NULL, 2); if (pthread_create(&th, NULL, chew_cpu, NULL) != 0) { perror("pthread_create"); return 1; } pthread_barrier_wait(&barrier); /* The test. */ struct timespec before, after, sleeptimeabs; int64_t sleepdiff, diffabs; const struct timespec sleeptime = {.tv_sec = 0,.tv_nsec = 100000000 }; /* The relative nanosleep. Not sure why this is needed, but its presence seems to make it easier to reproduce the problem. */ if (do_nanosleep(0, &sleeptime) != 0) { perror("clock_nanosleep"); return 1; } /* Get the current time. */ if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &before) < 0) { perror("clock_gettime[2]"); return 1; } /* Compute the absolute sleep time based on the current time. */ uint64_t nsec = before.tv_nsec + sleeptime.tv_nsec; sleeptimeabs.tv_sec = before.tv_sec + nsec / 1000000000; sleeptimeabs.tv_nsec = nsec % 1000000000; /* Sleep for the computed time. */ if (do_nanosleep(TIMER_ABSTIME, &sleeptimeabs) != 0) { perror("absolute clock_nanosleep"); return 1; } /* Get the time after the sleep. */ if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &after) < 0) { perror("clock_gettime[3]"); return 1; } /* The time after sleep should always be equal to or after the absolute sleep time passed to clock_nanosleep. */ sleepdiff = tsdiff(&sleeptimeabs, &after); if (sleepdiff < 0) { printf("absolute clock_nanosleep woke too early: %" PRId64 "\n", sleepdiff); result = 1; printf("Before %llu.%09llu\n", before.tv_sec, before.tv_nsec); printf("After %llu.%09llu\n", after.tv_sec, after.tv_nsec); printf("Sleep %llu.%09llu\n", sleeptimeabs.tv_sec, sleeptimeabs.tv_nsec); } /* The difference between the timestamps taken before and after the clock_nanosleep call should be equal to or more than the duration of the sleep. */ diffabs = tsdiff(&before, &after); if (diffabs < sleeptime.tv_nsec) { printf("clock_gettime difference too small: %" PRId64 "\n", diffabs); result = 1; } pthread_cancel(th); return result; } Signed-off-by: Stanislaw Gruszka Signed-off-by: Peter Zijlstra (Intel) Cc: Rik van Riel Cc: Frederic Weisbecker Cc: KOSAKI Motohiro Cc: Oleg Nesterov Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20141112155843.GA24803@redhat.com Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 38 +++++++++++--------------------------- kernel/sched/deadline.c | 2 ++ kernel/sched/fair.c | 7 +++++++ kernel/sched/rt.c | 2 ++ kernel/sched/sched.h | 2 ++ 5 files changed, 24 insertions(+), 27 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 797a6c84c48d..24beb9bb4c3e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2474,31 +2474,6 @@ DEFINE_PER_CPU(struct kernel_cpustat, kernel_cpustat); EXPORT_PER_CPU_SYMBOL(kstat); EXPORT_PER_CPU_SYMBOL(kernel_cpustat); -/* - * Return any ns on the sched_clock that have not yet been accounted in - * @p in case that task is currently running. - * - * Called with task_rq_lock() held on @rq. - */ -static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq) -{ - u64 ns = 0; - - /* - * Must be ->curr _and_ ->on_rq. If dequeued, we would - * project cycles that may never be accounted to this - * thread, breaking clock_gettime(). - */ - if (task_current(rq, p) && task_on_rq_queued(p)) { - update_rq_clock(rq); - ns = rq_clock_task(rq) - p->se.exec_start; - if ((s64)ns < 0) - ns = 0; - } - - return ns; -} - /* * Return accounted runtime for the task. * In case the task is currently running, return the runtime plus current's @@ -2508,7 +2483,7 @@ unsigned long long task_sched_runtime(struct task_struct *p) { unsigned long flags; struct rq *rq; - u64 ns = 0; + u64 ns; #if defined(CONFIG_64BIT) && defined(CONFIG_SMP) /* @@ -2527,7 +2502,16 @@ unsigned long long task_sched_runtime(struct task_struct *p) #endif rq = task_rq_lock(p, &flags); - ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq); + /* + * Must be ->curr _and_ ->on_rq. If dequeued, we would + * project cycles that may never be accounted to this + * thread, breaking clock_gettime(). + */ + if (task_current(rq, p) && task_on_rq_queued(p)) { + update_rq_clock(rq); + p->sched_class->update_curr(rq); + } + ns = p->se.sum_exec_runtime; task_rq_unlock(rq, p, &flags); return ns; diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 5285332392d5..28fa9d9e9201 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1701,4 +1701,6 @@ const struct sched_class dl_sched_class = { .prio_changed = prio_changed_dl, .switched_from = switched_from_dl, .switched_to = switched_to_dl, + + .update_curr = update_curr_dl, }; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 3af3d1e7df9b..ef2b104b254c 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -726,6 +726,11 @@ static void update_curr(struct cfs_rq *cfs_rq) account_cfs_rq_runtime(cfs_rq, delta_exec); } +static void update_curr_fair(struct rq *rq) +{ + update_curr(cfs_rq_of(&rq->curr->se)); +} + static inline void update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se) { @@ -7956,6 +7961,8 @@ const struct sched_class fair_sched_class = { .get_rr_interval = get_rr_interval_fair, + .update_curr = update_curr_fair, + #ifdef CONFIG_FAIR_GROUP_SCHED .task_move_group = task_move_group_fair, #endif diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index d024e6ce30ba..20bca398084a 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -2128,6 +2128,8 @@ const struct sched_class rt_sched_class = { .prio_changed = prio_changed_rt, .switched_to = switched_to_rt, + + .update_curr = update_curr_rt, }; #ifdef CONFIG_SCHED_DEBUG diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 24156c8434d1..2df8ef067cc5 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1135,6 +1135,8 @@ struct sched_class { unsigned int (*get_rr_interval) (struct rq *rq, struct task_struct *task); + void (*update_curr) (struct rq *rq); + #ifdef CONFIG_FAIR_GROUP_SCHED void (*task_move_group) (struct task_struct *p, int on_rq); #endif From 2cd3949f702692cf4c5d05b463f19cd706a92dd3 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Tue, 11 Nov 2014 14:01:33 -0800 Subject: [PATCH 076/208] x86: Require exact match for 'noxsave' command line option We have some very similarly named command-line options: arch/x86/kernel/cpu/common.c:__setup("noxsave", x86_xsave_setup); arch/x86/kernel/cpu/common.c:__setup("noxsaveopt", x86_xsaveopt_setup); arch/x86/kernel/cpu/common.c:__setup("noxsaves", x86_xsaves_setup); __setup() is designed to match options that take arguments, like "foo=bar" where you would have: __setup("foo", x86_foo_func...); The problem is that "noxsave" actually _matches_ "noxsaves" in the same way that "foo" matches "foo=bar". If you boot an old kernel that does not know about "noxsaves" with "noxsaves" on the command line, it will interpret the argument as "noxsave", which is not what you want at all. This makes the "noxsave" handler only return success when it finds an *exact* match. [ tglx: We really need to make __setup() more robust. ] Signed-off-by: Dave Hansen Cc: Dave Hansen Cc: Fenghua Yu Cc: x86@kernel.org Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20141111220133.FE053984@viggo.jf.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/common.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 4b4f78c9ba19..cfa9b5b2c27a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -146,6 +146,8 @@ EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); static int __init x86_xsave_setup(char *s) { + if (strlen(s)) + return 0; setup_clear_cpu_cap(X86_FEATURE_XSAVE); setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); setup_clear_cpu_cap(X86_FEATURE_XSAVES); From 49dd18ba4615eaa72f15c9087dea1c2ab4744cf5 Mon Sep 17 00:00:00 2001 From: Panu Matilainen Date: Fri, 14 Nov 2014 13:14:32 +0200 Subject: [PATCH 077/208] ipv4: Fix incorrect error code when adding an unreachable route Trying to add an unreachable route incorrectly returns -ESRCH if if custom FIB rules are present: [root@localhost ~]# ip route add 74.125.31.199 dev eth0 via 1.2.3.4 RTNETLINK answers: Network is unreachable [root@localhost ~]# ip rule add to 55.66.77.88 table 200 [root@localhost ~]# ip route add 74.125.31.199 dev eth0 via 1.2.3.4 RTNETLINK answers: No such process [root@localhost ~]# Commit 83886b6b636173b206f475929e58fac75c6f2446 ("[NET]: Change "not found" return value for rule lookup") changed fib_rules_lookup() to use -ESRCH as a "not found" code internally, but for user space it should be translated into -ENETUNREACH. Handle the translation centrally in ipv4-specific fib_lookup(), leaving the DECnet case alone. On a related note, commit b7a71b51ee37d919e4098cd961d59a883fd272d8 ("ipv4: removed redundant conditional") removed a similar translation from ip_route_input_slow() prematurely AIUI. Fixes: b7a71b51ee37 ("ipv4: removed redundant conditional") Signed-off-by: Panu Matilainen Signed-off-by: David S. Miller --- net/ipv4/fib_rules.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index f2e15738534d..8f7bd56955b0 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -62,6 +62,10 @@ int __fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res) else res->tclassid = 0; #endif + + if (err == -ESRCH) + err = -ENETUNREACH; + return err; } EXPORT_SYMBOL_GPL(__fib_lookup); From 9f458945080f9e618641ff3ef04e60be0895d7e4 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 14 Nov 2014 15:16:47 +0100 Subject: [PATCH 078/208] reciprocal_div: objects with exported symbols should be obj-y rather than lib-y Otherwise the exported symbols might be discarded because of no users in vmlinux. Reported-by: Jim Davis Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- lib/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/Makefile b/lib/Makefile index 7512dc978f18..0211d2bd5e17 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -10,7 +10,7 @@ endif lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o dump_stack.o timerqueue.o\ idr.o int_sqrt.o extable.o \ - sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \ + sha1.o md5.o irq_regs.o argv_split.o \ proportions.o flex_proportions.o ratelimit.o show_mem.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ earlycpio.o @@ -26,7 +26,7 @@ obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o clz_ctz.o \ bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \ - percpu-refcount.o percpu_ida.o hash.o rhashtable.o + percpu-refcount.o percpu_ida.o hash.o rhashtable.o reciprocal_div.o obj-y += string_helpers.o obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o obj-y += kstrtox.o From 35717d8d6fc6fc50692273d6667a0a575c26aa93 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Fri, 14 Nov 2014 15:42:52 +0100 Subject: [PATCH 079/208] drivers: net: cpsw: Fix TX_IN_SEL offset The TX_IN_SEL offset for the CPSW_PORT/TX_IN_CTL register was incorrect. This caused the Dual MAC mode to never get set when it should. It also caused possible unintentional setting of a bit in the CPSW_PORT/TX_BLKS_REM register. The purpose of setting the Dual MAC mode for this register is to: "... allow packets from both ethernet ports to be written into the FIFO without one port starving the other port." - AM335x ARM TRM Signed-off-by: John Ogness Reviewed-by: Mugunthan V N Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index d8794488f80a..c560f9aeb55d 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -129,9 +129,9 @@ do { \ #define CPSW_VLAN_AWARE BIT(1) #define CPSW_ALE_VLAN_AWARE 1 -#define CPSW_FIFO_NORMAL_MODE (0 << 15) -#define CPSW_FIFO_DUAL_MAC_MODE (1 << 15) -#define CPSW_FIFO_RATE_LIMIT_MODE (2 << 15) +#define CPSW_FIFO_NORMAL_MODE (0 << 16) +#define CPSW_FIFO_DUAL_MAC_MODE (1 << 16) +#define CPSW_FIFO_RATE_LIMIT_MODE (2 << 16) #define CPSW_INTPACEEN (0x3f << 16) #define CPSW_INTPRESCALE_MASK (0x7FF << 0) From 8c2dd54485ccee7fc4086611e188478584758c8d Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sat, 15 Nov 2014 02:11:59 +0300 Subject: [PATCH 080/208] ieee802154: fix error handling in ieee802154fake_probe() In case of any failure ieee802154fake_probe() just calls unregister_netdev(). But it does not look safe to unregister netdevice before it was registered. The patch implements straightforward resource deallocation in case of failure in ieee802154fake_probe(). Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Signed-off-by: David S. Miller --- drivers/net/ieee802154/fakehard.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/ieee802154/fakehard.c b/drivers/net/ieee802154/fakehard.c index 9ce854f43917..6cbc56ad9ff4 100644 --- a/drivers/net/ieee802154/fakehard.c +++ b/drivers/net/ieee802154/fakehard.c @@ -377,17 +377,20 @@ static int ieee802154fake_probe(struct platform_device *pdev) err = wpan_phy_register(phy); if (err) - goto out; + goto err_phy_reg; err = register_netdev(dev); - if (err < 0) - goto out; + if (err) + goto err_netdev_reg; dev_info(&pdev->dev, "Added ieee802154 HardMAC hardware\n"); return 0; -out: - unregister_netdev(dev); +err_netdev_reg: + wpan_phy_unregister(phy); +err_phy_reg: + free_netdev(dev); + wpan_phy_free(phy); return err; } From 52cff74eef5dd7bdab759300e7d1ca36eba18254 Mon Sep 17 00:00:00 2001 From: Anish Bhatt Date: Fri, 14 Nov 2014 16:38:31 -0800 Subject: [PATCH 081/208] dcbnl : Disable software interrupts before taking dcb_lock Solves possible lockup issues that can be seen from firmware DCB agents calling into the DCB app api. DCB firmware event queues can be tied in with NAPI so that dcb events are generated in softIRQ context. This can results in calls to dcb_*app() functions which try to take the dcb_lock. If the the event triggers while we also have the dcb_lock because lldpad or some other agent happened to be issuing a get/set command we could see a cpu lockup. This code was not originally written with firmware agents in mind, hence grabbing dcb_lock from softIRQ context was not considered. Signed-off-by: Anish Bhatt Signed-off-by: David S. Miller --- net/dcb/dcbnl.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index ca11d283bbeb..93ea80196f0e 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1080,13 +1080,13 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev) if (!app) return -EMSGSIZE; - spin_lock(&dcb_lock); + spin_lock_bh(&dcb_lock); list_for_each_entry(itr, &dcb_app_list, list) { if (itr->ifindex == netdev->ifindex) { err = nla_put(skb, DCB_ATTR_IEEE_APP, sizeof(itr->app), &itr->app); if (err) { - spin_unlock(&dcb_lock); + spin_unlock_bh(&dcb_lock); return -EMSGSIZE; } } @@ -1097,7 +1097,7 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev) else dcbx = -EOPNOTSUPP; - spin_unlock(&dcb_lock); + spin_unlock_bh(&dcb_lock); nla_nest_end(skb, app); /* get peer info if available */ @@ -1234,7 +1234,7 @@ static int dcbnl_cee_fill(struct sk_buff *skb, struct net_device *netdev) } /* local app */ - spin_lock(&dcb_lock); + spin_lock_bh(&dcb_lock); app = nla_nest_start(skb, DCB_ATTR_CEE_APP_TABLE); if (!app) goto dcb_unlock; @@ -1271,7 +1271,7 @@ static int dcbnl_cee_fill(struct sk_buff *skb, struct net_device *netdev) else dcbx = -EOPNOTSUPP; - spin_unlock(&dcb_lock); + spin_unlock_bh(&dcb_lock); /* features flags */ if (ops->getfeatcfg) { @@ -1326,7 +1326,7 @@ static int dcbnl_cee_fill(struct sk_buff *skb, struct net_device *netdev) return 0; dcb_unlock: - spin_unlock(&dcb_lock); + spin_unlock_bh(&dcb_lock); nla_put_failure: return err; } @@ -1762,10 +1762,10 @@ u8 dcb_getapp(struct net_device *dev, struct dcb_app *app) struct dcb_app_type *itr; u8 prio = 0; - spin_lock(&dcb_lock); + spin_lock_bh(&dcb_lock); if ((itr = dcb_app_lookup(app, dev->ifindex, 0))) prio = itr->app.priority; - spin_unlock(&dcb_lock); + spin_unlock_bh(&dcb_lock); return prio; } @@ -1789,7 +1789,7 @@ int dcb_setapp(struct net_device *dev, struct dcb_app *new) if (dev->dcbnl_ops->getdcbx) event.dcbx = dev->dcbnl_ops->getdcbx(dev); - spin_lock(&dcb_lock); + spin_lock_bh(&dcb_lock); /* Search for existing match and replace */ if ((itr = dcb_app_lookup(new, dev->ifindex, 0))) { if (new->priority) @@ -1804,7 +1804,7 @@ int dcb_setapp(struct net_device *dev, struct dcb_app *new) if (new->priority) err = dcb_app_add(new, dev->ifindex); out: - spin_unlock(&dcb_lock); + spin_unlock_bh(&dcb_lock); if (!err) call_dcbevent_notifiers(DCB_APP_EVENT, &event); return err; @@ -1823,10 +1823,10 @@ u8 dcb_ieee_getapp_mask(struct net_device *dev, struct dcb_app *app) struct dcb_app_type *itr; u8 prio = 0; - spin_lock(&dcb_lock); + spin_lock_bh(&dcb_lock); if ((itr = dcb_app_lookup(app, dev->ifindex, 0))) prio |= 1 << itr->app.priority; - spin_unlock(&dcb_lock); + spin_unlock_bh(&dcb_lock); return prio; } @@ -1850,7 +1850,7 @@ int dcb_ieee_setapp(struct net_device *dev, struct dcb_app *new) if (dev->dcbnl_ops->getdcbx) event.dcbx = dev->dcbnl_ops->getdcbx(dev); - spin_lock(&dcb_lock); + spin_lock_bh(&dcb_lock); /* Search for existing match and abort if found */ if (dcb_app_lookup(new, dev->ifindex, new->priority)) { err = -EEXIST; @@ -1859,7 +1859,7 @@ int dcb_ieee_setapp(struct net_device *dev, struct dcb_app *new) err = dcb_app_add(new, dev->ifindex); out: - spin_unlock(&dcb_lock); + spin_unlock_bh(&dcb_lock); if (!err) call_dcbevent_notifiers(DCB_APP_EVENT, &event); return err; @@ -1882,7 +1882,7 @@ int dcb_ieee_delapp(struct net_device *dev, struct dcb_app *del) if (dev->dcbnl_ops->getdcbx) event.dcbx = dev->dcbnl_ops->getdcbx(dev); - spin_lock(&dcb_lock); + spin_lock_bh(&dcb_lock); /* Search for existing match and remove it. */ if ((itr = dcb_app_lookup(del, dev->ifindex, del->priority))) { list_del(&itr->list); @@ -1890,7 +1890,7 @@ int dcb_ieee_delapp(struct net_device *dev, struct dcb_app *del) err = 0; } - spin_unlock(&dcb_lock); + spin_unlock_bh(&dcb_lock); if (!err) call_dcbevent_notifiers(DCB_APP_EVENT, &event); return err; @@ -1902,12 +1902,12 @@ static void dcb_flushapp(void) struct dcb_app_type *app; struct dcb_app_type *tmp; - spin_lock(&dcb_lock); + spin_lock_bh(&dcb_lock); list_for_each_entry_safe(app, tmp, &dcb_app_list, list) { list_del(&app->list); kfree(app); } - spin_unlock(&dcb_lock); + spin_unlock_bh(&dcb_lock); } static int __init dcbnl_init(void) From bb2bdeb83fb125c95e47fc7eca2a3e8f868e2a74 Mon Sep 17 00:00:00 2001 From: Martin Hauke Date: Sun, 16 Nov 2014 19:55:25 +0100 Subject: [PATCH 082/208] qmi_wwan: Add support for HP lt4112 LTE/HSPA+ Gobi 4G Modem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Added the USB VID/PID for the HP lt4112 LTE/HSPA+ Gobi 4G Modem (Huawei me906e) Signed-off-by: Martin Hauke Acked-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 22756db53dca..b8a82b86f909 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -780,6 +780,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x413c, 0x81a4, 8)}, /* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */ {QMI_FIXED_INTF(0x413c, 0x81a8, 8)}, /* Dell Wireless 5808 Gobi(TM) 4G LTE Mobile Broadband Card */ {QMI_FIXED_INTF(0x413c, 0x81a9, 8)}, /* Dell Wireless 5808e Gobi(TM) 4G LTE Mobile Broadband Card */ + {QMI_FIXED_INTF(0x03f0, 0x581d, 4)}, /* HP lt4112 LTE/HSPA+ Gobi 4G Module (Huawei me906e) */ /* 4. Gobi 1000 devices */ {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */ From feb91a02ccb09661507f170b2a444aec94f307f9 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 5 Nov 2014 20:27:38 +0100 Subject: [PATCH 083/208] ipv6: mld: fix add_grhead skb_over_panic for devs with large MTUs It has been reported that generating an MLD listener report on devices with large MTUs (e.g. 9000) and a high number of IPv6 addresses can trigger a skb_over_panic(): skbuff: skb_over_panic: text:ffffffff80612a5d len:3776 put:20 head:ffff88046d751000 data:ffff88046d751010 tail:0xed0 end:0xec0 dev:port1 ------------[ cut here ]------------ kernel BUG at net/core/skbuff.c:100! invalid opcode: 0000 [#1] SMP Modules linked in: ixgbe(O) CPU: 3 PID: 0 Comm: swapper/3 Tainted: G O 3.14.23+ #4 [...] Call Trace: [] ? skb_put+0x3a/0x3b [] ? add_grhead+0x45/0x8e [] ? add_grec+0x394/0x3d4 [] ? mld_ifc_timer_expire+0x195/0x20d [] ? mld_dad_timer_expire+0x45/0x45 [] ? call_timer_fn.isra.29+0x12/0x68 [] ? run_timer_softirq+0x163/0x182 [] ? __do_softirq+0xe0/0x21d [] ? irq_exit+0x4e/0xd3 [] ? smp_apic_timer_interrupt+0x3b/0x46 [] ? apic_timer_interrupt+0x6a/0x70 mld_newpack() skb allocations are usually requested with dev->mtu in size, since commit 72e09ad107e7 ("ipv6: avoid high order allocations") we have changed the limit in order to be less likely to fail. However, in MLD/IGMP code, we have some rather ugly AVAILABLE(skb) macros, which determine if we may end up doing an skb_put() for adding another record. To avoid possible fragmentation, we check the skb's tailroom as skb->dev->mtu - skb->len, which is a wrong assumption as the actual max allocation size can be much smaller. The IGMP case doesn't have this issue as commit 57e1ab6eaddc ("igmp: refine skb allocations") stores the allocation size in the cb[]. Set a reserved_tailroom to make it fit into the MTU and use skb_availroom() helper instead. This also allows to get rid of igmp_skb_size(). Reported-by: Wei Liu Fixes: 72e09ad107e7 ("ipv6: avoid high order allocations") Signed-off-by: Daniel Borkmann Cc: Eric Dumazet Cc: Hannes Frederic Sowa Cc: David L Stevens Acked-by: Eric Dumazet Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 11 +++++------ net/ipv6/mcast.c | 9 +++++---- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index fb70e3ecc3e4..bb15d0e03d4f 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -318,9 +318,7 @@ igmp_scount(struct ip_mc_list *pmc, int type, int gdeleted, int sdeleted) return scount; } -#define igmp_skb_size(skb) (*(unsigned int *)((skb)->cb)) - -static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) +static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu) { struct sk_buff *skb; struct rtable *rt; @@ -330,6 +328,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) struct flowi4 fl4; int hlen = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; + unsigned int size = mtu; while (1) { skb = alloc_skb(size + hlen + tlen, @@ -341,7 +340,6 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) return NULL; } skb->priority = TC_PRIO_CONTROL; - igmp_skb_size(skb) = size; rt = ip_route_output_ports(net, &fl4, NULL, IGMPV3_ALL_MCR, 0, 0, 0, @@ -354,6 +352,8 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) skb_dst_set(skb, &rt->dst); skb->dev = dev; + skb->reserved_tailroom = skb_end_offset(skb) - + min(mtu, skb_end_offset(skb)); skb_reserve(skb, hlen); skb_reset_network_header(skb); @@ -423,8 +423,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc, return skb; } -#define AVAILABLE(skb) ((skb) ? ((skb)->dev ? igmp_skb_size(skb) - (skb)->len : \ - skb_tailroom(skb)) : 0) +#define AVAILABLE(skb) ((skb) ? skb_availroom(skb) : 0) static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, int type, int gdeleted, int sdeleted) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 9648de2b6745..ed2c4e400b46 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1550,7 +1550,7 @@ static void ip6_mc_hdr(struct sock *sk, struct sk_buff *skb, hdr->daddr = *daddr; } -static struct sk_buff *mld_newpack(struct inet6_dev *idev, int size) +static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu) { struct net_device *dev = idev->dev; struct net *net = dev_net(dev); @@ -1561,13 +1561,13 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, int size) const struct in6_addr *saddr; int hlen = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; + unsigned int size = mtu + hlen + tlen; int err; u8 ra[8] = { IPPROTO_ICMPV6, 0, IPV6_TLV_ROUTERALERT, 2, 0, 0, IPV6_TLV_PADN, 0 }; /* we assume size > sizeof(ra) here */ - size += hlen + tlen; /* limit our allocations to order-0 page */ size = min_t(int, size, SKB_MAX_ORDER(0, 0)); skb = sock_alloc_send_skb(sk, size, 1, &err); @@ -1576,6 +1576,8 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, int size) return NULL; skb->priority = TC_PRIO_CONTROL; + skb->reserved_tailroom = skb_end_offset(skb) - + min(mtu, skb_end_offset(skb)); skb_reserve(skb, hlen); if (__ipv6_get_lladdr(idev, &addr_buf, IFA_F_TENTATIVE)) { @@ -1690,8 +1692,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc, return skb; } -#define AVAILABLE(skb) ((skb) ? ((skb)->dev ? (skb)->dev->mtu - (skb)->len : \ - skb_tailroom(skb)) : 0) +#define AVAILABLE(skb) ((skb) ? skb_availroom(skb) : 0) static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, int type, int gdeleted, int sdeleted, int crsend) From 137bd11090d89b3a3ef4bdb7a6cf964ffc797517 Mon Sep 17 00:00:00 2001 From: Jon Medhurst Date: Fri, 7 Nov 2014 18:05:17 +0000 Subject: [PATCH 084/208] dmaengine: pl330: Align DMA memcpy operations to MFIFO width The algorithm used for programming the DMA Controller doesn't take into consideration the requirements of transfers that are not aligned to the bus width. This failure may result in DMA transferring one too few MFIFO entries (so too few bytes are copied) or the DMA trying to write one too many MFIFO entries and hanging because this is never provided. See "MFIFO Usage Overview" chapter in the the TRM for "CoreLink DMA Controller DMA-330", Revision r1p1. We work around these shortcomings by making sure we pick a burst size and length which ensures no bursts straddle an MFIFO entry. Signed-off-by: Jon Medhurst [squashed linker error "undefined reference to `__aeabi_uldivmod] Reported-by: kbuild test robot Acked-by: Arnd Bergmann Signed-off-by: Vinod Koul --- drivers/dma/pl330.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index 4839bfa74a10..81505420bde4 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -2459,16 +2459,25 @@ pl330_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dst, /* Select max possible burst size */ burst = pl330->pcfg.data_bus_width / 8; - while (burst > 1) { - if (!(len % burst)) - break; + /* + * Make sure we use a burst size that aligns with all the memcpy + * parameters because our DMA programming algorithm doesn't cope with + * transfers which straddle an entry in the DMA device's MFIFO. + */ + while ((src | dst | len) & (burst - 1)) burst /= 2; - } desc->rqcfg.brst_size = 0; while (burst != (1 << desc->rqcfg.brst_size)) desc->rqcfg.brst_size++; + /* + * If burst size is smaller than bus width then make sure we only + * transfer one at a time to avoid a burst stradling an MFIFO entry. + */ + if (desc->rqcfg.brst_size * 8 < pl330->pcfg.data_bus_width) + desc->rqcfg.brst_len = 1; + desc->rqcfg.brst_len = get_burst_len(desc, len); desc->txd.flags = flags; From c27f95568d1ed2573fc9c8848d741da63ca9a34e Mon Sep 17 00:00:00 2001 From: Jon Medhurst Date: Fri, 7 Nov 2014 18:05:18 +0000 Subject: [PATCH 085/208] dmaengine: pl330: Limit MFIFO usage for memcpy to avoid exhausting entries The MFIFO is shared by all channels so restrict each memcpy to it's fair share. This is being over cautious, but without a global view of DMA channel usage on a system it's not possible to come up with a more optimum safe limit. Signed-off-by: Jon Medhurst Signed-off-by: Vinod Koul --- drivers/dma/pl330.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index 81505420bde4..e13b51a46502 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -2336,7 +2336,7 @@ static inline int get_burst_len(struct dma_pl330_desc *desc, size_t len) int burst_len; burst_len = pl330->pcfg.data_bus_width / 8; - burst_len *= pl330->pcfg.data_buf_dep; + burst_len *= pl330->pcfg.data_buf_dep / pl330->pcfg.num_chan; burst_len >>= desc->rqcfg.brst_size; /* src/dst_burst_len can't be more than 16 */ From 1f0a5cbf61a54504236bbbe2c98b58e85f90e650 Mon Sep 17 00:00:00 2001 From: Liviu Dudau Date: Thu, 6 Nov 2014 17:20:12 +0000 Subject: [PATCH 086/208] dmaengine: Fix allocation size for PL330 data buffer depth. The datasheet for PL330 says that the data buffer value in the CRD register is 10bits wide. However, the value stored is "minus one", which the driver corrects for. Maximum value that the data buffer depth can have is 1024 lines, which requires 11 bits for storage. While making updates I found printing the peripheral ID as a hex value to be more useful as the datasheet shows the values that way. Signed-off-by: Liviu Dudau Signed-off-by: Vinod Koul --- drivers/dma/pl330.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index e13b51a46502..19a99743cf52 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -271,7 +271,7 @@ struct pl330_config { #define DMAC_MODE_NS (1 << 0) unsigned int mode; unsigned int data_bus_width:10; /* In number of bits */ - unsigned int data_buf_dep:10; + unsigned int data_buf_dep:11; unsigned int num_chan:4; unsigned int num_peri:6; u32 peri_ns; @@ -2741,7 +2741,7 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id) dev_info(&adev->dev, - "Loaded driver for PL330 DMAC-%d\n", adev->periphid); + "Loaded driver for PL330 DMAC-%x\n", adev->periphid); dev_info(&adev->dev, "\tDBUFF-%ux%ubytes Num_Chans-%u Num_Peri-%u Num_Events-%u\n", pcfg->data_buf_dep, pcfg->data_bus_width / 8, pcfg->num_chan, From 2208d655a91f9879bd9a39ff9df05dd668b3512c Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 14 Nov 2014 09:25:29 +0100 Subject: [PATCH 087/208] drm/i915: drop WaSetupGtModeTdRowDispatch:snb This reverts the regressing commit 6547fbdbfff62c99e4f7b4f985ff8b3454f33b0f Author: Daniel Vetter Date: Fri Dec 14 23:38:29 2012 +0100 drm/i915: Implement WaSetupGtModeTdRowDispatch that causes GPU hangs immediately on boot. Reported-by: Leo Wolf Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=79996 Cc: stable@vger.kernel.org (v3.8+) Signed-off-by: Daniel Vetter [Jani: amended the commit message slightly.] Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_pm.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index c27b6140bfd1..ad2fd605f76b 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5469,11 +5469,6 @@ static void gen6_init_clock_gating(struct drm_device *dev) I915_WRITE(_3D_CHICKEN, _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB)); - /* WaSetupGtModeTdRowDispatch:snb */ - if (IS_SNB_GT1(dev)) - I915_WRITE(GEN6_GT_MODE, - _MASKED_BIT_ENABLE(GEN6_TD_FOUR_ROW_DISPATCH_DISABLE)); - /* WaDisable_RenderCache_OperationalFlush:snb */ I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); From 0485c9dc24ec0939b42ca5104c0373297506b555 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 14 Nov 2014 10:09:49 +0100 Subject: [PATCH 088/208] drm/i915: Kick fbdev before vgacon It's magic, but it seems to work. This fixes a regression introduced in commit 1bb9e632a0aeee1121e652ee4dc80e5e6f14bcd2 Author: Daniel Vetter Date: Tue Jul 8 10:02:43 2014 +0200 drm/i915: Only unbind vgacon, not other console drivers My best guess is that the vga fbdev driver falls over if we rip out parts of vgacon. Hooray. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=82439 Cc: stable@vger.kernel.org (v3.16+) Reported-and-tested-by: Lv Zheng Signed-off-by: Daniel Vetter Acked-by: Chris Wilson Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_dma.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 1403b01e8216..318ade9bb5af 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1670,17 +1670,19 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) goto out_regs; if (drm_core_check_feature(dev, DRIVER_MODESET)) { - ret = i915_kick_out_vgacon(dev_priv); - if (ret) { - DRM_ERROR("failed to remove conflicting VGA console\n"); - goto out_gtt; - } - + /* WARNING: Apparently we must kick fbdev drivers before vgacon, + * otherwise the vga fbdev driver falls over. */ ret = i915_kick_out_firmware_fb(dev_priv); if (ret) { DRM_ERROR("failed to remove conflicting framebuffer drivers\n"); goto out_gtt; } + + ret = i915_kick_out_vgacon(dev_priv); + if (ret) { + DRM_ERROR("failed to remove conflicting VGA console\n"); + goto out_gtt; + } } pci_set_master(dev->pdev); From 97840cb67ff5ac8add836684f011fd838518d698 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 14 Nov 2014 18:14:33 +0100 Subject: [PATCH 089/208] netfilter: nfnetlink: fix insufficient validation in nfnetlink_bind Make sure the netlink group exists, otherwise you can trigger an out of bound array memory access from the netlink_bind() path. This splat can only be triggered only by superuser. [ 180.203600] UBSan: Undefined behaviour in ../net/netfilter/nfnetlink.c:467:28 [ 180.204249] index 9 is out of range for type 'int [9]' [ 180.204697] CPU: 0 PID: 1771 Comm: trinity-main Not tainted 3.18.0-rc4-mm1+ #122 [ 180.205365] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org +04/01/2014 [ 180.206498] 0000000000000018 0000000000000000 0000000000000009 ffff88007bdf7da8 [ 180.207220] ffffffff82b0ef5f 0000000000000092 ffffffff845ae2e0 ffff88007bdf7db8 [ 180.207887] ffffffff8199e489 ffff88007bdf7e18 ffffffff8199ea22 0000003900000000 [ 180.208639] Call Trace: [ 180.208857] dump_stack (lib/dump_stack.c:52) [ 180.209370] ubsan_epilogue (lib/ubsan.c:174) [ 180.209849] __ubsan_handle_out_of_bounds (lib/ubsan.c:400) [ 180.210512] nfnetlink_bind (net/netfilter/nfnetlink.c:467) [ 180.210986] netlink_bind (net/netlink/af_netlink.c:1483) [ 180.211495] SYSC_bind (net/socket.c:1541) Moreover, define the missing nf_tables and nf_acct multicast groups too. Reported-by: Andrey Ryabinin Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 6c5a915cfa75..13c2e17bbe27 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -47,6 +47,8 @@ static const int nfnl_group2type[NFNLGRP_MAX+1] = { [NFNLGRP_CONNTRACK_EXP_NEW] = NFNL_SUBSYS_CTNETLINK_EXP, [NFNLGRP_CONNTRACK_EXP_UPDATE] = NFNL_SUBSYS_CTNETLINK_EXP, [NFNLGRP_CONNTRACK_EXP_DESTROY] = NFNL_SUBSYS_CTNETLINK_EXP, + [NFNLGRP_NFTABLES] = NFNL_SUBSYS_NFTABLES, + [NFNLGRP_ACCT_QUOTA] = NFNL_SUBSYS_ACCT, }; void nfnl_lock(__u8 subsys_id) @@ -464,7 +466,12 @@ static void nfnetlink_rcv(struct sk_buff *skb) static int nfnetlink_bind(int group) { const struct nfnetlink_subsystem *ss; - int type = nfnl_group2type[group]; + int type; + + if (group <= NFNLGRP_NONE || group > NFNLGRP_MAX) + return -EINVAL; + + type = nfnl_group2type[group]; rcu_read_lock(); ss = nfnetlink_get_subsys(type); @@ -514,6 +521,9 @@ static int __init nfnetlink_init(void) { int i; + for (i = NFNLGRP_NONE + 1; i <= NFNLGRP_MAX; i++) + BUG_ON(nfnl_group2type[i] == NFNL_SUBSYS_NONE); + for (i=0; i Date: Mon, 17 Nov 2014 12:20:28 +0100 Subject: [PATCH 090/208] bridge: fix netfilter/NF_BR_LOCAL_OUT for own, locally generated queries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ebtables on the OUTPUT chain (NF_BR_LOCAL_OUT) would not work as expected for both locally generated IGMP and MLD queries. The IP header specific filter options are off by 14 Bytes for netfilter (actual output on interfaces is fine). NF_HOOK() expects the skb->data to point to the IP header, not the ethernet one (while dev_queue_xmit() does not). Luckily there is an br_dev_queue_push_xmit() helper function already - let's just use that. Introduced by eb1d16414339a6e113d89e2cca2556005d7ce919 ("bridge: Add core IGMP snooping support") Ebtables example: $ ebtables -I OUTPUT -p IPv6 -o eth1 --logical-out br0 \ --log --log-level 6 --log-ip6 --log-prefix="~EBT: " -j DROP before (broken): ~EBT: IN= OUT=eth1 MAC source = 02:04:64:a4:39:c2 \ MAC dest = 33:33:00:00:00:01 proto = 0x86dd IPv6 \ SRC=64a4:39c2:86dd:6000:0000:0020:0001:fe80 IPv6 \ DST=0000:0000:0000:0004:64ff:fea4:39c2:ff02, \ IPv6 priority=0x3, Next Header=2 after (working): ~EBT: IN= OUT=eth1 MAC source = 02:04:64:a4:39:c2 \ MAC dest = 33:33:00:00:00:01 proto = 0x86dd IPv6 \ SRC=fe80:0000:0000:0000:0004:64ff:fea4:39c2 IPv6 \ DST=ff02:0000:0000:0000:0000:0000:0000:0001, \ IPv6 priority=0x0, Next Header=0 Signed-off-by: Linus Lüssing Acked-by: Herbert Xu Signed-off-by: Pablo Neira Ayuso --- net/bridge/br_multicast.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 648d79ccf462..c465876c7861 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -813,10 +813,9 @@ static void __br_multicast_send_query(struct net_bridge *br, return; if (port) { - __skb_push(skb, sizeof(struct ethhdr)); skb->dev = port->dev; NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, - dev_queue_xmit); + br_dev_queue_push_xmit); } else { br_multicast_select_own_querier(br, ip, skb); netif_rx(skb); From a358a0ef861dae6f8330fb034aaa43adae71ebc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johan=20Rast=C3=A9n?= Date: Mon, 17 Nov 2014 08:39:33 +0100 Subject: [PATCH 091/208] ALSA: usb-audio: Set the Control Selector to SU_SELECTOR_CONTROL for UAC2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Specified in section 5.2.5.6.1 of the USB Audio Class 2.0 definition. Solves the following error for C-Media 6632A (Asus Xonar U7): [ 8219.676164] cannot get ctl value: req = 0x81, wValue = 0x0, wIndex = 0x1400, type = 3 Signed-off-by: Johan Rastén Signed-off-by: Takashi Iwai --- sound/usb/mixer.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 2e4a9dbc51fa..6e354d326858 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -2033,10 +2033,11 @@ static int parse_audio_selector_unit(struct mixer_build *state, int unitid, cval->res = 1; cval->initialized = 1; - if (desc->bDescriptorSubtype == UAC2_CLOCK_SELECTOR) - cval->control = UAC2_CX_CLOCK_SELECTOR; - else + if (state->mixer->protocol == UAC_VERSION_1) cval->control = 0; + else /* UAC_VERSION_2 */ + cval->control = (desc->bDescriptorSubtype == UAC2_CLOCK_SELECTOR) ? + UAC2_CX_CLOCK_SELECTOR : UAC2_SU_SELECTOR; namelist = kmalloc(sizeof(char *) * desc->bNrInPins, GFP_KERNEL); if (!namelist) { From 9da7a5a9fdeeb76b2243f6b473363a7e6147ab6f Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Mon, 17 Nov 2014 10:48:21 +0000 Subject: [PATCH 092/208] ASoC: wm_adsp: Avoid attempt to free buffers that might still be in use We should not free any buffers associated with writing out coefficients to the DSP until all the async writes have completed. This patch updates the out of memory path when allocating a new buffer to include a call to regmap_async_complete. Reported-by: JS Park Signed-off-by: Charles Keepax Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/codecs/wm_adsp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index f412a9911a75..67124783558a 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -1355,6 +1355,7 @@ static int wm_adsp_load_coeff(struct wm_adsp *dsp) file, blocks, pos - firmware->size); out_fw: + regmap_async_complete(regmap); release_firmware(firmware); wm_adsp_buf_free(&buf_list); out: From daad1660285a967b5da363e8de264e86b4a496a0 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Tue, 4 Nov 2014 15:22:50 -0800 Subject: [PATCH 093/208] ath9k: fix regression in bssidmask calculation The commit that went into 3.17: ath9k: Summarize hw state per channel context Group and set hw state (opmode, primary_sta, beacon conf) per channel context instead of whole list of vifs. This would allow each channel context to run in different mode (STA/AP). Signed-off-by: Felix Fietkau Signed-off-by: Rajkumar Manoharan Signed-off-by: John W. Linville broke multi-vif configuration due to not properly calculating the bssid mask. The test case that caught this was: create wlan0 and sta0-4 (6 total), not sure how much that matters. associate all 6 (works fine) disconnect 5 of them, leaving sta0 up Start trying to bring up the other 5 one at a time. It will fail, with iw events looking like this (in these logs, several sta are trying to come up, but symptom is the same with just one) The patch causing the regression made quite a few changes, but the part I think caused this particular problem was not recalculating the bssid mask when adding and removing interfaces. Re-adding those calls fixes my test case. Fix bad comment as well. Signed-off-by: Ben Greear Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/main.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 30c66dfcd7a0..4f18a6be0c7d 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -974,9 +974,8 @@ void ath9k_calculate_iter_data(struct ath_softc *sc, struct ath_vif *avp; /* - * Pick the MAC address of the first interface as the new hardware - * MAC address. The hardware will use it together with the BSSID mask - * when matching addresses. + * The hardware will use primary station addr together with the + * BSSID mask when matching addresses. */ memset(iter_data, 0, sizeof(*iter_data)); memset(&iter_data->mask, 0xff, ETH_ALEN); @@ -1205,6 +1204,8 @@ static int ath9k_add_interface(struct ieee80211_hw *hw, list_add_tail(&avp->list, &avp->chanctx->vifs); } + ath9k_calculate_summary_state(sc, avp->chanctx); + ath9k_assign_hw_queues(hw, vif); an->sc = sc; @@ -1274,6 +1275,8 @@ static void ath9k_remove_interface(struct ieee80211_hw *hw, ath_tx_node_cleanup(sc, &avp->mcast_node); + ath9k_calculate_summary_state(sc, avp->chanctx); + mutex_unlock(&sc->mutex); } From 8180bd47b043507568056f74f69b6a5abea26514 Mon Sep 17 00:00:00 2001 From: Mathy Vanhoef Date: Wed, 12 Nov 2014 21:33:34 -0500 Subject: [PATCH 094/208] brcmfmac: kill URB when request timed out Kill the submitted URB in brcmf_usb_dl_cmd if the request timed out. This assures the URB is never submitted twice. It also prevents a possible use-after-free of the URB transfer buffer if a timeout occurs. Signed-off-by: Mathy Vanhoef Acked-by: Arend van Spriel Signed-off-by: John W. Linville --- drivers/net/wireless/brcm80211/brcmfmac/usb.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/brcm80211/brcmfmac/usb.c index dc135915470d..875d1142c8b0 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/usb.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/usb.c @@ -669,10 +669,12 @@ static int brcmf_usb_dl_cmd(struct brcmf_usbdev_info *devinfo, u8 cmd, goto finalize; } - if (!brcmf_usb_ioctl_resp_wait(devinfo)) + if (!brcmf_usb_ioctl_resp_wait(devinfo)) { + usb_kill_urb(devinfo->ctl_urb); ret = -ETIMEDOUT; - else + } else { memcpy(buffer, tmpbuf, buflen); + } finalize: kfree(tmpbuf); From 4c69f05eaa428e37890daf88b86a567ce615570b Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 14 Nov 2014 14:12:21 -0800 Subject: [PATCH 095/208] brcmfmac: fix error handling of irq_of_parse_and_map Return value of irq_of_parse_and_map() is unsigned int, with 0 indicating failure, so testing for negative result never works. Signed-off-by: Dmitry Torokhov Cc: stable@vger.kernel.org # v3.17 Acked-by: Arend van Spriel Signed-off-by: John W. Linville --- drivers/net/wireless/brcm80211/brcmfmac/of.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/brcm80211/brcmfmac/of.c b/drivers/net/wireless/brcm80211/brcmfmac/of.c index f05f5270fec1..927bffd5be64 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/of.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/of.c @@ -40,8 +40,8 @@ void brcmf_of_probe(struct brcmf_sdio_dev *sdiodev) return; irq = irq_of_parse_and_map(np, 0); - if (irq < 0) { - brcmf_err("interrupt could not be mapped: err=%d\n", irq); + if (!irq) { + brcmf_err("interrupt could not be mapped\n"); devm_kfree(dev, sdiodev->pdata); return; } From d7ce4377494adfaf8afb15ecf4f07d399bbf13d9 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Fri, 14 Nov 2014 13:51:22 +0800 Subject: [PATCH 096/208] powerpc/fsl_msi: mark the msi cascade handler IRQF_NO_THREAD The commit 543c043cbae7 ("powerpc/fsl_msi: change the irq handler from chained to normal") changes the msi cascade handler from chained to normal. Since cascade handler must run in hard interrupt context, this will cause kernel panic if we force threading of all the interrupt handler via kernel command parameter 'threadirqs'. So mark the irq handler IRQF_NO_THREAD explicitly. Signed-off-by: Kevin Hao Signed-off-by: Scott Wood --- arch/powerpc/sysdev/fsl_msi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index de40b48b460e..da08ed088157 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -361,7 +361,7 @@ static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev, cascade_data->virq = virt_msir; msi->cascade_array[irq_index] = cascade_data; - ret = request_irq(virt_msir, fsl_msi_cascade, 0, + ret = request_irq(virt_msir, fsl_msi_cascade, IRQF_NO_THREAD, "fsl-msi-cascade", cascade_data); if (ret) { dev_err(&dev->dev, "failed to request_irq(%d), ret = %d\n", From 4a83d42ae2041d5b76f1a0662bc3a5a85e4eb0d1 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Tue, 18 Nov 2014 17:57:40 +0800 Subject: [PATCH 097/208] ALSA: hda - move DELL_WMI_MIC_MUTE_LED to the tail in the quirk chain We have one more Dell machine needs DELL_WMI_MIC_MUTE_LED quirk, but the machine uses alc293 instead of alc255. So if DELL_WMI_MIC_MUTE_LED still chain ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, the machine can't use this quirk. To change this situation, let the DELL_WMI_MIC_MUTE_LED to be a standalone quirk, and let other quirks chain it. After this change, this quirk can be chained to any existing quirks, and as a result, it is possible that this quirk is applied to a non-Dell machine or a Dell machine without mic mute led on it, but it is still safe since alc_fixup_dell_wmi() will return an error in these situations. And remove the quirk for machine with subsystem id 0x6010 and 0x601f, these two machines will fall back to the quirk ALC255_FIXUP_DELL1_MIC_NO_PRESENCE-->ALC255_FIXUP_HEADSET_MODE--> ALC255_FIXUP_DELL_WMI_MIC_MUTE_LED through pin_fixup_tbl[]. BugLink: https://bugs.launchpad.net/bugs/1381856 Reported-and-tested-by: Po-Hsu Lin Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 172395465e8a..9da5798c370a 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4709,6 +4709,8 @@ static const struct hda_fixup alc269_fixups[] = { [ALC255_FIXUP_HEADSET_MODE] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_headset_mode_alc255, + .chained = true, + .chain_id = ALC255_FIXUP_DELL_WMI_MIC_MUTE_LED }, [ALC255_FIXUP_HEADSET_MODE_NO_HP_MIC] = { .type = HDA_FIXUP_FUNC, @@ -4744,8 +4746,6 @@ static const struct hda_fixup alc269_fixups[] = { [ALC255_FIXUP_DELL_WMI_MIC_MUTE_LED] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_dell_wmi, - .chained_before = true, - .chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE }, [ALC282_FIXUP_ASPIRE_V5_PINS] = { .type = HDA_FIXUP_PINS, @@ -4783,10 +4783,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x05f4, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x05f5, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x05f6, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), - SND_PCI_QUIRK(0x1028, 0x0610, "Dell", ALC255_FIXUP_DELL_WMI_MIC_MUTE_LED), SND_PCI_QUIRK(0x1028, 0x0615, "Dell Vostro 5470", ALC290_FIXUP_SUBWOOFER_HSJACK), SND_PCI_QUIRK(0x1028, 0x0616, "Dell Vostro 5470", ALC290_FIXUP_SUBWOOFER_HSJACK), - SND_PCI_QUIRK(0x1028, 0x061f, "Dell", ALC255_FIXUP_DELL_WMI_MIC_MUTE_LED), SND_PCI_QUIRK(0x1028, 0x0638, "Dell Inspiron 5439", ALC290_FIXUP_MONO_SPEAKERS_HSJACK), SND_PCI_QUIRK(0x1028, 0x064a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x064b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), From 6676f3081f7e3dae64e05b87d47a041b782f898a Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Tue, 18 Nov 2014 17:57:41 +0800 Subject: [PATCH 098/208] ALSA: hda - fix the mic mute led problem for Latitude E5550 The microphone mute led on the Latitude E5550 can't work. We need to apply DELL_WMI_MIC_MUTE_LED quirk to this machine. The machine uses alc293 codec and already applied the quirk ALC293_FIXUP_DELL1_MIC_NO_PRESENCE through pin_fixup_tbl[]. Here we just let DELL_WMI_MIC_MUTE_LED be chained to ALC269_FIXUP_HEADSET_MODE, then the machine will have these quirks ALC293_FIXUP_DELL1_MIC_NO_PRESENCE--> ALC269_FIXUP_HEADSET_MODE-->ALC255_FIXUP_DELL_WMI_MIC_MUTE_LED. BugLink: https://bugs.launchpad.net/bugs/1381856 Reported-and-tested-by: Po-Hsu Lin Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 9da5798c370a..8fea1b86df25 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4520,6 +4520,8 @@ static const struct hda_fixup alc269_fixups[] = { [ALC269_FIXUP_HEADSET_MODE] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_headset_mode, + .chained = true, + .chain_id = ALC255_FIXUP_DELL_WMI_MIC_MUTE_LED }, [ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC] = { .type = HDA_FIXUP_FUNC, From 5247a589c24022ab34e780039cc8000c48f2035e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20K=C3=B6rper?= Date: Fri, 31 Oct 2014 07:33:54 +0100 Subject: [PATCH 099/208] can: dev: avoid calling kfree_skb() from interrupt context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ikfree_skb() is Called in can_free_echo_skb(), which might be called from (TX Error) interrupt, which triggers the folloing warning: [ 1153.360705] ------------[ cut here ]------------ [ 1153.360715] WARNING: CPU: 0 PID: 31 at net/core/skbuff.c:563 skb_release_head_state+0xb9/0xd0() [ 1153.360772] Call Trace: [ 1153.360778] [] dump_stack+0x41/0x52 [ 1153.360782] [] warn_slowpath_common+0x7e/0xa0 [ 1153.360784] [] ? skb_release_head_state+0xb9/0xd0 [ 1153.360786] [] ? skb_release_head_state+0xb9/0xd0 [ 1153.360788] [] warn_slowpath_null+0x22/0x30 [ 1153.360791] [] skb_release_head_state+0xb9/0xd0 [ 1153.360793] [] skb_release_all+0x10/0x30 [ 1153.360795] [] kfree_skb+0x36/0x80 [ 1153.360799] [] ? can_free_echo_skb+0x28/0x40 [can_dev] [ 1153.360802] [] can_free_echo_skb+0x28/0x40 [can_dev] [ 1153.360805] [] esd_pci402_interrupt+0x34c/0x57a [esd402] [ 1153.360809] [] handle_irq_event_percpu+0x35/0x180 [ 1153.360811] [] ? handle_irq_event_percpu+0xa3/0x180 [ 1153.360813] [] handle_irq_event+0x31/0x50 [ 1153.360816] [] handle_fasteoi_irq+0x6f/0x120 [ 1153.360818] [] ? handle_edge_irq+0x110/0x110 [ 1153.360822] [] handle_irq+0x71/0x90 [ 1153.360823] [] do_IRQ+0x3c/0xd0 [ 1153.360829] [] common_interrupt+0x2c/0x34 [ 1153.360834] [] ? finish_task_switch+0x47/0xf0 [ 1153.360836] [] __schedule+0x35b/0x7e0 [ 1153.360839] [] ? console_unlock+0x2c4/0x4d0 [ 1153.360842] [] ? n_tty_receive_buf_common+0x890/0x890 [ 1153.360845] [] ? process_one_work+0x196/0x370 [ 1153.360847] [] schedule+0x23/0x60 [ 1153.360849] [] worker_thread+0x161/0x460 [ 1153.360852] [] ? __wake_up_locked+0x1f/0x30 [ 1153.360854] [] ? rescuer_thread+0x2f0/0x2f0 [ 1153.360856] [] kthread+0xa1/0xc0 [ 1153.360859] [] ret_from_kernel_thread+0x21/0x30 [ 1153.360861] [] ? kthread_create_on_node+0x110/0x110 [ 1153.360863] ---[ end trace 5ff83639cbb74b35 ]--- This patch replaces the kfree_skb() by dev_kfree_skb_any(). Signed-off-by: Thomas Körper Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index 02492d241e4c..509d5e063d8c 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -382,7 +382,7 @@ void can_free_echo_skb(struct net_device *dev, unsigned int idx) BUG_ON(idx >= priv->echo_skb_max); if (priv->echo_skb[idx]) { - kfree_skb(priv->echo_skb[idx]); + dev_kfree_skb_any(priv->echo_skb[idx]); priv->echo_skb[idx] = NULL; } } From 67b5909edccfe3ea3b85b1d96284d2c53e3fd47c Mon Sep 17 00:00:00 2001 From: Roman Fietze Date: Mon, 20 Oct 2014 10:32:42 +0200 Subject: [PATCH 100/208] can: dev: fix typo CIA -> CiA, CAN in Automation This patch fixes a typo in CAN's dev.c: CIA -> CiA which stands for CAN in Automation. Signed-off-by: Roman Fietze Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index 509d5e063d8c..2cfe5012e4e5 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -110,7 +110,7 @@ static int can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt, long rate; u64 v64; - /* Use CIA recommended sample points */ + /* Use CiA recommended sample points */ if (bt->sample_point) { sampl_pt = bt->sample_point; } else { From 98e69016a11b2b9398bea668442193b3b362cd43 Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Fri, 7 Nov 2014 16:45:12 +0800 Subject: [PATCH 101/208] can: dev: add can_is_canfd_skb() API The CAN device drivers can use can_is_canfd_skb() to check if the frame to send is on CAN FD mode or normal CAN mode. Acked-by: Oliver Hartkopp Signed-off-by: Dong Aisheng Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 6992afc6ba7f..b37ea95bc348 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -99,6 +99,12 @@ inval_skb: return 1; } +static inline bool can_is_canfd_skb(const struct sk_buff *skb) +{ + /* the CAN specific type of skb is identified by its data length */ + return skb->len == CANFD_MTU; +} + /* get data length from can_dlc with sanitized can_dlc */ u8 can_dlc2len(u8 can_dlc); From efbd50d2f62fc1f69a3dcd153e63ba28cc8eb27f Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sat, 11 Oct 2014 00:31:07 +0400 Subject: [PATCH 102/208] can: esd_usb2: fix memory leak on disconnect It seems struct esd_usb2 dev is not deallocated on disconnect. The patch adds the missing deallocation. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Acked-by: Matthias Fuchs Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/esd_usb2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c index b7c9e8b11460..7a90075529c3 100644 --- a/drivers/net/can/usb/esd_usb2.c +++ b/drivers/net/can/usb/esd_usb2.c @@ -1143,6 +1143,7 @@ static void esd_usb2_disconnect(struct usb_interface *intf) } } unlink_all_urbs(dev); + kfree(dev); } } From 4e2061b1e1f6b8e698b36a6518b6f8c15edc4547 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Tue, 18 Nov 2014 19:17:06 +0530 Subject: [PATCH 103/208] can: remove unused variable these variable were only assigned some values, but then never reused again. so they are safe to be removed. Signed-off-by: Sudip Mukherjee Signed-off-by: Marc Kleine-Budde --- drivers/net/can/sja1000/kvaser_pci.c | 5 +---- drivers/net/can/usb/ems_usb.c | 3 +-- drivers/net/can/usb/esd_usb2.c | 2 -- 3 files changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/net/can/sja1000/kvaser_pci.c b/drivers/net/can/sja1000/kvaser_pci.c index 8ff3424d5147..15c00faeec61 100644 --- a/drivers/net/can/sja1000/kvaser_pci.c +++ b/drivers/net/can/sja1000/kvaser_pci.c @@ -214,7 +214,7 @@ static int kvaser_pci_add_chan(struct pci_dev *pdev, int channel, struct net_device *dev; struct sja1000_priv *priv; struct kvaser_pci *board; - int err, init_step; + int err; dev = alloc_sja1000dev(sizeof(struct kvaser_pci)); if (dev == NULL) @@ -235,7 +235,6 @@ static int kvaser_pci_add_chan(struct pci_dev *pdev, int channel, if (channel == 0) { board->xilinx_ver = ioread8(board->res_addr + XILINX_VERINT) >> 4; - init_step = 2; /* Assert PTADR# - we're in passive mode so the other bits are not important */ @@ -264,8 +263,6 @@ static int kvaser_pci_add_chan(struct pci_dev *pdev, int channel, priv->irq_flags = IRQF_SHARED; dev->irq = pdev->irq; - init_step = 4; - dev_info(&pdev->dev, "reg_base=%p conf_addr=%p irq=%d\n", priv->reg_base, board->conf_addr, dev->irq); diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c index 00f2534dde73..29d3f0938eb8 100644 --- a/drivers/net/can/usb/ems_usb.c +++ b/drivers/net/can/usb/ems_usb.c @@ -434,10 +434,9 @@ static void ems_usb_read_bulk_callback(struct urb *urb) if (urb->actual_length > CPC_HEADER_SIZE) { struct ems_cpc_msg *msg; u8 *ibuf = urb->transfer_buffer; - u8 msg_count, again, start; + u8 msg_count, start; msg_count = ibuf[0] & ~0x80; - again = ibuf[0] & 0x80; start = CPC_HEADER_SIZE; diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c index 7a90075529c3..c063a54ab8dd 100644 --- a/drivers/net/can/usb/esd_usb2.c +++ b/drivers/net/can/usb/esd_usb2.c @@ -464,7 +464,6 @@ static void esd_usb2_write_bulk_callback(struct urb *urb) { struct esd_tx_urb_context *context = urb->context; struct esd_usb2_net_priv *priv; - struct esd_usb2 *dev; struct net_device *netdev; size_t size = sizeof(struct esd_usb2_msg); @@ -472,7 +471,6 @@ static void esd_usb2_write_bulk_callback(struct urb *urb) priv = context->priv; netdev = priv->netdev; - dev = priv->usb2; /* free up our allocated buffer */ usb_free_coherent(urb->dev, size, From fb3ec7ba5a665c280f7299a36460449038fc1083 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Tue, 18 Nov 2014 19:17:07 +0530 Subject: [PATCH 104/208] can: xilinx_can: fix comparison of unsigned variable The variable err was of the type u32. It was being compared with < 0, and being an unsigned variable the comparison would have been always false. Moreover, err was getting the return value from set_reset_mode() and xcan_set_bittiming(), and both are returning int. Signed-off-by: Sudip Mukherjee Reviewed-by: Michal Simek Signed-off-by: Marc Kleine-Budde --- drivers/net/can/xilinx_can.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index 5e8b5609c067..47b2f801d127 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -300,7 +300,8 @@ static int xcan_set_bittiming(struct net_device *ndev) static int xcan_chip_start(struct net_device *ndev) { struct xcan_priv *priv = netdev_priv(ndev); - u32 err, reg_msr, reg_sr_mask; + u32 reg_msr, reg_sr_mask; + int err; unsigned long timeout; /* Check if it is in reset mode */ From 92593a035ee6a81f50b54ab34b23b1c1319ee413 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Tue, 18 Nov 2014 13:16:13 +0100 Subject: [PATCH 105/208] can: xilinx_can: add .ndo_change_mtu function Use common can_change_mtu function. Signed-off-by: Marc Kleine-Budde --- drivers/net/can/xilinx_can.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index 47b2f801d127..8a998e3884ce 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -962,6 +962,7 @@ static const struct net_device_ops xcan_netdev_ops = { .ndo_open = xcan_open, .ndo_stop = xcan_close, .ndo_start_xmit = xcan_start_xmit, + .ndo_change_mtu = can_change_mtu, }; /** From ca976d6af4e50899f4e840e8017ec29d8fe6b50e Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Tue, 18 Nov 2014 13:16:13 +0100 Subject: [PATCH 106/208] can: rcar_can: add .ndo_change_mtu function Use common can_change_mtu function. Signed-off-by: Marc Kleine-Budde --- drivers/net/can/rcar_can.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/rcar_can.c b/drivers/net/can/rcar_can.c index 1abe133d1594..9718248e55f1 100644 --- a/drivers/net/can/rcar_can.c +++ b/drivers/net/can/rcar_can.c @@ -628,6 +628,7 @@ static const struct net_device_ops rcar_can_netdev_ops = { .ndo_open = rcar_can_open, .ndo_stop = rcar_can_close, .ndo_start_xmit = rcar_can_start_xmit, + .ndo_change_mtu = can_change_mtu, }; static void rcar_can_rx_pkt(struct rcar_can_priv *priv) From f2a306c29d024193b1272cd014108882f7887a9e Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Fri, 26 Sep 2014 00:26:27 +0200 Subject: [PATCH 107/208] devicetree: bindings: add sandisk to the vendor prefixes Add sandisk to the list of vendors. This prefix should be used also for companies absorbed by Sandisk, like M-Systems. Signed-off-by: Robert Jarzmik Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/vendor-prefixes.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index 723999d73744..3de80303de83 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -127,6 +127,7 @@ renesas Renesas Electronics Corporation ricoh Ricoh Co. Ltd. rockchip Fuzhou Rockchip Electronics Co., Ltd samsung Samsung Semiconductor +sandisk Sandisk Corporation sbs Smart Battery System schindler Schindler seagate Seagate Technology PLC From fb86b97300d930b57471068720c52bfa8622eab7 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 18 Nov 2014 10:46:57 +0100 Subject: [PATCH 108/208] x86, microcode: Update BSPs microcode on resume In the situation when we apply early microcode but do *not* apply late microcode, we fail to update the BSP's microcode on resume because we haven't initialized the uci->mc microcode pointer. So, in order to alleviate that, we go and dig out the stashed microcode patch during early boot. It is basically the same thing that is done on the APs early during boot so do that too here. Tested-by: alex.schnaidt@gmail.com Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=88001 Cc: Henrique de Moraes Holschuh Cc: Fenghua Yu Cc: # v3.9 Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/20141118094657.GA6635@pd.tnic Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/microcode/core.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index dd9d6190b08d..2ce9051174e6 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -465,6 +465,14 @@ static void mc_bp_resume(void) if (uci->valid && uci->mc) microcode_ops->apply_microcode(cpu); + else if (!uci->mc) + /* + * We might resume and not have applied late microcode but still + * have a newer patch stashed from the early loader. We don't + * have it in uci->mc so we have to load it the same way we're + * applying patches early on the APs. + */ + load_ucode_ap(); } static struct syscore_ops mc_syscore_ops = { From 45e2a9d4701d8c624d4a4bcdd1084eae31e92f58 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 14 Nov 2014 11:47:37 -0800 Subject: [PATCH 109/208] x86, mm: Set NX across entire PMD at boot When setting up permissions on kernel memory at boot, the end of the PMD that was split from bss remained executable. It should be NX like the rest. This performs a PMD alignment instead of a PAGE alignment to get the correct span of memory. Before: ---[ High Kernel Mapping ]--- ... 0xffffffff8202d000-0xffffffff82200000 1868K RW GLB NX pte 0xffffffff82200000-0xffffffff82c00000 10M RW PSE GLB NX pmd 0xffffffff82c00000-0xffffffff82df5000 2004K RW GLB NX pte 0xffffffff82df5000-0xffffffff82e00000 44K RW GLB x pte 0xffffffff82e00000-0xffffffffc0000000 978M pmd After: ---[ High Kernel Mapping ]--- ... 0xffffffff8202d000-0xffffffff82200000 1868K RW GLB NX pte 0xffffffff82200000-0xffffffff82e00000 12M RW PSE GLB NX pmd 0xffffffff82e00000-0xffffffffc0000000 978M pmd [ tglx: Changed it to roundup(_brk_end, PMD_SIZE) and added a comment. We really should unmap the reminder along with the holes caused by init,initdata etc. but thats a different issue ] Signed-off-by: Kees Cook Cc: Andy Lutomirski Cc: Toshi Kani Cc: Yasuaki Ishimatsu Cc: David Vrabel Cc: Wang Nan Cc: Yinghai Lu Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20141114194737.GA3091@www.outflux.net Signed-off-by: Thomas Gleixner --- arch/x86/mm/init_64.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 4cb8763868fc..4e5dfec750fc 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1123,7 +1123,7 @@ void mark_rodata_ro(void) unsigned long end = (unsigned long) &__end_rodata_hpage_align; unsigned long text_end = PFN_ALIGN(&__stop___ex_table); unsigned long rodata_end = PFN_ALIGN(&__end_rodata); - unsigned long all_end = PFN_ALIGN(&_end); + unsigned long all_end; printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", (end - start) >> 10); @@ -1134,7 +1134,16 @@ void mark_rodata_ro(void) /* * The rodata/data/bss/brk section (but not the kernel text!) * should also be not-executable. + * + * We align all_end to PMD_SIZE because the existing mapping + * is a full PMD. If we would align _brk_end to PAGE_SIZE we + * split the PMD and the reminder between _brk_end and the end + * of the PMD will remain mapped executable. + * + * Any PMD which was setup after the one which covers _brk_end + * has been zapped already via cleanup_highmem(). */ + all_end = roundup((unsigned long)_brk_end, PMD_SIZE); set_memory_nx(rodata_start, (all_end - rodata_start) >> PAGE_SHIFT); rodata_test(); From 70b61e362187b5fccac206506d402f3424e3e749 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 17 Nov 2014 16:16:04 -0800 Subject: [PATCH 110/208] x86, kaslr: Handle Gold linker for finding bss/brk When building with the Gold linker, the .bss and .brk areas of vmlinux are shown as consecutive instead of having the same file offset. Allow for either state, as long as things add up correctly. Fixes: e6023367d779 ("x86, kaslr: Prevent .bss from overlaping initrd") Reported-by: Markus Trippelsdorf Signed-off-by: Kees Cook Cc: Junjie Mao Link: http://lkml.kernel.org/r/20141118001604.GA25045@www.outflux.net Cc: stable@vger.kernel.org Signed-off-by: Thomas Gleixner --- arch/x86/tools/calc_run_size.pl | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/tools/calc_run_size.pl b/arch/x86/tools/calc_run_size.pl index 0b0b124d3ece..23210baade2d 100644 --- a/arch/x86/tools/calc_run_size.pl +++ b/arch/x86/tools/calc_run_size.pl @@ -19,7 +19,16 @@ while (<>) { if ($file_offset == 0) { $file_offset = $offset; } elsif ($file_offset != $offset) { - die ".bss and .brk lack common file offset\n"; + # BFD linker shows the same file offset in ELF. + # Gold linker shows them as consecutive. + next if ($file_offset + $mem_size == $offset + $size); + + printf STDERR "file_offset: 0x%lx\n", $file_offset; + printf STDERR "mem_size: 0x%lx\n", $mem_size; + printf STDERR "offset: 0x%lx\n", $offset; + printf STDERR "size: 0x%lx\n", $size; + + die ".bss and .brk are non-contiguous\n"; } } } From 27b3383a1432127bfcf9f8a63bf184ff4d866141 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 22 Oct 2014 11:49:01 +0200 Subject: [PATCH 111/208] of: Spelling s/stucture/structure/ Signed-off-by: Geert Uytterhoeven Cc: Grant Likely Cc: Rob Herring Signed-off-by: Rob Herring --- drivers/of/dynamic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index f297891d8529..d4994177dec2 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -247,7 +247,7 @@ void of_node_release(struct kobject *kobj) * @allocflags: Allocation flags (typically pass GFP_KERNEL) * * Copy a property by dynamically allocating the memory of both the - * property stucture and the property name & contents. The property's + * property structure and the property name & contents. The property's * flags have the OF_DYNAMIC bit set so that we can differentiate between * dynamically allocated properties and not. * Returns the newly allocated property or NULL on out of memory error. From 66865de4314caca30598244b86817e774c188afa Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Sat, 1 Nov 2014 17:35:31 -0600 Subject: [PATCH 112/208] of/irq: Drop obsolete 'interrupts' vs 'interrupts-extended' text a9ecdc0fdc54 ("of/irq: Fix lookup to use 'interrupts-extended' property first") updated the description to say that: - Both 'interrupts' and 'interrupts-extended' may be present - Software should prefer 'interrupts-extended' - Software that doesn't comprehend 'interrupts-extended' may use 'interrupts' But there is still a paragraph at the end that prohibits having both and says 'interrupts' should be preferred. Remove the contradictory text. Fixes: a9ecdc0fdc54 ("of/irq: Fix lookup to use 'interrupts-extended' property first") Signed-off-by: Bjorn Helgaas CC: stable@vger.kernel.org # v3.13+ Acked-by: Brian Norris Acked-by: Mark Rutland Signed-off-by: Rob Herring --- .../devicetree/bindings/interrupt-controller/interrupts.txt | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Documentation/devicetree/bindings/interrupt-controller/interrupts.txt b/Documentation/devicetree/bindings/interrupt-controller/interrupts.txt index ce6a1a072028..8a3c40829899 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/interrupts.txt +++ b/Documentation/devicetree/bindings/interrupt-controller/interrupts.txt @@ -30,10 +30,6 @@ should only be used when a device has multiple interrupt parents. Example: interrupts-extended = <&intc1 5 1>, <&intc2 1 0>; -A device node may contain either "interrupts" or "interrupts-extended", but not -both. If both properties are present, then the operating system should log an -error and use only the data in "interrupts". - 2) Interrupt controller nodes ----------------------------- From ab74d00a39f70e1bc34a01322bb59f3750ca7a8c Mon Sep 17 00:00:00 2001 From: Kevin Cernekee Date: Sun, 9 Nov 2014 00:55:47 -0800 Subject: [PATCH 113/208] of: Fix crash if an earlycon driver is not found __earlycon_of_table_sentinel.compatible is a char[128], not a pointer, so it will never be NULL. Checking it against NULL causes the match loop to run past the end of the array, and eventually match a bogus entry, under the following conditions: - Kernel command line specifies "earlycon" with no parameters - DT has a stdout-path pointing to a UART node - The UART driver doesn't use OF_EARLYCON_DECLARE (or maybe the console driver is compiled out) Fix this by checking to see if match->compatible is a non-empty string. Signed-off-by: Kevin Cernekee Cc: # 3.16+ Signed-off-by: Rob Herring --- drivers/of/fdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index d1ffca8b34ea..30e97bcc4f88 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -773,7 +773,7 @@ int __init early_init_dt_scan_chosen_serial(void) if (offset < 0) return -ENODEV; - while (match->compatible) { + while (match->compatible[0]) { unsigned long addr; if (fdt_node_check_compatible(fdt, offset, match->compatible)) { match++; From 9b6eab07588c2de102423fe99c875fc4bfda2508 Mon Sep 17 00:00:00 2001 From: Antony Pavlov Date: Sun, 9 Nov 2014 01:37:34 +0300 Subject: [PATCH 114/208] devicetree: vendor-prefixes.txt: fix whitespace Signed-off-by: Antony Pavlov Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/vendor-prefixes.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index 3de80303de83..f9c7e77d218a 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -139,7 +139,7 @@ silergy Silergy Corp. sirf SiRF Technology, Inc. sitronix Sitronix Technology Corporation smsc Standard Microsystems Corporation -snps Synopsys, Inc. +snps Synopsys, Inc. solidrun SolidRun sony Sony Corporation spansion Spansion Inc. From 746c9e9f92dde2789908e51a354ba90a1962a2eb Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 14 Nov 2014 17:55:03 +1100 Subject: [PATCH 115/208] of/base: Fix PowerPC address parsing hack We have a historical hack that treats missing ranges properties as the equivalent of an empty one. This is needed for ancient PowerMac "bad" device-trees, and shouldn't be enabled for any other PowerPC platform, otherwise we get some nasty layout of devices in sysfs or even duplication when a set of otherwise identically named devices is created multiple times under a different parent node with no ranges property. This fix is needed for the PowerNV i2c busses to be exposed properly and will fix a number of other embedded cases. Signed-off-by: Benjamin Herrenschmidt CC: Acked-by: Grant Likely Signed-off-by: Rob Herring --- drivers/of/address.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/of/address.c b/drivers/of/address.c index afdb78299f61..06af494184d6 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -450,6 +450,21 @@ static struct of_bus *of_match_bus(struct device_node *np) return NULL; } +static int of_empty_ranges_quirk(void) +{ + if (IS_ENABLED(CONFIG_PPC)) { + /* To save cycles, we cache the result */ + static int quirk_state = -1; + + if (quirk_state < 0) + quirk_state = + of_machine_is_compatible("Power Macintosh") || + of_machine_is_compatible("MacRISC"); + return quirk_state; + } + return false; +} + static int of_translate_one(struct device_node *parent, struct of_bus *bus, struct of_bus *pbus, __be32 *addr, int na, int ns, int pna, const char *rprop) @@ -475,12 +490,10 @@ static int of_translate_one(struct device_node *parent, struct of_bus *bus, * This code is only enabled on powerpc. --gcl */ ranges = of_get_property(parent, rprop, &rlen); -#if !defined(CONFIG_PPC) - if (ranges == NULL) { + if (ranges == NULL && !of_empty_ranges_quirk()) { pr_err("OF: no ranges; cannot translate\n"); return 1; } -#endif /* !defined(CONFIG_PPC) */ if (ranges == NULL || rlen == 0) { offset = of_read_number(addr, na); memset(addr, 0, pna * 4); From f9cb89b63db8cb2755a5179843a0643cc284f1ef Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Wed, 14 May 2014 11:24:43 +0200 Subject: [PATCH 116/208] of: Add vendor prefix for Chips&Media, Inc. Chips&Media is a developer of Video Codec IP cores. Signed-off-by: Philipp Zabel [robh: fix-up alphabetical ordering] Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/vendor-prefixes.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index f9c7e77d218a..a0655781d87d 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -34,6 +34,7 @@ chipidea Chipidea, Inc chrp Common Hardware Reference Platform chunghwa Chunghwa Picture Tubes Ltd. cirrus Cirrus Logic, Inc. +cnm Chips&Media, Inc. cortina Cortina Systems, Inc. crystalfontz Crystalfontz America, Inc. dallas Maxim Integrated Products (formerly Dallas Semiconductor) From 5641c09226f401ee054e48521707fb185380e8d3 Mon Sep 17 00:00:00 2001 From: bpqw Date: Wed, 12 Nov 2014 14:26:42 +0000 Subject: [PATCH 117/208] devicetree: bindings: Add vendor prefix for Micron Technology, Inc. This patch is used to add vendor prefix for Micron Technology, Inc. in the vendor-prefixes.txt file. Micron Technology, Inc. is an American multinational corporation based in Boise, Idaho, best known for producing many forms of semiconductor devices. This includes DRAM, SDRAM, flash memory, eMMC and SSDs. Signed-off-by: Bean Huo [robh: cleanup commit msg formatting and company name] Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/vendor-prefixes.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index a0655781d87d..a344ec2713a5 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -93,6 +93,7 @@ maxim Maxim Integrated Products mediatek MediaTek Inc. micrel Micrel Inc. microchip Microchip Technology Inc. +micron Micron Technology Inc. mitsubishi Mitsubishi Electric Corporation mosaixtech Mosaix Technologies, Inc. moxa Moxa From a0e27f51ba8a04125c22a95c4d3e98297a7191de Mon Sep 17 00:00:00 2001 From: Soren Brinkmann Date: Thu, 6 Nov 2014 07:38:51 -0800 Subject: [PATCH 118/208] documentation: pinctrl bindings: Fix trivial typo 'abitrary' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A misspelled 'arbitrary' propagated to quite a few locations in the DT binding documentation for pin-controllers. Fixing by: git grep abitrary | cut -f1 -d: | xargs sed -i 's/abitrary/arbitrary/' Reported-by: Andreas Färber Signed-off-by: Soren Brinkmann Signed-off-by: Rob Herring --- .../devicetree/bindings/pinctrl/img,tz1090-pdc-pinctrl.txt | 2 +- .../devicetree/bindings/pinctrl/img,tz1090-pinctrl.txt | 2 +- .../devicetree/bindings/pinctrl/lantiq,falcon-pinumx.txt | 2 +- .../devicetree/bindings/pinctrl/lantiq,xway-pinumx.txt | 2 +- .../devicetree/bindings/pinctrl/nvidia,tegra20-pinmux.txt | 2 +- Documentation/devicetree/bindings/pinctrl/pinctrl-sirf.txt | 2 +- Documentation/devicetree/bindings/pinctrl/pinctrl_spear.txt | 2 +- .../devicetree/bindings/pinctrl/qcom,apq8064-pinctrl.txt | 2 +- .../devicetree/bindings/pinctrl/qcom,apq8084-pinctrl.txt | 2 +- .../devicetree/bindings/pinctrl/qcom,ipq8064-pinctrl.txt | 2 +- .../devicetree/bindings/pinctrl/qcom,msm8960-pinctrl.txt | 2 +- .../devicetree/bindings/pinctrl/qcom,msm8974-pinctrl.txt | 2 +- 12 files changed, 12 insertions(+), 12 deletions(-) diff --git a/Documentation/devicetree/bindings/pinctrl/img,tz1090-pdc-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/img,tz1090-pdc-pinctrl.txt index a186181c402b..51b943cc9770 100644 --- a/Documentation/devicetree/bindings/pinctrl/img,tz1090-pdc-pinctrl.txt +++ b/Documentation/devicetree/bindings/pinctrl/img,tz1090-pdc-pinctrl.txt @@ -9,7 +9,7 @@ Please refer to pinctrl-bindings.txt in this directory for details of the common pinctrl bindings used by client devices, including the meaning of the phrase "pin configuration node". -TZ1090-PDC's pin configuration nodes act as a container for an abitrary number +TZ1090-PDC's pin configuration nodes act as a container for an arbitrary number of subnodes. Each of these subnodes represents some desired configuration for a pin, a group, or a list of pins or groups. This configuration can include the mux function to select on those pin(s)/group(s), and various pin configuration diff --git a/Documentation/devicetree/bindings/pinctrl/img,tz1090-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/img,tz1090-pinctrl.txt index 4b27c99f7f9d..49d0e6050940 100644 --- a/Documentation/devicetree/bindings/pinctrl/img,tz1090-pinctrl.txt +++ b/Documentation/devicetree/bindings/pinctrl/img,tz1090-pinctrl.txt @@ -9,7 +9,7 @@ Please refer to pinctrl-bindings.txt in this directory for details of the common pinctrl bindings used by client devices, including the meaning of the phrase "pin configuration node". -TZ1090's pin configuration nodes act as a container for an abitrary number of +TZ1090's pin configuration nodes act as a container for an arbitrary number of subnodes. Each of these subnodes represents some desired configuration for a pin, a group, or a list of pins or groups. This configuration can include the mux function to select on those pin(s)/group(s), and various pin configuration diff --git a/Documentation/devicetree/bindings/pinctrl/lantiq,falcon-pinumx.txt b/Documentation/devicetree/bindings/pinctrl/lantiq,falcon-pinumx.txt index daa768956069..ac4da9fe07bd 100644 --- a/Documentation/devicetree/bindings/pinctrl/lantiq,falcon-pinumx.txt +++ b/Documentation/devicetree/bindings/pinctrl/lantiq,falcon-pinumx.txt @@ -9,7 +9,7 @@ Please refer to pinctrl-bindings.txt in this directory for details of the common pinctrl bindings used by client devices, including the meaning of the phrase "pin configuration node". -Lantiq's pin configuration nodes act as a container for an abitrary number of +Lantiq's pin configuration nodes act as a container for an arbitrary number of subnodes. Each of these subnodes represents some desired configuration for a pin, a group, or a list of pins or groups. This configuration can include the mux function to select on those group(s), and two pin configuration parameters: diff --git a/Documentation/devicetree/bindings/pinctrl/lantiq,xway-pinumx.txt b/Documentation/devicetree/bindings/pinctrl/lantiq,xway-pinumx.txt index b5469db1d7ad..e89b4677567d 100644 --- a/Documentation/devicetree/bindings/pinctrl/lantiq,xway-pinumx.txt +++ b/Documentation/devicetree/bindings/pinctrl/lantiq,xway-pinumx.txt @@ -9,7 +9,7 @@ Please refer to pinctrl-bindings.txt in this directory for details of the common pinctrl bindings used by client devices, including the meaning of the phrase "pin configuration node". -Lantiq's pin configuration nodes act as a container for an abitrary number of +Lantiq's pin configuration nodes act as a container for an arbitrary number of subnodes. Each of these subnodes represents some desired configuration for a pin, a group, or a list of pins or groups. This configuration can include the mux function to select on those group(s), and two pin configuration parameters: diff --git a/Documentation/devicetree/bindings/pinctrl/nvidia,tegra20-pinmux.txt b/Documentation/devicetree/bindings/pinctrl/nvidia,tegra20-pinmux.txt index 61e73cde9ae9..3c8ce28baad6 100644 --- a/Documentation/devicetree/bindings/pinctrl/nvidia,tegra20-pinmux.txt +++ b/Documentation/devicetree/bindings/pinctrl/nvidia,tegra20-pinmux.txt @@ -9,7 +9,7 @@ Please refer to pinctrl-bindings.txt in this directory for details of the common pinctrl bindings used by client devices, including the meaning of the phrase "pin configuration node". -Tegra's pin configuration nodes act as a container for an abitrary number of +Tegra's pin configuration nodes act as a container for an arbitrary number of subnodes. Each of these subnodes represents some desired configuration for a pin, a group, or a list of pins or groups. This configuration can include the mux function to select on those pin(s)/group(s), and various pin configuration diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl-sirf.txt b/Documentation/devicetree/bindings/pinctrl/pinctrl-sirf.txt index c596a6ad3285..5f55be59d914 100644 --- a/Documentation/devicetree/bindings/pinctrl/pinctrl-sirf.txt +++ b/Documentation/devicetree/bindings/pinctrl/pinctrl-sirf.txt @@ -13,7 +13,7 @@ Optional properties: Please refer to pinctrl-bindings.txt in this directory for details of the common pinctrl bindings used by client devices. -SiRFprimaII's pinmux nodes act as a container for an abitrary number of subnodes. +SiRFprimaII's pinmux nodes act as a container for an arbitrary number of subnodes. Each of these subnodes represents some desired configuration for a group of pins. Required subnode-properties: diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl_spear.txt b/Documentation/devicetree/bindings/pinctrl/pinctrl_spear.txt index b4480d5c3aca..458615596946 100644 --- a/Documentation/devicetree/bindings/pinctrl/pinctrl_spear.txt +++ b/Documentation/devicetree/bindings/pinctrl/pinctrl_spear.txt @@ -32,7 +32,7 @@ Required properties: Please refer to pinctrl-bindings.txt in this directory for details of the common pinctrl bindings used by client devices. -SPEAr's pinmux nodes act as a container for an abitrary number of subnodes. Each +SPEAr's pinmux nodes act as a container for an arbitrary number of subnodes. Each of these subnodes represents muxing for a pin, a group, or a list of pins or groups. diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,apq8064-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/qcom,apq8064-pinctrl.txt index 2fb90b37aa09..a7bde64798c7 100644 --- a/Documentation/devicetree/bindings/pinctrl/qcom,apq8064-pinctrl.txt +++ b/Documentation/devicetree/bindings/pinctrl/qcom,apq8064-pinctrl.txt @@ -18,7 +18,7 @@ Please refer to pinctrl-bindings.txt in this directory for details of the common pinctrl bindings used by client devices, including the meaning of the phrase "pin configuration node". -Qualcomm's pin configuration nodes act as a container for an abitrary number of +Qualcomm's pin configuration nodes act as a container for an arbitrary number of subnodes. Each of these subnodes represents some desired configuration for a pin, a group, or a list of pins or groups. This configuration can include the mux function to select on those pin(s)/group(s), and various pin configuration diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,apq8084-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/qcom,apq8084-pinctrl.txt index ffafa1990a30..c4ea61ac56f2 100644 --- a/Documentation/devicetree/bindings/pinctrl/qcom,apq8084-pinctrl.txt +++ b/Documentation/devicetree/bindings/pinctrl/qcom,apq8084-pinctrl.txt @@ -47,7 +47,7 @@ Please refer to pinctrl-bindings.txt in this directory for details of the common pinctrl bindings used by client devices, including the meaning of the phrase "pin configuration node". -The pin configuration nodes act as a container for an abitrary number of +The pin configuration nodes act as a container for an arbitrary number of subnodes. Each of these subnodes represents some desired configuration for a pin, a group, or a list of pins or groups. This configuration can include the mux function to select on those pin(s)/group(s), and various pin configuration diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,ipq8064-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/qcom,ipq8064-pinctrl.txt index e33e4dcdce79..6e88e91feb11 100644 --- a/Documentation/devicetree/bindings/pinctrl/qcom,ipq8064-pinctrl.txt +++ b/Documentation/devicetree/bindings/pinctrl/qcom,ipq8064-pinctrl.txt @@ -18,7 +18,7 @@ Please refer to pinctrl-bindings.txt in this directory for details of the common pinctrl bindings used by client devices, including the meaning of the phrase "pin configuration node". -Qualcomm's pin configuration nodes act as a container for an abitrary number of +Qualcomm's pin configuration nodes act as a container for an arbitrary number of subnodes. Each of these subnodes represents some desired configuration for a pin, a group, or a list of pins or groups. This configuration can include the mux function to select on those pin(s)/group(s), and various pin configuration diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,msm8960-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/qcom,msm8960-pinctrl.txt index 93b7de91b9f6..eb8d8aa41f20 100644 --- a/Documentation/devicetree/bindings/pinctrl/qcom,msm8960-pinctrl.txt +++ b/Documentation/devicetree/bindings/pinctrl/qcom,msm8960-pinctrl.txt @@ -47,7 +47,7 @@ Please refer to pinctrl-bindings.txt in this directory for details of the common pinctrl bindings used by client devices, including the meaning of the phrase "pin configuration node". -The pin configuration nodes act as a container for an abitrary number of +The pin configuration nodes act as a container for an arbitrary number of subnodes. Each of these subnodes represents some desired configuration for a pin, a group, or a list of pins or groups. This configuration can include the mux function to select on those pin(s)/group(s), and various pin configuration diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,msm8974-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/qcom,msm8974-pinctrl.txt index d2ea80dc43eb..e4d6a9d20f7d 100644 --- a/Documentation/devicetree/bindings/pinctrl/qcom,msm8974-pinctrl.txt +++ b/Documentation/devicetree/bindings/pinctrl/qcom,msm8974-pinctrl.txt @@ -18,7 +18,7 @@ Please refer to pinctrl-bindings.txt in this directory for details of the common pinctrl bindings used by client devices, including the meaning of the phrase "pin configuration node". -Qualcomm's pin configuration nodes act as a container for an abitrary number of +Qualcomm's pin configuration nodes act as a container for an arbitrary number of subnodes. Each of these subnodes represents some desired configuration for a pin, a group, or a list of pins or groups. This configuration can include the mux function to select on those pin(s)/group(s), and various pin configuration From 50212b425d0ef8b606b316826b56abbb48680758 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Tue, 18 Nov 2014 13:16:13 +0100 Subject: [PATCH 119/208] can: gs_usb: add .ndo_change_mtu function Use common can_change_mtu function. Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/gs_usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index 04b0f84612f0..009acc8641fc 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -718,6 +718,7 @@ static const struct net_device_ops gs_usb_netdev_ops = { .ndo_open = gs_can_open, .ndo_stop = gs_can_close, .ndo_start_xmit = gs_can_start_xmit, + .ndo_change_mtu = can_change_mtu, }; static struct gs_can *gs_make_candev(unsigned int channel, struct usb_interface *intf) From d6fdb38b4f2c4090e176aa55fe73dd2f45cfa4a6 Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Wed, 29 Oct 2014 18:45:23 +0800 Subject: [PATCH 120/208] can: m_can: add .ndo_change_mtu function Use common can_change_mtu function. Signed-off-by: Dong Aisheng Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 10d571eaed85..e61886bf1bb3 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -992,6 +992,7 @@ static const struct net_device_ops m_can_netdev_ops = { .ndo_open = m_can_open, .ndo_stop = m_can_close, .ndo_start_xmit = m_can_start_xmit, + .ndo_change_mtu = can_change_mtu, }; static int register_m_can_dev(struct net_device *dev) From efe22286e05751e9913a8002fefdb45cdb7361ad Mon Sep 17 00:00:00 2001 From: David Cohen Date: Wed, 15 Oct 2014 14:41:50 -0700 Subject: [PATCH 121/208] can: m_can: add CONFIG_HAS_IOMEM dependence m_can uses io memory which makes it not compilable on architectures without HAS_IOMEM such as UML: drivers/built-in.o: In function `m_can_plat_probe': m_can.c:(.text+0x218cc5): undefined reference to `devm_ioremap_resource' m_can.c:(.text+0x218df9): undefined reference to `devm_ioremap' Signed-off-by: David Cohen Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/m_can/Kconfig b/drivers/net/can/m_can/Kconfig index fca5482c09ac..04f20dd39007 100644 --- a/drivers/net/can/m_can/Kconfig +++ b/drivers/net/can/m_can/Kconfig @@ -1,4 +1,5 @@ config CAN_M_CAN + depends on HAS_IOMEM tristate "Bosch M_CAN devices" ---help--- Say Y here if you want to support for Bosch M_CAN controller. From 962845da54afe2fc7ebf64c2d8bb5aa65a826b2f Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Fri, 7 Nov 2014 16:45:14 +0800 Subject: [PATCH 122/208] can: m_can: add missing message RAM initialization The M_CAN message RAM is usually equipped with a parity or ECC functionality. But RAM cells suffer a hardware reset and can therefore hold arbitrary content at startup - including parity and/or ECC bits. To prevent the M_CAN controller detecting checksum errors when reading potentially uninitialized TX message RAM content to transmit CAN frames the TX message RAM has to be written with (any kind of) initial data. Signed-off-by: Dong Aisheng Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index e61886bf1bb3..268ad5064c47 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1010,7 +1010,7 @@ static int m_can_of_parse_mram(struct platform_device *pdev, struct resource *res; void __iomem *addr; u32 out_val[MRAM_CFG_LEN]; - int ret; + int i, start, end, ret; /* message ram could be shared */ res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "message_ram"); @@ -1061,6 +1061,15 @@ static int m_can_of_parse_mram(struct platform_device *pdev, priv->mcfg[MRAM_TXE].off, priv->mcfg[MRAM_TXE].num, priv->mcfg[MRAM_TXB].off, priv->mcfg[MRAM_TXB].num); + /* initialize the entire Message RAM in use to avoid possible + * ECC/parity checksum errors when reading an uninitialized buffer + */ + start = priv->mcfg[MRAM_SIDF].off; + end = priv->mcfg[MRAM_TXB].off + + priv->mcfg[MRAM_TXB].num * TXB_ELEMENT_SIZE; + for (i = start; i < end; i += 4) + writel(0x0, priv->mram_base + i); + return 0; } From f6a99649526370c7a88b86736c02f2a74b5b20e7 Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Wed, 29 Oct 2014 18:45:21 +0800 Subject: [PATCH 123/208] can: m_can: fix possible sleep in napi poll The m_can_get_berr_counter function can sleep and it may be called in napi poll function. Rework it to fix the following warning. root@imx6qdlsolo:~# cangen can0 -f -L 12 -D 112233445566778899001122 [ 1846.017565] m_can 20e8000.can can0: entered error warning state [ 1846.023551] ------------[ cut here ]------------ [ 1846.028216] WARNING: CPU: 0 PID: 560 at kernel/locking/mutex.c:867 mutex_trylock+0x218/0x23c() [ 1846.036889] DEBUG_LOCKS_WARN_ON(in_interrupt()) [ 1846.041263] Modules linked in: [ 1846.044594] CPU: 0 PID: 560 Comm: cangen Not tainted 3.17.0-rc4-next-20140915-00010-g032d018-dirty #477 [ 1846.054033] Backtrace: [ 1846.056557] [<80012448>] (dump_backtrace) from [<80012728>] (show_stack+0x18/0x1c) [ 1846.064180] r6:809a07ec r5:809a07ec r4:00000000 r3:00000000 [ 1846.069966] [<80012710>] (show_stack) from [<806c9ee0>] (dump_stack+0x8c/0xa4) [ 1846.077264] [<806c9e54>] (dump_stack) from [<8002aa78>] (warn_slowpath_common+0x70/0x94) [ 1846.085403] r6:806cd1b0 r5:00000009 r4:be1d5c20 r3:be07b0c0 [ 1846.091204] [<8002aa08>] (warn_slowpath_common) from [<8002aad4>] (warn_slowpath_fmt+0x38/0x40) [ 1846.099951] r8:8119106c r7:80515aa4 r6:be027000 r5:00000001 r4:809d1df4 [ 1846.106830] [<8002aaa0>] (warn_slowpath_fmt) from [<806cd1b0>] (mutex_trylock+0x218/0x23c) [ 1846.115141] r3:80851c88 r2:8084fb74 [ 1846.118804] [<806ccf98>] (mutex_trylock) from [<80515aa4>] (clk_prepare_lock+0x14/0xf4) [ 1846.126859] r8:00000040 r7:be1d5cec r6:be027000 r5:be255800 r4:be027000 [ 1846.133737] [<80515a90>] (clk_prepare_lock) from [<80517660>] (clk_prepare+0x14/0x2c) [ 1846.141583] r5:be255800 r4:be027000 [ 1846.145272] [<8051764c>] (clk_prepare) from [<8041ff14>] (m_can_get_berr_counter+0x20/0xd4) [ 1846.153672] r4:be255800 r3:be07b0c0 [ 1846.157325] [<8041fef4>] (m_can_get_berr_counter) from [<80420428>] (m_can_poll+0x310/0x8fc) [ 1846.165809] r7:bd4dc540 r6:00000744 r5:11300000 r4:be255800 [ 1846.171590] [<80420118>] (m_can_poll) from [<8056a468>] (net_rx_action+0xcc/0x1b4) [ 1846.179204] r10:00000101 r9:be255ebc r8:00000040 r7:be7c3208 r6:8097c100 r5:be7c3200 [ 1846.187192] r4:0000012c [ 1846.189779] [<8056a39c>] (net_rx_action) from [<8002deec>] (__do_softirq+0xfc/0x2c4) [ 1846.197568] r10:00000101 r9:8097c088 r8:00000003 r7:8097c080 r6:40000001 r5:8097c08c [ 1846.205559] r4:00000020 [ 1846.208144] [<8002ddf0>] (__do_softirq) from [<8002e194>] (do_softirq+0x7c/0x88) [ 1846.215588] r10:00000000 r9:bd516a60 r8:be18ce00 r7:00000000 r6:be255800 r5:8056c0ec [ 1846.223578] r4:60000093 [ 1846.226163] [<8002e118>] (do_softirq) from [<8002e288>] (__local_bh_enable_ip+0xe8/0x10c) [ 1846.234386] r4:00000200 r3:be1d4000 [ 1846.238036] [<8002e1a0>] (__local_bh_enable_ip) from [<8056c108>] (__dev_queue_xmit+0x314/0x6b0) [ 1846.246868] r6:be255800 r5:bd516a00 r4:00000000 r3:be07b0c0 [ 1846.252645] [<8056bdf4>] (__dev_queue_xmit) from [<8056c4b8>] (dev_queue_xmit+0x14/0x18) Signed-off-by: Dong Aisheng Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 268ad5064c47..214160b4c314 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -481,11 +481,23 @@ static int m_can_handle_lec_err(struct net_device *dev, return 1; } +static int __m_can_get_berr_counter(const struct net_device *dev, + struct can_berr_counter *bec) +{ + struct m_can_priv *priv = netdev_priv(dev); + unsigned int ecr; + + ecr = m_can_read(priv, M_CAN_ECR); + bec->rxerr = (ecr & ECR_REC_MASK) >> ECR_REC_SHIFT; + bec->txerr = ecr & ECR_TEC_MASK; + + return 0; +} + static int m_can_get_berr_counter(const struct net_device *dev, struct can_berr_counter *bec) { struct m_can_priv *priv = netdev_priv(dev); - unsigned int ecr; int err; err = clk_prepare_enable(priv->hclk); @@ -498,9 +510,7 @@ static int m_can_get_berr_counter(const struct net_device *dev, return err; } - ecr = m_can_read(priv, M_CAN_ECR); - bec->rxerr = (ecr & ECR_REC_MASK) >> ECR_REC_SHIFT; - bec->txerr = ecr & ECR_TEC_MASK; + __m_can_get_berr_counter(dev, bec); clk_disable_unprepare(priv->cclk); clk_disable_unprepare(priv->hclk); @@ -544,7 +554,7 @@ static int m_can_handle_state_change(struct net_device *dev, if (unlikely(!skb)) return 0; - m_can_get_berr_counter(dev, &bec); + __m_can_get_berr_counter(dev, &bec); switch (new_state) { case CAN_STATE_ERROR_ACTIVE: From 921f168109d029e3b59459c3a2754f0e2a70fad3 Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Tue, 18 Nov 2014 19:00:54 +0800 Subject: [PATCH 124/208] can: m_can: fix not set can_dlc for remote frame The original code missed to set the cf->can_dlc in the RTR case, so add it. Signed-off-by: Dong Aisheng Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 214160b4c314..98f7e0ea7f6a 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -330,7 +330,7 @@ static void m_can_read_fifo(const struct net_device *dev, struct can_frame *cf, u32 rxfs) { struct m_can_priv *priv = netdev_priv(dev); - u32 id, fgi; + u32 id, fgi, dlc; /* calculate the fifo get index for where to read data */ fgi = (rxfs & RXFS_FGI_MASK) >> RXFS_FGI_OFF; @@ -340,11 +340,12 @@ static void m_can_read_fifo(const struct net_device *dev, struct can_frame *cf, else cf->can_id = (id >> 18) & CAN_SFF_MASK; + dlc = m_can_fifo_read(priv, fgi, M_CAN_FIFO_DLC); + cf->can_dlc = get_can_dlc((dlc >> 16) & 0x0F); + if (id & RX_BUF_RTR) { cf->can_id |= CAN_RTR_FLAG; } else { - id = m_can_fifo_read(priv, fgi, M_CAN_FIFO_DLC); - cf->can_dlc = get_can_dlc((id >> 16) & 0x0F); *(u32 *)(cf->data + 0) = m_can_fifo_read(priv, fgi, M_CAN_FIFO_DATA(0)); *(u32 *)(cf->data + 4) = m_can_fifo_read(priv, fgi, From 7660f633070986ab4ee41c063a90e140d5781e13 Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Wed, 29 Oct 2014 18:45:24 +0800 Subject: [PATCH 125/208] can: m_can: add missing delay after setting CCCR_INIT bit The spec mentions there may be a delay until the value written to INIT can be read back due to the synchronization mechanism between the two clock domains. But it does not indicate the exact clock cycles needed. The 5us delay is a test value and seems ok. Without the delay, CCCR.CCE bit may fail to be set and then the initialization fail sometimes when do repeatly up and down. Signed-off-by: Dong Aisheng Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 98f7e0ea7f6a..3ad7d88720b7 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -296,6 +296,7 @@ static inline void m_can_config_endisable(const struct m_can_priv *priv, if (enable) { /* enable m_can configuration */ m_can_write(priv, M_CAN_CCCR, cccr | CCCR_INIT); + udelay(5); /* CCCR.CCE can only be set/reset while CCCR.INIT = '1' */ m_can_write(priv, M_CAN_CCCR, cccr | CCCR_INIT | CCCR_CCE); } else { From a93f5cae67b366e4ce7e9eb336e2885309fd6612 Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Wed, 29 Oct 2014 18:45:22 +0800 Subject: [PATCH 126/208] can: m_can: fix incorrect error messages Fix a few error messages. Signed-off-by: Dong Aisheng Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 3ad7d88720b7..2f61676b3a97 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -608,14 +608,14 @@ static int m_can_handle_state_errors(struct net_device *dev, u32 psr) if ((psr & PSR_EP) && (priv->can.state != CAN_STATE_ERROR_PASSIVE)) { - netdev_dbg(dev, "entered error warning state\n"); + netdev_dbg(dev, "entered error passive state\n"); work_done += m_can_handle_state_change(dev, CAN_STATE_ERROR_PASSIVE); } if ((psr & PSR_BO) && (priv->can.state != CAN_STATE_BUS_OFF)) { - netdev_dbg(dev, "entered error warning state\n"); + netdev_dbg(dev, "entered error bus off state\n"); work_done += m_can_handle_state_change(dev, CAN_STATE_BUS_OFF); } @@ -627,7 +627,7 @@ static void m_can_handle_other_err(struct net_device *dev, u32 irqstatus) { if (irqstatus & IR_WDI) netdev_err(dev, "Message RAM Watchdog event due to missing READY\n"); - if (irqstatus & IR_BEU) + if (irqstatus & IR_ELO) netdev_err(dev, "Error Logging Overflow\n"); if (irqstatus & IR_BEU) netdev_err(dev, "Bit Error Uncorrected\n"); From 80646733f11c2e9de3b6339f7e635047e6087280 Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Tue, 18 Nov 2014 19:00:55 +0800 Subject: [PATCH 127/208] can: m_can: update to support CAN FD features Bosch M_CAN is CAN FD capable device. This patch implements the CAN FD features include up to 64 bytes payload and bitrate switch function. 1) Change the Rx FIFO and Tx Buffer to 64 bytes for support CAN FD up to 64 bytes payload. It's backward compatible with old 8 bytes normal CAN frame. 2) Allocate can frame or canfd frame based on EDL bit 3) Bitrate Switch function is disabled by default and will be enabled according to CANFD_BRS bit in cf->flags. Acked-by: Oliver Hartkopp Signed-off-by: Dong Aisheng Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 177 +++++++++++++++++++++++++--------- 1 file changed, 134 insertions(+), 43 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 2f61676b3a97..d7bc462aafdc 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -105,14 +105,36 @@ enum m_can_mram_cfg { MRAM_CFG_NUM, }; +/* Fast Bit Timing & Prescaler Register (FBTP) */ +#define FBTR_FBRP_MASK 0x1f +#define FBTR_FBRP_SHIFT 16 +#define FBTR_FTSEG1_SHIFT 8 +#define FBTR_FTSEG1_MASK (0xf << FBTR_FTSEG1_SHIFT) +#define FBTR_FTSEG2_SHIFT 4 +#define FBTR_FTSEG2_MASK (0x7 << FBTR_FTSEG2_SHIFT) +#define FBTR_FSJW_SHIFT 0 +#define FBTR_FSJW_MASK 0x3 + /* Test Register (TEST) */ #define TEST_LBCK BIT(4) /* CC Control Register(CCCR) */ -#define CCCR_TEST BIT(7) -#define CCCR_MON BIT(5) -#define CCCR_CCE BIT(1) -#define CCCR_INIT BIT(0) +#define CCCR_TEST BIT(7) +#define CCCR_CMR_MASK 0x3 +#define CCCR_CMR_SHIFT 10 +#define CCCR_CMR_CANFD 0x1 +#define CCCR_CMR_CANFD_BRS 0x2 +#define CCCR_CMR_CAN 0x3 +#define CCCR_CME_MASK 0x3 +#define CCCR_CME_SHIFT 8 +#define CCCR_CME_CAN 0 +#define CCCR_CME_CANFD 0x1 +#define CCCR_CME_CANFD_BRS 0x2 +#define CCCR_TEST BIT(7) +#define CCCR_MON BIT(5) +#define CCCR_CCE BIT(1) +#define CCCR_INIT BIT(0) +#define CCCR_CANFD 0x10 /* Bit Timing & Prescaler Register (BTP) */ #define BTR_BRP_MASK 0x3ff @@ -204,6 +226,7 @@ enum m_can_mram_cfg { /* Rx Buffer / FIFO Element Size Configuration (RXESC) */ #define M_CAN_RXESC_8BYTES 0x0 +#define M_CAN_RXESC_64BYTES 0x777 /* Tx Buffer Configuration(TXBC) */ #define TXBC_NDTB_OFF 16 @@ -211,6 +234,7 @@ enum m_can_mram_cfg { /* Tx Buffer Element Size Configuration(TXESC) */ #define TXESC_TBDS_8BYTES 0x0 +#define TXESC_TBDS_64BYTES 0x7 /* Tx Event FIFO Con.guration (TXEFC) */ #define TXEFC_EFS_OFF 16 @@ -219,11 +243,11 @@ enum m_can_mram_cfg { /* Message RAM Configuration (in bytes) */ #define SIDF_ELEMENT_SIZE 4 #define XIDF_ELEMENT_SIZE 8 -#define RXF0_ELEMENT_SIZE 16 -#define RXF1_ELEMENT_SIZE 16 +#define RXF0_ELEMENT_SIZE 72 +#define RXF1_ELEMENT_SIZE 72 #define RXB_ELEMENT_SIZE 16 #define TXE_ELEMENT_SIZE 8 -#define TXB_ELEMENT_SIZE 16 +#define TXB_ELEMENT_SIZE 72 /* Message RAM Elements */ #define M_CAN_FIFO_ID 0x0 @@ -231,11 +255,17 @@ enum m_can_mram_cfg { #define M_CAN_FIFO_DATA(n) (0x8 + ((n) << 2)) /* Rx Buffer Element */ +/* R0 */ #define RX_BUF_ESI BIT(31) #define RX_BUF_XTD BIT(30) #define RX_BUF_RTR BIT(29) +/* R1 */ +#define RX_BUF_ANMF BIT(31) +#define RX_BUF_EDL BIT(21) +#define RX_BUF_BRS BIT(20) /* Tx Buffer Element */ +/* R0 */ #define TX_BUF_XTD BIT(30) #define TX_BUF_RTR BIT(29) @@ -327,42 +357,67 @@ static inline void m_can_disable_all_interrupts(const struct m_can_priv *priv) m_can_write(priv, M_CAN_ILE, 0x0); } -static void m_can_read_fifo(const struct net_device *dev, struct can_frame *cf, - u32 rxfs) +static void m_can_read_fifo(struct net_device *dev, u32 rxfs) { + struct net_device_stats *stats = &dev->stats; struct m_can_priv *priv = netdev_priv(dev); + struct canfd_frame *cf; + struct sk_buff *skb; u32 id, fgi, dlc; + int i; /* calculate the fifo get index for where to read data */ fgi = (rxfs & RXFS_FGI_MASK) >> RXFS_FGI_OFF; + dlc = m_can_fifo_read(priv, fgi, M_CAN_FIFO_DLC); + if (dlc & RX_BUF_EDL) + skb = alloc_canfd_skb(dev, &cf); + else + skb = alloc_can_skb(dev, (struct can_frame **)&cf); + if (!skb) { + stats->rx_dropped++; + return; + } + + if (dlc & RX_BUF_EDL) + cf->len = can_dlc2len((dlc >> 16) & 0x0F); + else + cf->len = get_can_dlc((dlc >> 16) & 0x0F); + id = m_can_fifo_read(priv, fgi, M_CAN_FIFO_ID); if (id & RX_BUF_XTD) cf->can_id = (id & CAN_EFF_MASK) | CAN_EFF_FLAG; else cf->can_id = (id >> 18) & CAN_SFF_MASK; - dlc = m_can_fifo_read(priv, fgi, M_CAN_FIFO_DLC); - cf->can_dlc = get_can_dlc((dlc >> 16) & 0x0F); + if (id & RX_BUF_ESI) { + cf->flags |= CANFD_ESI; + netdev_dbg(dev, "ESI Error\n"); + } - if (id & RX_BUF_RTR) { + if (!(dlc & RX_BUF_EDL) && (id & RX_BUF_RTR)) { cf->can_id |= CAN_RTR_FLAG; } else { - *(u32 *)(cf->data + 0) = m_can_fifo_read(priv, fgi, - M_CAN_FIFO_DATA(0)); - *(u32 *)(cf->data + 4) = m_can_fifo_read(priv, fgi, - M_CAN_FIFO_DATA(1)); + if (dlc & RX_BUF_BRS) + cf->flags |= CANFD_BRS; + + for (i = 0; i < cf->len; i += 4) + *(u32 *)(cf->data + i) = + m_can_fifo_read(priv, fgi, + M_CAN_FIFO_DATA(i / 4)); } /* acknowledge rx fifo 0 */ m_can_write(priv, M_CAN_RXF0A, fgi); + + stats->rx_packets++; + stats->rx_bytes += cf->len; + + netif_receive_skb(skb); } static int m_can_do_rx_poll(struct net_device *dev, int quota) { struct m_can_priv *priv = netdev_priv(dev); - struct net_device_stats *stats = &dev->stats; - struct sk_buff *skb; - struct can_frame *frame; u32 pkts = 0; u32 rxfs; @@ -376,18 +431,7 @@ static int m_can_do_rx_poll(struct net_device *dev, int quota) if (rxfs & RXFS_RFL) netdev_warn(dev, "Rx FIFO 0 Message Lost\n"); - skb = alloc_can_skb(dev, &frame); - if (!skb) { - stats->rx_dropped++; - return pkts; - } - - m_can_read_fifo(dev, frame, rxfs); - - stats->rx_packets++; - stats->rx_bytes += frame->can_dlc; - - netif_receive_skb(skb); + m_can_read_fifo(dev, rxfs); quota--; pkts++; @@ -745,10 +789,23 @@ static const struct can_bittiming_const m_can_bittiming_const = { .brp_inc = 1, }; +static const struct can_bittiming_const m_can_data_bittiming_const = { + .name = KBUILD_MODNAME, + .tseg1_min = 2, /* Time segment 1 = prop_seg + phase_seg1 */ + .tseg1_max = 16, + .tseg2_min = 1, /* Time segment 2 = phase_seg2 */ + .tseg2_max = 8, + .sjw_max = 4, + .brp_min = 1, + .brp_max = 32, + .brp_inc = 1, +}; + static int m_can_set_bittiming(struct net_device *dev) { struct m_can_priv *priv = netdev_priv(dev); const struct can_bittiming *bt = &priv->can.bittiming; + const struct can_bittiming *dbt = &priv->can.data_bittiming; u16 brp, sjw, tseg1, tseg2; u32 reg_btp; @@ -759,7 +816,17 @@ static int m_can_set_bittiming(struct net_device *dev) reg_btp = (brp << BTR_BRP_SHIFT) | (sjw << BTR_SJW_SHIFT) | (tseg1 << BTR_TSEG1_SHIFT) | (tseg2 << BTR_TSEG2_SHIFT); m_can_write(priv, M_CAN_BTP, reg_btp); - netdev_dbg(dev, "setting BTP 0x%x\n", reg_btp); + + if (priv->can.ctrlmode & CAN_CTRLMODE_FD) { + brp = dbt->brp - 1; + sjw = dbt->sjw - 1; + tseg1 = dbt->prop_seg + dbt->phase_seg1 - 1; + tseg2 = dbt->phase_seg2 - 1; + reg_btp = (brp << FBTR_FBRP_SHIFT) | (sjw << FBTR_FSJW_SHIFT) | + (tseg1 << FBTR_FTSEG1_SHIFT) | + (tseg2 << FBTR_FTSEG2_SHIFT); + m_can_write(priv, M_CAN_FBTP, reg_btp); + } return 0; } @@ -779,8 +846,8 @@ static void m_can_chip_config(struct net_device *dev) m_can_config_endisable(priv, true); - /* RX Buffer/FIFO Element Size 8 bytes data field */ - m_can_write(priv, M_CAN_RXESC, M_CAN_RXESC_8BYTES); + /* RX Buffer/FIFO Element Size 64 bytes data field */ + m_can_write(priv, M_CAN_RXESC, M_CAN_RXESC_64BYTES); /* Accept Non-matching Frames Into FIFO 0 */ m_can_write(priv, M_CAN_GFC, 0x0); @@ -789,8 +856,8 @@ static void m_can_chip_config(struct net_device *dev) m_can_write(priv, M_CAN_TXBC, (1 << TXBC_NDTB_OFF) | priv->mcfg[MRAM_TXB].off); - /* only support 8 bytes firstly */ - m_can_write(priv, M_CAN_TXESC, TXESC_TBDS_8BYTES); + /* support 64 bytes payload */ + m_can_write(priv, M_CAN_TXESC, TXESC_TBDS_64BYTES); m_can_write(priv, M_CAN_TXEFC, (1 << TXEFC_EFS_OFF) | priv->mcfg[MRAM_TXE].off); @@ -805,7 +872,8 @@ static void m_can_chip_config(struct net_device *dev) RXFC_FWM_1 | priv->mcfg[MRAM_RXF1].off); cccr = m_can_read(priv, M_CAN_CCCR); - cccr &= ~(CCCR_TEST | CCCR_MON); + cccr &= ~(CCCR_TEST | CCCR_MON | (CCCR_CMR_MASK << CCCR_CMR_SHIFT) | + (CCCR_CME_MASK << CCCR_CME_SHIFT)); test = m_can_read(priv, M_CAN_TEST); test &= ~TEST_LBCK; @@ -817,6 +885,9 @@ static void m_can_chip_config(struct net_device *dev) test |= TEST_LBCK; } + if (priv->can.ctrlmode & CAN_CTRLMODE_FD) + cccr |= CCCR_CME_CANFD_BRS << CCCR_CME_SHIFT; + m_can_write(priv, M_CAN_CCCR, cccr); m_can_write(priv, M_CAN_TEST, test); @@ -881,11 +952,13 @@ static struct net_device *alloc_m_can_dev(void) priv->dev = dev; priv->can.bittiming_const = &m_can_bittiming_const; + priv->can.data_bittiming_const = &m_can_data_bittiming_const; priv->can.do_set_mode = m_can_set_mode; priv->can.do_get_berr_counter = m_can_get_berr_counter; priv->can.ctrlmode_supported = CAN_CTRLMODE_LOOPBACK | CAN_CTRLMODE_LISTENONLY | - CAN_CTRLMODE_BERR_REPORTING; + CAN_CTRLMODE_BERR_REPORTING | + CAN_CTRLMODE_FD; return dev; } @@ -968,8 +1041,9 @@ static netdev_tx_t m_can_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct m_can_priv *priv = netdev_priv(dev); - struct can_frame *cf = (struct can_frame *)skb->data; - u32 id; + struct canfd_frame *cf = (struct canfd_frame *)skb->data; + u32 id, cccr; + int i; if (can_dropped_invalid_skb(dev, skb)) return NETDEV_TX_OK; @@ -988,11 +1062,28 @@ static netdev_tx_t m_can_start_xmit(struct sk_buff *skb, /* message ram configuration */ m_can_fifo_write(priv, 0, M_CAN_FIFO_ID, id); - m_can_fifo_write(priv, 0, M_CAN_FIFO_DLC, cf->can_dlc << 16); - m_can_fifo_write(priv, 0, M_CAN_FIFO_DATA(0), *(u32 *)(cf->data + 0)); - m_can_fifo_write(priv, 0, M_CAN_FIFO_DATA(1), *(u32 *)(cf->data + 4)); + m_can_fifo_write(priv, 0, M_CAN_FIFO_DLC, can_len2dlc(cf->len) << 16); + + for (i = 0; i < cf->len; i += 4) + m_can_fifo_write(priv, 0, M_CAN_FIFO_DATA(i / 4), + *(u32 *)(cf->data + i)); + can_put_echo_skb(skb, dev, 0); + if (priv->can.ctrlmode & CAN_CTRLMODE_FD) { + cccr = m_can_read(priv, M_CAN_CCCR); + cccr &= ~(CCCR_CMR_MASK << CCCR_CMR_SHIFT); + if (can_is_canfd_skb(skb)) { + if (cf->flags & CANFD_BRS) + cccr |= CCCR_CMR_CANFD_BRS << CCCR_CMR_SHIFT; + else + cccr |= CCCR_CMR_CANFD << CCCR_CMR_SHIFT; + } else { + cccr |= CCCR_CMR_CAN << CCCR_CMR_SHIFT; + } + m_can_write(priv, M_CAN_CCCR, cccr); + } + /* enable first TX buffer to start transfer */ m_can_write(priv, M_CAN_TXBTIE, 0x1); m_can_write(priv, M_CAN_TXBAR, 0x1); From 11bf7828a59880427403e13dcff8228d67e9e0f7 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Mon, 17 Nov 2014 16:24:54 -0800 Subject: [PATCH 128/208] vxlan: Inline vxlan_gso_check(). Suggested-by: Or Gerlitz Signed-off-by: Joe Stringer Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 19 ------------------- include/net/vxlan.h | 18 +++++++++++++++++- 2 files changed, 17 insertions(+), 20 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 6b658638b456..e1e335c339e3 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -67,12 +67,6 @@ #define VXLAN_FLAGS 0x08000000 /* struct vxlanhdr.vx_flags required value. */ -/* VXLAN protocol header */ -struct vxlanhdr { - __be32 vx_flags; - __be32 vx_vni; -}; - /* UDP port for VXLAN traffic. * The IANA assigned port is 4789, but the Linux default is 8472 * for compatibility with early adopters. @@ -1571,19 +1565,6 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb) return false; } -bool vxlan_gso_check(struct sk_buff *skb) -{ - if ((skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) && - (skb->inner_protocol_type != ENCAP_TYPE_ETHER || - skb->inner_protocol != htons(ETH_P_TEB) || - (skb_inner_mac_header(skb) - skb_transport_header(skb) != - sizeof(struct udphdr) + sizeof(struct vxlanhdr)))) - return false; - - return true; -} -EXPORT_SYMBOL_GPL(vxlan_gso_check); - #if IS_ENABLED(CONFIG_IPV6) static int vxlan6_xmit_skb(struct vxlan_sock *vs, struct dst_entry *dst, struct sk_buff *skb, diff --git a/include/net/vxlan.h b/include/net/vxlan.h index afadf8e53f20..57cccd0052e5 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -8,6 +8,12 @@ #define VNI_HASH_BITS 10 #define VNI_HASH_SIZE (1<gso_type & SKB_GSO_UDP_TUNNEL) && + (skb->inner_protocol_type != ENCAP_TYPE_ETHER || + skb->inner_protocol != htons(ETH_P_TEB) || + (skb_inner_mac_header(skb) - skb_transport_header(skb) != + sizeof(struct udphdr) + sizeof(struct vxlanhdr)))) + return false; + + return true; +} /* IP header + UDP + VXLAN + Ethernet header */ #define VXLAN_HEADROOM (20 + 8 + 8 + 14) From 280ba51d60be6f4ca3347eaa60783314f38df72e Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 18 Nov 2014 22:35:31 +0100 Subject: [PATCH 129/208] mac80211: minstrel_ht: fix a crash in rate sorting The commit 5935839ad73583781b8bbe8d91412f6826e218a4 "mac80211: improve minstrel_ht rate sorting by throughput & probability" introduced a crash on rate sorting that occurs when the rate added to the sorting array is faster than all the previous rates. Due to an off-by-one error, it reads the rate index from tp_list[-1], which contains uninitialized stack garbage, and then uses the resulting index for accessing the group rate stats, leading to a crash if the garbage value is big enough. Cc: Thomas Huehn Reported-by: Jouni Malinen Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/rc80211_minstrel_ht.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index df90ce2db00c..408fd8ab4eef 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -252,19 +252,16 @@ minstrel_ht_sort_best_tp_rates(struct minstrel_ht_sta *mi, u8 index, cur_thr = mi->groups[cur_group].rates[cur_idx].cur_tp; cur_prob = mi->groups[cur_group].rates[cur_idx].probability; - tmp_group = tp_list[j - 1] / MCS_GROUP_RATES; - tmp_idx = tp_list[j - 1] % MCS_GROUP_RATES; - tmp_thr = mi->groups[tmp_group].rates[tmp_idx].cur_tp; - tmp_prob = mi->groups[tmp_group].rates[tmp_idx].probability; - - while (j > 0 && (cur_thr > tmp_thr || - (cur_thr == tmp_thr && cur_prob > tmp_prob))) { - j--; + do { tmp_group = tp_list[j - 1] / MCS_GROUP_RATES; tmp_idx = tp_list[j - 1] % MCS_GROUP_RATES; tmp_thr = mi->groups[tmp_group].rates[tmp_idx].cur_tp; tmp_prob = mi->groups[tmp_group].rates[tmp_idx].probability; - } + if (cur_thr < tmp_thr || + (cur_thr == tmp_thr && cur_prob <= tmp_prob)) + break; + j--; + } while (j > 0); if (j < MAX_THR_RATES - 1) { memmove(&tp_list[j + 1], &tp_list[j], (sizeof(*tp_list) * From 6bab4a8a1888729f17f4923cc5867e4674f66333 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Tue, 18 Nov 2014 23:59:33 +0100 Subject: [PATCH 130/208] clockevent: sun4i: Fix race condition in the probe code The interrupts were activated and the handler registered before the clockevent was registered in the probe function. The interrupt handler, however, was making the assumption that the clockevent device was registered. That could cause a null pointer dereference if the timer interrupt was firing during this narrow window. Fix that by moving the clockevent registration before the interrupt is enabled. Reported-by: Roman Byshko Signed-off-by: Maxime Ripard Cc: stable@vger.kernel.org Signed-off-by: Daniel Lezcano --- drivers/clocksource/sun4i_timer.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/clocksource/sun4i_timer.c b/drivers/clocksource/sun4i_timer.c index efb17c3ee120..f4a9c0058b4d 100644 --- a/drivers/clocksource/sun4i_timer.c +++ b/drivers/clocksource/sun4i_timer.c @@ -182,6 +182,12 @@ static void __init sun4i_timer_init(struct device_node *node) /* Make sure timer is stopped before playing with interrupts */ sun4i_clkevt_time_stop(0); + sun4i_clockevent.cpumask = cpu_possible_mask; + sun4i_clockevent.irq = irq; + + clockevents_config_and_register(&sun4i_clockevent, rate, + TIMER_SYNC_TICKS, 0xffffffff); + ret = setup_irq(irq, &sun4i_timer_irq); if (ret) pr_warn("failed to setup irq %d\n", irq); @@ -189,12 +195,6 @@ static void __init sun4i_timer_init(struct device_node *node) /* Enable timer0 interrupt */ val = readl(timer_base + TIMER_IRQ_EN_REG); writel(val | TIMER_IRQ_EN(0), timer_base + TIMER_IRQ_EN_REG); - - sun4i_clockevent.cpumask = cpu_possible_mask; - sun4i_clockevent.irq = irq; - - clockevents_config_and_register(&sun4i_clockevent, rate, - TIMER_SYNC_TICKS, 0xffffffff); } CLOCKSOURCE_OF_DECLARE(sun4i, "allwinner,sun4i-a10-timer", sun4i_timer_init); From 21255dad9dffa4407cab866f5561cb9568f7f7d8 Mon Sep 17 00:00:00 2001 From: James Cowgill Date: Thu, 13 Nov 2014 11:08:06 +0000 Subject: [PATCH 131/208] MIPS: Loongson3: Fix __node_distances undefined error export the __node_distances symbol in the loongson3 numa code to fix the build error: Building modules, stage 2. MODPOST 221 modules ERROR: "__node_distances" [drivers/block/nvme.ko] undefined! scripts/Makefile.modpost:90: recipe for target '__modpost' failed when building the kernel with: CONFIG_CPU_LOONGSON3=y CONFIG_NUMA=y CONFIG_BLK_DEV_NVME=m Signed-off-by: James Cowgill Cc: # v3.17+ Reviewed-by: James Hogan Reviewed-by: Huacai Chen Cc: linux-mips@linux-mips.org Cc: Wei Yongjun Patchwork: https://patchwork.linux-mips.org/patch/8444/ Signed-off-by: Ralf Baechle --- arch/mips/loongson/loongson-3/numa.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/loongson/loongson-3/numa.c b/arch/mips/loongson/loongson-3/numa.c index 37ed184398c6..42323bcc5d28 100644 --- a/arch/mips/loongson/loongson-3/numa.c +++ b/arch/mips/loongson/loongson-3/numa.c @@ -33,6 +33,7 @@ static struct node_data prealloc__node_data[MAX_NUMNODES]; unsigned char __node_distances[MAX_NUMNODES][MAX_NUMNODES]; +EXPORT_SYMBOL(__node_distances); struct node_data *__node_data[MAX_NUMNODES]; EXPORT_SYMBOL(__node_data); From 5829b0ecc584d15ae4eeabe69f2ab554bdec4689 Mon Sep 17 00:00:00 2001 From: James Cowgill Date: Thu, 13 Nov 2014 11:08:07 +0000 Subject: [PATCH 132/208] MIPS: IP27: Fix __node_distances undefined error export the __node_distances symbol in the ip27 memory code to fix the build error: Building modules, stage 2. MODPOST 311 modules ERROR: "__node_distances" [drivers/block/nvme.ko] undefined! scripts/Makefile.modpost:90: recipe for target '__modpost' failed when building the kernel with: CONFIG_SGI_IP27=y CONFIG_BLK_DEV_NVME=m Signed-off-by: James Cowgill Cc: # v3.15+ Reviewed-by: James Hogan Signed-off-by: Ralf Baechle --- arch/mips/sgi-ip27/ip27-memory.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index a95c00f5fb96..a304bcc37e4f 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -107,6 +107,7 @@ static void router_recurse(klrou_t *router_a, klrou_t *router_b, int depth) } unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES]; +EXPORT_SYMBOL(__node_distances); static int __init compute_node_distance(nasid_t nasid_a, nasid_t nasid_b) { From cc94ea31150f83d1f70ad854c920dd6b739c1628 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 4 Nov 2014 14:13:22 +0800 Subject: [PATCH 133/208] MIPS: Loongson: Fix the write-combine CCA value setting All Loongson-2/3 processors support _CACHE_UNCACHED_ACCELERATED, not only Loongson-3A. Signed-off-by: Huacai Chen Cc: John Crispin Cc: Steven J. Hill Cc: linux-mips@linux-mips.org Cc: Fuxin Zhang Cc: Zhangjin Wu Patchwork: https://patchwork.linux-mips.org/patch/8319/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/cpu-probe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index 94c4a0c0a577..08dc945f7608 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -765,7 +765,6 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) break; case PRID_REV_LOONGSON3A: c->cputype = CPU_LOONGSON3; - c->writecombine = _CACHE_UNCACHED_ACCELERATED; __cpu_name[cpu] = "ICT Loongson-3"; set_elf_platform(cpu, "loongson3a"); break; @@ -782,6 +781,7 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) MIPS_CPU_FPU | MIPS_CPU_LLSC | MIPS_CPU_32FPR; c->tlbsize = 64; + c->writecombine = _CACHE_UNCACHED_ACCELERATED; break; case PRID_IMP_LOONGSON_32: /* Loongson-1 */ decode_configs(c); From 7352c8b13dd9a848b0c5d6209d62761afb164dcb Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 4 Nov 2014 14:13:23 +0800 Subject: [PATCH 134/208] MIPS: Loongson: Set Loongson-3's ISA level to MIPS64R1 In CPU manual Loongson-3 is MIPS64R2 compatible, but during tests we found that its EI/DI instructions have problems. So we just set the ISA level to MIPS64R1. Signed-off-by: Huacai Chen Cc: John Crispin Cc: Steven J. Hill Cc: linux-mips@linux-mips.org Cc: Fuxin Zhang Cc: Zhangjin Wu Patchwork: https://patchwork.linux-mips.org/patch/8320/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/mach-loongson/cpu-feature-overrides.h | 2 -- arch/mips/kernel/cpu-probe.c | 5 ++++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/mips/include/asm/mach-loongson/cpu-feature-overrides.h b/arch/mips/include/asm/mach-loongson/cpu-feature-overrides.h index 7d28f95b0512..6d69332f21ec 100644 --- a/arch/mips/include/asm/mach-loongson/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-loongson/cpu-feature-overrides.h @@ -41,10 +41,8 @@ #define cpu_has_mcheck 0 #define cpu_has_mdmx 0 #define cpu_has_mips16 0 -#define cpu_has_mips32r1 0 #define cpu_has_mips32r2 0 #define cpu_has_mips3d 0 -#define cpu_has_mips64r1 0 #define cpu_has_mips64r2 0 #define cpu_has_mipsmt 0 #define cpu_has_prefetch 0 diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index 08dc945f7608..d5a4f380b019 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -757,26 +757,29 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) c->cputype = CPU_LOONGSON2; __cpu_name[cpu] = "ICT Loongson-2"; set_elf_platform(cpu, "loongson2e"); + set_isa(c, MIPS_CPU_ISA_III); break; case PRID_REV_LOONGSON2F: c->cputype = CPU_LOONGSON2; __cpu_name[cpu] = "ICT Loongson-2"; set_elf_platform(cpu, "loongson2f"); + set_isa(c, MIPS_CPU_ISA_III); break; case PRID_REV_LOONGSON3A: c->cputype = CPU_LOONGSON3; __cpu_name[cpu] = "ICT Loongson-3"; set_elf_platform(cpu, "loongson3a"); + set_isa(c, MIPS_CPU_ISA_M64R1); break; case PRID_REV_LOONGSON3B_R1: case PRID_REV_LOONGSON3B_R2: c->cputype = CPU_LOONGSON3; __cpu_name[cpu] = "ICT Loongson-3"; set_elf_platform(cpu, "loongson3b"); + set_isa(c, MIPS_CPU_ISA_M64R1); break; } - set_isa(c, MIPS_CPU_ISA_III); c->options = R4K_OPTS | MIPS_CPU_FPU | MIPS_CPU_LLSC | MIPS_CPU_32FPR; From bbaf113a481b6ce32444c125807ad3618643ce57 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Fri, 17 Oct 2014 18:10:24 +0300 Subject: [PATCH 135/208] MIPS: oprofile: Fix backtrace on 64-bit kernel Fix incorrect cast that always results in wrong address for the new frame on 64-bit kernels. Signed-off-by: Aaro Koskinen Cc: stable@vger.kernel.org Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/8110/ Signed-off-by: Ralf Baechle --- arch/mips/oprofile/backtrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/oprofile/backtrace.c b/arch/mips/oprofile/backtrace.c index 6854ed5097d2..83a1dfd8f0e3 100644 --- a/arch/mips/oprofile/backtrace.c +++ b/arch/mips/oprofile/backtrace.c @@ -92,7 +92,7 @@ static inline int unwind_user_frame(struct stackframe *old_frame, /* This marks the end of the previous function, which means we overran. */ break; - stack_size = (unsigned) stack_adjustment; + stack_size = (unsigned long) stack_adjustment; } else if (is_ra_save_ins(&ip)) { int ra_slot = ip.i_format.simmediate; if (ra_slot < 0) From 58563817cfed0432e9a54476d5fc6c3aeba475e4 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Mon, 17 Nov 2014 09:30:23 +0000 Subject: [PATCH 136/208] MIPS: asm: uaccess: Add v1 register to clobber list on EVA When EVA is turned on and prefetching is being used in memcpy.S, the v1 register is being used as a helper register to the PREFE instruction. However, v1 ($3) was not in the clobber list, which means that the compiler did not preserve it across function calls, and that could corrupt the value of the register leading to all sorts of userland crashes. We fix this problem by using the DADDI_SCRATCH macro to define the clobbered register when CONFIG_EVA && CONFIG_CPU_HAS_PREFETCH are enabled. Signed-off-by: Markos Chandras Cc: # v3.15+ Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/8510/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/uaccess.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/mips/include/asm/uaccess.h b/arch/mips/include/asm/uaccess.h index a10951090234..b9ab717e3619 100644 --- a/arch/mips/include/asm/uaccess.h +++ b/arch/mips/include/asm/uaccess.h @@ -773,10 +773,11 @@ extern void __put_user_unaligned_unknown(void); "jal\t" #destination "\n\t" #endif -#ifndef CONFIG_CPU_DADDI_WORKAROUNDS -#define DADDI_SCRATCH "$0" -#else +#if defined(CONFIG_CPU_DADDI_WORKAROUNDS) || (defined(CONFIG_EVA) && \ + defined(CONFIG_CPU_HAS_PREFETCH)) #define DADDI_SCRATCH "$3" +#else +#define DADDI_SCRATCH "$0" #endif extern size_t __copy_user(void *__to, const void *__from, size_t __n); From 6a8dff6ab16c903b0d8ef5fbf21543f39bf5d675 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Mon, 17 Nov 2014 09:31:07 +0000 Subject: [PATCH 137/208] MIPS: tlb-r4k: Add missing HTW stop/start sequences HTW needs to stop and start again whenever the EntryHI register changes otherwise an inflight HTW operation might use the new EntryHI register for updating an old entry and that could lead to crashes or even a machine check exception. We fix this by ensuring the HTW has stop whenever the EntryHI register is about to change Signed-off-by: Markos Chandras Cc: # v3.17+ Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/8511/ Signed-off-by: Ralf Baechle --- arch/mips/mm/tlb-r4k.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c index fa6ebd4bc9e9..c3917e251f59 100644 --- a/arch/mips/mm/tlb-r4k.c +++ b/arch/mips/mm/tlb-r4k.c @@ -299,6 +299,7 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte) local_irq_save(flags); + htw_stop(); pid = read_c0_entryhi() & ASID_MASK; address &= (PAGE_MASK << 1); write_c0_entryhi(address | pid); @@ -346,6 +347,7 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte) tlb_write_indexed(); } tlbw_use_hazard(); + htw_start(); flush_itlb_vm(vma); local_irq_restore(flags); } @@ -422,6 +424,7 @@ __init int add_temporary_entry(unsigned long entrylo0, unsigned long entrylo1, local_irq_save(flags); /* Save old context and create impossible VPN2 value */ + htw_stop(); old_ctx = read_c0_entryhi(); old_pagemask = read_c0_pagemask(); wired = read_c0_wired(); @@ -443,6 +446,7 @@ __init int add_temporary_entry(unsigned long entrylo0, unsigned long entrylo1, write_c0_entryhi(old_ctx); write_c0_pagemask(old_pagemask); + htw_start(); out: local_irq_restore(flags); return ret; From 51b1029d9966060c6ad02030e6f251425b4f06c1 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Mon, 17 Nov 2014 09:32:38 +0000 Subject: [PATCH 138/208] MIPS: lib: memcpy: Restore NOP on delay slot before returning to caller Commit cf62a8b8134dd3 ("MIPS: lib: memcpy: Use macro to build the copy_user code") switched to a macro in order to build the memcpy symbols in preparation for the EVA support. However, this commit also removed the NOP instruction after the 'jr ra' when returning back to the caller. This had no visible side-effects since the next instruction was a load to the t0 register which was already in the clobbered list, but it may have undesired effects in the future if some other code is introduced in between the .Ldone and the .Ll_exc_copy labels. Signed-off-by: Markos Chandras Cc: # v3.15+ Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/8512/ Signed-off-by: Ralf Baechle --- arch/mips/lib/memcpy.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S index c17ef80cf65a..5d3238af9b5c 100644 --- a/arch/mips/lib/memcpy.S +++ b/arch/mips/lib/memcpy.S @@ -503,6 +503,7 @@ STOREB(t0, NBYTES-2(dst), .Ls_exc_p1\@) .Ldone\@: jr ra + nop .if __memcpy == 1 END(memcpy) .set __memcpy, 0 From 640465bda58c7078725201be7430c31a349121e9 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Tue, 18 Nov 2014 18:47:13 +0100 Subject: [PATCH 139/208] MIPS: Zero variable read by get_user / __get_user in case of an error. This wasn't happening in all cases. Signed-off-by: Ralf Baechle --- arch/mips/include/asm/uaccess.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/mips/include/asm/uaccess.h b/arch/mips/include/asm/uaccess.h index b9ab717e3619..133678ab4eb8 100644 --- a/arch/mips/include/asm/uaccess.h +++ b/arch/mips/include/asm/uaccess.h @@ -301,7 +301,8 @@ do { \ __get_kernel_common((x), size, __gu_ptr); \ else \ __get_user_common((x), size, __gu_ptr); \ - } \ + } else \ + (x) = 0; \ \ __gu_err; \ }) @@ -316,6 +317,7 @@ do { \ " .insn \n" \ " .section .fixup,\"ax\" \n" \ "3: li %0, %4 \n" \ + " move %1, $0 \n" \ " j 2b \n" \ " .previous \n" \ " .section __ex_table,\"a\" \n" \ @@ -630,6 +632,7 @@ do { \ " .insn \n" \ " .section .fixup,\"ax\" \n" \ "3: li %0, %4 \n" \ + " move %1, $0 \n" \ " j 2b \n" \ " .previous \n" \ " .section __ex_table,\"a\" \n" \ From 99436f7d69045800ffd1d66912f85d37150c7e2b Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 17 Nov 2014 16:09:54 +0000 Subject: [PATCH 140/208] MIPS: jump_label.c: Correct the span of the J instruction Correct the check for the span of the 256MB segment addressable by the J instruction according to this instruction's semantics. The calculation of the jump target is applied to the address of the delay-slot instruction that immediately follows. Adjust the check accordingly by adding 4 to `e->code' that holds the address of the J instruction itself. Signed-off-by: Maciej W. Rozycki Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/8515/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/jump_label.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/kernel/jump_label.c b/arch/mips/kernel/jump_label.c index 6001610cfe55..f65a843e883b 100644 --- a/arch/mips/kernel/jump_label.c +++ b/arch/mips/kernel/jump_label.c @@ -27,8 +27,8 @@ void arch_jump_label_transform(struct jump_entry *e, union mips_instruction *insn_p = (union mips_instruction *)(unsigned long)e->code; - /* Jump only works within a 256MB aligned region. */ - BUG_ON((e->target & ~J_RANGE_MASK) != (e->code & ~J_RANGE_MASK)); + /* Jump only works within a 256MB aligned region of its delay slot. */ + BUG_ON((e->target & ~J_RANGE_MASK) != ((e->code + 4) & ~J_RANGE_MASK)); /* Target must have 4 byte alignment. */ BUG_ON((e->target & 3) != 0); From 935c2dbec4d6d3163ee8e7409996904a734ad89a Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 17 Nov 2014 16:10:32 +0000 Subject: [PATCH 141/208] MIPS: jump_label.c: Handle the microMIPS J instruction encoding Implement the microMIPS encoding of the J instruction for the purpose of the static keys feature, fixing a crash early on in bootstrap as the kernel is unhappy seeing the ISA bit set in jump table entries. Make sure the ISA bit correctly reflects the instruction encoding chosen for the kernel, 0 for the standard MIPS and 1 for the microMIPS encoding. Also make sure the instruction to patch is a 32-bit NOP in the microMIPS mode as by default the 16-bit short encoding is assumed Signed-off-by: Maciej W. Rozycki Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/8516/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/jump_label.h | 8 +++++- arch/mips/kernel/jump_label.c | 40 +++++++++++++++++++++++------- 2 files changed, 38 insertions(+), 10 deletions(-) diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h index e194f957ca8c..fdbff44e5482 100644 --- a/arch/mips/include/asm/jump_label.h +++ b/arch/mips/include/asm/jump_label.h @@ -20,9 +20,15 @@ #define WORD_INSN ".word" #endif +#ifdef CONFIG_CPU_MICROMIPS +#define NOP_INSN "nop32" +#else +#define NOP_INSN "nop" +#endif + static __always_inline bool arch_static_branch(struct static_key *key) { - asm_volatile_goto("1:\tnop\n\t" + asm_volatile_goto("1:\t" NOP_INSN "\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" WORD_INSN " 1b, %l[l_yes], %0\n\t" diff --git a/arch/mips/kernel/jump_label.c b/arch/mips/kernel/jump_label.c index f65a843e883b..dda800e9e731 100644 --- a/arch/mips/kernel/jump_label.c +++ b/arch/mips/kernel/jump_label.c @@ -18,31 +18,53 @@ #ifdef HAVE_JUMP_LABEL -#define J_RANGE_MASK ((1ul << 28) - 1) +/* + * Define parameters for the standard MIPS and the microMIPS jump + * instruction encoding respectively: + * + * - the ISA bit of the target, either 0 or 1 respectively, + * + * - the amount the jump target address is shifted right to fit in the + * immediate field of the machine instruction, either 2 or 1, + * + * - the mask determining the size of the jump region relative to the + * delay-slot instruction, either 256MB or 128MB, + * + * - the jump target alignment, either 4 or 2 bytes. + */ +#define J_ISA_BIT IS_ENABLED(CONFIG_CPU_MICROMIPS) +#define J_RANGE_SHIFT (2 - J_ISA_BIT) +#define J_RANGE_MASK ((1ul << (26 + J_RANGE_SHIFT)) - 1) +#define J_ALIGN_MASK ((1ul << J_RANGE_SHIFT) - 1) void arch_jump_label_transform(struct jump_entry *e, enum jump_label_type type) { + union mips_instruction *insn_p; union mips_instruction insn; - union mips_instruction *insn_p = - (union mips_instruction *)(unsigned long)e->code; - /* Jump only works within a 256MB aligned region of its delay slot. */ + insn_p = (union mips_instruction *)msk_isa16_mode(e->code); + + /* Jump only works within an aligned region its delay slot is in. */ BUG_ON((e->target & ~J_RANGE_MASK) != ((e->code + 4) & ~J_RANGE_MASK)); - /* Target must have 4 byte alignment. */ - BUG_ON((e->target & 3) != 0); + /* Target must have the right alignment and ISA must be preserved. */ + BUG_ON((e->target & J_ALIGN_MASK) != J_ISA_BIT); if (type == JUMP_LABEL_ENABLE) { - insn.j_format.opcode = j_op; - insn.j_format.target = (e->target & J_RANGE_MASK) >> 2; + insn.j_format.opcode = J_ISA_BIT ? mm_j32_op : j_op; + insn.j_format.target = e->target >> J_RANGE_SHIFT; } else { insn.word = 0; /* nop */ } get_online_cpus(); mutex_lock(&text_mutex); - *insn_p = insn; + if (IS_ENABLED(CONFIG_CPU_MICROMIPS)) { + insn_p->halfword[0] = insn.word >> 16; + insn_p->halfword[1] = insn.word; + } else + *insn_p = insn; flush_icache_range((unsigned long)insn_p, (unsigned long)insn_p + sizeof(*insn_p)); From 49e41938f8a53d01d1c5d133a038acb9650008f9 Mon Sep 17 00:00:00 2001 From: Tyler Baker Date: Tue, 18 Nov 2014 11:10:53 -0800 Subject: [PATCH 142/208] ARM: multi_v7_defconfig: fix failure setting CPU voltage by enabling dependent I2C controller This patch fixes a long standing issue introduced during the 3.16 merge window. Shortly after the merge, exynos5250-based arndale boards began to produce the following errors: kern.err kernel: exynos-cpufreq exynos-cpufreq: failed to set cpu voltage kern.err kernel: cpufreq: __target_index: Failed to change cpu frequency: -22 Further analysis revealed that the S5M8767 voltage regulator used on the exynos5250-based arndale board utilizes the S3C2410 I2C controller. If the S3C2410 I2C controller driver is not enabled, the S5M8767 voltage regulator fails to probe. Therefore a dependency exists between these two drivers. In the exynos_defconfig both CONFIG_REGULATOR_S5M8767 and CONFIG_I2C_S3C2410 options are enabled, and no errors are produced. However, in the multi_v7_defconfig only the CONFIG_REGULATOR_S5M8767 option is enabled and the errors are present. So let's enable the CONFIG_I2C_S3C2410 option in the multi_v7_defconfig to allow the S5M8767 voltage regulator to probe. Signed-off-by: Tyler Baker Acked-by: Kukjin Kim Signed-off-by: Kevin Hilman --- arch/arm/configs/multi_v7_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index 3487046d8a78..9d7a32f93fcf 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -217,6 +217,7 @@ CONFIG_I2C_CADENCE=y CONFIG_I2C_DESIGNWARE_PLATFORM=y CONFIG_I2C_EXYNOS5=y CONFIG_I2C_MV64XXX=y +CONFIG_I2C_S3C2410=y CONFIG_I2C_SIRF=y CONFIG_I2C_TEGRA=y CONFIG_I2C_ST=y From c1a2086e2d8c4eb4e8630ba752e911ec180dec67 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Wed, 19 Nov 2014 16:22:32 +0000 Subject: [PATCH 143/208] of/selftest: Fix off-by-one error in removal path The removal path for selftest data has an off by one error that causes the code to dereference beyond the end of the nodes[] array on the first pass through. The old code only worked by chance on a lot of platforms, but the bug was recently exposed on aarch64. The fix is simple. Decrement the node count before dereferencing, not after. Reported-by: Kevin Hilman Cc: Rob Herring Cc: Gaurav Minocha Cc: # v3.17+ --- drivers/of/selftest.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/of/selftest.c b/drivers/of/selftest.c index 11b873c54a77..e6c14dc400e9 100644 --- a/drivers/of/selftest.c +++ b/drivers/of/selftest.c @@ -896,7 +896,7 @@ static void selftest_data_remove(void) return; } - while (last_node_index >= 0) { + while (last_node_index-- > 0) { if (nodes[last_node_index]) { np = of_find_node_by_path(nodes[last_node_index]->full_name); if (strcmp(np->full_name, "/aliases") != 0) { @@ -908,7 +908,6 @@ static void selftest_data_remove(void) } } } - last_node_index--; } } From 4a7795d35e252f38298980530e01e21867f8f856 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Wed, 19 Nov 2014 15:50:34 +0800 Subject: [PATCH 144/208] vfs: fix reference leak in d_prune_aliases() In "d_prune_alias(): just lock the parent and call __dentry_kill()" the old dget + d_drop + dput has been replaced with lock_parent + __dentry_kill; unfortunately, dput() does more than just killing dentry - it also drops the reference to parent. New variant leaks that reference and needs dput(parent) after killing the child off. Signed-off-by: Yan, Zheng Signed-off-by: Al Viro --- fs/dcache.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/dcache.c b/fs/dcache.c index 3ffef7f4e5cd..5bc72b07fde2 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -778,6 +778,7 @@ restart: struct dentry *parent = lock_parent(dentry); if (likely(!dentry->d_lockref.count)) { __dentry_kill(dentry); + dput(parent); goto restart; } if (parent) From 7ca2f234404e738cc807ed87c43f9932513bc8c6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 19 Nov 2014 15:11:24 +0100 Subject: [PATCH 145/208] isofs: avoid unused function warning With the isofs_hash() function removed, isofs_hash_ms() is the only user of isofs_hash_common(), but it's defined inside of an #ifdef, which triggers this gcc warning in ARM axm55xx_defconfig starting with v3.18-rc3: fs/isofs/inode.c:177:1: warning: 'isofs_hash_common' defined but not used [-Wunused-function] This patch moves the function inside of the same #ifdef section to avoid that warning, which seems the best compromise of a relatively harmless patch for a late -rc. Signed-off-by: Arnd Bergmann Fixes: b0afd8e5db7b ("isofs: don't bother with ->d_op for normal case") Signed-off-by: Al Viro --- fs/isofs/inode.c | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index fe839b915116..d67a16f2a45d 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -170,27 +170,6 @@ struct iso9660_options{ s32 sbsector; }; -/* - * Compute the hash for the isofs name corresponding to the dentry. - */ -static int -isofs_hash_common(struct qstr *qstr, int ms) -{ - const char *name; - int len; - - len = qstr->len; - name = qstr->name; - if (ms) { - while (len && name[len-1] == '.') - len--; - } - - qstr->hash = full_name_hash(name, len); - - return 0; -} - /* * Compute the hash for the isofs name corresponding to the dentry. */ @@ -263,6 +242,27 @@ isofs_dentry_cmpi(const struct dentry *parent, const struct dentry *dentry, } #ifdef CONFIG_JOLIET +/* + * Compute the hash for the isofs name corresponding to the dentry. + */ +static int +isofs_hash_common(struct qstr *qstr, int ms) +{ + const char *name; + int len; + + len = qstr->len; + name = qstr->name; + if (ms) { + while (len && name[len-1] == '.') + len--; + } + + qstr->hash = full_name_hash(name, len); + + return 0; +} + static int isofs_hash_ms(const struct dentry *dentry, struct qstr *qstr) { From f82c458a2c3ffb94b431fc6ad791a79df1b3713e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 19 Nov 2014 10:25:09 -0800 Subject: [PATCH 146/208] btrfs: fix lockups from btrfs_clear_path_blocking The fair reader/writer locks mean that btrfs_clear_path_blocking needs to strictly follow lock ordering rules even when we already have blocking locks on a given path. Before we can clear a blocking lock on the path, we need to make sure all of the locks have been converted to blocking. This will remove lock inversions against anyone spinning in write_lock() against the buffers we're trying to get read locks on. These inversions didn't exist before the fair read/writer locks, but now we need to be more careful. We papered over this deadlock in the past by changing btrfs_try_read_lock() to be a true trylock against both the spinlock and the blocking lock. This was slower, and not sufficient to fix all the deadlocks. This patch adds a btrfs_tree_read_lock_atomic(), which basically means get the spinlock but trylock on the blocking lock. Signed-off-by: Chris Mason Signed-off-by: Josef Bacik Reported-by: Patrick Schmid cc: stable@vger.kernel.org #v3.15+ --- fs/btrfs/ctree.c | 14 ++------------ fs/btrfs/locking.c | 24 +++++++++++++++++++++--- fs/btrfs/locking.h | 2 ++ 3 files changed, 25 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 19bc6162fb8e..150822ee0a0b 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -80,13 +80,6 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p, { int i; -#ifdef CONFIG_DEBUG_LOCK_ALLOC - /* lockdep really cares that we take all of these spinlocks - * in the right order. If any of the locks in the path are not - * currently blocking, it is going to complain. So, make really - * really sure by forcing the path to blocking before we clear - * the path blocking. - */ if (held) { btrfs_set_lock_blocking_rw(held, held_rw); if (held_rw == BTRFS_WRITE_LOCK) @@ -95,7 +88,6 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p, held_rw = BTRFS_READ_LOCK_BLOCKING; } btrfs_set_path_blocking(p); -#endif for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) { if (p->nodes[i] && p->locks[i]) { @@ -107,10 +99,8 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p, } } -#ifdef CONFIG_DEBUG_LOCK_ALLOC if (held) btrfs_clear_lock_blocking_rw(held, held_rw); -#endif } /* this also releases the path */ @@ -2893,7 +2883,7 @@ cow_done: } p->locks[level] = BTRFS_WRITE_LOCK; } else { - err = btrfs_try_tree_read_lock(b); + err = btrfs_tree_read_lock_atomic(b); if (!err) { btrfs_set_path_blocking(p); btrfs_tree_read_lock(b); @@ -3025,7 +3015,7 @@ again: } level = btrfs_header_level(b); - err = btrfs_try_tree_read_lock(b); + err = btrfs_tree_read_lock_atomic(b); if (!err) { btrfs_set_path_blocking(p); btrfs_tree_read_lock(b); diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 5665d2149249..f8229ef1b46d 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -127,6 +127,26 @@ again: atomic_inc(&eb->spinning_readers); } +/* + * take a spinning read lock. + * returns 1 if we get the read lock and 0 if we don't + * this won't wait for blocking writers + */ +int btrfs_tree_read_lock_atomic(struct extent_buffer *eb) +{ + if (atomic_read(&eb->blocking_writers)) + return 0; + + read_lock(&eb->lock); + if (atomic_read(&eb->blocking_writers)) { + read_unlock(&eb->lock); + return 0; + } + atomic_inc(&eb->read_locks); + atomic_inc(&eb->spinning_readers); + return 1; +} + /* * returns 1 if we get the read lock and 0 if we don't * this won't wait for blocking writers @@ -158,9 +178,7 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb) atomic_read(&eb->blocking_readers)) return 0; - if (!write_trylock(&eb->lock)) - return 0; - + write_lock(&eb->lock); if (atomic_read(&eb->blocking_writers) || atomic_read(&eb->blocking_readers)) { write_unlock(&eb->lock); diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h index b81e0e9a4894..c44a9d5f5362 100644 --- a/fs/btrfs/locking.h +++ b/fs/btrfs/locking.h @@ -35,6 +35,8 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw); void btrfs_assert_tree_locked(struct extent_buffer *eb); int btrfs_try_tree_read_lock(struct extent_buffer *eb); int btrfs_try_tree_write_lock(struct extent_buffer *eb); +int btrfs_tree_read_lock_atomic(struct extent_buffer *eb); + static inline void btrfs_tree_unlock_rw(struct extent_buffer *eb, int rw) { From b8e4500f42fe4464a33a887579147050bed8fcef Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 18 Nov 2014 15:14:44 +0100 Subject: [PATCH 147/208] bonding: fix curr_active_slave/carrier with loadbalance arp monitoring Since commit 6fde8f037e60 ("bonding: fix locking in bond_loadbalance_arp_mon()") we can have a stale bond carrier state and stale curr_active_slave when using arp monitoring in loadbalance modes. The reason is that in bond_loadbalance_arp_mon() we can't have do_failover == true but slave_state_changed == false, whenever do_failover is true then slave_state_changed is also true. Then the following piece from bond_loadbalance_arp_mon(): if (slave_state_changed) { bond_slave_state_change(bond); if (BOND_MODE(bond) == BOND_MODE_XOR) bond_update_slave_arr(bond, NULL); } else if (do_failover) { block_netpoll_tx(); bond_select_active_slave(bond); unblock_netpoll_tx(); } will execute only the first branch, always and regardless of do_failover. Since these two events aren't related in such way, we need to decouple and consider them separately. For example this issue could lead to the following result: Bonding Mode: load balancing (round-robin) *MII Status: down* MII Polling Interval (ms): 0 Up Delay (ms): 0 Down Delay (ms): 0 ARP Polling Interval (ms): 100 ARP IP target/s (n.n.n.n form): 192.168.9.2 Slave Interface: ens12 *MII Status: up* Speed: 10000 Mbps Duplex: full Link Failure Count: 2 Permanent HW addr: 00:0f:53:01:42:2c Slave queue ID: 0 Slave Interface: eth1 *MII Status: up* Speed: Unknown Duplex: Unknown Link Failure Count: 70 Permanent HW addr: 52:54:00:2f:0f:8e Slave queue ID: 0 Since some interfaces are up, then the status of the bond should also be up, but it will never change unless something invokes bond_set_carrier() (i.e. enslave, bond_select_active_slave etc). Now, if I force the calling of bond_select_active_slave via for example changing primary_reselect (it can change in any mode), then the MII status goes to "up" because it calls bond_select_active_slave() which should've been done from bond_loadbalance_arp_mon() itself. CC: Veaceslav Falico CC: Jay Vosburgh CC: Andy Gospodarek CC: Ding Tianhong Fixes: 6fde8f037e60 ("bonding: fix locking in bond_loadbalance_arp_mon()") Signed-off-by: Nikolay Aleksandrov Acked-by: Veaceslav Falico Acked-by: Andy Gospodarek Acked-by: Ding Tianhong Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index c9ac06cfe6b7..a5115fb7cf33 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2471,7 +2471,8 @@ static void bond_loadbalance_arp_mon(struct work_struct *work) bond_slave_state_change(bond); if (BOND_MODE(bond) == BOND_MODE_XOR) bond_update_slave_arr(bond, NULL); - } else if (do_failover) { + } + if (do_failover) { block_netpoll_tx(); bond_select_active_slave(bond); unblock_netpoll_tx(); From 9737c6ab7afbc950e997ef80cba2c40dbbd16ea4 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 18 Nov 2014 17:51:27 +0200 Subject: [PATCH 148/208] net/mlx4_en: Add VXLAN ndo calls to the PF net device ops too This is currently missing, which results in a crash when one attempts to set VXLAN tunnel over the mlx4_en when acting as PF. [ 2408.785472] BUG: unable to handle kernel NULL pointer dereference at (null) [...] [ 2408.994104] Call Trace: [ 2408.996584] [] ? vxlan_get_rx_port+0xd6/0x103 [vxlan] [ 2409.003316] [] ? vxlan_lowerdev_event+0xf2/0xf2 [vxlan] [ 2409.010225] [] mlx4_en_start_port+0x862/0x96a [mlx4_en] [ 2409.017132] [] mlx4_en_open+0x17f/0x1b8 [mlx4_en] While here, make sure to invoke vxlan_get_rx_port() only when VXLAN offloads are actually enabled and not when they are only supported. Reported-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index c5fcc56ec06b..4d69e382b4e5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1693,7 +1693,7 @@ int mlx4_en_start_port(struct net_device *dev) mlx4_set_stats_bitmap(mdev->dev, &priv->stats_bitmap); #ifdef CONFIG_MLX4_EN_VXLAN - if (priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS) + if (priv->mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) vxlan_get_rx_port(dev); #endif priv->port_up = true; @@ -2422,6 +2422,11 @@ static const struct net_device_ops mlx4_netdev_ops_master = { .ndo_rx_flow_steer = mlx4_en_filter_rfs, #endif .ndo_get_phys_port_id = mlx4_en_get_phys_port_id, +#ifdef CONFIG_MLX4_EN_VXLAN + .ndo_add_vxlan_port = mlx4_en_add_vxlan_port, + .ndo_del_vxlan_port = mlx4_en_del_vxlan_port, + .ndo_gso_check = mlx4_en_gso_check, +#endif }; int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, From 7fc986d8a9727e5d40da3c2c1c343da6142e82a9 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 19 Nov 2014 14:30:32 -0700 Subject: [PATCH 149/208] PCI: Support 64-bit bridge windows if we have 64-bit dma_addr_t Aaron reported that a 32-bit x86 kernel with Physical Address Extension (PAE) support complains about bridge prefetchable memory windows above 4GB: pci_bus 0000:00: root bus resource [mem 0x380000000000-0x383fffffffff] ... pci 0000:03:00.0: reg 0x10: [mem 0x383fffc00000-0x383fffdfffff 64bit pref] pci 0000:03:00.0: reg 0x20: [mem 0x383fffe04000-0x383fffe07fff 64bit pref] pci 0000:03:00.1: reg 0x10: [mem 0x383fffa00000-0x383fffbfffff 64bit pref] pci 0000:03:00.1: reg 0x20: [mem 0x383fffe00000-0x383fffe03fff 64bit pref] pci 0000:00:02.2: PCI bridge to [bus 03-04] pci 0000:00:02.2: bridge window [io 0x1000-0x1fff] pci 0000:00:02.2: bridge window [mem 0x91900000-0x91cfffff] pci 0000:00:02.2: can't handle 64-bit address space for bridge In this kernel, unsigned long is 32 bits and dma_addr_t is 64 bits. Previously we used "unsigned long" to hold the bridge window address. But this is a bus address, so we should use dma_addr_t instead. Use dma_addr_t to hold the bridge window base and limit. The question of whether the CPU can actually *address* the window is separate and depends on what the physical address space of the CPU is and whether the host bridge does any address translation. [bhelgaas: fix "shift count > width of type", changelog, stable tag] Fixes: d56dbf5bab8c ("PCI: Allocate 64-bit BARs above 4G when possible") Link: https://bugzilla.kernel.org/show_bug.cgi?id=88131 Reported-by: Aaron Ma Tested-by: Aaron Ma Signed-off-by: Yinghai Lu Signed-off-by: Bjorn Helgaas CC: stable@vger.kernel.org # v3.14+ --- drivers/pci/probe.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 6244b1834dfe..c8ca98c2b480 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -407,15 +407,16 @@ static void pci_read_bridge_mmio_pref(struct pci_bus *child) { struct pci_dev *dev = child->self; u16 mem_base_lo, mem_limit_lo; - unsigned long base, limit; + u64 base64, limit64; + dma_addr_t base, limit; struct pci_bus_region region; struct resource *res; res = child->resource[2]; pci_read_config_word(dev, PCI_PREF_MEMORY_BASE, &mem_base_lo); pci_read_config_word(dev, PCI_PREF_MEMORY_LIMIT, &mem_limit_lo); - base = ((unsigned long) mem_base_lo & PCI_PREF_RANGE_MASK) << 16; - limit = ((unsigned long) mem_limit_lo & PCI_PREF_RANGE_MASK) << 16; + base64 = (mem_base_lo & PCI_PREF_RANGE_MASK) << 16; + limit64 = (mem_limit_lo & PCI_PREF_RANGE_MASK) << 16; if ((mem_base_lo & PCI_PREF_RANGE_TYPE_MASK) == PCI_PREF_RANGE_TYPE_64) { u32 mem_base_hi, mem_limit_hi; @@ -429,17 +430,20 @@ static void pci_read_bridge_mmio_pref(struct pci_bus *child) * this, just assume they are not being used. */ if (mem_base_hi <= mem_limit_hi) { -#if BITS_PER_LONG == 64 - base |= ((unsigned long) mem_base_hi) << 32; - limit |= ((unsigned long) mem_limit_hi) << 32; -#else - if (mem_base_hi || mem_limit_hi) { - dev_err(&dev->dev, "can't handle 64-bit address space for bridge\n"); - return; - } -#endif + base64 |= (u64) mem_base_hi << 32; + limit64 |= (u64) mem_limit_hi << 32; } } + + base = (dma_addr_t) base64; + limit = (dma_addr_t) limit64; + + if (base != base64) { + dev_err(&dev->dev, "can't handle bridge window above 4GB (bus address %#010llx)\n", + (unsigned long long) base64); + return; + } + if (base <= limit) { res->flags = (mem_base_lo & PCI_PREF_RANGE_TYPE_MASK) | IORESOURCE_MEM | IORESOURCE_PREFETCH; From ffb1388a364d135810337182d6800a0c7ee44f48 Mon Sep 17 00:00:00 2001 From: Duan Jiong Date: Wed, 19 Nov 2014 09:35:39 +0800 Subject: [PATCH 150/208] ipv6: delete protocol and unregister rtnetlink when cleanup pim6_protocol was added when initiation, but it not deleted. Similarly, unregister RTNL_FAMILY_IP6MR rtnetlink. Signed-off-by: Duan Jiong Reviewed-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 0171f08325c3..1a01d79b8698 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1439,6 +1439,10 @@ reg_pernet_fail: void ip6_mr_cleanup(void) { + rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE); +#ifdef CONFIG_IPV6_PIMSM_V2 + inet6_del_protocol(&pim6_protocol, IPPROTO_PIM); +#endif unregister_netdevice_notifier(&ip6_mr_notifier); unregister_pernet_subsys(&ip6mr_net_ops); kmem_cache_destroy(mrt_cachep); From ee7255ada313a6db99be47ce174b0bfb8295a041 Mon Sep 17 00:00:00 2001 From: Anish Bhatt Date: Tue, 18 Nov 2014 19:09:51 -0800 Subject: [PATCH 151/208] cxgb4i : Don't block unload/cxgb4 unload when remote closes TCP connection cxgb4i was returning wrong error and not releasing module reference if remote end abruptly closed TCP connection. This prevents the cxgb4 network module from being unloaded, further affecting other network drivers dependent on cxgb4 Sending to net as this affects all cxgb4 based network drivers. Signed-off-by: Anish Bhatt Signed-off-by: David S. Miller --- drivers/scsi/cxgbi/cxgb4i/cxgb4i.c | 2 ++ drivers/scsi/cxgbi/libcxgbi.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c index 3e0a0d315f72..f48f40ce9f87 100644 --- a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c +++ b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c @@ -828,6 +828,8 @@ static void do_act_open_rpl(struct cxgbi_device *cdev, struct sk_buff *skb) if (status == CPL_ERR_RTX_NEG_ADVICE) goto rel_skb; + module_put(THIS_MODULE); + if (status && status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST && status != CPL_ERR_ARP_MISS) diff --git a/drivers/scsi/cxgbi/libcxgbi.c b/drivers/scsi/cxgbi/libcxgbi.c index 674d498b46ab..9d63853c5fce 100644 --- a/drivers/scsi/cxgbi/libcxgbi.c +++ b/drivers/scsi/cxgbi/libcxgbi.c @@ -816,7 +816,7 @@ static void cxgbi_inform_iscsi_conn_closing(struct cxgbi_sock *csk) read_lock_bh(&csk->callback_lock); if (csk->user_data) iscsi_conn_failure(csk->user_data, - ISCSI_ERR_CONN_FAILED); + ISCSI_ERR_TCP_CONN_CLOSE); read_unlock_bh(&csk->callback_lock); } } From 78579b7c7eb45f0e7ec5e9437087ed21749f9a9c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 19 Nov 2014 01:44:11 +0100 Subject: [PATCH 152/208] ACPI / PM: Ignore wakeup setting if the ACPI companion can't wake up As reported by Dmitry, on some Chromebooks there are devices with corresponding ACPI objects and with unusual system wakeup configuration. Namely, they technically are wakeup-capable, but the wakeup is handled via a platform-specific out-of-band mechanism and the ACPI PM layer has no information on the wakeup capability. As a result, device_may_wakeup(dev) called from acpi_dev_suspend_late() returns 'true' for those devices, but the wakeup.flags.valid flag is unset for the corresponding ACPI device objects, so acpi_device_wakeup() reproducibly fails for them causing acpi_dev_suspend_late() to return an error code. The entire system suspend is then aborted and the machines in question cannot suspend at all. Address the problem by ignoring the device_may_wakeup(dev) return value in acpi_dev_suspend_late() if the ACPI companion of the device being handled has wakeup.flags.valid unset (in which case it is clear that the wakeup is supposed to be handled by other means). This fixes a regression introduced by commit a76e9bd89ae7 (i2c: attach/detach I2C client device to the ACPI power domain) as the affected systems could suspend and resume successfully before that commit. Fixes: a76e9bd89ae7 (i2c: attach/detach I2C client device to the ACPI power domain) Reported-by: Dmitry Torokhov Reviewed-by: Dmitry Torokhov Cc: 3.13+ # 3.13+ Signed-off-by: Rafael J. Wysocki --- drivers/acpi/device_pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index 143ec6ea1468..7db193160766 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -878,7 +878,7 @@ int acpi_dev_suspend_late(struct device *dev) return 0; target_state = acpi_target_system_state(); - wakeup = device_may_wakeup(dev); + wakeup = device_may_wakeup(dev) && acpi_device_can_wakeup(adev); error = acpi_device_wakeup(adev, target_state, wakeup); if (wakeup && error) return error; From b1a5ad006b34ded9dc7ec64988deba1b3ecad367 Mon Sep 17 00:00:00 2001 From: Chris Moore Date: Tue, 4 Nov 2014 16:28:29 +0000 Subject: [PATCH 153/208] IB/isert: Adjust CQ size to HW limits isert has an issue of trying to create a CQ with more CQEs than are supported by the hardware, that currently results in failures during isert_device creation during first session login. This is the isert version of the patch that Minh Tran submitted for iser, and is simple a workaround required to function with existing ocrdma hardware. Signed-off-by: Chris Moore Reviewied-by: Sagi Grimberg Cc: # 3.10+ Signed-off-by: Nicholas Bellinger --- drivers/infiniband/ulp/isert/ib_isert.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index a6c7b395c61d..10641b7816f4 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -228,12 +228,16 @@ isert_create_device_ib_res(struct isert_device *device) struct isert_cq_desc *cq_desc; struct ib_device_attr *dev_attr; int ret = 0, i, j; + int max_rx_cqe, max_tx_cqe; dev_attr = &device->dev_attr; ret = isert_query_device(ib_dev, dev_attr); if (ret) return ret; + max_rx_cqe = min(ISER_MAX_RX_CQ_LEN, dev_attr->max_cqe); + max_tx_cqe = min(ISER_MAX_TX_CQ_LEN, dev_attr->max_cqe); + /* asign function handlers */ if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS && dev_attr->device_cap_flags & IB_DEVICE_SIGNATURE_HANDOVER) { @@ -275,7 +279,7 @@ isert_create_device_ib_res(struct isert_device *device) isert_cq_rx_callback, isert_cq_event_callback, (void *)&cq_desc[i], - ISER_MAX_RX_CQ_LEN, i); + max_rx_cqe, i); if (IS_ERR(device->dev_rx_cq[i])) { ret = PTR_ERR(device->dev_rx_cq[i]); device->dev_rx_cq[i] = NULL; @@ -287,7 +291,7 @@ isert_create_device_ib_res(struct isert_device *device) isert_cq_tx_callback, isert_cq_event_callback, (void *)&cq_desc[i], - ISER_MAX_TX_CQ_LEN, i); + max_tx_cqe, i); if (IS_ERR(device->dev_tx_cq[i])) { ret = PTR_ERR(device->dev_tx_cq[i]); device->dev_tx_cq[i] = NULL; From 788ec2fc2ca295a2d929986e95231214ecd8d142 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Wed, 19 Nov 2014 17:13:44 +0000 Subject: [PATCH 154/208] of/selftest: Fix testing when /aliases is missing The /aliases node isn't always present in the device tree, but the unittest code assumes that /aliases is there. Add a check when inserting the testcase data to see if of_aliases needs to be updated, and undo the settings when the nodes are removed. Signed-off-by: Grant Likely Cc: Rob Herring Cc: Gaurav Minocha Cc: --- drivers/of/selftest.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/of/selftest.c b/drivers/of/selftest.c index e6c14dc400e9..e2d79afa9dc6 100644 --- a/drivers/of/selftest.c +++ b/drivers/of/selftest.c @@ -899,7 +899,11 @@ static void selftest_data_remove(void) while (last_node_index-- > 0) { if (nodes[last_node_index]) { np = of_find_node_by_path(nodes[last_node_index]->full_name); - if (strcmp(np->full_name, "/aliases") != 0) { + if (np == nodes[last_node_index]) { + if (of_aliases == np) { + of_node_put(of_aliases); + of_aliases = NULL; + } detach_node_and_children(np); } else { for_each_property_of_node(np, prop) { @@ -920,6 +924,8 @@ static int __init of_selftest(void) res = selftest_data_add(); if (res) return res; + if (!of_aliases) + of_aliases = of_find_node_by_path("/aliases"); np = of_find_node_by_path("/testcase-data/phandle-tests/consumer-a"); if (!np) { From ef94b1864d1ed5be54376404bb23d22ed0481feb Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 20 Nov 2014 16:39:59 +0100 Subject: [PATCH 155/208] ovl: rename filesystem type to "overlay" Some distributions carry an "old" format of overlayfs while mainline has a "new" format. The distros will possibly want to keep the old overlayfs alongside the new for compatibility reasons. To make it possible to differentiate the two versions change the name of the new one from "overlayfs" to "overlay". Signed-off-by: Miklos Szeredi Reported-by: Serge Hallyn Cc: Andy Whitcroft --- Documentation/filesystems/overlayfs.txt | 2 +- MAINTAINERS | 2 +- fs/Makefile | 2 +- fs/overlayfs/Kconfig | 2 +- fs/overlayfs/Makefile | 4 ++-- fs/overlayfs/super.c | 6 +++--- 6 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt index 530850a72735..a27c950ece61 100644 --- a/Documentation/filesystems/overlayfs.txt +++ b/Documentation/filesystems/overlayfs.txt @@ -64,7 +64,7 @@ is formed. At mount time, the two directories given as mount options "lowerdir" and "upperdir" are combined into a merged directory: - mount -t overlayfs overlayfs -olowerdir=/lower,upperdir=/upper,\ + mount -t overlay overlay -olowerdir=/lower,upperdir=/upper,\ workdir=/work /merged The "workdir" needs to be an empty directory on the same filesystem diff --git a/MAINTAINERS b/MAINTAINERS index c444907ccd69..af1e738e8772 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6888,7 +6888,7 @@ F: drivers/scsi/osd/ F: include/scsi/osd_* F: fs/exofs/ -OVERLAYFS FILESYSTEM +OVERLAY FILESYSTEM M: Miklos Szeredi L: linux-fsdevel@vger.kernel.org S: Supported diff --git a/fs/Makefile b/fs/Makefile index 34a1b9dea6dd..da0bbb456d3f 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -104,7 +104,7 @@ obj-$(CONFIG_QNX6FS_FS) += qnx6/ obj-$(CONFIG_AUTOFS4_FS) += autofs4/ obj-$(CONFIG_ADFS_FS) += adfs/ obj-$(CONFIG_FUSE_FS) += fuse/ -obj-$(CONFIG_OVERLAYFS_FS) += overlayfs/ +obj-$(CONFIG_OVERLAY_FS) += overlayfs/ obj-$(CONFIG_UDF_FS) += udf/ obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/ obj-$(CONFIG_OMFS_FS) += omfs/ diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig index e60125976873..34355818a2e0 100644 --- a/fs/overlayfs/Kconfig +++ b/fs/overlayfs/Kconfig @@ -1,4 +1,4 @@ -config OVERLAYFS_FS +config OVERLAY_FS tristate "Overlay filesystem support" help An overlay filesystem combines two filesystems - an 'upper' filesystem diff --git a/fs/overlayfs/Makefile b/fs/overlayfs/Makefile index 8f91889480d0..900daed3e91d 100644 --- a/fs/overlayfs/Makefile +++ b/fs/overlayfs/Makefile @@ -2,6 +2,6 @@ # Makefile for the overlay filesystem. # -obj-$(CONFIG_OVERLAYFS_FS) += overlayfs.o +obj-$(CONFIG_OVERLAY_FS) += overlay.o -overlayfs-objs := super.o inode.o dir.o readdir.o copy_up.o +overlay-objs := super.o inode.o dir.o readdir.o copy_up.o diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 08b704cebfc4..b92bd1829cf7 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -24,7 +24,7 @@ MODULE_AUTHOR("Miklos Szeredi "); MODULE_DESCRIPTION("Overlay filesystem"); MODULE_LICENSE("GPL"); -#define OVERLAYFS_SUPER_MAGIC 0x794c764f +#define OVERLAYFS_SUPER_MAGIC 0x794c7630 struct ovl_config { char *lowerdir; @@ -776,11 +776,11 @@ static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags, static struct file_system_type ovl_fs_type = { .owner = THIS_MODULE, - .name = "overlayfs", + .name = "overlay", .mount = ovl_mount, .kill_sb = kill_anon_super, }; -MODULE_ALIAS_FS("overlayfs"); +MODULE_ALIAS_FS("overlay"); static int __init ovl_init(void) { From a105d685a8483985a01776411de191a726b48132 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 20 Nov 2014 16:39:59 +0100 Subject: [PATCH 156/208] ovl: fix remove/copy-up race ovl_remove_and_whiteout() needs to check if upper dentry exists or not after having locked upper parent directory. Previously we used a "type" value computed before locking the upper parent directory, which is susceptible to racing with copy-up. There's a similar check in ovl_check_empty_and_clear(). This one is not actually racy, since copy-up doesn't change the "emptyness" property of a directory. Add a comment to this effect, and check the existence of upper dentry locally to make the code cleaner. Signed-off-by: Miklos Szeredi --- fs/overlayfs/dir.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 15cd91ad9940..8ffc4b980f1b 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -284,8 +284,7 @@ out: return ERR_PTR(err); } -static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry, - enum ovl_path_type type) +static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry) { int err; struct dentry *ret = NULL; @@ -294,8 +293,17 @@ static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry, err = ovl_check_empty_dir(dentry, &list); if (err) ret = ERR_PTR(err); - else if (type == OVL_PATH_MERGE) - ret = ovl_clear_empty(dentry, &list); + else { + /* + * If no upperdentry then skip clearing whiteouts. + * + * Can race with copy-up, since we don't hold the upperdir + * mutex. Doesn't matter, since copy-up can't create a + * non-empty directory from an empty one. + */ + if (ovl_dentry_upper(dentry)) + ret = ovl_clear_empty(dentry, &list); + } ovl_cache_free(&list); @@ -487,8 +495,7 @@ out: return err; } -static int ovl_remove_and_whiteout(struct dentry *dentry, - enum ovl_path_type type, bool is_dir) +static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir) { struct dentry *workdir = ovl_workdir(dentry); struct inode *wdir = workdir->d_inode; @@ -500,7 +507,7 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, int err; if (is_dir) { - opaquedir = ovl_check_empty_and_clear(dentry, type); + opaquedir = ovl_check_empty_and_clear(dentry); err = PTR_ERR(opaquedir); if (IS_ERR(opaquedir)) goto out; @@ -515,9 +522,10 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, if (IS_ERR(whiteout)) goto out_unlock; - if (type == OVL_PATH_LOWER) { + upper = ovl_dentry_upper(dentry); + if (!upper) { upper = lookup_one_len(dentry->d_name.name, upperdir, - dentry->d_name.len); + dentry->d_name.len); err = PTR_ERR(upper); if (IS_ERR(upper)) goto kill_whiteout; @@ -529,7 +537,6 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, } else { int flags = 0; - upper = ovl_dentry_upper(dentry); if (opaquedir) upper = opaquedir; err = -ESTALE; @@ -648,7 +655,7 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir) cap_raise(override_cred->cap_effective, CAP_CHOWN); old_cred = override_creds(override_cred); - err = ovl_remove_and_whiteout(dentry, type, is_dir); + err = ovl_remove_and_whiteout(dentry, is_dir); revert_creds(old_cred); put_cred(override_cred); @@ -781,7 +788,7 @@ static int ovl_rename2(struct inode *olddir, struct dentry *old, } if (overwrite && (new_type == OVL_PATH_LOWER || new_type == OVL_PATH_MERGE) && new_is_dir) { - opaquedir = ovl_check_empty_and_clear(new, new_type); + opaquedir = ovl_check_empty_and_clear(new); err = PTR_ERR(opaquedir); if (IS_ERR(opaquedir)) { opaquedir = NULL; From 521484639ec19a6f1ed56de6993feb255f5f676c Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 20 Nov 2014 16:40:00 +0100 Subject: [PATCH 157/208] ovl: fix race in private xattr checks Xattr operations can race with copy up. This does not matter as long as we consistently fiter out "trunsted.overlay.opaque" attribute on upper directories. Previously we checked parent against OVL_PATH_MERGE. This is too general, and prone to race with copy-up. I.e. we found the parent to be on the lower layer but ovl_dentry_real() would return the copied-up dentry, possibly with the "opaque" attribute. So instead use ovl_path_real() and decide to filter the attributes based on the actual type of the dentry we'll use. Signed-off-by: Miklos Szeredi --- fs/overlayfs/inode.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index af2d18c9fcee..07d74b24913b 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -235,26 +235,36 @@ out: return err; } +static bool ovl_need_xattr_filter(struct dentry *dentry, + enum ovl_path_type type) +{ + return type == OVL_PATH_UPPER && S_ISDIR(dentry->d_inode->i_mode); +} + ssize_t ovl_getxattr(struct dentry *dentry, const char *name, void *value, size_t size) { - if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE && - ovl_is_private_xattr(name)) + struct path realpath; + enum ovl_path_type type = ovl_path_real(dentry, &realpath); + + if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name)) return -ENODATA; - return vfs_getxattr(ovl_dentry_real(dentry), name, value, size); + return vfs_getxattr(realpath.dentry, name, value, size); } ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) { + struct path realpath; + enum ovl_path_type type = ovl_path_real(dentry, &realpath); ssize_t res; int off; - res = vfs_listxattr(ovl_dentry_real(dentry), list, size); + res = vfs_listxattr(realpath.dentry, list, size); if (res <= 0 || size == 0) return res; - if (ovl_path_type(dentry->d_parent) != OVL_PATH_MERGE) + if (!ovl_need_xattr_filter(dentry, type)) return res; /* filter out private xattrs */ @@ -279,17 +289,16 @@ int ovl_removexattr(struct dentry *dentry, const char *name) { int err; struct path realpath; - enum ovl_path_type type; + enum ovl_path_type type = ovl_path_real(dentry, &realpath); err = ovl_want_write(dentry); if (err) goto out; - if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE && - ovl_is_private_xattr(name)) + err = -ENODATA; + if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name)) goto out_drop_write; - type = ovl_path_real(dentry, &realpath); if (type == OVL_PATH_LOWER) { err = vfs_getxattr(realpath.dentry, name, NULL, 0); if (err < 0) From 91c77947133f7aef851b625701e182d3f99d14a9 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 20 Nov 2014 16:40:00 +0100 Subject: [PATCH 158/208] ovl: allow filenames with comma Allow option separator (comma) to be escaped with backslash. Signed-off-by: Miklos Szeredi --- fs/overlayfs/super.c | 48 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index b92bd1829cf7..eee7a62e1c0e 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -462,11 +462,34 @@ static const match_table_t ovl_tokens = { {OPT_ERR, NULL} }; +static char *ovl_next_opt(char **s) +{ + char *sbegin = *s; + char *p; + + if (sbegin == NULL) + return NULL; + + for (p = sbegin; *p; p++) { + if (*p == '\\') { + p++; + if (!*p) + break; + } else if (*p == ',') { + *p = '\0'; + *s = p + 1; + return sbegin; + } + } + *s = NULL; + return sbegin; +} + static int ovl_parse_opt(char *opt, struct ovl_config *config) { char *p; - while ((p = strsep(&opt, ",")) != NULL) { + while ((p = ovl_next_opt(&opt)) != NULL) { int token; substring_t args[MAX_OPT_ARGS]; @@ -554,15 +577,34 @@ out_dput: goto out_unlock; } +static void ovl_unescape(char *s) +{ + char *d = s; + + for (;; s++, d++) { + if (*s == '\\') + s++; + *d = *s; + if (!*s) + break; + } +} + static int ovl_mount_dir(const char *name, struct path *path) { int err; + char *tmp = kstrdup(name, GFP_KERNEL); - err = kern_path(name, LOOKUP_FOLLOW, path); + if (!tmp) + return -ENOMEM; + + ovl_unescape(tmp); + err = kern_path(tmp, LOOKUP_FOLLOW, path); if (err) { - pr_err("overlayfs: failed to resolve '%s': %i\n", name, err); + pr_err("overlayfs: failed to resolve '%s': %i\n", tmp, err); err = -EINVAL; } + kfree(tmp); return err; } From 71d509280f7e92eb60ae6b7c78c20afafff060c7 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 20 Nov 2014 16:40:01 +0100 Subject: [PATCH 159/208] ovl: use lockless_dereference() for upperdentry Don't open code lockless_dereference() in ovl_upperdentry_dereference(). Signed-off-by: Miklos Szeredi --- fs/overlayfs/super.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index eee7a62e1c0e..f16d318b71f8 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -84,12 +84,7 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry) static struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe) { - struct dentry *upperdentry = ACCESS_ONCE(oe->__upperdentry); - /* - * Make sure to order reads to upperdentry wrt ovl_dentry_update() - */ - smp_read_barrier_depends(); - return upperdentry; + return lockless_dereference(oe->__upperdentry); } void ovl_path_upper(struct dentry *dentry, struct path *path) From c9f00fdb9ab3999cb2fb582ad82a5db9e70c82f5 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 20 Nov 2014 16:40:01 +0100 Subject: [PATCH 160/208] ovl: pass dentry into ovl_dir_read_merged() Pass dentry into ovl_dir_read_merged() insted of upperpath and lowerpath. This cleans up callers and paves the way for multi-layer directory reads. Signed-off-by: Miklos Szeredi --- fs/overlayfs/readdir.c | 35 ++++++++++++++--------------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 2a7ef4f8e2a6..7299e962f334 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -274,11 +274,11 @@ static int ovl_dir_mark_whiteouts(struct dentry *dir, return 0; } -static inline int ovl_dir_read_merged(struct path *upperpath, - struct path *lowerpath, - struct list_head *list) +static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list) { int err; + struct path lowerpath; + struct path upperpath; struct ovl_readdir_data rdd = { .ctx.actor = ovl_fill_merge, .list = list, @@ -286,25 +286,28 @@ static inline int ovl_dir_read_merged(struct path *upperpath, .is_merge = false, }; - if (upperpath->dentry) { - err = ovl_dir_read(upperpath, &rdd); + ovl_path_lower(dentry, &lowerpath); + ovl_path_upper(dentry, &upperpath); + + if (upperpath.dentry) { + err = ovl_dir_read(&upperpath, &rdd); if (err) goto out; - if (lowerpath->dentry) { - err = ovl_dir_mark_whiteouts(upperpath->dentry, &rdd); + if (lowerpath.dentry) { + err = ovl_dir_mark_whiteouts(upperpath.dentry, &rdd); if (err) goto out; } } - if (lowerpath->dentry) { + if (lowerpath.dentry) { /* * Insert lowerpath entries before upperpath ones, this allows * offsets to be reasonably constant */ list_add(&rdd.middle, rdd.list); rdd.is_merge = true; - err = ovl_dir_read(lowerpath, &rdd); + err = ovl_dir_read(&lowerpath, &rdd); list_del(&rdd.middle); } out: @@ -329,8 +332,6 @@ static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos) static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry) { int res; - struct path lowerpath; - struct path upperpath; struct ovl_dir_cache *cache; cache = ovl_dir_cache(dentry); @@ -347,10 +348,7 @@ static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry) cache->refcount = 1; INIT_LIST_HEAD(&cache->entries); - ovl_path_lower(dentry, &lowerpath); - ovl_path_upper(dentry, &upperpath); - - res = ovl_dir_read_merged(&upperpath, &lowerpath, &cache->entries); + res = ovl_dir_read_merged(dentry, &cache->entries); if (res) { ovl_cache_free(&cache->entries); kfree(cache); @@ -538,14 +536,9 @@ const struct file_operations ovl_dir_operations = { int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list) { int err; - struct path lowerpath; - struct path upperpath; struct ovl_cache_entry *p; - ovl_path_upper(dentry, &upperpath); - ovl_path_lower(dentry, &lowerpath); - - err = ovl_dir_read_merged(&upperpath, &lowerpath, list); + err = ovl_dir_read_merged(dentry, list); if (err) return err; From 1d113735ecf21de74a04c3b58fa106ac2e64ca0d Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 20 Nov 2014 16:40:01 +0100 Subject: [PATCH 161/208] ovl: update MAINTAINERS There's a union/overlay specific mailing list now. Also add a git tree. Signed-off-by: Miklos Szeredi --- MAINTAINERS | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index af1e738e8772..0ff630de8a6d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6890,9 +6890,10 @@ F: fs/exofs/ OVERLAY FILESYSTEM M: Miklos Szeredi -L: linux-fsdevel@vger.kernel.org +L: linux-unionfs@vger.kernel.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs.git S: Supported -F: fs/overlayfs/* +F: fs/overlayfs/ F: Documentation/filesystems/overlayfs.txt P54 WIRELESS DRIVER From 7676895f4736421ebafc48de5078e25ea69e88ee Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 20 Nov 2014 16:40:02 +0100 Subject: [PATCH 162/208] ovl: ovl_dir_fsync() cleanup Check against !OVL_PATH_LOWER instead of OVL_PATH_MERGE. For a copied up directory the two are currently equivalent. Signed-off-by: Miklos Szeredi --- fs/overlayfs/readdir.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 7299e962f334..ab1e3dcbed95 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -450,10 +450,10 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, /* * Need to check if we started out being a lower dir, but got copied up */ - if (!od->is_upper && ovl_path_type(dentry) == OVL_PATH_MERGE) { + if (!od->is_upper && ovl_path_type(dentry) != OVL_PATH_LOWER) { struct inode *inode = file_inode(file); - realfile =lockless_dereference(od->upperfile); + realfile = lockless_dereference(od->upperfile); if (!realfile) { struct path upperpath; From b7bc596ebbe0cddc97d76ef9309f64471bbf13eb Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 19 Nov 2014 13:12:54 -0500 Subject: [PATCH 163/208] drm/radeon: disable native backlight control on pre-r6xx asics (v2) Just use the acpi interface. That's what windows uses on this generation and it's the only thing that seems to work reliably on these generation parts. You can still force the native backlight interface by setting radeon.backlight=1 Bug: https://bugzilla.kernel.org/show_bug.cgi?id=88501 v2: merge into above if/else block Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/radeon_encoders.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_encoders.c b/drivers/gpu/drm/radeon/radeon_encoders.c index 9a19e52cc655..6b670b0bc47b 100644 --- a/drivers/gpu/drm/radeon/radeon_encoders.c +++ b/drivers/gpu/drm/radeon/radeon_encoders.c @@ -179,6 +179,9 @@ static void radeon_encoder_add_backlight(struct radeon_encoder *radeon_encoder, (rdev->pdev->subsystem_vendor == 0x1734) && (rdev->pdev->subsystem_device == 0x1107)) use_bl = false; + /* disable native backlight control on older asics */ + else if (rdev->family < CHIP_R600) + use_bl = false; else use_bl = true; } From 28731d5818ae25b92d1fb82fe0ac196e97102c1b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 12 Nov 2014 19:17:02 -0500 Subject: [PATCH 164/208] drm/radeon: fix endian swapping in vbios fetch for tdp table Value needs to be swapped on BE. Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/r600_dpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/r600_dpm.c b/drivers/gpu/drm/radeon/r600_dpm.c index f6309bd23e01..b5c73df8e202 100644 --- a/drivers/gpu/drm/radeon/r600_dpm.c +++ b/drivers/gpu/drm/radeon/r600_dpm.c @@ -1256,7 +1256,7 @@ int r600_parse_extended_power_table(struct radeon_device *rdev) (mode_info->atom_context->bios + data_offset + le16_to_cpu(ext_hdr->usPowerTuneTableOffset)); rdev->pm.dpm.dyn_state.cac_tdp_table->maximum_power_delivery_limit = - ppt->usMaximumPowerDeliveryLimit; + le16_to_cpu(ppt->usMaximumPowerDeliveryLimit); pt = &ppt->power_tune_table; } else { ATOM_PPLIB_POWERTUNE_Table *ppt = (ATOM_PPLIB_POWERTUNE_Table *) From a1d69c60c44134f64945bbf6a6dfda22eaf4a214 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 19 Nov 2014 22:13:10 +0100 Subject: [PATCH 165/208] brcmfmac: don't include linux/unaligned/access_ok.h This is a specific implementation, is the multiplexer that has the arch-specific knowledge of which of the implementations needs to be used, so include that. This issue was revealed by kbuild testing when was added in resulting in redefinition of get_unaligned_be16 (and probably others). Cc: stable@vger.kernel.org # v3.17 Reported-by: Fengguang Wu Signed-off-by: Johannes Berg Signed-off-by: Arend van Spriel Signed-off-by: John W. Linville --- drivers/net/wireless/brcm80211/brcmfmac/pcie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/brcm80211/brcmfmac/pcie.c index 8c0632ec9f7a..16fef3382019 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/pcie.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/pcie.c @@ -19,10 +19,10 @@ #include #include #include -#include #include #include #include +#include #include #include From b5e212a3051b65e426a513901d9c7001681c7215 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 19 Nov 2014 13:56:19 -0800 Subject: [PATCH 166/208] x86, syscall: Fix _TIF_NOHZ handling in syscall_trace_enter_phase1 TIF_NOHZ is 19 (i.e. _TIF_SYSCALL_TRACE | _TIF_NOTIFY_RESUME | _TIF_SINGLESTEP), not (1<<19). This code is involved in Dave's trinity lockup, but I don't see why it would cause any of the problems he's seeing, except inadvertently by causing a different path through entry_64.S's syscall handling. Signed-off-by: Andy Lutomirski Cc: Don Zickus Cc: Peter Zijlstra Cc: Dave Jones Cc: Linus Torvalds Link: http://lkml.kernel.org/r/a6cd3b60a3f53afb6e1c8081b0ec30ff19003dd7.1416434075.git.luto@amacapital.net Signed-off-by: Thomas Gleixner --- arch/x86/kernel/ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 749b0e423419..e510618b2e91 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1484,7 +1484,7 @@ unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch) */ if (work & _TIF_NOHZ) { user_exit(); - work &= ~TIF_NOHZ; + work &= ~_TIF_NOHZ; } #ifdef CONFIG_SECCOMP From a5f6fc28d6e6cc379c6839f21820e62262419584 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 19 Nov 2014 18:05:26 +0100 Subject: [PATCH 167/208] pptp: fix stack info leak in pptp_getname() pptp_getname() only partially initializes the stack variable sa, particularly only fills the pptp part of the sa_addr union. The code thereby discloses 16 bytes of kernel stack memory via getsockname(). Fix this by memset(0)'ing the union before. Cc: Dmitry Kozlov Signed-off-by: Mathias Krause Signed-off-by: David S. Miller --- drivers/net/ppp/pptp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index 1aff970be33e..1dc628ffce2b 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c @@ -506,7 +506,9 @@ static int pptp_getname(struct socket *sock, struct sockaddr *uaddr, int len = sizeof(struct sockaddr_pppox); struct sockaddr_pppox sp; - sp.sa_family = AF_PPPOX; + memset(&sp.sa_addr, 0, sizeof(sp.sa_addr)); + + sp.sa_family = AF_PPPOX; sp.sa_protocol = PX_PROTO_PPTP; sp.sa_addr.pptp = pppox_sk(sock->sk)->proto.pptp.src_addr; From d3052bb5d306b29c1e7d9e5998c5ac4ca1ff0ca9 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Wed, 19 Nov 2014 13:54:49 -0800 Subject: [PATCH 168/208] openvswitch: Don't validate IPv6 label masks. When userspace doesn't provide a mask, OVS datapath generates a fully unwildcarded mask for the flow by copying the flow and setting all bits in all fields. For IPv6 label, this creates a mask that matches on the upper 12 bits, causing the following error: openvswitch: netlink: Invalid IPv6 flow label value (value=ffffffff, max=fffff) This patch ignores the label validation check for masks, avoiding this error. Signed-off-by: Joe Stringer Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/flow_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index fa4ec2e4a78b..089b195c064a 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -690,7 +690,7 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, return -EINVAL; } - if (ipv6_key->ipv6_label & htonl(0xFFF00000)) { + if (!is_mask && ipv6_key->ipv6_label & htonl(0xFFF00000)) { OVS_NLERR("IPv6 flow label %x is out of range (max=%x).\n", ntohl(ipv6_key->ipv6_label), (1 << 20) - 1); return -EINVAL; From 01462405f0c093b2f8dfddafcadcda6c9e4c5cdf Mon Sep 17 00:00:00 2001 From: Jiri Bohac Date: Wed, 19 Nov 2014 23:05:49 +0100 Subject: [PATCH 169/208] ipx: fix locking regression in ipx_sendmsg and ipx_recvmsg This fixes an old regression introduced by commit b0d0d915 (ipx: remove the BKL). When a recvmsg syscall blocks waiting for new data, no data can be sent on the same socket with sendmsg because ipx_recvmsg() sleeps with the socket locked. This breaks mars-nwe (NetWare emulator): - the ncpserv process reads the request using recvmsg - ncpserv forks and spawns nwconn - ncpserv calls a (blocking) recvmsg and waits for new requests - nwconn deadlocks in sendmsg on the same socket Commit b0d0d915 has simply replaced BKL locking with lock_sock/release_sock. Unlike now, BKL got unlocked while sleeping, so a blocking recvmsg did not block a concurrent sendmsg. Only keep the socket locked while actually working with the socket data and release it prior to calling skb_recv_datagram(). Signed-off-by: Jiri Bohac Reviewed-by: Arnd Bergmann Signed-off-by: David S. Miller --- net/ipx/af_ipx.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 91729b807c7d..1b095ca37aa4 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1764,6 +1764,7 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock, struct ipxhdr *ipx = NULL; struct sk_buff *skb; int copied, rc; + bool locked = true; lock_sock(sk); /* put the autobinding in */ @@ -1790,6 +1791,8 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock, if (sock_flag(sk, SOCK_ZAPPED)) goto out; + release_sock(sk); + locked = false; skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &rc); if (!skb) { @@ -1826,7 +1829,8 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock, out_free: skb_free_datagram(sk, skb); out: - release_sock(sk); + if (locked) + release_sock(sk); return rc; } From 17544e2ad78fa0bbff6fcdbf09426d04ce95ed1e Mon Sep 17 00:00:00 2001 From: Anish Bhatt Date: Thu, 20 Nov 2014 17:11:46 -0800 Subject: [PATCH 170/208] cxgb4 : Fix DCB priority groups being returned in wrong order Peer priority groups were being reversed, but this was missed in the previous fix sent out for this issue. v2 : Previous patch was doing extra unnecessary work, result is the same. Please ignore previous patch Fixes : ee7bc3cdc270 ('cxgb4 : dcb open-lldp interop fixes') Signed-off-by: Anish Bhatt Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c index cca604994003..4fe33606f372 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c @@ -1082,7 +1082,7 @@ static int cxgb4_cee_peer_getpg(struct net_device *dev, struct cee_pg *pg) pgid = be32_to_cpu(pcmd.u.dcb.pgid.pgid); for (i = 0; i < CXGB4_MAX_PRIORITY; i++) - pg->prio_pg[i] = (pgid >> (i * 4)) & 0xF; + pg->prio_pg[7 - i] = (pgid >> (i * 4)) & 0xF; INIT_PORT_DCB_READ_PEER_CMD(pcmd, pi->port_id); pcmd.u.dcb.pgrate.type = FW_PORT_DCB_TYPE_PGRATE; From 892d6eb1245b771987afb8667a65344e568d3439 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 20 Nov 2014 17:03:05 +0800 Subject: [PATCH 171/208] virtio-net: validate features during probe We currently trigger BUG when VIRTIO_NET_F_CTRL_VQ is not set but one of features depending on it is. That's not a friendly way to report errors to hypervisors. Let's check, and fail probe instead. Cc: Rusty Russell Cc: Cornelia Huck Cc: Wanlong Gao Signed-off-by: Michael S. Tsirkin Signed-off-by: Jason Wang Acked-by: Cornelia Huck Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index ec2a8b41ed41..b0bc8ead47de 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1673,6 +1673,40 @@ static const struct attribute_group virtio_net_mrg_rx_group = { }; #endif +static bool virtnet_fail_on_feature(struct virtio_device *vdev, + unsigned int fbit, + const char *fname, const char *dname) +{ + if (!virtio_has_feature(vdev, fbit)) + return false; + + dev_err(&vdev->dev, "device advertises feature %s but not %s", + fname, dname); + + return true; +} + +#define VIRTNET_FAIL_ON(vdev, fbit, dbit) \ + virtnet_fail_on_feature(vdev, fbit, #fbit, dbit) + +static bool virtnet_validate_features(struct virtio_device *vdev) +{ + if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) && + (VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_RX, + "VIRTIO_NET_F_CTRL_VQ") || + VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_VLAN, + "VIRTIO_NET_F_CTRL_VQ") || + VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE, + "VIRTIO_NET_F_CTRL_VQ") || + VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_MQ, "VIRTIO_NET_F_CTRL_VQ") || + VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR, + "VIRTIO_NET_F_CTRL_VQ"))) { + return false; + } + + return true; +} + static int virtnet_probe(struct virtio_device *vdev) { int i, err; @@ -1680,6 +1714,9 @@ static int virtnet_probe(struct virtio_device *vdev) struct virtnet_info *vi; u16 max_queue_pairs; + if (!virtnet_validate_features(vdev)) + return -EINVAL; + /* Find if host supports multiqueue virtio_net device */ err = virtio_cread_feature(vdev, VIRTIO_NET_F_MQ, struct virtio_net_config, From e7820e39b7d19b9fe1928fc19de9361b44150ca6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 21 Nov 2014 11:47:16 -0800 Subject: [PATCH 172/208] net: Revert "net: avoid one atomic operation in skb_clone()" Not sure what I was thinking, but doing anything after releasing a refcount is suicidal or/and embarrassing. By the time we set skb->fclone to SKB_FCLONE_FREE, another cpu could have released last reference and freed whole skb. We potentially corrupt memory or trap if CONFIG_DEBUG_PAGEALLOC is set. Reported-by: Chris Mason Fixes: ce1a4ea3f1258 ("net: avoid one atomic operation in skb_clone()") Signed-off-by: Eric Dumazet Cc: Sabrina Dubroca Signed-off-by: David S. Miller --- net/core/skbuff.c | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index c16615bfb61e..32e31c299631 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -552,20 +552,13 @@ static void kfree_skbmem(struct sk_buff *skb) case SKB_FCLONE_CLONE: fclones = container_of(skb, struct sk_buff_fclones, skb2); - /* Warning : We must perform the atomic_dec_and_test() before - * setting skb->fclone back to SKB_FCLONE_FREE, otherwise - * skb_clone() could set clone_ref to 2 before our decrement. - * Anyway, if we are going to free the structure, no need to - * rewrite skb->fclone. + /* The clone portion is available for + * fast-cloning again. */ - if (atomic_dec_and_test(&fclones->fclone_ref)) { + skb->fclone = SKB_FCLONE_FREE; + + if (atomic_dec_and_test(&fclones->fclone_ref)) kmem_cache_free(skbuff_fclone_cache, fclones); - } else { - /* The clone portion is available for - * fast-cloning again. - */ - skb->fclone = SKB_FCLONE_FREE; - } break; } } @@ -887,11 +880,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) if (skb->fclone == SKB_FCLONE_ORIG && n->fclone == SKB_FCLONE_FREE) { n->fclone = SKB_FCLONE_CLONE; - /* As our fastclone was free, clone_ref must be 1 at this point. - * We could use atomic_inc() here, but it is faster - * to set the final value. - */ - atomic_set(&fclones->fclone_ref, 2); + atomic_inc(&fclones->fclone_ref); } else { if (skb_pfmemalloc(skb)) gfp_mask |= __GFP_MEMALLOC; From 0c228e833c88e3aa029250f5db77d5968c5ce5b5 Mon Sep 17 00:00:00 2001 From: Calvin Owens Date: Thu, 20 Nov 2014 15:09:53 -0800 Subject: [PATCH 173/208] tcp: Restore RFC5961-compliant behavior for SYN packets Commit c3ae62af8e755 ("tcp: should drop incoming frames without ACK flag set") was created to mitigate a security vulnerability in which a local attacker is able to inject data into locally-opened sockets by using TCP protocol statistics in procfs to quickly find the correct sequence number. This broke the RFC5961 requirement to send a challenge ACK in response to spurious RST packets, which was subsequently fixed by commit 7b514a886ba50 ("tcp: accept RST without ACK flag"). Unfortunately, the RFC5961 requirement that spurious SYN packets be handled in a similar manner remains broken. RFC5961 section 4 states that: ... the handling of the SYN in the synchronized state SHOULD be performed as follows: 1) If the SYN bit is set, irrespective of the sequence number, TCP MUST send an ACK (also referred to as challenge ACK) to the remote peer: After sending the acknowledgment, TCP MUST drop the unacceptable segment and stop processing further. By sending an ACK, the remote peer is challenged to confirm the loss of the previous connection and the request to start a new connection. A legitimate peer, after restart, would not have a TCB in the synchronized state. Thus, when the ACK arrives, the peer should send a RST segment back with the sequence number derived from the ACK field that caused the RST. This RST will confirm that the remote peer has indeed closed the previous connection. Upon receipt of a valid RST, the local TCP endpoint MUST terminate its connection. The local TCP endpoint should then rely on SYN retransmission from the remote end to re-establish the connection. This patch lets SYN packets through the discard added in c3ae62af8e755, so that spurious SYN packets are properly dealt with as per the RFC. The challenge ACK is sent unconditionally and is rate-limited, so the original vulnerability is not reintroduced by this patch. Signed-off-by: Calvin Owens Acked-by: Eric Dumazet Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 88fa2d160685..d107ee246a1d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5231,7 +5231,7 @@ slow_path: if (len < (th->doff << 2) || tcp_checksum_complete_user(sk, skb)) goto csum_error; - if (!th->ack && !th->rst) + if (!th->ack && !th->rst && !th->syn) goto discard; /* @@ -5650,7 +5650,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, goto discard; } - if (!th->ack && !th->rst) + if (!th->ack && !th->rst && !th->syn) goto discard; if (!tcp_validate_incoming(sk, skb, th, 0)) From 4aab3b5b3ccf94fc907e66233e6ca4d8675759a6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 22 Nov 2014 09:22:42 -0500 Subject: [PATCH 174/208] percpu-ref: fix DEAD flag contamination of percpu pointer While decoupling ATOMIC and DEAD flags, f47ad4578461 ("percpu_ref: decouple switching to percpu mode and reinit") updated __ref_is_percpu() so that it only tests ATOMIC flag to determine whether the ref is in percpu mode or not; however, while DEAD implies ATOMIC, the two flags are set separately during percpu_ref_kill() and if __ref_is_percpu() races percpu_ref_kill(), it may see DEAD w/o ATOMIC. Because __ref_is_percpu() returns @ref->percpu_count_ptr value verbatim as the percpu pointer after testing ATOMIC, the pointer may now be contaminated with the DEAD flag. This can be fixed by clearing the flag bits before returning the pointer which was the fix proposed by Shaohua; however, as DEAD implies ATOMIC, we can just test for both flags at once and avoid the explicit masking. Update __ref_is_percpu() so that it tests that both ATOMIC and DEAD are clear before returning @ref->percpu_count_ptr as the percpu pointer. Signed-off-by: Tejun Heo Reported-and-Reviewed-by: Shaohua Li Link: http://lkml.kernel.org/r/995deb699f5b873c45d667df4add3b06f73c2c25.1416638887.git.shli@kernel.org Fixes: f47ad4578461 ("percpu_ref: decouple switching to percpu mode and reinit") --- include/linux/percpu-refcount.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index d5c89e0dd0e6..51ce60c35f4c 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -133,7 +133,13 @@ static inline bool __ref_is_percpu(struct percpu_ref *ref, /* paired with smp_store_release() in percpu_ref_reinit() */ smp_read_barrier_depends(); - if (unlikely(percpu_ptr & __PERCPU_REF_ATOMIC)) + /* + * Theoretically, the following could test just ATOMIC; however, + * then we'd have to mask off DEAD separately as DEAD may be + * visible without ATOMIC if we race with percpu_ref_kill(). DEAD + * implies ATOMIC anyway. Test them together. + */ + if (unlikely(percpu_ptr & __PERCPU_REF_ATOMIC_DEAD)) return false; *percpu_countp = (unsigned long __percpu *)percpu_ptr; From af726f21ed8af2cdaa4e93098dc211521218ae65 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 22 Nov 2014 18:00:31 -0800 Subject: [PATCH 175/208] x86_64, traps: Fix the espfix64 #DF fixup and rewrite it in C There's nothing special enough about the espfix64 double fault fixup to justify writing it in assembly. Move it to C. This also fixes a bug: if the double fault came from an IST stack, the old asm code would return to a partially uninitialized stack frame. Fixes: 3891a04aafd668686239349ea58f3314ea2af86b Signed-off-by: Andy Lutomirski Reviewed-by: Thomas Gleixner Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- arch/x86/kernel/entry_64.S | 34 ++-------------------------------- arch/x86/kernel/traps.c | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 32 deletions(-) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index df088bb03fb3..a4dc8de7c4be 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -828,6 +828,7 @@ ENTRY(native_iret) jnz native_irq_return_ldt #endif +.global native_irq_return_iret native_irq_return_iret: iretq _ASM_EXTABLE(native_irq_return_iret, bad_iret) @@ -922,37 +923,6 @@ ENTRY(retint_kernel) CFI_ENDPROC END(common_interrupt) - /* - * If IRET takes a fault on the espfix stack, then we - * end up promoting it to a doublefault. In that case, - * modify the stack to make it look like we just entered - * the #GP handler from user space, similar to bad_iret. - */ -#ifdef CONFIG_X86_ESPFIX64 - ALIGN -__do_double_fault: - XCPT_FRAME 1 RDI+8 - movq RSP(%rdi),%rax /* Trap on the espfix stack? */ - sarq $PGDIR_SHIFT,%rax - cmpl $ESPFIX_PGD_ENTRY,%eax - jne do_double_fault /* No, just deliver the fault */ - cmpl $__KERNEL_CS,CS(%rdi) - jne do_double_fault - movq RIP(%rdi),%rax - cmpq $native_irq_return_iret,%rax - jne do_double_fault /* This shouldn't happen... */ - movq PER_CPU_VAR(kernel_stack),%rax - subq $(6*8-KERNEL_STACK_OFFSET),%rax /* Reset to original stack */ - movq %rax,RSP(%rdi) - movq $0,(%rax) /* Missing (lost) #GP error code */ - movq $general_protection,RIP(%rdi) - retq - CFI_ENDPROC -END(__do_double_fault) -#else -# define __do_double_fault do_double_fault -#endif - /* * APIC interrupts. */ @@ -1124,7 +1094,7 @@ idtentry overflow do_overflow has_error_code=0 idtentry bounds do_bounds has_error_code=0 idtentry invalid_op do_invalid_op has_error_code=0 idtentry device_not_available do_device_not_available has_error_code=0 -idtentry double_fault __do_double_fault has_error_code=1 paranoid=1 +idtentry double_fault do_double_fault has_error_code=1 paranoid=1 idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0 idtentry invalid_TSS do_invalid_TSS has_error_code=1 idtentry segment_not_present do_segment_not_present has_error_code=1 diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 0d0e922fafc1..819662746e23 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -259,6 +259,30 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) static const char str[] = "double fault"; struct task_struct *tsk = current; +#ifdef CONFIG_X86_ESPFIX64 + extern unsigned char native_irq_return_iret[]; + + /* + * If IRET takes a non-IST fault on the espfix64 stack, then we + * end up promoting it to a doublefault. In that case, modify + * the stack to make it look like we just entered the #GP + * handler from user space, similar to bad_iret. + */ + if (((long)regs->sp >> PGDIR_SHIFT) == ESPFIX_PGD_ENTRY && + regs->cs == __KERNEL_CS && + regs->ip == (unsigned long)native_irq_return_iret) + { + struct pt_regs *normal_regs = task_pt_regs(current); + + /* Fake a #GP(0) from userspace. */ + memmove(&normal_regs->ip, (void *)regs->sp, 5*8); + normal_regs->orig_ax = 0; /* Missing (lost) #GP error code */ + regs->ip = (unsigned long)general_protection; + regs->sp = (unsigned long)&normal_regs->orig_ax; + return; + } +#endif + exception_enter(); /* Return not checked because double check cannot be ignored */ notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV); From 6f442be2fb22be02cafa606f1769fa1e6f894441 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 22 Nov 2014 18:00:32 -0800 Subject: [PATCH 176/208] x86_64, traps: Stop using IST for #SS On a 32-bit kernel, this has no effect, since there are no IST stacks. On a 64-bit kernel, #SS can only happen in user code, on a failed iret to user space, a canonical violation on access via RSP or RBP, or a genuine stack segment violation in 32-bit kernel code. The first two cases don't need IST, and the latter two cases are unlikely fatal bugs, and promoting them to double faults would be fine. This fixes a bug in which the espfix64 code mishandles a stack segment violation. This saves 4k of memory per CPU and a tiny bit of code. Signed-off-by: Andy Lutomirski Reviewed-by: Thomas Gleixner Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- arch/x86/include/asm/page_32_types.h | 1 - arch/x86/include/asm/page_64_types.h | 11 +++++------ arch/x86/include/asm/traps.h | 1 + arch/x86/kernel/dumpstack_64.c | 1 - arch/x86/kernel/entry_64.S | 2 +- arch/x86/kernel/traps.c | 18 +----------------- 6 files changed, 8 insertions(+), 26 deletions(-) diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h index f48b17df4224..3a52ee0e726d 100644 --- a/arch/x86/include/asm/page_32_types.h +++ b/arch/x86/include/asm/page_32_types.h @@ -20,7 +20,6 @@ #define THREAD_SIZE_ORDER 1 #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) -#define STACKFAULT_STACK 0 #define DOUBLEFAULT_STACK 1 #define NMI_STACK 0 #define DEBUG_STACK 0 diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 678205195ae1..75450b2c7be4 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -14,12 +14,11 @@ #define IRQ_STACK_ORDER 2 #define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER) -#define STACKFAULT_STACK 1 -#define DOUBLEFAULT_STACK 2 -#define NMI_STACK 3 -#define DEBUG_STACK 4 -#define MCE_STACK 5 -#define N_EXCEPTION_STACKS 5 /* hw limit: 7 */ +#define DOUBLEFAULT_STACK 1 +#define NMI_STACK 2 +#define DEBUG_STACK 3 +#define MCE_STACK 4 +#define N_EXCEPTION_STACKS 4 /* hw limit: 7 */ #define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) #define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index bc8352e7010a..707adc6549d8 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -39,6 +39,7 @@ asmlinkage void simd_coprocessor_error(void); #ifdef CONFIG_TRACING asmlinkage void trace_page_fault(void); +#define trace_stack_segment stack_segment #define trace_divide_error divide_error #define trace_bounds bounds #define trace_invalid_op invalid_op diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 1abcb50b48ae..ff86f19b5758 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -24,7 +24,6 @@ static char x86_stack_ids[][8] = { [ DEBUG_STACK-1 ] = "#DB", [ NMI_STACK-1 ] = "NMI", [ DOUBLEFAULT_STACK-1 ] = "#DF", - [ STACKFAULT_STACK-1 ] = "#SS", [ MCE_STACK-1 ] = "#MC", #if DEBUG_STKSZ > EXCEPTION_STKSZ [ N_EXCEPTION_STACKS ... diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index a4dc8de7c4be..49a0c1781253 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1259,7 +1259,7 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \ idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK -idtentry stack_segment do_stack_segment has_error_code=1 paranoid=1 +idtentry stack_segment do_stack_segment has_error_code=1 #ifdef CONFIG_XEN idtentry xen_debug do_debug has_error_code=0 idtentry xen_int3 do_int3 has_error_code=0 diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 819662746e23..48035e9cdde9 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -233,27 +233,11 @@ DO_ERROR(X86_TRAP_UD, SIGILL, "invalid opcode", invalid_op) DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun",coprocessor_segment_overrun) DO_ERROR(X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS) DO_ERROR(X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present) -#ifdef CONFIG_X86_32 DO_ERROR(X86_TRAP_SS, SIGBUS, "stack segment", stack_segment) -#endif DO_ERROR(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check) #ifdef CONFIG_X86_64 /* Runs on IST stack */ -dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code) -{ - enum ctx_state prev_state; - - prev_state = exception_enter(); - if (notify_die(DIE_TRAP, "stack segment", regs, error_code, - X86_TRAP_SS, SIGBUS) != NOTIFY_STOP) { - preempt_conditional_sti(regs); - do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL); - preempt_conditional_cli(regs); - } - exception_exit(prev_state); -} - dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) { static const char str[] = "double fault"; @@ -802,7 +786,7 @@ void __init trap_init(void) set_intr_gate(X86_TRAP_OLD_MF, coprocessor_segment_overrun); set_intr_gate(X86_TRAP_TS, invalid_TSS); set_intr_gate(X86_TRAP_NP, segment_not_present); - set_intr_gate_ist(X86_TRAP_SS, &stack_segment, STACKFAULT_STACK); + set_intr_gate(X86_TRAP_SS, stack_segment); set_intr_gate(X86_TRAP_GP, general_protection); set_intr_gate(X86_TRAP_SPURIOUS, spurious_interrupt_bug); set_intr_gate(X86_TRAP_MF, coprocessor_error); From b645af2d5905c4e32399005b867987919cbfc3ae Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 22 Nov 2014 18:00:33 -0800 Subject: [PATCH 177/208] x86_64, traps: Rework bad_iret It's possible for iretq to userspace to fail. This can happen because of a bad CS, SS, or RIP. Historically, we've handled it by fixing up an exception from iretq to land at bad_iret, which pretends that the failed iret frame was really the hardware part of #GP(0) from userspace. To make this work, there's an extra fixup to fudge the gs base into a usable state. This is suboptimal because it loses the original exception. It's also buggy because there's no guarantee that we were on the kernel stack to begin with. For example, if the failing iret happened on return from an NMI, then we'll end up executing general_protection on the NMI stack. This is bad for several reasons, the most immediate of which is that general_protection, as a non-paranoid idtentry, will try to deliver signals and/or schedule from the wrong stack. This patch throws out bad_iret entirely. As a replacement, it augments the existing swapgs fudge into a full-blown iret fixup, mostly written in C. It's should be clearer and more correct. Signed-off-by: Andy Lutomirski Reviewed-by: Thomas Gleixner Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- arch/x86/kernel/entry_64.S | 45 ++++++++++++++++---------------------- arch/x86/kernel/traps.c | 29 ++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 26 deletions(-) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 49a0c1781253..c0226ab54106 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -830,8 +830,13 @@ ENTRY(native_iret) .global native_irq_return_iret native_irq_return_iret: + /* + * This may fault. Non-paranoid faults on return to userspace are + * handled by fixup_bad_iret. These include #SS, #GP, and #NP. + * Double-faults due to espfix64 are handled in do_double_fault. + * Other faults here are fatal. + */ iretq - _ASM_EXTABLE(native_irq_return_iret, bad_iret) #ifdef CONFIG_X86_ESPFIX64 native_irq_return_ldt: @@ -859,25 +864,6 @@ native_irq_return_ldt: jmp native_irq_return_iret #endif - .section .fixup,"ax" -bad_iret: - /* - * The iret traps when the %cs or %ss being restored is bogus. - * We've lost the original trap vector and error code. - * #GPF is the most likely one to get for an invalid selector. - * So pretend we completed the iret and took the #GPF in user mode. - * - * We are now running with the kernel GS after exception recovery. - * But error_entry expects us to have user GS to match the user %cs, - * so swap back. - */ - pushq $0 - - SWAPGS - jmp general_protection - - .previous - /* edi: workmask, edx: work */ retint_careful: CFI_RESTORE_STATE @@ -1369,17 +1355,16 @@ error_sti: /* * There are two places in the kernel that can potentially fault with - * usergs. Handle them here. The exception handlers after iret run with - * kernel gs again, so don't set the user space flag. B stepping K8s - * sometimes report an truncated RIP for IRET exceptions returning to - * compat mode. Check for these here too. + * usergs. Handle them here. B stepping K8s sometimes report a + * truncated RIP for IRET exceptions returning to compat mode. Check + * for these here too. */ error_kernelspace: CFI_REL_OFFSET rcx, RCX+8 incl %ebx leaq native_irq_return_iret(%rip),%rcx cmpq %rcx,RIP+8(%rsp) - je error_swapgs + je error_bad_iret movl %ecx,%eax /* zero extend */ cmpq %rax,RIP+8(%rsp) je bstep_iret @@ -1390,7 +1375,15 @@ error_kernelspace: bstep_iret: /* Fix truncated RIP */ movq %rcx,RIP+8(%rsp) - jmp error_swapgs + /* fall through */ + +error_bad_iret: + SWAPGS + mov %rsp,%rdi + call fixup_bad_iret + mov %rax,%rsp + decl %ebx /* Return to usergs */ + jmp error_sti CFI_ENDPROC END(error_entry) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 48035e9cdde9..de801f22128a 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -407,6 +407,35 @@ asmlinkage __visible struct pt_regs *sync_regs(struct pt_regs *eregs) return regs; } NOKPROBE_SYMBOL(sync_regs); + +struct bad_iret_stack { + void *error_entry_ret; + struct pt_regs regs; +}; + +asmlinkage __visible +struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s) +{ + /* + * This is called from entry_64.S early in handling a fault + * caused by a bad iret to user mode. To handle the fault + * correctly, we want move our stack frame to task_pt_regs + * and we want to pretend that the exception came from the + * iret target. + */ + struct bad_iret_stack *new_stack = + container_of(task_pt_regs(current), + struct bad_iret_stack, regs); + + /* Copy the IRET target to the new stack. */ + memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8); + + /* Copy the remainder of the stack from the current stack. */ + memmove(new_stack, s, offsetof(struct bad_iret_stack, regs.ip)); + + BUG_ON(!user_mode_vm(&new_stack->regs)); + return new_stack; +} #endif /* From 90e362f4a75d0911ca75e5cd95591a6cf1f169dc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 23 Nov 2014 23:04:52 +0100 Subject: [PATCH 178/208] sched: Provide update_curr callbacks for stop/idle scheduling classes Chris bisected a NULL pointer deference in task_sched_runtime() to commit 6e998916dfe3 'sched/cputime: Fix clock_nanosleep()/clock_gettime() inconsistency'. Chris observed crashes in atop or other /proc walking programs when he started fork bombs on his machine. He assumed that this is a new exit race, but that does not make any sense when looking at that commit. What's interesting is that, the commit provides update_curr callbacks for all scheduling classes except stop_task and idle_task. While nothing can ever hit that via the clock_nanosleep() and clock_gettime() interfaces, which have been the target of the commit in question, the author obviously forgot that there are other code paths which invoke task_sched_runtime() do_task_stat(() thread_group_cputime_adjusted() thread_group_cputime() task_cputime() task_sched_runtime() if (task_current(rq, p) && task_on_rq_queued(p)) { update_rq_clock(rq); up->sched_class->update_curr(rq); } If the stats are read for a stomp machine task, aka 'migration/N' and that task is current on its cpu, this will happily call the NULL pointer of stop_task->update_curr. Ooops. Chris observation that this happens faster when he runs the fork bomb makes sense as the fork bomb will kick migration threads more often so the probability to hit the issue will increase. Add the missing update_curr callbacks to the scheduler classes stop_task and idle_task. While idle tasks cannot be monitored via /proc we have other means to hit the idle case. Fixes: 6e998916dfe3 'sched/cputime: Fix clock_nanosleep()/clock_gettime() inconsistency' Reported-by: Chris Mason Reported-and-tested-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Ingo Molnar Cc: Stanislaw Gruszka Cc: Peter Zijlstra Signed-off-by: Linus Torvalds --- kernel/sched/idle_task.c | 5 +++++ kernel/sched/stop_task.c | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c index 67ad4e7f506a..c65dac8c97cd 100644 --- a/kernel/sched/idle_task.c +++ b/kernel/sched/idle_task.c @@ -75,6 +75,10 @@ static unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task return 0; } +static void update_curr_idle(struct rq *rq) +{ +} + /* * Simple, special scheduling class for the per-CPU idle tasks: */ @@ -101,4 +105,5 @@ const struct sched_class idle_sched_class = { .prio_changed = prio_changed_idle, .switched_to = switched_to_idle, + .update_curr = update_curr_idle, }; diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c index 67426e529f59..79ffec45a6ac 100644 --- a/kernel/sched/stop_task.c +++ b/kernel/sched/stop_task.c @@ -102,6 +102,10 @@ get_rr_interval_stop(struct rq *rq, struct task_struct *task) return 0; } +static void update_curr_stop(struct rq *rq) +{ +} + /* * Simple, special scheduling class for the per-CPU stop tasks: */ @@ -128,4 +132,5 @@ const struct sched_class stop_sched_class = { .prio_changed = prio_changed_stop, .switched_to = switched_to_stop, + .update_curr = update_curr_stop, }; From 82975bc6a6df743b9a01810fb32cb65d0ec5d60b Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 21 Nov 2014 13:26:07 -0800 Subject: [PATCH 179/208] uprobes, x86: Fix _TIF_UPROBE vs _TIF_NOTIFY_RESUME x86 call do_notify_resume on paranoid returns if TIF_UPROBE is set but not on non-paranoid returns. I suspect that this is a mistake and that the code only works because int3 is paranoid. Setting _TIF_NOTIFY_RESUME in the uprobe code was probably a workaround for the x86 bug. With that bug fixed, we can remove _TIF_NOTIFY_RESUME from the uprobes code. Reported-by: Oleg Nesterov Acked-by: Srikar Dronamraju Acked-by: Borislav Petkov Signed-off-by: Andy Lutomirski Signed-off-by: Linus Torvalds --- arch/x86/include/asm/thread_info.h | 2 +- kernel/events/uprobes.c | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 854053889d4d..547e344a6dc6 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -141,7 +141,7 @@ struct thread_info { /* Only used for 64 bit */ #define _TIF_DO_NOTIFY_MASK \ (_TIF_SIGPENDING | _TIF_MCE_NOTIFY | _TIF_NOTIFY_RESUME | \ - _TIF_USER_RETURN_NOTIFY) + _TIF_USER_RETURN_NOTIFY | _TIF_UPROBE) /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 1d0af8a2c646..ed8f2cde34c5 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1640,7 +1640,6 @@ bool uprobe_deny_signal(void) if (__fatal_signal_pending(t) || arch_uprobe_xol_was_trapped(t)) { utask->state = UTASK_SSTEP_TRAPPED; set_tsk_thread_flag(t, TIF_UPROBE); - set_tsk_thread_flag(t, TIF_NOTIFY_RESUME); } } From 5d01410fe4d92081f349b013a2e7a95429e4f2c9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 23 Nov 2014 15:25:20 -0800 Subject: [PATCH 180/208] Linux 3.18-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 00d618bbe8e7..2fd5c4e5c139 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 18 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Diseased Newt # *DOCUMENTATION* From c1b69b1ca1f5a8669034348cee9e495fde99bf69 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 23 Oct 2014 17:56:11 +0200 Subject: [PATCH 181/208] nfs: Remove dead case from nfs4_map_errors() NFS4ERR_ACCESS has number 13 and thus is matched and returned immediately at the beginning of nfs4_map_errors() and there's no point in checking it later. Coverity-id: 733891 Signed-off-by: Jan Kara Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 69dc20a743f9..15d5eb52cde8 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -158,8 +158,6 @@ static int nfs4_map_errors(int err) return -EACCES; case -NFS4ERR_MINOR_VERS_MISMATCH: return -EPROTONOSUPPORT; - case -NFS4ERR_ACCESS: - return -EACCES; case -NFS4ERR_FILE_OPEN: return -EBUSY; default: From 878ffa9f855e84d35d950f1abeb5a37b8435baea Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Thu, 23 Oct 2014 14:00:54 -0400 Subject: [PATCH 182/208] NFS: Use nfs_server_capable() for checknig NFS_CAP_SEEK This should make the code easier to maintain in the future. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs42proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 0886f1db5917..056f5aafecc1 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -50,7 +50,7 @@ loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence) struct nfs_server *server = NFS_SERVER(inode); int status; - if (!(server->caps & NFS_CAP_SEEK)) + if (!nfs_server_capable(inode, NFS_CAP_SEEK)) return -ENOTSUPP; status = nfs42_set_rw_stateid(&args.sa_stateid, filep, FMODE_READ); From 860a0d9e511f278bedab62d555a457c18e0841d5 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 28 Oct 2014 14:24:12 -0400 Subject: [PATCH 183/208] sunrpc: add some tracepoints in svc_rqst handling functions ...just around svc_send, svc_recv and svc_process for now. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- include/trace/events/sunrpc.h | 55 +++++++++++++++++++++++++++++++++++ net/sunrpc/svc.c | 21 +++++++------ net/sunrpc/svc_xprt.c | 31 +++++++++++++------- 3 files changed, 88 insertions(+), 19 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 1fef3e6e9436..6260f5134212 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -306,6 +307,60 @@ DEFINE_RPC_SOCKET_EVENT_DONE(rpc_socket_reset_connection); DEFINE_RPC_SOCKET_EVENT(rpc_socket_close); DEFINE_RPC_SOCKET_EVENT(rpc_socket_shutdown); +TRACE_EVENT(svc_recv, + TP_PROTO(struct svc_rqst *rqst, int status), + + TP_ARGS(rqst, status), + + TP_STRUCT__entry( + __field(struct sockaddr *, addr) + __field(__be32, xid) + __field(int, status) + ), + + TP_fast_assign( + __entry->addr = (struct sockaddr *)&rqst->rq_addr; + __entry->xid = status > 0 ? rqst->rq_xid : 0; + __entry->status = status; + ), + + TP_printk("addr=%pIScp xid=0x%x status=%d", __entry->addr, + be32_to_cpu(__entry->xid), __entry->status) +); + +DECLARE_EVENT_CLASS(svc_rqst_status, + + TP_PROTO(struct svc_rqst *rqst, int status), + + TP_ARGS(rqst, status), + + TP_STRUCT__entry( + __field(struct sockaddr *, addr) + __field(__be32, xid) + __field(int, dropme) + __field(int, status) + ), + + TP_fast_assign( + __entry->addr = (struct sockaddr *)&rqst->rq_addr; + __entry->xid = rqst->rq_xid; + __entry->dropme = (int)rqst->rq_dropme; + __entry->status = status; + ), + + TP_printk("addr=%pIScp rq_xid=0x%x dropme=%d status=%d", + __entry->addr, be32_to_cpu(__entry->xid), __entry->dropme, + __entry->status) +); + +DEFINE_EVENT(svc_rqst_status, svc_process, + TP_PROTO(struct svc_rqst *rqst, int status), + TP_ARGS(rqst, status)); + +DEFINE_EVENT(svc_rqst_status, svc_send, + TP_PROTO(struct svc_rqst *rqst, int status), + TP_ARGS(rqst, status)); + #endif /* _TRACE_SUNRPC_H */ #include diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index ca8a7958f4e6..371a8bbb43d6 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -28,6 +28,8 @@ #include #include +#include + #define RPCDBG_FACILITY RPCDBG_SVCDSP static void svc_unregister(const struct svc_serv *serv, struct net *net); @@ -1314,24 +1316,25 @@ svc_process(struct svc_rqst *rqstp) rqstp->rq_res.tail[0].iov_base = NULL; rqstp->rq_res.tail[0].iov_len = 0; - rqstp->rq_xid = svc_getu32(argv); - dir = svc_getnl(argv); if (dir != 0) { /* direction != CALL */ svc_printk(rqstp, "bad direction %d, dropping request\n", dir); serv->sv_stats->rpcbadfmt++; - svc_drop(rqstp); - return 0; + goto out_drop; } /* Returns 1 for send, 0 for drop */ - if (svc_process_common(rqstp, argv, resv)) - return svc_send(rqstp); - else { - svc_drop(rqstp); - return 0; + if (likely(svc_process_common(rqstp, argv, resv))) { + int ret = svc_send(rqstp); + + trace_svc_process(rqstp, ret); + return ret; } +out_drop: + trace_svc_process(rqstp, 0); + svc_drop(rqstp); + return 0; } #if defined(CONFIG_SUNRPC_BACKCHANNEL) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index c179ca2a5aa4..bbb3b044b877 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -15,6 +15,7 @@ #include #include #include +#include #define RPCDBG_FACILITY RPCDBG_SVCXPRT @@ -773,35 +774,43 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) err = svc_alloc_arg(rqstp); if (err) - return err; + goto out; try_to_freeze(); cond_resched(); + err = -EINTR; if (signalled() || kthread_should_stop()) - return -EINTR; + goto out; xprt = svc_get_next_xprt(rqstp, timeout); - if (IS_ERR(xprt)) - return PTR_ERR(xprt); + if (IS_ERR(xprt)) { + err = PTR_ERR(xprt); + goto out; + } len = svc_handle_xprt(rqstp, xprt); /* No data, incomplete (TCP) read, or accept() */ + err = -EAGAIN; if (len <= 0) - goto out; + goto out_release; clear_bit(XPT_OLD, &xprt->xpt_flags); rqstp->rq_secure = xprt->xpt_ops->xpo_secure_port(rqstp); rqstp->rq_chandle.defer = svc_defer; + rqstp->rq_xid = svc_getu32(&rqstp->rq_arg.head[0]); if (serv->sv_stats) serv->sv_stats->netcnt++; + trace_svc_recv(rqstp, len); return len; -out: +out_release: rqstp->rq_res.len = 0; svc_xprt_release(rqstp); - return -EAGAIN; +out: + trace_svc_recv(rqstp, err); + return err; } EXPORT_SYMBOL_GPL(svc_recv); @@ -821,12 +830,12 @@ EXPORT_SYMBOL_GPL(svc_drop); int svc_send(struct svc_rqst *rqstp) { struct svc_xprt *xprt; - int len; + int len = -EFAULT; struct xdr_buf *xb; xprt = rqstp->rq_xprt; if (!xprt) - return -EFAULT; + goto out; /* release the receive skb before sending the reply */ rqstp->rq_xprt->xpt_ops->xpo_release_rqst(rqstp); @@ -849,7 +858,9 @@ int svc_send(struct svc_rqst *rqstp) svc_xprt_release(rqstp); if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN) - return 0; + len = 0; +out: + trace_svc_send(rqstp, len); return len; } From 3705ad64f123271b2b88dbff0c9891b7b90299d2 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 28 Oct 2014 14:24:13 -0400 Subject: [PATCH 184/208] sunrpc: add new tracepoints in xprt handling code ...so we can keep track of when calls are sent and replies received. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- include/trace/events/sunrpc.h | 63 +++++++++++++++++++++++++++++++++++ net/sunrpc/xprt.c | 9 ++++- net/sunrpc/xprtsock.c | 8 ++++- 3 files changed, 78 insertions(+), 2 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 6260f5134212..5edb16bcd836 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -307,6 +307,69 @@ DEFINE_RPC_SOCKET_EVENT_DONE(rpc_socket_reset_connection); DEFINE_RPC_SOCKET_EVENT(rpc_socket_close); DEFINE_RPC_SOCKET_EVENT(rpc_socket_shutdown); +DECLARE_EVENT_CLASS(rpc_xprt_event, + TP_PROTO(struct rpc_xprt *xprt, __be32 xid, int status), + + TP_ARGS(xprt, xid, status), + + TP_STRUCT__entry( + __field(__be32, xid) + __field(int, status) + __string(addr, xprt->address_strings[RPC_DISPLAY_ADDR]) + __string(port, xprt->address_strings[RPC_DISPLAY_PORT]) + ), + + TP_fast_assign( + __entry->xid = xid; + __entry->status = status; + __assign_str(addr, xprt->address_strings[RPC_DISPLAY_ADDR]); + __assign_str(port, xprt->address_strings[RPC_DISPLAY_PORT]); + ), + + TP_printk("peer=%s/%s xid=0x%x status=%d", __get_str(addr), + __get_str(port), be32_to_cpu(__entry->xid), + __entry->status) +); + +DEFINE_EVENT(rpc_xprt_event, xprt_lookup_rqst, + TP_PROTO(struct rpc_xprt *xprt, __be32 xid, int status), + TP_ARGS(xprt, xid, status)); + +DEFINE_EVENT(rpc_xprt_event, xprt_transmit, + TP_PROTO(struct rpc_xprt *xprt, __be32 xid, int status), + TP_ARGS(xprt, xid, status)); + +DEFINE_EVENT(rpc_xprt_event, xprt_complete_rqst, + TP_PROTO(struct rpc_xprt *xprt, __be32 xid, int status), + TP_ARGS(xprt, xid, status)); + +TRACE_EVENT(xs_tcp_data_ready, + TP_PROTO(struct rpc_xprt *xprt, int err, unsigned int total), + + TP_ARGS(xprt, err, total), + + TP_STRUCT__entry( + __field(int, err) + __field(unsigned int, total) + __string(addr, xprt ? xprt->address_strings[RPC_DISPLAY_ADDR] : + "(null)") + __string(port, xprt ? xprt->address_strings[RPC_DISPLAY_PORT] : + "(null)") + ), + + TP_fast_assign( + __entry->err = err; + __entry->total = total; + __assign_str(addr, xprt ? + xprt->address_strings[RPC_DISPLAY_ADDR] : "(null)"); + __assign_str(port, xprt ? + xprt->address_strings[RPC_DISPLAY_PORT] : "(null)"); + ), + + TP_printk("peer=[%s]:%s err=%d total=%u", __get_str(addr), + __get_str(port), __entry->err, __entry->total) +); + TRACE_EVENT(svc_recv, TP_PROTO(struct svc_rqst *rqst, int status), diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 56e4e150e80e..1b2e5e616cae 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -49,6 +49,8 @@ #include #include +#include + #include "sunrpc.h" /* @@ -772,11 +774,14 @@ struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid) struct rpc_rqst *entry; list_for_each_entry(entry, &xprt->recv, rq_list) - if (entry->rq_xid == xid) + if (entry->rq_xid == xid) { + trace_xprt_lookup_rqst(xprt, xid, 0); return entry; + } dprintk("RPC: xprt_lookup_rqst did not find xid %08x\n", ntohl(xid)); + trace_xprt_lookup_rqst(xprt, xid, -ENOENT); xprt->stat.bad_xids++; return NULL; } @@ -810,6 +815,7 @@ void xprt_complete_rqst(struct rpc_task *task, int copied) dprintk("RPC: %5u xid %08x complete (%d bytes received)\n", task->tk_pid, ntohl(req->rq_xid), copied); + trace_xprt_complete_rqst(xprt, req->rq_xid, copied); xprt->stat.recvs++; req->rq_rtt = ktime_sub(ktime_get(), req->rq_xtime); @@ -926,6 +932,7 @@ void xprt_transmit(struct rpc_task *task) req->rq_xtime = ktime_get(); status = xprt->ops->send_request(task); + trace_xprt_transmit(xprt, req->rq_xid, status); if (status != 0) { task->tk_status = status; return; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 3b305ab17afe..b63e26272dc2 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1454,12 +1454,15 @@ static void xs_tcp_data_ready(struct sock *sk) struct rpc_xprt *xprt; read_descriptor_t rd_desc; int read; + unsigned long total = 0; dprintk("RPC: xs_tcp_data_ready...\n"); read_lock_bh(&sk->sk_callback_lock); - if (!(xprt = xprt_from_sock(sk))) + if (!(xprt = xprt_from_sock(sk))) { + read = 0; goto out; + } /* Any data means we had a useful conversation, so * the we don't need to delay the next reconnect */ @@ -1471,8 +1474,11 @@ static void xs_tcp_data_ready(struct sock *sk) do { rd_desc.count = 65536; read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv); + if (read > 0) + total += read; } while (read > 0); out: + trace_xs_tcp_data_ready(xprt, read, total); read_unlock_bh(&sk->sk_callback_lock); } From 1a867a0898b2e366a1eb5b7fe21413a2b2b1629f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 28 Oct 2014 14:24:14 -0400 Subject: [PATCH 185/208] sunrpc: add tracepoints in xs_tcp_data_recv Add tracepoints inside the main loop on xs_tcp_data_recv that allow us to keep an eye on what's happening during each phase of it. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprtsock.h | 59 +++++++++++++++++++++++++++++++ include/trace/events/sunrpc.h | 44 +++++++++++++++++++++++- net/sunrpc/xprtsock.c | 61 ++------------------------------- 3 files changed, 104 insertions(+), 60 deletions(-) diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index 1ad36cc25b2e..7591788e9fbf 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -17,6 +17,65 @@ void cleanup_socket_xprt(void); #define RPC_DEF_MIN_RESVPORT (665U) #define RPC_DEF_MAX_RESVPORT (1023U) +struct sock_xprt { + struct rpc_xprt xprt; + + /* + * Network layer + */ + struct socket * sock; + struct sock * inet; + + /* + * State of TCP reply receive + */ + __be32 tcp_fraghdr, + tcp_xid, + tcp_calldir; + + u32 tcp_offset, + tcp_reclen; + + unsigned long tcp_copied, + tcp_flags; + + /* + * Connection of transports + */ + struct delayed_work connect_worker; + struct sockaddr_storage srcaddr; + unsigned short srcport; + + /* + * UDP socket buffer size parameters + */ + size_t rcvsize, + sndsize; + + /* + * Saved socket callback addresses + */ + void (*old_data_ready)(struct sock *); + void (*old_state_change)(struct sock *); + void (*old_write_space)(struct sock *); + void (*old_error_report)(struct sock *); +}; + +/* + * TCP receive state flags + */ +#define TCP_RCV_LAST_FRAG (1UL << 0) +#define TCP_RCV_COPY_FRAGHDR (1UL << 1) +#define TCP_RCV_COPY_XID (1UL << 2) +#define TCP_RCV_COPY_DATA (1UL << 3) +#define TCP_RCV_READ_CALLDIR (1UL << 4) +#define TCP_RCV_COPY_CALLDIR (1UL << 5) + +/* + * TCP RPC flags + */ +#define TCP_RPC_REPLY (1UL << 6) + #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_XPRTSOCK_H */ diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 5edb16bcd836..171ca4ff6d99 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -326,7 +327,7 @@ DECLARE_EVENT_CLASS(rpc_xprt_event, __assign_str(port, xprt->address_strings[RPC_DISPLAY_PORT]); ), - TP_printk("peer=%s/%s xid=0x%x status=%d", __get_str(addr), + TP_printk("peer=[%s]:%s xid=0x%x status=%d", __get_str(addr), __get_str(port), be32_to_cpu(__entry->xid), __entry->status) ); @@ -370,6 +371,47 @@ TRACE_EVENT(xs_tcp_data_ready, __get_str(port), __entry->err, __entry->total) ); +#define rpc_show_sock_xprt_flags(flags) \ + __print_flags(flags, "|", \ + { TCP_RCV_LAST_FRAG, "TCP_RCV_LAST_FRAG" }, \ + { TCP_RCV_COPY_FRAGHDR, "TCP_RCV_COPY_FRAGHDR" }, \ + { TCP_RCV_COPY_XID, "TCP_RCV_COPY_XID" }, \ + { TCP_RCV_COPY_DATA, "TCP_RCV_COPY_DATA" }, \ + { TCP_RCV_READ_CALLDIR, "TCP_RCV_READ_CALLDIR" }, \ + { TCP_RCV_COPY_CALLDIR, "TCP_RCV_COPY_CALLDIR" }, \ + { TCP_RPC_REPLY, "TCP_RPC_REPLY" }) + +TRACE_EVENT(xs_tcp_data_recv, + TP_PROTO(struct sock_xprt *xs), + + TP_ARGS(xs), + + TP_STRUCT__entry( + __string(addr, xs->xprt.address_strings[RPC_DISPLAY_ADDR]) + __string(port, xs->xprt.address_strings[RPC_DISPLAY_PORT]) + __field(__be32, xid) + __field(unsigned long, flags) + __field(unsigned long, copied) + __field(unsigned int, reclen) + __field(unsigned long, offset) + ), + + TP_fast_assign( + __assign_str(addr, xs->xprt.address_strings[RPC_DISPLAY_ADDR]); + __assign_str(port, xs->xprt.address_strings[RPC_DISPLAY_PORT]); + __entry->xid = xs->tcp_xid; + __entry->flags = xs->tcp_flags; + __entry->copied = xs->tcp_copied; + __entry->reclen = xs->tcp_reclen; + __entry->offset = xs->tcp_offset; + ), + + TP_printk("peer=[%s]:%s xid=0x%x flags=%s copied=%lu reclen=%u offset=%lu", + __get_str(addr), __get_str(port), be32_to_cpu(__entry->xid), + rpc_show_sock_xprt_flags(__entry->flags), + __entry->copied, __entry->reclen, __entry->offset) +); + TRACE_EVENT(svc_recv, TP_PROTO(struct svc_rqst *rqst, int status), diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index b63e26272dc2..31c015196a29 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -216,65 +216,6 @@ static inline void xs_pktdump(char *msg, u32 *packet, unsigned int count) } #endif -struct sock_xprt { - struct rpc_xprt xprt; - - /* - * Network layer - */ - struct socket * sock; - struct sock * inet; - - /* - * State of TCP reply receive - */ - __be32 tcp_fraghdr, - tcp_xid, - tcp_calldir; - - u32 tcp_offset, - tcp_reclen; - - unsigned long tcp_copied, - tcp_flags; - - /* - * Connection of transports - */ - struct delayed_work connect_worker; - struct sockaddr_storage srcaddr; - unsigned short srcport; - - /* - * UDP socket buffer size parameters - */ - size_t rcvsize, - sndsize; - - /* - * Saved socket callback addresses - */ - void (*old_data_ready)(struct sock *); - void (*old_state_change)(struct sock *); - void (*old_write_space)(struct sock *); - void (*old_error_report)(struct sock *); -}; - -/* - * TCP receive state flags - */ -#define TCP_RCV_LAST_FRAG (1UL << 0) -#define TCP_RCV_COPY_FRAGHDR (1UL << 1) -#define TCP_RCV_COPY_XID (1UL << 2) -#define TCP_RCV_COPY_DATA (1UL << 3) -#define TCP_RCV_READ_CALLDIR (1UL << 4) -#define TCP_RCV_COPY_CALLDIR (1UL << 5) - -/* - * TCP RPC flags - */ -#define TCP_RPC_REPLY (1UL << 6) - static inline struct rpc_xprt *xprt_from_sock(struct sock *sk) { return (struct rpc_xprt *) sk->sk_user_data; @@ -1415,6 +1356,7 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns dprintk("RPC: xs_tcp_data_recv started\n"); do { + trace_xs_tcp_data_recv(transport); /* Read in a new fragment marker if necessary */ /* Can we ever really expect to get completely empty fragments? */ if (transport->tcp_flags & TCP_RCV_COPY_FRAGHDR) { @@ -1439,6 +1381,7 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns /* Skip over any trailing bytes on short reads */ xs_tcp_read_discard(transport, &desc); } while (desc.count); + trace_xs_tcp_data_recv(transport); dprintk("RPC: xs_tcp_data_recv done\n"); return len - desc.count; } From 6a74c0c9402b85647793da70edc9d6b097d54472 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Nov 2014 16:47:02 -0500 Subject: [PATCH 186/208] pnfs/blocklayout: fix end calculation in pnfs_num_cont_bytes Use the number of pages in the pagecache mapping instead of the number of pnfs requests which is only slightly related. Reported-by: Weston Andros Adamson Signed-off-by: Christoph Hellwig Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 4f46f7a05289..77fec6a55f57 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -812,7 +812,7 @@ static u64 pnfs_num_cont_bytes(struct inode *inode, pgoff_t idx) /* Optimize common case that writes from 0 to end of file */ end = DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE); - if (end != NFS_I(inode)->npages) { + if (end != inode->i_mapping->nrpages) { rcu_read_lock(); end = page_cache_next_hole(mapping, idx + 1, ULONG_MAX); rcu_read_unlock(); From cb1410c71e0b6b2eba8c1890645a76ff86169d24 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Wed, 12 Nov 2014 12:08:00 -0500 Subject: [PATCH 187/208] NFS: fix subtle change in COMMIT behavior Recent work in the pgio layer made it possible for there to be more than one request per page. This caused a subtle change in commit behavior, because write.c:nfs_commit_unstable_pages compares the number of *pages* waiting for writeback against the number of requests on a commit list to choose when to send a COMMIT in a non-blocking flush. This is probably hard to hit in normal operation - you have to be using rsize/wsize < PAGE_SIZE, or pnfs with lots of boundaries that are not page aligned to have a noticeable change in behavior. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 2 +- fs/nfs/inode.c | 8 ++++---- fs/nfs/pagelist.c | 11 ++++++++--- fs/nfs/write.c | 17 ++++++++++++----- include/linux/nfs_fs.h | 4 ++-- 5 files changed, 27 insertions(+), 15 deletions(-) diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 73466b934090..e36a9d78ea49 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -49,7 +49,7 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args, goto out_iput; res->size = i_size_read(inode); res->change_attr = delegation->change_attr; - if (nfsi->npages != 0) + if (nfsi->nrequests != 0) res->change_attr++; res->ctime = inode->i_ctime; res->mtime = inode->i_mtime; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 00689a8a85e4..2b48ce58a584 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1149,7 +1149,7 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE) && (fattr->valid & NFS_ATTR_FATTR_SIZE) && i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size) - && nfsi->npages == 0) { + && nfsi->nrequests == 0) { i_size_write(inode, nfs_size_to_loff_t(fattr->size)); ret |= NFS_INO_INVALID_ATTR; } @@ -1192,7 +1192,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat if (fattr->valid & NFS_ATTR_FATTR_SIZE) { cur_size = i_size_read(inode); new_isize = nfs_size_to_loff_t(fattr->size); - if (cur_size != new_isize && nfsi->npages == 0) + if (cur_size != new_isize && nfsi->nrequests == 0) invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; } @@ -1619,7 +1619,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (new_isize != cur_isize) { /* Do we perhaps have any outstanding writes, or has * the file grown beyond our last write? */ - if ((nfsi->npages == 0) || new_isize > cur_isize) { + if ((nfsi->nrequests == 0) || new_isize > cur_isize) { i_size_write(inode, new_isize); invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; invalid &= ~NFS_INO_REVAL_PAGECACHE; @@ -1784,7 +1784,7 @@ static void init_once(void *foo) INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); INIT_LIST_HEAD(&nfsi->commit_info.list); - nfsi->npages = 0; + nfsi->nrequests = 0; nfsi->commit_info.ncommit = 0; atomic_set(&nfsi->commit_info.rpcs_out, 0); atomic_set(&nfsi->silly_count, 1); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index ed0db61f8543..2b5e769beb16 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -258,6 +258,7 @@ bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit) static inline void nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev) { + struct inode *inode; WARN_ON_ONCE(prev == req); if (!prev) { @@ -276,12 +277,16 @@ nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev) * nfs_page_group_destroy is called */ kref_get(&req->wb_head->wb_kref); - /* grab extra ref if head request has extra ref from - * the write/commit path to handle handoff between write - * and commit lists */ + /* grab extra ref and bump the request count if head request + * has extra ref from the write/commit path to handle handoff + * between write and commit lists. */ if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags)) { + inode = page_file_mapping(req->wb_page)->host; set_bit(PG_INODE_REF, &req->wb_flags); kref_get(&req->wb_kref); + spin_lock(&inode->i_lock); + NFS_I(inode)->nrequests++; + spin_unlock(&inode->i_lock); } } } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f83b02dc9166..d489ff3f438f 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -670,7 +670,8 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) nfs_lock_request(req); spin_lock(&inode->i_lock); - if (!nfsi->npages && NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) + if (!nfsi->nrequests && + NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) inode->i_version++; /* * Swap-space should not get truncated. Hence no need to plug the race @@ -681,9 +682,11 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) SetPagePrivate(req->wb_page); set_page_private(req->wb_page, (unsigned long)req); } - nfsi->npages++; + nfsi->nrequests++; /* this a head request for a page group - mark it as having an - * extra reference so sub groups can follow suit */ + * extra reference so sub groups can follow suit. + * This flag also informs pgio layer when to bump nrequests when + * adding subrequests. */ WARN_ON(test_and_set_bit(PG_INODE_REF, &req->wb_flags)); kref_get(&req->wb_kref); spin_unlock(&inode->i_lock); @@ -709,7 +712,11 @@ static void nfs_inode_remove_request(struct nfs_page *req) wake_up_page(head->wb_page, PG_private); clear_bit(PG_MAPPED, &head->wb_flags); } - nfsi->npages--; + nfsi->nrequests--; + spin_unlock(&inode->i_lock); + } else { + spin_lock(&inode->i_lock); + nfsi->nrequests--; spin_unlock(&inode->i_lock); } @@ -1735,7 +1742,7 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr /* Don't commit yet if this is a non-blocking flush and there * are a lot of outstanding writes for this mapping. */ - if (nfsi->commit_info.ncommit <= (nfsi->npages >> 1)) + if (nfsi->commit_info.ncommit <= (nfsi->nrequests >> 1)) goto out_mark_dirty; /* don't wait for the COMMIT response */ diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index c72d1ad41ad4..6d627b92df53 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -163,7 +163,7 @@ struct nfs_inode { */ __be32 cookieverf[2]; - unsigned long npages; + unsigned long nrequests; struct nfs_mds_commit_info commit_info; /* Open contexts for shared mmap writes */ @@ -520,7 +520,7 @@ extern void nfs_commit_free(struct nfs_commit_data *data); static inline int nfs_have_writebacks(struct inode *inode) { - return NFS_I(inode)->npages != 0; + return NFS_I(inode)->nrequests != 0; } /* From 4bd5a980de87d2b5af417485bde97b8eb3d6cf6a Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Mon, 17 Nov 2014 11:05:17 +0800 Subject: [PATCH 188/208] nfs41: fix nfs4_proc_layoutget error handling nfs4_layoutget_release() drops layout hdr refcnt. Grab the refcnt early so that it is safe to call .release in case nfs4_alloc_pages fails. Signed-off-by: Peng Tao Fixes: a47970ff78147 ("NFSv4.1: Hold reference to layout hdr in layoutget") Cc: stable@vger.kernel.org # 3.9+ Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 15d5eb52cde8..3138913d1cdf 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7702,6 +7702,9 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) dprintk("--> %s\n", __func__); + /* nfs4_layoutget_release calls pnfs_put_layout_hdr */ + pnfs_get_layout_hdr(NFS_I(inode)->layout); + lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags); if (!lgp->args.layout.pages) { nfs4_layoutget_release(lgp); @@ -7714,9 +7717,6 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) lgp->res.seq_res.sr_slot = NULL; nfs4_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0); - /* nfs4_layoutget_release calls pnfs_put_layout_hdr */ - pnfs_get_layout_hdr(NFS_I(inode)->layout); - task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return ERR_CAST(task); From 10b89567db51e143c2f0828839332502916d012d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 17 Nov 2014 16:58:03 -0500 Subject: [PATCH 189/208] lockd: eliminate LOCKD_DEBUG LOCKD_DEBUG is always the same value as CONFIG_SUNRPC_DEBUG, so we can just use it instead. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/lockd/svclock.c | 2 +- include/linux/lockd/debug.h | 6 +----- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 13db95f54176..56598742dde4 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -53,7 +53,7 @@ static const struct rpc_call_ops nlmsvc_grant_ops; static LIST_HEAD(nlm_blocked); static DEFINE_SPINLOCK(nlm_blocked_lock); -#ifdef LOCKD_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie) { /* diff --git a/include/linux/lockd/debug.h b/include/linux/lockd/debug.h index 257d3779f2ab..0ca8109934e4 100644 --- a/include/linux/lockd/debug.h +++ b/include/linux/lockd/debug.h @@ -17,12 +17,8 @@ * Enable lockd debugging. * Requires RPC_DEBUG. */ -#ifdef RPC_DEBUG -# define LOCKD_DEBUG 1 -#endif - #undef ifdebug -#if defined(RPC_DEBUG) && defined(LOCKD_DEBUG) +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define ifdebug(flag) if (unlikely(nlm_debug & NLMDBG_##flag)) #else # define ifdebug(flag) if (0) From f895b252d4edf66b2895fb5a7b17a638665f3e1f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 17 Nov 2014 16:58:04 -0500 Subject: [PATCH 190/208] sunrpc: eliminate RPC_DEBUG It's always set to whatever CONFIG_SUNRPC_DEBUG is, so just use that. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- include/linux/sunrpc/auth.h | 2 +- include/linux/sunrpc/debug.h | 9 +++------ include/linux/sunrpc/sched.h | 8 ++++---- include/uapi/linux/nfsd/debug.h | 2 +- net/sunrpc/auth.c | 4 ++-- net/sunrpc/auth_generic.c | 2 +- net/sunrpc/auth_gss/auth_gss.c | 2 +- net/sunrpc/auth_gss/gss_generic_token.c | 2 +- net/sunrpc/auth_gss/gss_krb5_crypto.c | 2 +- net/sunrpc/auth_gss/gss_krb5_keys.c | 2 +- net/sunrpc/auth_gss/gss_krb5_mech.c | 2 +- net/sunrpc/auth_gss/gss_krb5_seal.c | 2 +- net/sunrpc/auth_gss/gss_krb5_seqnum.c | 2 +- net/sunrpc/auth_gss/gss_krb5_unseal.c | 2 +- net/sunrpc/auth_gss/gss_krb5_wrap.c | 2 +- net/sunrpc/auth_gss/gss_mech_switch.c | 2 +- net/sunrpc/auth_gss/gss_rpc_xdr.h | 2 +- net/sunrpc/auth_gss/svcauth_gss.c | 2 +- net/sunrpc/auth_null.c | 4 ++-- net/sunrpc/auth_unix.c | 2 +- net/sunrpc/backchannel_rqst.c | 2 +- net/sunrpc/clnt.c | 6 +++--- net/sunrpc/rpcb_clnt.c | 2 +- net/sunrpc/sched.c | 4 ++-- net/sunrpc/sunrpc_syms.c | 4 ++-- net/sunrpc/svc.c | 2 +- net/sunrpc/sysctl.c | 2 +- net/sunrpc/xprt.c | 2 +- net/sunrpc/xprtrdma/rpc_rdma.c | 4 ++-- net/sunrpc/xprtrdma/transport.c | 8 ++++---- net/sunrpc/xprtrdma/verbs.c | 8 ++++---- net/sunrpc/xprtsock.c | 8 ++++---- 32 files changed, 53 insertions(+), 56 deletions(-) diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 8e030075fe79..a7cbb570cc5c 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -53,7 +53,7 @@ struct rpc_cred { struct rcu_head cr_rcu; struct rpc_auth * cr_auth; const struct rpc_credops *cr_ops; -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) unsigned long cr_magic; /* 0x0f4aa4f0 */ #endif unsigned long cr_expire; /* when to gc */ diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h index 9385bd74c860..51143757b8f7 100644 --- a/include/linux/sunrpc/debug.h +++ b/include/linux/sunrpc/debug.h @@ -14,9 +14,6 @@ /* * Enable RPC debugging/profiling. */ -#ifdef CONFIG_SUNRPC_DEBUG -#define RPC_DEBUG -#endif #ifdef CONFIG_TRACEPOINTS #define RPC_TRACEPOINTS #endif @@ -25,7 +22,7 @@ /* * Debugging macros etc */ -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) extern unsigned int rpc_debug; extern unsigned int nfs_debug; extern unsigned int nfsd_debug; @@ -36,7 +33,7 @@ extern unsigned int nlm_debug; #define dprintk_rcu(args...) dfprintk_rcu(FACILITY, ## args) #undef ifdebug -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define ifdebug(fac) if (unlikely(rpc_debug & RPCDBG_##fac)) # define dfprintk(fac, args...) \ @@ -65,7 +62,7 @@ extern unsigned int nlm_debug; /* * Sysctl interface for RPC debugging */ -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) void rpc_register_sysctl(void); void rpc_unregister_sysctl(void); #endif diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 1a8959944c5f..fecdbf1b4797 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -79,7 +79,7 @@ struct rpc_task { unsigned short tk_flags; /* misc flags */ unsigned short tk_timeouts; /* maj timeouts */ -#if defined(RPC_DEBUG) || defined(RPC_TRACEPOINTS) +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || defined(RPC_TRACEPOINTS) unsigned short tk_pid; /* debugging aid */ #endif unsigned char tk_priority : 2,/* Task priority */ @@ -187,7 +187,7 @@ struct rpc_wait_queue { unsigned char nr; /* # tasks remaining for cookie */ unsigned short qlen; /* total # tasks waiting in queue */ struct rpc_timer timer_list; -#if defined(RPC_DEBUG) || defined(RPC_TRACEPOINTS) +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || defined(RPC_TRACEPOINTS) const char * name; #endif }; @@ -237,7 +237,7 @@ void rpc_free(void *); int rpciod_up(void); void rpciod_down(void); int __rpc_wait_for_completion_task(struct rpc_task *task, wait_bit_action_f *); -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) struct net; void rpc_show_tasks(struct net *); #endif @@ -251,7 +251,7 @@ static inline int rpc_wait_for_completion_task(struct rpc_task *task) return __rpc_wait_for_completion_task(task, NULL); } -#if defined(RPC_DEBUG) || defined (RPC_TRACEPOINTS) +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || defined (RPC_TRACEPOINTS) static inline const char * rpc_qname(const struct rpc_wait_queue *q) { return ((q && q->name) ? q->name : "unknown"); diff --git a/include/uapi/linux/nfsd/debug.h b/include/uapi/linux/nfsd/debug.h index a6f453c740b8..1fdc95bb2375 100644 --- a/include/uapi/linux/nfsd/debug.h +++ b/include/uapi/linux/nfsd/debug.h @@ -15,7 +15,7 @@ * Enable debugging for nfsd. * Requires RPC_DEBUG. */ -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define NFSD_DEBUG 1 #endif diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 383eb919ac0b..47f38be4155f 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -16,7 +16,7 @@ #include #include -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif @@ -646,7 +646,7 @@ rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred, cred->cr_auth = auth; cred->cr_ops = ops; cred->cr_expire = jiffies; -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) cred->cr_magic = RPCAUTH_CRED_MAGIC; #endif cred->cr_uid = acred->uid; diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c index 6f6b829c9e8e..41248b1820c7 100644 --- a/net/sunrpc/auth_generic.c +++ b/net/sunrpc/auth_generic.c @@ -14,7 +14,7 @@ #include #include -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 53ed8d3f8897..dace13d7638e 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -66,7 +66,7 @@ static unsigned int gss_expired_cred_retry_delay = GSS_RETRY_EXPIRED; #define GSS_KEY_EXPIRE_TIMEO 240 static unsigned int gss_key_expire_timeo = GSS_KEY_EXPIRE_TIMEO; -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif diff --git a/net/sunrpc/auth_gss/gss_generic_token.c b/net/sunrpc/auth_gss/gss_generic_token.c index c586e92bcf76..254defe446a7 100644 --- a/net/sunrpc/auth_gss/gss_generic_token.c +++ b/net/sunrpc/auth_gss/gss_generic_token.c @@ -38,7 +38,7 @@ #include -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index f5ed9f6ece06..b5408e8a37f2 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -45,7 +45,7 @@ #include #include -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif diff --git a/net/sunrpc/auth_gss/gss_krb5_keys.c b/net/sunrpc/auth_gss/gss_krb5_keys.c index 24589bd2a4b6..234fa8d0fd9b 100644 --- a/net/sunrpc/auth_gss/gss_krb5_keys.c +++ b/net/sunrpc/auth_gss/gss_krb5_keys.c @@ -61,7 +61,7 @@ #include #include -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 0d3c158ef8fa..28db442a0034 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -45,7 +45,7 @@ #include #include -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c index 42768e5c3994..1d74d653e6c0 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c @@ -64,7 +64,7 @@ #include #include -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c index 62ac90c62cb1..20d55c793eb6 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c +++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c @@ -35,7 +35,7 @@ #include #include -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c index 6c981ddc19f8..dcf9515d9aef 100644 --- a/net/sunrpc/auth_gss/gss_krb5_unseal.c +++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c @@ -62,7 +62,7 @@ #include #include -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 4b614c604fe0..ca7e92a32f84 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -35,7 +35,7 @@ #include #include -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index 92d5ab99fbf3..7063d856a598 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -46,7 +46,7 @@ #include #include -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.h b/net/sunrpc/auth_gss/gss_rpc_xdr.h index 685a688f3d8a..9d88c6239f01 100644 --- a/net/sunrpc/auth_gss/gss_rpc_xdr.h +++ b/net/sunrpc/auth_gss/gss_rpc_xdr.h @@ -25,7 +25,7 @@ #include #include -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index c548ab213f76..de856ddf5fed 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -51,7 +51,7 @@ #include "gss_rpc_upcall.h" -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index 712c123e04e9..c2a2b584a056 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c @@ -10,7 +10,7 @@ #include #include -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif @@ -138,7 +138,7 @@ struct rpc_cred null_cred = { .cr_ops = &null_credops, .cr_count = ATOMIC_INIT(1), .cr_flags = 1UL << RPCAUTH_CRED_UPTODATE, -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) .cr_magic = RPCAUTH_CRED_MAGIC, #endif }; diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index d5d692366294..4feda2d0a833 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -25,7 +25,7 @@ struct unx_cred { #define UNX_WRITESLACK (21 + (UNX_MAXNODENAME >> 2)) -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index 9761a0da964d..651f49ab601f 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -27,7 +27,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) #define RPCDBG_FACILITY RPCDBG_TRANS #endif diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 9acd6ce88db7..36c64ef460cf 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -42,7 +42,7 @@ #include "sunrpc.h" #include "netns.h" -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_CALL #endif @@ -1396,7 +1396,7 @@ rpc_restart_call(struct rpc_task *task) } EXPORT_SYMBOL_GPL(rpc_restart_call); -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) static const char *rpc_proc_name(const struct rpc_task *task) { const struct rpc_procinfo *proc = task->tk_msg.rpc_proc; @@ -2421,7 +2421,7 @@ struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int } EXPORT_SYMBOL_GPL(rpc_call_null); -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) static void rpc_show_header(void) { printk(KERN_INFO "-pid- flgs status -client- --rqstp- " diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 1891a1022c17..05202012bcfc 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -32,7 +32,7 @@ #include "netns.h" -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_BIND #endif diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index fe3441abdbe5..574b2977fc4b 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -24,7 +24,7 @@ #include "sunrpc.h" -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) #define RPCDBG_FACILITY RPCDBG_SCHED #endif @@ -258,7 +258,7 @@ static int rpc_wait_bit_killable(struct wait_bit_key *key) return 0; } -#if defined(RPC_DEBUG) || defined(RPC_TRACEPOINTS) +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || defined(RPC_TRACEPOINTS) static void rpc_task_set_debuginfo(struct rpc_task *task) { static atomic_t rpc_pid; diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index cd30120de9e4..f632e476ab6c 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -97,7 +97,7 @@ init_sunrpc(void) err = register_rpc_pipefs(); if (err) goto out4; -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) rpc_register_sysctl(); #endif svc_init_xprt_sock(); /* svc sock transport */ @@ -123,7 +123,7 @@ cleanup_sunrpc(void) unregister_rpc_pipefs(); rpc_destroy_mempool(); unregister_pernet_subsys(&sunrpc_net_ops); -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) rpc_unregister_sysctl(); #endif rcu_barrier(); /* Wait for completion of call_rcu()'s */ diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 371a8bbb43d6..2783fd80c229 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1042,7 +1042,7 @@ static void svc_unregister(const struct svc_serv *serv, struct net *net) /* * dprintk the given error with the address of the client that caused it. */ -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) static __printf(2, 3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) { diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c index c99c58e2ee66..887f0183b4c6 100644 --- a/net/sunrpc/sysctl.c +++ b/net/sunrpc/sysctl.c @@ -37,7 +37,7 @@ EXPORT_SYMBOL_GPL(nfsd_debug); unsigned int nlm_debug; EXPORT_SYMBOL_GPL(nlm_debug); -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) static struct ctl_table_header *sunrpc_table_header; static struct ctl_table sunrpc_table[]; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 1b2e5e616cae..894d071426b2 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -57,7 +57,7 @@ * Local variables */ -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_XPRT #endif diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 6166c985fe24..df01d124936c 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -49,11 +49,11 @@ #include -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_TRANS #endif -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) static const char transfertypes[][12] = { "pure inline", /* no chunks */ " read chunk", /* some argument via rdma read */ diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 6a4615dd0261..ef58ebadb3ae 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -55,7 +55,7 @@ #include "xprt_rdma.h" -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_TRANS #endif @@ -75,7 +75,7 @@ static unsigned int xprt_rdma_inline_write_padding; static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR; int xprt_rdma_pad_optimize = 0; -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) static unsigned int min_slot_table_size = RPCRDMA_MIN_SLOT_TABLE; static unsigned int max_slot_table_size = RPCRDMA_MAX_SLOT_TABLE; @@ -705,7 +705,7 @@ static void __exit xprt_rdma_cleanup(void) int rc; dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n"); -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) if (sunrpc_table_header) { unregister_sysctl_table(sunrpc_table_header); sunrpc_table_header = NULL; @@ -736,7 +736,7 @@ static int __init xprt_rdma_init(void) dprintk("\tPadding %d\n\tMemreg %d\n", xprt_rdma_inline_write_padding, xprt_rdma_memreg_strategy); -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) if (!sunrpc_table_header) sunrpc_table_header = register_sysctl_table(sunrpc_table); #endif diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 61c41298b4ea..b92b04083e40 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -57,7 +57,7 @@ * Globals/Macros */ -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_TRANS #endif @@ -313,7 +313,7 @@ rpcrdma_flush_cqs(struct rpcrdma_ep *ep) rpcrdma_sendcq_upcall(ep->rep_attr.send_cq, ep); } -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) static const char * const conn[] = { "address resolved", "address error", @@ -344,7 +344,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) struct rpcrdma_xprt *xprt = id->context; struct rpcrdma_ia *ia = &xprt->rx_ia; struct rpcrdma_ep *ep = &xprt->rx_ep; -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr; #endif struct ib_qp_attr attr; @@ -408,7 +408,7 @@ connected: break; } -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) if (connstate == 1) { int ird = attr.max_dest_rd_atomic; int tird = ep->rep_remote_cma.responder_resources; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 31c015196a29..87ce7e8bb8dc 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -75,7 +75,7 @@ static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO; * someone else's file names! */ -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE; static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE; @@ -186,7 +186,7 @@ static struct ctl_table sunrpc_table[] = { */ #define XS_IDLE_DISC_TO (5U * 60 * HZ) -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # undef RPC_DEBUG_DATA # define RPCDBG_FACILITY RPCDBG_TRANS #endif @@ -2991,7 +2991,7 @@ static struct xprt_class xs_bc_tcp_transport = { */ int init_socket_xprt(void) { -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) if (!sunrpc_table_header) sunrpc_table_header = register_sysctl_table(sunrpc_table); #endif @@ -3010,7 +3010,7 @@ int init_socket_xprt(void) */ void cleanup_socket_xprt(void) { -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) if (sunrpc_table_header) { unregister_sysctl_table(sunrpc_table_header); sunrpc_table_header = NULL; From 1306729b0d4f4a0bd0d098711ed3d938dc5a1a28 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 17 Nov 2014 16:58:05 -0500 Subject: [PATCH 191/208] sunrpc: eliminate RPC_TRACEPOINTS It's always set to the same value as CONFIG_TRACEPOINTS, so we can just use that instead. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- include/linux/sunrpc/debug.h | 9 --------- include/linux/sunrpc/sched.h | 6 +++--- net/sunrpc/sched.c | 2 +- 3 files changed, 4 insertions(+), 13 deletions(-) diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h index 51143757b8f7..43f38ee9668c 100644 --- a/include/linux/sunrpc/debug.h +++ b/include/linux/sunrpc/debug.h @@ -10,15 +10,6 @@ #include - -/* - * Enable RPC debugging/profiling. - */ -#ifdef CONFIG_TRACEPOINTS -#define RPC_TRACEPOINTS -#endif -/* #define RPC_PROFILE */ - /* * Debugging macros etc */ diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index fecdbf1b4797..5f1e6bd4c316 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -79,7 +79,7 @@ struct rpc_task { unsigned short tk_flags; /* misc flags */ unsigned short tk_timeouts; /* maj timeouts */ -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || defined(RPC_TRACEPOINTS) +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || IS_ENABLED(CONFIG_TRACEPOINTS) unsigned short tk_pid; /* debugging aid */ #endif unsigned char tk_priority : 2,/* Task priority */ @@ -187,7 +187,7 @@ struct rpc_wait_queue { unsigned char nr; /* # tasks remaining for cookie */ unsigned short qlen; /* total # tasks waiting in queue */ struct rpc_timer timer_list; -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || defined(RPC_TRACEPOINTS) +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || IS_ENABLED(CONFIG_TRACEPOINTS) const char * name; #endif }; @@ -251,7 +251,7 @@ static inline int rpc_wait_for_completion_task(struct rpc_task *task) return __rpc_wait_for_completion_task(task, NULL); } -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || defined (RPC_TRACEPOINTS) +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || IS_ENABLED(CONFIG_TRACEPOINTS) static inline const char * rpc_qname(const struct rpc_wait_queue *q) { return ((q && q->name) ? q->name : "unknown"); diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 574b2977fc4b..d20f2329eea3 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -258,7 +258,7 @@ static int rpc_wait_bit_killable(struct wait_bit_key *key) return 0; } -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || defined(RPC_TRACEPOINTS) +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || IS_ENABLED(CONFIG_TRACEPOINTS) static void rpc_task_set_debuginfo(struct rpc_task *task) { static atomic_t rpc_pid; From fe0bf1185ddf7e9d193cfe397ed1414d91526e19 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Tue, 18 Nov 2014 13:23:43 +0100 Subject: [PATCH 192/208] NFS: Deletion of unnecessary checks before the function call "nfs_put_client" The nfs_put_client() function tests whether its argument is NULL and then returns immediately. Thus the test around the call is not needed. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Trond Myklebust --- fs/nfs/filelayout/filelayoutdev.c | 3 +-- fs/nfs/nfs4client.c | 15 +++++---------- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c index 9bb806a76d99..bfecac781f19 100644 --- a/fs/nfs/filelayout/filelayoutdev.c +++ b/fs/nfs/filelayout/filelayoutdev.c @@ -204,8 +204,7 @@ destroy_ds(struct nfs4_pnfs_ds *ds) ifdebug(FACILITY) print_ds(ds); - if (ds->ds_clp) - nfs_put_client(ds->ds_clp); + nfs_put_client(ds->ds_clp); while (!list_empty(&ds->ds_addrs)) { da = list_first_entry(&ds->ds_addrs, diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index ffdb28d86cf8..5f07a0ed9188 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -498,8 +498,7 @@ int nfs40_walk_client_list(struct nfs_client *new, atomic_inc(&pos->cl_count); spin_unlock(&nn->nfs_client_lock); - if (prev) - nfs_put_client(prev); + nfs_put_client(prev); prev = pos; status = nfs_wait_client_init_complete(pos); @@ -517,8 +516,7 @@ int nfs40_walk_client_list(struct nfs_client *new, atomic_inc(&pos->cl_count); spin_unlock(&nn->nfs_client_lock); - if (prev) - nfs_put_client(prev); + nfs_put_client(prev); prev = pos; status = nfs4_proc_setclientid_confirm(pos, &clid, cred); @@ -549,8 +547,7 @@ int nfs40_walk_client_list(struct nfs_client *new, /* No match found. The server lost our clientid */ out: - if (prev) - nfs_put_client(prev); + nfs_put_client(prev); dprintk("NFS: <-- %s status = %d\n", __func__, status); return status; } @@ -641,8 +638,7 @@ int nfs41_walk_client_list(struct nfs_client *new, atomic_inc(&pos->cl_count); spin_unlock(&nn->nfs_client_lock); - if (prev) - nfs_put_client(prev); + nfs_put_client(prev); prev = pos; status = nfs_wait_client_init_complete(pos); @@ -675,8 +671,7 @@ int nfs41_walk_client_list(struct nfs_client *new, /* No matching nfs_client found. */ spin_unlock(&nn->nfs_client_lock); dprintk("NFS: <-- %s status = %d\n", __func__, status); - if (prev) - nfs_put_client(prev); + nfs_put_client(prev); return status; } #endif /* CONFIG_NFS_V4_1 */ From 5a254d08b086d80cbead2ebcee6d2a4b3a15587a Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Sun, 23 Nov 2014 12:47:17 +0800 Subject: [PATCH 193/208] nfs: replace nfs_add_stats with nfs_inc_stats when add one Signed-off-by: Li RongQing Signed-off-by: Trond Myklebust --- fs/nfs/read.c | 2 +- fs/nfs/write.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/read.c b/fs/nfs/read.c index beff2769c5c5..c91a4799c562 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -269,7 +269,7 @@ int nfs_readpage(struct file *file, struct page *page) dprintk("NFS: nfs_readpage (%p %ld@%lu)\n", page, PAGE_CACHE_SIZE, page_file_index(page)); nfs_inc_stats(inode, NFSIOS_VFSREADPAGE); - nfs_add_stats(inode, NFSIOS_READPAGES, 1); + nfs_inc_stats(inode, NFSIOS_READPAGES); /* * Try to flush any pending writes to the file.. diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d489ff3f438f..af3af685a9e3 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -575,7 +575,7 @@ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, st int ret; nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); - nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1); + nfs_inc_stats(inode, NFSIOS_WRITEPAGES); nfs_pageio_cond_complete(pgio, page_file_index(page)); ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE); From e9f456ca50e579dfacd996f50637f0bb0a585dfc Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Sun, 23 Nov 2014 12:47:41 +0800 Subject: [PATCH 194/208] nfs: define nfs_inc_fscache_stats and using it as possible Define and use nfs_inc_fscache_stats when plus one, which can save to pass one parameter. Signed-off-by: Li RongQing Signed-off-by: Trond Myklebust --- fs/nfs/fscache.c | 24 ++++++++++++------------ fs/nfs/iostat.h | 5 +++++ 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index 3ef01f0ba0bc..d63bea8bbfbb 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -269,8 +269,8 @@ int nfs_fscache_release_page(struct page *page, gfp_t gfp) if (!fscache_maybe_release_page(cookie, page, gfp)) return 0; - nfs_add_fscache_stats(page->mapping->host, - NFSIOS_FSCACHE_PAGES_UNCACHED, 1); + nfs_inc_fscache_stats(page->mapping->host, + NFSIOS_FSCACHE_PAGES_UNCACHED); } return 1; @@ -293,8 +293,8 @@ void __nfs_fscache_invalidate_page(struct page *page, struct inode *inode) BUG_ON(!PageLocked(page)); fscache_uncache_page(cookie, page); - nfs_add_fscache_stats(page->mapping->host, - NFSIOS_FSCACHE_PAGES_UNCACHED, 1); + nfs_inc_fscache_stats(page->mapping->host, + NFSIOS_FSCACHE_PAGES_UNCACHED); } /* @@ -343,19 +343,19 @@ int __nfs_readpage_from_fscache(struct nfs_open_context *ctx, case 0: /* read BIO submitted (page in fscache) */ dfprintk(FSCACHE, "NFS: readpage_from_fscache: BIO submitted\n"); - nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_OK, 1); + nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_OK); return ret; case -ENOBUFS: /* inode not in cache */ case -ENODATA: /* page not in cache */ - nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL, 1); + nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL); dfprintk(FSCACHE, "NFS: readpage_from_fscache %d\n", ret); return 1; default: dfprintk(FSCACHE, "NFS: readpage_from_fscache %d\n", ret); - nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL, 1); + nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL); } return ret; } @@ -429,11 +429,11 @@ void __nfs_readpage_to_fscache(struct inode *inode, struct page *page, int sync) if (ret != 0) { fscache_uncache_page(nfs_i_fscache(inode), page); - nfs_add_fscache_stats(inode, - NFSIOS_FSCACHE_PAGES_WRITTEN_FAIL, 1); - nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_UNCACHED, 1); + nfs_inc_fscache_stats(inode, + NFSIOS_FSCACHE_PAGES_WRITTEN_FAIL); + nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_UNCACHED); } else { - nfs_add_fscache_stats(inode, - NFSIOS_FSCACHE_PAGES_WRITTEN_OK, 1); + nfs_inc_fscache_stats(inode, + NFSIOS_FSCACHE_PAGES_WRITTEN_OK); } } diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h index c5832487c456..0cb806fbd4c4 100644 --- a/fs/nfs/iostat.h +++ b/fs/nfs/iostat.h @@ -55,6 +55,11 @@ static inline void nfs_add_fscache_stats(struct inode *inode, { this_cpu_add(NFS_SERVER(inode)->io_stats->fscache[stat], addend); } +static inline void nfs_inc_fscache_stats(struct inode *inode, + enum nfs_stat_fscachecounters stat) +{ + this_cpu_inc(NFS_SERVER(inode)->io_stats->fscache[stat]); +} #endif static inline struct nfs_iostats __percpu *nfs_alloc_iostats(void) From 92b98361f119a6919061d067c6584de9ae2de9f4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 8 Nov 2014 20:14:12 -0500 Subject: [PATCH 195/208] xprtrdma: Return an errno from rpcrdma_register_external() The RPC/RDMA send_request method and the chunk registration code expects an errno from the registration function. This allows the upper layers to distinguish between a recoverable failure (for example, temporary memory exhaustion) and a hard failure (for example, a bug in the registration logic). Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 61c41298b4ea..6ea29420f0ac 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1918,10 +1918,10 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg, break; default: - return -1; + return -EIO; } if (rc) - return -1; + return rc; return nsegs; } From e7104a2a96069975d489c60a30564372c6273a85 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 8 Nov 2014 20:14:20 -0500 Subject: [PATCH 196/208] xprtrdma: Cap req_cqinit Recent work made FRMR registration and invalidation completions unsignaled. This greatly reduces the adapter interrupt rate. Every so often, however, a posted send Work Request is allowed to signal. Otherwise, the provider's Work Queue will wrap and the workload will hang. The number of Work Requests that are allowed to remain unsignaled is determined by the value of req_cqinit. Currently, this is set to the size of the send Work Queue divided by two, minus 1. For FRMR, the send Work Queue is the maximum number of concurrent RPCs (currently 32) times the maximum number of Work Requests an RPC might use (currently 7, though some adapters may need more). For mlx4, this is 224 entries. This leaves completion signaling disabled for 111 send Work Requests. Some providers hold back dispatching Work Requests until a CQE is generated. If completions are disabled, then no CQEs are generated for quite some time, and that can stall the Work Queue. I've seen this occur running xfstests generic/113 over NFSv4, where eventually, posting a FAST_REG_MR Work Request fails with -ENOMEM because the Work Queue has overflowed. The connection is dropped and re-established. Cap the rep_cqinit setting so completions are not left turned off for too long. BugLink: https://bugzilla.linux-nfs.org/show_bug.cgi?id=269 Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 4 +++- net/sunrpc/xprtrdma/xprt_rdma.h | 6 ++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 6ea29420f0ac..af45cf390126 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -733,7 +733,9 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, /* set trigger for requesting send completion */ ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1; - if (ep->rep_cqinit <= 2) + if (ep->rep_cqinit > RPCRDMA_MAX_UNSIGNALED_SENDS) + ep->rep_cqinit = RPCRDMA_MAX_UNSIGNALED_SENDS; + else if (ep->rep_cqinit <= 2) ep->rep_cqinit = 0; INIT_CQCOUNT(ep); ep->rep_ia = ia; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index ac7fc9a31342..b799041b75bf 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -97,6 +97,12 @@ struct rpcrdma_ep { struct ib_wc rep_recv_wcs[RPCRDMA_POLLSIZE]; }; +/* + * Force a signaled SEND Work Request every so often, + * in case the provider needs to do some housekeeping. + */ +#define RPCRDMA_MAX_UNSIGNALED_SENDS (32) + #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) #define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount) From 467c9674bccc073684ee34f4bd205cf1b135d76e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 8 Nov 2014 20:14:29 -0500 Subject: [PATCH 197/208] xprtrdma: unmap all FMRs during transport disconnect When using RPCRDMA_MTHCAFMR memory registration, after a few transport disconnect / reconnect cycles, ib_map_phys_fmr() starts to return EINVAL because the provider has exhausted its map pool. Make sure that all FMRs are unmapped during transport disconnect, and that ->send_request remarshals them during an RPC retransmit. This resets the transport's MRs to ensure that none are leaked during a disconnect. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/transport.c | 2 +- net/sunrpc/xprtrdma/verbs.c | 42 ++++++++++++++++++++++++++++++++- 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 6a4615dd0261..cfe9a810e6bc 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -599,7 +599,7 @@ xprt_rdma_send_request(struct rpc_task *task) if (req->rl_niovs == 0) rc = rpcrdma_marshal_req(rqst); - else if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR) + else if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_ALLPHYSICAL) rc = rpcrdma_marshal_chunks(rqst, 0); if (rc < 0) goto failed_marshal; diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index af45cf390126..3c88276090e3 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -62,6 +62,7 @@ #endif static void rpcrdma_reset_frmrs(struct rpcrdma_ia *); +static void rpcrdma_reset_fmrs(struct rpcrdma_ia *); /* * internal functions @@ -868,8 +869,19 @@ retry: rpcrdma_ep_disconnect(ep, ia); rpcrdma_flush_cqs(ep); - if (ia->ri_memreg_strategy == RPCRDMA_FRMR) + switch (ia->ri_memreg_strategy) { + case RPCRDMA_FRMR: rpcrdma_reset_frmrs(ia); + break; + case RPCRDMA_MTHCAFMR: + rpcrdma_reset_fmrs(ia); + break; + case RPCRDMA_ALLPHYSICAL: + break; + default: + rc = -EIO; + goto out; + } xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); id = rpcrdma_create_id(xprt, ia, @@ -1289,6 +1301,34 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) kfree(buf->rb_pool); } +/* After a disconnect, unmap all FMRs. + * + * This is invoked only in the transport connect worker in order + * to serialize with rpcrdma_register_fmr_external(). + */ +static void +rpcrdma_reset_fmrs(struct rpcrdma_ia *ia) +{ + struct rpcrdma_xprt *r_xprt = + container_of(ia, struct rpcrdma_xprt, rx_ia); + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; + struct list_head *pos; + struct rpcrdma_mw *r; + LIST_HEAD(l); + int rc; + + list_for_each(pos, &buf->rb_all) { + r = list_entry(pos, struct rpcrdma_mw, mw_all); + + INIT_LIST_HEAD(&l); + list_add(&r->r.fmr->list, &l); + rc = ib_unmap_fmr(&l); + if (rc) + dprintk("RPC: %s: ib_unmap_fmr failed %i\n", + __func__, rc); + } +} + /* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in * an unusable state. Find FRMRs in this state and dereg / reg * each. FRMRs that are VALID and attached to an rpcrdma_req are From f1a03b76fecdb23983e2ad7e817e98d093ece9f7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 8 Nov 2014 20:14:37 -0500 Subject: [PATCH 198/208] xprtrdma: Refactor tasklet scheduling Restore the separate function that schedules the reply handling tasklet. I need to call it from two different paths. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 3c88276090e3..478b2fda83a6 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -106,6 +106,17 @@ rpcrdma_run_tasklet(unsigned long data) static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL); +static void +rpcrdma_schedule_tasklet(struct list_head *sched_list) +{ + unsigned long flags; + + spin_lock_irqsave(&rpcrdma_tk_lock_g, flags); + list_splice_tail(sched_list, &rpcrdma_tasklets_g); + spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags); + tasklet_schedule(&rpcrdma_tasklet_g); +} + static void rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context) { @@ -244,7 +255,6 @@ rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep) struct list_head sched_list; struct ib_wc *wcs; int budget, count, rc; - unsigned long flags; INIT_LIST_HEAD(&sched_list); budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE; @@ -262,10 +272,7 @@ rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep) rc = 0; out_schedule: - spin_lock_irqsave(&rpcrdma_tk_lock_g, flags); - list_splice_tail(&sched_list, &rpcrdma_tasklets_g); - spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags); - tasklet_schedule(&rpcrdma_tasklet_g); + rpcrdma_schedule_tasklet(&sched_list); return rc; } From 5c166bef4fa138da45d756dc77a8cb29fced1714 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 8 Nov 2014 20:14:45 -0500 Subject: [PATCH 199/208] xprtrdma: Re-write rpcrdma_flush_cqs() Currently rpcrdma_flush_cqs() attempts to avoid code duplication, and simply invokes rpcrdma_recvcq_upcall and rpcrdma_sendcq_upcall. 1. rpcrdma_flush_cqs() can run concurrently with provider upcalls. Both flush_cqs() and the upcalls were invoking ib_poll_cq() in different threads using the same wc buffers (ep->rep_recv_wcs and ep->rep_send_wcs), added by commit 1c00dd077654 ("xprtrmda: Reduce calls to ib_poll_cq() in completion handlers"). During transport disconnect processing, this sometimes resulted in the same reply getting added to the rpcrdma_tasklets_g list more than once, which corrupted the list. 2. The upcall functions drain only a limited number of CQEs, thanks to the poll budget added by commit 8301a2c047cc ("xprtrdma: Limit work done by completion handler"). Fixes: a7bc211ac926 ("xprtrdma: On disconnect, don't ignore ... ") BugLink: https://bugzilla.linux-nfs.org/show_bug.cgi?id=276 Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 478b2fda83a6..e6ac9643fe56 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -317,8 +317,15 @@ rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context) static void rpcrdma_flush_cqs(struct rpcrdma_ep *ep) { - rpcrdma_recvcq_upcall(ep->rep_attr.recv_cq, ep); - rpcrdma_sendcq_upcall(ep->rep_attr.send_cq, ep); + struct ib_wc wc; + LIST_HEAD(sched_list); + + while (ib_poll_cq(ep->rep_attr.recv_cq, 1, &wc) > 0) + rpcrdma_recvcq_process_wc(&wc, &sched_list); + if (!list_empty(&sched_list)) + rpcrdma_schedule_tasklet(&sched_list); + while (ib_poll_cq(ep->rep_attr.send_cq, 1, &wc) > 0) + rpcrdma_sendcq_process_wc(&wc); } #ifdef RPC_DEBUG From d5440e27d3e572272db957d6b9661a902b7c339f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 8 Nov 2014 20:14:53 -0500 Subject: [PATCH 200/208] xprtrdma: Enable pad optimization The Linux NFS/RDMA server used to reject NFSv3 WRITE requests when pad optimization was enabled. That bug was fixed by commit e560e3b510d2 ("svcrdma: Add zero padding if the client doesn't send it"). We can now enable pad optimization on the client, which helps performance and is supported now by both Linux and Solaris servers. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index cfe9a810e6bc..8ed25760764b 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -73,7 +73,7 @@ static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; static unsigned int xprt_rdma_inline_write_padding; static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR; - int xprt_rdma_pad_optimize = 0; + int xprt_rdma_pad_optimize = 1; #ifdef RPC_DEBUG From 7ff11de1bae02a41cac6503f858218ac1b9a3cbe Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 8 Nov 2014 20:15:01 -0500 Subject: [PATCH 201/208] xprtrdma: Display async errors An async error upcall is a hard error, and should be reported in the system log. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index e6ac9643fe56..5783c1a55b09 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -106,6 +106,32 @@ rpcrdma_run_tasklet(unsigned long data) static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL); +static const char * const async_event[] = { + "CQ error", + "QP fatal error", + "QP request error", + "QP access error", + "communication established", + "send queue drained", + "path migration successful", + "path mig error", + "device fatal error", + "port active", + "port error", + "LID change", + "P_key change", + "SM change", + "SRQ error", + "SRQ limit reached", + "last WQE reached", + "client reregister", + "GID change", +}; + +#define ASYNC_MSG(status) \ + ((status) < ARRAY_SIZE(async_event) ? \ + async_event[(status)] : "unknown async error") + static void rpcrdma_schedule_tasklet(struct list_head *sched_list) { @@ -122,8 +148,9 @@ rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context) { struct rpcrdma_ep *ep = context; - dprintk("RPC: %s: QP error %X on device %s ep %p\n", - __func__, event->event, event->device->name, context); + pr_err("RPC: %s: %s on device %s ep %p\n", + __func__, ASYNC_MSG(event->event), + event->device->name, context); if (ep->rep_connected == 1) { ep->rep_connected = -EIO; ep->rep_func(ep); @@ -136,8 +163,9 @@ rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context) { struct rpcrdma_ep *ep = context; - dprintk("RPC: %s: CQ error %X on device %s ep %p\n", - __func__, event->event, event->device->name, context); + pr_err("RPC: %s: %s on device %s ep %p\n", + __func__, ASYNC_MSG(event->event), + event->device->name, context); if (ep->rep_connected == 1) { ep->rep_connected = -EIO; ep->rep_func(ep); From edef1297f33a4546559d905457b435a5ea160bab Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 8 Nov 2014 20:15:09 -0500 Subject: [PATCH 202/208] SUNRPC: serialize iostats updates Occasionally mountstats reports a negative retransmission rate. Ensure that two RPCs completing concurrently don't confuse the sums in the transport's op_metrics array. Since pNFS filelayout can invoke rpc_count_iostats() on another transport from xprt_release(), we can't rely on simply holding the transport_lock in xprt_release(). There's nothing for it but hard serialization. One spin lock per RPC operation should make this as painless as it can be. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/metrics.h | 3 +++ net/sunrpc/stats.c | 21 ++++++++++++++++----- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/include/linux/sunrpc/metrics.h b/include/linux/sunrpc/metrics.h index 1565bbe86d51..eecb5a71e6c0 100644 --- a/include/linux/sunrpc/metrics.h +++ b/include/linux/sunrpc/metrics.h @@ -27,10 +27,13 @@ #include #include +#include #define RPC_IOSTATS_VERS "1.0" struct rpc_iostats { + spinlock_t om_lock; + /* * These counters give an idea about how many request * transmissions are required, on average, to complete that diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index 54530490944e..9711a155bc50 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -116,7 +116,15 @@ EXPORT_SYMBOL_GPL(svc_seq_show); */ struct rpc_iostats *rpc_alloc_iostats(struct rpc_clnt *clnt) { - return kcalloc(clnt->cl_maxproc, sizeof(struct rpc_iostats), GFP_KERNEL); + struct rpc_iostats *stats; + int i; + + stats = kcalloc(clnt->cl_maxproc, sizeof(*stats), GFP_KERNEL); + if (stats) { + for (i = 0; i < clnt->cl_maxproc; i++) + spin_lock_init(&stats[i].om_lock); + } + return stats; } EXPORT_SYMBOL_GPL(rpc_alloc_iostats); @@ -135,20 +143,21 @@ EXPORT_SYMBOL_GPL(rpc_free_iostats); * rpc_count_iostats - tally up per-task stats * @task: completed rpc_task * @stats: array of stat structures - * - * Relies on the caller for serialization. */ void rpc_count_iostats(const struct rpc_task *task, struct rpc_iostats *stats) { struct rpc_rqst *req = task->tk_rqstp; struct rpc_iostats *op_metrics; - ktime_t delta; + ktime_t delta, now; if (!stats || !req) return; + now = ktime_get(); op_metrics = &stats[task->tk_msg.rpc_proc->p_statidx]; + spin_lock(&op_metrics->om_lock); + op_metrics->om_ops++; op_metrics->om_ntrans += req->rq_ntrans; op_metrics->om_timeouts += task->tk_timeouts; @@ -161,8 +170,10 @@ void rpc_count_iostats(const struct rpc_task *task, struct rpc_iostats *stats) op_metrics->om_rtt = ktime_add(op_metrics->om_rtt, req->rq_rtt); - delta = ktime_sub(ktime_get(), task->tk_start); + delta = ktime_sub(now, task->tk_start); op_metrics->om_execute = ktime_add(op_metrics->om_execute, delta); + + spin_unlock(&op_metrics->om_lock); } EXPORT_SYMBOL_GPL(rpc_count_iostats); From 6dd3436b9dc0df4b9ae7bb4e0076996a5ffda219 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 8 Nov 2014 20:15:18 -0500 Subject: [PATCH 203/208] NFS: SETCLIENTID XDR buffer sizes are incorrect Use the correct calculation of the maximum size of a clientaddr4 when encoding and decoding SETCLIENTID operations. clientaddr4 is defined in section 2.2.10 of RFC3530bis-31. The usage in encode_setclientid_maxsz is missing the 4-byte length in both strings, but is otherwise correct. decode_setclientid_maxsz simply asks for a page of receive buffer space, which is unnecessarily large (more than 4KB). Note that a SETCLIENTID reply is either clientid+verifier, or clientaddr4, depending on the returned NFS status. It doesn't hurt to allocate enough space for both. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- fs/nfs/nfs4xdr.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 206c08a60c7f..f8afa6783fb8 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -141,13 +141,15 @@ static int nfs4_stat_to_errno(int); XDR_QUADLEN(NFS4_VERIFIER_SIZE) + \ XDR_QUADLEN(NFS4_SETCLIENTID_NAMELEN) + \ 1 /* sc_prog */ + \ - XDR_QUADLEN(RPCBIND_MAXNETIDLEN) + \ - XDR_QUADLEN(RPCBIND_MAXUADDRLEN) + \ + 1 + XDR_QUADLEN(RPCBIND_MAXNETIDLEN) + \ + 1 + XDR_QUADLEN(RPCBIND_MAXUADDRLEN) + \ 1) /* sc_cb_ident */ #define decode_setclientid_maxsz \ (op_decode_hdr_maxsz + \ - 2 + \ - 1024) /* large value for CLID_INUSE */ + 2 /* clientid */ + \ + XDR_QUADLEN(NFS4_VERIFIER_SIZE) + \ + 1 + XDR_QUADLEN(RPCBIND_MAXNETIDLEN) + \ + 1 + XDR_QUADLEN(RPCBIND_MAXUADDRLEN)) #define encode_setclientid_confirm_maxsz \ (op_encode_hdr_maxsz + \ 3 + (NFS4_VERIFIER_SIZE >> 2)) From c2ef47b7f518f5eae5ffed11b8ac754fb263d398 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 8 Nov 2014 20:15:26 -0500 Subject: [PATCH 204/208] NFS: Clean up nfs4_init_callback() nfs4_init_callback() is never invoked for NFS versions other than 4. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- fs/nfs/nfs4client.c | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index ffdb28d86cf8..5f4b818182f9 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -241,28 +241,25 @@ void nfs4_free_client(struct nfs_client *clp) */ static int nfs4_init_callback(struct nfs_client *clp) { + struct rpc_xprt *xprt; int error; - if (clp->rpc_ops->version == 4) { - struct rpc_xprt *xprt; + xprt = rcu_dereference_raw(clp->cl_rpcclient->cl_xprt); - xprt = rcu_dereference_raw(clp->cl_rpcclient->cl_xprt); - - if (nfs4_has_session(clp)) { - error = xprt_setup_backchannel(xprt, - NFS41_BC_MIN_CALLBACKS); - if (error < 0) - return error; - } - - error = nfs_callback_up(clp->cl_mvops->minor_version, xprt); - if (error < 0) { - dprintk("%s: failed to start callback. Error = %d\n", - __func__, error); + if (nfs4_has_session(clp)) { + error = xprt_setup_backchannel(xprt, NFS41_BC_MIN_CALLBACKS); + if (error < 0) return error; - } - __set_bit(NFS_CS_CALLBACK, &clp->cl_res_state); } + + error = nfs_callback_up(clp->cl_mvops->minor_version, xprt); + if (error < 0) { + dprintk("%s: failed to start callback. Error = %d\n", + __func__, error); + return error; + } + __set_bit(NFS_CS_CALLBACK, &clp->cl_res_state); + return 0; } From f4ac1674f5da420ef17896f0f222c5215ebcde80 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 25 Nov 2014 13:18:15 -0500 Subject: [PATCH 205/208] nfs: Add ALLOCATE support This patch adds support for using the NFS v4.2 operation ALLOCATE to preallocate data in a file. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 1 + fs/nfs/nfs42.h | 1 + fs/nfs/nfs42proc.c | 58 ++++++++++++++++++++++++++++++ fs/nfs/nfs42xdr.c | 75 +++++++++++++++++++++++++++++++++++++++ fs/nfs/nfs4_fs.h | 1 + fs/nfs/nfs4file.c | 28 +++++++++++++++ fs/nfs/nfs4proc.c | 3 +- fs/nfs/nfs4xdr.c | 1 + include/linux/nfs4.h | 1 + include/linux/nfs_fs_sb.h | 1 + include/linux/nfs_xdr.h | 14 ++++++++ 11 files changed, 183 insertions(+), 1 deletion(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 2b48ce58a584..4bffe637ea32 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -192,6 +192,7 @@ void nfs_zap_caches(struct inode *inode) nfs_zap_caches_locked(inode); spin_unlock(&inode->i_lock); } +EXPORT_SYMBOL_GPL(nfs_zap_caches); void nfs_zap_mapping(struct inode *inode, struct address_space *mapping) { diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h index d10333a197bf..42656a9cf485 100644 --- a/fs/nfs/nfs42.h +++ b/fs/nfs/nfs42.h @@ -6,6 +6,7 @@ #define __LINUX_FS_NFS_NFS4_2_H /* nfs4.2proc.c */ +int nfs42_proc_allocate(struct file *, loff_t, loff_t); loff_t nfs42_proc_llseek(struct file *, loff_t, int); /* nfs4.2xdr.h */ diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 056f5aafecc1..581f34e9d8a0 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -32,6 +32,64 @@ static int nfs42_set_rw_stateid(nfs4_stateid *dst, struct file *file, return ret; } +static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, + loff_t offset, loff_t len) +{ + struct inode *inode = file_inode(filep); + struct nfs42_falloc_args args = { + .falloc_fh = NFS_FH(inode), + .falloc_offset = offset, + .falloc_length = len, + }; + struct nfs42_falloc_res res; + struct nfs_server *server = NFS_SERVER(inode); + int status; + + msg->rpc_argp = &args; + msg->rpc_resp = &res; + + status = nfs42_set_rw_stateid(&args.falloc_stateid, filep, FMODE_WRITE); + if (status) + return status; + + return nfs4_call_sync(server->client, server, msg, + &args.seq_args, &res.seq_res, 0); +} + +static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, + loff_t offset, loff_t len) +{ + struct nfs_server *server = NFS_SERVER(file_inode(filep)); + struct nfs4_exception exception = { }; + int err; + + do { + err = _nfs42_proc_fallocate(msg, filep, offset, len); + if (err == -ENOTSUPP) + return -EOPNOTSUPP; + err = nfs4_handle_exception(server, err, &exception); + } while (exception.retry); + + return err; +} + +int nfs42_proc_allocate(struct file *filep, loff_t offset, loff_t len) +{ + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ALLOCATE], + }; + struct inode *inode = file_inode(filep); + int err; + + if (!nfs_server_capable(inode, NFS_CAP_ALLOCATE)) + return -EOPNOTSUPP; + + err = nfs42_proc_fallocate(&msg, filep, offset, len); + if (err == -EOPNOTSUPP) + NFS_SERVER(inode)->caps &= ~NFS_CAP_ALLOCATE; + return err; +} + loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence) { struct inode *inode = file_inode(filep); diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index c90469b604b8..4248d034479c 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -4,6 +4,12 @@ #ifndef __LINUX_FS_NFS_NFS4_2XDR_H #define __LINUX_FS_NFS_NFS4_2XDR_H +#define encode_fallocate_maxsz (encode_stateid_maxsz + \ + 2 /* offset */ + \ + 2 /* length */) +#define encode_allocate_maxsz (op_encode_hdr_maxsz + \ + encode_fallocate_maxsz) +#define decode_allocate_maxsz (op_decode_hdr_maxsz) #define encode_seek_maxsz (op_encode_hdr_maxsz + \ encode_stateid_maxsz + \ 2 /* offset */ + \ @@ -14,6 +20,12 @@ 2 /* offset */ + \ 2 /* length */) +#define NFS4_enc_allocate_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_allocate_maxsz) +#define NFS4_dec_allocate_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + decode_allocate_maxsz) #define NFS4_enc_seek_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ encode_seek_maxsz) @@ -22,6 +34,22 @@ decode_seek_maxsz) +static void encode_fallocate(struct xdr_stream *xdr, + struct nfs42_falloc_args *args) +{ + encode_nfs4_stateid(xdr, &args->falloc_stateid); + encode_uint64(xdr, args->falloc_offset); + encode_uint64(xdr, args->falloc_length); +} + +static void encode_allocate(struct xdr_stream *xdr, + struct nfs42_falloc_args *args, + struct compound_hdr *hdr) +{ + encode_op_hdr(xdr, OP_ALLOCATE, decode_allocate_maxsz, hdr); + encode_fallocate(xdr, args); +} + static void encode_seek(struct xdr_stream *xdr, struct nfs42_seek_args *args, struct compound_hdr *hdr) @@ -32,6 +60,24 @@ static void encode_seek(struct xdr_stream *xdr, encode_uint32(xdr, args->sa_what); } +/* + * Encode ALLOCATE request + */ +static void nfs4_xdr_enc_allocate(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs42_falloc_args *args) +{ + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->falloc_fh, &hdr); + encode_allocate(xdr, args, &hdr); + encode_nops(&hdr); +} + /* * Encode SEEK request */ @@ -50,6 +96,11 @@ static void nfs4_xdr_enc_seek(struct rpc_rqst *req, encode_nops(&hdr); } +static int decode_allocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res) +{ + return decode_op_hdr(xdr, OP_ALLOCATE); +} + static int decode_seek(struct xdr_stream *xdr, struct nfs42_seek_res *res) { int status; @@ -72,6 +123,30 @@ out_overflow: return -EIO; } +/* + * Decode ALLOCATE request + */ +static int nfs4_xdr_dec_allocate(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs42_falloc_res *res) +{ + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + status = decode_allocate(xdr, res); +out: + return status; +} + /* * Decode SEEK request */ diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index be6cac37ea10..a08178764cf9 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -226,6 +226,7 @@ int nfs4_replace_transport(struct nfs_server *server, const struct nfs4_fs_locations *locations); /* nfs4proc.c */ +extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception *); extern int nfs4_call_sync(struct rpc_clnt *, struct nfs_server *, struct rpc_message *, struct nfs4_sequence_args *, struct nfs4_sequence_res *, int); diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index c51fb4db9bfe..f78e9fd0735a 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -3,6 +3,8 @@ * * Copyright (C) 1992 Rick Sladkey */ +#include +#include #include #include "internal.h" #include "fscache.h" @@ -134,6 +136,29 @@ static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence) return nfs_file_llseek(filep, offset, whence); } } + +static long nfs42_fallocate(struct file *filep, int mode, loff_t offset, loff_t len) +{ + struct inode *inode = file_inode(filep); + long ret; + + if (!S_ISREG(inode->i_mode)) + return -EOPNOTSUPP; + + if (mode != 0) + return -EOPNOTSUPP; + + ret = inode_newsize_ok(inode, offset + len); + if (ret < 0) + return ret; + + mutex_lock(&inode->i_mutex); + ret = nfs42_proc_allocate(filep, offset, len); + mutex_unlock(&inode->i_mutex); + + nfs_zap_caches(inode); + return ret; +} #endif /* CONFIG_NFS_V4_2 */ const struct file_operations nfs4_file_operations = { @@ -155,6 +180,9 @@ const struct file_operations nfs4_file_operations = { .flock = nfs_flock, .splice_read = nfs_file_splice_read, .splice_write = iter_file_splice_write, +#ifdef CONFIG_NFS_V4_2 + .fallocate = nfs42_fallocate, +#endif /* CONFIG_NFS_V4_2 */ .check_flags = nfs_check_flags, .setlease = simple_nosetlease, }; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3138913d1cdf..b1b403b0ca0d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -342,7 +342,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) /* This is the error handling routine for processes that are allowed * to sleep. */ -static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception) +int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception) { struct nfs_client *clp = server->nfs_client; struct nfs4_state *state = exception->state; @@ -8424,6 +8424,7 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { | NFS_CAP_POSIX_LOCK | NFS_CAP_STATEID_NFSV41 | NFS_CAP_ATOMIC_OPEN_V1 + | NFS_CAP_ALLOCATE | NFS_CAP_SEEK, .init_client = nfs41_init_client, .shutdown_client = nfs41_shutdown_client, diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 206c08a60c7f..0a1484561e4b 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -7394,6 +7394,7 @@ struct rpc_procinfo nfs4_procedures[] = { #endif /* CONFIG_NFS_V4_1 */ #ifdef CONFIG_NFS_V4_2 PROC(SEEK, enc_seek, dec_seek), + PROC(ALLOCATE, enc_allocate, dec_allocate), #endif /* CONFIG_NFS_V4_2 */ }; diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 356acc2846fd..2b28a21edcd0 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -490,6 +490,7 @@ enum { /* nfs42 */ NFSPROC4_CLNT_SEEK, + NFSPROC4_CLNT_ALLOCATE, }; /* nfs41 types */ diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index a32ba0d7a98f..df6ed429b406 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -231,5 +231,6 @@ struct nfs_server { #define NFS_CAP_ATOMIC_OPEN_V1 (1U << 17) #define NFS_CAP_SECURITY_LABEL (1U << 18) #define NFS_CAP_SEEK (1U << 19) +#define NFS_CAP_ALLOCATE (1U << 20) #endif diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 47ebb4fafd87..467c84efb596 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1243,6 +1243,20 @@ nfs_free_pnfs_ds_cinfo(struct pnfs_ds_commit_info *cinfo) #endif /* CONFIG_NFS_V4_1 */ #ifdef CONFIG_NFS_V4_2 +struct nfs42_falloc_args { + struct nfs4_sequence_args seq_args; + + struct nfs_fh *falloc_fh; + nfs4_stateid falloc_stateid; + u64 falloc_offset; + u64 falloc_length; +}; + +struct nfs42_falloc_res { + struct nfs4_sequence_res seq_res; + unsigned int status; +}; + struct nfs42_seek_args { struct nfs4_sequence_args seq_args; From 624bd5b7b683c978c6d5f4e9f6142cfb3470983d Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 25 Nov 2014 13:18:16 -0500 Subject: [PATCH 206/208] nfs: Add DEALLOCATE support This patch adds support for using the NFS v4.2 operation DEALLOCATE to punch holes in a file. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs42.h | 1 + fs/nfs/nfs42proc.c | 17 +++++++++++ fs/nfs/nfs42xdr.c | 64 +++++++++++++++++++++++++++++++++++++++ fs/nfs/nfs4file.c | 7 +++-- fs/nfs/nfs4proc.c | 1 + fs/nfs/nfs4xdr.c | 1 + include/linux/nfs4.h | 1 + include/linux/nfs_fs_sb.h | 1 + 8 files changed, 91 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h index 42656a9cf485..7afb8947dfdf 100644 --- a/fs/nfs/nfs42.h +++ b/fs/nfs/nfs42.h @@ -7,6 +7,7 @@ /* nfs4.2proc.c */ int nfs42_proc_allocate(struct file *, loff_t, loff_t); +int nfs42_proc_deallocate(struct file *, loff_t, loff_t); loff_t nfs42_proc_llseek(struct file *, loff_t, int); /* nfs4.2xdr.h */ diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 581f34e9d8a0..cb170722769c 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -90,6 +90,23 @@ int nfs42_proc_allocate(struct file *filep, loff_t offset, loff_t len) return err; } +int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len) +{ + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DEALLOCATE], + }; + struct inode *inode = file_inode(filep); + int err; + + if (!nfs_server_capable(inode, NFS_CAP_DEALLOCATE)) + return -EOPNOTSUPP; + + err = nfs42_proc_fallocate(&msg, filep, offset, len); + if (err == -EOPNOTSUPP) + NFS_SERVER(inode)->caps &= ~NFS_CAP_DEALLOCATE; + return err; +} + loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence) { struct inode *inode = file_inode(filep); diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 4248d034479c..038a7e1521fa 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -10,6 +10,9 @@ #define encode_allocate_maxsz (op_encode_hdr_maxsz + \ encode_fallocate_maxsz) #define decode_allocate_maxsz (op_decode_hdr_maxsz) +#define encode_deallocate_maxsz (op_encode_hdr_maxsz + \ + encode_fallocate_maxsz) +#define decode_deallocate_maxsz (op_decode_hdr_maxsz) #define encode_seek_maxsz (op_encode_hdr_maxsz + \ encode_stateid_maxsz + \ 2 /* offset */ + \ @@ -26,6 +29,12 @@ #define NFS4_dec_allocate_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ decode_allocate_maxsz) +#define NFS4_enc_deallocate_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_deallocate_maxsz) +#define NFS4_dec_deallocate_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + decode_deallocate_maxsz) #define NFS4_enc_seek_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ encode_seek_maxsz) @@ -50,6 +59,14 @@ static void encode_allocate(struct xdr_stream *xdr, encode_fallocate(xdr, args); } +static void encode_deallocate(struct xdr_stream *xdr, + struct nfs42_falloc_args *args, + struct compound_hdr *hdr) +{ + encode_op_hdr(xdr, OP_DEALLOCATE, decode_deallocate_maxsz, hdr); + encode_fallocate(xdr, args); +} + static void encode_seek(struct xdr_stream *xdr, struct nfs42_seek_args *args, struct compound_hdr *hdr) @@ -78,6 +95,24 @@ static void nfs4_xdr_enc_allocate(struct rpc_rqst *req, encode_nops(&hdr); } +/* + * Encode DEALLOCATE request + */ +static void nfs4_xdr_enc_deallocate(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs42_falloc_args *args) +{ + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->falloc_fh, &hdr); + encode_deallocate(xdr, args, &hdr); + encode_nops(&hdr); +} + /* * Encode SEEK request */ @@ -101,6 +136,11 @@ static int decode_allocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res) return decode_op_hdr(xdr, OP_ALLOCATE); } +static int decode_deallocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res) +{ + return decode_op_hdr(xdr, OP_DEALLOCATE); +} + static int decode_seek(struct xdr_stream *xdr, struct nfs42_seek_res *res) { int status; @@ -147,6 +187,30 @@ out: return status; } +/* + * Decode DEALLOCATE request + */ +static int nfs4_xdr_dec_deallocate(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs42_falloc_res *res) +{ + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + status = decode_deallocate(xdr, res); +out: + return status; +} + /* * Decode SEEK request */ diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index f78e9fd0735a..8b46389c4c5b 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -145,7 +145,7 @@ static long nfs42_fallocate(struct file *filep, int mode, loff_t offset, loff_t if (!S_ISREG(inode->i_mode)) return -EOPNOTSUPP; - if (mode != 0) + if ((mode != 0) && (mode != (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE))) return -EOPNOTSUPP; ret = inode_newsize_ok(inode, offset + len); @@ -153,7 +153,10 @@ static long nfs42_fallocate(struct file *filep, int mode, loff_t offset, loff_t return ret; mutex_lock(&inode->i_mutex); - ret = nfs42_proc_allocate(filep, offset, len); + if (mode & FALLOC_FL_PUNCH_HOLE) + ret = nfs42_proc_deallocate(filep, offset, len); + else + ret = nfs42_proc_allocate(filep, offset, len); mutex_unlock(&inode->i_mutex); nfs_zap_caches(inode); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b1b403b0ca0d..e7f8d5ff2581 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8425,6 +8425,7 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { | NFS_CAP_STATEID_NFSV41 | NFS_CAP_ATOMIC_OPEN_V1 | NFS_CAP_ALLOCATE + | NFS_CAP_DEALLOCATE | NFS_CAP_SEEK, .init_client = nfs41_init_client, .shutdown_client = nfs41_shutdown_client, diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 0a1484561e4b..03d0fa62a06e 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -7395,6 +7395,7 @@ struct rpc_procinfo nfs4_procedures[] = { #ifdef CONFIG_NFS_V4_2 PROC(SEEK, enc_seek, dec_seek), PROC(ALLOCATE, enc_allocate, dec_allocate), + PROC(DEALLOCATE, enc_deallocate, dec_deallocate), #endif /* CONFIG_NFS_V4_2 */ }; diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 2b28a21edcd0..022b761dbf0a 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -491,6 +491,7 @@ enum { /* nfs42 */ NFSPROC4_CLNT_SEEK, NFSPROC4_CLNT_ALLOCATE, + NFSPROC4_CLNT_DEALLOCATE, }; /* nfs41 types */ diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index df6ed429b406..1e37fbb78f7a 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -232,5 +232,6 @@ struct nfs_server { #define NFS_CAP_SECURITY_LABEL (1U << 18) #define NFS_CAP_SEEK (1U << 19) #define NFS_CAP_ALLOCATE (1U << 20) +#define NFS_CAP_DEALLOCATE (1U << 21) #endif From b4b9d2ccf0be61c69213f6ae4e33377c05194ef4 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 26 Nov 2014 14:44:43 -0500 Subject: [PATCH 207/208] sunrpc: add debugfs file for displaying client rpc_task queue It's possible to get a dump of the RPC task queue by writing a value to /proc/sys/sunrpc/rpc_debug. If you write any value to that file, you get a dump of the RPC client task list into the log buffer. This is a rather inconvenient interface however, and makes it hard to get immediate info about the task queue. Add a new directory hierarchy under debugfs: sunrpc/ rpc_clnt/ / Within each clientid directory we create a new "tasks" file that will dump info similar to what shows up in the log buffer, but with a few small differences -- we avoid printing raw kernel addresses in favor of symbolic names and the XID is also displayed. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 4 + include/linux/sunrpc/debug.h | 31 ++++++ net/sunrpc/Kconfig | 1 + net/sunrpc/Makefile | 1 + net/sunrpc/clnt.c | 10 +- net/sunrpc/debugfs.c | 191 +++++++++++++++++++++++++++++++++++ net/sunrpc/sunrpc_syms.c | 8 ++ 7 files changed, 245 insertions(+), 1 deletion(-) create mode 100644 net/sunrpc/debugfs.c diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 70736b98c721..d86acc63b25f 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -63,6 +63,9 @@ struct rpc_clnt { struct rpc_rtt cl_rtt_default; struct rpc_timeout cl_timeout_default; const struct rpc_program *cl_program; +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) + struct dentry *cl_debugfs; /* debugfs directory */ +#endif }; /* @@ -176,5 +179,6 @@ size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); const char *rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t); int rpc_localaddr(struct rpc_clnt *, struct sockaddr *, size_t); +const char *rpc_proc_name(const struct rpc_task *task); #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h index 43f38ee9668c..835339707094 100644 --- a/include/linux/sunrpc/debug.h +++ b/include/linux/sunrpc/debug.h @@ -53,9 +53,40 @@ extern unsigned int nlm_debug; /* * Sysctl interface for RPC debugging */ + +struct rpc_clnt; + #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) void rpc_register_sysctl(void); void rpc_unregister_sysctl(void); +int sunrpc_debugfs_init(void); +void sunrpc_debugfs_exit(void); +int rpc_clnt_debugfs_register(struct rpc_clnt *); +void rpc_clnt_debugfs_unregister(struct rpc_clnt *); +#else +static inline int +sunrpc_debugfs_init(void) +{ + return 0; +} + +static inline void +sunrpc_debugfs_exit(void) +{ + return; +} + +static inline int +rpc_clnt_debugfs_register(struct rpc_clnt *clnt) +{ + return 0; +} + +static inline void +rpc_clnt_debugfs_unregister(struct rpc_clnt *clnt) +{ + return; +} #endif #endif /* _LINUX_SUNRPC_DEBUG_H_ */ diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig index 0754d0f466d2..fb78117b896c 100644 --- a/net/sunrpc/Kconfig +++ b/net/sunrpc/Kconfig @@ -35,6 +35,7 @@ config RPCSEC_GSS_KRB5 config SUNRPC_DEBUG bool "RPC: Enable dprintk debugging" depends on SUNRPC && SYSCTL + select DEBUG_FS help This option enables a sysctl-based debugging interface that is be used by the 'rpcdebug' utility to turn on or off diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index e5a7a1cac8f3..15e6f6c23c5d 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile @@ -14,6 +14,7 @@ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ addr.o rpcb_clnt.o timer.o xdr.o \ sunrpc_syms.o cache.o rpc_pipe.o \ svc_xprt.o +sunrpc-$(CONFIG_SUNRPC_DEBUG) += debugfs.o sunrpc-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel_rqst.o bc_svc.o sunrpc-$(CONFIG_PROC_FS) += stats.o sunrpc-$(CONFIG_SYSCTL) += sysctl.o diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 36c64ef460cf..05da12a33945 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -305,6 +305,10 @@ static int rpc_client_register(struct rpc_clnt *clnt, struct super_block *pipefs_sb; int err; + err = rpc_clnt_debugfs_register(clnt); + if (err) + return err; + pipefs_sb = rpc_get_sb_net(net); if (pipefs_sb) { err = rpc_setup_pipedir(pipefs_sb, clnt); @@ -331,6 +335,7 @@ err_auth: out: if (pipefs_sb) rpc_put_sb_net(net); + rpc_clnt_debugfs_unregister(clnt); return err; } @@ -670,6 +675,7 @@ int rpc_switch_client_transport(struct rpc_clnt *clnt, rpc_unregister_client(clnt); __rpc_clnt_remove_pipedir(clnt); + rpc_clnt_debugfs_unregister(clnt); /* * A new transport was created. "clnt" therefore @@ -771,6 +777,7 @@ rpc_free_client(struct rpc_clnt *clnt) rcu_dereference(clnt->cl_xprt)->servername); if (clnt->cl_parent != clnt) parent = clnt->cl_parent; + rpc_clnt_debugfs_unregister(clnt); rpc_clnt_remove_pipedir(clnt); rpc_unregister_client(clnt); rpc_free_iostats(clnt->cl_metrics); @@ -1397,7 +1404,8 @@ rpc_restart_call(struct rpc_task *task) EXPORT_SYMBOL_GPL(rpc_restart_call); #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) -static const char *rpc_proc_name(const struct rpc_task *task) +const char +*rpc_proc_name(const struct rpc_task *task) { const struct rpc_procinfo *proc = task->tk_msg.rpc_proc; diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c new file mode 100644 index 000000000000..3d7745683ca3 --- /dev/null +++ b/net/sunrpc/debugfs.c @@ -0,0 +1,191 @@ +/** + * debugfs interface for sunrpc + * + * (c) 2014 Jeff Layton + */ + +#include +#include +#include +#include "netns.h" + +static struct dentry *topdir; +static struct dentry *rpc_clnt_dir; + +struct rpc_clnt_iter { + struct rpc_clnt *clnt; + loff_t pos; +}; + +static int +tasks_show(struct seq_file *f, void *v) +{ + u32 xid = 0; + struct rpc_task *task = v; + struct rpc_clnt *clnt = task->tk_client; + const char *rpc_waitq = "none"; + + if (RPC_IS_QUEUED(task)) + rpc_waitq = rpc_qname(task->tk_waitqueue); + + if (task->tk_rqstp) + xid = be32_to_cpu(task->tk_rqstp->rq_xid); + + seq_printf(f, "%5u %04x %6d 0x%x 0x%x %8ld %ps %sv%u %s a:%ps q:%s\n", + task->tk_pid, task->tk_flags, task->tk_status, + clnt->cl_clid, xid, task->tk_timeout, task->tk_ops, + clnt->cl_program->name, clnt->cl_vers, rpc_proc_name(task), + task->tk_action, rpc_waitq); + return 0; +} + +static void * +tasks_start(struct seq_file *f, loff_t *ppos) + __acquires(&clnt->cl_lock) +{ + struct rpc_clnt_iter *iter = f->private; + loff_t pos = *ppos; + struct rpc_clnt *clnt = iter->clnt; + struct rpc_task *task; + + iter->pos = pos + 1; + spin_lock(&clnt->cl_lock); + list_for_each_entry(task, &clnt->cl_tasks, tk_task) + if (pos-- == 0) + return task; + return NULL; +} + +static void * +tasks_next(struct seq_file *f, void *v, loff_t *pos) +{ + struct rpc_clnt_iter *iter = f->private; + struct rpc_clnt *clnt = iter->clnt; + struct rpc_task *task = v; + struct list_head *next = task->tk_task.next; + + ++iter->pos; + ++*pos; + + /* If there's another task on list, return it */ + if (next == &clnt->cl_tasks) + return NULL; + return list_entry(next, struct rpc_task, tk_task); +} + +static void +tasks_stop(struct seq_file *f, void *v) + __releases(&clnt->cl_lock) +{ + struct rpc_clnt_iter *iter = f->private; + struct rpc_clnt *clnt = iter->clnt; + + spin_unlock(&clnt->cl_lock); +} + +static const struct seq_operations tasks_seq_operations = { + .start = tasks_start, + .next = tasks_next, + .stop = tasks_stop, + .show = tasks_show, +}; + +static int tasks_open(struct inode *inode, struct file *filp) +{ + int ret = seq_open_private(filp, &tasks_seq_operations, + sizeof(struct rpc_clnt_iter)); + + if (!ret) { + struct seq_file *seq = filp->private_data; + struct rpc_clnt_iter *iter = seq->private; + + iter->clnt = inode->i_private; + + if (!atomic_inc_not_zero(&iter->clnt->cl_count)) { + seq_release_private(inode, filp); + ret = -EINVAL; + } + } + + return ret; +} + +static int +tasks_release(struct inode *inode, struct file *filp) +{ + struct seq_file *seq = filp->private_data; + struct rpc_clnt_iter *iter = seq->private; + + rpc_release_client(iter->clnt); + return seq_release_private(inode, filp); +} + +static const struct file_operations tasks_fops = { + .owner = THIS_MODULE, + .open = tasks_open, + .read = seq_read, + .llseek = seq_lseek, + .release = tasks_release, +}; + +int +rpc_clnt_debugfs_register(struct rpc_clnt *clnt) +{ + int len; + char name[9]; /* 8 for hex digits + NULL terminator */ + + /* Already registered? */ + if (clnt->cl_debugfs) + return 0; + + len = snprintf(name, sizeof(name), "%x", clnt->cl_clid); + if (len >= sizeof(name)) + return -EINVAL; + + /* make the per-client dir */ + clnt->cl_debugfs = debugfs_create_dir(name, rpc_clnt_dir); + if (!clnt->cl_debugfs) + return -ENOMEM; + + /* make tasks file */ + if (!debugfs_create_file("tasks", S_IFREG | S_IRUSR, clnt->cl_debugfs, + clnt, &tasks_fops)) { + debugfs_remove_recursive(clnt->cl_debugfs); + clnt->cl_debugfs = NULL; + return -ENOMEM; + } + + return 0; +} + +void +rpc_clnt_debugfs_unregister(struct rpc_clnt *clnt) +{ + debugfs_remove_recursive(clnt->cl_debugfs); + clnt->cl_debugfs = NULL; +} + +void __exit +sunrpc_debugfs_exit(void) +{ + debugfs_remove_recursive(topdir); +} + +int __init +sunrpc_debugfs_init(void) +{ + topdir = debugfs_create_dir("sunrpc", NULL); + if (!topdir) + goto out; + + rpc_clnt_dir = debugfs_create_dir("rpc_clnt", topdir); + if (!rpc_clnt_dir) + goto out_remove; + + return 0; +out_remove: + debugfs_remove_recursive(topdir); + topdir = NULL; +out: + return -ENOMEM; +} diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index f632e476ab6c..e37fbed87956 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -97,6 +97,11 @@ init_sunrpc(void) err = register_rpc_pipefs(); if (err) goto out4; + + err = sunrpc_debugfs_init(); + if (err) + goto out5; + #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) rpc_register_sysctl(); #endif @@ -104,6 +109,8 @@ init_sunrpc(void) init_socket_xprt(); /* clnt sock transport */ return 0; +out5: + unregister_rpc_pipefs(); out4: unregister_pernet_subsys(&sunrpc_net_ops); out3: @@ -120,6 +127,7 @@ cleanup_sunrpc(void) rpcauth_remove_module(); cleanup_socket_xprt(); svc_cleanup_xprt_sock(); + sunrpc_debugfs_exit(); unregister_rpc_pipefs(); rpc_destroy_mempool(); unregister_pernet_subsys(&sunrpc_net_ops); From 388f0c776781fe64ce951701bfe712b2182a31f2 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 26 Nov 2014 14:44:44 -0500 Subject: [PATCH 208/208] sunrpc: add a debugfs rpc_xprt directory with an info file in it Add a new directory heirarchy under the debugfs sunrpc/ directory: sunrpc/ rpc_xprt/ / Within that directory, we can put files that give info about the xprts. We do have the (minor) problem that there is no succinct, unique identifier for rpc_xprts. So we generate them synthetically with a static atomic_t counter. For now, this directory just holds an "info" file, but we may add other files to it in the future. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- include/linux/sunrpc/debug.h | 15 +++++ include/linux/sunrpc/xprt.h | 3 + net/sunrpc/debugfs.c | 115 ++++++++++++++++++++++++++++++++--- net/sunrpc/xprt.c | 8 +++ 4 files changed, 134 insertions(+), 7 deletions(-) diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h index 835339707094..c57d8ea0716c 100644 --- a/include/linux/sunrpc/debug.h +++ b/include/linux/sunrpc/debug.h @@ -55,6 +55,7 @@ extern unsigned int nlm_debug; */ struct rpc_clnt; +struct rpc_xprt; #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) void rpc_register_sysctl(void); @@ -63,6 +64,8 @@ int sunrpc_debugfs_init(void); void sunrpc_debugfs_exit(void); int rpc_clnt_debugfs_register(struct rpc_clnt *); void rpc_clnt_debugfs_unregister(struct rpc_clnt *); +int rpc_xprt_debugfs_register(struct rpc_xprt *); +void rpc_xprt_debugfs_unregister(struct rpc_xprt *); #else static inline int sunrpc_debugfs_init(void) @@ -87,6 +90,18 @@ rpc_clnt_debugfs_unregister(struct rpc_clnt *clnt) { return; } + +static inline int +rpc_xprt_debugfs_register(struct rpc_xprt *xprt) +{ + return 0; +} + +static inline void +rpc_xprt_debugfs_unregister(struct rpc_xprt *xprt) +{ + return; +} #endif #endif /* _LINUX_SUNRPC_DEBUG_H_ */ diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index cf391eef2e6d..9d27ac45b909 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -239,6 +239,9 @@ struct rpc_xprt { struct net *xprt_net; const char *servername; const char *address_strings[RPC_DISPLAY_MAX]; +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) + struct dentry *debugfs; /* debugfs directory */ +#endif }; #if defined(CONFIG_SUNRPC_BACKCHANNEL) diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c index 3d7745683ca3..e811f390f9f6 100644 --- a/net/sunrpc/debugfs.c +++ b/net/sunrpc/debugfs.c @@ -11,6 +11,7 @@ static struct dentry *topdir; static struct dentry *rpc_clnt_dir; +static struct dentry *rpc_xprt_dir; struct rpc_clnt_iter { struct rpc_clnt *clnt; @@ -131,8 +132,8 @@ static const struct file_operations tasks_fops = { int rpc_clnt_debugfs_register(struct rpc_clnt *clnt) { - int len; - char name[9]; /* 8 for hex digits + NULL terminator */ + int len, err; + char name[24]; /* enough for "../../rpc_xprt/ + 8 hex digits + NULL */ /* Already registered? */ if (clnt->cl_debugfs) @@ -148,14 +149,28 @@ rpc_clnt_debugfs_register(struct rpc_clnt *clnt) return -ENOMEM; /* make tasks file */ + err = -ENOMEM; if (!debugfs_create_file("tasks", S_IFREG | S_IRUSR, clnt->cl_debugfs, - clnt, &tasks_fops)) { - debugfs_remove_recursive(clnt->cl_debugfs); - clnt->cl_debugfs = NULL; - return -ENOMEM; - } + clnt, &tasks_fops)) + goto out_err; + + err = -EINVAL; + rcu_read_lock(); + len = snprintf(name, sizeof(name), "../../rpc_xprt/%s", + rcu_dereference(clnt->cl_xprt)->debugfs->d_name.name); + rcu_read_unlock(); + if (len >= sizeof(name)) + goto out_err; + + err = -ENOMEM; + if (!debugfs_create_symlink("xprt", clnt->cl_debugfs, name)) + goto out_err; return 0; +out_err: + debugfs_remove_recursive(clnt->cl_debugfs); + clnt->cl_debugfs = NULL; + return err; } void @@ -165,6 +180,88 @@ rpc_clnt_debugfs_unregister(struct rpc_clnt *clnt) clnt->cl_debugfs = NULL; } +static int +xprt_info_show(struct seq_file *f, void *v) +{ + struct rpc_xprt *xprt = f->private; + + seq_printf(f, "netid: %s\n", xprt->address_strings[RPC_DISPLAY_NETID]); + seq_printf(f, "addr: %s\n", xprt->address_strings[RPC_DISPLAY_ADDR]); + seq_printf(f, "port: %s\n", xprt->address_strings[RPC_DISPLAY_PORT]); + seq_printf(f, "state: 0x%lx\n", xprt->state); + return 0; +} + +static int +xprt_info_open(struct inode *inode, struct file *filp) +{ + int ret; + struct rpc_xprt *xprt = inode->i_private; + + ret = single_open(filp, xprt_info_show, xprt); + + if (!ret) { + if (!xprt_get(xprt)) { + single_release(inode, filp); + ret = -EINVAL; + } + } + return ret; +} + +static int +xprt_info_release(struct inode *inode, struct file *filp) +{ + struct rpc_xprt *xprt = inode->i_private; + + xprt_put(xprt); + return single_release(inode, filp); +} + +static const struct file_operations xprt_info_fops = { + .owner = THIS_MODULE, + .open = xprt_info_open, + .read = seq_read, + .llseek = seq_lseek, + .release = xprt_info_release, +}; + +int +rpc_xprt_debugfs_register(struct rpc_xprt *xprt) +{ + int len, id; + static atomic_t cur_id; + char name[9]; /* 8 hex digits + NULL term */ + + id = (unsigned int)atomic_inc_return(&cur_id); + + len = snprintf(name, sizeof(name), "%x", id); + if (len >= sizeof(name)) + return -EINVAL; + + /* make the per-client dir */ + xprt->debugfs = debugfs_create_dir(name, rpc_xprt_dir); + if (!xprt->debugfs) + return -ENOMEM; + + /* make tasks file */ + if (!debugfs_create_file("info", S_IFREG | S_IRUSR, xprt->debugfs, + xprt, &xprt_info_fops)) { + debugfs_remove_recursive(xprt->debugfs); + xprt->debugfs = NULL; + return -ENOMEM; + } + + return 0; +} + +void +rpc_xprt_debugfs_unregister(struct rpc_xprt *xprt) +{ + debugfs_remove_recursive(xprt->debugfs); + xprt->debugfs = NULL; +} + void __exit sunrpc_debugfs_exit(void) { @@ -182,6 +279,10 @@ sunrpc_debugfs_init(void) if (!rpc_clnt_dir) goto out_remove; + rpc_xprt_dir = debugfs_create_dir("rpc_xprt", topdir); + if (!rpc_xprt_dir) + goto out_remove; + return 0; out_remove: debugfs_remove_recursive(topdir); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 894d071426b2..ebbefad21a37 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1303,6 +1303,7 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net) */ struct rpc_xprt *xprt_create_transport(struct xprt_create *args) { + int err; struct rpc_xprt *xprt; struct xprt_class *t; @@ -1343,6 +1344,12 @@ found: return ERR_PTR(-ENOMEM); } + err = rpc_xprt_debugfs_register(xprt); + if (err) { + xprt_destroy(xprt); + return ERR_PTR(err); + } + dprintk("RPC: created transport %p with %u slots\n", xprt, xprt->max_reqs); out: @@ -1359,6 +1366,7 @@ static void xprt_destroy(struct rpc_xprt *xprt) dprintk("RPC: destroying transport %p\n", xprt); del_timer_sync(&xprt->timer); + rpc_xprt_debugfs_unregister(xprt); rpc_destroy_wait_queue(&xprt->binding); rpc_destroy_wait_queue(&xprt->pending); rpc_destroy_wait_queue(&xprt->sending);