linux/drivers/net/bonding/bond_sysfs.c

1501 lines
36 KiB
C
Raw Normal View History

/*
* Copyright(c) 2004-2005 Intel Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, see <http://www.gnu.org/licenses/>.
*
* The full GNU General Public License is included in this distribution in the
* file called LICENSE.
*
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/in.h>
#include <linux/sysfs.h>
#include <linux/ctype.h>
#include <linux/inet.h>
#include <linux/rtnetlink.h>
#include <linux/etherdevice.h>
[NET]: Make the device list and device lookups per namespace. This patch makes most of the generic device layer network namespace safe. This patch makes dev_base_head a network namespace variable, and then it picks up a few associated variables. The functions: dev_getbyhwaddr dev_getfirsthwbytype dev_get_by_flags dev_get_by_name __dev_get_by_name dev_get_by_index __dev_get_by_index dev_ioctl dev_ethtool dev_load wireless_process_ioctl were modified to take a network namespace argument, and deal with it. vlan_ioctl_set and brioctl_set were modified so their hooks will receive a network namespace argument. So basically anthing in the core of the network stack that was affected to by the change of dev_base was modified to handle multiple network namespaces. The rest of the network stack was simply modified to explicitly use &init_net the initial network namespace. This can be fixed when those components of the network stack are modified to handle multiple network namespaces. For now the ifindex generator is left global. Fundametally ifindex numbers are per namespace, or else we will have corner case problems with migration when we get that far. At the same time there are assumptions in the network stack that the ifindex of a network device won't change. Making the ifindex number global seems a good compromise until the network stack can cope with ifindex changes when you change namespaces, and the like. Signed-off-by: Eric W. Biederman <ebiederm@xmission.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2007-09-17 20:56:21 +02:00
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <linux/nsproxy.h>
#include "bonding.h"
#define to_dev(obj) container_of(obj, struct device, kobj)
#define to_bond(cd) ((struct bonding *)(netdev_priv(to_net_dev(cd))))
/*
* "show" function for the bond_masters attribute.
* The class parameter is ignored.
*/
static ssize_t bonding_show_bonds(struct class *cls,
struct class_attribute *attr,
char *buf)
{
struct bond_net *bn =
container_of(attr, struct bond_net, class_attr_bonding_masters);
int res = 0;
struct bonding *bond;
rtnl_lock();
list_for_each_entry(bond, &bn->dev_list, bond_list) {
if (res > (PAGE_SIZE - IFNAMSIZ)) {
/* not enough space for another interface name */
if ((PAGE_SIZE - res) > 10)
res = PAGE_SIZE - 10;
res += sprintf(buf + res, "++more++ ");
break;
}
res += sprintf(buf + res, "%s ", bond->dev->name);
}
if (res)
buf[res-1] = '\n'; /* eat the leftover space */
rtnl_unlock();
return res;
}
static struct net_device *bond_get_by_name(struct bond_net *bn, const char *ifname)
{
struct bonding *bond;
list_for_each_entry(bond, &bn->dev_list, bond_list) {
if (strncmp(bond->dev->name, ifname, IFNAMSIZ) == 0)
return bond->dev;
}
return NULL;
}
/*
* "store" function for the bond_masters attribute. This is what
* creates and deletes entire bonds.
*
* The class parameter is ignored.
*
*/
static ssize_t bonding_store_bonds(struct class *cls,
struct class_attribute *attr,
const char *buffer, size_t count)
{
struct bond_net *bn =
container_of(attr, struct bond_net, class_attr_bonding_masters);
char command[IFNAMSIZ + 1] = {0, };
char *ifname;
int rv, res = count;
sscanf(buffer, "%16s", command); /* IFNAMSIZ*/
ifname = command + 1;
if ((strlen(command) <= 1) ||
!dev_valid_name(ifname))
goto err_no_cmd;
if (command[0] == '+') {
pr_info("%s is being created...\n", ifname);
rv = bond_create(bn->net, ifname);
if (rv) {
if (rv == -EEXIST)
pr_info("%s already exists.\n", ifname);
else
pr_info("%s creation failed.\n", ifname);
res = rv;
}
} else if (command[0] == '-') {
struct net_device *bond_dev;
rtnl_lock();
bond_dev = bond_get_by_name(bn, ifname);
if (bond_dev) {
pr_info("%s is being deleted...\n", ifname);
unregister_netdevice(bond_dev);
} else {
pr_err("unable to delete non-existent %s\n", ifname);
res = -ENODEV;
}
rtnl_unlock();
} else
goto err_no_cmd;
/* Always return either count or an error. If you return 0, you'll
* get called forever, which is bad.
*/
return res;
err_no_cmd:
pr_err("no command found in bonding_masters. Use +ifname or -ifname.\n");
return -EPERM;
}
/* class attribute for bond_masters file. This ends up in /sys/class/net */
static const struct class_attribute class_attr_bonding_masters = {
.attr = {
.name = "bonding_masters",
.mode = S_IWUSR | S_IRUGO,
},
.show = bonding_show_bonds,
.store = bonding_store_bonds,
};
/*
* Show the slaves in the current bond.
*/
static ssize_t bonding_show_slaves(struct device *d,
struct device_attribute *attr, char *buf)
{
struct bonding *bond = to_bond(d);
struct list_head *iter;
struct slave *slave;
int res = 0;
if (!rtnl_trylock())
return restart_syscall();
bond_for_each_slave(bond, slave, iter) {
if (res > (PAGE_SIZE - IFNAMSIZ)) {
/* not enough space for another interface name */
if ((PAGE_SIZE - res) > 10)
res = PAGE_SIZE - 10;
res += sprintf(buf + res, "++more++ ");
break;
}
res += sprintf(buf + res, "%s ", slave->dev->name);
}
rtnl_unlock();
if (res)
buf[res-1] = '\n'; /* eat the leftover space */
return res;
}
/*
* Set the slaves in the current bond.
* This is supposed to be only thin wrapper for bond_enslave and bond_release.
* All hard work should be done there.
*/
static ssize_t bonding_store_slaves(struct device *d,
struct device_attribute *attr,
const char *buffer, size_t count)
{
char command[IFNAMSIZ + 1] = { 0, };
char *ifname;
int res, ret = count;
struct net_device *dev;
struct bonding *bond = to_bond(d);
if (!rtnl_trylock())
return restart_syscall();
sscanf(buffer, "%16s", command); /* IFNAMSIZ*/
ifname = command + 1;
if ((strlen(command) <= 1) ||
!dev_valid_name(ifname))
goto err_no_cmd;
dev = __dev_get_by_name(dev_net(bond->dev), ifname);
if (!dev) {
pr_info("%s: Interface %s does not exist!\n",
bond->dev->name, ifname);
ret = -ENODEV;
goto out;
}
switch (command[0]) {
case '+':
pr_info("%s: Adding slave %s.\n", bond->dev->name, dev->name);
res = bond_enslave(bond->dev, dev);
break;
case '-':
pr_info("%s: Removing slave %s.\n", bond->dev->name, dev->name);
res = bond_release(bond->dev, dev);
break;
default:
goto err_no_cmd;
}
if (res)
ret = res;
goto out;
err_no_cmd:
pr_err("no command found in slaves file for bond %s. Use +ifname or -ifname.\n",
bond->dev->name);
ret = -EPERM;
out:
rtnl_unlock();
return ret;
}
static DEVICE_ATTR(slaves, S_IRUGO | S_IWUSR, bonding_show_slaves,
bonding_store_slaves);
/*
* Show and set the bonding mode. The bond interface must be down to
* change the mode.
*/
static ssize_t bonding_show_mode(struct device *d,
struct device_attribute *attr, char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%s %d\n",
bond_mode_tbl[bond->params.mode].modename,
bond->params.mode);
}
static ssize_t bonding_store_mode(struct device *d,
struct device_attribute *attr,
const char *buf, size_t count)
{
int new_value, ret;
struct bonding *bond = to_bond(d);
new_value = bond_parse_parm(buf, bond_mode_tbl);
if (new_value < 0) {
pr_err("%s: Ignoring invalid mode value %.*s.\n",
bond->dev->name, (int)strlen(buf) - 1, buf);
return -EINVAL;
}
if (!rtnl_trylock())
return restart_syscall();
ret = bond_option_mode_set(bond, new_value);
if (!ret) {
pr_info("%s: setting mode to %s (%d).\n",
bond->dev->name, bond_mode_tbl[new_value].modename,
new_value);
ret = count;
}
rtnl_unlock();
return ret;
}
static DEVICE_ATTR(mode, S_IRUGO | S_IWUSR,
bonding_show_mode, bonding_store_mode);
/*
* Show and set the bonding transmit hash method.
*/
static ssize_t bonding_show_xmit_hash(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%s %d\n",
xmit_hashtype_tbl[bond->params.xmit_policy].modename,
bond->params.xmit_policy);
}
static ssize_t bonding_store_xmit_hash(struct device *d,
struct device_attribute *attr,
const char *buf, size_t count)
{
int new_value, ret;
struct bonding *bond = to_bond(d);
new_value = bond_parse_parm(buf, xmit_hashtype_tbl);
if (new_value < 0) {
pr_err("%s: Ignoring invalid xmit hash policy value %.*s.\n",
bond->dev->name,
(int)strlen(buf) - 1, buf);
return -EINVAL;
}
if (!rtnl_trylock())
return restart_syscall();
ret = bond_option_xmit_hash_policy_set(bond, new_value);
if (!ret)
ret = count;
rtnl_unlock();
return ret;
}
static DEVICE_ATTR(xmit_hash_policy, S_IRUGO | S_IWUSR,
bonding_show_xmit_hash, bonding_store_xmit_hash);
/*
* Show and set arp_validate.
*/
static ssize_t bonding_show_arp_validate(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%s %d\n",
arp_validate_tbl[bond->params.arp_validate].modename,
bond->params.arp_validate);
}
static ssize_t bonding_store_arp_validate(struct device *d,
struct device_attribute *attr,
const char *buf, size_t count)
{
struct bonding *bond = to_bond(d);
int new_value, ret;
new_value = bond_parse_parm(buf, arp_validate_tbl);
if (new_value < 0) {
pr_err("%s: Ignoring invalid arp_validate value %s\n",
bond->dev->name, buf);
return -EINVAL;
}
if (!rtnl_trylock())
return restart_syscall();
ret = bond_option_arp_validate_set(bond, new_value);
if (!ret)
ret = count;
rtnl_unlock();
return ret;
}
static DEVICE_ATTR(arp_validate, S_IRUGO | S_IWUSR, bonding_show_arp_validate,
bonding_store_arp_validate);
bonding: add an option to fail when any of arp_ip_target is inaccessible Currently, we fail only when all of the ips in arp_ip_target are gone. However, in some situations we might need to fail if even one host from arp_ip_target becomes unavailable. All situations, obviously, rely on the idea that we need *completely* functional network, with all interfaces/addresses working correctly. One real world example might be: vlans on top on bond (hybrid port). If bond and vlans have ips assigned and we have their peers monitored via arp_ip_target - in case of switch misconfiguration (trunk/access port), slave driver malfunction or tagged/untagged traffic dropped on the way - we will be able to switch to another slave. Though any other configuration needs that if we need to have access to all arp_ip_targets. This patch adds this possibility by adding a new parameter - arp_all_targets (both as a module parameter and as a sysfs knob). It can be set to: 0 or any (the default) - which works exactly as it's working now - the slave is up if any of the arp_ip_targets are up. 1 or all - the slave is up if all of the arp_ip_targets are up. This parameter can be changed on the fly (via sysfs), and requires the mode to be active-backup and arp_validate to be enabled (it obeys the arp_validate config on which slaves to validate). Internally it's done through: 1) Add target_last_arp_rx[BOND_MAX_ARP_TARGETS] array to slave struct. It's an array of jiffies, meaning that slave->target_last_arp_rx[i] is the last time we've received arp from bond->params.arp_targets[i] on this slave. 2) If we successfully validate an arp from bond->params.arp_targets[i] in bond_validate_arp() - update the slave->target_last_arp_rx[i] with the current jiffies value. 3) When getting slave's last_rx via slave_last_rx(), we return the oldest time when we've received an arp from any address in bond->params.arp_targets[]. If the value of arp_all_targets == 0 - we still work the same way as before. Also, update the documentation to reflect the new parameter. v3->v4: Kill the forgotten rtnl_unlock(), rephrase the documentation part to be more clear, don't fail setting arp_all_targets if arp_validate is not set - it has no effect anyway but can be easier to set up. Also, print a warning if the last arp_ip_target is removed while the arp_interval is on, but not the arp_validate. v2->v3: Use _bh spinlock, remove useless rtnl_lock() and use jiffies for new arp_ip_target last arp, instead of slave_last_rx(). On bond_enslave(), use the same initialization value for target_last_arp_rx[] as is used for the default last_arp_rx, to avoid useless interface flaps. Also, instead of failing to remove the last arp_ip_target just print a warning - otherwise it might break existing scripts. v1->v2: Correctly handle adding/removing hosts in arp_ip_target - we need to shift/initialize all slave's target_last_arp_rx. Also, don't fail module loading on arp_all_targets misconfiguration, just disable it, and some minor style fixes. Signed-off-by: Veaceslav Falico <vfalico@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-06-24 11:49:34 +02:00
/*
* Show and set arp_all_targets.
*/
static ssize_t bonding_show_arp_all_targets(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
int value = bond->params.arp_all_targets;
return sprintf(buf, "%s %d\n", arp_all_targets_tbl[value].modename,
value);
}
static ssize_t bonding_store_arp_all_targets(struct device *d,
struct device_attribute *attr,
const char *buf, size_t count)
{
struct bonding *bond = to_bond(d);
int new_value, ret;
bonding: add an option to fail when any of arp_ip_target is inaccessible Currently, we fail only when all of the ips in arp_ip_target are gone. However, in some situations we might need to fail if even one host from arp_ip_target becomes unavailable. All situations, obviously, rely on the idea that we need *completely* functional network, with all interfaces/addresses working correctly. One real world example might be: vlans on top on bond (hybrid port). If bond and vlans have ips assigned and we have their peers monitored via arp_ip_target - in case of switch misconfiguration (trunk/access port), slave driver malfunction or tagged/untagged traffic dropped on the way - we will be able to switch to another slave. Though any other configuration needs that if we need to have access to all arp_ip_targets. This patch adds this possibility by adding a new parameter - arp_all_targets (both as a module parameter and as a sysfs knob). It can be set to: 0 or any (the default) - which works exactly as it's working now - the slave is up if any of the arp_ip_targets are up. 1 or all - the slave is up if all of the arp_ip_targets are up. This parameter can be changed on the fly (via sysfs), and requires the mode to be active-backup and arp_validate to be enabled (it obeys the arp_validate config on which slaves to validate). Internally it's done through: 1) Add target_last_arp_rx[BOND_MAX_ARP_TARGETS] array to slave struct. It's an array of jiffies, meaning that slave->target_last_arp_rx[i] is the last time we've received arp from bond->params.arp_targets[i] on this slave. 2) If we successfully validate an arp from bond->params.arp_targets[i] in bond_validate_arp() - update the slave->target_last_arp_rx[i] with the current jiffies value. 3) When getting slave's last_rx via slave_last_rx(), we return the oldest time when we've received an arp from any address in bond->params.arp_targets[]. If the value of arp_all_targets == 0 - we still work the same way as before. Also, update the documentation to reflect the new parameter. v3->v4: Kill the forgotten rtnl_unlock(), rephrase the documentation part to be more clear, don't fail setting arp_all_targets if arp_validate is not set - it has no effect anyway but can be easier to set up. Also, print a warning if the last arp_ip_target is removed while the arp_interval is on, but not the arp_validate. v2->v3: Use _bh spinlock, remove useless rtnl_lock() and use jiffies for new arp_ip_target last arp, instead of slave_last_rx(). On bond_enslave(), use the same initialization value for target_last_arp_rx[] as is used for the default last_arp_rx, to avoid useless interface flaps. Also, instead of failing to remove the last arp_ip_target just print a warning - otherwise it might break existing scripts. v1->v2: Correctly handle adding/removing hosts in arp_ip_target - we need to shift/initialize all slave's target_last_arp_rx. Also, don't fail module loading on arp_all_targets misconfiguration, just disable it, and some minor style fixes. Signed-off-by: Veaceslav Falico <vfalico@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-06-24 11:49:34 +02:00
new_value = bond_parse_parm(buf, arp_all_targets_tbl);
if (new_value < 0) {
pr_err("%s: Ignoring invalid arp_all_targets value %s\n",
bond->dev->name, buf);
return -EINVAL;
}
if (!rtnl_trylock())
return restart_syscall();
bonding: add an option to fail when any of arp_ip_target is inaccessible Currently, we fail only when all of the ips in arp_ip_target are gone. However, in some situations we might need to fail if even one host from arp_ip_target becomes unavailable. All situations, obviously, rely on the idea that we need *completely* functional network, with all interfaces/addresses working correctly. One real world example might be: vlans on top on bond (hybrid port). If bond and vlans have ips assigned and we have their peers monitored via arp_ip_target - in case of switch misconfiguration (trunk/access port), slave driver malfunction or tagged/untagged traffic dropped on the way - we will be able to switch to another slave. Though any other configuration needs that if we need to have access to all arp_ip_targets. This patch adds this possibility by adding a new parameter - arp_all_targets (both as a module parameter and as a sysfs knob). It can be set to: 0 or any (the default) - which works exactly as it's working now - the slave is up if any of the arp_ip_targets are up. 1 or all - the slave is up if all of the arp_ip_targets are up. This parameter can be changed on the fly (via sysfs), and requires the mode to be active-backup and arp_validate to be enabled (it obeys the arp_validate config on which slaves to validate). Internally it's done through: 1) Add target_last_arp_rx[BOND_MAX_ARP_TARGETS] array to slave struct. It's an array of jiffies, meaning that slave->target_last_arp_rx[i] is the last time we've received arp from bond->params.arp_targets[i] on this slave. 2) If we successfully validate an arp from bond->params.arp_targets[i] in bond_validate_arp() - update the slave->target_last_arp_rx[i] with the current jiffies value. 3) When getting slave's last_rx via slave_last_rx(), we return the oldest time when we've received an arp from any address in bond->params.arp_targets[]. If the value of arp_all_targets == 0 - we still work the same way as before. Also, update the documentation to reflect the new parameter. v3->v4: Kill the forgotten rtnl_unlock(), rephrase the documentation part to be more clear, don't fail setting arp_all_targets if arp_validate is not set - it has no effect anyway but can be easier to set up. Also, print a warning if the last arp_ip_target is removed while the arp_interval is on, but not the arp_validate. v2->v3: Use _bh spinlock, remove useless rtnl_lock() and use jiffies for new arp_ip_target last arp, instead of slave_last_rx(). On bond_enslave(), use the same initialization value for target_last_arp_rx[] as is used for the default last_arp_rx, to avoid useless interface flaps. Also, instead of failing to remove the last arp_ip_target just print a warning - otherwise it might break existing scripts. v1->v2: Correctly handle adding/removing hosts in arp_ip_target - we need to shift/initialize all slave's target_last_arp_rx. Also, don't fail module loading on arp_all_targets misconfiguration, just disable it, and some minor style fixes. Signed-off-by: Veaceslav Falico <vfalico@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-06-24 11:49:34 +02:00
ret = bond_option_arp_all_targets_set(bond, new_value);
if (!ret)
ret = count;
rtnl_unlock();
return ret;
bonding: add an option to fail when any of arp_ip_target is inaccessible Currently, we fail only when all of the ips in arp_ip_target are gone. However, in some situations we might need to fail if even one host from arp_ip_target becomes unavailable. All situations, obviously, rely on the idea that we need *completely* functional network, with all interfaces/addresses working correctly. One real world example might be: vlans on top on bond (hybrid port). If bond and vlans have ips assigned and we have their peers monitored via arp_ip_target - in case of switch misconfiguration (trunk/access port), slave driver malfunction or tagged/untagged traffic dropped on the way - we will be able to switch to another slave. Though any other configuration needs that if we need to have access to all arp_ip_targets. This patch adds this possibility by adding a new parameter - arp_all_targets (both as a module parameter and as a sysfs knob). It can be set to: 0 or any (the default) - which works exactly as it's working now - the slave is up if any of the arp_ip_targets are up. 1 or all - the slave is up if all of the arp_ip_targets are up. This parameter can be changed on the fly (via sysfs), and requires the mode to be active-backup and arp_validate to be enabled (it obeys the arp_validate config on which slaves to validate). Internally it's done through: 1) Add target_last_arp_rx[BOND_MAX_ARP_TARGETS] array to slave struct. It's an array of jiffies, meaning that slave->target_last_arp_rx[i] is the last time we've received arp from bond->params.arp_targets[i] on this slave. 2) If we successfully validate an arp from bond->params.arp_targets[i] in bond_validate_arp() - update the slave->target_last_arp_rx[i] with the current jiffies value. 3) When getting slave's last_rx via slave_last_rx(), we return the oldest time when we've received an arp from any address in bond->params.arp_targets[]. If the value of arp_all_targets == 0 - we still work the same way as before. Also, update the documentation to reflect the new parameter. v3->v4: Kill the forgotten rtnl_unlock(), rephrase the documentation part to be more clear, don't fail setting arp_all_targets if arp_validate is not set - it has no effect anyway but can be easier to set up. Also, print a warning if the last arp_ip_target is removed while the arp_interval is on, but not the arp_validate. v2->v3: Use _bh spinlock, remove useless rtnl_lock() and use jiffies for new arp_ip_target last arp, instead of slave_last_rx(). On bond_enslave(), use the same initialization value for target_last_arp_rx[] as is used for the default last_arp_rx, to avoid useless interface flaps. Also, instead of failing to remove the last arp_ip_target just print a warning - otherwise it might break existing scripts. v1->v2: Correctly handle adding/removing hosts in arp_ip_target - we need to shift/initialize all slave's target_last_arp_rx. Also, don't fail module loading on arp_all_targets misconfiguration, just disable it, and some minor style fixes. Signed-off-by: Veaceslav Falico <vfalico@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-06-24 11:49:34 +02:00
}
static DEVICE_ATTR(arp_all_targets, S_IRUGO | S_IWUSR,
bonding_show_arp_all_targets, bonding_store_arp_all_targets);
/*
* Show and store fail_over_mac. User only allowed to change the
* value when there are no slaves.
*/
static ssize_t bonding_show_fail_over_mac(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%s %d\n",
fail_over_mac_tbl[bond->params.fail_over_mac].modename,
bond->params.fail_over_mac);
}
static ssize_t bonding_store_fail_over_mac(struct device *d,
struct device_attribute *attr,
const char *buf, size_t count)
{
int new_value, ret;
struct bonding *bond = to_bond(d);
new_value = bond_parse_parm(buf, fail_over_mac_tbl);
if (new_value < 0) {
pr_err("%s: Ignoring invalid fail_over_mac value %s.\n",
bond->dev->name, buf);
return -EINVAL;
}
if (!rtnl_trylock())
return restart_syscall();
ret = bond_option_fail_over_mac_set(bond, new_value);
if (!ret)
ret = count;
rtnl_unlock();
return ret;
}
static DEVICE_ATTR(fail_over_mac, S_IRUGO | S_IWUSR,
bonding_show_fail_over_mac, bonding_store_fail_over_mac);
/*
* Show and set the arp timer interval. There are two tricky bits
* here. First, if ARP monitoring is activated, then we must disable
* MII monitoring. Second, if the ARP timer isn't running, we must
* start it.
*/
static ssize_t bonding_show_arp_interval(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%d\n", bond->params.arp_interval);
}
static ssize_t bonding_store_arp_interval(struct device *d,
struct device_attribute *attr,
const char *buf, size_t count)
{
struct bonding *bond = to_bond(d);
int new_value, ret;
if (sscanf(buf, "%d", &new_value) != 1) {
pr_err("%s: no arp_interval value specified.\n",
bond->dev->name);
return -EINVAL;
}
if (!rtnl_trylock())
return restart_syscall();
ret = bond_option_arp_interval_set(bond, new_value);
if (!ret)
ret = count;
bonding: fix miimon and arp_interval delayed work race conditions First I would give three observations which will be used later. Observation 1: if (delayed_work_pending(wq)) cancel_delayed_work(wq) This usage is wrong because the pending bit is cleared just before the work's fn is executed and if the function re-arms itself we might end up with the work still running. It's safe to call cancel_delayed_work_sync() even if the work is not queued at all. Observation 2: Use of INIT_DELAYED_WORK() Work needs to be initialized only once prior to (de/en)queueing. Observation 3: IFF_UP is set only after ndo_open is called Related race conditions: 1. Race between bonding_store_miimon() and bonding_store_arp_interval() Because of Obs.1 we can end up having both works enqueued. 2. Multiple races with INIT_DELAYED_WORK() Since the works are not protected by anything between INIT_DELAYED_WORK() and calls to (en/de)queue it is possible for races between the following functions: (races are also possible between the calls to INIT_DELAYED_WORK() and workqueue code) bonding_store_miimon() - bonding_store_arp_interval(), bond_close(), bond_open(), enqueued functions bonding_store_arp_interval() - bonding_store_miimon(), bond_close(), bond_open(), enqueued functions 3. By Obs.1 we need to change bond_cancel_all() Bugs 1 and 2 are fixed by moving all work initializations in bond_open which by Obs. 2 and Obs. 3 and the fact that we make sure that all works are cancelled in bond_close(), is guaranteed not to have any work enqueued. Also RTNL lock is now acquired in bonding_store_miimon/arp_interval so they can't race with bond_close and bond_open. The opposing work is cancelled only if the IFF_UP flag is set and it is cancelled unconditionally. The opposing work is already cancelled if the interface is down so no need to cancel it again. This way we don't need new synchronizations for the bonding workqueue. These bugs (and fixes) are tied together and belong in the same patch. Note: I have left 1 line intentionally over 80 characters (84) because I didn't like how it looks broken down. If you'd prefer it otherwise, then simply break it. v2: Make description text < 75 columns Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com> Signed-off-by: Jay Vosburgh <fubar@us.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-11-29 02:31:31 +01:00
rtnl_unlock();
return ret;
}
static DEVICE_ATTR(arp_interval, S_IRUGO | S_IWUSR,
bonding_show_arp_interval, bonding_store_arp_interval);
/*
* Show and set the arp targets.
*/
static ssize_t bonding_show_arp_targets(struct device *d,
struct device_attribute *attr,
char *buf)
{
int i, res = 0;
struct bonding *bond = to_bond(d);
for (i = 0; i < BOND_MAX_ARP_TARGETS; i++) {
if (bond->params.arp_targets[i])
res += sprintf(buf + res, "%pI4 ",
&bond->params.arp_targets[i]);
}
if (res)
buf[res-1] = '\n'; /* eat the leftover space */
return res;
}
static ssize_t bonding_store_arp_targets(struct device *d,
struct device_attribute *attr,
const char *buf, size_t count)
{
struct bonding *bond = to_bond(d);
__be32 target;
int ret = -EPERM;
if (!in4_pton(buf + 1, -1, (u8 *)&target, -1, NULL)) {
pr_err("%s: invalid ARP target %pI4 specified\n",
bond->dev->name, &target);
return -EPERM;
}
bonding: add an option to fail when any of arp_ip_target is inaccessible Currently, we fail only when all of the ips in arp_ip_target are gone. However, in some situations we might need to fail if even one host from arp_ip_target becomes unavailable. All situations, obviously, rely on the idea that we need *completely* functional network, with all interfaces/addresses working correctly. One real world example might be: vlans on top on bond (hybrid port). If bond and vlans have ips assigned and we have their peers monitored via arp_ip_target - in case of switch misconfiguration (trunk/access port), slave driver malfunction or tagged/untagged traffic dropped on the way - we will be able to switch to another slave. Though any other configuration needs that if we need to have access to all arp_ip_targets. This patch adds this possibility by adding a new parameter - arp_all_targets (both as a module parameter and as a sysfs knob). It can be set to: 0 or any (the default) - which works exactly as it's working now - the slave is up if any of the arp_ip_targets are up. 1 or all - the slave is up if all of the arp_ip_targets are up. This parameter can be changed on the fly (via sysfs), and requires the mode to be active-backup and arp_validate to be enabled (it obeys the arp_validate config on which slaves to validate). Internally it's done through: 1) Add target_last_arp_rx[BOND_MAX_ARP_TARGETS] array to slave struct. It's an array of jiffies, meaning that slave->target_last_arp_rx[i] is the last time we've received arp from bond->params.arp_targets[i] on this slave. 2) If we successfully validate an arp from bond->params.arp_targets[i] in bond_validate_arp() - update the slave->target_last_arp_rx[i] with the current jiffies value. 3) When getting slave's last_rx via slave_last_rx(), we return the oldest time when we've received an arp from any address in bond->params.arp_targets[]. If the value of arp_all_targets == 0 - we still work the same way as before. Also, update the documentation to reflect the new parameter. v3->v4: Kill the forgotten rtnl_unlock(), rephrase the documentation part to be more clear, don't fail setting arp_all_targets if arp_validate is not set - it has no effect anyway but can be easier to set up. Also, print a warning if the last arp_ip_target is removed while the arp_interval is on, but not the arp_validate. v2->v3: Use _bh spinlock, remove useless rtnl_lock() and use jiffies for new arp_ip_target last arp, instead of slave_last_rx(). On bond_enslave(), use the same initialization value for target_last_arp_rx[] as is used for the default last_arp_rx, to avoid useless interface flaps. Also, instead of failing to remove the last arp_ip_target just print a warning - otherwise it might break existing scripts. v1->v2: Correctly handle adding/removing hosts in arp_ip_target - we need to shift/initialize all slave's target_last_arp_rx. Also, don't fail module loading on arp_all_targets misconfiguration, just disable it, and some minor style fixes. Signed-off-by: Veaceslav Falico <vfalico@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-06-24 11:49:34 +02:00
if (!rtnl_trylock())
return restart_syscall();
bonding: add an option to fail when any of arp_ip_target is inaccessible Currently, we fail only when all of the ips in arp_ip_target are gone. However, in some situations we might need to fail if even one host from arp_ip_target becomes unavailable. All situations, obviously, rely on the idea that we need *completely* functional network, with all interfaces/addresses working correctly. One real world example might be: vlans on top on bond (hybrid port). If bond and vlans have ips assigned and we have their peers monitored via arp_ip_target - in case of switch misconfiguration (trunk/access port), slave driver malfunction or tagged/untagged traffic dropped on the way - we will be able to switch to another slave. Though any other configuration needs that if we need to have access to all arp_ip_targets. This patch adds this possibility by adding a new parameter - arp_all_targets (both as a module parameter and as a sysfs knob). It can be set to: 0 or any (the default) - which works exactly as it's working now - the slave is up if any of the arp_ip_targets are up. 1 or all - the slave is up if all of the arp_ip_targets are up. This parameter can be changed on the fly (via sysfs), and requires the mode to be active-backup and arp_validate to be enabled (it obeys the arp_validate config on which slaves to validate). Internally it's done through: 1) Add target_last_arp_rx[BOND_MAX_ARP_TARGETS] array to slave struct. It's an array of jiffies, meaning that slave->target_last_arp_rx[i] is the last time we've received arp from bond->params.arp_targets[i] on this slave. 2) If we successfully validate an arp from bond->params.arp_targets[i] in bond_validate_arp() - update the slave->target_last_arp_rx[i] with the current jiffies value. 3) When getting slave's last_rx via slave_last_rx(), we return the oldest time when we've received an arp from any address in bond->params.arp_targets[]. If the value of arp_all_targets == 0 - we still work the same way as before. Also, update the documentation to reflect the new parameter. v3->v4: Kill the forgotten rtnl_unlock(), rephrase the documentation part to be more clear, don't fail setting arp_all_targets if arp_validate is not set - it has no effect anyway but can be easier to set up. Also, print a warning if the last arp_ip_target is removed while the arp_interval is on, but not the arp_validate. v2->v3: Use _bh spinlock, remove useless rtnl_lock() and use jiffies for new arp_ip_target last arp, instead of slave_last_rx(). On bond_enslave(), use the same initialization value for target_last_arp_rx[] as is used for the default last_arp_rx, to avoid useless interface flaps. Also, instead of failing to remove the last arp_ip_target just print a warning - otherwise it might break existing scripts. v1->v2: Correctly handle adding/removing hosts in arp_ip_target - we need to shift/initialize all slave's target_last_arp_rx. Also, don't fail module loading on arp_all_targets misconfiguration, just disable it, and some minor style fixes. Signed-off-by: Veaceslav Falico <vfalico@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-06-24 11:49:34 +02:00
if (buf[0] == '+')
ret = bond_option_arp_ip_target_add(bond, target);
else if (buf[0] == '-')
ret = bond_option_arp_ip_target_rem(bond, target);
else
pr_err("no command found in arp_ip_targets file for bond %s. Use +<addr> or -<addr>.\n",
bond->dev->name);
if (!ret)
ret = count;
rtnl_unlock();
return ret;
}
static DEVICE_ATTR(arp_ip_target, S_IRUGO | S_IWUSR , bonding_show_arp_targets, bonding_store_arp_targets);
/*
* Show and set the up and down delays. These must be multiples of the
* MII monitoring value, and are stored internally as the multiplier.
* Thus, we must translate to MS for the real world.
*/
static ssize_t bonding_show_downdelay(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%d\n", bond->params.downdelay * bond->params.miimon);
}
static ssize_t bonding_store_downdelay(struct device *d,
struct device_attribute *attr,
const char *buf, size_t count)
{
int new_value, ret;
struct bonding *bond = to_bond(d);
if (sscanf(buf, "%d", &new_value) != 1) {
pr_err("%s: no down delay value specified.\n", bond->dev->name);
return -EINVAL;
}
if (!rtnl_trylock())
return restart_syscall();
ret = bond_option_downdelay_set(bond, new_value);
if (!ret)
ret = count;
rtnl_unlock();
return ret;
}
static DEVICE_ATTR(downdelay, S_IRUGO | S_IWUSR,
bonding_show_downdelay, bonding_store_downdelay);
static ssize_t bonding_show_updelay(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%d\n", bond->params.updelay * bond->params.miimon);
}
static ssize_t bonding_store_updelay(struct device *d,
struct device_attribute *attr,
const char *buf, size_t count)
{
int new_value, ret;
struct bonding *bond = to_bond(d);
if (sscanf(buf, "%d", &new_value) != 1) {
pr_err("%s: no up delay value specified.\n",
bond->dev->name);
return -EINVAL;
}
if (!rtnl_trylock())
return restart_syscall();
ret = bond_option_updelay_set(bond, new_value);
if (!ret)
ret = count;
rtnl_unlock();
return ret;
}
static DEVICE_ATTR(updelay, S_IRUGO | S_IWUSR,
bonding_show_updelay, bonding_store_updelay);
/*
* Show and set the LACP interval. Interface must be down, and the mode
* must be set to 802.3ad mode.
*/
static ssize_t bonding_show_lacp(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%s %d\n",
bond_lacp_tbl[bond->params.lacp_fast].modename,
bond->params.lacp_fast);
}
static ssize_t bonding_store_lacp(struct device *d,
struct device_attribute *attr,
const char *buf, size_t count)
{
struct bonding *bond = to_bond(d);
int new_value, ret;
new_value = bond_parse_parm(buf, bond_lacp_tbl);
if (new_value < 0) {
pr_err("%s: Ignoring invalid LACP rate value %.*s.\n",
bond->dev->name, (int)strlen(buf) - 1, buf);
return -EINVAL;
}
if (!rtnl_trylock())
return restart_syscall();
ret = bond_option_lacp_rate_set(bond, new_value);
if (!ret)
ret = count;
rtnl_unlock();
return ret;
}
static DEVICE_ATTR(lacp_rate, S_IRUGO | S_IWUSR,
bonding_show_lacp, bonding_store_lacp);
static ssize_t bonding_show_min_links(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%d\n", bond->params.min_links);
}
static ssize_t bonding_store_min_links(struct device *d,
struct device_attribute *attr,
const char *buf, size_t count)
{
struct bonding *bond = to_bond(d);
int ret;
unsigned int new_value;
ret = kstrtouint(buf, 0, &new_value);
if (ret < 0) {
pr_err("%s: Ignoring invalid min links value %s.\n",
bond->dev->name, buf);
return ret;
}
if (!rtnl_trylock())
return restart_syscall();
ret = bond_option_min_links_set(bond, new_value);
if (!ret)
ret = count;
rtnl_unlock();
return ret;
}
static DEVICE_ATTR(min_links, S_IRUGO | S_IWUSR,
bonding_show_min_links, bonding_store_min_links);
static ssize_t bonding_show_ad_select(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%s %d\n",
ad_select_tbl[bond->params.ad_select].modename,
bond->params.ad_select);
}
static ssize_t bonding_store_ad_select(struct device *d,
struct device_attribute *attr,
const char *buf, size_t count)
{
int new_value, ret;
struct bonding *bond = to_bond(d);
new_value = bond_parse_parm(buf, ad_select_tbl);
if (new_value < 0) {
pr_err("%s: Ignoring invalid ad_select value %.*s.\n",
bond->dev->name, (int)strlen(buf) - 1, buf);
return -EINVAL;
}
if (!rtnl_trylock())
return restart_syscall();
ret = bond_option_ad_select_set(bond, new_value);
if (!ret)
ret = count;
rtnl_unlock();
return ret;
}
static DEVICE_ATTR(ad_select, S_IRUGO | S_IWUSR,
bonding_show_ad_select, bonding_store_ad_select);
/*
* Show and set the number of peer notifications to send after a failover event.
*/
static ssize_t bonding_show_num_peer_notif(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%d\n", bond->params.num_peer_notif);
}
static ssize_t bonding_store_num_peer_notif(struct device *d,
struct device_attribute *attr,
const char *buf, size_t count)
{
struct bonding *bond = to_bond(d);
u8 new_value;
int ret;
ret = kstrtou8(buf, 10, &new_value);
if (ret) {
pr_err("%s: invalid value %s specified.\n",
bond->dev->name, buf);
return ret;
}
if (!rtnl_trylock())
return restart_syscall();
ret = bond_option_num_peer_notif_set(bond, new_value);
if (!ret)
ret = count;
rtnl_unlock();
return ret;
}
static DEVICE_ATTR(num_grat_arp, S_IRUGO | S_IWUSR,
bonding_show_num_peer_notif, bonding_store_num_peer_notif);
static DEVICE_ATTR(num_unsol_na, S_IRUGO | S_IWUSR,
bonding_show_num_peer_notif, bonding_store_num_peer_notif);
/*
* Show and set the MII monitor interval. There are two tricky bits
* here. First, if MII monitoring is activated, then we must disable
* ARP monitoring. Second, if the timer isn't running, we must
* start it.
*/
static ssize_t bonding_show_miimon(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%d\n", bond->params.miimon);
}
static ssize_t bonding_store_miimon(struct device *d,
struct device_attribute *attr,
const char *buf, size_t count)
{
int new_value, ret;
struct bonding *bond = to_bond(d);
if (sscanf(buf, "%d", &new_value) != 1) {
pr_err("%s: no miimon value specified.\n",
bond->dev->name);
return -EINVAL;
}
if (!rtnl_trylock())
return restart_syscall();
ret = bond_option_miimon_set(bond, new_value);
if (!ret)
ret = count;
bonding: fix miimon and arp_interval delayed work race conditions First I would give three observations which will be used later. Observation 1: if (delayed_work_pending(wq)) cancel_delayed_work(wq) This usage is wrong because the pending bit is cleared just before the work's fn is executed and if the function re-arms itself we might end up with the work still running. It's safe to call cancel_delayed_work_sync() even if the work is not queued at all. Observation 2: Use of INIT_DELAYED_WORK() Work needs to be initialized only once prior to (de/en)queueing. Observation 3: IFF_UP is set only after ndo_open is called Related race conditions: 1. Race between bonding_store_miimon() and bonding_store_arp_interval() Because of Obs.1 we can end up having both works enqueued. 2. Multiple races with INIT_DELAYED_WORK() Since the works are not protected by anything between INIT_DELAYED_WORK() and calls to (en/de)queue it is possible for races between the following functions: (races are also possible between the calls to INIT_DELAYED_WORK() and workqueue code) bonding_store_miimon() - bonding_store_arp_interval(), bond_close(), bond_open(), enqueued functions bonding_store_arp_interval() - bonding_store_miimon(), bond_close(), bond_open(), enqueued functions 3. By Obs.1 we need to change bond_cancel_all() Bugs 1 and 2 are fixed by moving all work initializations in bond_open which by Obs. 2 and Obs. 3 and the fact that we make sure that all works are cancelled in bond_close(), is guaranteed not to have any work enqueued. Also RTNL lock is now acquired in bonding_store_miimon/arp_interval so they can't race with bond_close and bond_open. The opposing work is cancelled only if the IFF_UP flag is set and it is cancelled unconditionally. The opposing work is already cancelled if the interface is down so no need to cancel it again. This way we don't need new synchronizations for the bonding workqueue. These bugs (and fixes) are tied together and belong in the same patch. Note: I have left 1 line intentionally over 80 characters (84) because I didn't like how it looks broken down. If you'd prefer it otherwise, then simply break it. v2: Make description text < 75 columns Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com> Signed-off-by: Jay Vosburgh <fubar@us.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-11-29 02:31:31 +01:00
rtnl_unlock();
return ret;
}
static DEVICE_ATTR(miimon, S_IRUGO | S_IWUSR,
bonding_show_miimon, bonding_store_miimon);
/*
* Show and set the primary slave. The store function is much
* simpler than bonding_store_slaves function because it only needs to
* handle one interface name.
* The bond must be a mode that supports a primary for this be
* set.
*/
static ssize_t bonding_show_primary(struct device *d,
struct device_attribute *attr,
char *buf)
{
int count = 0;
struct bonding *bond = to_bond(d);
if (bond->primary_slave)
count = sprintf(buf, "%s\n", bond->primary_slave->dev->name);
return count;
}
static ssize_t bonding_store_primary(struct device *d,
struct device_attribute *attr,
const char *buf, size_t count)
{
struct bonding *bond = to_bond(d);
char ifname[IFNAMSIZ];
int ret;
sscanf(buf, "%15s", ifname); /* IFNAMSIZ */
if (ifname[0] == '\n')
ifname[0] = '\0';
if (!rtnl_trylock())
return restart_syscall();
ret = bond_option_primary_set(bond, ifname);
if (!ret)
ret = count;
rtnl_unlock();
return ret;
}
static DEVICE_ATTR(primary, S_IRUGO | S_IWUSR,
bonding_show_primary, bonding_store_primary);
/*
* Show and set the primary_reselect flag.
*/
static ssize_t bonding_show_primary_reselect(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%s %d\n",
pri_reselect_tbl[bond->params.primary_reselect].modename,
bond->params.primary_reselect);
}
static ssize_t bonding_store_primary_reselect(struct device *d,
struct device_attribute *attr,
const char *buf, size_t count)
{
int new_value, ret;
struct bonding *bond = to_bond(d);
new_value = bond_parse_parm(buf, pri_reselect_tbl);
if (new_value < 0) {
pr_err("%s: Ignoring invalid primary_reselect value %.*s.\n",
bond->dev->name,
(int) strlen(buf) - 1, buf);
return -EINVAL;
}
if (!rtnl_trylock())
return restart_syscall();
ret = bond_option_primary_reselect_set(bond, new_value);
if (!ret)
ret = count;
rtnl_unlock();
return ret;
}
static DEVICE_ATTR(primary_reselect, S_IRUGO | S_IWUSR,
bonding_show_primary_reselect,
bonding_store_primary_reselect);
/*
* Show and set the use_carrier flag.
*/
static ssize_t bonding_show_carrier(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%d\n", bond->params.use_carrier);
}
static ssize_t bonding_store_carrier(struct device *d,
struct device_attribute *attr,
const char *buf, size_t count)
{
int new_value, ret;
struct bonding *bond = to_bond(d);
if (sscanf(buf, "%d", &new_value) != 1) {
pr_err("%s: no use_carrier value specified.\n",
bond->dev->name);
return -EINVAL;
}
if (!rtnl_trylock())
return restart_syscall();
ret = bond_option_use_carrier_set(bond, new_value);
if (!ret)
ret = count;
rtnl_unlock();
return ret;
}
static DEVICE_ATTR(use_carrier, S_IRUGO | S_IWUSR,
bonding_show_carrier, bonding_store_carrier);
/*
* Show and set currently active_slave.
*/
static ssize_t bonding_show_active_slave(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
struct net_device *slave_dev;
int count = 0;
bonding: initial RCU conversion This patch does the initial bonding conversion to RCU. After it the following modes are protected by RCU alone: roundrobin, active-backup, broadcast and xor. Modes ALB/TLB and 3ad still acquire bond->lock for reading, and will be dealt with later. curr_active_slave needs to be dereferenced via rcu in the converted modes because the only thing protecting the slave after this patch is rcu_read_lock, so we need the proper barrier for weakly ordered archs and to make sure we don't have stale pointer. It's not tagged with __rcu yet because there's still work to be done to remove the curr_slave_lock, so sparse will complain when rcu_assign_pointer and rcu_dereference are used, but the alternative to use rcu_dereference_protected would've created much bigger code churn which is more difficult to test and review. That will be converted in time. 1. Active-backup mode 1.1 Perf recording while doing iperf -P 4 - old bonding: iperf spent 0.55% in bonding, system spent 0.29% CPU in bonding - new bonding: iperf spent 0.29% in bonding, system spent 0.15% CPU in bonding 1.2. Bandwidth measurements - old bonding: 16.1 gbps consistently - new bonding: 17.5 gbps consistently 2. Round-robin mode 2.1 Perf recording while doing iperf -P 4 - old bonding: iperf spent 0.51% in bonding, system spent 0.24% CPU in bonding - new bonding: iperf spent 0.16% in bonding, system spent 0.11% CPU in bonding 2.2 Bandwidth measurements - old bonding: 8 gbps (variable due to packet reorderings) - new bonding: 10 gbps (variable due to packet reorderings) Of course the latency has improved in all converted modes, and moreover while doing enslave/release (since it doesn't affect tx anymore). Also I've stress tested all modes doing enslave/release in a loop while transmitting traffic. Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-08-01 16:54:51 +02:00
rcu_read_lock();
slave_dev = bond_option_active_slave_get_rcu(bond);
if (slave_dev)
count = sprintf(buf, "%s\n", slave_dev->name);
bonding: initial RCU conversion This patch does the initial bonding conversion to RCU. After it the following modes are protected by RCU alone: roundrobin, active-backup, broadcast and xor. Modes ALB/TLB and 3ad still acquire bond->lock for reading, and will be dealt with later. curr_active_slave needs to be dereferenced via rcu in the converted modes because the only thing protecting the slave after this patch is rcu_read_lock, so we need the proper barrier for weakly ordered archs and to make sure we don't have stale pointer. It's not tagged with __rcu yet because there's still work to be done to remove the curr_slave_lock, so sparse will complain when rcu_assign_pointer and rcu_dereference are used, but the alternative to use rcu_dereference_protected would've created much bigger code churn which is more difficult to test and review. That will be converted in time. 1. Active-backup mode 1.1 Perf recording while doing iperf -P 4 - old bonding: iperf spent 0.55% in bonding, system spent 0.29% CPU in bonding - new bonding: iperf spent 0.29% in bonding, system spent 0.15% CPU in bonding 1.2. Bandwidth measurements - old bonding: 16.1 gbps consistently - new bonding: 17.5 gbps consistently 2. Round-robin mode 2.1 Perf recording while doing iperf -P 4 - old bonding: iperf spent 0.51% in bonding, system spent 0.24% CPU in bonding - new bonding: iperf spent 0.16% in bonding, system spent 0.11% CPU in bonding 2.2 Bandwidth measurements - old bonding: 8 gbps (variable due to packet reorderings) - new bonding: 10 gbps (variable due to packet reorderings) Of course the latency has improved in all converted modes, and moreover while doing enslave/release (since it doesn't affect tx anymore). Also I've stress tested all modes doing enslave/release in a loop while transmitting traffic. Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-08-01 16:54:51 +02:00
rcu_read_unlock();
return count;
}
static ssize_t bonding_store_active_slave(struct device *d,
struct device_attribute *attr,
const char *buf, size_t count)
{
int ret;
struct bonding *bond = to_bond(d);
char ifname[IFNAMSIZ];
struct net_device *dev;
if (!rtnl_trylock())
return restart_syscall();
sscanf(buf, "%15s", ifname); /* IFNAMSIZ */
if (!strlen(ifname) || buf[0] == '\n') {
dev = NULL;
} else {
dev = __dev_get_by_name(dev_net(bond->dev), ifname);
if (!dev) {
ret = -ENODEV;
goto out;
}
}
ret = bond_option_active_slave_set(bond, dev);
if (!ret)
ret = count;
out:
rtnl_unlock();
return ret;
}
static DEVICE_ATTR(active_slave, S_IRUGO | S_IWUSR,
bonding_show_active_slave, bonding_store_active_slave);
/*
* Show link status of the bond interface.
*/
static ssize_t bonding_show_mii_status(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
bonding: initial RCU conversion This patch does the initial bonding conversion to RCU. After it the following modes are protected by RCU alone: roundrobin, active-backup, broadcast and xor. Modes ALB/TLB and 3ad still acquire bond->lock for reading, and will be dealt with later. curr_active_slave needs to be dereferenced via rcu in the converted modes because the only thing protecting the slave after this patch is rcu_read_lock, so we need the proper barrier for weakly ordered archs and to make sure we don't have stale pointer. It's not tagged with __rcu yet because there's still work to be done to remove the curr_slave_lock, so sparse will complain when rcu_assign_pointer and rcu_dereference are used, but the alternative to use rcu_dereference_protected would've created much bigger code churn which is more difficult to test and review. That will be converted in time. 1. Active-backup mode 1.1 Perf recording while doing iperf -P 4 - old bonding: iperf spent 0.55% in bonding, system spent 0.29% CPU in bonding - new bonding: iperf spent 0.29% in bonding, system spent 0.15% CPU in bonding 1.2. Bandwidth measurements - old bonding: 16.1 gbps consistently - new bonding: 17.5 gbps consistently 2. Round-robin mode 2.1 Perf recording while doing iperf -P 4 - old bonding: iperf spent 0.51% in bonding, system spent 0.24% CPU in bonding - new bonding: iperf spent 0.16% in bonding, system spent 0.11% CPU in bonding 2.2 Bandwidth measurements - old bonding: 8 gbps (variable due to packet reorderings) - new bonding: 10 gbps (variable due to packet reorderings) Of course the latency has improved in all converted modes, and moreover while doing enslave/release (since it doesn't affect tx anymore). Also I've stress tested all modes doing enslave/release in a loop while transmitting traffic. Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-08-01 16:54:51 +02:00
return sprintf(buf, "%s\n", bond->curr_active_slave ? "up" : "down");
}
static DEVICE_ATTR(mii_status, S_IRUGO, bonding_show_mii_status, NULL);
/*
* Show current 802.3ad aggregator ID.
*/
static ssize_t bonding_show_ad_aggregator(struct device *d,
struct device_attribute *attr,
char *buf)
{
int count = 0;
struct bonding *bond = to_bond(d);
if (bond->params.mode == BOND_MODE_8023AD) {
struct ad_info ad_info;
count = sprintf(buf, "%d\n",
bond_3ad_get_active_agg_info(bond, &ad_info)
? 0 : ad_info.aggregator_id);
}
return count;
}
static DEVICE_ATTR(ad_aggregator, S_IRUGO, bonding_show_ad_aggregator, NULL);
/*
* Show number of active 802.3ad ports.
*/
static ssize_t bonding_show_ad_num_ports(struct device *d,
struct device_attribute *attr,
char *buf)
{
int count = 0;
struct bonding *bond = to_bond(d);
if (bond->params.mode == BOND_MODE_8023AD) {
struct ad_info ad_info;
count = sprintf(buf, "%d\n",
bond_3ad_get_active_agg_info(bond, &ad_info)
? 0 : ad_info.ports);
}
return count;
}
static DEVICE_ATTR(ad_num_ports, S_IRUGO, bonding_show_ad_num_ports, NULL);
/*
* Show current 802.3ad actor key.
*/
static ssize_t bonding_show_ad_actor_key(struct device *d,
struct device_attribute *attr,
char *buf)
{
int count = 0;
struct bonding *bond = to_bond(d);
if (bond->params.mode == BOND_MODE_8023AD) {
struct ad_info ad_info;
count = sprintf(buf, "%d\n",
bond_3ad_get_active_agg_info(bond, &ad_info)
? 0 : ad_info.actor_key);
}
return count;
}
static DEVICE_ATTR(ad_actor_key, S_IRUGO, bonding_show_ad_actor_key, NULL);
/*
* Show current 802.3ad partner key.
*/
static ssize_t bonding_show_ad_partner_key(struct device *d,
struct device_attribute *attr,
char *buf)
{
int count = 0;
struct bonding *bond = to_bond(d);
if (bond->params.mode == BOND_MODE_8023AD) {
struct ad_info ad_info;
count = sprintf(buf, "%d\n",
bond_3ad_get_active_agg_info(bond, &ad_info)
? 0 : ad_info.partner_key);
}
return count;
}
static DEVICE_ATTR(ad_partner_key, S_IRUGO, bonding_show_ad_partner_key, NULL);
/*
* Show current 802.3ad partner mac.
*/
static ssize_t bonding_show_ad_partner_mac(struct device *d,
struct device_attribute *attr,
char *buf)
{
int count = 0;
struct bonding *bond = to_bond(d);
if (bond->params.mode == BOND_MODE_8023AD) {
struct ad_info ad_info;
if (!bond_3ad_get_active_agg_info(bond, &ad_info))
count = sprintf(buf, "%pM\n", ad_info.partner_system);
}
return count;
}
static DEVICE_ATTR(ad_partner_mac, S_IRUGO, bonding_show_ad_partner_mac, NULL);
bonding: allow user-controlled output slave selection v2: changed bonding module version, modified to apply on top of changes from previous patch in series, and updated documentation to elaborate on multiqueue awareness that now exists in bonding driver. This patch give the user the ability to control the output slave for round-robin and active-backup bonding. Similar functionality was discussed in the past, but Jay Vosburgh indicated he would rather see a feature like this added to existing modes rather than creating a completely new mode. Jay's thoughts as well as Neil's input surrounding some of the issues with the first implementation pushed us toward a design that relied on the queue_mapping rather than skb marks. Round-robin and active-backup modes were chosen as the first users of this slave selection as they seemed like the most logical choices when considering a multi-switch environment. Round-robin mode works without any modification, but active-backup does require inclusion of the first patch in this series and setting the 'all_slaves_active' flag. This will allow reception of unicast traffic on any of the backup interfaces. This was tested with IPv4-based filters as well as VLAN-based filters with good results. More information as well as a configuration example is available in the patch to Documentation/networking/bonding.txt. Signed-off-by: Andy Gospodarek <andy@greyhouse.net> Signed-off-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-02 10:40:18 +02:00
/*
* Show the queue_ids of the slaves in the current bond.
*/
static ssize_t bonding_show_queue_id(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
struct list_head *iter;
struct slave *slave;
int res = 0;
bonding: allow user-controlled output slave selection v2: changed bonding module version, modified to apply on top of changes from previous patch in series, and updated documentation to elaborate on multiqueue awareness that now exists in bonding driver. This patch give the user the ability to control the output slave for round-robin and active-backup bonding. Similar functionality was discussed in the past, but Jay Vosburgh indicated he would rather see a feature like this added to existing modes rather than creating a completely new mode. Jay's thoughts as well as Neil's input surrounding some of the issues with the first implementation pushed us toward a design that relied on the queue_mapping rather than skb marks. Round-robin and active-backup modes were chosen as the first users of this slave selection as they seemed like the most logical choices when considering a multi-switch environment. Round-robin mode works without any modification, but active-backup does require inclusion of the first patch in this series and setting the 'all_slaves_active' flag. This will allow reception of unicast traffic on any of the backup interfaces. This was tested with IPv4-based filters as well as VLAN-based filters with good results. More information as well as a configuration example is available in the patch to Documentation/networking/bonding.txt. Signed-off-by: Andy Gospodarek <andy@greyhouse.net> Signed-off-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-02 10:40:18 +02:00
if (!rtnl_trylock())
return restart_syscall();
bond_for_each_slave(bond, slave, iter) {
if (res > (PAGE_SIZE - IFNAMSIZ - 6)) {
/* not enough space for another interface_name:queue_id pair */
bonding: allow user-controlled output slave selection v2: changed bonding module version, modified to apply on top of changes from previous patch in series, and updated documentation to elaborate on multiqueue awareness that now exists in bonding driver. This patch give the user the ability to control the output slave for round-robin and active-backup bonding. Similar functionality was discussed in the past, but Jay Vosburgh indicated he would rather see a feature like this added to existing modes rather than creating a completely new mode. Jay's thoughts as well as Neil's input surrounding some of the issues with the first implementation pushed us toward a design that relied on the queue_mapping rather than skb marks. Round-robin and active-backup modes were chosen as the first users of this slave selection as they seemed like the most logical choices when considering a multi-switch environment. Round-robin mode works without any modification, but active-backup does require inclusion of the first patch in this series and setting the 'all_slaves_active' flag. This will allow reception of unicast traffic on any of the backup interfaces. This was tested with IPv4-based filters as well as VLAN-based filters with good results. More information as well as a configuration example is available in the patch to Documentation/networking/bonding.txt. Signed-off-by: Andy Gospodarek <andy@greyhouse.net> Signed-off-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-02 10:40:18 +02:00
if ((PAGE_SIZE - res) > 10)
res = PAGE_SIZE - 10;
res += sprintf(buf + res, "++more++ ");
break;
}
res += sprintf(buf + res, "%s:%d ",
slave->dev->name, slave->queue_id);
}
if (res)
buf[res-1] = '\n'; /* eat the leftover space */
bonding: allow user-controlled output slave selection v2: changed bonding module version, modified to apply on top of changes from previous patch in series, and updated documentation to elaborate on multiqueue awareness that now exists in bonding driver. This patch give the user the ability to control the output slave for round-robin and active-backup bonding. Similar functionality was discussed in the past, but Jay Vosburgh indicated he would rather see a feature like this added to existing modes rather than creating a completely new mode. Jay's thoughts as well as Neil's input surrounding some of the issues with the first implementation pushed us toward a design that relied on the queue_mapping rather than skb marks. Round-robin and active-backup modes were chosen as the first users of this slave selection as they seemed like the most logical choices when considering a multi-switch environment. Round-robin mode works without any modification, but active-backup does require inclusion of the first patch in this series and setting the 'all_slaves_active' flag. This will allow reception of unicast traffic on any of the backup interfaces. This was tested with IPv4-based filters as well as VLAN-based filters with good results. More information as well as a configuration example is available in the patch to Documentation/networking/bonding.txt. Signed-off-by: Andy Gospodarek <andy@greyhouse.net> Signed-off-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-02 10:40:18 +02:00
rtnl_unlock();
bonding: allow user-controlled output slave selection v2: changed bonding module version, modified to apply on top of changes from previous patch in series, and updated documentation to elaborate on multiqueue awareness that now exists in bonding driver. This patch give the user the ability to control the output slave for round-robin and active-backup bonding. Similar functionality was discussed in the past, but Jay Vosburgh indicated he would rather see a feature like this added to existing modes rather than creating a completely new mode. Jay's thoughts as well as Neil's input surrounding some of the issues with the first implementation pushed us toward a design that relied on the queue_mapping rather than skb marks. Round-robin and active-backup modes were chosen as the first users of this slave selection as they seemed like the most logical choices when considering a multi-switch environment. Round-robin mode works without any modification, but active-backup does require inclusion of the first patch in this series and setting the 'all_slaves_active' flag. This will allow reception of unicast traffic on any of the backup interfaces. This was tested with IPv4-based filters as well as VLAN-based filters with good results. More information as well as a configuration example is available in the patch to Documentation/networking/bonding.txt. Signed-off-by: Andy Gospodarek <andy@greyhouse.net> Signed-off-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-02 10:40:18 +02:00
return res;
}
/*
* Set the queue_ids of the slaves in the current bond. The bond
* interface must be enslaved for this to work.
*/
static ssize_t bonding_store_queue_id(struct device *d,
struct device_attribute *attr,
const char *buffer, size_t count)
{
struct slave *slave, *update_slave;
struct bonding *bond = to_bond(d);
struct list_head *iter;
bonding: allow user-controlled output slave selection v2: changed bonding module version, modified to apply on top of changes from previous patch in series, and updated documentation to elaborate on multiqueue awareness that now exists in bonding driver. This patch give the user the ability to control the output slave for round-robin and active-backup bonding. Similar functionality was discussed in the past, but Jay Vosburgh indicated he would rather see a feature like this added to existing modes rather than creating a completely new mode. Jay's thoughts as well as Neil's input surrounding some of the issues with the first implementation pushed us toward a design that relied on the queue_mapping rather than skb marks. Round-robin and active-backup modes were chosen as the first users of this slave selection as they seemed like the most logical choices when considering a multi-switch environment. Round-robin mode works without any modification, but active-backup does require inclusion of the first patch in this series and setting the 'all_slaves_active' flag. This will allow reception of unicast traffic on any of the backup interfaces. This was tested with IPv4-based filters as well as VLAN-based filters with good results. More information as well as a configuration example is available in the patch to Documentation/networking/bonding.txt. Signed-off-by: Andy Gospodarek <andy@greyhouse.net> Signed-off-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-02 10:40:18 +02:00
u16 qid;
int ret = count;
bonding: allow user-controlled output slave selection v2: changed bonding module version, modified to apply on top of changes from previous patch in series, and updated documentation to elaborate on multiqueue awareness that now exists in bonding driver. This patch give the user the ability to control the output slave for round-robin and active-backup bonding. Similar functionality was discussed in the past, but Jay Vosburgh indicated he would rather see a feature like this added to existing modes rather than creating a completely new mode. Jay's thoughts as well as Neil's input surrounding some of the issues with the first implementation pushed us toward a design that relied on the queue_mapping rather than skb marks. Round-robin and active-backup modes were chosen as the first users of this slave selection as they seemed like the most logical choices when considering a multi-switch environment. Round-robin mode works without any modification, but active-backup does require inclusion of the first patch in this series and setting the 'all_slaves_active' flag. This will allow reception of unicast traffic on any of the backup interfaces. This was tested with IPv4-based filters as well as VLAN-based filters with good results. More information as well as a configuration example is available in the patch to Documentation/networking/bonding.txt. Signed-off-by: Andy Gospodarek <andy@greyhouse.net> Signed-off-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-02 10:40:18 +02:00
char *delim;
struct net_device *sdev = NULL;
if (!rtnl_trylock())
return restart_syscall();
/* delim will point to queue id if successful */
delim = strchr(buffer, ':');
if (!delim)
goto err_no_cmd;
/*
* Terminate string that points to device name and bump it
* up one, so we can read the queue id there.
*/
*delim = '\0';
if (sscanf(++delim, "%hd\n", &qid) != 1)
goto err_no_cmd;
/* Check buffer length, valid ifname and queue id */
if (strlen(buffer) > IFNAMSIZ ||
!dev_valid_name(buffer) ||
qid > bond->dev->real_num_tx_queues)
bonding: allow user-controlled output slave selection v2: changed bonding module version, modified to apply on top of changes from previous patch in series, and updated documentation to elaborate on multiqueue awareness that now exists in bonding driver. This patch give the user the ability to control the output slave for round-robin and active-backup bonding. Similar functionality was discussed in the past, but Jay Vosburgh indicated he would rather see a feature like this added to existing modes rather than creating a completely new mode. Jay's thoughts as well as Neil's input surrounding some of the issues with the first implementation pushed us toward a design that relied on the queue_mapping rather than skb marks. Round-robin and active-backup modes were chosen as the first users of this slave selection as they seemed like the most logical choices when considering a multi-switch environment. Round-robin mode works without any modification, but active-backup does require inclusion of the first patch in this series and setting the 'all_slaves_active' flag. This will allow reception of unicast traffic on any of the backup interfaces. This was tested with IPv4-based filters as well as VLAN-based filters with good results. More information as well as a configuration example is available in the patch to Documentation/networking/bonding.txt. Signed-off-by: Andy Gospodarek <andy@greyhouse.net> Signed-off-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-02 10:40:18 +02:00
goto err_no_cmd;
/* Get the pointer to that interface if it exists */
sdev = __dev_get_by_name(dev_net(bond->dev), buffer);
if (!sdev)
goto err_no_cmd;
/* Search for thes slave and check for duplicate qids */
update_slave = NULL;
bond_for_each_slave(bond, slave, iter) {
bonding: allow user-controlled output slave selection v2: changed bonding module version, modified to apply on top of changes from previous patch in series, and updated documentation to elaborate on multiqueue awareness that now exists in bonding driver. This patch give the user the ability to control the output slave for round-robin and active-backup bonding. Similar functionality was discussed in the past, but Jay Vosburgh indicated he would rather see a feature like this added to existing modes rather than creating a completely new mode. Jay's thoughts as well as Neil's input surrounding some of the issues with the first implementation pushed us toward a design that relied on the queue_mapping rather than skb marks. Round-robin and active-backup modes were chosen as the first users of this slave selection as they seemed like the most logical choices when considering a multi-switch environment. Round-robin mode works without any modification, but active-backup does require inclusion of the first patch in this series and setting the 'all_slaves_active' flag. This will allow reception of unicast traffic on any of the backup interfaces. This was tested with IPv4-based filters as well as VLAN-based filters with good results. More information as well as a configuration example is available in the patch to Documentation/networking/bonding.txt. Signed-off-by: Andy Gospodarek <andy@greyhouse.net> Signed-off-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-02 10:40:18 +02:00
if (sdev == slave->dev)
/*
* We don't need to check the matching
* slave for dups, since we're overwriting it
*/
update_slave = slave;
else if (qid && qid == slave->queue_id) {
goto err_no_cmd;
bonding: allow user-controlled output slave selection v2: changed bonding module version, modified to apply on top of changes from previous patch in series, and updated documentation to elaborate on multiqueue awareness that now exists in bonding driver. This patch give the user the ability to control the output slave for round-robin and active-backup bonding. Similar functionality was discussed in the past, but Jay Vosburgh indicated he would rather see a feature like this added to existing modes rather than creating a completely new mode. Jay's thoughts as well as Neil's input surrounding some of the issues with the first implementation pushed us toward a design that relied on the queue_mapping rather than skb marks. Round-robin and active-backup modes were chosen as the first users of this slave selection as they seemed like the most logical choices when considering a multi-switch environment. Round-robin mode works without any modification, but active-backup does require inclusion of the first patch in this series and setting the 'all_slaves_active' flag. This will allow reception of unicast traffic on any of the backup interfaces. This was tested with IPv4-based filters as well as VLAN-based filters with good results. More information as well as a configuration example is available in the patch to Documentation/networking/bonding.txt. Signed-off-by: Andy Gospodarek <andy@greyhouse.net> Signed-off-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-02 10:40:18 +02:00
}
}
if (!update_slave)
goto err_no_cmd;
bonding: allow user-controlled output slave selection v2: changed bonding module version, modified to apply on top of changes from previous patch in series, and updated documentation to elaborate on multiqueue awareness that now exists in bonding driver. This patch give the user the ability to control the output slave for round-robin and active-backup bonding. Similar functionality was discussed in the past, but Jay Vosburgh indicated he would rather see a feature like this added to existing modes rather than creating a completely new mode. Jay's thoughts as well as Neil's input surrounding some of the issues with the first implementation pushed us toward a design that relied on the queue_mapping rather than skb marks. Round-robin and active-backup modes were chosen as the first users of this slave selection as they seemed like the most logical choices when considering a multi-switch environment. Round-robin mode works without any modification, but active-backup does require inclusion of the first patch in this series and setting the 'all_slaves_active' flag. This will allow reception of unicast traffic on any of the backup interfaces. This was tested with IPv4-based filters as well as VLAN-based filters with good results. More information as well as a configuration example is available in the patch to Documentation/networking/bonding.txt. Signed-off-by: Andy Gospodarek <andy@greyhouse.net> Signed-off-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-02 10:40:18 +02:00
/* Actually set the qids for the slave */
update_slave->queue_id = qid;
out:
rtnl_unlock();
return ret;
err_no_cmd:
pr_info("invalid input for queue_id set for %s.\n",
bond->dev->name);
ret = -EPERM;
goto out;
}
static DEVICE_ATTR(queue_id, S_IRUGO | S_IWUSR, bonding_show_queue_id,
bonding_store_queue_id);
/*
* Show and set the all_slaves_active flag.
*/
static ssize_t bonding_show_slaves_active(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%d\n", bond->params.all_slaves_active);
}
static ssize_t bonding_store_slaves_active(struct device *d,
struct device_attribute *attr,
const char *buf, size_t count)
{
struct bonding *bond = to_bond(d);
int new_value, ret;
if (sscanf(buf, "%d", &new_value) != 1) {
pr_err("%s: no all_slaves_active value specified.\n",
bond->dev->name);
return -EINVAL;
}
if (!rtnl_trylock())
return restart_syscall();
ret = bond_option_all_slaves_active_set(bond, new_value);
if (!ret)
ret = count;
rtnl_unlock();
return ret;
}
static DEVICE_ATTR(all_slaves_active, S_IRUGO | S_IWUSR,
bonding_show_slaves_active, bonding_store_slaves_active);
/*
* Show and set the number of IGMP membership reports to send on link failure
*/
static ssize_t bonding_show_resend_igmp(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%d\n", bond->params.resend_igmp);
}
static ssize_t bonding_store_resend_igmp(struct device *d,
struct device_attribute *attr,
const char *buf, size_t count)
{
int new_value, ret = count;
struct bonding *bond = to_bond(d);
if (sscanf(buf, "%d", &new_value) != 1) {
pr_err("%s: no resend_igmp value specified.\n",
bond->dev->name);
return -EINVAL;
}
if (!rtnl_trylock())
return restart_syscall();
ret = bond_option_resend_igmp_set(bond, new_value);
if (!ret)
ret = count;
rtnl_unlock();
return ret;
}
static DEVICE_ATTR(resend_igmp, S_IRUGO | S_IWUSR,
bonding_show_resend_igmp, bonding_store_resend_igmp);
static ssize_t bonding_show_lp_interval(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%d\n", bond->params.lp_interval);
}
static ssize_t bonding_store_lp_interval(struct device *d,
struct device_attribute *attr,
const char *buf, size_t count)
{
struct bonding *bond = to_bond(d);
int new_value, ret;
if (sscanf(buf, "%d", &new_value) != 1) {
pr_err("%s: no lp interval value specified.\n",
bond->dev->name);
return -EINVAL;
}
if (!rtnl_trylock())
return restart_syscall();
ret = bond_option_lp_interval_set(bond, new_value);
if (!ret)
ret = count;
rtnl_unlock();
return ret;
}
static DEVICE_ATTR(lp_interval, S_IRUGO | S_IWUSR,
bonding_show_lp_interval, bonding_store_lp_interval);
static ssize_t bonding_show_packets_per_slave(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
unsigned int packets_per_slave = bond->params.packets_per_slave;
return sprintf(buf, "%u\n", packets_per_slave);
}
static ssize_t bonding_store_packets_per_slave(struct device *d,
struct device_attribute *attr,
const char *buf, size_t count)
{
struct bonding *bond = to_bond(d);
int new_value, ret;
if (sscanf(buf, "%d", &new_value) != 1) {
pr_err("%s: no packets_per_slave value specified.\n",
bond->dev->name);
return -EINVAL;
}
if (!rtnl_trylock())
return restart_syscall();
ret = bond_option_packets_per_slave_set(bond, new_value);
if (!ret)
ret = count;
rtnl_unlock();
return ret;
}
static DEVICE_ATTR(packets_per_slave, S_IRUGO | S_IWUSR,
bonding_show_packets_per_slave,
bonding_store_packets_per_slave);
static struct attribute *per_bond_attrs[] = {
&dev_attr_slaves.attr,
&dev_attr_mode.attr,
&dev_attr_fail_over_mac.attr,
&dev_attr_arp_validate.attr,
bonding: add an option to fail when any of arp_ip_target is inaccessible Currently, we fail only when all of the ips in arp_ip_target are gone. However, in some situations we might need to fail if even one host from arp_ip_target becomes unavailable. All situations, obviously, rely on the idea that we need *completely* functional network, with all interfaces/addresses working correctly. One real world example might be: vlans on top on bond (hybrid port). If bond and vlans have ips assigned and we have their peers monitored via arp_ip_target - in case of switch misconfiguration (trunk/access port), slave driver malfunction or tagged/untagged traffic dropped on the way - we will be able to switch to another slave. Though any other configuration needs that if we need to have access to all arp_ip_targets. This patch adds this possibility by adding a new parameter - arp_all_targets (both as a module parameter and as a sysfs knob). It can be set to: 0 or any (the default) - which works exactly as it's working now - the slave is up if any of the arp_ip_targets are up. 1 or all - the slave is up if all of the arp_ip_targets are up. This parameter can be changed on the fly (via sysfs), and requires the mode to be active-backup and arp_validate to be enabled (it obeys the arp_validate config on which slaves to validate). Internally it's done through: 1) Add target_last_arp_rx[BOND_MAX_ARP_TARGETS] array to slave struct. It's an array of jiffies, meaning that slave->target_last_arp_rx[i] is the last time we've received arp from bond->params.arp_targets[i] on this slave. 2) If we successfully validate an arp from bond->params.arp_targets[i] in bond_validate_arp() - update the slave->target_last_arp_rx[i] with the current jiffies value. 3) When getting slave's last_rx via slave_last_rx(), we return the oldest time when we've received an arp from any address in bond->params.arp_targets[]. If the value of arp_all_targets == 0 - we still work the same way as before. Also, update the documentation to reflect the new parameter. v3->v4: Kill the forgotten rtnl_unlock(), rephrase the documentation part to be more clear, don't fail setting arp_all_targets if arp_validate is not set - it has no effect anyway but can be easier to set up. Also, print a warning if the last arp_ip_target is removed while the arp_interval is on, but not the arp_validate. v2->v3: Use _bh spinlock, remove useless rtnl_lock() and use jiffies for new arp_ip_target last arp, instead of slave_last_rx(). On bond_enslave(), use the same initialization value for target_last_arp_rx[] as is used for the default last_arp_rx, to avoid useless interface flaps. Also, instead of failing to remove the last arp_ip_target just print a warning - otherwise it might break existing scripts. v1->v2: Correctly handle adding/removing hosts in arp_ip_target - we need to shift/initialize all slave's target_last_arp_rx. Also, don't fail module loading on arp_all_targets misconfiguration, just disable it, and some minor style fixes. Signed-off-by: Veaceslav Falico <vfalico@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-06-24 11:49:34 +02:00
&dev_attr_arp_all_targets.attr,
&dev_attr_arp_interval.attr,
&dev_attr_arp_ip_target.attr,
&dev_attr_downdelay.attr,
&dev_attr_updelay.attr,
&dev_attr_lacp_rate.attr,
&dev_attr_ad_select.attr,
&dev_attr_xmit_hash_policy.attr,
&dev_attr_num_grat_arp.attr,
&dev_attr_num_unsol_na.attr,
&dev_attr_miimon.attr,
&dev_attr_primary.attr,
&dev_attr_primary_reselect.attr,
&dev_attr_use_carrier.attr,
&dev_attr_active_slave.attr,
&dev_attr_mii_status.attr,
&dev_attr_ad_aggregator.attr,
&dev_attr_ad_num_ports.attr,
&dev_attr_ad_actor_key.attr,
&dev_attr_ad_partner_key.attr,
&dev_attr_ad_partner_mac.attr,
bonding: allow user-controlled output slave selection v2: changed bonding module version, modified to apply on top of changes from previous patch in series, and updated documentation to elaborate on multiqueue awareness that now exists in bonding driver. This patch give the user the ability to control the output slave for round-robin and active-backup bonding. Similar functionality was discussed in the past, but Jay Vosburgh indicated he would rather see a feature like this added to existing modes rather than creating a completely new mode. Jay's thoughts as well as Neil's input surrounding some of the issues with the first implementation pushed us toward a design that relied on the queue_mapping rather than skb marks. Round-robin and active-backup modes were chosen as the first users of this slave selection as they seemed like the most logical choices when considering a multi-switch environment. Round-robin mode works without any modification, but active-backup does require inclusion of the first patch in this series and setting the 'all_slaves_active' flag. This will allow reception of unicast traffic on any of the backup interfaces. This was tested with IPv4-based filters as well as VLAN-based filters with good results. More information as well as a configuration example is available in the patch to Documentation/networking/bonding.txt. Signed-off-by: Andy Gospodarek <andy@greyhouse.net> Signed-off-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-02 10:40:18 +02:00
&dev_attr_queue_id.attr,
&dev_attr_all_slaves_active.attr,
&dev_attr_resend_igmp.attr,
&dev_attr_min_links.attr,
&dev_attr_lp_interval.attr,
&dev_attr_packets_per_slave.attr,
NULL,
};
static struct attribute_group bonding_group = {
.name = "bonding",
.attrs = per_bond_attrs,
};
/*
* Initialize sysfs. This sets up the bonding_masters file in
* /sys/class/net.
*/
int bond_create_sysfs(struct bond_net *bn)
{
int ret;
bn->class_attr_bonding_masters = class_attr_bonding_masters;
bonding: Add a forgetten sysfs_attr_init on class_attr_bonding_masters When I made class_attr_bonding_matters per network namespace and dynamically allocated I overlooked the need for calling sysfs_attr_init. Oops. This fixes the following lockdep splat: [ 5.749651] bonding: Ethernet Channel Bonding Driver: v3.7.1 (April 27, 2011) [ 5.749655] bonding: MII link monitoring set to 100 ms [ 5.749676] BUG: key f49a831c not in .data! [ 5.749677] ------------[ cut here ]------------ [ 5.749752] WARNING: at kernel/lockdep.c:2897 lockdep_init_map+0x1c3/0x460() [ 5.749809] Hardware name: ProLiant BL460c G1 [ 5.749862] Modules linked in: bonding(+) [ 5.749978] Pid: 3177, comm: modprobe Not tainted 3.1.0-rc9-02177-gf2d1a4e-dirty #1157 [ 5.750066] Call Trace: [ 5.750120] [<c1352c2f>] ? printk+0x18/0x21 [ 5.750176] [<c103112d>] warn_slowpath_common+0x6d/0xa0 [ 5.750231] [<c1060133>] ? lockdep_init_map+0x1c3/0x460 [ 5.750287] [<c1060133>] ? lockdep_init_map+0x1c3/0x460 [ 5.750342] [<c103117d>] warn_slowpath_null+0x1d/0x20 [ 5.750398] [<c1060133>] lockdep_init_map+0x1c3/0x460 [ 5.750453] [<c1355ddd>] ? _raw_spin_unlock+0x1d/0x20 [ 5.750510] [<c11255c8>] ? sysfs_new_dirent+0x68/0x110 [ 5.750565] [<c1124d4b>] sysfs_add_file_mode+0x8b/0xe0 [ 5.750621] [<c1124db3>] sysfs_add_file+0x13/0x20 [ 5.750675] [<c1124e7c>] sysfs_create_file+0x1c/0x20 [ 5.750737] [<c1208f09>] class_create_file+0x19/0x20 [ 5.750794] [<c12c186f>] netdev_class_create_file+0xf/0x20 [ 5.750853] [<f85deaf4>] bond_create_sysfs+0x44/0x90 [bonding] [ 5.750911] [<f8410947>] ? bond_create_proc_dir+0x1e/0x3e [bonding] [ 5.750970] [<f841007e>] bond_net_init+0x7e/0x87 [bonding] [ 5.751026] [<f8410000>] ? 0xf840ffff [ 5.751080] [<c12abc7a>] ops_init.clone.4+0xba/0x100 [ 5.751135] [<c12abdb2>] ? register_pernet_subsys+0x12/0x30 [ 5.751191] [<c12abd03>] register_pernet_operations.clone.3+0x43/0x80 [ 5.751249] [<c12abdb9>] register_pernet_subsys+0x19/0x30 [ 5.751306] [<f84108b9>] bonding_init+0x832/0x8a2 [bonding] [ 5.751363] [<c10011f0>] do_one_initcall+0x30/0x160 [ 5.751420] [<f8410087>] ? bond_net_init+0x87/0x87 [bonding] [ 5.751477] [<c106d5cf>] sys_init_module+0xef/0x1890 [ 5.751533] [<c1356490>] sysenter_do_call+0x12/0x36 [ 5.751588] ---[ end trace 89f492d83a7f5006 ]--- Signed-off-by: Eric W. Biederman <ebiederm@xmission.com> Reported-by: Eric Dumazet <eric.dumazet@gmail.com> Tested-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2011-10-22 00:43:07 +02:00
sysfs_attr_init(&bn->class_attr_bonding_masters.attr);
sysfs: make attr namespace interface less convoluted sysfs ns (namespace) implementation became more convoluted than necessary while trying to hide ns information from visible interface. The relatively recent attr ns support is a good example. * attr ns tag is determined by sysfs_ops->namespace() callback while dir tag is determined by kobj_type->namespace(). The placement is arbitrary. * Instead of performing operations with explicit ns tag, the namespace callback is routed through sysfs_attr_ns(), sysfs_ops->namespace(), class_attr_namespace(), class_attr->namespace(). It's not simpler in any sense. The only thing this convolution does is traversing the whole stack backwards. The namespace callbacks are unncessary because the operations involved are inherently synchronous. The information can be provided in in straight-forward top-down direction and reversing that direction is unnecessary and against basic design principles. This backward interface is unnecessarily convoluted and hinders properly separating out sysfs from driver model / kobject for proper layering. This patch updates attr ns support such that * sysfs_ops->namespace() and class_attr->namespace() are dropped. * sysfs_{create|remove}_file_ns(), which take explicit @ns param, are added and sysfs_{create|remove}_file() are now simple wrappers around the ns aware functions. * ns handling is dropped from sysfs_chmod_file(). Nobody uses it at this point. sysfs_chmod_file_ns() can be added later if necessary. * Explicit @ns is propagated through class_{create|remove}_file_ns() and netdev_class_{create|remove}_file_ns(). * driver/net/bonding which is currently the only user of attr namespace is updated to use netdev_class_{create|remove}_file_ns() with @bh->net as the ns tag instead of using the namespace callback. This patch should be an equivalent conversion without any functional difference. It makes the code easier to follow, reduces lines of code a bit and helps proper separation and layering. Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Eric W. Biederman <ebiederm@xmission.com> Cc: Kay Sievers <kay@vrfy.org> Acked-by: David S. Miller <davem@davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2013-09-12 04:29:04 +02:00
ret = netdev_class_create_file_ns(&bn->class_attr_bonding_masters,
bn->net);
2007-01-20 03:15:47 +01:00
/*
* Permit multiple loads of the module by ignoring failures to
* create the bonding_masters sysfs file. Bonding devices
* created by second or subsequent loads of the module will
* not be listed in, or controllable by, bonding_masters, but
* will have the usual "bonding" sysfs directory.
*
* This is done to preserve backwards compatibility for
* initscripts/sysconfig, which load bonding multiple times to
* configure multiple bonding devices.
*/
if (ret == -EEXIST) {
/* Is someone being kinky and naming a device bonding_master? */
if (__dev_get_by_name(bn->net,
class_attr_bonding_masters.attr.name))
pr_err("network device named %s already exists in sysfs",
class_attr_bonding_masters.attr.name);
ret = 0;
2007-01-20 03:15:47 +01:00
}
return ret;
}
/*
* Remove /sys/class/net/bonding_masters.
*/
void bond_destroy_sysfs(struct bond_net *bn)
{
sysfs: make attr namespace interface less convoluted sysfs ns (namespace) implementation became more convoluted than necessary while trying to hide ns information from visible interface. The relatively recent attr ns support is a good example. * attr ns tag is determined by sysfs_ops->namespace() callback while dir tag is determined by kobj_type->namespace(). The placement is arbitrary. * Instead of performing operations with explicit ns tag, the namespace callback is routed through sysfs_attr_ns(), sysfs_ops->namespace(), class_attr_namespace(), class_attr->namespace(). It's not simpler in any sense. The only thing this convolution does is traversing the whole stack backwards. The namespace callbacks are unncessary because the operations involved are inherently synchronous. The information can be provided in in straight-forward top-down direction and reversing that direction is unnecessary and against basic design principles. This backward interface is unnecessarily convoluted and hinders properly separating out sysfs from driver model / kobject for proper layering. This patch updates attr ns support such that * sysfs_ops->namespace() and class_attr->namespace() are dropped. * sysfs_{create|remove}_file_ns(), which take explicit @ns param, are added and sysfs_{create|remove}_file() are now simple wrappers around the ns aware functions. * ns handling is dropped from sysfs_chmod_file(). Nobody uses it at this point. sysfs_chmod_file_ns() can be added later if necessary. * Explicit @ns is propagated through class_{create|remove}_file_ns() and netdev_class_{create|remove}_file_ns(). * driver/net/bonding which is currently the only user of attr namespace is updated to use netdev_class_{create|remove}_file_ns() with @bh->net as the ns tag instead of using the namespace callback. This patch should be an equivalent conversion without any functional difference. It makes the code easier to follow, reduces lines of code a bit and helps proper separation and layering. Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Eric W. Biederman <ebiederm@xmission.com> Cc: Kay Sievers <kay@vrfy.org> Acked-by: David S. Miller <davem@davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2013-09-12 04:29:04 +02:00
netdev_class_remove_file_ns(&bn->class_attr_bonding_masters, bn->net);
}
/*
* Initialize sysfs for each bond. This sets up and registers
* the 'bondctl' directory for each individual bond under /sys/class/net.
*/
void bond_prepare_sysfs_group(struct bonding *bond)
{
bond->dev->sysfs_groups[0] = &bonding_group;
}