net: Refactor XPS for CPUs and Rx queues

Refactor XPS code to support Tx queue selection based on
CPU(s) map or Rx queue(s) map.

Signed-off-by: Amritha Nambiar <amritha.nambiar@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Amritha Nambiar 2018-06-29 21:26:41 -07:00 committed by David S. Miller
parent 1a84d7fdb5
commit 80d19669ec
4 changed files with 244 additions and 80 deletions

View File

@ -115,12 +115,17 @@ extern struct cpumask __cpu_active_mask;
#define cpu_active(cpu) ((cpu) == 0)
#endif
/*
 * Debug-only range check shared by the cpumask and XPS attribute helpers.
 * With CONFIG_DEBUG_PER_CPU_MAPS defined, warns when @cpu is not a valid
 * index into a bitmap of @bits bits; otherwise the body is empty.
 */
static inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bits)
{
#if defined(CONFIG_DEBUG_PER_CPU_MAPS)
	WARN_ON_ONCE(bits <= cpu);
#endif
}
/* verify cpu argument to cpumask_* operators */
/*
 * Returns @cpu unchanged. The checks below only emit a warning (and only
 * when CONFIG_DEBUG_PER_CPU_MAPS is defined) if @cpu >= nr_cpumask_bits.
 *
 * NOTE(review): the inline WARN_ON_ONCE and the cpu_max_bits_warn() call
 * test the same condition; this looks like the pre- and post-refactor lines
 * of a diff shown together -- confirm only the helper call is meant to stay.
 */
static inline unsigned int cpumask_check(unsigned int cpu)
{
#ifdef CONFIG_DEBUG_PER_CPU_MAPS
WARN_ON_ONCE(cpu >= nr_cpumask_bits);
#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
cpu_max_bits_warn(cpu, nr_cpumask_bits);
return cpu;
}

View File

@ -731,10 +731,15 @@ struct xps_map {
*/
struct xps_dev_maps {
/* RCU head; users in this change free the structure with kfree_rcu(dev_maps, rcu). */
struct rcu_head rcu;
/*
 * NOTE(review): cpu_map[] and attr_map[] are both zero-length trailing
 * arrays; this looks like the old and the renamed member of a diff shown
 * together -- confirm that only attr_map[] is meant to remain.
 */
struct xps_map __rcu *cpu_map[0];
/* Indexed by (CPU or Rx queue id) * num_tc + traffic class. */
struct xps_map __rcu *attr_map[0]; /* Either CPUs map or RXQs map */
};
#define XPS_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) + \
#define XPS_CPU_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) + \
(nr_cpu_ids * (_tcs) * sizeof(struct xps_map *)))
/*
 * Bytes needed for an xps_dev_maps that carries one xps_map pointer per
 * (Rx queue, traffic class) pair. Both arguments are parenthesized so that
 * expression arguments (e.g. "a + b") are multiplied as a whole.
 */
#define XPS_RXQ_DEV_MAPS_SIZE(_tcs, _rxqs) (sizeof(struct xps_dev_maps) +\
	((_rxqs) * (_tcs) * sizeof(struct xps_map *)))
#endif /* CONFIG_XPS */
#define TC_MAX_QUEUE 16
@ -1910,7 +1915,8 @@ struct net_device {
int watchdog_timeo;
#ifdef CONFIG_XPS
struct xps_dev_maps __rcu *xps_maps;
struct xps_dev_maps __rcu *xps_cpus_map;
struct xps_dev_maps __rcu *xps_rxqs_map;
#endif
#ifdef CONFIG_NET_CLS_ACT
struct mini_Qdisc __rcu *miniq_egress;
@ -3259,6 +3265,92 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
#ifdef CONFIG_XPS
int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
u16 index);
int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
u16 index, bool is_rxqs_map);
/**
 * netif_attr_test_mask - check one CPU/Rx queue bit in a mask
 * @j: index of the CPU or Rx queue to look up
 * @mask: bitmap covering all CPUs or all Rx queues
 * @nr_bits: size of @mask in bits
 *
 * Returns true when bit @j is set in @mask. @j is passed through the
 * cpu_max_bits_warn() debug check against @nr_bits first.
 */
static inline bool netif_attr_test_mask(unsigned long j,
					const unsigned long *mask,
					unsigned int nr_bits)
{
	bool is_set;

	cpu_max_bits_warn(j, nr_bits);
	is_set = test_bit(j, mask);

	return is_set;
}
/**
 * netif_attr_test_online - check whether a CPU/Rx queue is online
 * @j: index of the CPU or Rx queue to look up
 * @online_mask: bitmap of online CPUs/Rx queues, may be NULL
 * @nr_bits: size of the bitmap in bits
 *
 * Returns true if bit @j is set in @online_mask. When @online_mask is NULL,
 * returns true for any @j below @nr_bits.
 */
static inline bool netif_attr_test_online(unsigned long j,
					  const unsigned long *online_mask,
					  unsigned int nr_bits)
{
	cpu_max_bits_warn(j, nr_bits);

	/* Without a mask, every in-range index is reported as online. */
	if (!online_mask)
		return j < nr_bits;

	return test_bit(j, online_mask);
}
/**
 * netif_attrmask_next - find the next CPU/Rx queue set in a mask
 * @n: index to continue after; -1 starts the search at bit 0
 * @srcp: cpumask/Rx queue bitmap to search, may be NULL
 * @nr_bits: size of the bitmap in bits
 *
 * With a NULL @srcp the result is simply @n + 1. Otherwise the result is
 * that of find_next_bit() starting at @n + 1.
 *
 * Returns >= nr_bits if no further CPUs/Rx queues set.
 */
static inline unsigned int netif_attrmask_next(int n, const unsigned long *srcp,
					       unsigned int nr_bits)
{
	const int start = n + 1;

	/* -1 is the legal "before the first bit" value; skip the range check. */
	if (n != -1)
		cpu_max_bits_warn(n, nr_bits);

	if (!srcp)
		return start;

	return find_next_bit(srcp, nr_bits, start);
}
/**
 * netif_attrmask_next_and - find the next CPU/Rx queue set in both masks
 * @n: index to continue after; -1 starts the search at bit 0
 * @src1p: first CPUs/Rx queues bitmap, may be NULL
 * @src2p: second CPUs/Rx queues bitmap, may be NULL
 * @nr_bits: size of the bitmaps in bits
 *
 * A NULL mask places no restriction on the search: with one NULL mask only
 * the other is searched, and with both NULL the result is @n + 1.
 *
 * Returns >= nr_bits if no further CPUs/Rx queues set in both.
 */
static inline int netif_attrmask_next_and(int n, const unsigned long *src1p,
					  const unsigned long *src2p,
					  unsigned int nr_bits)
{
	const int start = n + 1;

	/* -1 is the legal "before the first bit" value; skip the range check. */
	if (n != -1)
		cpu_max_bits_warn(n, nr_bits);

	if (!src1p && !src2p)
		return start;

	if (!src2p)
		return find_next_bit(src1p, nr_bits, start);

	if (!src1p)
		return find_next_bit(src2p, nr_bits, start);

	return find_next_and_bit(src1p, src2p, nr_bits, start);
}
#else
static inline int netif_set_xps_queue(struct net_device *dev,
const struct cpumask *mask,

View File

@ -2092,7 +2092,7 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
int pos;
if (dev_maps)
map = xmap_dereference(dev_maps->cpu_map[tci]);
map = xmap_dereference(dev_maps->attr_map[tci]);
if (!map)
return false;
@ -2105,7 +2105,7 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
break;
}
RCU_INIT_POINTER(dev_maps->cpu_map[tci], NULL);
RCU_INIT_POINTER(dev_maps->attr_map[tci], NULL);
kfree_rcu(map, rcu);
return false;
}
@ -2135,31 +2135,58 @@ static bool remove_xps_queue_cpu(struct net_device *dev,
return active;
}
/*
 * clean_xps_maps - drop a range of Tx queues from one XPS device map
 * @dev: device owning the map
 * @mask: bitmap of CPU/Rx queue ids to visit, or NULL to visit every id
 *	  in [0, @nr_ids)
 * @dev_maps: the CPU or Rx queue map being cleaned
 * @nr_ids: number of CPU/Rx queue ids covered by @dev_maps
 * @offset: first Tx queue index to remove
 * @count: number of Tx queues to remove
 * @is_rxqs_map: true when @dev_maps is dev->xps_rxqs_map, false when it is
 *		 dev->xps_cpus_map
 *
 * Calls remove_xps_queue_cpu() for every visited id and ORs the results.
 * If the combined result is false, the matching device pointer is cleared
 * and @dev_maps is freed via kfree_rcu().
 *
 * For the CPU map, the NUMA node of every Tx queue in the range is reset to
 * NUMA_NO_NODE whether or not the map stays active. Doing this only when the
 * whole map is freed would leave a stale node on removed queues for as long
 * as any other queue keeps the map alive.
 */
static void clean_xps_maps(struct net_device *dev, const unsigned long *mask,
			   struct xps_dev_maps *dev_maps, unsigned int nr_ids,
			   u16 offset, u16 count, bool is_rxqs_map)
{
	bool active = false;
	int i, j;

	for (j = -1; j = netif_attrmask_next(j, mask, nr_ids),
	     j < nr_ids;)
		active |= remove_xps_queue_cpu(dev, dev_maps, j, offset,
					       count);

	if (!active) {
		if (is_rxqs_map)
			RCU_INIT_POINTER(dev->xps_rxqs_map, NULL);
		else
			RCU_INIT_POINTER(dev->xps_cpus_map, NULL);
		kfree_rcu(dev_maps, rcu);
	}

	/* NUMA node is only written for the CPU map, never for the Rx queue map. */
	if (!is_rxqs_map) {
		for (i = offset + (count - 1); count--; i--)
			netdev_queue_numa_node_write(
				netdev_get_tx_queue(dev, i),
				NUMA_NO_NODE);
	}
}
static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
u16 count)
{
const unsigned long *possible_mask = NULL;
struct xps_dev_maps *dev_maps;
int cpu, i;
bool active = false;
unsigned int nr_ids;
mutex_lock(&xps_map_mutex);
dev_maps = xmap_dereference(dev->xps_maps);
dev_maps = xmap_dereference(dev->xps_rxqs_map);
if (dev_maps) {
nr_ids = dev->num_rx_queues;
clean_xps_maps(dev, possible_mask, dev_maps, nr_ids, offset,
count, true);
}
dev_maps = xmap_dereference(dev->xps_cpus_map);
if (!dev_maps)
goto out_no_maps;
for_each_possible_cpu(cpu)
active |= remove_xps_queue_cpu(dev, dev_maps, cpu,
offset, count);
if (!active) {
RCU_INIT_POINTER(dev->xps_maps, NULL);
kfree_rcu(dev_maps, rcu);
}
for (i = offset + (count - 1); count--; i--)
netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
NUMA_NO_NODE);
if (num_possible_cpus() > 1)
possible_mask = cpumask_bits(cpu_possible_mask);
nr_ids = nr_cpu_ids;
clean_xps_maps(dev, possible_mask, dev_maps, nr_ids, offset, count,
false);
out_no_maps:
mutex_unlock(&xps_map_mutex);
@ -2170,8 +2197,8 @@ static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
netif_reset_xps_queues(dev, index, dev->num_tx_queues - index);
}
static struct xps_map *expand_xps_map(struct xps_map *map,
int cpu, u16 index)
static struct xps_map *expand_xps_map(struct xps_map *map, int attr_index,
u16 index, bool is_rxqs_map)
{
struct xps_map *new_map;
int alloc_len = XPS_MIN_MAP_ALLOC;
@ -2183,7 +2210,7 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
return map;
}
/* Need to add queue to this CPU's existing map */
/* Need to add tx-queue to this CPU's/rx-queue's existing map */
if (map) {
if (pos < map->alloc_len)
return map;
@ -2191,9 +2218,14 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
alloc_len = map->alloc_len * 2;
}
/* Need to allocate new map to store queue on this CPU's map */
new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
cpu_to_node(cpu));
/* Need to allocate new map to store tx-queue on this CPU's/rx-queue's
* map
*/
if (is_rxqs_map)
new_map = kzalloc(XPS_MAP_SIZE(alloc_len), GFP_KERNEL);
else
new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
cpu_to_node(attr_index));
if (!new_map)
return NULL;
@ -2205,14 +2237,16 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
return new_map;
}
int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
u16 index)
int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
u16 index, bool is_rxqs_map)
{
const unsigned long *online_mask = NULL, *possible_mask = NULL;
struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
int i, cpu, tci, numa_node_id = -2;
int i, j, tci, numa_node_id = -2;
int maps_sz, num_tc = 1, tc = 0;
struct xps_map *map, *new_map;
bool active = false;
unsigned int nr_ids;
if (dev->num_tc) {
num_tc = dev->num_tc;
@ -2221,16 +2255,27 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
return -EINVAL;
}
maps_sz = XPS_DEV_MAPS_SIZE(num_tc);
mutex_lock(&xps_map_mutex);
if (is_rxqs_map) {
maps_sz = XPS_RXQ_DEV_MAPS_SIZE(num_tc, dev->num_rx_queues);
dev_maps = xmap_dereference(dev->xps_rxqs_map);
nr_ids = dev->num_rx_queues;
} else {
maps_sz = XPS_CPU_DEV_MAPS_SIZE(num_tc);
if (num_possible_cpus() > 1) {
online_mask = cpumask_bits(cpu_online_mask);
possible_mask = cpumask_bits(cpu_possible_mask);
}
dev_maps = xmap_dereference(dev->xps_cpus_map);
nr_ids = nr_cpu_ids;
}
if (maps_sz < L1_CACHE_BYTES)
maps_sz = L1_CACHE_BYTES;
mutex_lock(&xps_map_mutex);
dev_maps = xmap_dereference(dev->xps_maps);
/* allocate memory for queue storage */
for_each_cpu_and(cpu, cpu_online_mask, mask) {
for (j = -1; j = netif_attrmask_next_and(j, online_mask, mask, nr_ids),
j < nr_ids;) {
if (!new_dev_maps)
new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
if (!new_dev_maps) {
@ -2238,73 +2283,81 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
return -ENOMEM;
}
tci = cpu * num_tc + tc;
map = dev_maps ? xmap_dereference(dev_maps->cpu_map[tci]) :
tci = j * num_tc + tc;
map = dev_maps ? xmap_dereference(dev_maps->attr_map[tci]) :
NULL;
map = expand_xps_map(map, cpu, index);
map = expand_xps_map(map, j, index, is_rxqs_map);
if (!map)
goto error;
RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
}
if (!new_dev_maps)
goto out_no_new_maps;
for_each_possible_cpu(cpu) {
for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
j < nr_ids;) {
/* copy maps belonging to foreign traffic classes */
for (i = tc, tci = cpu * num_tc; dev_maps && i--; tci++) {
for (i = tc, tci = j * num_tc; dev_maps && i--; tci++) {
/* fill in the new device map from the old device map */
map = xmap_dereference(dev_maps->cpu_map[tci]);
RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
map = xmap_dereference(dev_maps->attr_map[tci]);
RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
}
/* We need to explicitly update tci as previous loop
* could break out early if dev_maps is NULL.
*/
tci = cpu * num_tc + tc;
tci = j * num_tc + tc;
if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) {
/* add queue to CPU maps */
if (netif_attr_test_mask(j, mask, nr_ids) &&
netif_attr_test_online(j, online_mask, nr_ids)) {
/* add tx-queue to CPU/rx-queue maps */
int pos = 0;
map = xmap_dereference(new_dev_maps->cpu_map[tci]);
map = xmap_dereference(new_dev_maps->attr_map[tci]);
while ((pos < map->len) && (map->queues[pos] != index))
pos++;
if (pos == map->len)
map->queues[map->len++] = index;
#ifdef CONFIG_NUMA
if (numa_node_id == -2)
numa_node_id = cpu_to_node(cpu);
else if (numa_node_id != cpu_to_node(cpu))
numa_node_id = -1;
if (!is_rxqs_map) {
if (numa_node_id == -2)
numa_node_id = cpu_to_node(j);
else if (numa_node_id != cpu_to_node(j))
numa_node_id = -1;
}
#endif
} else if (dev_maps) {
/* fill in the new device map from the old device map */
map = xmap_dereference(dev_maps->cpu_map[tci]);
RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
map = xmap_dereference(dev_maps->attr_map[tci]);
RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
}
/* copy maps belonging to foreign traffic classes */
for (i = num_tc - tc, tci++; dev_maps && --i; tci++) {
/* fill in the new device map from the old device map */
map = xmap_dereference(dev_maps->cpu_map[tci]);
RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
map = xmap_dereference(dev_maps->attr_map[tci]);
RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
}
}
rcu_assign_pointer(dev->xps_maps, new_dev_maps);
if (is_rxqs_map)
rcu_assign_pointer(dev->xps_rxqs_map, new_dev_maps);
else
rcu_assign_pointer(dev->xps_cpus_map, new_dev_maps);
/* Cleanup old maps */
if (!dev_maps)
goto out_no_old_maps;
for_each_possible_cpu(cpu) {
for (i = num_tc, tci = cpu * num_tc; i--; tci++) {
new_map = xmap_dereference(new_dev_maps->cpu_map[tci]);
map = xmap_dereference(dev_maps->cpu_map[tci]);
for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
j < nr_ids;) {
for (i = num_tc, tci = j * num_tc; i--; tci++) {
new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
map = xmap_dereference(dev_maps->attr_map[tci]);
if (map && map != new_map)
kfree_rcu(map, rcu);
}
@ -2317,19 +2370,23 @@ out_no_old_maps:
active = true;
out_no_new_maps:
/* update Tx queue numa node */
netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
(numa_node_id >= 0) ? numa_node_id :
NUMA_NO_NODE);
if (!is_rxqs_map) {
/* update Tx queue numa node */
netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
(numa_node_id >= 0) ?
numa_node_id : NUMA_NO_NODE);
}
if (!dev_maps)
goto out_no_maps;
/* removes queue from unused CPUs */
for_each_possible_cpu(cpu) {
for (i = tc, tci = cpu * num_tc; i--; tci++)
/* removes tx-queue from unused CPUs/rx-queues */
for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
j < nr_ids;) {
for (i = tc, tci = j * num_tc; i--; tci++)
active |= remove_xps_queue(dev_maps, tci, index);
if (!cpumask_test_cpu(cpu, mask) || !cpu_online(cpu))
if (!netif_attr_test_mask(j, mask, nr_ids) ||
!netif_attr_test_online(j, online_mask, nr_ids))
active |= remove_xps_queue(dev_maps, tci, index);
for (i = num_tc - tc, tci++; --i; tci++)
active |= remove_xps_queue(dev_maps, tci, index);
@ -2337,7 +2394,10 @@ out_no_new_maps:
/* free map if not active */
if (!active) {
RCU_INIT_POINTER(dev->xps_maps, NULL);
if (is_rxqs_map)
RCU_INIT_POINTER(dev->xps_rxqs_map, NULL);
else
RCU_INIT_POINTER(dev->xps_cpus_map, NULL);
kfree_rcu(dev_maps, rcu);
}
@ -2347,11 +2407,12 @@ out_no_maps:
return 0;
error:
/* remove any maps that we added */
for_each_possible_cpu(cpu) {
for (i = num_tc, tci = cpu * num_tc; i--; tci++) {
new_map = xmap_dereference(new_dev_maps->cpu_map[tci]);
for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
j < nr_ids;) {
for (i = num_tc, tci = j * num_tc; i--; tci++) {
new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
map = dev_maps ?
xmap_dereference(dev_maps->cpu_map[tci]) :
xmap_dereference(dev_maps->attr_map[tci]) :
NULL;
if (new_map && new_map != map)
kfree(new_map);
@ -2363,6 +2424,12 @@ error:
kfree(new_dev_maps);
return -ENOMEM;
}
/*
 * netif_set_xps_queue - CPU-mask entry point for XPS configuration
 * @dev: network device
 * @mask: CPUs to associate with the Tx queue
 * @index: Tx queue index
 *
 * Hands the raw bitmap of @mask to __netif_set_xps_queue() with is_rxqs_map
 * set to false and returns its result.
 */
int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
			u16 index)
{
	const unsigned long *cpu_bits = cpumask_bits(mask);

	return __netif_set_xps_queue(dev, cpu_bits, index, false);
}
EXPORT_SYMBOL(netif_set_xps_queue);
#endif
@ -3384,7 +3451,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
int queue_index = -1;
rcu_read_lock();
dev_maps = rcu_dereference(dev->xps_maps);
dev_maps = rcu_dereference(dev->xps_cpus_map);
if (dev_maps) {
unsigned int tci = skb->sender_cpu - 1;
@ -3393,7 +3460,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
tci += netdev_get_prio_tc_map(dev, skb->priority);
}
map = rcu_dereference(dev_maps->cpu_map[tci]);
map = rcu_dereference(dev_maps->attr_map[tci]);
if (map) {
if (map->len == 1)
queue_index = map->queues[0];

View File

@ -1227,13 +1227,13 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
return -ENOMEM;
rcu_read_lock();
dev_maps = rcu_dereference(dev->xps_maps);
dev_maps = rcu_dereference(dev->xps_cpus_map);
if (dev_maps) {
for_each_possible_cpu(cpu) {
int i, tci = cpu * num_tc + tc;
struct xps_map *map;
map = rcu_dereference(dev_maps->cpu_map[tci]);
map = rcu_dereference(dev_maps->attr_map[tci]);
if (!map)
continue;