net/Qdisc: use a seqlock instead of a seqcount

The seqcount disables preemption on -RT while it is held, which we can't
remove. Also we don't want the reader to spin for ages if the writer is
scheduled out. The seqlock on the other hand will serialize / sleep on
the lock while the writer is active.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
This commit is contained in:
Sebastian Andrzej Siewior 2016-09-14 17:36:35 +02:00 committed by Alibek Omarov
parent 6e91cb9b19
commit c587de4dc8
8 changed files with 70 additions and 17 deletions

View File

@ -489,6 +489,15 @@ static inline void write_seqlock(seqlock_t *sl)
__raw_write_seqcount_begin(&sl->seqcount);
}
/*
 * Trylock flavour of the seqlock write side.
 *
 * Attempts to take sl->lock with spin_trylock(). If the lock is obtained,
 * the write section of sl->seqcount is opened and 1 is returned; the
 * caller then owns the write side. If the lock could not be obtained,
 * nothing is modified and 0 is returned.
 */
static inline int try_write_seqlock(seqlock_t *sl)
{
	/* Lock is contended: bail out without touching the sequence count. */
	if (!spin_trylock(&sl->lock))
		return 0;

	__raw_write_seqcount_begin(&sl->seqcount);
	return 1;
}
static inline void write_sequnlock(seqlock_t *sl)
{
__raw_write_seqcount_end(&sl->seqcount);

View File

@ -6,6 +6,7 @@
#include <linux/socket.h>
#include <linux/rtnetlink.h>
#include <linux/pkt_sched.h>
#include <net/net_seq_lock.h>
struct gnet_stats_basic_cpu {
struct gnet_stats_basic_packed bstats;
@ -36,15 +37,15 @@ int gnet_stats_start_copy_compat(struct sk_buff *skb, int type,
spinlock_t *lock, struct gnet_dump *d,
int padattr);
int gnet_stats_copy_basic(const seqcount_t *running,
int gnet_stats_copy_basic(net_seqlock_t *running,
struct gnet_dump *d,
struct gnet_stats_basic_cpu __percpu *cpu,
struct gnet_stats_basic_packed *b);
void __gnet_stats_copy_basic(const seqcount_t *running,
void __gnet_stats_copy_basic(net_seqlock_t *running,
struct gnet_stats_basic_packed *bstats,
struct gnet_stats_basic_cpu __percpu *cpu,
struct gnet_stats_basic_packed *b);
int gnet_stats_copy_basic_hw(const seqcount_t *running,
int gnet_stats_copy_basic_hw(net_seqlock_t *running,
struct gnet_dump *d,
struct gnet_stats_basic_cpu __percpu *cpu,
struct gnet_stats_basic_packed *b);
@ -64,13 +65,13 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
struct gnet_stats_basic_cpu __percpu *cpu_bstats,
struct net_rate_estimator __rcu **rate_est,
spinlock_t *lock,
seqcount_t *running, struct nlattr *opt);
net_seqlock_t *running, struct nlattr *opt);
void gen_kill_estimator(struct net_rate_estimator __rcu **ptr);
int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
struct gnet_stats_basic_cpu __percpu *cpu_bstats,
struct net_rate_estimator __rcu **ptr,
spinlock_t *lock,
seqcount_t *running, struct nlattr *opt);
net_seqlock_t *running, struct nlattr *opt);
bool gen_estimator_active(struct net_rate_estimator __rcu **ptr);
bool gen_estimator_read(struct net_rate_estimator __rcu **ptr,
struct gnet_stats_rate_est64 *sample);

View File

@ -0,0 +1,15 @@
#ifndef __NET_NET_SEQ_LOCK_H__
#define __NET_NET_SEQ_LOCK_H__
/*
 * Configuration-dependent aliases for the sequence primitive used by the
 * networking statistics / Qdisc code:
 *
 *   net_seqlock_t            - the underlying type
 *   net_seq_begin(__r)       - open a read section on __r; the result is the
 *                              value to pass to net_seq_retry()
 *   net_seq_retry(__r, __s)  - non-zero when the read section opened with
 *                              __s has to be repeated
 */
#ifdef CONFIG_PREEMPT_RT
/* PREEMPT_RT: map onto a full seqlock_t and its read_seqbegin()/read_seqretry(). */
# define net_seqlock_t seqlock_t
# define net_seq_begin(__r) read_seqbegin(__r)
# define net_seq_retry(__r, __s) read_seqretry(__r, __s)
#else
/* Otherwise: map onto a plain seqcount_t and the read_seqcount_*() helpers. */
# define net_seqlock_t seqcount_t
# define net_seq_begin(__r) read_seqcount_begin(__r)
# define net_seq_retry(__r, __s) read_seqcount_retry(__r, __s)
#endif /* CONFIG_PREEMPT_RT */
#endif /* __NET_NET_SEQ_LOCK_H__ */

View File

@ -10,6 +10,7 @@
#include <linux/percpu.h>
#include <linux/dynamic_queue_limits.h>
#include <linux/list.h>
#include <net/net_seq_lock.h>
#include <linux/refcount.h>
#include <linux/workqueue.h>
#include <linux/mutex.h>
@ -101,7 +102,7 @@ struct Qdisc {
struct sk_buff_head gso_skb ____cacheline_aligned_in_smp;
struct qdisc_skb_head q;
struct gnet_stats_basic_packed bstats;
seqcount_t running;
net_seqlock_t running;
struct gnet_stats_queue qstats;
unsigned long state;
struct Qdisc *next_sched;
@ -139,7 +140,11 @@ static inline bool qdisc_is_running(struct Qdisc *qdisc)
{
if (qdisc->flags & TCQ_F_NOLOCK)
return spin_is_locked(&qdisc->seqlock);
#ifdef CONFIG_PREEMPT_RT
return spin_is_locked(&qdisc->running.lock) ? true : false;
#else
return (raw_read_seqcount(&qdisc->running) & 1) ? true : false;
#endif
}
static inline bool qdisc_is_percpu_stats(const struct Qdisc *q)
@ -200,17 +205,27 @@ nolock_empty:
} else if (qdisc_is_running(qdisc)) {
return false;
}
#ifdef CONFIG_PREEMPT_RT
if (try_write_seqlock(&qdisc->running))
return true;
return false;
#else
/* Variant of write_seqcount_begin() telling lockdep a trylock
* was attempted.
*/
raw_write_seqcount_begin(&qdisc->running);
seqcount_acquire(&qdisc->running.dep_map, 0, 1, _RET_IP_);
return true;
#endif
}
static inline void qdisc_run_end(struct Qdisc *qdisc)
{
#ifdef CONFIG_PREEMPT_RT
write_sequnlock(&qdisc->running);
#else
write_seqcount_end(&qdisc->running);
#endif
if (qdisc->flags & TCQ_F_NOLOCK) {
spin_unlock(&qdisc->seqlock);
@ -589,7 +604,7 @@ static inline spinlock_t *qdisc_root_sleeping_lock(const struct Qdisc *qdisc)
return qdisc_lock(root);
}
static inline seqcount_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc)
static inline net_seqlock_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc)
{
struct Qdisc *root = qdisc_root_sleeping(qdisc);

View File

@ -42,7 +42,7 @@
struct net_rate_estimator {
struct gnet_stats_basic_packed *bstats;
spinlock_t *stats_lock;
seqcount_t *running;
net_seqlock_t *running;
struct gnet_stats_basic_cpu __percpu *cpu_bstats;
u8 ewma_log;
u8 intvl_log; /* period : (250ms << intvl_log) */
@ -125,7 +125,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
struct gnet_stats_basic_cpu __percpu *cpu_bstats,
struct net_rate_estimator __rcu **rate_est,
spinlock_t *lock,
seqcount_t *running,
net_seqlock_t *running,
struct nlattr *opt)
{
struct gnet_estimator *parm = nla_data(opt);
@ -226,7 +226,7 @@ int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
struct gnet_stats_basic_cpu __percpu *cpu_bstats,
struct net_rate_estimator __rcu **rate_est,
spinlock_t *lock,
seqcount_t *running, struct nlattr *opt)
net_seqlock_t *running, struct nlattr *opt)
{
return gen_new_estimator(bstats, cpu_bstats, rate_est,
lock, running, opt);

View File

@ -138,7 +138,7 @@ __gnet_stats_copy_basic_cpu(struct gnet_stats_basic_packed *bstats,
}
void
__gnet_stats_copy_basic(const seqcount_t *running,
__gnet_stats_copy_basic(net_seqlock_t *running,
struct gnet_stats_basic_packed *bstats,
struct gnet_stats_basic_cpu __percpu *cpu,
struct gnet_stats_basic_packed *b)
@ -151,15 +151,15 @@ __gnet_stats_copy_basic(const seqcount_t *running,
}
do {
if (running)
seq = read_seqcount_begin(running);
seq = net_seq_begin(running);
bstats->bytes = b->bytes;
bstats->packets = b->packets;
} while (running && read_seqcount_retry(running, seq));
} while (running && net_seq_retry(running, seq));
}
EXPORT_SYMBOL(__gnet_stats_copy_basic);
static int
___gnet_stats_copy_basic(const seqcount_t *running,
___gnet_stats_copy_basic(net_seqlock_t *running,
struct gnet_dump *d,
struct gnet_stats_basic_cpu __percpu *cpu,
struct gnet_stats_basic_packed *b,
@ -200,7 +200,7 @@ ___gnet_stats_copy_basic(const seqcount_t *running,
* if the room in the socket buffer was not sufficient.
*/
int
gnet_stats_copy_basic(const seqcount_t *running,
gnet_stats_copy_basic(net_seqlock_t *running,
struct gnet_dump *d,
struct gnet_stats_basic_cpu __percpu *cpu,
struct gnet_stats_basic_packed *b)
@ -224,7 +224,7 @@ EXPORT_SYMBOL(gnet_stats_copy_basic);
* if the room in the socket buffer was not sufficient.
*/
int
gnet_stats_copy_basic_hw(const seqcount_t *running,
gnet_stats_copy_basic_hw(net_seqlock_t *running,
struct gnet_dump *d,
struct gnet_stats_basic_cpu __percpu *cpu,
struct gnet_stats_basic_packed *b)

View File

@ -1255,7 +1255,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
rcu_assign_pointer(sch->stab, stab);
}
if (tca[TCA_RATE]) {
seqcount_t *running;
net_seqlock_t *running;
err = -EOPNOTSUPP;
if (sch->flags & TCQ_F_MQROOT) {

View File

@ -583,7 +583,11 @@ struct Qdisc noop_qdisc = {
.ops = &noop_qdisc_ops,
.q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
.dev_queue = &noop_netdev_queue,
#ifdef CONFIG_PREEMPT_RT
.running = __SEQLOCK_UNLOCKED(noop_qdisc.running),
#else
.running = SEQCNT_ZERO(noop_qdisc.running),
#endif
.busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock),
.gso_skb = {
.next = (struct sk_buff *)&noop_qdisc.gso_skb,
@ -898,7 +902,11 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
spin_lock_init(&sch->busylock);
/* seqlock has the same scope of busylock, for NOLOCK qdisc */
spin_lock_init(&sch->seqlock);
#ifdef CONFIG_PREEMPT_RT
seqlock_init(&sch->running);
#else
seqcount_init(&sch->running);
#endif
sch->ops = ops;
sch->flags = ops->static_flags;
@ -912,7 +920,12 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
if (sch != &noop_qdisc) {
lockdep_set_class(&sch->busylock, &dev->qdisc_tx_busylock_key);
lockdep_set_class(&sch->seqlock, &dev->qdisc_tx_busylock_key);
#ifdef CONFIG_PREEMPT_RT
lockdep_set_class(&sch->running.seqcount, &dev->qdisc_running_key);
lockdep_set_class(&sch->running.lock, &dev->qdisc_running_key);
#else
lockdep_set_class(&sch->running, &dev->qdisc_running_key);
#endif
}
return sch;