From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 14 Sep 2016 17:36:35 +0200
Subject: [PATCH] net/Qdisc: use a seqlock instead of a seqcount

The seqcount disables preemption on -RT while it is held, which we
can't remove. We also don't want the reader to spin for ages if the
writer is scheduled out. The seqlock, on the other hand, serializes
readers: they sleep on the lock while a writer is active (see the
sketch after the diffstat).

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 include/linux/seqlock.h    |    9 +++++++++
 include/net/gen_stats.h    |    9 +++++----
 include/net/net_seq_lock.h |   15 +++++++++++++++
 include/net/sch_generic.h  |   21 ++++++++++++++++++---
 net/core/gen_estimator.c   |    6 +++---
 net/core/gen_stats.c       |    8 ++++----
 net/sched/sch_api.c        |    2 +-
 net/sched/sch_generic.c    |   12 ++++++++++++
 8 files changed, 67 insertions(+), 15 deletions(-)
 create mode 100644 include/net/net_seq_lock.h
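
For illustration only (not part of the patch): the difference between
the two reader patterns can be sketched in plain userspace C. The
names here (sc_read(), sl_read(), the stats structs) are made up, and
the memory-ordering details of the real kernel primitives are elided;
this is a minimal sketch of the behaviour, not kernel code. The
seqcount-style reader busy-waits while an update is in flight, so a
preempted writer can keep a high-priority reader spinning; a
sleeping-lock reader instead blocks until the writer is done, which is
the behaviour the -RT tree wants.

/* Hypothetical userspace sketch, pthread-based; names are made up. */
#include <pthread.h>
#include <stdatomic.h>

struct sc_stats {
        atomic_uint seq;        /* even: idle, odd: write in progress */
        unsigned long bytes;
};

/* seqcount-style reader: retry until a stable, even sequence is seen.
 * If the writer is preempted mid-update the sequence stays odd and
 * this loop spins for as long as the writer is scheduled out. */
static unsigned long sc_read(struct sc_stats *s)
{
        unsigned int start;
        unsigned long v;

        do {
                while ((start = atomic_load(&s->seq)) & 1)
                        ;       /* busy-wait on an in-flight writer */
                v = s->bytes;
        } while (atomic_load(&s->seq) != start);
        return v;
}

struct sl_stats {
        pthread_mutex_t lock;
        unsigned long bytes;
};

/* sleeping-lock reader: block until the writer releases the lock, so
 * a preempted writer puts the reader to sleep instead of burning CPU. */
static unsigned long sl_read(struct sl_stats *s)
{
        unsigned long v;

        pthread_mutex_lock(&s->lock);
        v = s->bytes;
        pthread_mutex_unlock(&s->lock);
        return v;
}

On PREEMPT_RT the spinlock inside seqlock_t is a sleeping lock, so the
read_seqbegin()/read_seqretry() pair used below behaves like sl_read()
while a writer is active, and try_write_seqlock() preserves the
trylock semantics that qdisc_run_begin() relies on.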

--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -481,6 +481,15 @@ static inline void write_seqlock(seqlock
        __raw_write_seqcount_begin(&sl->seqcount);
 }
 
+static inline int try_write_seqlock(seqlock_t *sl)
+{
+       if (spin_trylock(&sl->lock)) {
+               __raw_write_seqcount_begin(&sl->seqcount);
+               return 1;
+       }
+       return 0;
+}
+
 static inline void write_sequnlock(seqlock_t *sl)
 {
        __raw_write_seqcount_end(&sl->seqcount);
--- a/include/net/gen_stats.h
+++ b/include/net/gen_stats.h
@@ -5,6 +5,7 @@
 #include <linux/socket.h>
 #include <linux/rtnetlink.h>
 #include <linux/pkt_sched.h>
+#include <net/net_seq_lock.h>
 
 struct gnet_stats_basic_cpu {
        struct gnet_stats_basic_packed bstats;
@@ -33,11 +34,11 @@ int gnet_stats_start_copy_compat(struct
                        spinlock_t *lock, struct gnet_dump *d,
                        int padattr);
 
-int gnet_stats_copy_basic(const seqcount_t *running,
+int gnet_stats_copy_basic(net_seqlock_t *running,
                          struct gnet_dump *d,
                          struct gnet_stats_basic_cpu __percpu *cpu,
                          struct gnet_stats_basic_packed *b);
-void __gnet_stats_copy_basic(const seqcount_t *running,
+void __gnet_stats_copy_basic(net_seqlock_t *running,
                             struct gnet_stats_basic_packed *bstats,
                             struct gnet_stats_basic_cpu __percpu *cpu,
                             struct gnet_stats_basic_packed *b);
@@ -55,14 +56,14 @@ int gen_new_estimator(struct gnet_stats_
                      struct gnet_stats_basic_cpu __percpu *cpu_bstats,
                      struct gnet_stats_rate_est64 *rate_est,
                      spinlock_t *stats_lock,
-                     seqcount_t *running, struct nlattr *opt);
+                     net_seqlock_t *running, struct nlattr *opt);
 void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
                        struct gnet_stats_rate_est64 *rate_est);
 int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
                          struct gnet_stats_basic_cpu __percpu *cpu_bstats,
                          struct gnet_stats_rate_est64 *rate_est,
                          spinlock_t *stats_lock,
-                         seqcount_t *running, struct nlattr *opt);
+                         net_seqlock_t *running, struct nlattr *opt);
 bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats,
                          const struct gnet_stats_rate_est64 *rate_est);
 #endif
--- /dev/null
+++ b/include/net/net_seq_lock.h
@@ -0,0 +1,15 @@
+#ifndef __NET_NET_SEQ_LOCK_H__
+#define __NET_NET_SEQ_LOCK_H__
+
+#ifdef CONFIG_PREEMPT_RT_BASE
+# define net_seqlock_t                 seqlock_t
+# define net_seq_begin(__r)            read_seqbegin(__r)
+# define net_seq_retry(__r, __s)       read_seqretry(__r, __s)
+
+#else
+# define net_seqlock_t                 seqcount_t
+# define net_seq_begin(__r)            read_seqcount_begin(__r)
+# define net_seq_retry(__r, __s)       read_seqcount_retry(__r, __s)
+#endif
+
+#endif
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -10,6 +10,7 @@
 #include <linux/dynamic_queue_limits.h>
 #include <net/gen_stats.h>
 #include <net/rtnetlink.h>
+#include <net/net_seq_lock.h>
 
 struct Qdisc_ops;
 struct qdisc_walker;
@@ -86,7 +87,7 @@ struct Qdisc {
        struct sk_buff *gso_skb ____cacheline_aligned_in_smp;
        struct qdisc_skb_head q;
        struct gnet_stats_basic_packed bstats;
-       seqcount_t running;
+       net_seqlock_t running;
        struct gnet_stats_queue qstats;
        unsigned long state;
        struct Qdisc *next_sched;
@@ -98,13 +99,22 @@ struct Qdisc {
        spinlock_t busylock ____cacheline_aligned_in_smp;
 };
 
-static inline bool qdisc_is_running(const struct Qdisc *qdisc)
+static inline bool qdisc_is_running(struct Qdisc *qdisc)
 {
+#ifdef CONFIG_PREEMPT_RT_BASE
+       return spin_is_locked(&qdisc->running.lock) ? true : false;
+#else
        return (raw_read_seqcount(&qdisc->running) & 1) ? true : false;
+#endif
 }
 
 static inline bool qdisc_run_begin(struct Qdisc *qdisc)
 {
+#ifdef CONFIG_PREEMPT_RT_BASE
+       if (try_write_seqlock(&qdisc->running))
+               return true;
+       return false;
+#else
        if (qdisc_is_running(qdisc))
                return false;
        /* Variant of write_seqcount_begin() telling lockdep a trylock
@@ -113,11 +123,16 @@ static inline bool qdisc_run_begin(struc
        raw_write_seqcount_begin(&qdisc->running);
        seqcount_acquire(&qdisc->running.dep_map, 0, 1, _RET_IP_);
        return true;
+#endif
 }
 
 static inline void qdisc_run_end(struct Qdisc *qdisc)
 {
+#ifdef CONFIG_PREEMPT_RT_BASE
+       write_sequnlock(&qdisc->running);
+#else
        write_seqcount_end(&qdisc->running);
+#endif
 }
 
 static inline bool qdisc_may_bulk(const struct Qdisc *qdisc)
@@ -308,7 +323,7 @@ static inline spinlock_t *qdisc_root_sle
        return qdisc_lock(root);
 }
 
-static inline seqcount_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc)
+static inline net_seqlock_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc)
 {
        struct Qdisc *root = qdisc_root_sleeping(qdisc);
 
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -84,7 +84,7 @@ struct gen_estimator
        struct gnet_stats_basic_packed *bstats;
        struct gnet_stats_rate_est64 *rate_est;
        spinlock_t *stats_lock;
-       seqcount_t *running;
+       net_seqlock_t *running;
        int ewma_log;
        u32 last_packets;
        unsigned long avpps;
@@ -213,7 +213,7 @@ int gen_new_estimator(struct gnet_stats_
                      struct gnet_stats_basic_cpu __percpu *cpu_bstats,
                      struct gnet_stats_rate_est64 *rate_est,
                      spinlock_t *stats_lock,
-                     seqcount_t *running,
+                     net_seqlock_t *running,
                      struct nlattr *opt)
 {
        struct gen_estimator *est;
@@ -309,7 +309,7 @@ int gen_replace_estimator(struct gnet_st
                          struct gnet_stats_basic_cpu __percpu *cpu_bstats,
                          struct gnet_stats_rate_est64 *rate_est,
                          spinlock_t *stats_lock,
-                         seqcount_t *running, struct nlattr *opt)
+                         net_seqlock_t *running, struct nlattr *opt)
 {
        gen_kill_estimator(bstats, rate_est);
        return gen_new_estimator(bstats, cpu_bstats, rate_est, stats_lock, running, opt);
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -130,7 +130,7 @@ static void
 }
 
 void
-__gnet_stats_copy_basic(const seqcount_t *running,
+__gnet_stats_copy_basic(net_seqlock_t *running,
                        struct gnet_stats_basic_packed *bstats,
                        struct gnet_stats_basic_cpu __percpu *cpu,
                        struct gnet_stats_basic_packed *b)
@@ -143,10 +143,10 @@ void
        }
        do {
                if (running)
-                       seq = read_seqcount_begin(running);
+                       seq = net_seq_begin(running);
                bstats->bytes = b->bytes;
                bstats->packets = b->packets;
-       } while (running && read_seqcount_retry(running, seq));
+       } while (running && net_seq_retry(running, seq));
 }
 EXPORT_SYMBOL(__gnet_stats_copy_basic);
 
@@ -164,7 +164,7 @@ EXPORT_SYMBOL(__gnet_stats_copy_basic);
  * if the room in the socket buffer was not sufficient.
  */
 int
-gnet_stats_copy_basic(const seqcount_t *running,
+gnet_stats_copy_basic(net_seqlock_t *running,
                      struct gnet_dump *d,
                      struct gnet_stats_basic_cpu __percpu *cpu,
                      struct gnet_stats_basic_packed *b)
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -981,7 +981,7 @@ static struct Qdisc *qdisc_create(struct
                rcu_assign_pointer(sch->stab, stab);
        }
        if (tca[TCA_RATE]) {
-               seqcount_t *running;
+               net_seqlock_t *running;
 
                err = -EOPNOTSUPP;
                if (sch->flags & TCQ_F_MQROOT)
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -425,7 +425,11 @@ struct Qdisc noop_qdisc = {
        .ops = &noop_qdisc_ops,
        .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
        .dev_queue = &noop_netdev_queue,
+#ifdef CONFIG_PREEMPT_RT_BASE
+       .running = __SEQLOCK_UNLOCKED(noop_qdisc.running),
+#else
        .running = SEQCNT_ZERO(noop_qdisc.running),
+#endif
        .busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock),
 };
 EXPORT_SYMBOL(noop_qdisc);
@@ -624,9 +628,17 @@ struct Qdisc *qdisc_alloc(struct netdev_
        lockdep_set_class(&sch->busylock,
                          dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
 
+#ifdef CONFIG_PREEMPT_RT_BASE
+       seqlock_init(&sch->running);
+       lockdep_set_class(&sch->running.seqcount,
+                         dev->qdisc_running_key ?: &qdisc_running_key);
+       lockdep_set_class(&sch->running.lock,
+                         dev->qdisc_running_key ?: &qdisc_running_key);
+#else
        seqcount_init(&sch->running);
        lockdep_set_class(&sch->running,
                          dev->qdisc_running_key ?: &qdisc_running_key);
+#endif
 
        sch->ops = ops;
        sch->enqueue = ops->enqueue;