| From b9e95bb455e96e1fe025caf937bd0e4ddfda7c90 Mon Sep 17 00:00:00 2001 |
| From: Ingo Molnar <mingo@elte.hu> |
| Date: Fri, 3 Jul 2009 08:30:08 -0500 |
| Subject: [PATCH] net: preempt-rt support |
| |
| commit 0f2c3c2b4cbac06fa3080bc350b0defb9d0f525e in tip. |
| |
| Make the core network stack work with threaded interrupts and |
| preemptible softirqs: use raw_smp_processor_id() for the |
| xmit_lock_owner bookkeeping and the per-CPU accesses in the hot |
| paths, skip the xmit recursion check on PREEMPT_RT (threaded |
| interrupts cannot recurse there), take the qdisc root lock |
| unconditionally in net_tx_action() when softirqs are preemptible |
| so the softirq cannot re-raise itself forever, use the waking |
| raise_softirq_irqoff() in __napi_schedule(), keep interrupts |
| enabled in the netpoll transmit paths on -rt, fall back to |
| netpoll's skb pool when allocating from atomic context, and poll |
| with msleep(1) instead of yield() in dev_deactivate(). |
| |
| Signed-off-by: Ingo Molnar <mingo@elte.hu> |
| Signed-off-by: Thomas Gleixner <tglx@linutronix.de> |
| Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> |
| |
| diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h |
| index fa8b476..6be4dde 100644 |
| --- a/include/linux/netdevice.h |
| +++ b/include/linux/netdevice.h |
| @@ -1790,14 +1790,14 @@ static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu) |
| static inline void __netif_tx_lock_bh(struct netdev_queue *txq) |
| { |
| spin_lock_bh(&txq->_xmit_lock); |
| - txq->xmit_lock_owner = smp_processor_id(); |
| + txq->xmit_lock_owner = raw_smp_processor_id(); |
| } |
| |
| static inline int __netif_tx_trylock(struct netdev_queue *txq) |
| { |
| int ok = spin_trylock(&txq->_xmit_lock); |
| if (likely(ok)) |
| - txq->xmit_lock_owner = smp_processor_id(); |
| + txq->xmit_lock_owner = raw_smp_processor_id(); |
| return ok; |
| } |
| |
| @@ -1831,7 +1831,7 @@ static inline void netif_tx_lock(struct net_device *dev) |
| int cpu; |
| |
| spin_lock(&dev->tx_global_lock); |
| - cpu = smp_processor_id(); |
| + cpu = raw_smp_processor_id(); |
| for (i = 0; i < dev->num_tx_queues; i++) { |
| struct netdev_queue *txq = netdev_get_tx_queue(dev, i); |
| |
| @@ -1894,7 +1894,7 @@ static inline void netif_tx_disable(struct net_device *dev) |
| int cpu; |
| |
| local_bh_disable(); |
| - cpu = smp_processor_id(); |
| + cpu = raw_smp_processor_id(); |
| for (i = 0; i < dev->num_tx_queues; i++) { |
| struct netdev_queue *txq = netdev_get_tx_queue(dev, i); |
| |
| diff --git a/net/core/dev.c b/net/core/dev.c |
| index aa5e7fb..08e1a36 100644 |
| --- a/net/core/dev.c |
| +++ b/net/core/dev.c |
| @@ -2127,9 +2127,16 @@ gso: |
| Either shot noqueue qdisc, it is even simpler 8) |
| */ |
| if (dev->flags & IFF_UP) { |
| - int cpu = smp_processor_id(); /* ok because BHs are off */ |
| + int cpu = raw_smp_processor_id(); /* ok because BHs are off */ |
| |
| + /* |
| + * No need to check for recursion with threaded interrupts: |
| + */ |
| +#ifdef CONFIG_PREEMPT_RT |
| + if (1) { |
| +#else |
| if (txq->xmit_lock_owner != cpu) { |
| +#endif |
| |
| HARD_TX_LOCK(dev, txq, cpu); |
| |
| @@ -2248,7 +2253,8 @@ EXPORT_SYMBOL(netif_rx_ni); |
| |
| static void net_tx_action(struct softirq_action *h) |
| { |
| - struct softnet_data *sd = &__get_cpu_var(softnet_data); |
| + struct softnet_data *sd = &per_cpu(softnet_data, |
| + raw_smp_processor_id()); |
| |
| if (sd->completion_queue) { |
| struct sk_buff *clist; |
| @@ -2264,6 +2270,11 @@ static void net_tx_action(struct softirq_action *h) |
| |
| WARN_ON(atomic_read(&skb->users)); |
| __kfree_skb(skb); |
| + /* |
| + * Safe to reschedule - the list is private |
| + * at this point. |
| + */ |
| + cond_resched_softirq_context(); |
| } |
| } |
| |
| @@ -2282,6 +2293,22 @@ static void net_tx_action(struct softirq_action *h) |
| head = head->next_sched; |
| |
| root_lock = qdisc_lock(q); |
| + /* |
| + * We are executing in softirq context here, and |
| + * if softirqs are preemptible, we must avoid |
| + * infinite reactivation of the softirq by |
| + * either the tx handler, or by netif_schedule(). |
| + * (it would result in an infinitely looping |
| + * softirq context) |
| + * So we take the spinlock unconditionally. |
| + */ |
| +#ifdef CONFIG_PREEMPT_SOFTIRQS |
| + spin_lock(root_lock); |
| + smp_mb__before_clear_bit(); |
| + clear_bit(__QDISC_STATE_SCHED, &q->state); |
| + qdisc_run(q); |
| + spin_unlock(root_lock); |
| +#else |
| if (spin_trylock(root_lock)) { |
| smp_mb__before_clear_bit(); |
| clear_bit(__QDISC_STATE_SCHED, |
| @@ -2298,6 +2325,7 @@ static void net_tx_action(struct softirq_action *h) |
| &q->state); |
| } |
| } |
| +#endif |
| } |
| } |
| } |
| @@ -2513,7 +2541,7 @@ int netif_receive_skb(struct sk_buff *skb) |
| skb->dev = master; |
| } |
| |
| - __get_cpu_var(netdev_rx_stat).total++; |
| + per_cpu(netdev_rx_stat, raw_smp_processor_id()).total++; |
| |
| skb_reset_network_header(skb); |
| skb_reset_transport_header(skb); |
| @@ -2910,9 +2938,10 @@ EXPORT_SYMBOL(napi_gro_frags); |
| static int process_backlog(struct napi_struct *napi, int quota) |
| { |
| int work = 0; |
| - struct softnet_data *queue = &__get_cpu_var(softnet_data); |
| + struct softnet_data *queue; |
| unsigned long start_time = jiffies; |
| |
| + queue = &per_cpu(softnet_data, raw_smp_processor_id()); |
| napi->weight = weight_p; |
| do { |
| struct sk_buff *skb; |
| @@ -2944,7 +2973,7 @@ void __napi_schedule(struct napi_struct *n) |
| |
| local_irq_save(flags); |
| list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list); |
| - __raise_softirq_irqoff(NET_RX_SOFTIRQ); |
| + raise_softirq_irqoff(NET_RX_SOFTIRQ); |
| local_irq_restore(flags); |
| } |
| EXPORT_SYMBOL(__napi_schedule); |
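| |
| The unconditional spin_lock() added to net_tx_action() above is the key |
| change for preemptible softirqs. In mainline, losing the trylock race is |
| handled by re-raising the softirq; once the softirq runs in a preemptible |
| thread, the lock holder may be preempted by that very thread, so the retry |
| path can keep re-arming NET_TX_SOFTIRQ indefinitely. The mainline fallback |
| that the #else branch keeps looks roughly like this (paraphrased from the |
| surrounding net/core/dev.c, shown here only for context): |
| |
| if (spin_trylock(root_lock)) { |
| smp_mb__before_clear_bit(); |
| clear_bit(__QDISC_STATE_SCHED, &q->state); |
| qdisc_run(q); |
| spin_unlock(root_lock); |
| } else if (!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)) { |
| /* re-arm NET_TX_SOFTIRQ and try again on the next run */ |
| __netif_reschedule(q); |
| } |
| |
| With CONFIG_PREEMPT_SOFTIRQS the spinlock may sleep, so simply taking it |
| blocks until the holder releases it instead of spinning through |
| reschedules; the cond_resched_softirq_context() call in the |
| completion-queue loop serves the same latency goal once the list has been |
| made private. |
| |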
| diff --git a/net/core/netpoll.c b/net/core/netpoll.c |
| index a58f59b..2fbd53b 100644 |
| --- a/net/core/netpoll.c |
| +++ b/net/core/netpoll.c |
| @@ -74,20 +74,20 @@ static void queue_process(struct work_struct *work) |
| |
| txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); |
| |
| - local_irq_save(flags); |
| + local_irq_save_nort(flags); |
| __netif_tx_lock(txq, smp_processor_id()); |
| if (netif_tx_queue_stopped(txq) || |
| netif_tx_queue_frozen(txq) || |
| ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) { |
| skb_queue_head(&npinfo->txq, skb); |
| __netif_tx_unlock(txq); |
| - local_irq_restore(flags); |
| + local_irq_restore_nort(flags); |
| |
| schedule_delayed_work(&npinfo->tx_work, HZ/10); |
| return; |
| } |
| __netif_tx_unlock(txq); |
| - local_irq_restore(flags); |
| + local_irq_restore_nort(flags); |
| } |
| } |
| |
| @@ -158,7 +158,7 @@ static void poll_napi(struct net_device *dev) |
| int budget = 16; |
| |
| list_for_each_entry(napi, &dev->napi_list, dev_list) { |
| - if (napi->poll_owner != smp_processor_id() && |
| + if (napi->poll_owner != raw_smp_processor_id() && |
| spin_trylock(&napi->poll_lock)) { |
| budget = poll_one_napi(dev->npinfo, napi, budget); |
| spin_unlock(&napi->poll_lock); |
| @@ -219,30 +219,35 @@ static void refill_skbs(void) |
| |
| static void zap_completion_queue(void) |
| { |
| - unsigned long flags; |
| struct softnet_data *sd = &get_cpu_var(softnet_data); |
| + struct sk_buff *clist = NULL; |
| + unsigned long flags; |
| |
| if (sd->completion_queue) { |
| - struct sk_buff *clist; |
| |
| local_irq_save(flags); |
| clist = sd->completion_queue; |
| sd->completion_queue = NULL; |
| local_irq_restore(flags); |
| - |
| - while (clist != NULL) { |
| - struct sk_buff *skb = clist; |
| - clist = clist->next; |
| - if (skb->destructor) { |
| - atomic_inc(&skb->users); |
| - dev_kfree_skb_any(skb); /* put this one back */ |
| - } else { |
| - __kfree_skb(skb); |
| - } |
| - } |
| } |
| |
| + |
| + /* |
| + * Took the list private, can drop our softnet |
| + * reference: |
| + */ |
| put_cpu_var(softnet_data); |
| + |
| + while (clist != NULL) { |
| + struct sk_buff *skb = clist; |
| + clist = clist->next; |
| + if (skb->destructor) { |
| + atomic_inc(&skb->users); |
| + dev_kfree_skb_any(skb); /* put this one back */ |
| + } else { |
| + __kfree_skb(skb); |
| + } |
| + } |
| } |
| |
| static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve) |
| @@ -250,13 +255,26 @@ static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve) |
| int count = 0; |
| struct sk_buff *skb; |
| |
| +#ifdef CONFIG_PREEMPT_RT |
| + /* |
| + * On -rt skb_pool.lock is schedulable, so if we are |
| + * in an atomic context we just try to dequeue from the |
| + * pool and fail if we cannot get one. |
| + */ |
| + if (in_atomic() || irqs_disabled()) |
| + goto pick_atomic; |
| +#endif |
| zap_completion_queue(); |
| refill_skbs(); |
| repeat: |
| |
| skb = alloc_skb(len, GFP_ATOMIC); |
| - if (!skb) |
| + if (!skb) { |
| +#ifdef CONFIG_PREEMPT_RT |
| +pick_atomic: |
| +#endif |
| skb = skb_dequeue(&skb_pool); |
| + } |
| |
| if (!skb) { |
| if (++count < 10) { |
| @@ -276,7 +294,7 @@ static int netpoll_owner_active(struct net_device *dev) |
| struct napi_struct *napi; |
| |
| list_for_each_entry(napi, &dev->napi_list, dev_list) { |
| - if (napi->poll_owner == smp_processor_id()) |
| + if (napi->poll_owner == raw_smp_processor_id()) |
| return 1; |
| } |
| return 0; |
| @@ -302,7 +320,7 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) |
| |
| txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); |
| |
| - local_irq_save(flags); |
| + local_irq_save_nort(flags); |
| /* try until next clock tick */ |
| for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; |
| tries > 0; --tries) { |
| @@ -329,7 +347,7 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) |
| "netpoll_send_skb(): %s enabled interrupts in poll (%pF)\n", |
| dev->name, ops->ndo_start_xmit); |
| |
| - local_irq_restore(flags); |
| + local_irq_restore_nort(flags); |
| } |
| |
| if (status != NETDEV_TX_OK) { |
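| |
| The _nort() variants used in queue_process() and netpoll_send_skb() are |
| provided earlier in the -rt series rather than by this patch; their usual |
| definitions (an assumption, shown for context only) are approximately: |
| |
| #ifdef CONFIG_PREEMPT_RT |
| /* on -rt: keep interrupts enabled, just record the flags */ |
| # define local_irq_save_nort(flags) local_save_flags(flags) |
| # define local_irq_restore_nort(flags) (void)(flags) |
| #else |
| /* on mainline: behave exactly like the plain helpers */ |
| # define local_irq_save_nort(flags) local_irq_save(flags) |
| # define local_irq_restore_nort(flags) local_irq_restore(flags) |
| #endif |
| |
| The find_skb() change follows the same theme: on -rt the skb_pool lock can |
| sleep, so callers that are already atomic skip zap_completion_queue() and |
| refill_skbs() and go straight to dequeuing from the preallocated pool. |
| |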
| diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c |
| index ac4dec1..e4d0cc4 100644 |
| --- a/net/ipv4/icmp.c |
| +++ b/net/ipv4/icmp.c |
| @@ -202,7 +202,10 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1]; |
| */ |
| static struct sock *icmp_sk(struct net *net) |
| { |
| - return net->ipv4.icmp_sk[smp_processor_id()]; |
| + /* |
| + * Should be safe on PREEMPT_SOFTIRQS/HARDIRQS to use raw_smp_processor_id(): |
| + */ |
| + return net->ipv4.icmp_sk[raw_smp_processor_id()]; |
| } |
| |
| static inline struct sock *icmp_xmit_lock(struct net *net) |
| diff --git a/net/ipv4/route.c b/net/ipv4/route.c |
| index 71d4e5b..f322a91 100644 |
| --- a/net/ipv4/route.c |
| +++ b/net/ipv4/route.c |
| @@ -204,13 +204,13 @@ struct rt_hash_bucket { |
| }; |
| |
| #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \ |
| - defined(CONFIG_PROVE_LOCKING) |
| + defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_PREEMPT_RT) |
| /* |
| * Instead of using one spinlock for each rt_hash_bucket, we use a table of spinlocks |
| * The size of this table is a power of two and depends on the number of CPUS. |
| * (on lockdep we have a quite big spinlock_t, so keep the size down there) |
| */ |
| -#ifdef CONFIG_LOCKDEP |
| +#if defined(CONFIG_LOCKDEP) || defined(CONFIG_PREEMPT_RT) |
| # define RT_HASH_LOCK_SZ 256 |
| #else |
| # if NR_CPUS >= 32 |
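| |
| Adding CONFIG_PREEMPT_RT next to lockdep here follows the existing |
| comment's reasoning: on -rt a spinlock_t wraps an rtmutex and is much |
| larger than a raw ticket lock, so the per-bucket lock table is capped |
| rather than scaled up with NR_CPUS, and the outer #if change makes the |
| table exist on uniprocessor -rt builds too, where these sections are |
| preemptible and still need real locks. The effective sizing after this |
| hunk (values taken from the mainline table the hunk truncates) is roughly: |
| |
| #if defined(CONFIG_LOCKDEP) || defined(CONFIG_PREEMPT_RT) |
| # define RT_HASH_LOCK_SZ 256 /* big spinlock_t: keep the table small */ |
| #elif NR_CPUS >= 32 |
| # define RT_HASH_LOCK_SZ 4096 /* ...stepping down to 256 for small NR_CPUS */ |
| #endif |
| |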
| diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c |
| index 9210e31..053133a 100644 |
| --- a/net/ipv6/netfilter/ip6_tables.c |
| +++ b/net/ipv6/netfilter/ip6_tables.c |
| @@ -384,7 +384,7 @@ ip6t_do_table(struct sk_buff *skb, |
| |
| xt_info_rdlock_bh(); |
| private = table->private; |
| - table_base = private->entries[smp_processor_id()]; |
| + table_base = private->entries[raw_smp_processor_id()]; |
| |
| e = get_entry(table_base, private->hook_entry[hook]); |
| |
| diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c |
| index ff4dd53..6d6a2ac 100644 |
| --- a/net/sched/sch_generic.c |
| +++ b/net/sched/sch_generic.c |
| @@ -12,6 +12,7 @@ |
| */ |
| |
| #include <linux/bitops.h> |
| +#include <linux/kallsyms.h> |
| #include <linux/module.h> |
| #include <linux/types.h> |
| #include <linux/kernel.h> |
| @@ -25,6 +26,7 @@ |
| #include <linux/rcupdate.h> |
| #include <linux/list.h> |
| #include <linux/slab.h> |
| +#include <linux/delay.h> |
| #include <net/pkt_sched.h> |
| |
| /* Main transmission queue. */ |
| @@ -77,7 +79,7 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, |
| { |
| int ret; |
| |
| - if (unlikely(dev_queue->xmit_lock_owner == smp_processor_id())) { |
| + if (unlikely(dev_queue->xmit_lock_owner == raw_smp_processor_id())) { |
| /* |
| * Same CPU holding the lock. It may be a transient |
| * configuration error, when hard_start_xmit() recurses. We |
| @@ -119,7 +121,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, |
| /* And release qdisc */ |
| spin_unlock(root_lock); |
| |
| - HARD_TX_LOCK(dev, txq, smp_processor_id()); |
| + HARD_TX_LOCK(dev, txq, raw_smp_processor_id()); |
| if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq)) |
| ret = dev_hard_start_xmit(skb, dev, txq); |
| |
| @@ -787,9 +789,12 @@ void dev_deactivate(struct net_device *dev) |
| /* Wait for outstanding qdisc-less dev_queue_xmit calls. */ |
| synchronize_rcu(); |
| |
| - /* Wait for outstanding qdisc_run calls. */ |
| + /* |
| + * Wait for outstanding qdisc_run calls. |
| + * TODO: shouldn't this be wakeup-based, instead of polling it? |
| + */ |
| while (some_qdisc_is_busy(dev)) |
| - yield(); |
| + msleep(1); |
| } |
| |
| static void dev_init_scheduler_queue(struct net_device *dev, |
| -- |
| 1.7.1.1 |
| |
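| One final note on the dev_deactivate() hunk: busy-waiting with yield() is |
| fragile once the waiter can run at real-time priority, because yield() only |
| rotates the task within its own priority level, so a lower-priority softirq |
| thread that still has the qdisc busy may never get the CPU and the loop |
| spins forever. msleep(1) actually blocks. The resulting loop, annotated |
| (illustrative commentary, not kernel text): |
| |
| /* Why yield() can live-lock here: a SCHED_FIFO waiter calling yield() |
|  * stays runnable and, being the highest-priority runnable task, is |
|  * immediately rescheduled, so the lower-priority thread that must finish |
|  * qdisc_run() never executes. msleep(1) removes the waiter from the |
|  * runqueue for at least a tick instead. */ |
| while (some_qdisc_is_busy(dev)) |
| msleep(1); /* was: yield(); */ |
| |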