From b9e95bb455e96e1fe025caf937bd0e4ddfda7c90 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 3 Jul 2009 08:30:08 -0500
Subject: [PATCH] net: preempt-rt support
commit 0f2c3c2b4cbac06fa3080bc350b0defb9d0f525e in tip.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index fa8b476..6be4dde 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1790,14 +1790,14 @@ static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu)
static inline void __netif_tx_lock_bh(struct netdev_queue *txq)
{
spin_lock_bh(&txq->_xmit_lock);
- txq->xmit_lock_owner = smp_processor_id();
+ txq->xmit_lock_owner = raw_smp_processor_id();
}
static inline int __netif_tx_trylock(struct netdev_queue *txq)
{
int ok = spin_trylock(&txq->_xmit_lock);
if (likely(ok))
- txq->xmit_lock_owner = smp_processor_id();
+ txq->xmit_lock_owner = raw_smp_processor_id();
return ok;
}
@@ -1831,7 +1831,7 @@ static inline void netif_tx_lock(struct net_device *dev)
int cpu;
spin_lock(&dev->tx_global_lock);
- cpu = smp_processor_id();
+ cpu = raw_smp_processor_id();
for (i = 0; i < dev->num_tx_queues; i++) {
struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
@@ -1894,7 +1894,7 @@ static inline void netif_tx_disable(struct net_device *dev)
int cpu;
local_bh_disable();
- cpu = smp_processor_id();
+ cpu = raw_smp_processor_id();
for (i = 0; i < dev->num_tx_queues; i++) {
struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
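
The netdevice.h hunks above swap smp_processor_id() for raw_smp_processor_id(): with CONFIG_DEBUG_PREEMPT the checked variant warns when it is called from preemptible context, and on -rt these transmit paths can run fully preemptible. The CPU number is only used as an ownership hint here, so a potentially stale value is acceptable. As a rough userspace analogy, not part of the patch (checked_getcpu() is an invented name), the checked-versus-raw split looks like this:

    /* checked_getcpu.c - userspace analogy for smp_processor_id() vs
     * raw_smp_processor_id(): warn when the caller may migrate and the
     * returned CPU number can therefore only be a hint. */
    #define _GNU_SOURCE
    #include <sched.h>
    #include <stdio.h>

    static int checked_getcpu(void)
    {
        cpu_set_t set;

        CPU_ZERO(&set);
        if (sched_getaffinity(0, sizeof(set), &set) == 0 && CPU_COUNT(&set) > 1)
            fprintf(stderr, "checked_getcpu: caller not pinned, result is only a hint\n");
        return sched_getcpu();          /* the "raw" answer */
    }

    int main(void)
    {
        printf("running on CPU %d\n", checked_getcpu());
        return 0;
    }
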
diff --git a/net/core/dev.c b/net/core/dev.c
index aa5e7fb..08e1a36 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2127,9 +2127,14 @@ gso:
Either shot noqueue qdisc, it is even simpler 8)
*/
if (dev->flags & IFF_UP) {
- int cpu = smp_processor_id(); /* ok because BHs are off */
-
+ /*
+ * No need to check for recursion with threaded interrupts:
+ */
+#ifdef CONFIG_PREEMPT_RT
+ if (1) {
+#else
if (txq->xmit_lock_owner != cpu) {
+#endif
HARD_TX_LOCK(dev, txq, cpu);
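
The if (1) under CONFIG_PREEMPT_RT disables the xmit_lock_owner recursion check; as the added comment says, with threaded interrupts there is no need to check for recursion, so the lock is simply taken unconditionally. What the !RT branch keeps is the classic owner-check pattern. A minimal userspace sketch of that pattern, with invented names (struct tx_queue, tx_send(), buggy_driver_xmit()), not the kernel code itself:

    /* tx_owner_demo.c - sketch of the xmit_lock_owner check kept by the
     * !PREEMPT_RT branch: remember who owns the tx lock so a transmit
     * path that loops back into itself is detected and the packet is
     * dropped instead of deadlocking. */
    #include <pthread.h>
    #include <stdio.h>

    struct tx_queue {
        pthread_mutex_t lock;
        pthread_t owner;
        int owned;              /* the owner field is only a hint, as in the kernel */
    };

    static int tx_send(struct tx_queue *txq, const char *pkt, int loop_back);

    /* stands in for a driver whose xmit path re-enters the stack */
    static void buggy_driver_xmit(struct tx_queue *txq, const char *pkt, int loop_back)
    {
        printf("xmit %s\n", pkt);
        if (loop_back)
            tx_send(txq, pkt, 0);       /* recursion: same thread, lock held */
    }

    static int tx_send(struct tx_queue *txq, const char *pkt, int loop_back)
    {
        if (txq->owned && pthread_equal(txq->owner, pthread_self())) {
            fprintf(stderr, "dead loop on %s, dropping\n", pkt);
            return -1;                  /* dev_queue_xmit() drops the skb here */
        }
        pthread_mutex_lock(&txq->lock);
        txq->owner = pthread_self();
        txq->owned = 1;
        buggy_driver_xmit(txq, pkt, loop_back);
        txq->owned = 0;
        pthread_mutex_unlock(&txq->lock);
        return 0;
    }

    int main(void)
    {
        struct tx_queue txq = { .lock = PTHREAD_MUTEX_INITIALIZER };

        tx_send(&txq, "pkt0", 0);       /* normal transmit */
        tx_send(&txq, "pkt1", 1);       /* driver loops back, gets dropped */
        return 0;
    }
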
@@ -2248,7 +2253,8 @@ EXPORT_SYMBOL(netif_rx_ni);
static void net_tx_action(struct softirq_action *h)
{
- struct softnet_data *sd = &__get_cpu_var(softnet_data);
+ struct softnet_data *sd = &per_cpu(softnet_data,
+ raw_smp_processor_id());
if (sd->completion_queue) {
struct sk_buff *clist;
@@ -2264,6 +2270,11 @@ static void net_tx_action(struct softirq_action *h)
WARN_ON(atomic_read(&skb->users));
__kfree_skb(skb);
+ /*
+ * Safe to reschedule - the list is private
+ * at this point.
+ */
+ cond_resched_softirq_context();
}
}
@@ -2282,6 +2293,22 @@ static void net_tx_action(struct softirq_action *h)
head = head->next_sched;
root_lock = qdisc_lock(q);
+ /*
+ * We are executing in softirq context here, and
+ * if softirqs are preemptible, we must avoid
+ * infinite reactivation of the softirq by
+ * either the tx handler, or by netif_schedule().
+ * (it would result in an infinitely looping
+ * softirq context)
+ * So we take the spinlock unconditionally.
+ */
+#ifdef CONFIG_PREEMPT_SOFTIRQS
+ spin_lock(root_lock);
+ smp_mb__before_clear_bit();
+ clear_bit(__QDISC_STATE_SCHED, &q->state);
+ qdisc_run(q);
+ spin_unlock(root_lock);
+#else
if (spin_trylock(root_lock)) {
smp_mb__before_clear_bit();
clear_bit(__QDISC_STATE_SCHED,
@@ -2298,6 +2325,7 @@ static void net_tx_action(struct softirq_action *h)
&q->state);
}
}
+#endif
}
}
}
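
The CONFIG_PREEMPT_SOFTIRQS branch drops the trylock: as the comment above explains, when softirqs run preemptibly, failing the trylock and re-raising the softirq can degenerate into an endlessly looping softirq while the lock holder never gets to finish, so the (now sleepable) root lock is taken unconditionally. A small userspace illustration of the two strategies, with a pthread mutex standing in for the qdisc root lock (qdisc_demo, run_queue() and reschedule() are invented names):

    /* qdisc_lock_demo.c - the two locking strategies in net_tx_action():
     * mainline trylocks the qdisc root lock and re-schedules on failure,
     * the -rt branch simply blocks because the lock may sleep there. */
    #include <pthread.h>
    #include <stdio.h>

    struct qdisc_demo {
        pthread_mutex_t root_lock;
        int pending;
    };

    static void run_queue(struct qdisc_demo *q)
    {
        printf("ran %d pending packets\n", q->pending);
        q->pending = 0;
    }

    static void reschedule(struct qdisc_demo *q)
    {
        printf("lock busy, leaving %d packets for a later softirq\n", q->pending);
    }

    /* mainline: never spin in softirq context; try once, defer on contention */
    static void tx_action_trylock(struct qdisc_demo *q)
    {
        if (pthread_mutex_trylock(&q->root_lock) == 0) {
            run_queue(q);
            pthread_mutex_unlock(&q->root_lock);
        } else {
            reschedule(q);
        }
    }

    /* -rt: the lock is sleepable, so blocking is fine and avoids the
     * re-raise loop that trylock + reschedule could turn into */
    static void tx_action_blocking(struct qdisc_demo *q)
    {
        pthread_mutex_lock(&q->root_lock);
        run_queue(q);
        pthread_mutex_unlock(&q->root_lock);
    }

    int main(void)
    {
        struct qdisc_demo q = { .root_lock = PTHREAD_MUTEX_INITIALIZER, .pending = 3 };

        tx_action_trylock(&q);
        q.pending = 2;
        tx_action_blocking(&q);
        return 0;
    }
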
@@ -2513,7 +2541,7 @@ int netif_receive_skb(struct sk_buff *skb)
skb->dev = master;
}
- __get_cpu_var(netdev_rx_stat).total++;
+ per_cpu(netdev_rx_stat, raw_smp_processor_id()).total++;
skb_reset_network_header(skb);
skb_reset_transport_header(skb);
@@ -2910,9 +2938,10 @@ EXPORT_SYMBOL(napi_gro_frags);
static int process_backlog(struct napi_struct *napi, int quota)
{
int work = 0;
- struct softnet_data *queue = &__get_cpu_var(softnet_data);
+ struct softnet_data *queue;
unsigned long start_time = jiffies;
+ queue = &per_cpu(softnet_data, raw_smp_processor_id());
napi->weight = weight_p;
do {
struct sk_buff *skb;
@@ -2944,7 +2973,7 @@ void __napi_schedule(struct napi_struct *n)
local_irq_save(flags);
list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
- __raise_softirq_irqoff(NET_RX_SOFTIRQ);
+ raise_softirq_irqoff(NET_RX_SOFTIRQ);
local_irq_restore(flags);
}
EXPORT_SYMBOL(__napi_schedule);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index a58f59b..2fbd53b 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -74,20 +74,20 @@ static void queue_process(struct work_struct *work)
txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
- local_irq_save(flags);
+ local_irq_save_nort(flags);
__netif_tx_lock(txq, smp_processor_id());
if (netif_tx_queue_stopped(txq) ||
netif_tx_queue_frozen(txq) ||
ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) {
skb_queue_head(&npinfo->txq, skb);
__netif_tx_unlock(txq);
- local_irq_restore(flags);
+ local_irq_restore_nort(flags);
schedule_delayed_work(&npinfo->tx_work, HZ/10);
return;
}
__netif_tx_unlock(txq);
- local_irq_restore(flags);
+ local_irq_restore_nort(flags);
}
}
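
local_irq_save_nort()/local_irq_restore_nort() are -rt helpers: on mainline they behave like local_irq_save()/restore(), while on PREEMPT_RT they collapse to (almost) nothing, because __netif_tx_lock() takes a sleeping lock there and must not be entered with hard interrupts disabled. The sketch below only mirrors the #ifdef structure; PREEMPT_RT_DEMO and the macro bodies are userspace stand-ins, not the -rt tree's real definitions:

    /* nort_demo.c - the general shape of the *_nort() helpers: real IRQ
     * disabling on mainline, a near no-op on PREEMPT_RT where the lock
     * taken inside the region may sleep. */
    #include <stdio.h>

    #ifdef PREEMPT_RT_DEMO
    # define local_irq_save_nort(flags)    do { (flags) = 0; } while (0)
    # define local_irq_restore_nort(flags) do { (void)(flags); } while (0)
    #else
    # define local_irq_save_nort(flags)    do { (flags) = 1; printf("irqs off\n"); } while (0)
    # define local_irq_restore_nort(flags) do { if (flags) printf("irqs on\n"); } while (0)
    #endif

    int main(void)
    {
        unsigned long flags;

        local_irq_save_nort(flags);
        printf("critical section (would take a sleepable tx lock on -rt)\n");
        local_irq_restore_nort(flags);
        return 0;
    }
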
@@ -158,7 +158,7 @@ static void poll_napi(struct net_device *dev)
int budget = 16;
list_for_each_entry(napi, &dev->napi_list, dev_list) {
- if (napi->poll_owner != smp_processor_id() &&
+ if (napi->poll_owner != raw_smp_processor_id() &&
spin_trylock(&napi->poll_lock)) {
budget = poll_one_napi(dev->npinfo, napi, budget);
spin_unlock(&napi->poll_lock);
@@ -219,30 +219,35 @@ static void refill_skbs(void)
static void zap_completion_queue(void)
{
- unsigned long flags;
struct softnet_data *sd = &get_cpu_var(softnet_data);
+ struct sk_buff *clist = NULL;
+ unsigned long flags;
if (sd->completion_queue) {
- struct sk_buff *clist;
local_irq_save(flags);
clist = sd->completion_queue;
sd->completion_queue = NULL;
local_irq_restore(flags);
-
- while (clist != NULL) {
- struct sk_buff *skb = clist;
- clist = clist->next;
- if (skb->destructor) {
- atomic_inc(&skb->users);
- dev_kfree_skb_any(skb); /* put this one back */
- } else {
- __kfree_skb(skb);
- }
- }
}
+
+ /*
+ * Took the list private, can drop our softnet
+ * reference:
+ */
put_cpu_var(softnet_data);
+
+ while (clist != NULL) {
+ struct sk_buff *skb = clist;
+ clist = clist->next;
+ if (skb->destructor) {
+ atomic_inc(&skb->users);
+ dev_kfree_skb_any(skb); /* put this one back */
+ } else {
+ __kfree_skb(skb);
+ }
+ }
}
static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve)
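
zap_completion_queue() is restructured so that the free loop runs after the per-CPU softnet reference has been dropped: the completion list is detached while the reference is held, and walking it afterwards is safe because the list is private by then (the same reasoning behind the cond_resched_softirq_context() added to net_tx_action() above). A userspace sketch of the detach-then-process pattern, with a mutex standing in for the per-CPU protection (struct pkt and the helper names are invented):

    /* completion_demo.c - detach the whole completion list under the lock,
     * then walk and free it with nothing held, so the free loop may be
     * preempted or sleep. */
    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct pkt {
        struct pkt *next;
        int id;
    };

    static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct pkt *completion_queue;

    static void complete(struct pkt *p)
    {
        pthread_mutex_lock(&queue_lock);
        p->next = completion_queue;
        completion_queue = p;
        pthread_mutex_unlock(&queue_lock);
    }

    static void zap_completion_queue(void)
    {
        struct pkt *clist;

        /* take the list private under the lock ... */
        pthread_mutex_lock(&queue_lock);
        clist = completion_queue;
        completion_queue = NULL;
        pthread_mutex_unlock(&queue_lock);

        /* ... then free it with no lock held */
        while (clist) {
            struct pkt *p = clist;

            clist = clist->next;
            printf("freeing pkt %d\n", p->id);
            free(p);
        }
    }

    int main(void)
    {
        for (int i = 0; i < 3; i++) {
            struct pkt *p = malloc(sizeof(*p));

            p->id = i;
            complete(p);
        }
        zap_completion_queue();
        return 0;
    }
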
@@ -250,13 +255,26 @@ static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve)
int count = 0;
struct sk_buff *skb;
+#ifdef CONFIG_PREEMPT_RT
+ /*
+ * On -rt skb_pool.lock is schedulable, so if we are
+ * in an atomic context we just try to dequeue from the
+ * pool and fail if we cannot get one.
+ */
+ if (in_atomic() || irqs_disabled())
+ goto pick_atomic;
+#endif
zap_completion_queue();
refill_skbs();
repeat:
skb = alloc_skb(len, GFP_ATOMIC);
- if (!skb)
+ if (!skb) {
+#ifdef CONFIG_PREEMPT_RT
+pick_atomic:
+#endif
skb = skb_dequeue(&skb_pool);
+ }
if (!skb) {
if (++count < 10) {
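
The hunk above gives find_skb() an atomic fast path on -rt: the patch comment notes that skb_pool.lock is schedulable there, so callers that may not sleep skip zap_completion_queue()/refill_skbs()/alloc_skb() and only attempt to dequeue an skb that was pre-allocated earlier, failing if none is available. A minimal userspace sketch of that fallback (buf_pool, alloc_buf() and the 'atomic' flag are invented names):

    /* skb_pool_demo.c - allocation with a pre-filled pool as the fallback
     * for callers that must not sleep. */
    #include <stdio.h>
    #include <stdlib.h>

    #define POOL_SIZE 4

    static void *buf_pool[POOL_SIZE];
    static int pool_top;

    static void refill_pool(void)          /* done from sleepable context */
    {
        while (pool_top < POOL_SIZE)
            buf_pool[pool_top++] = malloc(256);
    }

    static void *alloc_buf(int atomic)
    {
        void *buf = NULL;

        if (!atomic) {
            refill_pool();
            buf = malloc(256);             /* the normal alloc_skb() path */
        }
        if (!buf && pool_top > 0)          /* atomic caller, or allocation failed */
            buf = buf_pool[--pool_top];
        return buf;
    }

    int main(void)
    {
        refill_pool();
        printf("sleepable alloc: %p\n", alloc_buf(0));
        printf("atomic alloc:    %p\n", alloc_buf(1));
        return 0;
    }
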
@@ -276,7 +294,7 @@ static int netpoll_owner_active(struct net_device *dev)
struct napi_struct *napi;
list_for_each_entry(napi, &dev->napi_list, dev_list) {
- if (napi->poll_owner == smp_processor_id())
+ if (napi->poll_owner == raw_smp_processor_id())
return 1;
}
return 0;
@@ -302,7 +320,7 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
- local_irq_save(flags);
+ local_irq_save_nort(flags);
/* try until next clock tick */
for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
tries > 0; --tries) {
@@ -329,7 +347,7 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
"netpoll_send_skb(): %s enabled interrupts in poll (%pF)\n",
dev->name, ops->ndo_start_xmit);
- local_irq_restore(flags);
+ local_irq_restore_nort(flags);
}
if (status != NETDEV_TX_OK) {
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index ac4dec1..e4d0cc4 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -202,7 +202,10 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1];
*/
static struct sock *icmp_sk(struct net *net)
{
- return net->ipv4.icmp_sk[smp_processor_id()];
+ /*
+ * Should be safe on PREEMPT_SOFTIRQS/HARDIRQS to use raw-smp-processor-id:
+ */
+ return net->ipv4.icmp_sk[raw_smp_processor_id()];
}
static inline struct sock *icmp_xmit_lock(struct net *net)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 71d4e5b..f322a91 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -204,13 +204,13 @@ struct rt_hash_bucket {
};
#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \
- defined(CONFIG_PROVE_LOCKING)
+ defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_PREEMPT_RT)
/*
* Instead of using one spinlock for each rt_hash_bucket, we use a table of spinlocks
* The size of this table is a power of two and depends on the number of CPUS.
* (on lockdep we have a quite big spinlock_t, so keep the size down there)
*/
-#ifdef CONFIG_LOCKDEP
+#if defined(CONFIG_LOCKDEP) || defined(CONFIG_PREEMPT_RT)
# define RT_HASH_LOCK_SZ 256
#else
# if NR_CPUS >= 32
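
The route.c change compiles the shared rt_hash lock table in for CONFIG_PREEMPT_RT as well, and caps it at 256 entries just as lockdep does, presumably for the same reason the existing comment gives for lockdep: spinlock_t is much bigger on -rt, so the table is kept small. The bucket-to-lock mapping works roughly like this userspace sketch (the sizes and rt_bucket_lock() are invented for illustration):

    /* rt_hash_lock_demo.c - a small power-of-two table of locks shared by
     * many hash buckets, indexed by masking the bucket number. */
    #include <pthread.h>
    #include <stdio.h>

    #define RT_HASH_BUCKETS 4096    /* many buckets ... */
    #define RT_HASH_LOCK_SZ  256    /* ... far fewer locks */

    static pthread_mutex_t rt_hash_locks[RT_HASH_LOCK_SZ];

    static pthread_mutex_t *rt_bucket_lock(unsigned int bucket)
    {
        return &rt_hash_locks[bucket & (RT_HASH_LOCK_SZ - 1)];
    }

    int main(void)
    {
        for (int i = 0; i < RT_HASH_LOCK_SZ; i++)
            pthread_mutex_init(&rt_hash_locks[i], NULL);

        unsigned int bucket = 1234 % RT_HASH_BUCKETS;

        pthread_mutex_lock(rt_bucket_lock(bucket));
        printf("bucket %u uses lock %u\n", bucket, bucket & (RT_HASH_LOCK_SZ - 1));
        pthread_mutex_unlock(rt_bucket_lock(bucket));
        return 0;
    }
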
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 9210e31..053133a 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -384,7 +384,7 @@ ip6t_do_table(struct sk_buff *skb,
xt_info_rdlock_bh();
private = table->private;
- table_base = private->entries[smp_processor_id()];
+ table_base = private->entries[raw_smp_processor_id()];
e = get_entry(table_base, private->hook_entry[hook]);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index ff4dd53..6d6a2ac 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -12,6 +12,7 @@
*/
#include <linux/bitops.h>
+#include <linux/kallsyms.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
@@ -25,6 +26,7 @@
#include <linux/rcupdate.h>
#include <linux/list.h>
#include <linux/slab.h>
+#include <linux/delay.h>
#include <net/pkt_sched.h>
/* Main transmission queue. */
@@ -77,7 +79,7 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
{
int ret;
- if (unlikely(dev_queue->xmit_lock_owner == smp_processor_id())) {
+ if (unlikely(dev_queue->xmit_lock_owner == raw_smp_processor_id())) {
/*
* Same CPU holding the lock. It may be a transient
* configuration error, when hard_start_xmit() recurses. We
@@ -119,7 +121,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
/* And release qdisc */
spin_unlock(root_lock);
- HARD_TX_LOCK(dev, txq, smp_processor_id());
+ HARD_TX_LOCK(dev, txq, raw_smp_processor_id());
if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq))
ret = dev_hard_start_xmit(skb, dev, txq);
@@ -787,9 +789,12 @@ void dev_deactivate(struct net_device *dev)
/* Wait for outstanding qdisc-less dev_queue_xmit calls. */
synchronize_rcu();
- /* Wait for outstanding qdisc_run calls. */
+ /*
+ * Wait for outstanding qdisc_run calls.
+ * TODO: shouldn't this be wakeup-based, instead of polling it?
+ */
while (some_qdisc_is_busy(dev))
- yield();
+ msleep(1);
}
static void dev_init_scheduler_queue(struct net_device *dev,
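
The final hunk replaces yield() with msleep(1) in the dev_deactivate() wait loop: on -rt a high-priority caller using yield() can keep spinning without ever letting the remaining qdisc users run, whereas a real sleep always gives them CPU time (the added TODO suggests a wakeup-based scheme as the cleaner long-term fix). The shape of that loop, as a runnable userspace sketch (some_qdisc_is_busy() is a counter stand-in and the 1 ms period mirrors msleep(1)):

    /* deactivate_wait_demo.c - poll-and-sleep instead of poll-and-yield. */
    #define _POSIX_C_SOURCE 199309L
    #include <stdio.h>
    #include <time.h>

    static int remaining = 3;

    static int some_qdisc_is_busy(void)
    {
        return remaining-- > 0;            /* pretend the qdiscs drain over time */
    }

    int main(void)
    {
        const struct timespec one_ms = { .tv_sec = 0, .tv_nsec = 1000 * 1000 };

        while (some_qdisc_is_busy()) {
            printf("still busy, sleeping 1 ms\n");
            nanosleep(&one_ms, NULL);      /* msleep(1) in the patch */
        }
        printf("quiesced\n");
        return 0;
    }
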
--
1.7.1.1