patch-2.3.4 linux/net/ipv4/route.c
- Lines: 346
- Date: Wed May 26 18:14:37 1999
- Orig file: v2.3.3/linux/net/ipv4/route.c
- Orig date: Mon May 10 09:55:25 1999
diff -u --recursive --new-file v2.3.3/linux/net/ipv4/route.c linux/net/ipv4/route.c
@@ -5,7 +5,7 @@
*
* ROUTE - implementation of the IP router.
*
- * Version: $Id: route.c,v 1.67 1999/05/08 20:00:20 davem Exp $
+ * Version: $Id: route.c,v 1.68 1999/05/27 00:37:54 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -174,7 +174,18 @@
* Route cache.
*/
-struct rtable *rt_hash_table[RT_HASH_DIVISOR];
+/* The locking scheme is rather straightforward:
+ *
+ * 1) A BH protected rwlock protects the central route hash.
+ * 2) Only writers remove entries, and they hold the lock
+ * as they look at rtable reference counts.
+ * 3) Only readers acquire references to rtable entries;
+ * they do so with atomic increments and with the
+ * lock held.
+ */
+
+static struct rtable *rt_hash_table[RT_HASH_DIVISOR];
+static rwlock_t rt_hash_lock = RW_LOCK_UNLOCKED;
static int rt_intern_hash(unsigned hash, struct rtable * rth, struct rtable ** res);
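[Annotation: rule 3's read-side pattern is what the lookup fast paths further down (ip_route_input() and ip_route_output()) now follow: take the read lock, scan the chain, bump the reference counts while still holding the lock, then drop it. A condensed sketch, with the remaining key checks elided:

	read_lock_bh(&rt_hash_lock);
	for (rth = rt_hash_table[hash]; rth; rth = rth->u.rt_next) {
		if (rth->key.dst == daddr &&
		    rth->key.src == saddr /* ...remaining key checks... */) {
			rth->u.dst.lastuse = jiffies;
			atomic_inc(&rth->u.dst.use);
			atomic_inc(&rth->u.dst.refcnt);	/* rule 3: ref taken under the lock */
			read_unlock_bh(&rt_hash_lock);
			*rp = rth;
			return 0;
		}
	}
	read_unlock_bh(&rt_hash_lock);
	/* cache miss: fall through to the slow path */
]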
@@ -204,7 +215,7 @@
}
- start_bh_atomic();
+ read_lock_bh(&rt_hash_lock);
for (i = 0; i<RT_HASH_DIVISOR; i++) {
for (r = rt_hash_table[i]; r; r = r->u.rt_next) {
@@ -239,7 +250,7 @@
}
done:
- end_bh_atomic();
+ read_unlock_bh(&rt_hash_lock);
*start = buffer+len-(pos-offset);
len = pos-offset;
@@ -292,6 +303,7 @@
return 1;
}
+/* This runs via a timer and thus is always in BH context. */
static void rt_check_expire(unsigned long dummy)
{
int i;
@@ -305,6 +317,7 @@
rover = (rover + 1) & (RT_HASH_DIVISOR-1);
rthp = &rt_hash_table[rover];
+ write_lock(&rt_hash_lock);
while ((rth = *rthp) != NULL) {
if (rth->u.dst.expires) {
/* Entry is expired even if it is in use */
@@ -325,6 +338,7 @@
*rthp = rth->u.rt_next;
rt_free(rth);
}
+ write_unlock(&rt_hash_lock);
/* Fallback loop breaker. */
if ((jiffies - now) > 0)
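[Annotation: since rt_check_expire() only ever runs from the periodic timer, it is already in BH context, so the plain write_lock()/write_unlock() pair suffices; the _bh variants would only redundantly disable bottom halves. The removal itself is the usual pointer-to-pointer walk; condensed below, with the aging test folded into a hypothetical rt_may_expire() predicate:

	write_lock(&rt_hash_lock);
	while ((rth = *rthp) != NULL) {
		if (!rt_may_expire(rth, now)) {	/* hypothetical aging test */
			rthp = &rth->u.rt_next;	/* keep the entry, advance */
			continue;
		}
		*rthp = rth->u.rt_next;		/* unlink under the write lock */
		rt_free(rth);
	}
	write_unlock(&rt_hash_lock);
]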
@@ -334,6 +348,9 @@
add_timer(&rt_periodic_timer);
}
+/* This can run from both BH and non-BH contexts, the latter
+ * in the case of a forced flush event.
+ */
static void rt_run_flush(unsigned long dummy)
{
int i;
@@ -341,23 +358,23 @@
rt_deadline = 0;
- start_bh_atomic();
for (i=0; i<RT_HASH_DIVISOR; i++) {
- if ((rth = xchg(&rt_hash_table[i], NULL)) == NULL)
- continue;
- end_bh_atomic();
+ write_lock_bh(&rt_hash_lock);
+ rth = rt_hash_table[i];
+ if(rth != NULL)
+ rt_hash_table[i] = NULL;
+ write_unlock_bh(&rt_hash_lock);
for (; rth; rth=next) {
next = rth->u.rt_next;
rth->u.rt_next = NULL;
rt_free(rth);
}
-
- start_bh_atomic();
}
- end_bh_atomic();
}
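[Annotation: the rewritten rt_run_flush() trades the old xchg()-plus-BH-toggling for a short critical section per bucket: the write lock is held only long enough to detach the whole chain, and the entries are freed with the lock already dropped, so bottom halves are not shut out for the duration of the frees. Per bucket:

	write_lock_bh(&rt_hash_lock);
	rth = rt_hash_table[i];			/* detach the entire chain */
	rt_hash_table[i] = NULL;
	write_unlock_bh(&rt_hash_lock);
	for (; rth; rth = next) {		/* free outside the lock */
		next = rth->u.rt_next;
		rth->u.rt_next = NULL;
		rt_free(rth);
	}
]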
+static spinlock_t rt_flush_lock = SPIN_LOCK_UNLOCKED;
+
void rt_cache_flush(int delay)
{
unsigned long now = jiffies;
@@ -366,7 +383,7 @@
if (delay < 0)
delay = ip_rt_min_delay;
- start_bh_atomic();
+ spin_lock_bh(&rt_flush_lock);
if (del_timer(&rt_flush_timer) && delay > 0 && rt_deadline) {
long tmo = (long)(rt_deadline - now);
@@ -386,7 +403,7 @@
}
if (delay <= 0) {
- end_bh_atomic();
+ spin_unlock_bh(&rt_flush_lock);
rt_run_flush(0);
return;
}
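[Annotation: taken together with the timer re-arm in the next hunk, rt_cache_flush() now has this shape: the new rt_flush_lock spinlock serializes only the rt_deadline/rt_flush_timer bookkeeping and is dropped before the flush itself runs, since rt_run_flush() takes rt_hash_lock on its own. A condensed sketch:

	spin_lock_bh(&rt_flush_lock);
	/* ... clamp delay against any pending rt_deadline ... */
	if (delay <= 0) {
		spin_unlock_bh(&rt_flush_lock);
		rt_run_flush(0);		/* immediate flush, lock already dropped */
		return;
	}
	rt_flush_timer.expires = now + delay;	/* delayed flush */
	add_timer(&rt_flush_timer);
	spin_unlock_bh(&rt_flush_lock);
]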
@@ -396,7 +413,7 @@
rt_flush_timer.expires = now + delay;
add_timer(&rt_flush_timer);
- end_bh_atomic();
+ spin_unlock_bh(&rt_flush_lock);
}
/*
@@ -459,7 +476,10 @@
do {
int i, k;
- start_bh_atomic();
+ /* The write lock is held during the entire hash
+ * traversal to ensure consistent state of the rover.
+ */
+ write_lock_bh(&rt_hash_lock);
for (i=0, k=rover; i<RT_HASH_DIVISOR; i++) {
unsigned tmo = expire;
@@ -480,7 +500,7 @@
break;
}
rover = k;
- end_bh_atomic();
+ write_unlock_bh(&rt_hash_lock);
if (goal <= 0)
goto work_done;
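[Annotation: as the added comment explains, the garbage collector keeps the write lock across the entire traversal so that rover cannot move under it; the price is a longer lock hold than rt_check_expire()'s per-bucket locking, in exchange for a simpler invariant. In outline, with the per-bucket pruning elided:

	write_lock_bh(&rt_hash_lock);
	for (i = 0, k = rover; i < RT_HASH_DIVISOR; i++) {
		k = (k + 1) & (RT_HASH_DIVISOR - 1);
		/* ... prune aged entries from bucket k, much as
		 * rt_check_expire() does ...
		 */
		if (goal <= 0)
			break;
	}
	rover = k;
	write_unlock_bh(&rt_hash_lock);
]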
@@ -530,10 +550,9 @@
int attempts = !in_interrupt();
restart:
- start_bh_atomic();
-
rthp = &rt_hash_table[hash];
+ write_lock_bh(&rt_hash_lock);
while ((rth = *rthp) != NULL) {
if (memcmp(&rth->key, &rt->key, sizeof(rt->key)) == 0) {
/* Put it first */
@@ -544,7 +563,7 @@
atomic_inc(&rth->u.dst.refcnt);
atomic_inc(&rth->u.dst.use);
rth->u.dst.lastuse = now;
- end_bh_atomic();
+ write_unlock_bh(&rt_hash_lock);
rt_drop(rt);
*rp = rth;
@@ -559,7 +578,7 @@
*/
if (rt->rt_type == RTN_UNICAST || rt->key.iif == 0) {
if (!arp_bind_neighbour(&rt->u.dst)) {
- end_bh_atomic();
+ write_unlock_bh(&rt_hash_lock);
/* Neighbour tables are full and nothing
can be released. Try to shrink route cache,
@@ -594,7 +613,7 @@
}
#endif
rt_hash_table[hash] = rt;
- end_bh_atomic();
+ write_unlock_bh(&rt_hash_lock);
*rp = rt;
return 0;
}
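[Annotation: rt_intern_hash() now does its whole find-or-insert under the write lock, and every exit path (duplicate found, arp_bind_neighbour() failure, successful insert) has to drop it before returning. A condensed outline, with the error handling and debug code elided:

	write_lock_bh(&rt_hash_lock);
	while ((rth = *rthp) != NULL) {
		if (memcmp(&rth->key, &rt->key, sizeof(rt->key)) == 0) {
			/* Raced with another insert: promote the existing
			 * entry to the front of the chain and reuse it.
			 */
			*rthp = rth->u.rt_next;
			rth->u.rt_next = rt_hash_table[hash];
			rt_hash_table[hash] = rth;
			atomic_inc(&rth->u.dst.refcnt);
			atomic_inc(&rth->u.dst.use);
			rth->u.dst.lastuse = now;
			write_unlock_bh(&rt_hash_lock);
			rt_drop(rt);		/* discard the duplicate */
			*rp = rth;
			return 0;
		}
		rthp = &rth->u.rt_next;
	}
	rt->u.rt_next = rt_hash_table[hash];	/* no match: insert at head */
	rt_hash_table[hash] = rt;
	write_unlock_bh(&rt_hash_lock);
	*rp = rt;
	return 0;
]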
@@ -633,6 +652,7 @@
rthp=&rt_hash_table[hash];
+ write_lock_bh(&rt_hash_lock);
while ( (rth = *rthp) != NULL) {
struct rtable *rt;
@@ -657,6 +677,7 @@
rt = dst_alloc(sizeof(struct rtable), &ipv4_dst_ops);
if (rt == NULL) {
ip_rt_put(rth);
+ write_unlock_bh(&rt_hash_lock);
return;
}
@@ -688,11 +709,15 @@
}
*rthp = rth->u.rt_next;
+ write_unlock_bh(&rt_hash_lock);
if (!rt_intern_hash(hash, rt, &rt))
ip_rt_put(rt);
rt_drop(rth);
- break;
+ goto do_next;
}
+ write_unlock_bh(&rt_hash_lock);
+ do_next:
+ ;
}
}
return;
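[Annotation: the switch from break to goto do_next is forced by the locking: rt_intern_hash() now takes rt_hash_lock itself, so the redirect path must drop the write lock before calling it, and must then skip the per-iteration write_unlock_bh() that would otherwise run on an already-released lock. The resulting tail of the loop body:

	*rthp = rth->u.rt_next;			/* unlink the superseded route */
	write_unlock_bh(&rt_hash_lock);		/* drop it: rt_intern_hash() relocks */
	if (!rt_intern_hash(hash, rt, &rt))
		ip_rt_put(rt);
	rt_drop(rth);
	goto do_next;				/* bypass this iteration's unlock */
]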
@@ -722,8 +747,8 @@
#if RT_CACHE_DEBUG >= 1
printk(KERN_DEBUG "ip_rt_advice: redirect to %d.%d.%d.%d/%02x dropped\n", NIPQUAD(rt->rt_dst), rt->key.tos);
#endif
- start_bh_atomic();
ip_rt_put(rt);
+ write_lock_bh(&rt_hash_lock);
for (rthp = &rt_hash_table[hash]; *rthp; rthp = &(*rthp)->u.rt_next) {
if (*rthp == rt) {
*rthp = rt->u.rt_next;
@@ -731,7 +756,7 @@
break;
}
}
- end_bh_atomic();
+ write_unlock_bh(&rt_hash_lock);
return NULL;
}
}
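[Annotation: the unlink in ip_rt_advice() uses the same pointer-to-pointer idiom, here hunting for one specific entry; per rule 2 of the locking scheme, only a writer holding the lock ever removes an entry. Condensed:

	write_lock_bh(&rt_hash_lock);
	for (rthp = &rt_hash_table[hash]; *rthp; rthp = &(*rthp)->u.rt_next) {
		if (*rthp == rt) {
			*rthp = rt->u.rt_next;	/* splice the entry out */
			/* ... free the unlinked entry ... */
			break;
		}
	}
	write_unlock_bh(&rt_hash_lock);
]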
@@ -861,6 +886,7 @@
for (i=0; i<2; i++) {
unsigned hash = rt_hash_code(daddr, skeys[i], tos);
+ read_lock_bh(&rt_hash_lock);
for (rth = rt_hash_table[hash]; rth; rth = rth->u.rt_next) {
if (rth->key.dst == daddr &&
rth->key.src == skeys[i] &&
@@ -890,6 +916,7 @@
}
}
}
+ read_unlock_bh(&rt_hash_lock);
}
return est_mtu ? : new_mtu;
}
@@ -1362,6 +1389,7 @@
tos &= IPTOS_TOS_MASK;
hash = rt_hash_code(daddr, saddr^(iif<<5), tos);
+ read_lock_bh(&rt_hash_lock);
for (rth=rt_hash_table[hash]; rth; rth=rth->u.rt_next) {
if (rth->key.dst == daddr &&
rth->key.src == saddr &&
@@ -1374,10 +1402,12 @@
rth->u.dst.lastuse = jiffies;
atomic_inc(&rth->u.dst.use);
atomic_inc(&rth->u.dst.refcnt);
+ read_unlock_bh(&rt_hash_lock);
skb->dst = (struct dst_entry*)rth;
return 0;
}
}
+ read_unlock_bh(&rt_hash_lock);
/* Multicast recognition logic is moved from route cache to here.
The problem was that too many Ethernet cards have broken/missing
@@ -1657,7 +1687,7 @@
hash = rt_hash_code(daddr, saddr^(oif<<5), tos);
- start_bh_atomic();
+ read_lock_bh(&rt_hash_lock);
for (rth=rt_hash_table[hash]; rth; rth=rth->u.rt_next) {
if (rth->key.dst == daddr &&
rth->key.src == saddr &&
@@ -1673,12 +1703,12 @@
rth->u.dst.lastuse = jiffies;
atomic_inc(&rth->u.dst.use);
atomic_inc(&rth->u.dst.refcnt);
- end_bh_atomic();
+ read_unlock_bh(&rt_hash_lock);
*rp = rth;
return 0;
}
}
- end_bh_atomic();
+ read_unlock_bh(&rt_hash_lock);
return ip_route_output_slow(rp, daddr, saddr, tos, oif);
}
@@ -1821,9 +1851,7 @@
return -ENODEV;
skb->protocol = __constant_htons(ETH_P_IP);
skb->dev = dev;
- start_bh_atomic();
err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
- end_bh_atomic();
rt = (struct rtable*)skb->dst;
if (!err && rt->u.dst.error)
err = -rt->u.dst.error;
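[Annotation: the start_bh_atomic()/end_bh_atomic() bracketing around this ip_route_input() call can simply be deleted: the function now takes read_lock_bh(&rt_hash_lock) internally (see the @@ -1362 hunk above), so the caller no longer needs to shut out bottom halves itself.]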
@@ -1869,7 +1897,7 @@
if (h < s_h) continue;
if (h > s_h)
s_idx = 0;
- start_bh_atomic();
+ read_lock_bh(&rt_hash_lock);
for (rt = rt_hash_table[h], idx = 0; rt; rt = rt->u.rt_next, idx++) {
if (idx < s_idx)
continue;
@@ -1877,12 +1905,12 @@
if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq, RTM_NEWROUTE, 1) <= 0) {
dst_release(xchg(&skb->dst, NULL));
- end_bh_atomic();
+ read_unlock_bh(&rt_hash_lock);
goto done;
}
dst_release(xchg(&skb->dst, NULL));
}
- end_bh_atomic();
+ read_unlock_bh(&rt_hash_lock);
}
done:
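[Annotation: one detail in the dump loop above: the rt_fill_info() failure path has to drop the read lock itself before jumping away, because the done: label lies outside the locked region; the success path instead falls through to the read_unlock_bh() after the loop.]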