/* Metin KAYA */ Releasing PPPoE Crash: In some platforms, e.g. MIPS, there is race contidion while releasing the PPPoE interfaces. Some part of code attemps to receive/send packet over an existing PPPoE interface, but otherone tries to release it. Using spinlocks makes it happy... Index: kernel/linux-2.6.30/include/linux/netdevice.h =================================================================== --- kernel/linux-2.6.30/include/linux/netdevice.h (revision 582) +++ kernel/linux-2.6.30/include/linux/netdevice.h (revision 589) @@ -1130,6 +1130,7 @@ extern struct net_device *dev_get_by_flags(struct net *net, unsigned short flags, unsigned short mask); extern struct net_device *dev_get_by_name(struct net *net, const char *name); +extern struct net_device *dev_get_by_name_rcu(struct net *net, const char *name); extern struct net_device *__dev_get_by_name(struct net *net, const char *name); extern int dev_alloc_name(struct net_device *dev, const char *name); extern int dev_open(struct net_device *dev); Index: kernel/linux-2.6.30/net/core/dev.c =================================================================== --- kernel/linux-2.6.30/net/core/dev.c (revision 582) +++ kernel/linux-2.6.30/net/core/dev.c (revision 589) @@ -683,6 +685,21 @@ return NULL; } +struct net_device *dev_get_by_name_rcu(struct net *net, const char *name) +{ + struct hlist_node *p; + struct net_device *dev; + struct hlist_head *head = dev_name_hash(net, name); + + hlist_for_each_entry_rcu(dev, p, head, name_hlist) + if (!strncmp(dev->name, name, IFNAMSIZ)) + return dev; + + return NULL; +} +EXPORT_SYMBOL(dev_get_by_name_rcu); + + /** * dev_get_by_name - find a device by its name * @net: the applicable net namespace @@ -4414,11 +4431,14 @@ } /* Delayed registration/unregisteration */ +static DEFINE_SPINLOCK(net_todo_list_lock); static LIST_HEAD(net_todo_list); static void net_set_todo(struct net_device *dev) { + spin_lock(&net_todo_list_lock); list_add_tail(&dev->todo_list, &net_todo_list); + spin_unlock(&net_todo_list_lock); } static void rollback_registered(struct net_device *dev) @@ -4858,8 +4878,10 @@ { struct list_head list; + spin_lock(&net_todo_list_lock); /* Snapshot list, allow later requests */ list_replace_init(&net_todo_list, &list); + spin_unlock(&net_todo_list_lock); __rtnl_unlock(); Index: kernel/linux-2.6.30/drivers/net/pppoe.c =================================================================== --- kernel/linux-2.6.30/drivers/net/pppoe.c (revision 582) +++ kernel/linux-2.6.30/drivers/net/pppoe.c (revision 589) @@ -97,7 +97,7 @@ static struct ppp_channel_ops pppoe_chan_ops; /* per-net private data for this module */ -static int pppoe_net_id; +static int pppoe_net_id __read_mostly; struct pppoe_net { /* * we could use _single_ hash table for all @@ -114,6 +114,8 @@ /* to eliminate a race btw pppoe_flush_dev and pppoe_release */ static DEFINE_SPINLOCK(flush_lock); +static struct pppox_sock *__delete_item(struct pppoe_net *pn, __be16 sid, + unsigned char *addr, int ifindex); /* * PPPoE could be in the following stages: * 1) Discovery stage (to obtain remote MAC and Session ID) @@ -139,7 +141,7 @@ return a->sid == b->sid && !memcmp(a->remote, b->remote, ETH_ALEN); } -static inline int cmp_addr(struct pppoe_addr *a, __be16 sid, char *addr) +static inline int cmp_addr(struct pppoe_addr *a, __be16 sid, unsigned char *addr) { return a->sid == sid && !memcmp(a->remote, addr, ETH_ALEN); } @@ -171,9 +173,13 @@ static struct pppox_sock *__get_item(struct pppoe_net *pn, __be16 sid, unsigned char *addr, int ifindex) { - int hash = hash_item(sid, addr); + int hash; struct pppox_sock *ret; + if (!pn || !addr) + return NULL; + + hash = hash_item(sid, addr); ret = pn->hash_table[hash]; while (ret) { if (cmp_addr(&ret->pppoe_pa, sid, addr) && @@ -188,32 +194,44 @@ static int __set_item(struct pppoe_net *pn, struct pppox_sock *po) { - int hash = hash_item(po->pppoe_pa.sid, po->pppoe_pa.remote); + int hash; struct pppox_sock *ret; + hash = hash_item(po->pppoe_pa.sid, po->pppoe_pa.remote); ret = pn->hash_table[hash]; while (ret) { if (cmp_2_addr(&ret->pppoe_pa, &po->pppoe_pa) && - ret->pppoe_ifindex == po->pppoe_ifindex) - return -EALREADY; + ret->pppoe_ifindex == po->pppoe_ifindex) { + printk("\n\n\t\e[01;31m[MetinUlasKAYA] war for pppoe sid hash.\e[0m\n\n"); + __delete_item(pn, po->pppoe_pa.sid, + po->pppoe_pa.remote, po->pppoe_ifindex); + ret = NULL; + goto out; // return -EALREADY; + } ret = ret->next; } - po->next = pn->hash_table[hash]; - pn->hash_table[hash] = po; +out: + if (!ret) { + po->next = pn->hash_table[hash]; + pn->hash_table[hash] = po; + } return 0; } static struct pppox_sock *__delete_item(struct pppoe_net *pn, __be16 sid, - char *addr, int ifindex) + unsigned char *addr, int ifindex) { - int hash = hash_item(sid, addr); + int hash; struct pppox_sock *ret, **src; + hash = hash_item(sid, addr); ret = pn->hash_table[hash]; src = &pn->hash_table[hash]; + if (!src || *src == NULL) + return NULL; while (ret) { if (cmp_addr(&ret->pppoe_pa, sid, addr) && @@ -222,6 +240,9 @@ break; } + if (!ret || !ret->next) + return NULL; + src = &ret->next; ret = ret->next; } @@ -253,25 +274,24 @@ { struct net_device *dev; struct pppoe_net *pn; - struct pppox_sock *pppox_sock; + struct pppox_sock *pppox_sock = NULL; int ifindex; - dev = dev_get_by_name(net, sp->sa_addr.pppoe.dev); - if (!dev) - return NULL; - - ifindex = dev->ifindex; - pn = net_generic(net, pppoe_net_id); - pppox_sock = get_item(pn, sp->sa_addr.pppoe.sid, + rcu_read_lock(); + dev = dev_get_by_name_rcu(net, sp->sa_addr.pppoe.dev); + if (dev) { + ifindex = dev->ifindex; + pn = pppoe_pernet(net); + pppox_sock = get_item(pn, sp->sa_addr.pppoe.sid, sp->sa_addr.pppoe.remote, ifindex); - dev_put(dev); - + } + rcu_read_unlock(); return pppox_sock; } static inline struct pppox_sock *delete_item(struct pppoe_net *pn, __be16 sid, - char *addr, int ifindex) + unsigned char *addr, int ifindex) { struct pppox_sock *ret; @@ -296,64 +316,76 @@ BUG_ON(dev == NULL); + spin_lock(&flush_lock); pn = pppoe_pernet(dev_net(dev)); - if (!pn) /* already freed */ + if (!pn) { /* already freed */ + spin_unlock(&flush_lock); return; + } write_lock_bh(&pn->hash_lock); for (i = 0; i < PPPOE_HASH_SIZE; i++) { struct pppox_sock *po = pn->hash_table[i]; + struct sock *sk; - while (po != NULL) { - struct sock *sk; - if (po->pppoe_dev != dev) { + while (po) { + while (po && po->pppoe_dev != dev) { po = po->next; - continue; } + + if (!po) + break; + sk = sk_pppox(po); - spin_lock(&flush_lock); - po->pppoe_dev = NULL; - spin_unlock(&flush_lock); - dev_put(dev); /* We always grab the socket lock, followed by the - * hash_lock, in that order. Since we should - * hold the sock lock while doing any unbinding, - * we need to release the lock we're holding. - * Hold a reference to the sock so it doesn't disappear - * as we're jumping between locks. + * hash_lock, in that order. Since we should hold the + * sock lock while doing any unbinding, we need to + * release the lock we're holding. Hold a reference to + * the sock so it doesn't disappear as we're jumping + * between locks. */ sock_hold(sk); - write_unlock_bh(&pn->hash_lock); lock_sock(sk); - if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) { + if (po->pppoe_dev == dev && + sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) { pppox_unbind_sock(sk); sk->sk_state = PPPOX_ZOMBIE; sk->sk_state_change(sk); + po->pppoe_dev = NULL; + dev_put(dev); } release_sock(sk); sock_put(sk); - /* Restart scan at the beginning of this hash chain. - * While the lock was dropped the chain contents may - * have changed. + /* Restart the process from the start of the current + * hash chain. We dropped locks so the world may have + * change from underneath us. */ + + BUG_ON(pppoe_pernet(dev_net(dev)) == NULL); write_lock_bh(&pn->hash_lock); po = pn->hash_table[i]; } } write_unlock_bh(&pn->hash_lock); + spin_unlock(&flush_lock); } static int pppoe_device_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = (struct net_device *)ptr; + struct net_device *dev; + if (!ptr) + return NOTIFY_DONE; + + dev = (struct net_device *) ptr; + /* Only look at sockets that are using this specific device. */ switch (event) { case NETDEV_CHANGEMTU: @@ -369,7 +401,7 @@ default: break; - }; + } return NOTIFY_DONE; } @@ -385,14 +417,22 @@ ***********************************************************************/ static int pppoe_rcv_core(struct sock *sk, struct sk_buff *skb) { - struct pppox_sock *po = pppox_sk(sk); + struct pppox_sock *po; struct pppox_sock *relay_po; + /* Backlog receive. Semantics of backlog rcv preclude any code from + * executing in lock_sock()/release_sock() bounds; meaning sk->sk_state + * can't change. + */ + + spin_lock(&flush_lock); + po = pppox_sk(sk); + if (sk->sk_state & PPPOX_BOUND) { ppp_input(&po->chan, skb); } else if (sk->sk_state & PPPOX_RELAY) { - relay_po = get_item_by_addr(dev_net(po->pppoe_dev), - &po->pppoe_relay); + relay_po = get_item_by_addr(sock_net(sk), + &po->pppoe_relay); if (relay_po == NULL) goto abort_kfree; @@ -413,6 +453,7 @@ abort_kfree: kfree_skb(skb); + spin_unlock(&flush_lock); return NET_RX_DROP; } @@ -447,6 +488,10 @@ goto drop; pn = pppoe_pernet(dev_net(dev)); + + /* Note that get_item does a sock_hold(), so sk_pppox(po) + * is known to be safe. + */ po = get_item(pn, ph->sid, eth_hdr(skb)->h_source, dev->ifindex); if (!po) goto drop; @@ -561,60 +606,48 @@ struct sock *sk = sock->sk; struct pppox_sock *po; struct pppoe_net *pn; + struct net *net = NULL; if (!sk) return 0; + spin_lock(&flush_lock); lock_sock(sk); if (sock_flag(sk, SOCK_DEAD)) { release_sock(sk); + spin_unlock(&flush_lock); return -EBADF; } + po = pppox_sk(sk); + + if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) { + dev_put(po->pppoe_dev); + po->pppoe_dev = NULL; + } + pppox_unbind_sock(sk); /* Signal the death of the socket. */ sk->sk_state = PPPOX_DEAD; - /* - * pppoe_flush_dev could lead to a race with - * this routine so we use flush_lock to eliminate - * such a case (we only need per-net specific data) - */ - spin_lock(&flush_lock); - po = pppox_sk(sk); - if (!po->pppoe_dev) { - spin_unlock(&flush_lock); - goto out; - } - pn = pppoe_pernet(dev_net(po->pppoe_dev)); - spin_unlock(&flush_lock); + net = sock_net(sk); + pn = pppoe_pernet(net); /* * protect "po" from concurrent updates * on pppoe_flush_dev */ - write_lock_bh(&pn->hash_lock); + delete_item(pn, po->pppoe_pa.sid, po->pppoe_pa.remote, + po->pppoe_ifindex); - po = pppox_sk(sk); - if (stage_session(po->pppoe_pa.sid)) - __delete_item(pn, po->pppoe_pa.sid, po->pppoe_pa.remote, - po->pppoe_ifindex); - - if (po->pppoe_dev) { - dev_put(po->pppoe_dev); - po->pppoe_dev = NULL; - } - - write_unlock_bh(&pn->hash_lock); - -out: sock_orphan(sk); sock->sk = NULL; skb_queue_purge(&sk->sk_receive_queue); release_sock(sk); sock_put(sk); + spin_unlock(&flush_lock); return 0; } @@ -622,13 +655,30 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr, int sockaddr_len, int flags) { - struct sock *sk = sock->sk; - struct sockaddr_pppox *sp = (struct sockaddr_pppox *)uservaddr; - struct pppox_sock *po = pppox_sk(sk); + struct sock *sk; + struct sockaddr_pppox *sp; + struct pppox_sock *po; struct net_device *dev; struct pppoe_net *pn; + struct net *net = NULL; int error; + local_bh_disable(); + sk = sock->sk; + if (!sk) { + local_bh_enable(); + return -1; + } + sp = (struct sockaddr_pppox *) uservaddr; + if (!sp) { + local_bh_enable(); + return -1; + } + po = pppox_sk(sk); + if (!po) { + local_bh_enable(); + return -1; + } lock_sock(sk); error = -EINVAL; @@ -652,12 +702,14 @@ /* Delete the old binding */ if (stage_session(po->pppoe_pa.sid)) { pppox_unbind_sock(sk); + pn = pppoe_pernet(sock_net(sk)); + delete_item(pn, po->pppoe_pa.sid, + po->pppoe_pa.remote, po->pppoe_ifindex); if (po->pppoe_dev) { - pn = pppoe_pernet(dev_net(po->pppoe_dev)); - delete_item(pn, po->pppoe_pa.sid, - po->pppoe_pa.remote, po->pppoe_ifindex); dev_put(po->pppoe_dev); + po->pppoe_dev = NULL; } + memset(sk_pppox(po) + 1, 0, sizeof(struct pppox_sock) - sizeof(struct sock)); sk->sk_state = PPPOX_NONE; @@ -666,16 +718,15 @@ /* Re-bind in session stage only */ if (stage_session(sp->sa_addr.pppoe.sid)) { error = -ENODEV; - dev = dev_get_by_name(sock_net(sk), sp->sa_addr.pppoe.dev); + net = sock_net(sk); + dev = dev_get_by_name(net, sp->sa_addr.pppoe.dev); if (!dev) - goto end; + goto err_put; po->pppoe_dev = dev; po->pppoe_ifindex = dev->ifindex; - pn = pppoe_pernet(dev_net(dev)); - write_lock_bh(&pn->hash_lock); + pn = pppoe_pernet(net); if (!(dev->flags & IFF_UP)) { - write_unlock_bh(&pn->hash_lock); goto err_put; } @@ -684,7 +735,6 @@ sizeof(struct pppoe_addr)); error = __set_item(pn, po); - write_unlock_bh(&pn->hash_lock); if (error < 0) goto err_put; @@ -696,8 +746,11 @@ po->chan.ops = &pppoe_chan_ops; error = ppp_register_net_channel(dev_net(dev), &po->chan); - if (error) + if (error) { + delete_item(pn, po->pppoe_pa.sid, + po->pppoe_pa.remote, po->pppoe_ifindex); goto err_put; + } sk->sk_state = PPPOX_CONNECTED; } @@ -706,6 +759,7 @@ end: release_sock(sk); + local_bh_enable(); return error; err_put: if (po->pppoe_dev) { @@ -736,11 +790,15 @@ static int pppoe_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { - struct sock *sk = sock->sk; - struct pppox_sock *po = pppox_sk(sk); + struct sock *sk; + struct pppox_sock *po; int val; int err; + spin_lock(&flush_lock); + sk = sock->sk; + po = pppox_sk(sk); + switch (cmd) { case PPPIOCGMRU: err = -ENXIO; @@ -830,6 +888,7 @@ err = -ENOTTY; } + spin_unlock(&flush_lock); return err; } @@ -837,14 +896,18 @@ struct msghdr *m, size_t total_len) { struct sk_buff *skb; - struct sock *sk = sock->sk; - struct pppox_sock *po = pppox_sk(sk); + struct sock *sk; + struct pppox_sock *po; int error; struct pppoe_hdr hdr; struct pppoe_hdr *ph; struct net_device *dev; char *start; + spin_lock(&flush_lock); + sk = sock->sk; + po = pppox_sk(sk); + lock_sock(sk); if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) { error = -ENOTCONN; @@ -900,6 +963,8 @@ end: release_sock(sk); + spin_unlock(&flush_lock); + return error; } @@ -910,11 +975,24 @@ ***********************************************************************/ static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb) { - struct pppox_sock *po = pppox_sk(sk); - struct net_device *dev = po->pppoe_dev; + struct pppox_sock *po; + struct net_device *dev; struct pppoe_hdr *ph; - int data_len = skb->len; + int data_len; + /* The higher-level PPP code (ppp_unregister_channel()) ensures the PPP + * xmit operations conclude prior to an unregistration call. Thus + * sk->sk_state cannot change, so we don't need to do lock_sock(). + * But, we also can't do a lock_sock since that introduces a potential + * deadlock as we'd reverse the lock ordering used when calling + * ppp_unregister_channel(). + */ + + spin_lock(&flush_lock); + po = pppox_sk(sk); + dev = po->pppoe_dev; + data_len = skb->len; + if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) goto abort; @@ -944,12 +1022,15 @@ po->pppoe_pa.remote, NULL, data_len); dev_queue_xmit(skb); + spin_unlock(&flush_lock); return 1; abort: kfree_skb(skb); - return 1; + spin_unlock(&flush_lock); + + return 0; } /************************************************************************ @@ -1063,6 +1144,7 @@ else { int hash = hash_item(po->pppoe_pa.sid, po->pppoe_pa.remote); + po = NULL; while (++hash < PPPOE_HASH_SIZE) { po = pn->hash_table[hash]; if (po) @@ -1148,10 +1230,8 @@ pde = proc_net_fops_create(net, "pppoe", S_IRUGO, &pppoe_seq_fops); #ifdef CONFIG_PROC_FS - if (!pde) { - err = -ENOMEM; - goto out; - } + if (!pde) + return -ENOMEM; #endif return 0;