patch-2.3.15 linux/net/netlink/af_netlink.c
- Lines: 955
- Date: Mon Aug 23 10:01:02 1999
- Orig file: v2.3.14/linux/net/netlink/af_netlink.c
- Orig date: Wed Jun 2 11:29:13 1999
diff -u --recursive --new-file v2.3.14/linux/net/netlink/af_netlink.c linux/net/netlink/af_netlink.c
@@ -34,6 +34,7 @@
#include <linux/netdevice.h>
#include <linux/netlink.h>
#include <linux/proc_fs.h>
+#include <linux/smp_lock.h>
#include <net/sock.h>
#include <net/scm.h>
@@ -43,8 +44,23 @@
#define NL_EMULATE_DEV
#endif
+#define BUG_TRAP(x) if (!(x)) { printk("Assertion (" #x ") failed at " __FILE__ "(%d):" __FUNCTION__ "\n", __LINE__); }
+
+struct netlink_opt
+{
+ pid_t pid;
+ unsigned groups;
+ pid_t dst_pid;
+ unsigned dst_groups;
+ unsigned long state;
+ int (*handler)(int unit, struct sk_buff *skb);
+ wait_queue_head_t wait;
+ struct netlink_callback *cb;
+ spinlock_t cb_lock;
+ void (*data_ready)(struct sock *sk, int bytes);
+};
+
static struct sock *nl_table[MAX_LINKS];
-static atomic_t nl_table_lock[MAX_LINKS];
static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);
#ifdef NL_EMULATE_DEV
@@ -54,89 +70,131 @@
static int netlink_dump(struct sock *sk);
static void netlink_destroy_callback(struct netlink_callback *cb);
-/* Netlink table lock. It protects against sk list changes
- during uninterruptible sleeps in netlink_broadcast.
+atomic_t netlink_sock_nr;
- These lock MUST NOT be used from bh/irq on SMP kernels, because
- It would result in race in netlink_wait_on_table.
- */
+static rwlock_t nl_table_lock = RW_LOCK_UNLOCKED;
+static atomic_t nl_table_users = ATOMIC_INIT(0);
-extern __inline__ void
-netlink_wait_on_table(int protocol)
+static void netlink_sock_destruct(struct sock *sk)
{
- while (atomic_read(&nl_table_lock[protocol]))
- sleep_on(&nl_table_wait);
-}
+ skb_queue_purge(&sk->receive_queue);
-extern __inline__ void
-netlink_lock_table(int protocol)
-{
- atomic_inc(&nl_table_lock[protocol]);
+ if (!sk->dead) {
+ printk("Freeing alive netlink socket %p\n", sk);
+ return;
+ }
+ BUG_TRAP(atomic_read(&sk->rmem_alloc)==0);
+ BUG_TRAP(atomic_read(&sk->wmem_alloc)==0);
+ BUG_TRAP(sk->protinfo.af_netlink->cb==NULL);
+
+ kfree(sk->protinfo.af_netlink);
+
+ atomic_dec(&netlink_sock_nr);
+#ifdef NETLINK_REFCNT_DEBUG
+ printk(KERN_DEBUG "NETLINK %p released, %d are still alive\n", sk, atomic_read(&netlink_sock_nr));
+#endif
}
-extern __inline__ void
-netlink_unlock_table(int protocol)
+static void netlink_table_grab(void)
{
-#if 0
- /* F...g gcc does not eat it! */
+ write_lock_bh(&nl_table_lock);
- if (atomic_dec_and_test(&nl_table_lock[protocol]))
- wake_up(&nl_table_wait);
-#else
- atomic_dec(&nl_table_lock[protocol]);
- if (!atomic_read(&nl_table_lock[protocol]))
- wake_up(&nl_table_wait);
-#endif
+ if (atomic_read(&nl_table_users)) {
+ DECLARE_WAITQUEUE(wait, current);
+
+ add_wait_queue(&nl_table_wait, &wait);
+ do {
+ current->state = TASK_UNINTERRUPTIBLE;
+ if (atomic_read(&nl_table_users) == 0)
+ break;
+ write_unlock_bh(&nl_table_lock);
+ schedule();
+ write_lock_bh(&nl_table_lock);
+ } while (atomic_read(&nl_table_users));
+
+ current->state = TASK_RUNNING;
+ remove_wait_queue(&nl_table_wait, &wait);
+ }
}
-static __inline__ void netlink_lock(struct sock *sk)
+static __inline__ void netlink_table_ungrab(void)
{
- atomic_inc(&sk->protinfo.af_netlink.locks);
+ write_unlock_bh(&nl_table_lock);
}
-static __inline__ void netlink_unlock(struct sock *sk)
+static __inline__ void
+netlink_lock_table(void)
{
- atomic_dec(&sk->protinfo.af_netlink.locks);
+ /* read_lock() synchronizes us to netlink_table_grab */
+
+ read_lock(&nl_table_lock);
+ atomic_inc(&nl_table_users);
+ read_unlock(&nl_table_lock);
}
-static __inline__ int netlink_locked(struct sock *sk)
+static __inline__ void
+netlink_unlock_table(void)
{
- return atomic_read(&sk->protinfo.af_netlink.locks);
+ if (atomic_dec_and_test(&nl_table_users))
+ wake_up(&nl_table_wait);
}
static __inline__ struct sock *netlink_lookup(int protocol, u32 pid)
{
struct sock *sk;
+ read_lock(&nl_table_lock);
for (sk=nl_table[protocol]; sk; sk=sk->next) {
- if (sk->protinfo.af_netlink.pid == pid) {
- netlink_lock(sk);
+ if (sk->protinfo.af_netlink->pid == pid) {
+ sock_hold(sk);
+ read_unlock(&nl_table_lock);
return sk;
}
}
+ read_unlock(&nl_table_lock);
return NULL;
}
extern struct proto_ops netlink_ops;
-static void netlink_insert(struct sock *sk)
+static int netlink_insert(struct sock *sk, u32 pid)
{
- sk->next = nl_table[sk->protocol];
- nl_table[sk->protocol] = sk;
+ int err = -EADDRINUSE;
+ struct sock *osk;
+
+ netlink_table_grab();
+ for (osk=nl_table[sk->protocol]; osk; osk=osk->next) {
+ if (osk->protinfo.af_netlink->pid == pid)
+ break;
+ }
+ if (osk == NULL) {
+ err = -EBUSY;
+ if (sk->protinfo.af_netlink->pid == 0) {
+ sk->protinfo.af_netlink->pid = pid;
+ sk->next = nl_table[sk->protocol];
+ nl_table[sk->protocol] = sk;
+ sock_hold(sk);
+ err = 0;
+ }
+ }
+ netlink_table_ungrab();
+ return err;
}
static void netlink_remove(struct sock *sk)
{
struct sock **skp;
+
+ netlink_table_grab();
for (skp = &nl_table[sk->protocol]; *skp; skp = &((*skp)->next)) {
if (*skp == sk) {
- start_bh_atomic();
*skp = sk->next;
- end_bh_atomic();
- return;
+ __sock_put(sk);
+ break;
}
}
+ netlink_table_ungrab();
}
static int netlink_create(struct socket *sock, int protocol)
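The hunk above replaces the old per-protocol atomic counters with a two-level scheme: fast paths call netlink_lock_table(), which takes nl_table_lock only long enough to bump nl_table_users, while netlink_table_grab() holds the write lock and sleeps until the user count drains to zero, dropping the lock around schedule() so registered users can finish. A minimal userspace sketch of the same pattern using pthreads (names are illustrative, not kernel API):

	#include <pthread.h>
	#include <stdatomic.h>

	static pthread_rwlock_t table_lock = PTHREAD_RWLOCK_INITIALIZER;
	static pthread_mutex_t wait_mx = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t  wait_cv = PTHREAD_COND_INITIALIZER;
	static atomic_int table_users;

	static void table_use(void)		/* cf. netlink_lock_table() */
	{
		/* The read lock only orders the increment against a
		 * writer that already holds the write lock. */
		pthread_rwlock_rdlock(&table_lock);
		atomic_fetch_add(&table_users, 1);
		pthread_rwlock_unlock(&table_lock);
	}

	static void table_unuse(void)		/* cf. netlink_unlock_table() */
	{
		if (atomic_fetch_sub(&table_users, 1) == 1) {
			pthread_mutex_lock(&wait_mx);
			pthread_cond_broadcast(&wait_cv);
			pthread_mutex_unlock(&wait_mx);
		}
	}

	static void table_grab(void)		/* cf. netlink_table_grab() */
	{
		pthread_rwlock_wrlock(&table_lock);
		while (atomic_load(&table_users)) {
			/* Drop the lock while sleeping, exactly as the
			 * kernel version drops nl_table_lock around
			 * schedule(), then recheck after reacquiring. */
			pthread_rwlock_unlock(&table_lock);
			pthread_mutex_lock(&wait_mx);
			while (atomic_load(&table_users))
				pthread_cond_wait(&wait_cv, &wait_mx);
			pthread_mutex_unlock(&wait_mx);
			pthread_rwlock_wrlock(&table_lock);
		}
	}

	static void table_ungrab(void)		/* cf. netlink_table_ungrab() */
	{
		pthread_rwlock_unlock(&table_lock);
	}

As in the kernel version, the writer can be starved by a steady stream of readers, but once it holds the write lock with a zero user count no new reader can register.
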
@@ -158,66 +216,56 @@
return -ENOMEM;
sock_init_data(sock,sk);
- sk->destruct = NULL;
-
+
+ sk->protinfo.af_netlink = kmalloc(sizeof(struct netlink_opt), GFP_KERNEL);
+ if (sk->protinfo.af_netlink == NULL) {
+ sk_free(sk);
+ return -ENOMEM;
+ }
+ memset(sk->protinfo.af_netlink, 0, sizeof(struct netlink_opt));
+
+ spin_lock_init(&sk->protinfo.af_netlink->cb_lock);
+ init_waitqueue_head(&sk->protinfo.af_netlink->wait);
+ sk->destruct = netlink_sock_destruct;
+ atomic_inc(&netlink_sock_nr);
+
sk->protocol=protocol;
return 0;
}
-static int netlink_release(struct socket *sock, struct socket *peer)
+static int netlink_release(struct socket *sock)
{
struct sock *sk = sock->sk;
if (!sk)
return 0;
- /* Wait on table before removing socket */
- netlink_wait_on_table(sk->protocol);
netlink_remove(sk);
- if (sk->protinfo.af_netlink.cb) {
- netlink_unlock(sk);
- sk->protinfo.af_netlink.cb->done(sk->protinfo.af_netlink.cb);
- netlink_destroy_callback(sk->protinfo.af_netlink.cb);
- sk->protinfo.af_netlink.cb = NULL;
+ spin_lock(&sk->protinfo.af_netlink->cb_lock);
+ if (sk->protinfo.af_netlink->cb) {
+ sk->protinfo.af_netlink->cb->done(sk->protinfo.af_netlink->cb);
+ netlink_destroy_callback(sk->protinfo.af_netlink->cb);
+ sk->protinfo.af_netlink->cb = NULL;
+ __sock_put(sk);
}
+ spin_unlock(&sk->protinfo.af_netlink->cb_lock);
/* OK. Socket is unlinked, and, therefore,
no new packets will arrive */
- sk->state_change(sk);
+
+ write_lock_irq(&sk->callback_lock);
sk->dead = 1;
+ sk->socket = NULL;
+ sock->sk = NULL;
+ wake_up_interruptible(sk->sleep);
+ sk->sleep = NULL;
+ wake_up_interruptible(&sk->protinfo.af_netlink->wait);
+ write_unlock_irq(&sk->callback_lock);
- skb_queue_purge(&sk->receive_queue);
skb_queue_purge(&sk->write_queue);
- /* IMPORTANT! It is the major unpleasant feature of this
- transport (and AF_UNIX datagram, when it will be repaired).
-
- Someone could wait on our sock->wait now.
- We cannot release socket until waiter will remove itself
- from wait queue. I choose the most conservetive way of solving
- the problem.
-
- We waked up this queue above, so that we need only to wait
- when the readers release us.
- */
-
- while (netlink_locked(sk)) {
- current->policy |= SCHED_YIELD;
- schedule();
- }
-
- if (sk->socket) {
- sk->socket = NULL;
- sock->sk = NULL;
- }
-
- if (atomic_read(&sk->rmem_alloc) || atomic_read(&sk->wmem_alloc)) {
- printk(KERN_DEBUG "netlink_release: impossible event. Please, report.\n");
- return 0;
- }
-
- sk_free(sk);
+ sock_put(sk);
return 0;
}
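This hunk switches socket lifetime handling from the old netlink_lock()/netlink_unlock() counters, which netlink_release() had to busy-wait on, to plain reference counting: sock_hold() takes a reference, sock_put() drops one and runs the destructor on the last drop, and __sock_put() is the variant used where the caller knows another reference still exists. A minimal sketch of the idiom in C11 atomics (names are illustrative):

	#include <stdatomic.h>
	#include <stdlib.h>

	struct obj {
		atomic_int refcnt;			/* starts at 1 */
		void (*destruct)(struct obj *);	/* releases per-object state */
	};

	static void obj_hold(struct obj *o)
	{
		atomic_fetch_add(&o->refcnt, 1);
	}

	static void obj_put(struct obj *o)
	{
		/* The last reference frees the object via its destructor,
		 * replacing the old busy-wait on netlink_locked(). */
		if (atomic_fetch_sub(&o->refcnt, 1) == 1) {
			if (o->destruct)
				o->destruct(o);
			free(o);
		}
	}
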
@@ -225,29 +273,34 @@
{
struct sock *sk = sock->sk;
struct sock *osk;
-
- sk->protinfo.af_netlink.groups = 0;
- sk->protinfo.af_netlink.pid = current->pid;
+ s32 pid = current->pid;
+ int err;
retry:
+ netlink_table_grab();
for (osk=nl_table[sk->protocol]; osk; osk=osk->next) {
- if (osk->protinfo.af_netlink.pid == sk->protinfo.af_netlink.pid) {
+ if (osk->protinfo.af_netlink->pid == pid) {
/* Bind collision, search negative pid values. */
- if (sk->protinfo.af_netlink.pid > 0)
- sk->protinfo.af_netlink.pid = -4096;
- sk->protinfo.af_netlink.pid--;
+ if (pid > 0)
+ pid = -4096;
+ pid--;
+ netlink_table_ungrab();
goto retry;
}
}
+ netlink_table_ungrab();
- netlink_insert(sk);
+ err = netlink_insert(sk, pid);
+ if (err == -EADDRINUSE)
+ goto retry;
+ sk->protinfo.af_netlink->groups = 0;
return 0;
}
static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
{
struct sock *sk = sock->sk;
- struct sock *osk;
+ int err;
struct sockaddr_nl *nladdr=(struct sockaddr_nl *)addr;
if (nladdr->nl_family != AF_NETLINK)
@@ -257,40 +310,36 @@
if (nladdr->nl_groups && !capable(CAP_NET_ADMIN))
return -EPERM;
- if (sk->protinfo.af_netlink.pid) {
- if (nladdr->nl_pid != sk->protinfo.af_netlink.pid)
+ if (sk->protinfo.af_netlink->pid) {
+ if (nladdr->nl_pid != sk->protinfo.af_netlink->pid)
return -EINVAL;
- sk->protinfo.af_netlink.groups = nladdr->nl_groups;
+ sk->protinfo.af_netlink->groups = nladdr->nl_groups;
return 0;
}
if (nladdr->nl_pid == 0) {
- netlink_autobind(sock);
- sk->protinfo.af_netlink.groups = nladdr->nl_groups;
- return 0;
+ err = netlink_autobind(sock);
+ if (err == 0)
+ sk->protinfo.af_netlink->groups = nladdr->nl_groups;
+ return err;
}
- for (osk=nl_table[sk->protocol]; osk; osk=osk->next) {
- if (osk->protinfo.af_netlink.pid == nladdr->nl_pid)
- return -EADDRINUSE;
- }
-
- sk->protinfo.af_netlink.pid = nladdr->nl_pid;
- sk->protinfo.af_netlink.groups = nladdr->nl_groups;
- netlink_insert(sk);
- return 0;
+ err = netlink_insert(sk, nladdr->nl_pid);
+ if (err == 0)
+ sk->protinfo.af_netlink->groups = nladdr->nl_groups;
+ return err;
}
static int netlink_connect(struct socket *sock, struct sockaddr *addr,
int alen, int flags)
{
+ int err = 0;
struct sock *sk = sock->sk;
struct sockaddr_nl *nladdr=(struct sockaddr_nl*)addr;
- if (addr->sa_family == AF_UNSPEC)
- {
- sk->protinfo.af_netlink.dst_pid = 0;
- sk->protinfo.af_netlink.dst_groups = 0;
+ if (addr->sa_family == AF_UNSPEC) {
+ sk->protinfo.af_netlink->dst_pid = 0;
+ sk->protinfo.af_netlink->dst_groups = 0;
return 0;
}
if (addr->sa_family != AF_NETLINK)
@@ -300,11 +349,14 @@
if (nladdr->nl_groups && !capable(CAP_NET_ADMIN))
return -EPERM;
- sk->protinfo.af_netlink.dst_pid = nladdr->nl_pid;
- sk->protinfo.af_netlink.dst_groups = nladdr->nl_groups;
+ if (!sk->protinfo.af_netlink->pid)
+ err = netlink_autobind(sock);
+
+ if (err == 0) {
+ sk->protinfo.af_netlink->dst_pid = nladdr->nl_pid;
+ sk->protinfo.af_netlink->dst_groups = nladdr->nl_groups;
+ }
- if (!sk->protinfo.af_netlink.pid)
- netlink_autobind(sock);
return 0;
}
@@ -317,15 +369,23 @@
*addr_len = sizeof(*nladdr);
if (peer) {
- nladdr->nl_pid = sk->protinfo.af_netlink.dst_pid;
- nladdr->nl_groups = sk->protinfo.af_netlink.dst_groups;
+ nladdr->nl_pid = sk->protinfo.af_netlink->dst_pid;
+ nladdr->nl_groups = sk->protinfo.af_netlink->dst_groups;
} else {
- nladdr->nl_pid = sk->protinfo.af_netlink.pid;
- nladdr->nl_groups = sk->protinfo.af_netlink.groups;
+ nladdr->nl_pid = sk->protinfo.af_netlink->pid;
+ nladdr->nl_groups = sk->protinfo.af_netlink->groups;
}
return 0;
}
+static void netlink_overrun(struct sock *sk)
+{
+ if (!test_and_set_bit(0, &sk->protinfo.af_netlink->state)) {
+ sk->err = ENOBUFS;
+ sk->error_report(sk);
+ }
+}
+
int netlink_unicast(struct sock *ssk, struct sk_buff *skb, u32 pid, int nonblock)
{
struct sock *sk;
@@ -334,58 +394,59 @@
DECLARE_WAITQUEUE(wait, current);
retry:
- for (sk = nl_table[protocol]; sk; sk = sk->next) {
- if (sk->protinfo.af_netlink.pid != pid)
- continue;
-
- netlink_lock(sk);
+ sk = netlink_lookup(protocol, pid);
+ if (sk == NULL)
+ goto no_dst;
#ifdef NL_EMULATE_DEV
- if (sk->protinfo.af_netlink.handler) {
- skb_orphan(skb);
- len = sk->protinfo.af_netlink.handler(protocol, skb);
- netlink_unlock(sk);
- return len;
- }
+ if (sk->protinfo.af_netlink->handler) {
+ skb_orphan(skb);
+ len = sk->protinfo.af_netlink->handler(protocol, skb);
+ sock_put(sk);
+ return len;
+ }
#endif
- if (!nonblock) {
- add_wait_queue(sk->sleep, &wait);
- current->state = TASK_INTERRUPTIBLE;
+ if (atomic_read(&sk->rmem_alloc) > sk->rcvbuf ||
+ test_bit(0, &sk->protinfo.af_netlink->state)) {
+ if (nonblock) {
+ if (ssk->protinfo.af_netlink->pid == 0)
+ netlink_overrun(sk);
+ sock_put(sk);
+ kfree_skb(skb);
+ return -EAGAIN;
}
- if (atomic_read(&sk->rmem_alloc) > sk->rcvbuf) {
- if (nonblock) {
- netlink_unlock(sk);
- kfree_skb(skb);
- return -EAGAIN;
- }
+ add_wait_queue(&sk->protinfo.af_netlink->wait, &wait);
+ current->state = TASK_INTERRUPTIBLE;
+ barrier();
+
+ if ((atomic_read(&sk->rmem_alloc) > sk->rcvbuf ||
+ test_bit(0, &sk->protinfo.af_netlink->state)) &&
+ !signal_pending(current) &&
+ !sk->dead)
schedule();
- current->state = TASK_RUNNING;
- remove_wait_queue(sk->sleep, &wait);
- netlink_unlock(sk);
-
- if (signal_pending(current)) {
- kfree_skb(skb);
- return -ERESTARTSYS;
- }
- goto retry;
+ current->state = TASK_RUNNING;
+ remove_wait_queue(&sk->protinfo.af_netlink->wait, &wait);
+ sock_put(sk);
+
+ if (signal_pending(current)) {
+ kfree_skb(skb);
+ return -ERESTARTSYS;
}
+ goto retry;
+ }
- if (!nonblock) {
- current->state = TASK_RUNNING;
- remove_wait_queue(sk->sleep, &wait);
- }
+ skb_orphan(skb);
+ skb_set_owner_r(skb, sk);
+ skb_queue_tail(&sk->receive_queue, skb);
+ sk->data_ready(sk, len);
+ sock_put(sk);
+ return len;
- skb_orphan(skb);
- skb_set_owner_r(skb, sk);
- skb_queue_tail(&sk->receive_queue, skb);
- sk->data_ready(sk, len);
- netlink_unlock(sk);
- return len;
- }
+no_dst:
kfree_skb(skb);
return -ECONNREFUSED;
}
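netlink_unicast() now applies backpressure per receiving socket: a sender that finds the receiver over rcvbuf, or bit 0 of netlink_opt.state set, either fails with -EAGAIN (nonblocking) or sleeps on the receiver's private wait queue until the receive path drains the queue, clears the bit, and wakes it. netlink_overrun() relies on test_and_set_bit() returning the previous bit value, so only the first overrun reports ENOBUFS to the socket; a C11 analogue of that part (illustrative only):

	#include <stdatomic.h>

	static atomic_ulong nl_state;

	/* Returns nonzero if the congestion bit was already set,
	 * mirroring test_and_set_bit(0, &state) in netlink_overrun(). */
	static int mark_congested(void)
	{
		return (atomic_fetch_or(&nl_state, 1UL) & 1UL) != 0;
	}

	/* Mirrors clear_bit(0, &state) in the receive path once the
	 * queue has drained. */
	static void clear_congested(void)
	{
		atomic_fetch_and(&nl_state, ~1UL);
	}
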
@@ -393,14 +454,14 @@
static __inline__ int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
{
#ifdef NL_EMULATE_DEV
- if (sk->protinfo.af_netlink.handler) {
+ if (sk->protinfo.af_netlink->handler) {
skb_orphan(skb);
- sk->protinfo.af_netlink.handler(sk->protocol, skb);
+ sk->protinfo.af_netlink->handler(sk->protocol, skb);
return 0;
} else
#endif
- if (atomic_read(&sk->rmem_alloc) <= sk->rcvbuf) {
-Nprintk("broadcast_deliver %d\n", skb->len);
+ if (atomic_read(&sk->rmem_alloc) <= sk->rcvbuf &&
+ !test_bit(0, &sk->protinfo.af_netlink->state)) {
skb_orphan(skb);
skb_set_owner_r(skb, sk);
skb_queue_tail(&sk->receive_queue, skb);
@@ -420,24 +481,22 @@
/* While we sleep in clone, do not allow to change socket list */
- if (allocation == GFP_KERNEL)
- netlink_lock_table(protocol);
+ netlink_lock_table();
for (sk = nl_table[protocol]; sk; sk = sk->next) {
if (ssk == sk)
continue;
- if (sk->protinfo.af_netlink.pid == pid ||
- !(sk->protinfo.af_netlink.groups&group))
+ if (sk->protinfo.af_netlink->pid == pid ||
+ !(sk->protinfo.af_netlink->groups&group))
continue;
if (failure) {
- sk->err = ENOBUFS;
- sk->state_change(sk);
+ netlink_overrun(sk);
continue;
}
- netlink_lock(sk);
+ sock_hold(sk);
if (skb2 == NULL) {
if (atomic_read(&skb->users) != 1) {
skb2 = skb_clone(skb, allocation);
@@ -447,20 +506,17 @@
}
}
if (skb2 == NULL) {
- sk->err = ENOBUFS;
- sk->state_change(sk);
+ netlink_overrun(sk);
/* Clone failed. Notify ALL listeners. */
failure = 1;
} else if (netlink_broadcast_deliver(sk, skb2)) {
- sk->err = ENOBUFS;
- sk->state_change(sk);
+ netlink_overrun(sk);
} else
skb2 = NULL;
- netlink_unlock(sk);
+ sock_put(sk);
}
- if (allocation == GFP_KERNEL)
- netlink_unlock_table(protocol);
+ netlink_unlock_table();
if (skb2)
kfree_skb(skb2);
@@ -472,18 +528,19 @@
struct sock *sk;
int protocol = ssk->protocol;
-Nprintk("seterr");
+ read_lock(&nl_table_lock);
for (sk = nl_table[protocol]; sk; sk = sk->next) {
if (ssk == sk)
continue;
- if (sk->protinfo.af_netlink.pid == pid ||
- !(sk->protinfo.af_netlink.groups&group))
+ if (sk->protinfo.af_netlink->pid == pid ||
+ !(sk->protinfo.af_netlink->groups&group))
continue;
sk->err = code;
- sk->state_change(sk);
+ sk->error_report(sk);
}
+ read_unlock(&nl_table_lock);
}
static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, int len,
@@ -494,6 +551,7 @@
u32 dst_pid;
u32 dst_groups;
struct sk_buff *skb;
+ int err;
if (msg->msg_flags&MSG_OOB)
return -EOPNOTSUPP;
@@ -509,19 +567,23 @@
if (dst_groups && !capable(CAP_NET_ADMIN))
return -EPERM;
} else {
- dst_pid = sk->protinfo.af_netlink.dst_pid;
- dst_groups = sk->protinfo.af_netlink.dst_groups;
+ dst_pid = sk->protinfo.af_netlink->dst_pid;
+ dst_groups = sk->protinfo.af_netlink->dst_groups;
}
- if (!sk->protinfo.af_netlink.pid)
- netlink_autobind(sock);
+ if (!sk->protinfo.af_netlink->pid) {
+ err = netlink_autobind(sock);
+ if (err)
+ goto out;
+ }
- skb = sock_wmalloc(sk, len, 0, GFP_KERNEL);
+ err = -ENOBUFS;
+ skb = alloc_skb(len, GFP_KERNEL);
if (skb==NULL)
- return -ENOBUFS;
+ goto out;
- NETLINK_CB(skb).pid = sk->protinfo.af_netlink.pid;
- NETLINK_CB(skb).groups = sk->protinfo.af_netlink.groups;
+ NETLINK_CB(skb).pid = sk->protinfo.af_netlink->pid;
+ NETLINK_CB(skb).groups = sk->protinfo.af_netlink->groups;
NETLINK_CB(skb).dst_pid = dst_pid;
NETLINK_CB(skb).dst_groups = dst_groups;
memcpy(NETLINK_CREDS(skb), &scm->creds, sizeof(struct ucred));
@@ -533,16 +595,20 @@
*/
NETLINK_CB(skb).eff_cap = current->cap_effective;
+ err = -EFAULT;
if (memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len)) {
kfree_skb(skb);
- return -EFAULT;
+ goto out;
}
if (dst_groups) {
atomic_inc(&skb->users);
netlink_broadcast(sk, skb, dst_pid, dst_groups, GFP_KERNEL);
}
- return netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT);
+ err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT);
+
+out:
+ return err;
}
static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, int len,
@@ -554,12 +620,14 @@
struct sk_buff *skb;
int err;
- if (flags&(MSG_OOB|MSG_PEEK))
+ if (flags&MSG_OOB)
return -EOPNOTSUPP;
+ copied = 0;
+
skb = skb_recv_datagram(sk,flags,noblock,&err);
if (skb==NULL)
- return err;
+ goto out;
msg->msg_namelen = 0;
@@ -583,12 +651,33 @@
scm->creds = *NETLINK_CREDS(skb);
skb_free_datagram(sk, skb);
- if (sk->protinfo.af_netlink.cb
+ if (sk->protinfo.af_netlink->cb
&& atomic_read(&sk->rmem_alloc) <= sk->rcvbuf/2)
netlink_dump(sk);
+
+out:
+ if (skb_queue_len(&sk->receive_queue) <= sk->rcvbuf/2) {
+ if (skb_queue_len(&sk->receive_queue) == 0)
+ clear_bit(0, &sk->protinfo.af_netlink->state);
+ if (!test_bit(0, &sk->protinfo.af_netlink->state))
+ wake_up_interruptible(&sk->protinfo.af_netlink->wait);
+ }
return err ? : copied;
}
+void netlink_data_ready(struct sock *sk, int len)
+{
+ if (sk->protinfo.af_netlink->data_ready)
+ sk->protinfo.af_netlink->data_ready(sk, len);
+
+ if (skb_queue_len(&sk->receive_queue) <= sk->rcvbuf/2) {
+ if (skb_queue_len(&sk->receive_queue) == 0)
+ clear_bit(0, &sk->protinfo.af_netlink->state);
+ if (!test_bit(0, &sk->protinfo.af_netlink->state))
+ wake_up_interruptible(&sk->protinfo.af_netlink->wait);
+ }
+}
+
/*
* We export these functions to other modules. They provide a
* complete set of kernel non-blocking support for message
@@ -614,10 +703,11 @@
return NULL;
}
sk = sock->sk;
+ sk->data_ready = netlink_data_ready;
if (input)
- sk->data_ready = input;
+ sk->protinfo.af_netlink->data_ready = input;
- netlink_insert(sk);
+ netlink_insert(sk, 0);
return sk;
}
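For comparison, this is roughly how a kernel-side user of this era consumes the interface: netlink_kernel_create() returns a struct sock whose private data_ready hook (now stored in netlink_opt and dispatched through netlink_data_ready()) is called as messages arrive, and the hook drains sk->receive_queue itself. A sketch; the unit and function names are illustrative:

	static void my_rcv(struct sock *sk, int len)
	{
		struct sk_buff *skb;

		/* Invoked via netlink_data_ready() whenever messages
		 * are queued; drain and process them. */
		while ((skb = skb_dequeue(&sk->receive_queue)) != NULL) {
			/* ... parse the nlmsghdr at skb->data ... */
			kfree_skb(skb);
		}
	}

	static struct sock *my_sk;

	static int my_init(void)
	{
		my_sk = netlink_kernel_create(NETLINK_ROUTE, my_rcv);
		return my_sk ? 0 : -ENOBUFS;
	}
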
@@ -644,11 +734,19 @@
if (!skb)
return -ENOBUFS;
- cb = sk->protinfo.af_netlink.cb;
+ spin_lock(&sk->protinfo.af_netlink->cb_lock);
+
+ cb = sk->protinfo.af_netlink->cb;
+ if (cb == NULL) {
+ spin_unlock(&sk->protinfo.af_netlink->cb_lock);
+ kfree_skb(skb);
+ return -EINVAL;
+ }
len = cb->dump(skb, cb);
if (len > 0) {
+ spin_unlock(&sk->protinfo.af_netlink->cb_lock);
skb_queue_tail(&sk->receive_queue, skb);
sk->data_ready(sk, len);
return 0;
@@ -661,9 +759,11 @@
sk->data_ready(sk, skb->len);
cb->done(cb);
- sk->protinfo.af_netlink.cb = NULL;
+ sk->protinfo.af_netlink->cb = NULL;
+ spin_unlock(&sk->protinfo.af_netlink->cb_lock);
+
netlink_destroy_callback(cb);
- netlink_unlock(sk);
+ sock_put(sk);
return 0;
}
@@ -692,12 +792,16 @@
return -ECONNREFUSED;
}
/* A dump is in progress... */
- if (sk->protinfo.af_netlink.cb) {
+ spin_lock(&sk->protinfo.af_netlink->cb_lock);
+ if (sk->protinfo.af_netlink->cb) {
+ spin_unlock(&sk->protinfo.af_netlink->cb_lock);
netlink_destroy_callback(cb);
- netlink_unlock(sk);
+ sock_put(sk);
return -EBUSY;
}
- sk->protinfo.af_netlink.cb = cb;
+ sk->protinfo.af_netlink->cb = cb;
+ spin_unlock(&sk->protinfo.af_netlink->cb_lock);
+
netlink_dump(sk);
return 0;
}
@@ -717,7 +821,7 @@
skb = alloc_skb(size, GFP_KERNEL);
if (!skb)
return;
-
+
rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
NLMSG_ERROR, sizeof(struct nlmsgerr));
errmsg = NLMSG_DATA(rep);
@@ -728,6 +832,9 @@
#ifdef NL_EMULATE_DEV
+
+static rwlock_t nl_emu_lock = RW_LOCK_UNLOCKED;
+
/*
* Backward compatibility.
*/
@@ -737,31 +844,44 @@
struct sock *sk = netlink_kernel_create(unit, NULL);
if (sk == NULL)
return -ENOBUFS;
- sk->protinfo.af_netlink.handler = function;
+ sk->protinfo.af_netlink->handler = function;
+ write_lock_bh(&nl_emu_lock);
netlink_kernel[unit] = sk->socket;
+ write_unlock_bh(&nl_emu_lock);
return 0;
}
void netlink_detach(int unit)
{
- struct socket *sock = netlink_kernel[unit];
+ struct socket *sock;
+ write_lock_bh(&nl_emu_lock);
+ sock = netlink_kernel[unit];
netlink_kernel[unit] = NULL;
- synchronize_bh();
+ write_unlock_bh(&nl_emu_lock);
sock_release(sock);
}
int netlink_post(int unit, struct sk_buff *skb)
{
- struct socket *sock = netlink_kernel[unit];
- barrier();
+ struct socket *sock;
+
+ read_lock(&nl_emu_lock);
+ sock = netlink_kernel[unit];
if (sock) {
+ struct sock *sk = sock->sk;
memset(skb->cb, 0, sizeof(skb->cb));
- netlink_broadcast(sock->sk, skb, 0, ~0, GFP_ATOMIC);
+ sock_hold(sk);
+ read_unlock(&nl_emu_lock);
+
+ netlink_broadcast(sk, skb, 0, ~0, GFP_ATOMIC);
+
+ sock_put(sk);
return 0;
}
- return -EUNATCH;;
+ read_unlock(&nl_emu_lock);
+ return -EUNATCH;
}
#endif
@@ -781,16 +901,17 @@
"Rmem Wmem Dump Locks\n");
for (i=0; i<MAX_LINKS; i++) {
+ read_lock(&nl_table_lock);
for (s = nl_table[i]; s; s = s->next) {
len+=sprintf(buffer+len,"%p %-3d %-6d %08x %-8d %-8d %p %d",
s,
s->protocol,
- s->protinfo.af_netlink.pid,
- s->protinfo.af_netlink.groups,
+ s->protinfo.af_netlink->pid,
+ s->protinfo.af_netlink->groups,
atomic_read(&s->rmem_alloc),
atomic_read(&s->wmem_alloc),
- s->protinfo.af_netlink.cb,
- atomic_read(&s->protinfo.af_netlink.locks)
+ s->protinfo.af_netlink->cb,
+ atomic_read(&s->refcnt)
);
buffer[len++]='\n';
@@ -800,9 +921,12 @@
len=0;
begin=pos;
}
- if(pos>offset+length)
+ if(pos>offset+length) {
+ read_unlock(&nl_table_lock);
goto done;
+ }
}
+ read_unlock(&nl_table_lock);
}
*eof = 1;
@@ -820,7 +944,6 @@
struct proto_ops netlink_ops = {
PF_NETLINK,
- sock_no_dup,
netlink_release,
netlink_bind,
netlink_connect,
@@ -835,7 +958,8 @@
sock_no_getsockopt,
sock_no_fcntl,
netlink_sendmsg,
- netlink_recvmsg
+ netlink_recvmsg,
+ sock_no_mmap
};
struct net_proto_family netlink_family_ops = {