特点
- netlink用于内核和用户空间传输信息。对于用户空间进程,我们可以直接使用socket api 来完成。
- netlink 是面向数据报的
相关函数
#include <asm/types.h>
#include <sys/socket.h>
#include <linux/netlink.h>
netlink_socket = socket(AF_NETLINK, socket_type, netlink_family);
-
socket_type
取值可以是
SOCK_DGRAM
或者SOCK_RAW
,但是netlink并不区分这个。 -
netlink_family
取值有很多,具体参见
man 7 netlink
。这里我们只记录一个NETLINK_KOBJECT_UEVENT
,当我们需要获取内核device的相关信息时(比如tf卡热插拔),就可以设置该协议。
相关结构体
-
struct msghdr
struct iovec { /* Scatter/gather array items */ void *iov_base; /* Starting address */ size_t iov_len; /* Number of bytes to transfer */ }; struct msghdr { void *msg_name; /* optional address */ socklen_t msg_namelen; /* size of address */ struct iovec *msg_iov; /* scatter/gather array */ size_t msg_iovlen; /* # elements in msg_iov */ void *msg_control; /* ancillary data, see below */ size_t msg_controllen; /* ancillary data buffer len */ int msg_flags; /* flags on received message */ };
用过
recvmsg/sendmsg
函数的对这个一定不陌生,实际上,对于Netlink而言,我们也是使用该函数组来接收发送数据。在linux 网络编程中,常用的接收发送函数如下:
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/uio.h>

// used only on a connected socket
ssize_t send(int sockfd, const void *buf, size_t len, int flags);
ssize_t recv(int sockfd, void *buf, size_t len, int flags);

// used only on a connected socket
ssize_t readv(int fd, const struct iovec *iov, int iovcnt);
ssize_t writev(int fd, const struct iovec *iov, int iovcnt);

//whether or not it is connection-oriented
ssize_t sendto(int sockfd, const void *buf, size_t len, int flags, const struct sockaddr *dest_addr, socklen_t addrlen);
ssize_t recvfrom(int sockfd, void *buf, size_t len, int flags, struct sockaddr *src_addr, socklen_t *addrlen);

//whether or not it is connection-oriented
ssize_t sendmsg(int sockfd, const struct msghdr *msg, int flags);
ssize_t recvmsg(int sockfd, struct msghdr *msg, int flags);
这几组函数中,
readv/writev
和send/recv
只能在已连接(connected)的socket上使用(例如TCP,或已经调用过connect()的UDP socket);sendto/recvfrom
无论是不是面向连接的socket都能使用;而sendmsg/recvmsg
能通过配置struct msghdr
来实现前3组函数的所有功能。msg_name
,取值可以是struct sockaddr
、struct sockaddr_in
、struct sockaddr_nl
和struct sockaddr_un
。通常,只有当socket是无连接状态的(比如UDP),才会设置
msg_name
参数,对于netlink,其取值为sockaddr_nl
。//describes a netlink client in user space or in the kernel. struct sockaddr_nl { sa_family_t nl_family; /*该字段总是为AF_NETLINK */ unsigned short nl_pad; /* 目前未用到,填充为0*/ __u32 nl_pid; /* process pid */ __u32 nl_groups; /* multicast groups mask */ };
-
nl_pid
单播地址,设置为0表示消息destination是内核;否则,通常设置为消息目标进程的进程ID。当一个进程使用多个netlink socket,需要应用程序保证
nl_pid
的唯一,或者将其设置为0,由内核去分配一个独一无二的值。 -
nl_groups
多播地址掩码。没用到过!用到再补充,设置为0即可。。。
msg_iov
,消息内容。对于netlink,其实际内容由nlmsghdr
填充。。。msg_control
,指向其他协议控制相关消息或其他辅助数据的缓冲区。其实际类型通常是cmsghdr
。 -
-
struct nlmsghdr
struct nlmsghdr { __u32 nlmsg_len; /* Length of message including header. */ __u16 nlmsg_type; /* Type of message content. */ __u16 nlmsg_flags; /* Additional flags. */ __u32 nlmsg_seq; /* Sequence number. */ __u32 nlmsg_pid; /* PID of the sending process. */ };
-
nlmsg_type
NLMSG_NOOP
message is to be ignored。NLMSG_ERROR
message signals an error and the payload contains an nlmsgerr structure。NLMSG_DONE
message terminates a multipart message -
nlmsg_flags
-
-
struct cmsghdr
关于这些宏的使用方式可以看实例。
#include <sys/socket.h>

//返回msghdr中的第一个 cmsghdr的地址
struct cmsghdr *CMSG_FIRSTHDR(struct msghdr *msgh);
//返回msghdr中位于cmsg之后的下一个 cmsghdr的地址,没有更多时返回NULL
struct cmsghdr *CMSG_NXTHDR(struct msghdr *msgh, struct cmsghdr *cmsg);
size_t CMSG_ALIGN(size_t length);
size_t CMSG_SPACE(size_t length);
size_t CMSG_LEN(size_t length);
unsigned char *CMSG_DATA(struct cmsghdr *cmsg);

struct cmsghdr {
    socklen_t cmsg_len;   /* data byte count, including hdr */
    int       cmsg_level; /* originating protocol */
    int       cmsg_type;  /* protocol-specific type */
    /* followed by unsigned char cmsg_data[]; */
};
msg_flags
,通常设置为0,取值见man手册。
使用示例
以NETLINK_KOBJECT_UEVENT
为例。
-
创建netlink socket。
/**
 * Open a NETLINK_KOBJECT_UEVENT socket for receiving kernel uevents.
 *
 * @param buf_sz    Requested receive-buffer size in bytes (passed to SO_RCVBUF).
 * @param passcred  When true, enable SO_PASSCRED so that every received
 *                  message carries the sender's credentials as ancillary data.
 * @return The bound socket descriptor on success; -1 on failure, with errno
 *         left as set by the call that failed. The caller owns the returned
 *         descriptor and must close() it.
 */
int uevent_open_socket(int buf_sz, bool passcred)
{
    struct sockaddr_nl addr;
    int on = passcred;
    int saved_errno;
    int s;

    memset(&addr, 0, sizeof(addr));
    addr.nl_family = AF_NETLINK;
    addr.nl_pid = getpid();
    /* nl_groups is a bitmask of multicast groups; every bit set joins all of them. */
    addr.nl_groups = 0xffffffff;

    s = socket(PF_NETLINK, SOCK_DGRAM | SOCK_CLOEXEC, NETLINK_KOBJECT_UEVENT);
    if (s < 0) {
        return -1;
    }

    /* buf_sz should be less than net.core.rmem_max for this to succeed */
    if (setsockopt(s, SOL_SOCKET, SO_RCVBUF, &buf_sz, sizeof(buf_sz)) < 0) {
        goto fail;
    }

    /*
     * Without SO_PASSCRED no credentials are attached to incoming messages,
     * so a receiver that validates the sender would reject everything.
     * Treat a failure as fatal only when the caller asked for credentials.
     */
    if (setsockopt(s, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on)) < 0 && passcred) {
        goto fail;
    }

    if (bind(s, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
        goto fail;
    }

    return s;

fail:
    /* close() may overwrite errno; keep the value from the call that failed. */
    saved_errno = errno;
    close(s);
    errno = saved_errno;
    return -1;
}
-
接收netlink消息
/**
 * Receive one uevent datagram and accept it only if it came from the kernel.
 *
 * A message is accepted when it carries SCM_CREDENTIALS ancillary data with
 * uid 0, its netlink source address has nl_pid == 0, and (when require_group
 * is set) it was delivered to a multicast group rather than unicast.
 *
 * @param socket         Netlink socket to read from; credentials are only
 *                       present if SO_PASSCRED is enabled on it.
 * @param buffer         Destination for the message payload.
 * @param length         Size of buffer in bytes.
 * @param require_group  When true, reject messages whose nl_groups is 0.
 * @param uid            Out: set to (uid_t)-1 first, then to the sender's uid
 *                       once credentials have been read. Must not be NULL.
 * @return The recvmsg() result unchanged when it is <= 0; the number of bytes
 *         received for an accepted message; -1 with errno = EIO and buffer
 *         zeroed for a rejected message.
 */
ssize_t uevent_kernel_recv(int socket, void *buffer, size_t length, bool require_group, uid_t *uid)
{
    struct iovec iov = { .iov_base = buffer, .iov_len = length };
    struct sockaddr_nl addr;
    /* The union guarantees the control buffer is aligned for struct cmsghdr. */
    union {
        struct cmsghdr align;
        char buf[CMSG_SPACE(sizeof(struct ucred))];
    } control;
    /*
     * Designated initializers: the member order and padding of struct msghdr
     * differ between C libraries, so positional initialization is not portable.
     */
    struct msghdr hdr = {
        .msg_name = &addr,
        .msg_namelen = sizeof(addr),
        .msg_iov = &iov,
        .msg_iovlen = 1,
        .msg_control = control.buf,
        .msg_controllen = sizeof(control.buf),
        .msg_flags = 0,
    };
    struct cmsghdr *cmsg;
    struct ucred cred;
    ssize_t n;

    memset(&addr, 0, sizeof(addr));
    *uid = (uid_t)-1;

    n = recvmsg(socket, &hdr, 0);
    if (n <= 0) {
        return n;
    }

    cmsg = CMSG_FIRSTHDR(&hdr);
    if (cmsg == NULL ||
        cmsg->cmsg_level != SOL_SOCKET ||
        cmsg->cmsg_type != SCM_CREDENTIALS ||
        cmsg->cmsg_len < CMSG_LEN(sizeof(cred))) {
        /* ignoring netlink message with no (or truncated) sender credentials */
        goto reject;
    }

    /* Copy out instead of casting: CMSG_DATA() need not be aligned for struct ucred. */
    memcpy(&cred, CMSG_DATA(cmsg), sizeof(cred));
    *uid = cred.uid;

    if (cred.uid != 0) {
        /* ignoring netlink message from non-root user */
        goto reject;
    }

    if (hdr.msg_namelen < sizeof(addr) || addr.nl_pid != 0) {
        /* ignore non-kernel senders, or a source address that was not filled in */
        goto reject;
    }

    if (require_group && addr.nl_groups == 0) {
        /* ignore unicast messages when requested */
        goto reject;
    }

    return n;

reject:
    /* clear residual potentially malicious data */
    memset(buffer, 0, length);
    errno = EIO;
    return -1;
}
此实例摘自android libcutils 库。通过这两个函数,我们基本就能知道如何使用netlink了。。。
但是不知道为啥
uevent_kernel_recv
接收数据时,并未使用nlmsghdr
结构体。。。
nlmsghdr
使用示例:
int len;
char buf[4096];
struct iovec iov = { buf, sizeof(buf) };
struct sockaddr_nl sa;
struct msghdr msg = {
    .msg_name = &sa,
    .msg_namelen = sizeof(sa),
    .msg_iov = &iov,
    .msg_iovlen = 1,
};
struct nlmsghdr *nh;

len = recvmsg(fd, &msg, 0);
for (nh = (struct nlmsghdr *) buf; NLMSG_OK (nh, len); nh = NLMSG_NEXT (nh, len)) {
    /* The end of multipart message. */
    if (nh->nlmsg_type == NLMSG_DONE)
        return;
    if (nh->nlmsg_type == NLMSG_ERROR)
        /* Do some error handling. */
        ...
    /* Continue with parsing payload. */
    ...
}
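补充: 走读了内核(lib/kobject_uevent.c)中的
kobject_uevent_env
方法,没有看到和nlmsghdr
相关的处理。原因是内核广播uevent时,直接把形如 "ACTION@DEVPATH\0KEY=VALUE\0..." 的文本作为数据报内容发出,前面并不带nlmsghdr头;所以对于NETLINK_KOBJECT_UEVENT,接收端拿到的就是原始字符串,不需要用NLMSG_*宏解析。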