| .\" This man page is Copyright (c) 1998 by Andi Kleen. |
| .\" |
| .\" %%%LICENSE_START(GPL_NOVERSION_ONELINE) |
| .\" Subject to the GPL. |
| .\" %%%LICENSE_END |
| .\" |
| .\" Based on the original comments from Alexey Kuznetsov |
| .\" Modified 2005-12-27 by Hasso Tepper <hasso@estpak.ee> |
| .\" $Id: netlink.7,v 1.8 2000/06/22 13:23:00 ak Exp $ |
| .TH NETLINK 7 2021-03-22 "Linux" "Linux Programmer's Manual" |
| .SH NAME |
| netlink \- communication between kernel and user space (AF_NETLINK) |
| .SH SYNOPSIS |
| .nf |
| .B #include <asm/types.h> |
| .B #include <sys/socket.h> |
| .B #include <linux/netlink.h> |
| .PP |
| .BI "netlink_socket = socket(AF_NETLINK, " socket_type ", " netlink_family ); |
| .fi |
| .SH DESCRIPTION |
| Netlink is used to transfer information between the kernel and |
| user-space processes. |
| It consists of a standard sockets-based interface for user space |
| processes and an internal kernel API for kernel modules. |
| The internal kernel interface is not documented in this manual page. |
| There is also an obsolete netlink interface |
| via netlink character devices; this interface is not documented here |
| and is provided only for backward compatibility. |
| .PP |
| Netlink is a datagram-oriented service. |
| Both |
| .B SOCK_RAW |
| and |
| .B SOCK_DGRAM |
| are valid values for |
| .IR socket_type . |
| However, the netlink protocol does not distinguish between datagram |
| and raw sockets. |
| .PP |
| .I netlink_family |
| selects the kernel module or netlink group to communicate with. |
| The currently assigned netlink families are: |
| .TP |
| .BR NETLINK_ROUTE |
| Receives routing and link updates and may be used to modify the routing |
| tables (both IPv4 and IPv6), IP addresses, link parameters, |
| neighbor setups, queueing disciplines, traffic classes, and |
| packet classifiers (see |
| .BR rtnetlink (7)). |
| .TP |
| .BR NETLINK_W1 " (Linux 2.6.13 to 2.6.17)" |
| Messages from 1-wire subsystem. |
| .TP |
| .BR NETLINK_USERSOCK |
| Reserved for user-mode socket protocols. |
| .TP |
| .BR NETLINK_FIREWALL " (up to and including Linux 3.4)" |
| .\" removed by commit d16cf20e2f2f13411eece7f7fb72c17d141c4a84 |
| Transport IPv4 packets from netfilter to user space. |
| Used by |
| .I ip_queue |
| kernel module. |
| After a long period of being declared obsolete (in favor of the more advanced |
| .I nfnetlink_queue |
| feature), |
| .BR NETLINK_FIREWALL |
| was removed in Linux 3.5. |
| .TP |
| .BR NETLINK_SOCK_DIAG " (since Linux 3.3)" |
| .\" commit 7f1fb60c4fc9fb29fbb406ac8c4cfb4e59e168d6 |
| Query information about sockets of various protocol families from the kernel |
| (see |
| .BR sock_diag (7)). |
| .TP |
| .BR NETLINK_INET_DIAG " (since Linux 2.6.14)" |
| An obsolete synonym for |
| .BR NETLINK_SOCK_DIAG . |
| .TP |
| .BR NETLINK_NFLOG " (up to and including Linux 3.16)" |
| Netfilter/iptables ULOG. |
| .TP |
| .BR NETLINK_XFRM |
| .\" FIXME More details on NETLINK_XFRM needed. |
| IPsec. |
| .TP |
| .BR NETLINK_SELINUX " (since Linux 2.6.4)" |
| SELinux event notifications. |
| .TP |
| .BR NETLINK_ISCSI " (since Linux 2.6.15)" |
| .\" FIXME More details on NETLINK_ISCSI needed. |
| Open-iSCSI. |
| .TP |
| .BR NETLINK_AUDIT " (since Linux 2.6.6)" |
| .\" FIXME More details on NETLINK_AUDIT needed. |
| Auditing. |
| .TP |
| .BR NETLINK_FIB_LOOKUP " (since Linux 2.6.13)" |
| .\" FIXME More details on NETLINK_FIB_LOOKUP needed. |
| Access to FIB lookup from user space. |
| .TP |
| .BR NETLINK_CONNECTOR " (since Linux 2.6.14)" |
| Kernel connector. |
| See |
| .I Documentation/driver\-api/connector.rst |
| (or |
| .IR Documentation/connector/connector.* |
| .\" commit baa293e9544bea71361950d071579f0e4d5713ed |
| in kernel 5.2 and earlier) |
| in the Linux kernel source tree for further information. |
| .TP |
| .BR NETLINK_NETFILTER " (since Linux 2.6.14)" |
| .\" FIXME More details on NETLINK_NETFILTER needed. |
| Netfilter subsystem. |
| .TP |
| .BR NETLINK_SCSITRANSPORT " (since Linux 2.6.19)" |
| .\" commit 84314fd4740ad73550c76dee4a9578979d84af48 |
| .\" FIXME More details on NETLINK_SCSITRANSPORT needed. |
| SCSI Transports. |
| .TP |
| .BR NETLINK_RDMA " (since Linux 3.0)" |
| .\" commit b2cbae2c248776d81cc265ff7d48405b6a4cc463 |
| .\" FIXME More details on NETLINK_RDMA needed. |
| Infiniband RDMA. |
| .TP |
| .BR NETLINK_IP6_FW " (up to and including Linux 3.4)" |
| Transport IPv6 packets from netfilter to user space. |
| Used by |
| .I ip6_queue |
| kernel module. |
| .TP |
| .B NETLINK_DNRTMSG |
| DECnet routing messages. |
| .TP |
| .BR NETLINK_KOBJECT_UEVENT " (since Linux 2.6.10)" |
| .\" FIXME More details on NETLINK_KOBJECT_UEVENT needed. |
| Kernel messages to user space. |
| .TP |
| .BR NETLINK_GENERIC " (since Linux 2.6.15)" |
| Generic netlink family for simplified netlink usage. |
| .TP |
| .BR NETLINK_CRYPTO " (since Linux 3.2)" |
| .\" commit a38f7907b926e4c6c7d389ad96cc38cec2e5a9e9 |
| .\" Author: Steffen Klassert <steffen.klassert@secunet.com> |
| Netlink interface to request information about ciphers registered |
| with the kernel crypto API as well as allow configuration of the |
| kernel crypto API. |
| .PP |
| Netlink messages consist of a byte stream with one or multiple |
| .I nlmsghdr |
| headers and associated payload. |
| The byte stream should be accessed only with the standard |
| .B NLMSG_* |
| macros. |
| See |
| .BR netlink (3) |
| for further information. |
| .PP |
| In multipart messages (multiple |
| .I nlmsghdr |
| headers with associated payload in one byte stream) the first and all |
| following headers have the |
| .B NLM_F_MULTI |
| flag set, except for the last header which has the type |
| .BR NLMSG_DONE . |
| .PP |
| After each |
| .I nlmsghdr |
| the payload follows. |
| .PP |
| .in +4n |
| .EX |
| struct nlmsghdr { |
| __u32 nlmsg_len; /* Length of message including header */ |
| __u16 nlmsg_type; /* Type of message content */ |
| __u16 nlmsg_flags; /* Additional flags */ |
| __u32 nlmsg_seq; /* Sequence number */ |
| __u32 nlmsg_pid; /* Sender port ID */ |
| }; |
| .EE |
| .in |
| .PP |
| .I nlmsg_type |
| can be one of the standard message types: |
| .B NLMSG_NOOP |
| message is to be ignored, |
| .B NLMSG_ERROR |
| message signals an error and the payload contains an |
| .I nlmsgerr |
| structure, |
| .B NLMSG_DONE |
| message terminates a multipart message. Error messages get the |
| original request appended, unless the user requests to cap the |
| error message, and get extra error data if requested. |
| .PP |
| .in +4n |
| .EX |
| struct nlmsgerr { |
| int error; /* Negative errno or 0 for acknowledgements */ |
| struct nlmsghdr msg; /* Message header that caused the error */ |
| /* |
| * followed by the message contents unless NETLINK_CAP_ACK was set |
| * or the ACK indicates success (error == 0). |
| * For example Generic Netlink message with attributes. |
| * message length is aligned with NLMSG_ALIGN() |
| */ |
| /* |
| * followed by TLVs defined in enum nlmsgerr_attrs |
| * if NETLINK_EXT_ACK was set |
| */ |
| }; |
| .EE |
| .in |
| .PP |
| A netlink family usually specifies more message types, see the |
| appropriate manual pages for that, for example, |
| .BR rtnetlink (7) |
| for |
| .BR NETLINK_ROUTE . |
| .nh |
| .ad l |
| .TS |
| tab(:); |
| l s |
| lB lx. |
| Standard flag bits in \fInlmsg_flags\fP |
| _ |
| NLM_F_REQUEST:T{ |
| Must be set on all request messages. |
| T} |
| NLM_F_MULTI:T{ |
| The message is part of a multipart message terminated by |
| .BR NLMSG_DONE . |
| T} |
| NLM_F_ACK:T{ |
| Request for an acknowledgement on success. |
| T} |
| NLM_F_ECHO:T{ |
| Echo this request. |
| T} |
| .TE |
| .ad |
| .hy |
| .\" No right adjustment for text blocks in tables |
| .nh |
| .ad l |
| .TS |
| tab(:); |
| l s |
| lB lx. |
| Additional flag bits for GET requests |
| _ |
| NLM_F_ROOT:T{ |
| Return the complete table instead of a single entry. |
| T} |
| NLM_F_MATCH:T{ |
| Return all entries matching criteria passed in message content. |
| Not implemented yet. |
| T} |
| NLM_F_ATOMIC:T{ |
| Return an atomic snapshot of the table. |
| T} |
| NLM_F_DUMP:T{ |
| Convenience macro; equivalent to |
| (NLM_F_ROOT|NLM_F_MATCH). |
| T} |
| .TE |
| .ad |
| .hy |
| .\" FIXME NLM_F_ATOMIC is not used anymore? |
| .PP |
| Note that |
| .B NLM_F_ATOMIC |
| requires the |
| .B CAP_NET_ADMIN |
| capability or an effective UID of 0. |
| .nh |
| .ad l |
| .TS |
| tab(:); |
| l s |
| lB lx. |
| Additional flag bits for NEW requests |
| _ |
| NLM_F_REPLACE:T{ |
| Replace existing matching object. |
| T} |
| NLM_F_EXCL:T{ |
| Don't replace if the object already exists. |
| T} |
| NLM_F_CREATE:T{ |
| Create object if it doesn't already exist. |
| T} |
| NLM_F_APPEND:T{ |
| Add to the end of the object list. |
| T} |
| .TE |
| .ad |
| .hy |
| .PP |
| .I nlmsg_seq |
| and |
| .I nlmsg_pid |
| are used to track messages. |
| .I nlmsg_pid |
| shows the origin of the message. |
| Note that there isn't a 1:1 relationship between |
| .I nlmsg_pid |
| and the PID of the process if the message originated from a netlink |
| socket. |
| See the |
| .B ADDRESS FORMATS |
| section for further information. |
| .PP |
| Both |
| .I nlmsg_seq |
| and |
| .I nlmsg_pid |
| .\" FIXME Explain more about nlmsg_seq and nlmsg_pid. |
| are opaque to netlink core. |
| .PP |
| Netlink is not a reliable protocol. |
| It tries its best to deliver a message to its destination(s), |
| but may drop messages when an out-of-memory condition or |
| other error occurs. |
| For reliable transfer the sender can request an |
| acknowledgement from the receiver by setting the |
| .B NLM_F_ACK |
| flag. |
| An acknowledgement is an |
| .B NLMSG_ERROR |
| packet with the error field set to 0. |
| The application must generate acknowledgements for |
| received messages itself. |
| The kernel tries to send an |
| .B NLMSG_ERROR |
| message for every failed packet. |
| A user process should follow this convention too. |
| .PP |
| However, reliable transmissions from kernel to user are impossible |
| in any case. |
| The kernel can't send a netlink message if the socket buffer is full: |
| the message will be dropped and the kernel and the user-space process will |
| no longer have the same view of kernel state. |
| It is up to the application to detect when this happens (via the |
| .B ENOBUFS |
| error returned by |
| .BR recvmsg (2)) |
| and resynchronize. |
| .SS Address formats |
| The |
| .I sockaddr_nl |
| structure describes a netlink client in user space or in the kernel. |
| A |
| .I sockaddr_nl |
| can be either unicast (only sent to one peer) or sent to |
| netlink multicast groups |
| .RI ( nl_groups |
| not equal to 0). |
| .PP |
| .in +4n |
| .EX |
| struct sockaddr_nl { |
| sa_family_t nl_family; /* AF_NETLINK */ |
| unsigned short nl_pad; /* Zero */ |
| pid_t nl_pid; /* Port ID */ |
| __u32 nl_groups; /* Multicast groups mask */ |
| }; |
| .EE |
| .in |
| .PP |
| .I nl_pid |
| is the unicast address of a netlink socket. |
| It's always 0 if the destination is in the kernel. |
| For a user-space process, |
| .I nl_pid |
| is usually the PID of the process owning the destination socket. |
| However, |
| .I nl_pid |
| identifies a netlink socket, not a process. |
| If a process owns several netlink |
| sockets, then |
| .I nl_pid |
| can be equal to the process ID only for at most one socket. |
| There are two ways to assign |
| .I nl_pid |
| to a netlink socket. |
| If the application sets |
| .I nl_pid |
| before calling |
| .BR bind (2), |
| then it is up to the application to make sure that |
| .I nl_pid |
| is unique. |
| If the application sets it to 0, the kernel takes care of assigning it. |
| The kernel assigns the process ID to the first netlink socket the process |
| opens and assigns a unique |
| .I nl_pid |
| to every netlink socket that the process subsequently creates. |
| .PP |
| .I nl_groups |
| is a bit mask with every bit representing a netlink group number. |
| Each netlink family has a set of 32 multicast groups. |
| When |
| .BR bind (2) |
| is called on the socket, the |
| .I nl_groups |
| field in the |
| .I sockaddr_nl |
| should be set to a bit mask of the groups which it wishes to listen to. |
| The default value for this field is zero which means that no multicasts |
| will be received. |
| A socket may multicast messages to any of the multicast groups by setting |
| .I nl_groups |
| to a bit mask of the groups it wishes to send to when it calls |
| .BR sendmsg (2) |
| or does a |
| .BR connect (2). |
| Only processes with an effective UID of 0 or the |
| .B CAP_NET_ADMIN |
| capability may send or listen to a netlink multicast group. |
| Since Linux 2.6.13, |
| .\" commit d629b836d151d43332492651dd841d32e57ebe3b |
| messages can't be broadcast to multiple groups. |
| Any replies to a message received for a multicast group should be |
| sent back to the sending PID and the multicast group. |
| Some Linux kernel subsystems may additionally allow other users |
| to send and/or receive messages. |
| As at Linux 3.0, the |
| .BR NETLINK_KOBJECT_UEVENT , |
| .BR NETLINK_GENERIC , |
| .BR NETLINK_ROUTE , |
| and |
| .BR NETLINK_SELINUX |
| groups allow other users to receive messages. |
| No groups allow other users to send messages. |
| .SS Socket options |
| To set or get a netlink socket option, call |
| .BR getsockopt (2) |
| to read or |
| .BR setsockopt (2) |
| to write the option with the option level argument set to |
| .BR SOL_NETLINK . |
| Unless otherwise noted, |
| .I optval |
| is a pointer to an |
| .IR int . |
| .TP |
| .BR NETLINK_PKTINFO " (since Linux 2.6.14)" |
| .\" commit 9a4595bc7e67962f13232ee55a64e063062c3a99 |
| .\" Author: Patrick McHardy <kaber@trash.net> |
| Enable |
| .B nl_pktinfo |
| control messages for received packets to get the extended |
| destination group number. |
| .TP |
| .BR NETLINK_ADD_MEMBERSHIP ,\ NETLINK_DROP_MEMBERSHIP " (since Linux 2.6.14)" |
| .\" commit 9a4595bc7e67962f13232ee55a64e063062c3a99 |
| .\" Author: Patrick McHardy <kaber@trash.net> |
| Join/leave a group specified by |
| .IR optval . |
| .TP |
| .BR NETLINK_LIST_MEMBERSHIPS " (since Linux 4.2)" |
| .\" commit b42be38b2778eda2237fc759e55e3b698b05b315 |
| .\" Author: David Herrmann <dh.herrmann@gmail.com> |
| Retrieve all groups a socket is a member of. |
| .I optval |
| is a pointer to |
| .B __u32 |
| and |
| .I optlen |
| is the size of the array. |
| The array is filled with the full membership set of the |
| socket, and the required array size is returned in |
| .IR optlen . |
| .TP |
| .BR NETLINK_BROADCAST_ERROR " (since Linux 2.6.30)" |
| .\" commit be0c22a46cfb79ab2342bb28fde99afa94ef868e |
| .\" Author: Pablo Neira Ayuso <pablo@netfilter.org> |
| When not set, |
| .B netlink_broadcast() |
| only reports |
| .B ESRCH |
| errors and silently ignores |
| .B ENOBUFS |
| errors. |
| .TP |
| .BR NETLINK_NO_ENOBUFS " (since Linux 2.6.30)" |
| .\" commit 38938bfe3489394e2eed5e40c9bb8f66a2ce1405 |
| .\" Author: Pablo Neira Ayuso <pablo@netfilter.org> |
| This flag can be used by unicast and broadcast listeners to avoid receiving |
| .B ENOBUFS |
| errors. |
| .TP |
| .BR NETLINK_LISTEN_ALL_NSID " (since Linux 4.2)" |
| .\" commit 59324cf35aba5336b611074028777838a963d03b |
| .\" Author: Nicolas Dichtel <nicolas.dichtel@6wind.com> |
| When set, this socket will receive netlink notifications from |
| all network namespaces that have an |
| .I nsid |
| assigned into the network namespace where the socket has been opened. |
| The |
| .I nsid |
| is sent to user space via ancillary data. |
| .TP |
| .BR NETLINK_CAP_ACK " (since Linux 4.3)" |
| .\" commit 0a6a3a23ea6efde079a5b77688541a98bf202721 |
| .\" Author: Christophe Ricard <christophe.ricard@gmail.com> |
| The kernel may fail to allocate the necessary room for the acknowledgement |
| message back to user space. |
| This option trims off the payload of the original netlink message. |
| The netlink message header is still included, so the user can guess from the |
| sequence number which message triggered the acknowledgement. |
| .SH VERSIONS |
| The socket interface to netlink first appeared in Linux 2.2. |
| .PP |
| Linux 2.0 supported a more primitive device-based netlink interface |
| (which is still available as a compatibility option). |
| This obsolete interface is not described here. |
| .SH NOTES |
| It is often better to use netlink via |
| .I libnetlink |
| or |
| .I libnl |
| than via the low-level kernel interface. |
| .SH BUGS |
| This manual page is not complete. |
| .SH EXAMPLES |
| The following example creates a |
| .B NETLINK_ROUTE |
| netlink socket which will listen to the |
| .B RTMGRP_LINK |
| (network interface create/delete/up/down events) and |
| .B RTMGRP_IPV4_IFADDR |
| (IPv4 addresses add/delete events) multicast groups. |
| .PP |
| .in +4n |
| .EX |
| struct sockaddr_nl sa; |
| |
| memset(&sa, 0, sizeof(sa)); |
| sa.nl_family = AF_NETLINK; |
| sa.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR; |
| |
| fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); |
| bind(fd, (struct sockaddr *) &sa, sizeof(sa)); |
| .EE |
| .in |
| .PP |
| The next example demonstrates how to send a netlink message to the |
| kernel (pid 0). |
| Note that the application must take care of message sequence numbers |
| in order to reliably track acknowledgements. |
| .PP |
| .in +4n |
| .EX |
| struct nlmsghdr *nh; /* The nlmsghdr with payload to send */ |
| struct sockaddr_nl sa; |
| struct iovec iov = { nh, nh\->nlmsg_len }; |
| struct msghdr msg; |
| |
| msg = { &sa, sizeof(sa), &iov, 1, NULL, 0, 0 }; |
| memset(&sa, 0, sizeof(sa)); |
| sa.nl_family = AF_NETLINK; |
| nh\->nlmsg_pid = 0; |
| nh\->nlmsg_seq = ++sequence_number; |
| /* Request an ack from kernel by setting NLM_F_ACK */ |
| nh\->nlmsg_flags |= NLM_F_ACK; |
| |
| sendmsg(fd, &msg, 0); |
| .EE |
| .in |
| .PP |
| And the last example is about reading a netlink message. |
| .PP |
| .in +4n |
| .EX |
| int len; |
| /* 8192 to avoid message truncation on platforms with |
| page size > 4096 */ |
| struct nlmsghdr buf[8192/sizeof(struct nlmsghdr)]; |
| struct iovec iov = { buf, sizeof(buf) }; |
| struct sockaddr_nl sa; |
| struct msghdr msg; |
| struct nlmsghdr *nh; |
| |
| msg = { &sa, sizeof(sa), &iov, 1, NULL, 0, 0 }; |
| len = recvmsg(fd, &msg, 0); |
| |
| for (nh = (struct nlmsghdr *) buf; NLMSG_OK (nh, len); |
| nh = NLMSG_NEXT (nh, len)) { |
| /* The end of multipart message */ |
| if (nh\->nlmsg_type == NLMSG_DONE) |
| return; |
| |
| if (nh\->nlmsg_type == NLMSG_ERROR) |
| /* Do some error handling */ |
| ... |
| |
| /* Continue with parsing payload */ |
| ... |
| } |
| .EE |
| .in |
| .SH SEE ALSO |
| .BR cmsg (3), |
| .BR netlink (3), |
| .BR capabilities (7), |
| .BR rtnetlink (7), |
| .BR sock_diag (7) |
| .PP |
| .UR ftp://ftp.inr.ac.ru\:/ip\-routing\:/iproute2* |
| information about libnetlink |
| .UE |
| .PP |
| .UR http://www.infradead.org\:/\(titgr\:/libnl/ |
| information about libnl |
| .UE |
| .PP |
| RFC 3549 "Linux Netlink as an IP Services Protocol" |