Skip to content

Commit 6bb0fef

Browse files
borkmann authored and davem330 committed
netlink, mmap: fix edge-case leakages in nf queue zero-copy
When netlink mmap on receive side is the consumer of nf queue data, it can happen that in some edge cases, we write skb shared info into the user space mmap buffer: Assume a possible rx ring frame size of only 4096, and the network skb, which is being zero-copied into the netlink skb, contains page frags with an overall skb->len larger than the linear part of the netlink skb. skb_zerocopy(), which is generic and thus not aware of the fact that shared info cannot be accessed for such skbs then tries to write and fill frags, thus leaking kernel data/pointers and in some corner cases possibly writing out of bounds of the mmap area (when filling the last slot in the ring buffer this way). I.e. the ring buffer slot is then of status NL_MMAP_STATUS_VALID, has an advertised length larger than 4096, where the linear part is visible at the slot beginning, and the leaked sizeof(struct skb_shared_info) has been written to the beginning of the next slot (also corrupting the struct nl_mmap_hdr slot header incl. status etc), since skb->end points to skb->data + ring->frame_size - NL_MMAP_HDRLEN. The fix adds and lets __netlink_alloc_skb() take the actual needed linear room for the network skb + meta data into account. It's completely irrelevant for non-mmaped netlink sockets, but in case mmap sockets are used, it can be decided whether the available skb_tailroom() is really large enough for the buffer, or whether it needs to internally fallback to a normal alloc_skb(). From nf queue side, the information whether the destination port is an mmap RX ring is not really available without extra port-to-socket lookup, thus it can only be determined in lower layers i.e. when __netlink_alloc_skb() is called that checks internally for this. 
I chose to add the extra ldiff parameter as mmap will then still work: We have data_len and hlen in nfqnl_build_packet_message(), data_len is the full length (capped at queue->copy_range) for skb_zerocopy() and hlen some possible part of data_len that needs to be copied; the rem_len variable indicates the needed remaining linear mmap space. The only other workaround in nf queue internally would be after allocation time by f.e. cap'ing the data_len to the skb_tailroom() iff we deal with an mmap skb, but that would 1) expose the fact that we use a mmap skb to upper layers, and 2) trim the skb where we otherwise could just have moved the full skb into the normal receive queue. After the patch, in my test case the ring slot doesn't fit and therefore shows NL_MMAP_STATUS_COPY, where a full skb carries all the data and thus needs to be picked up via recv(). Fixes: 3ab1f68 ("nfnetlink: add support for memory mapped netlink") Signed-off-by: Daniel Borkmann <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent a66e365 commit 6bb0fef

File tree

3 files changed

+26
-10
lines changed

3 files changed

+26
-10
lines changed

include/linux/netlink.h

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,17 @@ extern int netlink_change_ngroups(struct sock *sk, unsigned int groups);
6868
extern void __netlink_clear_multicast_users(struct sock *sk, unsigned int group);
6969
extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err);
7070
extern int netlink_has_listeners(struct sock *sk, unsigned int group);
71-
extern struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size,
72-
u32 dst_portid, gfp_t gfp_mask);
71+
72+
extern struct sk_buff *__netlink_alloc_skb(struct sock *ssk, unsigned int size,
73+
unsigned int ldiff, u32 dst_portid,
74+
gfp_t gfp_mask);
75+
static inline struct sk_buff *
76+
netlink_alloc_skb(struct sock *ssk, unsigned int size, u32 dst_portid,
77+
gfp_t gfp_mask)
78+
{
79+
return __netlink_alloc_skb(ssk, size, 0, dst_portid, gfp_mask);
80+
}
81+
7382
extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 portid, int nonblock);
7483
extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 portid,
7584
__u32 group, gfp_t allocation);

net/netfilter/nfnetlink_queue_core.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -301,7 +301,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
301301
__be32 **packet_id_ptr)
302302
{
303303
size_t size;
304-
size_t data_len = 0, cap_len = 0;
304+
size_t data_len = 0, cap_len = 0, rem_len = 0;
305305
unsigned int hlen = 0;
306306
struct sk_buff *skb;
307307
struct nlattr *nla;
@@ -360,6 +360,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
360360
hlen = min_t(unsigned int, hlen, data_len);
361361
size += sizeof(struct nlattr) + hlen;
362362
cap_len = entskb->len;
363+
rem_len = data_len - hlen;
363364
break;
364365
}
365366

@@ -377,7 +378,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
377378
size += nla_total_size(seclen);
378379
}
379380

380-
skb = nfnetlink_alloc_skb(net, size, queue->peer_portid,
381+
skb = __netlink_alloc_skb(net->nfnl, size, rem_len, queue->peer_portid,
381382
GFP_ATOMIC);
382383
if (!skb) {
383384
skb_tx_error(entskb);

net/netlink/af_netlink.c

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1844,15 +1844,16 @@ int netlink_unicast(struct sock *ssk, struct sk_buff *skb,
18441844
}
18451845
EXPORT_SYMBOL(netlink_unicast);
18461846

1847-
struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size,
1848-
u32 dst_portid, gfp_t gfp_mask)
1847+
struct sk_buff *__netlink_alloc_skb(struct sock *ssk, unsigned int size,
1848+
unsigned int ldiff, u32 dst_portid,
1849+
gfp_t gfp_mask)
18491850
{
18501851
#ifdef CONFIG_NETLINK_MMAP
1852+
unsigned int maxlen, linear_size;
18511853
struct sock *sk = NULL;
18521854
struct sk_buff *skb;
18531855
struct netlink_ring *ring;
18541856
struct nl_mmap_hdr *hdr;
1855-
unsigned int maxlen;
18561857

18571858
sk = netlink_getsockbyportid(ssk, dst_portid);
18581859
if (IS_ERR(sk))
@@ -1863,7 +1864,11 @@ struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size,
18631864
if (ring->pg_vec == NULL)
18641865
goto out_put;
18651866

1866-
if (ring->frame_size - NL_MMAP_HDRLEN < size)
1867+
/* We need to account the full linear size needed as a ring
1868+
* slot cannot have non-linear parts.
1869+
*/
1870+
linear_size = size + ldiff;
1871+
if (ring->frame_size - NL_MMAP_HDRLEN < linear_size)
18671872
goto out_put;
18681873

18691874
skb = alloc_skb_head(gfp_mask);
@@ -1877,13 +1882,14 @@ struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size,
18771882

18781883
/* check again under lock */
18791884
maxlen = ring->frame_size - NL_MMAP_HDRLEN;
1880-
if (maxlen < size)
1885+
if (maxlen < linear_size)
18811886
goto out_free;
18821887

18831888
netlink_forward_ring(ring);
18841889
hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
18851890
if (hdr == NULL)
18861891
goto err2;
1892+
18871893
netlink_ring_setup_skb(skb, sk, ring, hdr);
18881894
netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED);
18891895
atomic_inc(&ring->pending);
@@ -1909,7 +1915,7 @@ struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size,
19091915
#endif
19101916
return alloc_skb(size, gfp_mask);
19111917
}
1912-
EXPORT_SYMBOL_GPL(netlink_alloc_skb);
1918+
EXPORT_SYMBOL_GPL(__netlink_alloc_skb);
19131919

19141920
int netlink_has_listeners(struct sock *sk, unsigned int group)
19151921
{

0 commit comments

Comments (0)