|  | 
| 43 | 43 | #include <linux/sysctl.h> | 
| 44 | 44 | #endif | 
| 45 | 45 | 
 | 
|  | 46 | +#if IS_ENABLED(CONFIG_NF_CONNTRACK) | 
|  | 47 | +#include <net/netfilter/nf_conntrack_core.h> | 
|  | 48 | +#endif | 
|  | 49 | + | 
| 46 | 50 | static unsigned int brnf_net_id __read_mostly; | 
| 47 | 51 | 
 | 
| 48 | 52 | struct brnf_net { | 
| @@ -553,6 +557,90 @@ static unsigned int br_nf_pre_routing(void *priv, | 
| 553 | 557 | 	return NF_STOLEN; | 
| 554 | 558 | } | 
| 555 | 559 | 
 | 
|  | 560 | +#if IS_ENABLED(CONFIG_NF_CONNTRACK) | 
|  | 561 | +/* conntrack's nf_confirm logic cannot handle cloned skbs referencing | 
|  | 562 | + * the same nf_conn entry, which will happen for multicast (broadcast) | 
|  | 563 | + * frames on bridges. | 
|  | 564 | + * | 
|  | 565 | + * Example: | 
|  | 566 | + *      macvlan0 | 
|  | 567 | + *      br0 | 
|  | 568 | + *  ethX  ethY | 
|  | 569 | + * | 
|  | 570 | + * ethX (or Y) receives multicast or broadcast packet containing | 
|  | 571 | + * an IP packet, not yet in conntrack table. | 
|  | 572 | + * | 
|  | 573 | + * 1. skb passes through bridge and fake-ip (br_netfilter) prerouting. | 
|  | 574 | + *    -> skb->_nfct now references an unconfirmed entry | 
|  | 575 | + * 2. skb is a broad/mcast packet. Bridge now passes clones out on each bridge | 
|  | 576 | + *    interface. | 
|  | 577 | + * 3. skb gets passed up the stack. | 
|  | 578 | + * 4. In macvlan case, macvlan driver retains clone(s) of the mcast skb | 
|  | 579 | + *    and schedules a work queue to send them out on the lower devices. | 
|  | 580 | + * | 
|  | 581 | + *    The clone skb->_nfct is not a copy, it is the same entry as the | 
|  | 582 | + *    original skb.  The macvlan rx handler then returns RX_HANDLER_PASS. | 
|  | 583 | + * 5. Normal conntrack hooks (in NF_INET_LOCAL_IN) confirm the orig skb. | 
|  | 584 | + * | 
|  | 585 | + * The macvlan broadcast worker and normal confirm path will race. | 
|  | 586 | + * | 
|  | 587 | + * This race will not happen if step 2 already confirmed a clone. In that | 
|  | 588 | + * case later steps perform skb_clone() with skb->_nfct already confirmed (in | 
|  | 589 | + * hash table).  This works fine. | 
|  | 590 | + * | 
|  | 591 | + * But such confirmation won't happen when eb/ip/nftables rules dropped the | 
|  | 592 | + * packets before they reached the nf_confirm step in postrouting. | 
|  | 593 | + * | 
|  | 594 | + * Work around this problem by explicit confirmation of the entry at | 
|  | 595 | + * LOCAL_IN time, before upper layer has a chance to clone the unconfirmed | 
|  | 596 | + * entry. | 
|  | 597 | + * | 
|  | 598 | + */ | 
|  | 599 | +static unsigned int br_nf_local_in(void *priv, | 
|  | 600 | +				   struct sk_buff *skb, | 
|  | 601 | +				   const struct nf_hook_state *state) | 
|  | 602 | +{ | 
|  | 603 | +	struct nf_conntrack *nfct = skb_nfct(skb); | 
|  | 604 | +	const struct nf_ct_hook *ct_hook; | 
|  | 605 | +	struct nf_conn *ct; | 
|  | 606 | +	int ret; | 
|  | 607 | + | 
|  | 608 | +	if (!nfct || skb->pkt_type == PACKET_HOST) | 
|  | 609 | +		return NF_ACCEPT; | 
|  | 610 | + | 
|  | 611 | +	ct = container_of(nfct, struct nf_conn, ct_general); | 
|  | 612 | +	if (likely(nf_ct_is_confirmed(ct))) | 
|  | 613 | +		return NF_ACCEPT; | 
|  | 614 | + | 
|  | 615 | +	WARN_ON_ONCE(skb_shared(skb)); | 
|  | 616 | +	WARN_ON_ONCE(refcount_read(&nfct->use) != 1); | 
|  | 617 | + | 
|  | 618 | +	/* We can't call nf_confirm directly, it would create a dependency | 
|  | 619 | +	 * on the nf_conntrack module; use the nf_ct_hook indirection instead. | 
|  | 620 | +	 */ | 
|  | 621 | +	ct_hook = rcu_dereference(nf_ct_hook); | 
|  | 622 | +	if (!ct_hook) { /* hook unset: detach entry from skb, drop our ref */ | 
|  | 623 | +		skb->_nfct = 0ul; | 
|  | 624 | +		nf_conntrack_put(nfct); | 
|  | 625 | +		return NF_ACCEPT; | 
|  | 626 | +	} | 
|  | 627 | + | 
|  | 628 | +	nf_bridge_pull_encap_header(skb); | 
|  | 629 | +	ret = ct_hook->confirm(skb); | 
|  | 630 | +	switch (ret & NF_VERDICT_MASK) { | 
|  | 631 | +	case NF_STOLEN: | 
|  | 632 | +		return NF_STOLEN; | 
|  | 633 | +	default: | 
|  | 634 | +		nf_bridge_push_encap_header(skb); | 
|  | 635 | +		break; | 
|  | 636 | +	} | 
|  | 637 | + | 
|  | 638 | +	ct = container_of(nfct, struct nf_conn, ct_general); | 
|  | 639 | +	WARN_ON_ONCE(!nf_ct_is_confirmed(ct)); | 
|  | 640 | + | 
|  | 641 | +	return ret; | 
|  | 642 | +} | 
|  | 643 | +#endif | 
| 556 | 644 | 
 | 
| 557 | 645 | /* PF_BRIDGE/FORWARD *************************************************/ | 
| 558 | 646 | static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) | 
| @@ -964,6 +1052,14 @@ static const struct nf_hook_ops br_nf_ops[] = { | 
| 964 | 1052 | 		.hooknum = NF_BR_PRE_ROUTING, | 
| 965 | 1053 | 		.priority = NF_BR_PRI_BRNF, | 
| 966 | 1054 | 	}, | 
|  | 1055 | +#if IS_ENABLED(CONFIG_NF_CONNTRACK) | 
|  | 1056 | +	{ | 
|  | 1057 | +		.hook = br_nf_local_in, | 
|  | 1058 | +		.pf = NFPROTO_BRIDGE, | 
|  | 1059 | +		.hooknum = NF_BR_LOCAL_IN, | 
|  | 1060 | +		.priority = NF_BR_PRI_LAST, | 
|  | 1061 | +	}, | 
|  | 1062 | +#endif | 
| 967 | 1063 | 	{ | 
| 968 | 1064 | 		.hook = br_nf_forward, | 
| 969 | 1065 | 		.pf = NFPROTO_BRIDGE, | 
|  | 
0 commit comments