|
| 1 | +netfilter: br_netfilter: skip conntrack input hook for promisc packets |
| 2 | + |
| 3 | +jira LE-1907 |
| 4 | +cve CVE-2024-27415 |
| 5 | +Rebuild_History Non-Buildable kernel-5.14.0-427.33.1.el9_4 |
| 6 | +commit-author Pablo Neira Ayuso < [email protected]> |
| 7 | +commit 751de2012eafa4d46d8081056761fa0e9cc8a178 |
| 8 | +Empty-Commit: Cherry-Pick Conflicts during history rebuild. |
| 9 | +Will be included in final tarball splat. Ref for failed cherry-pick at: |
| 10 | +ciq/ciq_backports/kernel-5.14.0-427.33.1.el9_4/751de201.failed |
| 11 | + |
| 12 | +For historical reasons, when bridge device is in promisc mode, packets |
| 13 | +that are directed to the taps follow bridge input hook path. This patch |
| 14 | +adds a workaround to reset conntrack for these packets. |
| 15 | + |
| 16 | +Jianbo Liu reports warning splats in their test infrastructure where |
| 17 | +cloned packets reach the br_netfilter input hook to confirm the |
| 18 | +conntrack object. |
| 19 | + |
| 20 | +Scratch one bit from BR_INPUT_SKB_CB to annotate that this packet has |
| 21 | +reached the input hook because it is passed up to the bridge device to |
| 22 | +reach the taps. |
| 23 | + |
| 24 | +[ 57.571874] WARNING: CPU: 1 PID: 0 at net/bridge/br_netfilter_hooks.c:616 br_nf_local_in+0x157/0x180 [br_netfilter] |
| 25 | +[ 57.572749] Modules linked in: xt_MASQUERADE nf_conntrack_netlink nfnetlink iptable_nat xt_addrtype xt_conntrack nf_nat br_netfilter rpcsec_gss_krb5 auth_rpcgss oid_registry overlay rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_isc si ib_umad rdma_cm ib_ipoib iw_cm ib_cm mlx5_ib ib_uverbs ib_core mlx5ctl mlx5_core |
| 26 | +[ 57.575158] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.8.0+ #19 |
| 27 | +[ 57.575700] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 |
| 28 | +[ 57.576662] RIP: 0010:br_nf_local_in+0x157/0x180 [br_netfilter] |
| 29 | +[ 57.577195] Code: fe ff ff 41 bd 04 00 00 00 be 04 00 00 00 e9 4a ff ff ff be 04 00 00 00 48 89 ef e8 f3 a9 3c e1 66 83 ad b4 00 00 00 04 eb 91 <0f> 0b e9 f1 fe ff ff 0f 0b e9 df fe ff ff 48 89 df e8 b3 53 47 e1 |
| 30 | +[ 57.578722] RSP: 0018:ffff88885f845a08 EFLAGS: 00010202 |
| 31 | +[ 57.579207] RAX: 0000000000000002 RBX: ffff88812dfe8000 RCX: 0000000000000000 |
| 32 | +[ 57.579830] RDX: ffff88885f845a60 RSI: ffff8881022dc300 RDI: 0000000000000000 |
| 33 | +[ 57.580454] RBP: ffff88885f845a60 R08: 0000000000000001 R09: 0000000000000003 |
| 34 | +[ 57.581076] R10: 00000000ffff1300 R11: 0000000000000002 R12: 0000000000000000 |
| 35 | +[ 57.581695] R13: ffff8881047ffe00 R14: ffff888108dbee00 R15: ffff88814519b800 |
| 36 | +[ 57.582313] FS: 0000000000000000(0000) GS:ffff88885f840000(0000) knlGS:0000000000000000 |
| 37 | +[ 57.583040] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 |
| 38 | +[ 57.583564] CR2: 000000c4206aa000 CR3: 0000000103847001 CR4: 0000000000370eb0 |
| 39 | +[ 57.584194] DR0: 0000000000000000 DR1: 0000000000000000 DR2: |
| 40 | +0000000000000000 |
| 41 | +[ 57.584820] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: |
| 42 | +0000000000000400 |
| 43 | +[ 57.585440] Call Trace: |
| 44 | +[ 57.585721] <IRQ> |
| 45 | +[ 57.585976] ? __warn+0x7d/0x130 |
| 46 | +[ 57.586323] ? br_nf_local_in+0x157/0x180 [br_netfilter] |
| 47 | +[ 57.586811] ? report_bug+0xf1/0x1c0 |
| 48 | +[ 57.587177] ? handle_bug+0x3f/0x70 |
| 49 | +[ 57.587539] ? exc_invalid_op+0x13/0x60 |
| 50 | +[ 57.587929] ? asm_exc_invalid_op+0x16/0x20 |
| 51 | +[ 57.588336] ? br_nf_local_in+0x157/0x180 [br_netfilter] |
| 52 | +[ 57.588825] nf_hook_slow+0x3d/0xd0 |
| 53 | +[ 57.589188] ? br_handle_vlan+0x4b/0x110 |
| 54 | +[ 57.589579] br_pass_frame_up+0xfc/0x150 |
| 55 | +[ 57.589970] ? br_port_flags_change+0x40/0x40 |
| 56 | +[ 57.590396] br_handle_frame_finish+0x346/0x5e0 |
| 57 | +[ 57.590837] ? ipt_do_table+0x32e/0x430 |
| 58 | +[ 57.591221] ? br_handle_local_finish+0x20/0x20 |
| 59 | +[ 57.591656] br_nf_hook_thresh+0x4b/0xf0 [br_netfilter] |
| 60 | +[ 57.592286] ? br_handle_local_finish+0x20/0x20 |
| 61 | +[ 57.592802] br_nf_pre_routing_finish+0x178/0x480 [br_netfilter] |
| 62 | +[ 57.593348] ? br_handle_local_finish+0x20/0x20 |
| 63 | +[ 57.593782] ? nf_nat_ipv4_pre_routing+0x25/0x60 [nf_nat] |
| 64 | +[ 57.594279] br_nf_pre_routing+0x24c/0x550 [br_netfilter] |
| 65 | +[ 57.594780] ? br_nf_hook_thresh+0xf0/0xf0 [br_netfilter] |
| 66 | +[ 57.595280] br_handle_frame+0x1f3/0x3d0 |
| 67 | +[ 57.595676] ? br_handle_local_finish+0x20/0x20 |
| 68 | +[ 57.596118] ? br_handle_frame_finish+0x5e0/0x5e0 |
| 69 | +[ 57.596566] __netif_receive_skb_core+0x25b/0xfc0 |
| 70 | +[ 57.597017] ? __napi_build_skb+0x37/0x40 |
| 71 | +[ 57.597418] __netif_receive_skb_list_core+0xfb/0x220 |
| 72 | + |
| 73 | +Fixes: 62e7151ae3eb ("netfilter: bridge: confirm multicast packets before passing them up the stack") |
| 74 | + Reported-by: Jianbo Liu < [email protected]> |
| 75 | + Signed-off-by: Pablo Neira Ayuso < [email protected]> |
| 76 | +(cherry picked from commit 751de2012eafa4d46d8081056761fa0e9cc8a178) |
| 77 | + Signed-off-by: Jonathan Maple < [email protected]> |
| 78 | + |
| 79 | +# Conflicts: |
| 80 | +# net/bridge/br_netfilter_hooks.c |
| 81 | +# net/bridge/netfilter/nf_conntrack_bridge.c |
| 82 | +diff --cc net/bridge/br_netfilter_hooks.c |
| 83 | +index 9421c5a097bd,22e35623c148..000000000000 |
| 84 | +--- a/net/bridge/br_netfilter_hooks.c |
| 85 | ++++ b/net/bridge/br_netfilter_hooks.c |
| 86 | +@@@ -538,6 -557,96 +538,99 @@@ static unsigned int br_nf_pre_routing(v |
| 87 | + return NF_STOLEN; |
| 88 | + } |
| 89 | + |
| 90 | +++<<<<<<< HEAD |
| 91 | +++======= |
| 92 | ++ #if IS_ENABLED(CONFIG_NF_CONNTRACK) |
| 93 | ++ /* conntracks' nf_confirm logic cannot handle cloned skbs referencing |
| 94 | ++ * the same nf_conn entry, which will happen for multicast (broadcast) |
| 95 | ++ * Frames on bridges. |
| 96 | ++ * |
| 97 | ++ * Example: |
| 98 | ++ * macvlan0 |
| 99 | ++ * br0 |
| 100 | ++ * ethX ethY |
| 101 | ++ * |
| 102 | ++ * ethX (or Y) receives multicast or broadcast packet containing |
| 103 | ++ * an IP packet, not yet in conntrack table. |
| 104 | ++ * |
| 105 | ++ * 1. skb passes through bridge and fake-ip (br_netfilter)Prerouting. |
| 106 | ++ * -> skb->_nfct now references a unconfirmed entry |
| 107 | ++ * 2. skb is broad/mcast packet. bridge now passes clones out on each bridge |
| 108 | ++ * interface. |
| 109 | ++ * 3. skb gets passed up the stack. |
| 110 | ++ * 4. In macvlan case, macvlan driver retains clone(s) of the mcast skb |
| 111 | ++ * and schedules a work queue to send them out on the lower devices. |
| 112 | ++ * |
| 113 | ++ * The clone skb->_nfct is not a copy, it is the same entry as the |
| 114 | ++ * original skb. The macvlan rx handler then returns RX_HANDLER_PASS. |
| 115 | ++ * 5. Normal conntrack hooks (in NF_INET_LOCAL_IN) confirm the orig skb. |
| 116 | ++ * |
| 117 | ++ * The Macvlan broadcast worker and normal confirm path will race. |
| 118 | ++ * |
| 119 | ++ * This race will not happen if step 2 already confirmed a clone. In that |
| 120 | ++ * case later steps perform skb_clone() with skb->_nfct already confirmed (in |
| 121 | ++ * hash table). This works fine. |
| 122 | ++ * |
| 123 | ++ * But such confirmation won't happen when eb/ip/nftables rules dropped the |
| 124 | ++ * packets before they reached the nf_confirm step in postrouting. |
| 125 | ++ * |
| 126 | ++ * Work around this problem by explicit confirmation of the entry at |
| 127 | ++ * LOCAL_IN time, before upper layer has a chance to clone the unconfirmed |
| 128 | ++ * entry. |
| 129 | ++ * |
| 130 | ++ */ |
| 131 | ++ static unsigned int br_nf_local_in(void *priv, |
| 132 | ++ struct sk_buff *skb, |
| 133 | ++ const struct nf_hook_state *state) |
| 134 | ++ { |
| 135 | ++ bool promisc = BR_INPUT_SKB_CB(skb)->promisc; |
| 136 | ++ struct nf_conntrack *nfct = skb_nfct(skb); |
| 137 | ++ const struct nf_ct_hook *ct_hook; |
| 138 | ++ struct nf_conn *ct; |
| 139 | ++ int ret; |
| 140 | ++ |
| 141 | ++ if (promisc) { |
| 142 | ++ nf_reset_ct(skb); |
| 143 | ++ return NF_ACCEPT; |
| 144 | ++ } |
| 145 | ++ |
| 146 | ++ if (!nfct || skb->pkt_type == PACKET_HOST) |
| 147 | ++ return NF_ACCEPT; |
| 148 | ++ |
| 149 | ++ ct = container_of(nfct, struct nf_conn, ct_general); |
| 150 | ++ if (likely(nf_ct_is_confirmed(ct))) |
| 151 | ++ return NF_ACCEPT; |
| 152 | ++ |
| 153 | ++ WARN_ON_ONCE(skb_shared(skb)); |
| 154 | ++ WARN_ON_ONCE(refcount_read(&nfct->use) != 1); |
| 155 | ++ |
| 156 | ++ /* We can't call nf_confirm here, it would create a dependency |
| 157 | ++ * on nf_conntrack module. |
| 158 | ++ */ |
| 159 | ++ ct_hook = rcu_dereference(nf_ct_hook); |
| 160 | ++ if (!ct_hook) { |
| 161 | ++ skb->_nfct = 0ul; |
| 162 | ++ nf_conntrack_put(nfct); |
| 163 | ++ return NF_ACCEPT; |
| 164 | ++ } |
| 165 | ++ |
| 166 | ++ nf_bridge_pull_encap_header(skb); |
| 167 | ++ ret = ct_hook->confirm(skb); |
| 168 | ++ switch (ret & NF_VERDICT_MASK) { |
| 169 | ++ case NF_STOLEN: |
| 170 | ++ return NF_STOLEN; |
| 171 | ++ default: |
| 172 | ++ nf_bridge_push_encap_header(skb); |
| 173 | ++ break; |
| 174 | ++ } |
| 175 | ++ |
| 176 | ++ ct = container_of(nfct, struct nf_conn, ct_general); |
| 177 | ++ WARN_ON_ONCE(!nf_ct_is_confirmed(ct)); |
| 178 | ++ |
| 179 | ++ return ret; |
| 180 | ++ } |
| 181 | ++ #endif |
| 182 | +++>>>>>>> 751de2012eaf (netfilter: br_netfilter: skip conntrack input hook for promisc packets) |
| 183 | + |
| 184 | + /* PF_BRIDGE/FORWARD *************************************************/ |
| 185 | + static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) |
| 186 | +diff --cc net/bridge/netfilter/nf_conntrack_bridge.c |
| 187 | +index 56efda595b81,c3c51b9a6826..000000000000 |
| 188 | +--- a/net/bridge/netfilter/nf_conntrack_bridge.c |
| 189 | ++++ b/net/bridge/netfilter/nf_conntrack_bridge.c |
| 190 | +@@@ -291,6 -291,36 +291,39 @@@ static unsigned int nf_ct_bridge_pre(vo |
| 191 | + return nf_conntrack_in(skb, &bridge_state); |
| 192 | + } |
| 193 | + |
| 194 | +++<<<<<<< HEAD |
| 195 | +++======= |
| 196 | ++ static unsigned int nf_ct_bridge_in(void *priv, struct sk_buff *skb, |
| 197 | ++ const struct nf_hook_state *state) |
| 198 | ++ { |
| 199 | ++ bool promisc = BR_INPUT_SKB_CB(skb)->promisc; |
| 200 | ++ struct nf_conntrack *nfct = skb_nfct(skb); |
| 201 | ++ struct nf_conn *ct; |
| 202 | ++ |
| 203 | ++ if (promisc) { |
| 204 | ++ nf_reset_ct(skb); |
| 205 | ++ return NF_ACCEPT; |
| 206 | ++ } |
| 207 | ++ |
| 208 | ++ if (!nfct || skb->pkt_type == PACKET_HOST) |
| 209 | ++ return NF_ACCEPT; |
| 210 | ++ |
| 211 | ++ /* nf_conntrack_confirm() cannot handle concurrent clones, |
| 212 | ++ * this happens for broad/multicast frames with e.g. macvlan on top |
| 213 | ++ * of the bridge device. |
| 214 | ++ */ |
| 215 | ++ ct = container_of(nfct, struct nf_conn, ct_general); |
| 216 | ++ if (nf_ct_is_confirmed(ct) || nf_ct_is_template(ct)) |
| 217 | ++ return NF_ACCEPT; |
| 218 | ++ |
| 219 | ++ /* let inet prerouting call conntrack again */ |
| 220 | ++ skb->_nfct = 0; |
| 221 | ++ nf_ct_put(ct); |
| 222 | ++ |
| 223 | ++ return NF_ACCEPT; |
| 224 | ++ } |
| 225 | ++ |
| 226 | +++>>>>>>> 751de2012eaf (netfilter: br_netfilter: skip conntrack input hook for promisc packets) |
| 227 | + static void nf_ct_bridge_frag_save(struct sk_buff *skb, |
| 228 | + struct nf_bridge_frag_data *data) |
| 229 | + { |
| 230 | +diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c |
| 231 | +index c729528b5e85..e09000e38d07 100644 |
| 232 | +--- a/net/bridge/br_input.c |
| 233 | ++++ b/net/bridge/br_input.c |
| 234 | +@@ -30,7 +30,7 @@ br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb) |
| 235 | + return netif_receive_skb(skb); |
| 236 | + } |
| 237 | + |
| 238 | +-static int br_pass_frame_up(struct sk_buff *skb) |
| 239 | ++static int br_pass_frame_up(struct sk_buff *skb, bool promisc) |
| 240 | + { |
| 241 | + struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev; |
| 242 | + struct net_bridge *br = netdev_priv(brdev); |
| 243 | +@@ -65,6 +65,8 @@ static int br_pass_frame_up(struct sk_buff *skb) |
| 244 | + br_multicast_count(br, NULL, skb, br_multicast_igmp_type(skb), |
| 245 | + BR_MCAST_DIR_TX); |
| 246 | + |
| 247 | ++ BR_INPUT_SKB_CB(skb)->promisc = promisc; |
| 248 | ++ |
| 249 | + return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, |
| 250 | + dev_net(indev), NULL, skb, indev, NULL, |
| 251 | + br_netif_receive_skb); |
| 252 | +@@ -82,6 +84,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb |
| 253 | + struct net_bridge_mcast *brmctx; |
| 254 | + struct net_bridge_vlan *vlan; |
| 255 | + struct net_bridge *br; |
| 256 | ++ bool promisc; |
| 257 | + u16 vid = 0; |
| 258 | + u8 state; |
| 259 | + |
| 260 | +@@ -137,7 +140,9 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb |
| 261 | + if (p->flags & BR_LEARNING) |
| 262 | + br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, 0); |
| 263 | + |
| 264 | +- local_rcv = !!(br->dev->flags & IFF_PROMISC); |
| 265 | ++ promisc = !!(br->dev->flags & IFF_PROMISC); |
| 266 | ++ local_rcv = promisc; |
| 267 | ++ |
| 268 | + if (is_multicast_ether_addr(eth_hdr(skb)->h_dest)) { |
| 269 | + /* by definition the broadcast is also a multicast address */ |
| 270 | + if (is_broadcast_ether_addr(eth_hdr(skb)->h_dest)) { |
| 271 | +@@ -200,7 +205,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb |
| 272 | + unsigned long now = jiffies; |
| 273 | + |
| 274 | + if (test_bit(BR_FDB_LOCAL, &dst->flags)) |
| 275 | +- return br_pass_frame_up(skb); |
| 276 | ++ return br_pass_frame_up(skb, false); |
| 277 | + |
| 278 | + if (now != dst->used) |
| 279 | + dst->used = now; |
| 280 | +@@ -213,7 +218,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb |
| 281 | + } |
| 282 | + |
| 283 | + if (local_rcv) |
| 284 | +- return br_pass_frame_up(skb); |
| 285 | ++ return br_pass_frame_up(skb, promisc); |
| 286 | + |
| 287 | + out: |
| 288 | + return 0; |
| 289 | +@@ -386,6 +391,8 @@ static rx_handler_result_t br_handle_frame(struct sk_buff **pskb) |
| 290 | + goto forward; |
| 291 | + } |
| 292 | + |
| 293 | ++ BR_INPUT_SKB_CB(skb)->promisc = false; |
| 294 | ++ |
| 295 | + /* The else clause should be hit when nf_hook(): |
| 296 | + * - returns < 0 (drop/error) |
| 297 | + * - returns = 0 (stolen/nf_queue) |
| 298 | +* Unmerged path net/bridge/br_netfilter_hooks.c |
| 299 | +diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h |
| 300 | +index ed200884afd2..1c7a03c54d17 100644 |
| 301 | +--- a/net/bridge/br_private.h |
| 302 | ++++ b/net/bridge/br_private.h |
| 303 | +@@ -582,6 +582,7 @@ struct br_input_skb_cb { |
| 304 | + #endif |
| 305 | + u8 proxyarp_replied:1; |
| 306 | + u8 src_port_isolated:1; |
| 307 | ++ u8 promisc:1; |
| 308 | + #ifdef CONFIG_BRIDGE_VLAN_FILTERING |
| 309 | + u8 vlan_filtered:1; |
| 310 | + #endif |
| 311 | +* Unmerged path net/bridge/netfilter/nf_conntrack_bridge.c |
0 commit comments