[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <534f532c0f587a7ca1e04036b7d106ad9274429e.1749499963.git.petrm@nvidia.com>
Date: Mon, 9 Jun 2025 22:50:26 +0200
From: Petr Machata <petrm@...dia.com>
To: "David S. Miller" <davem@...emloft.net>, Eric Dumazet
<edumazet@...gle.com>, Jakub Kicinski <kuba@...nel.org>, Paolo Abeni
<pabeni@...hat.com>, David Ahern <dsahern@...il.com>,
<netdev@...r.kernel.org>
CC: Simon Horman <horms@...nel.org>, Nikolay Aleksandrov
<razor@...ckwall.org>, Ido Schimmel <idosch@...dia.com>, Petr Machata
<petrm@...dia.com>, <mlxsw@...dia.com>
Subject: [PATCH net-next 10/14] vxlan: Support MC routing in the underlay
Locally-generated MC packets have so far not been subject to MC routing.
Instead an MC-enabled installation would maintain the MC routing tables,
and separately from that the list of interfaces to send packets to as part
of the VXLAN FDB and MDB.
In a previous patch, a ip_mr_output() and ip6_mr_output() routines were
added for IPv4 and IPv6. All locally generated MC traffic is now passed
through these functions. For reasons of backward compatibility, an SKB
(IPCB / IP6CB) flag guards the actual MC routing.
This patch adds logic to set the flag, and the UAPI to enable the behavior.
Signed-off-by: Petr Machata <petrm@...dia.com>
Reviewed-by: Ido Schimmel <idosch@...dia.com>
---
Notes:
CC:Andrew Lunn <andrew+netdev@...n.ch>
CC:Menglong Dong <menglong8.dong@...il.com>
drivers/net/vxlan/vxlan_core.c | 22 ++++++++++++++++++++--
include/net/vxlan.h | 5 ++++-
include/uapi/linux/if_link.h | 1 +
3 files changed, 25 insertions(+), 3 deletions(-)
diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
index c4af6c652560..02eba9235406 100644
--- a/drivers/net/vxlan/vxlan_core.c
+++ b/drivers/net/vxlan/vxlan_core.c
@@ -2451,6 +2451,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
rcu_read_lock();
if (addr_family == AF_INET) {
struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock);
+ u16 ipcb_flags = 0;
struct rtable *rt;
__be16 df = 0;
__be32 saddr;
@@ -2467,6 +2468,9 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
goto tx_error;
}
+ if (flags & VXLAN_F_MC_ROUTE)
+ ipcb_flags |= IPSKB_MCROUTE;
+
if (!info) {
/* Bypass encapsulation if the destination is local */
err = encap_bypass_if_local(skb, dev, vxlan, AF_INET,
@@ -2522,11 +2526,13 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
udp_tunnel_xmit_skb(rt, sock4->sock->sk, skb, saddr,
pkey->u.ipv4.dst, tos, ttl, df,
- src_port, dst_port, xnet, !udp_sum, 0);
+ src_port, dst_port, xnet, !udp_sum,
+ ipcb_flags);
#if IS_ENABLED(CONFIG_IPV6)
} else {
struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock);
struct in6_addr saddr;
+ u16 ip6cb_flags = 0;
if (!ifindex)
ifindex = sock6->sock->sk->sk_bound_dev_if;
@@ -2542,6 +2548,9 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
goto tx_error;
}
+ if (flags & VXLAN_F_MC_ROUTE)
+ ip6cb_flags |= IP6SKB_MCROUTE;
+
if (!info) {
u32 rt6i_flags = dst_rt6_info(ndst)->rt6i_flags;
@@ -2587,7 +2596,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
udp_tunnel6_xmit_skb(ndst, sock6->sock->sk, skb, dev,
&saddr, &pkey->u.ipv6.dst, tos, ttl,
pkey->label, src_port, dst_port, !udp_sum,
- 0);
+ ip6cb_flags);
#endif
}
vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_TX, pkt_len);
@@ -3402,6 +3411,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
[IFLA_VXLAN_LOCALBYPASS] = NLA_POLICY_MAX(NLA_U8, 1),
[IFLA_VXLAN_LABEL_POLICY] = NLA_POLICY_MAX(NLA_U32, VXLAN_LABEL_MAX),
[IFLA_VXLAN_RESERVED_BITS] = NLA_POLICY_EXACT_LEN(sizeof(struct vxlanhdr)),
+ [IFLA_VXLAN_MC_ROUTE] = NLA_POLICY_MAX(NLA_U8, 1),
};
static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -4315,6 +4325,14 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
return err;
}
+ if (data[IFLA_VXLAN_MC_ROUTE]) {
+ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_MC_ROUTE,
+ VXLAN_F_MC_ROUTE, changelink,
+ true, extack);
+ if (err)
+ return err;
+ }
+
if (tb[IFLA_MTU]) {
if (changelink) {
NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_MTU],
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index e2f7ca045d3e..0ee50785f4f1 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -332,6 +332,7 @@ struct vxlan_dev {
#define VXLAN_F_VNIFILTER 0x20000
#define VXLAN_F_MDB 0x40000
#define VXLAN_F_LOCALBYPASS 0x80000
+#define VXLAN_F_MC_ROUTE 0x100000
/* Flags that are used in the receive path. These flags must match in
* order for a socket to be shareable
@@ -353,7 +354,9 @@ struct vxlan_dev {
VXLAN_F_UDP_ZERO_CSUM6_RX | \
VXLAN_F_COLLECT_METADATA | \
VXLAN_F_VNIFILTER | \
- VXLAN_F_LOCALBYPASS)
+ VXLAN_F_LOCALBYPASS | \
+ VXLAN_F_MC_ROUTE | \
+ 0)
struct net_device *vxlan_dev_create(struct net *net, const char *name,
u8 name_assign_type, struct vxlan_config *conf);
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 3ad2d5d98034..873c285996fe 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -1398,6 +1398,7 @@ enum {
IFLA_VXLAN_LOCALBYPASS,
IFLA_VXLAN_LABEL_POLICY, /* IPv6 flow label policy; ifla_vxlan_label_policy */
IFLA_VXLAN_RESERVED_BITS,
+ IFLA_VXLAN_MC_ROUTE,
__IFLA_VXLAN_MAX
};
#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)
--
2.49.0
Powered by blists - more mailing lists