Merge branch 'nexthop-convert-rtm_-new-del-nexthop-to-per-netns-rtnl'

Kuniyuki Iwashima says:

====================
nexthop: Convert RTM_{NEW,DEL}NEXTHOP to per-netns RTNL.

Patches 1 - 5 move some validation for RTM_NEWNEXTHOP so that it can be
performed without holding RTNL.

Patches 6 & 7 convert RTM_NEWNEXTHOP and RTM_DELNEXTHOP to per-netns RTNL.

Note that RTM_GETNEXTHOP and RTM_GETNEXTHOPBUCKET are not touched in
this series.

rtm_get_nexthop() can be easily converted to RCU, but rtm_dump_nexthop()
needs more work due to the left-to-right rbtree walk, which looks
vulnerable to concurrent node deletion and tree rotation without a retry
mechanism.

v1: https://lore.kernel.org/netdev/20250318233240.53946-1-kuniyu@amazon.com/
====================

Link: https://patch.msgid.link/20250319230743.65267-1-kuniyu@amazon.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski
2025-03-25 07:32:03 -07:00
+112 -71
View File
@@ -1272,10 +1272,8 @@ static int nh_check_attr_group(struct net *net,
u16 nh_grp_type, struct netlink_ext_ack *extack)
{
unsigned int len = nla_len(tb[NHA_GROUP]);
u8 nh_family = AF_UNSPEC;
struct nexthop_grp *nhg;
unsigned int i, j;
u8 nhg_fdb = 0;
if (!len || len & (sizeof(struct nexthop_grp) - 1)) {
NL_SET_ERR_MSG(extack,
@@ -1307,10 +1305,41 @@ static int nh_check_attr_group(struct net *net,
}
}
if (tb[NHA_FDB])
nhg_fdb = 1;
nhg = nla_data(tb[NHA_GROUP]);
for (i = 0; i < len; ++i) {
for (i = NHA_GROUP_TYPE + 1; i < tb_size; ++i) {
if (!tb[i])
continue;
switch (i) {
case NHA_HW_STATS_ENABLE:
case NHA_FDB:
continue;
case NHA_RES_GROUP:
if (nh_grp_type == NEXTHOP_GRP_TYPE_RES)
continue;
break;
}
NL_SET_ERR_MSG(extack,
"No other attributes can be set in nexthop groups");
return -EINVAL;
}
return 0;
}
static int nh_check_attr_group_rtnl(struct net *net, struct nlattr *tb[],
struct netlink_ext_ack *extack)
{
u8 nh_family = AF_UNSPEC;
struct nexthop_grp *nhg;
unsigned int len;
unsigned int i;
u8 nhg_fdb;
len = nla_len(tb[NHA_GROUP]) / sizeof(*nhg);
nhg = nla_data(tb[NHA_GROUP]);
nhg_fdb = !!tb[NHA_FDB];
for (i = 0; i < len; i++) {
struct nexthop *nh;
bool is_fdb_nh;
@@ -1330,22 +1359,6 @@ static int nh_check_attr_group(struct net *net,
return -EINVAL;
}
}
for (i = NHA_GROUP_TYPE + 1; i < tb_size; ++i) {
if (!tb[i])
continue;
switch (i) {
case NHA_HW_STATS_ENABLE:
case NHA_FDB:
continue;
case NHA_RES_GROUP:
if (nh_grp_type == NEXTHOP_GRP_TYPE_RES)
continue;
break;
}
NL_SET_ERR_MSG(extack,
"No other attributes can be set in nexthop groups");
return -EINVAL;
}
return 0;
}
@@ -2679,9 +2692,6 @@ static struct nexthop *nexthop_create_group(struct net *net,
int err;
int i;
if (WARN_ON(!num_nh))
return ERR_PTR(-EINVAL);
nh = nexthop_alloc();
if (!nh)
return ERR_PTR(-ENOMEM);
@@ -2915,11 +2925,6 @@ static struct nexthop *nexthop_add(struct net *net, struct nh_config *cfg,
struct nexthop *nh;
int err;
if (cfg->nlflags & NLM_F_REPLACE && !cfg->nh_id) {
NL_SET_ERR_MSG(extack, "Replace requires nexthop id");
return ERR_PTR(-EINVAL);
}
if (!cfg->nh_id) {
cfg->nh_id = nh_find_unused_id(net);
if (!cfg->nh_id) {
@@ -3016,19 +3021,13 @@ static int rtm_to_nh_config_grp_res(struct nlattr *res, struct nh_config *cfg,
}
static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
struct nlmsghdr *nlh, struct nh_config *cfg,
struct nlmsghdr *nlh, struct nlattr **tb,
struct nh_config *cfg,
struct netlink_ext_ack *extack)
{
struct nhmsg *nhm = nlmsg_data(nlh);
struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_new)];
int err;
err = nlmsg_parse(nlh, sizeof(*nhm), tb,
ARRAY_SIZE(rtm_nh_policy_new) - 1,
rtm_nh_policy_new, extack);
if (err < 0)
return err;
err = -EINVAL;
if (nhm->resvd || nhm->nh_scope) {
NL_SET_ERR_MSG(extack, "Invalid values in ancillary header");
@@ -3093,7 +3092,8 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
NL_SET_ERR_MSG(extack, "Invalid group type");
goto out;
}
err = nh_check_attr_group(net, tb, ARRAY_SIZE(tb),
err = nh_check_attr_group(net, tb, ARRAY_SIZE(rtm_nh_policy_new),
cfg->nh_grp_type, extack);
if (err)
goto out;
@@ -3126,25 +3126,6 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
goto out;
}
if (!cfg->nh_fdb && tb[NHA_OIF]) {
cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]);
if (cfg->nh_ifindex)
cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex);
if (!cfg->dev) {
NL_SET_ERR_MSG(extack, "Invalid device index");
goto out;
} else if (!(cfg->dev->flags & IFF_UP)) {
NL_SET_ERR_MSG(extack, "Nexthop device is not up");
err = -ENETDOWN;
goto out;
} else if (!netif_carrier_ok(cfg->dev)) {
NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down");
err = -ENETDOWN;
goto out;
}
}
err = -EINVAL;
if (tb[NHA_GATEWAY]) {
struct nlattr *gwa = tb[NHA_GATEWAY];
@@ -3188,7 +3169,7 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
cfg->nh_encap_type = nla_get_u16(tb[NHA_ENCAP_TYPE]);
err = lwtunnel_valid_encap_type(cfg->nh_encap_type,
extack, true);
extack, false);
if (err < 0)
goto out;
@@ -3207,22 +3188,76 @@ out:
return err;
}
/*
 * Second-stage validation of an RTM_NEWNEXTHOP request.
 *
 * rtm_new_nexthop() calls this after taking rtnl_net_lock(net), i.e. after
 * nlmsg_parse() and rtm_to_nh_config() have already run without the lock.
 *
 * @net:    namespace used for the device lookup
 * @tb:     parsed netlink attributes, indexed by NHA_*
 * @cfg:    nexthop config; nh_ifindex and dev are filled in from NHA_OIF
 * @extack: receives a message describing the failure, if any
 *
 * Returns 0 on success, -EINVAL if NHA_OIF does not resolve to a device,
 * -ENETDOWN if the device is not up or has no carrier, or whatever
 * nh_check_attr_group_rtnl() returns when NHA_GROUP is present.
 */
static int rtm_to_nh_config_rtnl(struct net *net, struct nlattr **tb,
struct nh_config *cfg,
struct netlink_ext_ack *extack)
{
/* A group request is fully handled by the group check; NHA_OIF is not examined. */
if (tb[NHA_GROUP])
return nh_check_attr_group_rtnl(net, tb, extack);
if (tb[NHA_OIF]) {
cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]);
/* An ifindex of 0 skips the lookup and leaves cfg->dev unchanged. */
if (cfg->nh_ifindex)
cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex);
/* NOTE(review): presumably cfg->dev is NULL on entry, so ifindex 0 also fails here - confirm in rtm_to_nh_config(). */
if (!cfg->dev) {
NL_SET_ERR_MSG(extack, "Invalid device index");
return -EINVAL;
}
/* The device must be administratively up ... */
if (!(cfg->dev->flags & IFF_UP)) {
NL_SET_ERR_MSG(extack, "Nexthop device is not up");
return -ENETDOWN;
}
/* ... and must report carrier. */
if (!netif_carrier_ok(cfg->dev)) {
NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down");
return -ENETDOWN;
}
}
return 0;
}
/* rtnl */
static int rtm_new_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_new)];
struct net *net = sock_net(skb->sk);
struct nh_config cfg;
struct nexthop *nh;
int err;
err = rtm_to_nh_config(net, skb, nlh, &cfg, extack);
if (!err) {
nh = nexthop_add(net, &cfg, extack);
if (IS_ERR(nh))
err = PTR_ERR(nh);
err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb,
ARRAY_SIZE(rtm_nh_policy_new) - 1,
rtm_nh_policy_new, extack);
if (err < 0)
goto out;
err = rtm_to_nh_config(net, skb, nlh, tb, &cfg, extack);
if (err)
goto out;
if (cfg.nlflags & NLM_F_REPLACE && !cfg.nh_id) {
NL_SET_ERR_MSG(extack, "Replace requires nexthop id");
err = -EINVAL;
goto out;
}
rtnl_net_lock(net);
err = rtm_to_nh_config_rtnl(net, tb, &cfg, extack);
if (err)
goto unlock;
nh = nexthop_add(net, &cfg, extack);
if (IS_ERR(nh))
err = PTR_ERR(nh);
unlock:
rtnl_net_unlock(net);
out:
return err;
}
@@ -3279,13 +3314,17 @@ static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err)
return err;
rtnl_net_lock(net);
nh = nexthop_find_by_id(net, id);
if (!nh)
return -ENOENT;
if (nh)
remove_nexthop(net, nh, &nlinfo);
else
err = -ENOENT;
remove_nexthop(net, nh, &nlinfo);
rtnl_net_unlock(net);
return 0;
return err;
}
/* rtnl */
@@ -4037,18 +4076,20 @@ static struct pernet_operations nexthop_net_ops = {
};
static const struct rtnl_msg_handler nexthop_rtnl_msg_handlers[] __initconst = {
{.msgtype = RTM_NEWNEXTHOP, .doit = rtm_new_nexthop},
{.msgtype = RTM_DELNEXTHOP, .doit = rtm_del_nexthop},
{.msgtype = RTM_NEWNEXTHOP, .doit = rtm_new_nexthop,
.flags = RTNL_FLAG_DOIT_PERNET},
{.msgtype = RTM_DELNEXTHOP, .doit = rtm_del_nexthop,
.flags = RTNL_FLAG_DOIT_PERNET},
{.msgtype = RTM_GETNEXTHOP, .doit = rtm_get_nexthop,
.dumpit = rtm_dump_nexthop},
{.msgtype = RTM_GETNEXTHOPBUCKET, .doit = rtm_get_nexthop_bucket,
.dumpit = rtm_dump_nexthop_bucket},
{.protocol = PF_INET, .msgtype = RTM_NEWNEXTHOP,
.doit = rtm_new_nexthop},
.doit = rtm_new_nexthop, .flags = RTNL_FLAG_DOIT_PERNET},
{.protocol = PF_INET, .msgtype = RTM_GETNEXTHOP,
.dumpit = rtm_dump_nexthop},
{.protocol = PF_INET6, .msgtype = RTM_NEWNEXTHOP,
.doit = rtm_new_nexthop},
.doit = rtm_new_nexthop, .flags = RTNL_FLAG_DOIT_PERNET},
{.protocol = PF_INET6, .msgtype = RTM_GETNEXTHOP,
.dumpit = rtm_dump_nexthop},
};