Skip to content

Commit

Permalink
ip_tunnel: convert __be16 tunnel flags to bitmaps
Browse files Browse the repository at this point in the history
Historically, tunnel flags like TUNNEL_CSUM or TUNNEL_ERSPAN_OPT
have been defined as __be16. Now all of those 16 bits are occupied
and there's no more free space for new flags.
It can't be simply switched to a bigger container with no
adjustments to the values, since it's an explicit Endian storage,
and on LE systems (__be16)0x0001 equals to
(__be64)0x0001000000000000.
We could probably define new 64-bit flags depending on the
Endianness, i.e. (__be64)0x0001 on BE and (__be64)0x00010000... on
LE, but that would introduce an Endianness dependency and spawn a
ton of Sparse warnings. To mitigate them, all of those places which
were adjusted with this change would be touched anyway, so why not
define stuff properly if there's no choice.

Define IP_TUNNEL_*_BIT counterparts as a bit number instead of the
value already coded and a fistful of <16 <-> bitmap> converters and
helpers. The two flags which have a different bit position are
SIT_ISATAP_BIT and VTI_ISVTI_BIT, as they were defined not as
__cpu_to_be16(), but as (__force __be16), i.e. had different
positions on LE and BE. Now they both have strongly defined places.
Change all __be16 fields which were used to store those flags, to
IP_TUNNEL_DECLARE_FLAGS() -> DECLARE_BITMAP(__IP_TUNNEL_FLAG_NUM) ->
unsigned long[1] for now, and replace all TUNNEL_* occurrences to
their bitmap counterparts. Use the converters in the places which talk
to the userspace, hardware (NFP) or other hosts (GRE header). The rest
must explicitly use the new flags only. This must be done at once,
otherwise there will be too many conversions throughout the code in
the intermediate commits.
Finally, disable the old __be16 flags for use in the kernel code
(except for the two 'irregular' flags mentioned above), to prevent
any accidental (mis)use of them. For the userspace, nothing is
changed, only additions were made.

Most noticeable bloat-o-meter difference (.text):

vmlinux:	307/-1 (306)
gre.ko:		62/0 (62)
ip_gre.ko:	941/-217 (724)	[*]
ip_tunnel.ko:	390/-900 (-510)	[**]
ip_vti.ko:	138/0 (138)
ip6_gre.ko:	534/-18 (516)	[*]
ip6_tunnel.ko:	118/-10 (108)

[*] gre_flags_to_tnl_flags() grew, but still is inlined
[**] ip_tunnel_find() got uninlined, hence such decrease

The average code size increase in non-extreme case is 100-200 bytes
per module, mostly due to sizeof(long) > sizeof(__be16), as
%__IP_TUNNEL_FLAG_NUM is less than %BITS_PER_LONG and the compilers
are able to expand the majority of bitmap_*() calls here into direct
operations on scalars.

Reviewed-by: Simon Horman <[email protected]>
Signed-off-by: Alexander Lobakin <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
  • Loading branch information
alobakin authored and davem330 committed Apr 1, 2024
1 parent 117aef1 commit 5832c4a
Show file tree
Hide file tree
Showing 40 changed files with 715 additions and 415 deletions.
19 changes: 13 additions & 6 deletions drivers/net/bareudp.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ struct bareudp_dev {
static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
{
struct metadata_dst *tun_dst = NULL;
IP_TUNNEL_DECLARE_FLAGS(key) = { };
struct bareudp_dev *bareudp;
unsigned short family;
unsigned int len;
Expand Down Expand Up @@ -137,7 +138,10 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
bareudp->dev->stats.rx_dropped++;
goto drop;
}
tun_dst = udp_tun_rx_dst(skb, family, TUNNEL_KEY, 0, 0);

__set_bit(IP_TUNNEL_KEY_BIT, key);

tun_dst = udp_tun_rx_dst(skb, family, key, 0, 0);
if (!tun_dst) {
bareudp->dev->stats.rx_dropped++;
goto drop;
Expand Down Expand Up @@ -285,10 +289,10 @@ static int bareudp_xmit_skb(struct sk_buff *skb, struct net_device *dev,
struct bareudp_dev *bareudp,
const struct ip_tunnel_info *info)
{
bool udp_sum = test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags);
bool xnet = !net_eq(bareudp->net, dev_net(bareudp->dev));
bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
struct socket *sock = rcu_dereference(bareudp->sock);
bool udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM);
const struct ip_tunnel_key *key = &info->key;
struct rtable *rt;
__be16 sport, df;
Expand Down Expand Up @@ -316,7 +320,8 @@ static int bareudp_xmit_skb(struct sk_buff *skb, struct net_device *dev,

tos = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb);
ttl = key->ttl;
df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
df = test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, key->tun_flags) ?
htons(IP_DF) : 0;
skb_scrub_packet(skb, xnet);

err = -ENOSPC;
Expand All @@ -338,7 +343,8 @@ static int bareudp_xmit_skb(struct sk_buff *skb, struct net_device *dev,
udp_tunnel_xmit_skb(rt, sock->sk, skb, saddr, info->key.u.ipv4.dst,
tos, ttl, df, sport, bareudp->port,
!net_eq(bareudp->net, dev_net(bareudp->dev)),
!(info->key.tun_flags & TUNNEL_CSUM));
!test_bit(IP_TUNNEL_CSUM_BIT,
info->key.tun_flags));
return 0;

free_dst:
Expand All @@ -350,10 +356,10 @@ static int bareudp6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
struct bareudp_dev *bareudp,
const struct ip_tunnel_info *info)
{
bool udp_sum = test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags);
bool xnet = !net_eq(bareudp->net, dev_net(bareudp->dev));
bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
struct socket *sock = rcu_dereference(bareudp->sock);
bool udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM);
const struct ip_tunnel_key *key = &info->key;
struct dst_entry *dst = NULL;
struct in6_addr saddr, daddr;
Expand Down Expand Up @@ -402,7 +408,8 @@ static int bareudp6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
udp_tunnel6_xmit_skb(dst, sock->sk, skb, dev,
&saddr, &daddr, prio, ttl,
info->key.label, sport, bareudp->port,
!(info->key.tun_flags & TUNNEL_CSUM));
!test_bit(IP_TUNNEL_CSUM_BIT,
info->key.tun_flags));
return 0;

free_dst:
Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a,

bool mlx5e_tc_tun_encap_info_equal_options(struct mlx5e_encap_key *a,
struct mlx5e_encap_key *b,
__be16 tun_flags);
u32 tun_type);
#endif /* CONFIG_MLX5_ESWITCH */

#endif //__MLX5_EN_TC_TUNNEL_H__
6 changes: 3 additions & 3 deletions drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
Original file line number Diff line number Diff line change
Expand Up @@ -587,7 +587,7 @@ bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a,

bool mlx5e_tc_tun_encap_info_equal_options(struct mlx5e_encap_key *a,
struct mlx5e_encap_key *b,
__be16 tun_flags)
u32 tun_type)
{
struct ip_tunnel_info *a_info;
struct ip_tunnel_info *b_info;
Expand All @@ -596,8 +596,8 @@ bool mlx5e_tc_tun_encap_info_equal_options(struct mlx5e_encap_key *a,
if (!mlx5e_tc_tun_encap_info_equal_generic(a, b))
return false;

a_has_opts = !!(a->ip_tun_key->tun_flags & tun_flags);
b_has_opts = !!(b->ip_tun_key->tun_flags & tun_flags);
a_has_opts = test_bit(tun_type, a->ip_tun_key->tun_flags);
b_has_opts = test_bit(tun_type, b->ip_tun_key->tun_flags);

/* keys are equal when both don't have any options attached */
if (!a_has_opts && !b_has_opts)
Expand Down
12 changes: 7 additions & 5 deletions drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c
Original file line number Diff line number Diff line change
Expand Up @@ -106,12 +106,13 @@ static int mlx5e_gen_ip_tunnel_header_geneve(char buf[],
memset(geneveh, 0, sizeof(*geneveh));
geneveh->ver = MLX5E_GENEVE_VER;
geneveh->opt_len = tun_info->options_len / 4;
geneveh->oam = !!(tun_info->key.tun_flags & TUNNEL_OAM);
geneveh->critical = !!(tun_info->key.tun_flags & TUNNEL_CRIT_OPT);
geneveh->oam = test_bit(IP_TUNNEL_OAM_BIT, tun_info->key.tun_flags);
geneveh->critical = test_bit(IP_TUNNEL_CRIT_OPT_BIT,
tun_info->key.tun_flags);
mlx5e_tunnel_id_to_vni(tun_info->key.tun_id, geneveh->vni);
geneveh->proto_type = htons(ETH_P_TEB);

if (tun_info->key.tun_flags & TUNNEL_GENEVE_OPT) {
if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, tun_info->key.tun_flags)) {
if (!geneveh->opt_len)
return -EOPNOTSUPP;
ip_tunnel_info_opts_get(geneveh->options, tun_info);
Expand Down Expand Up @@ -188,7 +189,7 @@ static int mlx5e_tc_tun_parse_geneve_options(struct mlx5e_priv *priv,

/* make sure that we're talking about GENEVE options */

if (enc_opts.key->dst_opt_type != TUNNEL_GENEVE_OPT) {
if (enc_opts.key->dst_opt_type != IP_TUNNEL_GENEVE_OPT_BIT) {
NL_SET_ERR_MSG_MOD(extack,
"Matching on GENEVE options: option type is not GENEVE");
netdev_warn(priv->netdev,
Expand Down Expand Up @@ -337,7 +338,8 @@ static int mlx5e_tc_tun_parse_geneve(struct mlx5e_priv *priv,
static bool mlx5e_tc_tun_encap_info_equal_geneve(struct mlx5e_encap_key *a,
struct mlx5e_encap_key *b)
{
return mlx5e_tc_tun_encap_info_equal_options(a, b, TUNNEL_GENEVE_OPT);
return mlx5e_tc_tun_encap_info_equal_options(a, b,
IP_TUNNEL_GENEVE_OPT_BIT);
}

struct mlx5e_tc_tunnel geneve_tunnel = {
Expand Down
8 changes: 6 additions & 2 deletions drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,20 +31,24 @@ static int mlx5e_gen_ip_tunnel_header_gretap(char buf[],
const struct ip_tunnel_key *tun_key = &e->tun_info->key;
struct gre_base_hdr *greh = (struct gre_base_hdr *)(buf);
__be32 tun_id = tunnel_id_to_key32(tun_key->tun_id);
IP_TUNNEL_DECLARE_FLAGS(unsupp) = { };
int hdr_len;

*ip_proto = IPPROTO_GRE;

/* the HW does not calculate GRE csum or sequences */
if (tun_key->tun_flags & (TUNNEL_CSUM | TUNNEL_SEQ))
__set_bit(IP_TUNNEL_CSUM_BIT, unsupp);
__set_bit(IP_TUNNEL_SEQ_BIT, unsupp);

if (ip_tunnel_flags_intersect(tun_key->tun_flags, unsupp))
return -EOPNOTSUPP;

greh->protocol = htons(ETH_P_TEB);

/* GRE key */
hdr_len = mlx5e_tc_tun_calc_hlen_gretap(e);
greh->flags = gre_tnl_flags_to_gre_flags(tun_key->tun_flags);
if (tun_key->tun_flags & TUNNEL_KEY) {
if (test_bit(IP_TUNNEL_KEY_BIT, tun_key->tun_flags)) {
__be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
*ptr = tun_id;
}
Expand Down
9 changes: 5 additions & 4 deletions drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ static int mlx5e_gen_ip_tunnel_header_vxlan(char buf[],
const struct vxlan_metadata *md;
struct vxlanhdr *vxh;

if ((tun_key->tun_flags & TUNNEL_VXLAN_OPT) &&
if (test_bit(IP_TUNNEL_VXLAN_OPT_BIT, tun_key->tun_flags) &&
e->tun_info->options_len != sizeof(*md))
return -EOPNOTSUPP;
vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr));
Expand All @@ -99,7 +99,7 @@ static int mlx5e_gen_ip_tunnel_header_vxlan(char buf[],
udp->dest = tun_key->tp_dst;
vxh->vx_flags = VXLAN_HF_VNI;
vxh->vx_vni = vxlan_vni_field(tun_id);
if (tun_key->tun_flags & TUNNEL_VXLAN_OPT) {
if (test_bit(IP_TUNNEL_VXLAN_OPT_BIT, tun_key->tun_flags)) {
md = ip_tunnel_info_opts(e->tun_info);
vxlan_build_gbp_hdr(vxh, md);
}
Expand All @@ -125,7 +125,7 @@ static int mlx5e_tc_tun_parse_vxlan_gbp_option(struct mlx5e_priv *priv,
return -EOPNOTSUPP;
}

if (enc_opts.key->dst_opt_type != TUNNEL_VXLAN_OPT) {
if (enc_opts.key->dst_opt_type != IP_TUNNEL_VXLAN_OPT_BIT) {
NL_SET_ERR_MSG_MOD(extack, "Wrong VxLAN option type: not GBP");
return -EOPNOTSUPP;
}
Expand Down Expand Up @@ -208,7 +208,8 @@ static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv,
static bool mlx5e_tc_tun_encap_info_equal_vxlan(struct mlx5e_encap_key *a,
struct mlx5e_encap_key *b)
{
return mlx5e_tc_tun_encap_info_equal_options(a, b, TUNNEL_VXLAN_OPT);
return mlx5e_tc_tun_encap_info_equal_options(a, b,
IP_TUNNEL_VXLAN_OPT_BIT);
}

static int mlx5e_tc_tun_get_remote_ifindex(struct net_device *mirred_dev)
Expand Down
16 changes: 12 additions & 4 deletions drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
Original file line number Diff line number Diff line change
Expand Up @@ -5464,13 +5464,16 @@ static bool mlx5e_tc_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct tunnel_match_enc_opts enc_opts = {};
struct mlx5_rep_uplink_priv *uplink_priv;
IP_TUNNEL_DECLARE_FLAGS(flags) = { };
struct mlx5e_rep_priv *uplink_rpriv;
struct metadata_dst *tun_dst;
struct tunnel_match_key key;
u32 tun_id, enc_opts_id;
struct net_device *dev;
int err;

__set_bit(IP_TUNNEL_KEY_BIT, flags);

enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK;
tun_id = tunnel_id >> ENC_OPTS_BITS;

Expand Down Expand Up @@ -5503,14 +5506,14 @@ static bool mlx5e_tc_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb
case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
tun_dst = __ip_tun_set_dst(key.enc_ipv4.src, key.enc_ipv4.dst,
key.enc_ip.tos, key.enc_ip.ttl,
key.enc_tp.dst, TUNNEL_KEY,
key.enc_tp.dst, flags,
key32_to_tunnel_id(key.enc_key_id.keyid),
enc_opts.key.len);
break;
case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
tun_dst = __ipv6_tun_set_dst(&key.enc_ipv6.src, &key.enc_ipv6.dst,
key.enc_ip.tos, key.enc_ip.ttl,
key.enc_tp.dst, 0, TUNNEL_KEY,
key.enc_tp.dst, 0, flags,
key32_to_tunnel_id(key.enc_key_id.keyid),
enc_opts.key.len);
break;
Expand All @@ -5528,11 +5531,16 @@ static bool mlx5e_tc_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb

tun_dst->u.tun_info.key.tp_src = key.enc_tp.src;

if (enc_opts.key.len)
if (enc_opts.key.len) {
ip_tunnel_flags_zero(flags);
if (enc_opts.key.dst_opt_type)
__set_bit(enc_opts.key.dst_opt_type, flags);

ip_tunnel_info_opts_set(&tun_dst->u.tun_info,
enc_opts.key.data,
enc_opts.key.len,
enc_opts.key.dst_opt_type);
flags);
}

skb_dst_set(skb, (struct dst_entry *)tun_dst);
dev = dev_get_by_index(&init_net, key.filter_ifindex);
Expand Down
26 changes: 16 additions & 10 deletions drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,23 +27,23 @@ mlxsw_sp_ipip_netdev_parms6(const struct net_device *ol_dev)
static bool
mlxsw_sp_ipip_parms4_has_ikey(const struct ip_tunnel_parm_kern *parms)
{
return !!(parms->i_flags & TUNNEL_KEY);
return test_bit(IP_TUNNEL_KEY_BIT, parms->i_flags);
}

static bool mlxsw_sp_ipip_parms6_has_ikey(const struct __ip6_tnl_parm *parms)
{
return !!(parms->i_flags & TUNNEL_KEY);
return test_bit(IP_TUNNEL_KEY_BIT, parms->i_flags);
}

static bool
mlxsw_sp_ipip_parms4_has_okey(const struct ip_tunnel_parm_kern *parms)
{
return !!(parms->o_flags & TUNNEL_KEY);
return test_bit(IP_TUNNEL_KEY_BIT, parms->o_flags);
}

static bool mlxsw_sp_ipip_parms6_has_okey(const struct __ip6_tnl_parm *parms)
{
return !!(parms->o_flags & TUNNEL_KEY);
return test_bit(IP_TUNNEL_KEY_BIT, parms->o_flags);
}

static u32 mlxsw_sp_ipip_parms4_ikey(const struct ip_tunnel_parm_kern *parms)
Expand Down Expand Up @@ -242,12 +242,15 @@ static bool mlxsw_sp_ipip_can_offload_gre4(const struct mlxsw_sp *mlxsw_sp,
const struct net_device *ol_dev)
{
struct ip_tunnel *tunnel = netdev_priv(ol_dev);
__be16 okflags = TUNNEL_KEY; /* We can't offload any other features. */
bool inherit_ttl = tunnel->parms.iph.ttl == 0;
bool inherit_tos = tunnel->parms.iph.tos & 0x1;
IP_TUNNEL_DECLARE_FLAGS(okflags) = { };

return (tunnel->parms.i_flags & ~okflags) == 0 &&
(tunnel->parms.o_flags & ~okflags) == 0 &&
/* We can't offload any other features. */
__set_bit(IP_TUNNEL_KEY_BIT, okflags);

return ip_tunnel_flags_subset(tunnel->parms.i_flags, okflags) &&
ip_tunnel_flags_subset(tunnel->parms.o_flags, okflags) &&
inherit_ttl && inherit_tos &&
mlxsw_sp_ipip_tunnel_complete(MLXSW_SP_L3_PROTO_IPV4, ol_dev);
}
Expand Down Expand Up @@ -443,10 +446,13 @@ static bool mlxsw_sp_ipip_can_offload_gre6(const struct mlxsw_sp *mlxsw_sp,
struct __ip6_tnl_parm tparm = mlxsw_sp_ipip_netdev_parms6(ol_dev);
bool inherit_tos = tparm.flags & IP6_TNL_F_USE_ORIG_TCLASS;
bool inherit_ttl = tparm.hop_limit == 0;
__be16 okflags = TUNNEL_KEY; /* We can't offload any other features. */
IP_TUNNEL_DECLARE_FLAGS(okflags) = { };

/* We can't offload any other features. */
__set_bit(IP_TUNNEL_KEY_BIT, okflags);

return (tparm.i_flags & ~okflags) == 0 &&
(tparm.o_flags & ~okflags) == 0 &&
return ip_tunnel_flags_subset(tparm.i_flags, okflags) &&
ip_tunnel_flags_subset(tparm.o_flags, okflags) &&
inherit_ttl && inherit_tos &&
mlxsw_sp_ipip_tunnel_complete(MLXSW_SP_L3_PROTO_IPV6, ol_dev);
}
Expand Down
6 changes: 4 additions & 2 deletions drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,8 @@ mlxsw_sp_span_entry_gretap4_parms(struct mlxsw_sp *mlxsw_sp,

if (!(to_dev->flags & IFF_UP) ||
/* Reject tunnels with GRE keys, checksums, etc. */
tparm.i_flags || tparm.o_flags ||
!ip_tunnel_flags_empty(tparm.i_flags) ||
!ip_tunnel_flags_empty(tparm.o_flags) ||
/* Require a fixed TTL and a TOS copied from the mirrored packet. */
inherit_ttl || !inherit_tos ||
/* A destination address may not be "any". */
Expand Down Expand Up @@ -565,7 +566,8 @@ mlxsw_sp_span_entry_gretap6_parms(struct mlxsw_sp *mlxsw_sp,

if (!(to_dev->flags & IFF_UP) ||
/* Reject tunnels with GRE keys, checksums, etc. */
tparm.i_flags || tparm.o_flags ||
!ip_tunnel_flags_empty(tparm.i_flags) ||
!ip_tunnel_flags_empty(tparm.o_flags) ||
/* Require a fixed TTL and a TOS copied from the mirrored packet. */
inherit_ttl || !inherit_tos ||
/* A destination address may not be "any". */
Expand Down
Loading

0 comments on commit 5832c4a

Please sign in to comment.