From 01a3701ec41779c9bad685b67b728815dbc019a1 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Wed, 7 Dec 2016 11:37:16 +0200 Subject: [PATCH 01/34] switchdev: Use flags when checking parent HW id Allow the user to provide attribute flags when calling the switchdev same parent HW id helper. This patch doesn't add any new functionality. Issue: 974864 Change-Id: Ie16042078baea6ffbc7a68767659d017e64887bf Signed-off-by: Hadar Hen Zion Reviewed-by: Or Gerlitz --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 7 ++++--- include/net/switchdev.h | 6 ++++-- net/bridge/br_switchdev.c | 2 +- net/switchdev/switchdev.c | 5 ++++- 4 files changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 3c536f560dd2b5..fc241be097c66b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1414,7 +1414,7 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv, return -EOPNOTSUPP; #endif /* if the egress device isn't on the same HW e-switch, we use the uplink */ - if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev)) + if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev, 0)) *out_dev = mlx5_eswitch_get_uplink_netdev(esw); else *out_dev = rt->dst.dev; @@ -1453,7 +1453,7 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv, *out_ttl = ip6_dst_hoplimit(dst); /* if the egress device isn't on the same HW e-switch, we use the uplink */ - if (!switchdev_port_same_parent_id(priv->netdev, dst->dev)) + if (!switchdev_port_same_parent_id(priv->netdev, dst->dev, 0)) *out_dev = mlx5_eswitch_get_uplink_netdev(esw); else *out_dev = dst->dev; @@ -1879,7 +1879,8 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, out_dev = __dev_get_by_index(dev_net(priv->netdev), ifindex); if (switchdev_port_same_parent_id(priv->netdev, - out_dev)) { + out_dev, + 0)) { attr->action |= 
MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; out_priv = netdev_priv(out_dev); diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 8ae9e3b6392e3c..592371c8c70ec6 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -216,7 +216,8 @@ void switchdev_port_fwd_mark_set(struct net_device *dev, bool joining); bool switchdev_port_same_parent_id(struct net_device *a, - struct net_device *b); + struct net_device *b, + u32 flags); #define SWITCHDEV_SET_OPS(netdev, ops) ((netdev)->switchdev_ops = (ops)) #else @@ -319,7 +320,8 @@ static inline int switchdev_port_fdb_dump(struct sk_buff *skb, } static inline bool switchdev_port_same_parent_id(struct net_device *a, - struct net_device *b) + struct net_device *b, + u32 flags) { return false; } diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 181a44d0f1da63..e6dd68eab4f4b8 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -13,7 +13,7 @@ static int br_switchdev_mark_get(struct net_bridge *br, struct net_device *dev) /* dev is yet to be added to the port list. 
*/ list_for_each_entry(p, &br->port_list, list) { - if (switchdev_port_same_parent_id(dev, p->dev)) + if (switchdev_port_same_parent_id(dev, p->dev, 0)) return p->offload_fwd_mark; } diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 25dc67ef9d3708..3deb34c90458f4 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -1094,15 +1094,18 @@ int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump); bool switchdev_port_same_parent_id(struct net_device *a, - struct net_device *b) + struct net_device *b, + u32 flags) { struct switchdev_attr a_attr = { .orig_dev = a, .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, + .flags = flags, }; struct switchdev_attr b_attr = { .orig_dev = b, .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, + .flags = flags, }; if (switchdev_port_attr_get(a, &a_attr) || From 040ad25a8ddb2c7c8d23587d6c56f9d1e2219f3a Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Wed, 7 Dec 2016 12:36:54 +0200 Subject: [PATCH 02/34] net/mlx5e: Avoid recursion when checking switchdev parent HW id We use the switchdev parent HW id helper to identify if the mirred device shares the same ASIC with the ingress device. This can get us wrong in the presence of upper devices (e.g vlan, team, etc). To fail offload attempts in such cases, we move to do the parent id checks without recursion, using the SWITCHDEV_F_NO_RECURSE flag. 
Issue: 974864 Change-Id: I6f45e549f5a8abef13ff3b156a2e9d7cb12b2a9e Fixes: 03a9d11e6eeb ("net/mlx5e: Add TC drop and mirred/redirect action parsing for SRIOV offloads") Signed-off-by: Hadar Hen Zion Reviewed-by: Or Gerlitz --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index fc241be097c66b..b77c8e28ea50d0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1414,7 +1414,7 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv, return -EOPNOTSUPP; #endif /* if the egress device isn't on the same HW e-switch, we use the uplink */ - if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev, 0)) + if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev, SWITCHDEV_F_NO_RECURSE)) *out_dev = mlx5_eswitch_get_uplink_netdev(esw); else *out_dev = rt->dst.dev; @@ -1453,7 +1453,7 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv, *out_ttl = ip6_dst_hoplimit(dst); /* if the egress device isn't on the same HW e-switch, we use the uplink */ - if (!switchdev_port_same_parent_id(priv->netdev, dst->dev, 0)) + if (!switchdev_port_same_parent_id(priv->netdev, dst->dev, SWITCHDEV_F_NO_RECURSE)) *out_dev = mlx5_eswitch_get_uplink_netdev(esw); else *out_dev = dst->dev; @@ -1880,7 +1880,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, if (switchdev_port_same_parent_id(priv->netdev, out_dev, - 0)) { + SWITCHDEV_F_NO_RECURSE)) { attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; out_priv = netdev_priv(out_dev); From 00eca515a01a8c49eaf82e99a107a6987752b565 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 11 Jun 2017 21:13:25 +0300 Subject: [PATCH 03/34] net/mlx5e: Disallow TC offloading of unsupported match/action combinations When offloading header 
re-write, the HW may need to adjust checksums along the packet. For IP traffic, and a case where we are asked to modify fields in the IP header, current HW supports that only for TCP and UDP. Enforce it, in this case fail the offloading attempt for non TCP/UDP packets. issue: 1050641 Change-Id: Ia575dfd157bb965bcde03a79e1a95a633074bf38 Signed-off-by: Or Gerlitz --- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 70 +++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index b77c8e28ea50d0..8059b2e41f4c8e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1317,6 +1317,69 @@ static bool csum_offload_supported(struct mlx5e_priv *priv, u32 action, u32 upda return true; } +static bool modify_header_match_supported(struct mlx5_flow_spec *spec, + struct tcf_exts *exts) +{ + const struct tc_action *a; + LIST_HEAD(actions); + bool modify_ip_header; + u8 htype, ip_proto; + void *headers_v; + u16 ethertype; + int nkeys, i; + + headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); + ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype); + + /* for non-IP we only re-write MACs, so we're okay */ + if (ethertype != ETH_P_IP && ethertype != ETH_P_IPV6) + goto out_ok; + + modify_ip_header = false; + tcf_exts_to_list(exts, &actions); + list_for_each_entry(a, &actions, list) { + if (!is_tcf_pedit(a)) + continue; + + nkeys = tcf_pedit_nkeys(a); + for (i = 0; i < nkeys; i++) { + htype = tcf_pedit_htype(a, i); + if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 || + htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP6) { + modify_ip_header = true; + break; + } + } + } + + ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol); + if (modify_ip_header && ip_proto != IPPROTO_TCP && ip_proto != IPPROTO_UDP) { + pr_info("can't offload re-write of ip proto %d\n", ip_proto); + return false; + } 
+ +out_ok: + return true; +} + +static bool actions_match_supported(struct mlx5e_priv *priv, + struct tcf_exts *exts, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct mlx5e_tc_flow *flow) +{ + u32 actions; + + if (flow->flags & MLX5E_TC_FLOW_ESWITCH) + actions = flow->esw_attr->action; + else + actions = flow->nic_attr->action; + + if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + return modify_header_match_supported(&parse_attr->spec, exts); + + return true; +} + static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow) @@ -1378,6 +1441,9 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return -EINVAL; } + if (!actions_match_supported(priv, exts, parse_attr, flow)) + return -EOPNOTSUPP; + return 0; } @@ -1937,6 +2003,10 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return -EINVAL; } + + if (!actions_match_supported(priv, exts, parse_attr, flow)) + return -EOPNOTSUPP; + return err; } From 3f7c3d54c100cb7b140587f28d0c2f190e95eaf5 Mon Sep 17 00:00:00 2001 From: Rabie Loulou Date: Mon, 10 Jul 2017 14:35:10 +0300 Subject: [PATCH 04/34] net/mlx5: Fix counter list hardware structure The counter list hardware structure doesn't contain a clear and num_of_counters fields, remove them. These wrong fields were never used by the driver hence no other driver changes. 
Issue: 1070560 Change-Id: Ifbd657052eeb2bbc048ea9611aa5a0ff1564d7c2 Fixes: a351a1b03bf1 ("net/mlx5: Introduce bulk reading of flow counters") Signed-off-by: Rabie Loulou Reviewed-by: Or Gerlitz --- include/linux/mlx5/mlx5_ifc.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 3030121b474601..f847a3a579139d 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1071,8 +1071,7 @@ struct mlx5_ifc_dest_format_struct_bits { }; struct mlx5_ifc_flow_counter_list_bits { - u8 clear[0x1]; - u8 num_of_counters[0xf]; + u8 reserved_at_0[0x10]; u8 flow_counter_id[0x10]; u8 reserved_at_20[0x20]; From b58f8043b91992937c60a00e30a18525872e22b5 Mon Sep 17 00:00:00 2001 From: Rabie Loulou Date: Sun, 9 Jul 2017 13:39:30 +0300 Subject: [PATCH 05/34] net/mlx5: Increase the maximum flow counters supported Read new NIC capability field which represents 16 MSBs of the max flow counters number supported (max_flow_counter_31_16). Backward compatibility with older firmware is preserved, the modified driver reads max_flow_counter_31_16 as 0 from the older firmware and uses up to 64K counters. Changed flow counter id from 16 bits to 32 bits. Backward compatibility with older firmware is preserved as we kept the 16 LSBs of the counter id in place and added 16 MSBs from reserved field. Changed the background bulk reading of flow counters to work in chunks of at most 32K counters, to make sure we don't attempt to allocate very large buffers. Changed mlx5_cmd_fc_bulk_alloc function to use vzalloc instead of kzalloc. Changed mlx5_cmd_fc_bulk_free function to use kvfree instead of kfree. 
Issue: 1070560 Change-Id: I9ffc8a09a6b76b5362697aacfa5943dbfb011ec1 Signed-off-by: Rabie Loulou Reviewed-by: Or Gerlitz --- .../mellanox/mlx5/core/eswitch_offloads.c | 6 ++++-- drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 12 ++++++------ drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h | 10 +++++----- .../net/ethernet/mellanox/mlx5/core/fs_core.h | 2 +- .../ethernet/mellanox/mlx5/core/fs_counters.c | 13 ++++++++++--- include/linux/mlx5/mlx5_ifc.h | 16 ++++++---------- 6 files changed, 32 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 95b64025ce36f7..e7c186b585796d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -433,6 +433,8 @@ static int esw_create_offloads_fast_fdb_table(struct mlx5_eswitch *esw) struct mlx5_flow_table *fdb = NULL; int esw_size, err = 0; u32 flags = 0; + u32 max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) | + MLX5_CAP_GEN(dev, max_flow_counter_15_0); root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); if (!root_ns) { @@ -443,9 +445,9 @@ static int esw_create_offloads_fast_fdb_table(struct mlx5_eswitch *esw) esw_debug(dev, "Create offloads FDB table, min (max esw size(2^%d), max counters(%d)*groups(%d))\n", MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size), - MLX5_CAP_GEN(dev, max_flow_counter), ESW_OFFLOADS_NUM_GROUPS); + max_flow_counter, ESW_OFFLOADS_NUM_GROUPS); - esw_size = min_t(int, MLX5_CAP_GEN(dev, max_flow_counter) * ESW_OFFLOADS_NUM_GROUPS, + esw_size = min_t(int, max_flow_counter * ESW_OFFLOADS_NUM_GROUPS, 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index e750f07793b829..16b32f31d691c6 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -359,7 +359,7 @@ int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev, return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } -int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u16 *id) +int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id) { u32 in[MLX5_ST_SZ_DW(alloc_flow_counter_in)] = {0}; u32 out[MLX5_ST_SZ_DW(alloc_flow_counter_out)] = {0}; @@ -374,7 +374,7 @@ int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u16 *id) return err; } -int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u16 id) +int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id) { u32 in[MLX5_ST_SZ_DW(dealloc_flow_counter_in)] = {0}; u32 out[MLX5_ST_SZ_DW(dealloc_flow_counter_out)] = {0}; @@ -385,7 +385,7 @@ int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u16 id) return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } -int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u16 id, +int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u32 id, u64 *packets, u64 *bytes) { u32 out[MLX5_ST_SZ_BYTES(query_flow_counter_out) + @@ -409,14 +409,14 @@ int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u16 id, } struct mlx5_cmd_fc_bulk { - u16 id; + u32 id; int num; int outlen; u32 out[0]; }; struct mlx5_cmd_fc_bulk * -mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u16 id, int num) +mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u32 id, int num) { struct mlx5_cmd_fc_bulk *b; int outlen = @@ -453,7 +453,7 @@ mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b) } void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev, - struct mlx5_cmd_fc_bulk *b, u16 id, + struct mlx5_cmd_fc_bulk *b, u32 id, u64 *packets, u64 *bytes) { int index = id - b->id; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h index 0f98a7cf4877d8..c6d7bdf255b6b9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h @@ -74,20 +74,20 @@ int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, u32 underlay_qpn); -int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u16 *id); -int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u16 id); -int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u16 id, +int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id); +int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id); +int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u32 id, u64 *packets, u64 *bytes); struct mlx5_cmd_fc_bulk; struct mlx5_cmd_fc_bulk * -mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u16 id, int num); +mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u32 id, int num); void mlx5_cmd_fc_bulk_free(struct mlx5_cmd_fc_bulk *b); int mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b); void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev, - struct mlx5_cmd_fc_bulk *b, u16 id, + struct mlx5_cmd_fc_bulk *b, u32 id, u64 *packets, u64 *bytes); #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 990acee6fb091f..9fb5a333df522c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -136,7 +136,7 @@ struct mlx5_fc { u64 lastpackets; u64 lastbytes; - u16 id; + u32 id; bool deleted; bool aging; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index 6507d8acc54d46..89d1f865003358 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -38,6 +38,8 @@ #include "fs_cmd.h" #define MLX5_FC_STATS_PERIOD msecs_to_jiffies(1000) +/* Max number of counters to query in bulk read is 32K */ +#define MLX5_SW_MAX_COUNTERS_BULK BIT(15) /* locking scheme: * @@ -90,16 +92,21 @@ static void mlx5_fc_stats_insert(struct rb_root 
*root, struct mlx5_fc *counter) rb_insert_color(&counter->node, root); } +/* The function returns the last node that was queried so the caller + * function can continue calling it till all counters are queried. + */ static struct rb_node *mlx5_fc_stats_query(struct mlx5_core_dev *dev, struct mlx5_fc *first, - u16 last_id) + u32 last_id) { struct mlx5_cmd_fc_bulk *b; struct rb_node *node = NULL; - u16 afirst_id; + u32 afirst_id; int num; int err; - int max_bulk = 1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk); + + int max_bulk = min_t(int, MLX5_SW_MAX_COUNTERS_BULK, + (1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk))); /* first id must be aligned to 4 when using bulk query */ afirst_id = first->id & ~0x3; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index f847a3a579139d..c99daffc3c3ce8 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -963,7 +963,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_2a0[0x10]; u8 max_wqe_sz_rq[0x10]; - u8 reserved_at_2c0[0x10]; + u8 max_flow_counter_31_16[0x10]; u8 max_wqe_sz_sq_dc[0x10]; u8 reserved_at_2e0[0x7]; @@ -981,7 +981,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_340[0x8]; u8 log_max_flow_counter_bulk[0x8]; - u8 max_flow_counter[0x10]; + u8 max_flow_counter_15_0[0x10]; u8 reserved_at_360[0x3]; @@ -1071,8 +1071,7 @@ struct mlx5_ifc_dest_format_struct_bits { }; struct mlx5_ifc_flow_counter_list_bits { - u8 reserved_at_0[0x10]; - u8 flow_counter_id[0x10]; + u8 flow_counter_id[0x20]; u8 reserved_at_20[0x20]; }; @@ -4402,8 +4401,7 @@ struct mlx5_ifc_query_flow_counter_in_bits { u8 reserved_at_c1[0xf]; u8 num_of_counters[0x10]; - u8 reserved_at_e0[0x10]; - u8 flow_counter_id[0x10]; + u8 flow_counter_id[0x20]; }; struct mlx5_ifc_query_esw_vport_context_out_bits { @@ -6271,8 +6269,7 @@ struct mlx5_ifc_dealloc_flow_counter_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x10]; - u8 flow_counter_id[0x10]; + u8 flow_counter_id[0x20]; 
u8 reserved_at_60[0x20]; }; @@ -7097,8 +7094,7 @@ struct mlx5_ifc_alloc_flow_counter_out_bits { u8 syndrome[0x20]; - u8 reserved_at_40[0x10]; - u8 flow_counter_id[0x10]; + u8 flow_counter_id[0x20]; u8 reserved_at_60[0x20]; }; From 3d2d9a753b2a248fd1743652737f982639b66b5c Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 12 Jan 2017 16:19:29 +0200 Subject: [PATCH 06/34] net/mlx5: Enlarge the NIC TC offload table size The NIC TC offload table size was hard coded to 1k. Change it to be min(max NIC RX table size, min(max flow counters, 64k) * num flow groups) where the max values are read from the firmware and the number of flow groups is hard-coded as before this change. We don't know upfront the division of flows to groups (== different masks). This setup allows each group to be of size up to where we want to go (when supported, all offloaded flows use counters). Thus, we don't expect multiple occurrences for a group which in turn would add steering hops. issue: 900706 Change-Id: I137e6cb41637ba9b3208a6683712541d69ca0fa4 Signed-off-by: Or Gerlitz --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 8059b2e41f4c8e..6d022e234c2f88 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -88,8 +88,8 @@ enum { MLX5_HEADER_TYPE_NVGRE = 0x1, }; -#define MLX5E_TC_TABLE_NUM_ENTRIES 1024 #define MLX5E_TC_TABLE_NUM_GROUPS 4 +#define MLX5E_TC_TABLE_MAX_GROUP_SIZE (1 << 16) struct mod_hdr_key { int num_actions; @@ -261,10 +261,21 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, } if (IS_ERR_OR_NULL(priv->fs.tc.t)) { + int tc_grp_size, tc_tbl_size; + u32 max_flow_counter; + + max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) | + MLX5_CAP_GEN(dev, max_flow_counter_15_0); + + tc_grp_size = min_t(int, 
max_flow_counter, MLX5E_TC_TABLE_MAX_GROUP_SIZE); + + tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS, + BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size))); + priv->fs.tc.t = mlx5_create_auto_grouped_flow_table(priv->fs.ns, MLX5E_TC_PRIO, - MLX5E_TC_TABLE_NUM_ENTRIES, + tc_tbl_size, MLX5E_TC_TABLE_NUM_GROUPS, 0, 0); if (IS_ERR(priv->fs.tc.t)) { From 589fa8dc65016ac8ef88e5c93f2bad38779ddf7d Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Sun, 28 May 2017 10:58:40 +0300 Subject: [PATCH 07/34] net/mlx5: Convert linear search for free index to ida When allocating a flow table entry, we need to allocate a free index in the flow group. Currently, this is done by traversing the existing flow table entries in the flow group, until a free index is found. Replacing this by using a ida, which allows us to find a free index much faster. issue: 1055697 Change-Id: Ie0ce533c435de89105c7a465c0a240a070b5a766 Signed-off-by: Matan Barak Reviewed-by: Maor Gottlieb --- .../net/ethernet/mellanox/mlx5/core/fs_core.c | 56 +++++++------------ .../net/ethernet/mellanox/mlx5/core/fs_core.h | 2 +- 2 files changed, 20 insertions(+), 38 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index e8690fe46bf26e..af245fcdba700a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -360,6 +360,7 @@ static void del_flow_table(struct fs_node *node) err = mlx5_cmd_destroy_flow_table(dev, ft); if (err) mlx5_core_warn(dev, "flow steering can't destroy ft\n"); + ida_destroy(&ft->fte_allocator); fs_get_obj(prio, ft->node.parent); prio->num_ft--; } @@ -437,8 +438,8 @@ static void del_fte(struct fs_node *node) "flow steering can't delete fte in index %d of flow group id %d\n", fte->index, fg->id); + ida_simple_remove(&ft->fte_allocator, fte->index); fte->status = 0; - fg->num_ftes--; } static void del_flow_group(struct fs_node *node) @@ -523,6 
+524,7 @@ static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_ft ft->flags = flags; INIT_LIST_HEAD(&ft->fwd_rules); mutex_init(&ft->lock); + ida_init(&ft->fte_allocator); return ft; } @@ -839,6 +841,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa destroy_ft: mlx5_cmd_destroy_flow_table(root->dev, ft); free_ft: + ida_destroy(&ft->fte_allocator); kfree(ft); unlock_root: mutex_unlock(&root->chain_lock); @@ -1102,41 +1105,26 @@ add_rule_fte(struct fs_fte *fte, return ERR_PTR(err); } -/* Assumed fg is locked */ -static unsigned int get_free_fte_index(struct mlx5_flow_group *fg, - struct list_head **prev) -{ - struct fs_fte *fte; - unsigned int start = fg->start_index; - - if (prev) - *prev = &fg->node.children; - - /* assumed list is sorted by index */ - fs_for_each_fte(fte, fg) { - if (fte->index != start) - return start; - start++; - if (prev) - *prev = &fte->node.list; - } - - return start; -} - -/* prev is output, prev->next = new_fte */ static struct fs_fte *create_fte(struct mlx5_flow_group *fg, u32 *match_value, - struct mlx5_flow_act *flow_act, - struct list_head **prev) + struct mlx5_flow_act *flow_act) { + struct mlx5_flow_table *ft; struct fs_fte *fte; int index; - index = get_free_fte_index(fg, prev); + fs_get_obj(ft, fg->node.parent); + index = ida_simple_get(&ft->fte_allocator, fg->start_index, + fg->start_index + fg->max_ftes, + GFP_KERNEL); + if (index < 0) + return ERR_PTR(index); + fte = alloc_fte(flow_act, match_value, index); - if (IS_ERR(fte)) + if (IS_ERR(fte)) { + ida_simple_remove(&ft->fte_allocator, index); return fte; + } return fte; } @@ -1234,7 +1222,6 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, { struct mlx5_flow_handle *handle; struct mlx5_flow_table *ft; - struct list_head *prev; struct fs_fte *fte; int i; @@ -1267,12 +1254,8 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, unlock_ref_node(&fte->node); } fs_get_obj(ft, 
fg->node.parent); - if (fg->num_ftes >= fg->max_ftes) { - handle = ERR_PTR(-ENOSPC); - goto unlock_fg; - } - fte = create_fte(fg, match_value, flow_act, &prev); + fte = create_fte(fg, match_value, flow_act); if (IS_ERR(fte)) { handle = (void *)fte; goto unlock_fg; @@ -1286,10 +1269,9 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, goto unlock_fg; } - fg->num_ftes++; - tree_add_node(&fte->node, &fg->node); - list_add(&fte->node.list, prev); + /* fte list isn't sorted */ + list_add_tail(&fte->node.list, &fg->node.children); add_rules: for (i = 0; i < handle->num_rules; i++) { if (atomic_read(&handle->rule[i]->node.refcount) == 1) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 9fb5a333df522c..5fbae885558fd3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -118,6 +118,7 @@ struct mlx5_flow_table { /* FWD rules that point on this flow table */ struct list_head fwd_rules; u32 flags; + struct ida fte_allocator; }; struct mlx5_fc_cache { @@ -183,7 +184,6 @@ struct mlx5_flow_group { struct mlx5_flow_group_mask mask; u32 start_index; u32 max_ftes; - u32 num_ftes; u32 id; }; From 9918e6b0eb5b1b8d3e99f74b4a2ce54632dddb2e Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Mon, 7 Aug 2017 11:14:11 +0300 Subject: [PATCH 08/34] net/mlx5: Don't store reserved part in FTEs and FGs The current code stores fte_match_param in the software representation of FTEs and FGs. fte_match_param contains a large reserved area at the bottom of the struct. Since downstream patches are going to hash this part, we would like to avoid doing so on a reserved part. 
issue: 1055697 Change-Id: Id6cee97302b7ed665067037b033589acad4b1cae Signed-off-by: Matan Barak --- drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 2 +- .../net/ethernet/mellanox/mlx5/core/fs_core.c | 8 -------- .../net/ethernet/mellanox/mlx5/core/fs_core.h | 16 ++++++++++++++-- include/linux/mlx5/device.h | 2 +- 4 files changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 16b32f31d691c6..e0d0efd903bc9c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -263,7 +263,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_SET(flow_context, in_flow_context, modify_header_id, fte->modify_id); in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context, match_value); - memcpy(in_match_value, &fte->val, MLX5_ST_SZ_BYTES(fte_match_param)); + memcpy(in_match_value, &fte->val, sizeof(fte->val)); in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination); if (fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index af245fcdba700a..986c5e50e8728e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -371,21 +371,14 @@ static void del_rule(struct fs_node *node) struct mlx5_flow_table *ft; struct mlx5_flow_group *fg; struct fs_fte *fte; - u32 *match_value; int modify_mask; struct mlx5_core_dev *dev = get_dev(node); - int match_len = MLX5_ST_SZ_BYTES(fte_match_param); int err; bool update_fte = false; - match_value = kvzalloc(match_len, GFP_KERNEL); - if (!match_value) - return; - fs_get_obj(rule, node); fs_get_obj(fte, rule->node.parent); fs_get_obj(fg, fte->node.parent); - memcpy(match_value, fte->val, sizeof(fte->val)); fs_get_obj(ft, fg->node.parent); list_del(&rule->node.list); if (rule->sw_action == 
MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { @@ -415,7 +408,6 @@ static void del_rule(struct fs_node *node) "%s can't del rule fg id=%d fte_index=%d\n", __func__, fg->id, fte->index); } - kvfree(match_value); } static void del_fte(struct fs_node *node) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 5fbae885558fd3..bfbc081d17dcd3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -144,10 +144,22 @@ struct mlx5_fc { struct mlx5_fc_cache cache ____cacheline_aligned_in_smp; }; +#define MLX5_FTE_MATCH_PARAM_RESERVED reserved_at_600 +/* Calculate the fte_match_param length and without the reserved length. + * Make sure the reserved field is the last. + */ +#define MLX5_ST_SZ_DW_MATCH_PARAM \ + ((MLX5_BYTE_OFF(fte_match_param, MLX5_FTE_MATCH_PARAM_RESERVED) / sizeof(u32)) + \ + BUILD_BUG_ON_ZERO(MLX5_ST_SZ_BYTES(fte_match_param) != \ + MLX5_FLD_SZ_BYTES(fte_match_param, \ + MLX5_FTE_MATCH_PARAM_RESERVED) +\ + MLX5_BYTE_OFF(fte_match_param, \ + MLX5_FTE_MATCH_PARAM_RESERVED))) + /* Type of children is mlx5_flow_rule */ struct fs_fte { struct fs_node node; - u32 val[MLX5_ST_SZ_DW(fte_match_param)]; + u32 val[MLX5_ST_SZ_DW_MATCH_PARAM]; u32 dests_size; u32 flow_tag; u32 index; @@ -175,7 +187,7 @@ struct mlx5_flow_namespace { struct mlx5_flow_group_mask { u8 match_criteria_enable; - u32 match_criteria[MLX5_ST_SZ_DW(fte_match_param)]; + u32 match_criteria[MLX5_ST_SZ_DW_MATCH_PARAM]; }; /* Type of children is fs_fte */ diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index f31a0b5377e1d4..3c7442b564604f 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -48,7 +48,7 @@ /* helper macros */ #define __mlx5_nullp(typ) ((struct mlx5_ifc_##typ##_bits *)0) #define __mlx5_bit_sz(typ, fld) sizeof(__mlx5_nullp(typ)->fld) -#define __mlx5_bit_off(typ, fld) ((unsigned)(unsigned 
long)(&(__mlx5_nullp(typ)->fld))) +#define __mlx5_bit_off(typ, fld) (offsetof(struct mlx5_ifc_##typ##_bits, fld)) #define __mlx5_dw_off(typ, fld) (__mlx5_bit_off(typ, fld) / 32) #define __mlx5_64_off(typ, fld) (__mlx5_bit_off(typ, fld) / 64) #define __mlx5_dw_bit_off(typ, fld) (32 - __mlx5_bit_sz(typ, fld) - (__mlx5_bit_off(typ, fld) & 0x1f)) From b9366f8f4f3be30a822884129546d00e8456e884 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Sun, 28 May 2017 12:09:06 +0300 Subject: [PATCH 09/34] net/mlx5: Add hash table to search FTEs in a flow-group When adding a flow table entry (fte) to a flow group (fg), we first need to check whether this fte exist. In such a case we just merge the destinations (if possible). Currently, this is done by traversing the fte list available in a fg. This could take a lot of time when using large flow groups. Speeding this up by using rhashtable, which is much faster. issue: 1055697 Change-Id: Ibe19bb72c7d48cc8377a83b4e0e783881e141a13 Signed-off-by: Matan Barak Reviewed-by: Maor Gottlieb --- .../net/ethernet/mellanox/mlx5/core/fs_core.c | 228 ++++++++++++------ .../net/ethernet/mellanox/mlx5/core/fs_core.h | 3 + 2 files changed, 160 insertions(+), 71 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 986c5e50e8728e..d8d45b00699649 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -150,6 +150,14 @@ enum fs_i_mutex_lock_class { FS_MUTEX_CHILD }; +static const struct rhashtable_params rhash_fte = { + .key_len = FIELD_SIZEOF(struct fs_fte, val), + .key_offset = offsetof(struct fs_fte, val), + .head_offset = offsetof(struct fs_fte, hash), + .automatic_shrinking = true, + .min_size = 1, +}; + static void del_rule(struct fs_node *node); static void del_flow_table(struct fs_node *node); static void del_flow_group(struct fs_node *node); @@ -255,63 +263,59 @@ static struct fs_prio 
*find_prio(struct mlx5_flow_namespace *ns, return NULL; } -static bool masked_memcmp(void *mask, void *val1, void *val2, size_t size) +static bool check_last_reserved(const u32 *match_criteria) { - unsigned int i; - - for (i = 0; i < size; i++, mask++, val1++, val2++) - if ((*((u8 *)val1) & (*(u8 *)mask)) != - ((*(u8 *)val2) & (*(u8 *)mask))) - return false; + char *match_criteria_reserved = + MLX5_ADDR_OF(fte_match_param, match_criteria, MLX5_FTE_MATCH_PARAM_RESERVED); - return true; + return !match_criteria_reserved[0] && + !memcmp(match_criteria_reserved, match_criteria_reserved + 1, + MLX5_FLD_SZ_BYTES(fte_match_param, + MLX5_FTE_MATCH_PARAM_RESERVED) - 1); } -static bool compare_match_value(struct mlx5_flow_group_mask *mask, - void *fte_param1, void *fte_param2) +static bool check_valid_mask(u8 match_criteria_enable, const u32 *match_criteria) { - if (mask->match_criteria_enable & - 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS) { - void *fte_match1 = MLX5_ADDR_OF(fte_match_param, - fte_param1, outer_headers); - void *fte_match2 = MLX5_ADDR_OF(fte_match_param, - fte_param2, outer_headers); - void *fte_mask = MLX5_ADDR_OF(fte_match_param, - mask->match_criteria, outer_headers); + if (match_criteria_enable & ~( + (1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS) | + (1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS) | + (1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS))) + return false; - if (!masked_memcmp(fte_mask, fte_match1, fte_match2, - MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4))) + if (!(match_criteria_enable & + 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS)) { + char *fg_type_mask = MLX5_ADDR_OF(fte_match_param, + match_criteria, outer_headers); + + if (fg_type_mask[0] || + memcmp(fg_type_mask, fg_type_mask + 1, + MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4) - 1)) return false; } - if (mask->match_criteria_enable & - 1 << 
MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS) { - void *fte_match1 = MLX5_ADDR_OF(fte_match_param, - fte_param1, misc_parameters); - void *fte_match2 = MLX5_ADDR_OF(fte_match_param, - fte_param2, misc_parameters); - void *fte_mask = MLX5_ADDR_OF(fte_match_param, - mask->match_criteria, misc_parameters); + if (!(match_criteria_enable & + 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS)) { + char *fg_type_mask = MLX5_ADDR_OF(fte_match_param, + match_criteria, misc_parameters); - if (!masked_memcmp(fte_mask, fte_match1, fte_match2, - MLX5_ST_SZ_BYTES(fte_match_set_misc))) + if (fg_type_mask[0] || + memcmp(fg_type_mask, fg_type_mask + 1, + MLX5_ST_SZ_BYTES(fte_match_set_misc) - 1)) return false; } - if (mask->match_criteria_enable & - 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS) { - void *fte_match1 = MLX5_ADDR_OF(fte_match_param, - fte_param1, inner_headers); - void *fte_match2 = MLX5_ADDR_OF(fte_match_param, - fte_param2, inner_headers); - void *fte_mask = MLX5_ADDR_OF(fte_match_param, - mask->match_criteria, inner_headers); + if (!(match_criteria_enable & + 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS)) { + char *fg_type_mask = MLX5_ADDR_OF(fte_match_param, + match_criteria, inner_headers); - if (!masked_memcmp(fte_mask, fte_match1, fte_match2, - MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4))) + if (fg_type_mask[0] || + memcmp(fg_type_mask, fg_type_mask + 1, + MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4) - 1)) return false; } - return true; + + return check_last_reserved(match_criteria); } static bool compare_match_criteria(u8 match_criteria_enable1, @@ -410,6 +414,18 @@ static void del_rule(struct fs_node *node) } } +static void destroy_fte(struct fs_fte *fte, struct mlx5_flow_group *fg) +{ + struct mlx5_flow_table *ft; + int ret; + + ret = rhashtable_remove_fast(&fg->ftes_hash, &fte->hash, rhash_fte); + WARN_ON(ret); + fte->status = 0; + fs_get_obj(ft, fg->node.parent); + 
ida_simple_remove(&ft->fte_allocator, fte->index); +} + static void del_fte(struct fs_node *node) { struct mlx5_flow_table *ft; @@ -430,8 +446,7 @@ static void del_fte(struct fs_node *node) "flow steering can't delete fte in index %d of flow group id %d\n", fte->index, fg->id); - ida_simple_remove(&ft->fte_allocator, fte->index); - fte->status = 0; + destroy_fte(fte, fg); } static void del_flow_group(struct fs_node *node) @@ -447,6 +462,7 @@ static void del_flow_group(struct fs_node *node) if (ft->autogroup.active) ft->autogroup.num_groups--; + rhashtable_destroy(&fg->ftes_hash); if (mlx5_cmd_destroy_flow_group(dev, ft, fg->id)) mlx5_core_warn(dev, "flow steering can't destroy fg %d of ft %d\n", fg->id, ft->id); @@ -481,10 +497,17 @@ static struct mlx5_flow_group *alloc_flow_group(u32 *create_fg_in) u8 match_criteria_enable = MLX5_GET(create_flow_group_in, create_fg_in, match_criteria_enable); + int ret; + fg = kzalloc(sizeof(*fg), GFP_KERNEL); if (!fg) return ERR_PTR(-ENOMEM); + ret = rhashtable_init(&fg->ftes_hash, &rhash_fte); + if (ret) { + kfree(fg); + return ERR_PTR(ret); + } fg->mask.match_criteria_enable = match_criteria_enable; memcpy(&fg->mask.match_criteria, match_criteria, sizeof(fg->mask.match_criteria)); @@ -920,10 +943,8 @@ static struct mlx5_flow_group *create_flow_group_common(struct mlx5_flow_table * return fg; err = mlx5_cmd_create_flow_group(dev, ft, fg_in, &fg->id); - if (err) { - kfree(fg); - return ERR_PTR(err); - } + if (err) + goto err_free_fg; if (ft->autogroup.active) ft->autogroup.num_groups++; @@ -934,13 +955,27 @@ static struct mlx5_flow_group *create_flow_group_common(struct mlx5_flow_table * list_add(&fg->node.list, prev_fg); return fg; + +err_free_fg: + rhashtable_destroy(&fg->ftes_hash); + kfree(fg); + + return ERR_PTR(err); } struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, u32 *fg_in) { + void *match_criteria = MLX5_ADDR_OF(create_flow_group_in, + fg_in, match_criteria); + u8 match_criteria_enable = 
MLX5_GET(create_flow_group_in, + fg_in, + match_criteria_enable); struct mlx5_flow_group *fg; + if (!check_valid_mask(match_criteria_enable, match_criteria)) + return ERR_PTR(-EINVAL); + if (ft->autogroup.active) return ERR_PTR(-EPERM); @@ -1104,6 +1139,7 @@ static struct fs_fte *create_fte(struct mlx5_flow_group *fg, struct mlx5_flow_table *ft; struct fs_fte *fte; int index; + int ret; fs_get_obj(ft, fg->node.parent); index = ida_simple_get(&ft->fte_allocator, fg->start_index, @@ -1114,11 +1150,20 @@ static struct fs_fte *create_fte(struct mlx5_flow_group *fg, fte = alloc_fte(flow_act, match_value, index); if (IS_ERR(fte)) { - ida_simple_remove(&ft->fte_allocator, index); - return fte; + ret = PTR_ERR(fte); + goto err_alloc; } + ret = rhashtable_insert_fast(&fg->ftes_hash, &fte->hash, rhash_fte); + if (ret) + goto err_hash; return fte; + +err_hash: + kfree(fte); +err_alloc: + ida_simple_remove(&ft->fte_allocator, index); + return ERR_PTR(ret); } static struct mlx5_flow_group *create_autogroup(struct mlx5_flow_table *ft, @@ -1206,42 +1251,78 @@ static struct mlx5_flow_rule *find_flow_rule(struct fs_fte *fte, return NULL; } +static bool check_conflicting_actions(u32 action1, u32 action2) +{ + u32 xored_actions = action1 ^ action2; + + /* if one rule only wants to count, it's ok */ + if (action1 == MLX5_FLOW_CONTEXT_ACTION_COUNT || + action2 == MLX5_FLOW_CONTEXT_ACTION_COUNT) + return false; + + if (xored_actions & (MLX5_FLOW_CONTEXT_ACTION_DROP | + MLX5_FLOW_CONTEXT_ACTION_ENCAP | + MLX5_FLOW_CONTEXT_ACTION_DECAP)) + return true; + + return false; +} + +static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act *flow_act) +{ + if (check_conflicting_actions(flow_act->action, fte->action)) { + mlx5_core_warn(get_dev(&fte->node), + "Found two FTEs with conflicting actions\n"); + return -EEXIST; + } + + if (fte->flow_tag != flow_act->flow_tag) { + mlx5_core_warn(get_dev(&fte->node), + "FTE flow tag %u already exists with different flow tag %u\n", + 
fte->flow_tag, + flow_act->flow_tag); + return -EEXIST; + } + + return 0; +} + static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, u32 *match_value, struct mlx5_flow_act *flow_act, struct mlx5_flow_destination *dest, int dest_num) { + u32 masked_val[sizeof(fg->mask.match_criteria)]; struct mlx5_flow_handle *handle; struct mlx5_flow_table *ft; struct fs_fte *fte; int i; nested_lock_ref_node(&fg->node, FS_MUTEX_PARENT); - fs_for_each_fte(fte, fg) { + for (i = 0; i < sizeof(masked_val); i++) + masked_val[i] = match_value[i] & fg->mask.match_criteria[i]; + fte = rhashtable_lookup_fast(&fg->ftes_hash, masked_val, rhash_fte); + if (fte) { + int old_action; + int ret; + nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD); - if (compare_match_value(&fg->mask, match_value, &fte->val) && - (flow_act->action & fte->action)) { - int old_action = fte->action; - - if (fte->flow_tag != flow_act->flow_tag) { - mlx5_core_warn(get_dev(&fte->node), - "FTE flow tag %u already exists with different flow tag %u\n", - fte->flow_tag, - flow_act->flow_tag); - handle = ERR_PTR(-EEXIST); - goto unlock_fte; - } + ret = check_conflicting_ftes(fte, flow_act); + if (ret) { + handle = ERR_PTR(ret); + goto unlock_fte; + } - fte->action |= flow_act->action; - handle = add_rule_fte(fte, fg, dest, dest_num, - old_action != flow_act->action); - if (IS_ERR(handle)) { - fte->action = old_action; - goto unlock_fte; - } else { - goto add_rules; - } + old_action = fte->action; + fte->action |= flow_act->action; + handle = add_rule_fte(fte, fg, dest, dest_num, + old_action != flow_act->action); + if (IS_ERR(handle)) { + fte->action = old_action; + goto unlock_fte; + } else { + goto add_rules; } unlock_ref_node(&fte->node); } @@ -1257,6 +1338,7 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, handle = add_rule_fte(fte, fg, dest, dest_num, false); if (IS_ERR(handle)) { unlock_ref_node(&fte->node); + destroy_fte(fte, fg); kfree(fte); goto unlock_fg; } @@ -1332,6 
+1414,10 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, struct mlx5_flow_handle *rule; int i; + if (!check_valid_mask(spec->match_criteria_enable, + spec->match_criteria)) + return ERR_PTR(-EINVAL); + for (i = 0; i < dest_num; i++) { if (!dest_is_valid(&dest[i], flow_act->action, ft)) return ERR_PTR(-EINVAL); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index bfbc081d17dcd3..62709a3865d2f0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -34,6 +34,7 @@ #define _MLX5_FS_CORE_ #include +#include enum fs_node_type { FS_TYPE_NAMESPACE, @@ -168,6 +169,7 @@ struct fs_fte { u32 modify_id; enum fs_fte_status status; struct mlx5_fc *counter; + struct rhash_head hash; }; /* Type of children is mlx5_flow_table/namespace */ @@ -197,6 +199,7 @@ struct mlx5_flow_group { u32 start_index; u32 max_ftes; u32 id; + struct rhashtable ftes_hash; }; struct mlx5_flow_root_namespace { From 03f69f3aec212e96de4ee0f7962c152616a21102 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Sun, 28 May 2017 16:45:31 +0300 Subject: [PATCH 10/34] net/mlx5: Add hash table for flow groups in flow table When adding a flow table entry (fte) to a flow table (ft), we first need to find its flow group (fg). Currently, this is done by traversing a linear list of all flow groups in the flow table. Furthermore, since multiple flow groups which correspond to the same fte mask may exist in the same ft, we can't just stop at the first match. Convert the linear list to an rhltable in order to speed things up. The last four patches increase the steering rules update rate by a factor of more than 7 (for insertion of 50K steering rules).
issue: 1055697 Change-Id: I169e5a6bfb13befd64f21e139139934eab740584 Signed-off-by: Matan Barak Reviewed-by: Maor Gottlieb --- .../net/ethernet/mellanox/mlx5/core/fs_core.c | 187 ++++++++++++++---- .../net/ethernet/mellanox/mlx5/core/fs_core.h | 2 + 2 files changed, 152 insertions(+), 37 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index d8d45b00699649..e2e91caffc0699 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -158,6 +158,15 @@ static const struct rhashtable_params rhash_fte = { .min_size = 1, }; +static const struct rhashtable_params rhash_fg = { + .key_len = FIELD_SIZEOF(struct mlx5_flow_group, mask), + .key_offset = offsetof(struct mlx5_flow_group, mask), + .head_offset = offsetof(struct mlx5_flow_group, hash), + .automatic_shrinking = true, + .min_size = 1, + +}; + static void del_rule(struct fs_node *node); static void del_flow_table(struct fs_node *node); static void del_flow_group(struct fs_node *node); @@ -318,12 +327,22 @@ static bool check_valid_mask(u8 match_criteria_enable, const u32 *match_criteria return check_last_reserved(match_criteria); } -static bool compare_match_criteria(u8 match_criteria_enable1, - u8 match_criteria_enable2, - void *mask1, void *mask2) +static bool check_valid_spec(const struct mlx5_flow_spec *spec) { - return match_criteria_enable1 == match_criteria_enable2 && - !memcmp(mask1, mask2, MLX5_ST_SZ_BYTES(fte_match_param)); + int i; + + if (!check_valid_mask(spec->match_criteria_enable, spec->match_criteria)) { + pr_warn("mlx5_core: Match criteria given mismatches match_criteria_enable\n"); + return false; + } + + for (i = 0; i < MLX5_ST_SZ_DW_MATCH_PARAM; i++) + if (spec->match_value[i] & ~spec->match_criteria[i]) { + pr_warn("mlx5_core: match_value differs from match_criteria\n"); + return false; + } + + return check_last_reserved(spec->match_value); } static struct 
mlx5_flow_root_namespace *find_root(struct fs_node *node) @@ -365,6 +384,7 @@ static void del_flow_table(struct fs_node *node) if (err) mlx5_core_warn(dev, "flow steering can't destroy ft\n"); ida_destroy(&ft->fte_allocator); + rhltable_destroy(&ft->fgs_hash); fs_get_obj(prio, ft->node.parent); prio->num_ft--; } @@ -454,6 +474,7 @@ static void del_flow_group(struct fs_node *node) struct mlx5_flow_group *fg; struct mlx5_flow_table *ft; struct mlx5_core_dev *dev; + int err; fs_get_obj(fg, node); fs_get_obj(ft, fg->node.parent); @@ -463,6 +484,10 @@ static void del_flow_group(struct fs_node *node) ft->autogroup.num_groups--; rhashtable_destroy(&fg->ftes_hash); + err = rhltable_remove(&ft->fgs_hash, + &fg->hash, + rhash_fg); + WARN_ON(err); if (mlx5_cmd_destroy_flow_group(dev, ft, fg->id)) mlx5_core_warn(dev, "flow steering can't destroy fg %d of ft %d\n", fg->id, ft->id); @@ -525,10 +550,17 @@ static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_ft u32 flags) { struct mlx5_flow_table *ft; + int ret; ft = kzalloc(sizeof(*ft), GFP_KERNEL); if (!ft) - return NULL; + return ERR_PTR(-ENOMEM); + + ret = rhltable_init(&ft->fgs_hash, &rhash_fg); + if (ret) { + kfree(ft); + return ERR_PTR(ret); + } ft->level = level; ft->node.type = FS_TYPE_FLOW_TABLE; @@ -829,8 +861,8 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa ft_attr->max_fte ? 
roundup_pow_of_two(ft_attr->max_fte) : 0, root->table_type, op_mod, ft_attr->flags); - if (!ft) { - err = -ENOMEM; + if (IS_ERR(ft)) { + err = PTR_ERR(ft); goto unlock_root; } @@ -942,10 +974,14 @@ static struct mlx5_flow_group *create_flow_group_common(struct mlx5_flow_table * if (IS_ERR(fg)) return fg; - err = mlx5_cmd_create_flow_group(dev, ft, fg_in, &fg->id); + err = rhltable_insert(&ft->fgs_hash, &fg->hash, rhash_fg); if (err) goto err_free_fg; + err = mlx5_cmd_create_flow_group(dev, ft, fg_in, &fg->id); + if (err) + goto err_remove_fg; + if (ft->autogroup.active) ft->autogroup.num_groups++; /* Add node to tree */ @@ -956,6 +992,10 @@ static struct mlx5_flow_group *create_flow_group_common(struct mlx5_flow_table * return fg; +err_remove_fg: + WARN_ON(rhltable_remove(&ft->fgs_hash, + &fg->hash, + rhash_fg)); err_free_fg: rhashtable_destroy(&fg->ftes_hash); kfree(fg); @@ -1291,18 +1331,13 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, u32 *match_value, struct mlx5_flow_act *flow_act, struct mlx5_flow_destination *dest, - int dest_num) + int dest_num, + struct fs_fte *fte) { - u32 masked_val[sizeof(fg->mask.match_criteria)]; struct mlx5_flow_handle *handle; struct mlx5_flow_table *ft; - struct fs_fte *fte; int i; - nested_lock_ref_node(&fg->node, FS_MUTEX_PARENT); - for (i = 0; i < sizeof(masked_val); i++) - masked_val[i] = match_value[i] & fg->mask.match_criteria[i]; - fte = rhashtable_lookup_fast(&fg->ftes_hash, masked_val, rhash_fte); if (fte) { int old_action; int ret; @@ -1324,15 +1359,12 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, } else { goto add_rules; } - unlock_ref_node(&fte->node); } fs_get_obj(ft, fg->node.parent); fte = create_fte(fg, match_value, flow_act); - if (IS_ERR(fte)) { - handle = (void *)fte; - goto unlock_fg; - } + if (IS_ERR(fte)) + return (void *)fte; tree_init_node(&fte->node, 0, del_fte); nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD); handle = add_rule_fte(fte, fg, dest, 
dest_num, false); @@ -1340,7 +1372,7 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, unlock_ref_node(&fte->node); destroy_fte(fte, fg); kfree(fte); - goto unlock_fg; + return handle; } tree_add_node(&fte->node, &fg->node); @@ -1353,8 +1385,6 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, } unlock_fte: unlock_ref_node(&fte->node); -unlock_fg: - unlock_ref_node(&fg->node); return handle; } @@ -1402,6 +1432,96 @@ static bool dest_is_valid(struct mlx5_flow_destination *dest, return true; } +static struct mlx5_flow_handle * +try_add_to_existing_fg(struct mlx5_flow_table *ft, + struct mlx5_flow_spec *spec, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest, + int dest_num) +{ + struct mlx5_flow_group *g; + struct mlx5_flow_handle *rule = ERR_PTR(-ENOENT); + struct rhlist_head *tmp, *list; + struct match_list { + struct list_head list; + struct mlx5_flow_group *g; + } match_list, *iter; + LIST_HEAD(match_head); + + rcu_read_lock(); + /* Collect all fgs which has a matching match_criteria */ + list = rhltable_lookup(&ft->fgs_hash, spec, rhash_fg); + rhl_for_each_entry_rcu(g, tmp, list, hash) { + struct match_list *curr_match; + + if (likely(list_empty(&match_head))) { + match_list.g = g; + list_add_tail(&match_list.list, &match_head); + continue; + } + curr_match = kmalloc(sizeof(*curr_match), GFP_ATOMIC); + + if (!curr_match) { + rcu_read_unlock(); + rule = ERR_PTR(-ENOMEM); + goto free_list; + } + curr_match->g = g; + list_add_tail(&curr_match->list, &match_head); + } + rcu_read_unlock(); + + /* Try to find a fg that already contains a matching fte */ + list_for_each_entry(iter, &match_head, list) { + struct fs_fte *fte; + + g = iter->g; + nested_lock_ref_node(&g->node, FS_MUTEX_PARENT); + fte = rhashtable_lookup_fast(&g->ftes_hash, spec->match_value, + rhash_fte); + if (fte) { + rule = add_rule_fg(g, spec->match_value, + flow_act, dest, dest_num, fte); + unlock_ref_node(&g->node); + goto 
free_list; + } + unlock_ref_node(&g->node); + } + + /* No group with matching fte found. Try to add a new fte to any + * matching fg. + */ + list_for_each_entry(iter, &match_head, list) { + g = iter->g; + + nested_lock_ref_node(&g->node, FS_MUTEX_PARENT); + rule = add_rule_fg(g, spec->match_value, + flow_act, dest, dest_num, NULL); + if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOSPC) { + unlock_ref_node(&g->node); + goto free_list; + } + unlock_ref_node(&g->node); + } + +free_list: + if (!list_empty(&match_head)) { + struct match_list *match_tmp; + + /* The most common case is having one FG. Since we want to + * optimize this case, we save the first on the stack. + * Therefore, no need to free it. + */ + list_del(&list_first_entry(&match_head, typeof(*iter), list)->list); + list_for_each_entry_safe(iter, match_tmp, &match_head, list) { + list_del(&iter->list); + kfree(iter); + } + } + + return rule; +} + static struct mlx5_flow_handle * _mlx5_add_flow_rules(struct mlx5_flow_table *ft, struct mlx5_flow_spec *spec, @@ -1414,8 +1534,7 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, struct mlx5_flow_handle *rule; int i; - if (!check_valid_mask(spec->match_criteria_enable, - spec->match_criteria)) + if (!check_valid_spec(spec)) return ERR_PTR(-EINVAL); for (i = 0; i < dest_num; i++) { @@ -1424,16 +1543,9 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, } nested_lock_ref_node(&ft->node, FS_MUTEX_GRANDPARENT); - fs_for_each_fg(g, ft) - if (compare_match_criteria(g->mask.match_criteria_enable, - spec->match_criteria_enable, - g->mask.match_criteria, - spec->match_criteria)) { - rule = add_rule_fg(g, spec->match_value, - flow_act, dest, dest_num); - if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOSPC) - goto unlock; - } + rule = try_add_to_existing_fg(ft, spec, flow_act, dest, dest_num); + if (!IS_ERR(rule)) + goto unlock; g = create_autogroup(ft, spec->match_criteria_enable, spec->match_criteria); @@ -1442,7 +1554,8 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, goto 
unlock; } - rule = add_rule_fg(g, spec->match_value, flow_act, dest, dest_num); + rule = add_rule_fg(g, spec->match_value, flow_act, dest, + dest_num, NULL); if (IS_ERR(rule)) { /* Remove assumes refcount > 0 and autogroup creates a group * with a refcount = 0. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 62709a3865d2f0..5509a752f98e7b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -120,6 +120,7 @@ struct mlx5_flow_table { struct list_head fwd_rules; u32 flags; struct ida fte_allocator; + struct rhltable fgs_hash; }; struct mlx5_fc_cache { @@ -200,6 +201,7 @@ struct mlx5_flow_group { u32 max_ftes; u32 id; struct rhashtable ftes_hash; + struct rhlist_head hash; }; struct mlx5_flow_root_namespace { From f3469a39939c5d98d7aaa1a67b74b32379728a4e Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Sun, 28 May 2017 10:32:09 +0300 Subject: [PATCH 11/34] net/mlx5: Add tracepoints Add a tracepoint infrastructure for mlx5_core driver. Implemented flow steering tracepoints: 1. Add flow group 2. Remove flow group 3. Add flow table entry 4. Remove flow table entry 5. Add flow table rule 6. 
Remove flow table rule issue: 1055697 Change-Id: I0f25e1ec9df085a3252b907078d5a59fc8f49c8a Signed-off-by: Matan Barak Reviewed-by: Mark Bloch --- .../net/ethernet/mellanox/mlx5/core/Makefile | 5 +- .../ethernet/mellanox/mlx5/core/diag/Makefile | 1 + .../mellanox/mlx5/core/diag/fs_tracepoint.c | 261 ++++++++++++++++ .../mellanox/mlx5/core/diag/fs_tracepoint.h | 282 ++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/fs_core.c | 11 +- 5 files changed, 558 insertions(+), 2 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/diag/Makefile create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 9d17e4e76d3a53..3610e37605b1de 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -4,7 +4,8 @@ subdir-ccflags-y += -I$(src) mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ - fs_counters.o rl.o lag.o dev.o wq.o lib/gid.o + fs_counters.o rl.o lag.o dev.o wq.o lib/gid.o \ + diag/fs_tracepoint.o mlx5_core-$(CONFIG_MLX5_ACCEL) += accel/ipsec.o @@ -22,3 +23,5 @@ mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \ en_accel/ipsec_stats.o + +CFLAGS_tracepoint.o := -I$(src) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/diag/Makefile new file mode 100644 index 00000000000000..d8e17110f25d0c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/Makefile @@ -0,0 +1 @@ +subdir-ccflags-y += -I$(src)/.. 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c new file mode 100644 index 00000000000000..0be4575b58a272 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c @@ -0,0 +1,261 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#define CREATE_TRACE_POINTS + +#include "fs_tracepoint.h" +#include <linux/stringify.h> + +#define DECLARE_MASK_VAL(type, name) struct {type m; type v; } name +#define MASK_VAL(type, spec, name, mask, val, fld) \ + DECLARE_MASK_VAL(type, name) = \ + {.m = MLX5_GET(spec, mask, fld),\ + .v = MLX5_GET(spec, val, fld)} +#define MASK_VAL_BE(type, spec, name, mask, val, fld) \ + DECLARE_MASK_VAL(type, name) = \ + {.m = MLX5_GET_BE(type, spec, mask, fld),\ + .v = MLX5_GET_BE(type, spec, val, fld)} +#define GET_MASKED_VAL(name) (name.m & name.v) + +#define GET_MASK_VAL(name, type, mask, val, fld) \ + (name.m = MLX5_GET(type, mask, fld), \ + name.v = MLX5_GET(type, val, fld), \ + name.m & name.v) +#define PRINT_MASKED_VAL(name, p, format) { \ + if (name.m) \ + trace_seq_printf(p, __stringify(name) "=" format " ", name.v); \ + } +#define PRINT_MASKED_VALP(name, cast, p, format) { \ + if (name.m) \ + trace_seq_printf(p, __stringify(name) "=" format " ", \ + (cast)&name.v);\ + } + +static void print_lyr_2_4_hdrs(struct trace_seq *p, + const u32 *mask, const u32 *value) +{ +#define MASK_VAL_L2(type, name, fld) \ + MASK_VAL(type, fte_match_set_lyr_2_4, name, mask, value, fld) + DECLARE_MASK_VAL(u64, smac) = { /* widen to u64 before << 16 so MAC bits 47..32 survive */ + .m = (u64)MLX5_GET(fte_match_set_lyr_2_4, mask, smac_47_16) << 16 | + MLX5_GET(fte_match_set_lyr_2_4, mask, smac_15_0), + .v = (u64)MLX5_GET(fte_match_set_lyr_2_4, value, smac_47_16) << 16 | + MLX5_GET(fte_match_set_lyr_2_4, value, smac_15_0)}; + DECLARE_MASK_VAL(u64, dmac) = { /* same widening as smac above */ + .m = (u64)MLX5_GET(fte_match_set_lyr_2_4, mask, dmac_47_16) << 16 | + MLX5_GET(fte_match_set_lyr_2_4, mask, dmac_15_0), + .v = (u64)MLX5_GET(fte_match_set_lyr_2_4, value, dmac_47_16) << 16 | + MLX5_GET(fte_match_set_lyr_2_4, value, dmac_15_0)}; + MASK_VAL_L2(u16, ethertype, ethertype); + + PRINT_MASKED_VALP(smac, u8 *, p, "%pM"); + PRINT_MASKED_VALP(dmac, u8 *, p, "%pM"); + PRINT_MASKED_VAL(ethertype, p, "%04x"); + + if (ethertype.m == 0xffff) { + if (ethertype.v == ETH_P_IP) { +#define MASK_VAL_L2_BE(type, name, fld) \ +
MASK_VAL_BE(type, fte_match_set_lyr_2_4, name, mask, value, fld) + MASK_VAL_L2_BE(u32, src_ipv4, + src_ipv4_src_ipv6.ipv4_layout.ipv4); + MASK_VAL_L2_BE(u32, dst_ipv4, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + + PRINT_MASKED_VALP(src_ipv4, typeof(&src_ipv4.v), p, + "%pI4"); + PRINT_MASKED_VALP(dst_ipv4, typeof(&dst_ipv4.v), p, + "%pI4"); + } else if (ethertype.v == ETH_P_IPV6) { + static const struct in6_addr full_ones = { + .in6_u.u6_addr32 = {htonl(0xffffffff), + htonl(0xffffffff), + htonl(0xffffffff), + htonl(0xffffffff)}, + }; + DECLARE_MASK_VAL(struct in6_addr, src_ipv6); + DECLARE_MASK_VAL(struct in6_addr, dst_ipv6); + + memcpy(src_ipv6.m.in6_u.u6_addr8, + MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + sizeof(src_ipv6.m)); + memcpy(dst_ipv6.m.in6_u.u6_addr8, + MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + sizeof(dst_ipv6.m)); + memcpy(src_ipv6.v.in6_u.u6_addr8, + MLX5_ADDR_OF(fte_match_set_lyr_2_4, value, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + sizeof(src_ipv6.v)); + memcpy(dst_ipv6.v.in6_u.u6_addr8, + MLX5_ADDR_OF(fte_match_set_lyr_2_4, value, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + sizeof(dst_ipv6.v)); + + if (!memcmp(&src_ipv6.m, &full_ones, sizeof(full_ones))) + trace_seq_printf(p, "src_ipv6=%pI6 ", + &src_ipv6.v); + if (!memcmp(&dst_ipv6.m, &full_ones, sizeof(full_ones))) + trace_seq_printf(p, "dst_ipv6=%pI6 ", + &dst_ipv6.v); + } + } + +#define PRINT_MASKED_VAL_L2(type, name, fld, p, format) {\ + MASK_VAL_L2(type, name, fld); \ + PRINT_MASKED_VAL(name, p, format); \ +} + + PRINT_MASKED_VAL_L2(u8, ip_protocol, ip_protocol, p, "%02x"); + PRINT_MASKED_VAL_L2(u16, tcp_flags, tcp_flags, p, "%x"); + PRINT_MASKED_VAL_L2(u16, tcp_sport, tcp_sport, p, "%u"); + PRINT_MASKED_VAL_L2(u16, tcp_dport, tcp_dport, p, "%u"); + PRINT_MASKED_VAL_L2(u16, udp_sport, udp_sport, p, "%u"); + PRINT_MASKED_VAL_L2(u16, udp_dport, udp_dport, p, "%u"); + PRINT_MASKED_VAL_L2(u16, first_vid, first_vid, p, 
"%04x"); + PRINT_MASKED_VAL_L2(u8, first_prio, first_prio, p, "%x"); + PRINT_MASKED_VAL_L2(u8, first_cfi, first_cfi, p, "%d"); + PRINT_MASKED_VAL_L2(u8, ip_dscp, ip_dscp, p, "%02x"); + PRINT_MASKED_VAL_L2(u8, ip_ecn, ip_ecn, p, "%x"); + PRINT_MASKED_VAL_L2(u8, cvlan_tag, cvlan_tag, p, "%d"); + PRINT_MASKED_VAL_L2(u8, svlan_tag, svlan_tag, p, "%d"); + PRINT_MASKED_VAL_L2(u8, frag, frag, p, "%d"); +} + +static void print_misc_parameters_hdrs(struct trace_seq *p, + const u32 *mask, const u32 *value) +{ +#define MASK_VAL_MISC(type, name, fld) \ + MASK_VAL(type, fte_match_set_misc, name, mask, value, fld) +#define PRINT_MASKED_VAL_MISC(type, name, fld, p, format) {\ + MASK_VAL_MISC(type, name, fld); \ + PRINT_MASKED_VAL(name, p, format); \ +} + DECLARE_MASK_VAL(u64, gre_key) = { + .m = MLX5_GET(fte_match_set_misc, mask, gre_key_h) << 8 | + MLX5_GET(fte_match_set_misc, mask, gre_key_l), + .v = MLX5_GET(fte_match_set_misc, value, gre_key_h) << 8 | + MLX5_GET(fte_match_set_misc, value, gre_key_l)}; + + PRINT_MASKED_VAL(gre_key, p, "%llu"); + PRINT_MASKED_VAL_MISC(u32, source_sqn, source_sqn, p, "%u"); + PRINT_MASKED_VAL_MISC(u16, source_port, source_port, p, "%u"); + PRINT_MASKED_VAL_MISC(u8, outer_second_prio, outer_second_prio, + p, "%u"); + PRINT_MASKED_VAL_MISC(u8, outer_second_cfi, outer_second_cfi, p, "%u"); + PRINT_MASKED_VAL_MISC(u16, outer_second_vid, outer_second_vid, p, "%u"); + PRINT_MASKED_VAL_MISC(u8, inner_second_prio, inner_second_prio, + p, "%u"); + PRINT_MASKED_VAL_MISC(u8, inner_second_cfi, inner_second_cfi, p, "%u"); + PRINT_MASKED_VAL_MISC(u16, inner_second_vid, inner_second_vid, p, "%u"); + + PRINT_MASKED_VAL_MISC(u8, outer_second_cvlan_tag, + outer_second_cvlan_tag, p, "%u"); + PRINT_MASKED_VAL_MISC(u8, inner_second_cvlan_tag, + inner_second_cvlan_tag, p, "%u"); + PRINT_MASKED_VAL_MISC(u8, outer_second_svlan_tag, + outer_second_svlan_tag, p, "%u"); + PRINT_MASKED_VAL_MISC(u8, inner_second_svlan_tag, + inner_second_svlan_tag, p, "%u"); + + 
PRINT_MASKED_VAL_MISC(u8, gre_protocol, gre_protocol, p, "%u"); + + PRINT_MASKED_VAL_MISC(u32, vxlan_vni, vxlan_vni, p, "%u"); + PRINT_MASKED_VAL_MISC(u32, outer_ipv6_flow_label, outer_ipv6_flow_label, + p, "%x"); + PRINT_MASKED_VAL_MISC(u32, inner_ipv6_flow_label, inner_ipv6_flow_label, + p, "%x"); +} + +const char *parse_fs_hdrs(struct trace_seq *p, + u8 match_criteria_enable, + const u32 *mask_outer, + const u32 *mask_misc, + const u32 *mask_inner, + const u32 *value_outer, + const u32 *value_misc, + const u32 *value_inner) +{ + const char *ret = trace_seq_buffer_ptr(p); + + if (match_criteria_enable & + 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS) { + trace_seq_printf(p, "[outer] "); + print_lyr_2_4_hdrs(p, mask_outer, value_outer); + } + + if (match_criteria_enable & + 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS) { + trace_seq_printf(p, "[misc] "); + print_misc_parameters_hdrs(p, mask_misc, value_misc); + } + if (match_criteria_enable & + 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS) { + trace_seq_printf(p, "[inner] "); + print_lyr_2_4_hdrs(p, mask_inner, value_inner); + } + trace_seq_putc(p, 0); + return ret; +} + +const char *parse_fs_dst(struct trace_seq *p, + const struct mlx5_flow_destination *dst, + u32 counter_id) +{ + const char *ret = trace_seq_buffer_ptr(p); + + switch (dst->type) { + case MLX5_FLOW_DESTINATION_TYPE_VPORT: + trace_seq_printf(p, "vport=%u\n", dst->vport_num); + break; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE: + trace_seq_printf(p, "ft=%p\n", dst->ft); + break; + case MLX5_FLOW_DESTINATION_TYPE_TIR: + trace_seq_printf(p, "tir=%u\n", dst->tir_num); + break; + case MLX5_FLOW_DESTINATION_TYPE_COUNTER: + trace_seq_printf(p, "counter_id=%u\n", counter_id); + break; + } + + trace_seq_putc(p, 0); + return ret; +} + +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_add_fg); +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_fg); +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_set_fte); 
+EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_fte); +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_add_rule); +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_rule); + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h new file mode 100644 index 00000000000000..1e3a6c3e41323d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h @@ -0,0 +1,282 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#if !defined(_MLX5_FS_TP_) || defined(TRACE_HEADER_MULTI_READ) +#define _MLX5_FS_TP_ + +#include +#include +#include "../fs_core.h" + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mlx5 + +#define __parse_fs_hdrs(match_criteria_enable, mouter, mmisc, minner, vouter, \ + vinner, vmisc) \ + parse_fs_hdrs(p, match_criteria_enable, mouter, mmisc, minner, vouter,\ + vinner, vmisc) + +const char *parse_fs_hdrs(struct trace_seq *p, + u8 match_criteria_enable, + const u32 *mask_outer, + const u32 *mask_misc, + const u32 *mask_inner, + const u32 *value_outer, + const u32 *value_misc, + const u32 *value_inner); + +#define __parse_fs_dst(dst, counter_id) \ + parse_fs_dst(p, (const struct mlx5_flow_destination *)dst, counter_id) + +const char *parse_fs_dst(struct trace_seq *p, + const struct mlx5_flow_destination *dst, + u32 counter_id); + +TRACE_EVENT(mlx5_fs_add_fg, + TP_PROTO(const struct mlx5_flow_group *fg), + TP_ARGS(fg), + TP_STRUCT__entry( + __field(const struct mlx5_flow_group *, fg) + __field(const struct mlx5_flow_table *, ft) + __field(u32, start_index) + __field(u32, end_index) + __field(u32, id) + __field(u8, mask_enable) + __array(u32, mask_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) + __array(u32, mask_inner, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) + __array(u32, mask_misc, MLX5_ST_SZ_DW(fte_match_set_misc)) + ), + TP_fast_assign( + __entry->fg = fg; + fs_get_obj(__entry->ft, fg->node.parent); + __entry->start_index = fg->start_index; + __entry->end_index = fg->start_index + fg->max_ftes; + __entry->id = fg->id; + __entry->mask_enable = fg->mask.match_criteria_enable; + memcpy(__entry->mask_outer, + MLX5_ADDR_OF(fte_match_param, + &fg->mask.match_criteria, + outer_headers), + sizeof(__entry->mask_outer)); + memcpy(__entry->mask_inner, + MLX5_ADDR_OF(fte_match_param, + &fg->mask.match_criteria, + inner_headers), + sizeof(__entry->mask_inner)); + memcpy(__entry->mask_misc, + MLX5_ADDR_OF(fte_match_param, + &fg->mask.match_criteria, + misc_parameters), + 
sizeof(__entry->mask_misc)); + + ), + TP_printk("fg=%p ft=%p id=%u start=%u end=%u bit_mask=%02x %s\n", + __entry->fg, __entry->ft, __entry->id, + __entry->start_index, __entry->end_index, + __entry->mask_enable, + __parse_fs_hdrs(__entry->mask_enable, + __entry->mask_outer, + __entry->mask_misc, + __entry->mask_inner, + __entry->mask_outer, + __entry->mask_misc, + __entry->mask_inner)) + ); + +TRACE_EVENT(mlx5_fs_del_fg, + TP_PROTO(const struct mlx5_flow_group *fg), + TP_ARGS(fg), + TP_STRUCT__entry( + __field(const struct mlx5_flow_group *, fg) + __field(u32, id) + ), + TP_fast_assign( + __entry->fg = fg; + __entry->id = fg->id; + + ), + TP_printk("fg=%p id=%u\n", + __entry->fg, __entry->id) + ); + +#define ACTION_FLAGS \ + {MLX5_FLOW_CONTEXT_ACTION_ALLOW, "ALLOW"},\ + {MLX5_FLOW_CONTEXT_ACTION_DROP, "DROP"},\ + {MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, "FWD"},\ + {MLX5_FLOW_CONTEXT_ACTION_COUNT, "CNT"},\ + {MLX5_FLOW_CONTEXT_ACTION_ENCAP, "ENCAP"},\ + {MLX5_FLOW_CONTEXT_ACTION_DECAP, "DECAP"},\ + {MLX5_FLOW_CONTEXT_ACTION_MOD_HDR, "MOD_HDR"},\ + {MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO, "NEXT_PRIO"} + +TRACE_EVENT(mlx5_fs_set_fte, + TP_PROTO(const struct fs_fte *fte, bool new_fte), + TP_ARGS(fte, new_fte), + TP_STRUCT__entry( + __field(const struct fs_fte *, fte) + __field(const struct mlx5_flow_group *, fg) + __field(u32, group_index) + __field(u32, index) + __field(u32, action) + __field(u32, flow_tag) + __field(u8, mask_enable) + __field(bool, new_fte) + __array(u32, mask_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) + __array(u32, mask_inner, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) + __array(u32, mask_misc, MLX5_ST_SZ_DW(fte_match_set_misc)) + __array(u32, value_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) + __array(u32, value_inner, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) + __array(u32, value_misc, MLX5_ST_SZ_DW(fte_match_set_misc)) + ), + TP_fast_assign( + __entry->fte = fte; + __entry->new_fte = new_fte; + fs_get_obj(__entry->fg, fte->node.parent); + 
__entry->group_index = __entry->fg->id; + __entry->index = fte->index; + __entry->action = fte->action; + __entry->mask_enable = __entry->fg->mask.match_criteria_enable; + __entry->flow_tag = fte->flow_tag; + memcpy(__entry->mask_outer, + MLX5_ADDR_OF(fte_match_param, + &__entry->fg->mask.match_criteria, + outer_headers), + sizeof(__entry->mask_outer)); + memcpy(__entry->mask_inner, + MLX5_ADDR_OF(fte_match_param, + &__entry->fg->mask.match_criteria, + inner_headers), + sizeof(__entry->mask_inner)); + memcpy(__entry->mask_misc, + MLX5_ADDR_OF(fte_match_param, + &__entry->fg->mask.match_criteria, + misc_parameters), + sizeof(__entry->mask_misc)); + memcpy(__entry->value_outer, + MLX5_ADDR_OF(fte_match_param, + &fte->val, + outer_headers), + sizeof(__entry->value_outer)); + memcpy(__entry->value_inner, + MLX5_ADDR_OF(fte_match_param, + &fte->val, + inner_headers), + sizeof(__entry->value_inner)); + memcpy(__entry->value_misc, + MLX5_ADDR_OF(fte_match_param, + &fte->val, + misc_parameters), + sizeof(__entry->value_misc)); + + ), + TP_printk("op=%s fte=%p fg=%p index=%u group_index=%u action=<%s> flow_tag=%x %s\n", + __entry->new_fte ? 
"add" : "set", + __entry->fte, __entry->fg, __entry->index, + __entry->group_index, __print_flags(__entry->action, "|", + ACTION_FLAGS), + __entry->flow_tag, + __parse_fs_hdrs(__entry->mask_enable, + __entry->mask_outer, + __entry->mask_misc, + __entry->mask_inner, + __entry->value_outer, + __entry->value_misc, + __entry->value_inner)) + ); + +TRACE_EVENT(mlx5_fs_del_fte, + TP_PROTO(const struct fs_fte *fte), + TP_ARGS(fte), + TP_STRUCT__entry( + __field(const struct fs_fte *, fte) + __field(u32, index) + ), + TP_fast_assign( + __entry->fte = fte; + __entry->index = fte->index; + + ), + TP_printk("fte=%p index=%u\n", + __entry->fte, __entry->index) + ); + +TRACE_EVENT(mlx5_fs_add_rule, + TP_PROTO(const struct mlx5_flow_rule *rule), + TP_ARGS(rule), + TP_STRUCT__entry( + __field(const struct mlx5_flow_rule *, rule) + __field(const struct fs_fte *, fte) + __field(u32, sw_action) + __field(u32, index) + __field(u32, counter_id) + __array(u8, destination, sizeof(struct mlx5_flow_destination)) + ), + TP_fast_assign( + __entry->rule = rule; + fs_get_obj(__entry->fte, rule->node.parent); + __entry->index = __entry->fte->dests_size - 1; + __entry->sw_action = rule->sw_action; + memcpy(__entry->destination, + &rule->dest_attr, + sizeof(__entry->destination)); + if (rule->dest_attr.type & MLX5_FLOW_DESTINATION_TYPE_COUNTER && + rule->dest_attr.counter) + __entry->counter_id = + rule->dest_attr.counter->id; + ), + TP_printk("rule=%p fte=%p index=%u sw_action=<%s> [dst] %s\n", + __entry->rule, __entry->fte, __entry->index, + __print_flags(__entry->sw_action, "|", ACTION_FLAGS), + __parse_fs_dst(__entry->destination, __entry->counter_id)) + ); + +TRACE_EVENT(mlx5_fs_del_rule, + TP_PROTO(const struct mlx5_flow_rule *rule), + TP_ARGS(rule), + TP_STRUCT__entry( + __field(const struct mlx5_flow_rule *, rule) + __field(const struct fs_fte *, fte) + ), + TP_fast_assign( + __entry->rule = rule; + fs_get_obj(__entry->fte, rule->node.parent); + ), + TP_printk("rule=%p fte=%p\n", + 
__entry->rule, __entry->fte) + ); +#endif + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ./diag +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE fs_tracepoint +#include diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index e2e91caffc0699..cce380ea28910f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -36,6 +36,7 @@ #include "mlx5_core.h" #include "fs_core.h" #include "fs_cmd.h" +#include "diag/fs_tracepoint.h" #define INIT_TREE_NODE_ARRAY_SIZE(...) (sizeof((struct init_tree_node[]){__VA_ARGS__}) /\ sizeof(struct init_tree_node)) @@ -404,6 +405,7 @@ static void del_rule(struct fs_node *node) fs_get_obj(fte, rule->node.parent); fs_get_obj(fg, fte->node.parent); fs_get_obj(ft, fg->node.parent); + trace_mlx5_fs_del_rule(rule); list_del(&rule->node.list); if (rule->sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { mutex_lock(&rule->dest_attr.ft->lock); @@ -457,6 +459,7 @@ static void del_fte(struct fs_node *node) fs_get_obj(fte, node); fs_get_obj(fg, fte->node.parent); fs_get_obj(ft, fg->node.parent); + trace_mlx5_fs_del_fte(fte); dev = get_dev(&ft->node); err = mlx5_cmd_delete_fte(dev, ft, @@ -479,6 +482,7 @@ static void del_flow_group(struct fs_node *node) fs_get_obj(fg, node); fs_get_obj(ft, fg->node.parent); dev = get_dev(&ft->node); + trace_mlx5_fs_del_fg(fg); if (ft->autogroup.active) ft->autogroup.num_groups--; @@ -990,6 +994,7 @@ static struct mlx5_flow_group *create_flow_group_common(struct mlx5_flow_table * /* Add node to group list */ list_add(&fg->node.list, prev_fg); + trace_mlx5_fs_add_fg(fg); return fg; err_remove_fg: @@ -1357,6 +1362,7 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, fte->action = old_action; goto unlock_fte; } else { + trace_mlx5_fs_set_fte(fte, false); goto add_rules; } } @@ -1378,10 +1384,13 @@ static struct mlx5_flow_handle *add_rule_fg(struct 
mlx5_flow_group *fg, tree_add_node(&fte->node, &fg->node); /* fte list isn't sorted */ list_add_tail(&fte->node.list, &fg->node.children); + trace_mlx5_fs_set_fte(fte, true); add_rules: for (i = 0; i < handle->num_rules; i++) { - if (atomic_read(&handle->rule[i]->node.refcount) == 1) + if (atomic_read(&handle->rule[i]->node.refcount) == 1) { tree_add_node(&handle->rule[i]->node, &fte->node); + trace_mlx5_fs_add_rule(handle->rule[i]); + } } unlock_fte: unlock_ref_node(&fte->node); From 4a9b3376456c222eba0e077cce325f9fc3690509 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 10 Aug 2017 15:29:12 +0300 Subject: [PATCH 12/34] net/mlx5: Fix creating a new FTE when an existing but full FTE exists Currently, when a flow steering rule is added, we look for a FTE with an identical value. If we find a match, we try to merge the required destinations with the existing ones. In a case where the existing destination list is full, the code should return an error to its consumer. However, the current code just tries to create another FTE. Fixing that by returning an error in this special scenario. 
Fixes: f478be79a22e ("net/mlx5: Add hash table for flow groups in flow table") issue: 1115444 Change-Id: Iee0b3bc494aa877c528dbb631bea22e7e8c32763 --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index cce380ea28910f..bb4472880a8633 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1449,7 +1449,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, int dest_num) { struct mlx5_flow_group *g; - struct mlx5_flow_handle *rule = ERR_PTR(-ENOENT); + struct mlx5_flow_handle *rule; struct rhlist_head *tmp, *list; struct match_list { struct list_head list; @@ -1513,6 +1513,8 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, unlock_ref_node(&g->node); } + rule = ERR_PTR(-ENOENT); + free_list: if (!list_empty(&match_head)) { struct match_list *match_tmp; @@ -1553,7 +1555,7 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, nested_lock_ref_node(&ft->node, FS_MUTEX_GRANDPARENT); rule = try_add_to_existing_fg(ft, spec, flow_act, dest, dest_num); - if (!IS_ERR(rule)) + if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOENT) goto unlock; g = create_autogroup(ft, spec->match_criteria_enable, From 70ecb504fdbb7fc99450b5db455048c15a2d258d Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Tue, 8 Aug 2017 11:45:28 +0300 Subject: [PATCH 13/34] net/mlx5e: Check encap state when creating flow table entry Do not create flow table entry in hardware if encap exists but is not valid. This may happen when encap was removed from hardware due to neighbour state change. Similar case when new encap is created on neighbour that is not in valid state is handled by returning EAGAIN to upper layer. When EAGAIN is returned, flow table entry provision to hardware is performed by mlx5e_rep_neigh_update when neighbour becomes connected. 
Issue: 1048140 Change-Id: I15ca50c94b8d3e7fc7dd6654f58ad072ce707a5a Fixes: 232c001398ae ("net/mlx5e: Add support to neighbour update flow") Signed-off-by: Vlad Buslov Reviewed-by: Roi Dayan --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 6d022e234c2f88..3d888141950bfc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1896,6 +1896,8 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, *encap_dev = e->out_dev; if (e->flags & MLX5_ENCAP_ENTRY_VALID) attr->encap_id = e->encap_id; + else + err = -EAGAIN; return err; From b70b347be04f0689343c137ef1ea751ea02ed4ba Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Mon, 21 Aug 2017 12:04:50 +0300 Subject: [PATCH 14/34] net/mlx5e: IPoIB, Fix KASAN error when releasing rdma netdev When cleaning rdma netdevice we need to save the mdev pointer because priv is released when we release netdev. 
Found by KASAN: use-after-free in mlx5_rdma_netdev_free+0xe3/0x100 [mlx5_core] Issue: 1121098 Fixes: 48935bbb7ae8 ("net/mlx5e: IPoIB, Add netdevice profile skeleton") Change-Id: Ibc253182650f95589e76b4e08aa6da19f60236d8 Signed-off-by: Roi Dayan --- drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 85298051a3e4fc..145e392ab84973 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -572,12 +572,13 @@ void mlx5_rdma_netdev_free(struct net_device *netdev) { struct mlx5e_priv *priv = mlx5i_epriv(netdev); const struct mlx5e_profile *profile = priv->profile; + struct mlx5_core_dev *mdev = priv->mdev; mlx5e_detach_netdev(priv); profile->cleanup(priv); destroy_workqueue(priv->wq); free_netdev(netdev); - mlx5e_destroy_mdev_resources(priv->mdev); + mlx5e_destroy_mdev_resources(mdev); } EXPORT_SYMBOL(mlx5_rdma_netdev_free); From 57ce54cd8b71c568f713f79105d0f634ac416270 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 22 Aug 2017 13:51:56 +0300 Subject: [PATCH 15/34] net/mlx5e: Properly resolve TC offloaded ipv6 vxlan tunnel source address Currently if vxlan tunnel ipv6 src isn't supplied the driver fails to resolve it as part of the route lookup. The resulting encap header is left with a zeroed out ipv6 src address so the packets are sent with this src ip. Use an appropriate route lookup API that also resolves the source ipv6 address if it's not supplied. 
Issue: 1092855 Change-Id: Iad49a00461e9f1cb8804112a012c74dc006b7dfd Fixes: ce99f6b97fcd ('net/mlx5e: Support SRIOV TC encapsulation offloads for IPv6 tunnels') Signed-off-by: Paul Blakey --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 3d888141950bfc..a469ceb811f722 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1520,10 +1520,9 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv, struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; int ret; - dst = ip6_route_output(dev_net(mirred_dev), NULL, fl6); - ret = dst->error; - if (ret) { - dst_release(dst); + ret = ipv6_stub->ipv6_dst_lookup(dev_net(mirred_dev), NULL, &dst, + fl6); + if (ret < 0) { return ret; } From 699606dc099cf783682ed8650f62e67846934878 Mon Sep 17 00:00:00 2001 From: Shahar Klein Date: Tue, 1 Aug 2017 15:29:55 +0300 Subject: [PATCH 16/34] net/mlx5: E-Switch, Unload the representors in the correct order This fixes a bug in which changing mode to legacy while handling VXLAN traffic causes a NULL dereference. When switching from switchdev to legacy all the port devices (nic and reps) are cleaned up. Part of this cleaning process is removing the neigh entry and the hash table containing them. However, a representor neigh entry might be linked to the nic hash table and if the nic is cleaned first the cleaning of the representor will end up in a NULL dereference. Fix that by unloading the representors in the opposite order of load. 
Issue: 1063609 Change-Id: I609697cc684574413ad23d53cae75ea28cef9dc5 Fixes: cb67b832921c ("net/mlx5e: Introduce SRIOV VF representors") Signed-off-by: Shahar Klein Reviewed-by: Roi Dayan --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index e7c186b585796d..d9fd8570b07c83 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -817,7 +817,7 @@ void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports) struct mlx5_eswitch_rep *rep; int vport; - for (vport = 0; vport < nvports; vport++) { + for (vport = nvports - 1; vport >= 0; vport--) { rep = &esw->offloads.vport_reps[vport]; if (!rep->valid) continue; From 4b007dd50fb7d7ea14d2e1488aa739786705f02b Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Wed, 16 Aug 2017 23:13:33 -0400 Subject: [PATCH 17/34] idr: Add new APIs to support unsigned long The following new APIs are added: int idr_alloc_ext(struct idr *idr, void *ptr, unsigned long *index, unsigned long start, unsigned long end, gfp_t gfp); static inline void *idr_remove_ext(struct idr *idr, unsigned long id); static inline void *idr_find_ext(const struct idr *idr, unsigned long id); void *idr_replace_ext(struct idr *idr, void *ptr, unsigned long id); void *idr_get_next_ext(struct idr *idr, unsigned long *nextid); Signed-off-by: Chris Mi Signed-off-by: Jiri Pirko --- include/linux/idr.h | 16 +++++++++ include/linux/radix-tree.h | 3 ++ lib/idr.c | 56 +++++++++++++++++++++++++++++ lib/radix-tree.c | 73 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 148 insertions(+) diff --git a/include/linux/idr.h b/include/linux/idr.h index bf70b3ef0a073b..e0a030bb09a5fe 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -81,11 +81,15 @@ static inline void idr_set_cursor(struct 
idr *idr, unsigned int val) void idr_preload(gfp_t gfp_mask); int idr_alloc(struct idr *, void *entry, int start, int end, gfp_t); +int idr_alloc_ext(struct idr *idr, void *ptr, unsigned long *index, + unsigned long start, unsigned long end, gfp_t gfp); int idr_alloc_cyclic(struct idr *, void *entry, int start, int end, gfp_t); int idr_for_each(const struct idr *, int (*fn)(int id, void *p, void *data), void *data); void *idr_get_next(struct idr *, int *nextid); +void *idr_get_next_ext(struct idr *idr, unsigned long *nextid); void *idr_replace(struct idr *, void *, int id); +void *idr_replace_ext(struct idr *idr, void *ptr, unsigned long id); void idr_destroy(struct idr *); static inline void *idr_remove(struct idr *idr, int id) @@ -93,6 +97,11 @@ static inline void *idr_remove(struct idr *idr, int id) return radix_tree_delete_item(&idr->idr_rt, id, NULL); } +static inline void *idr_remove_ext(struct idr *idr, unsigned long id) +{ + return radix_tree_delete_item(&idr->idr_rt, id, NULL); +} + static inline void idr_init(struct idr *idr) { INIT_RADIX_TREE(&idr->idr_rt, IDR_RT_MARKER); @@ -133,6 +142,11 @@ static inline void *idr_find(const struct idr *idr, int id) return radix_tree_lookup(&idr->idr_rt, id); } +static inline void *idr_find_ext(const struct idr *idr, unsigned long id) +{ + return radix_tree_lookup(&idr->idr_rt, id); +} + /** * idr_for_each_entry - iterate over an idr's elements of a given type * @idr: idr handle @@ -145,6 +159,8 @@ static inline void *idr_find(const struct idr *idr, int id) */ #define idr_for_each_entry(idr, entry, id) \ for (id = 0; ((entry) = idr_get_next(idr, &(id))) != NULL; ++id) +#define idr_for_each_entry_ext(idr, entry, id) \ + for (id = 0; ((entry) = idr_get_next_ext(idr, &(id))) != NULL; ++id) /** * idr_for_each_entry_continue - continue iteration over an idr's elements of a given type diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 3e5735064b7166..947299edb6925f 100644 --- 
a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -359,6 +359,9 @@ int radix_tree_join(struct radix_tree_root *, unsigned long index, unsigned new_order, void *); void __rcu **idr_get_free(struct radix_tree_root *, struct radix_tree_iter *, gfp_t, int end); +void __rcu **idr_get_free_ext(struct radix_tree_root *root, + struct radix_tree_iter *iter, + gfp_t gfp, unsigned long end); enum { RADIX_TREE_ITER_TAG_MASK = 0x0f, /* tag index in lower nybble */ diff --git a/lib/idr.c b/lib/idr.c index b13682bb0a1c67..2a091b93ba8825 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -47,6 +47,29 @@ int idr_alloc(struct idr *idr, void *ptr, int start, int end, gfp_t gfp) } EXPORT_SYMBOL_GPL(idr_alloc); +int idr_alloc_ext(struct idr *idr, void *ptr, unsigned long *index, + unsigned long start, unsigned long end, gfp_t gfp) +{ + void __rcu **slot; + struct radix_tree_iter iter; + + if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr))) + return -EINVAL; + + radix_tree_iter_init(&iter, start); + slot = idr_get_free_ext(&idr->idr_rt, &iter, gfp, end); + if (IS_ERR(slot)) + return PTR_ERR(slot); + + radix_tree_iter_replace(&idr->idr_rt, &iter, slot, ptr); + radix_tree_iter_tag_clear(&idr->idr_rt, &iter, IDR_FREE); + + if (index) + *index = iter.index; + return 0; +} +EXPORT_SYMBOL_GPL(idr_alloc_ext); + /** * idr_alloc_cyclic - allocate new idr entry in a cyclical fashion * @idr: idr handle @@ -134,6 +157,20 @@ void *idr_get_next(struct idr *idr, int *nextid) } EXPORT_SYMBOL(idr_get_next); +void *idr_get_next_ext(struct idr *idr, unsigned long *nextid) +{ + struct radix_tree_iter iter; + void __rcu **slot; + + slot = radix_tree_iter_find(&idr->idr_rt, &iter, *nextid); + if (!slot) + return NULL; + + *nextid = iter.index; + return rcu_dereference_raw(*slot); +} +EXPORT_SYMBOL(idr_get_next_ext); + /** * idr_replace - replace pointer for given id * @idr: idr handle @@ -169,6 +206,25 @@ void *idr_replace(struct idr *idr, void *ptr, int id) } EXPORT_SYMBOL(idr_replace); +void 
*idr_replace_ext(struct idr *idr, void *ptr, unsigned long id) +{ + struct radix_tree_node *node; + void __rcu **slot = NULL; + void *entry; + + if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr))) + return ERR_PTR(-EINVAL); + + entry = __radix_tree_lookup(&idr->idr_rt, id, &node, &slot); + if (!slot || radix_tree_tag_get(&idr->idr_rt, id, IDR_FREE)) + return ERR_PTR(-ENOENT); + + __radix_tree_replace(&idr->idr_rt, node, slot, ptr, NULL, NULL); + + return entry; +} +EXPORT_SYMBOL(idr_replace_ext); + /** * DOC: IDA description * diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 898e8799841759..06bfdbda35ecc3 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -2208,6 +2208,79 @@ void __rcu **idr_get_free(struct radix_tree_root *root, return slot; } +void __rcu **idr_get_free_ext(struct radix_tree_root *root, + struct radix_tree_iter *iter, + gfp_t gfp, unsigned long end) +{ + struct radix_tree_node *node = NULL, *child; + void __rcu **slot = (void __rcu **)&root->rnode; + unsigned long maxindex, start = iter->next_index; + unsigned long max = end - 1; + unsigned int shift, offset = 0; + + grow: + shift = radix_tree_load_root(root, &child, &maxindex); + if (!radix_tree_tagged(root, IDR_FREE)) + start = max(start, maxindex + 1); + if (start > max) + return ERR_PTR(-ENOSPC); + + if (start > maxindex) { + int error = radix_tree_extend(root, gfp, start, shift); + + if (error < 0) + return ERR_PTR(error); + shift = error; + child = rcu_dereference_raw(root->rnode); + } + + while (shift) { + shift -= RADIX_TREE_MAP_SHIFT; + if (child == NULL) { + /* Have to add a child node. 
*/ + child = radix_tree_node_alloc(gfp, node, root, shift, + offset, 0, 0); + if (!child) + return ERR_PTR(-ENOMEM); + all_tag_set(child, IDR_FREE); + rcu_assign_pointer(*slot, node_to_entry(child)); + if (node) + node->count++; + } else if (!radix_tree_is_internal_node(child)) + break; + + node = entry_to_node(child); + offset = radix_tree_descend(node, &child, start); + if (!tag_get(node, IDR_FREE, offset)) { + offset = radix_tree_find_next_bit(node, IDR_FREE, + offset + 1); + start = next_index(start, node, offset); + if (start > max) + return ERR_PTR(-ENOSPC); + while (offset == RADIX_TREE_MAP_SIZE) { + offset = node->offset + 1; + node = node->parent; + if (!node) + goto grow; + shift = node->shift; + } + child = rcu_dereference_raw(node->slots[offset]); + } + slot = &node->slots[offset]; + } + + iter->index = start; + if (node) + iter->next_index = 1 + min(max, (start | node_maxindex(node))); + else + iter->next_index = 1; + iter->node = node; + __set_iter_shift(iter, shift); + set_iter_tags(iter, node, offset, IDR_FREE); + + return slot; +} + /** * idr_destroy - release all internal memory from an IDR * @idr: idr handle From fdff7db78d777cfd18998776bf773b9cd1ab9884 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Thu, 17 Aug 2017 05:56:32 -0400 Subject: [PATCH 18/34] net/sched: Change cls_flower to use IDR Currently, all filters with the same priority are linked in a doubly linked list. Every filter should have a unique handle. To make the handle unique, we need to iterate the list every time to see if the handle exists or not when inserting a new filter. It is time-consuming. For example, it takes about 5m3.169s to insert 64K rules. This patch changes cls_flower to use IDR. With this patch, it takes about 0m1.127s to insert 64K rules. The improvement is huge. But please note that in this testing, all filters share the same action. If every filter has a unique action, that is another bottleneck. Follow-up patch in this patchset addresses that. 
Signed-off-by: Chris Mi Signed-off-by: Jiri Pirko --- net/sched/cls_flower.c | 55 ++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 31 deletions(-) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 7832eb93379b39..e3ed00a24dc0d7 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -68,7 +68,6 @@ struct cls_fl_head { struct rhashtable ht; struct fl_flow_mask mask; struct flow_dissector dissector; - u32 hgen; bool mask_assigned; struct list_head filters; struct rhashtable_params ht_params; @@ -76,6 +75,7 @@ struct cls_fl_head { struct work_struct work; struct rcu_head rcu; }; + struct idr handle_idr; }; struct cls_fl_filter { @@ -211,6 +211,7 @@ static int fl_init(struct tcf_proto *tp) INIT_LIST_HEAD_RCU(&head->filters); rcu_assign_pointer(tp->root, head); + idr_init(&head->handle_idr); return 0; } @@ -309,6 +310,9 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f) { + struct cls_fl_head *head = rtnl_dereference(tp->root); + + idr_remove_ext(&head->handle_idr, f->handle); list_del_rcu(&f->list); if (!tc_skip_hw(f->flags)) fl_hw_destroy_filter(tp, f); @@ -341,6 +345,7 @@ static void fl_destroy(struct tcf_proto *tp) list_for_each_entry_safe(f, next, &head->filters, list) __fl_delete(tp, f); + idr_destroy(&head->handle_idr); __module_get(THIS_MODULE); call_rcu(&head->rcu, fl_destroy_rcu); @@ -351,10 +356,8 @@ static unsigned long fl_get(struct tcf_proto *tp, u32 handle) struct cls_fl_head *head = rtnl_dereference(tp->root); struct cls_fl_filter *f; - list_for_each_entry(f, &head->filters, list) - if (f->handle == handle) - return (unsigned long) f; - return 0; + f = idr_find_ext(&head->handle_idr, handle); + return (unsigned long) f; } static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { @@ -882,27 +885,6 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp, return err; } -static u32 
fl_grab_new_handle(struct tcf_proto *tp, - struct cls_fl_head *head) -{ - unsigned int i = 0x80000000; - u32 handle; - - do { - if (++head->hgen == 0x7FFFFFFF) - head->hgen = 1; - } while (--i > 0 && fl_get(tp, head->hgen)); - - if (unlikely(i == 0)) { - pr_err("Insufficient number of handles\n"); - handle = 0; - } else { - handle = head->hgen; - } - - return handle; -} - static int fl_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, @@ -913,6 +895,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, struct cls_fl_filter *fnew; struct nlattr **tb; struct fl_flow_mask mask = {}; + unsigned long idr_index; int err; if (!tca[TCA_OPTIONS]) @@ -943,13 +926,21 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, goto errout; if (!handle) { - handle = fl_grab_new_handle(tp, head); - if (!handle) { - err = -EINVAL; + err = idr_alloc_ext(&head->handle_idr, fnew, &idr_index, + 1, 0x80000000, GFP_KERNEL); + if (err) goto errout; - } + fnew->handle = idr_index; + } + + /* user specifies a handle and it doesn't exist */ + if (handle && !fold) { + err = idr_alloc_ext(&head->handle_idr, fnew, &idr_index, + handle, handle + 1, GFP_KERNEL); + if (err) + goto errout; + fnew->handle = idr_index; } - fnew->handle = handle; if (tb[TCA_FLOWER_FLAGS]) { fnew->flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]); @@ -1003,6 +994,8 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, *arg = (unsigned long) fnew; if (fold) { + fnew->handle = handle; + idr_replace_ext(&head->handle_idr, fnew, fnew->handle); list_replace_rcu(&fold->list, &fnew->list); tcf_unbind_filter(tp, &fold->res); call_rcu(&fold->rcu, fl_destroy_filter); From 2583f3c24a0b509e7464979d105575587e46a94d Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Tue, 22 Aug 2017 01:07:07 -0400 Subject: [PATCH 19/34] net/sched: Change act_api and act_xxx modules to use IDR Typically, each TC filter has its own action. 
All the actions of the same type are saved in its hash table. But the hash buckets are too small that it degrades to a list. And the performance is greatly affected. For example, it takes about 0m11.914s to insert 64K rules. If we convert the hash table to IDR, it only takes about 0m1.500s. The improvement is huge. But please note that the test result is based on previous patch that cls_flower uses IDR. Signed-off-by: Chris Mi Signed-off-by: Jiri Pirko --- include/net/act_api.h | 76 ++++-------- net/sched/act_api.c | 243 ++++++++++++++++++------------------- net/sched/act_bpf.c | 17 ++- net/sched/act_connmark.c | 16 ++- net/sched/act_csum.c | 16 ++- net/sched/act_gact.c | 16 ++- net/sched/act_ife.c | 20 ++- net/sched/act_ipt.c | 26 ++-- net/sched/act_mirred.c | 19 ++- net/sched/act_nat.c | 16 ++- net/sched/act_pedit.c | 18 ++- net/sched/act_police.c | 18 ++- net/sched/act_sample.c | 17 ++- net/sched/act_simple.c | 20 ++- net/sched/act_skbedit.c | 18 ++- net/sched/act_skbmod.c | 18 ++- net/sched/act_tunnel_key.c | 20 ++- net/sched/act_vlan.c | 22 ++-- 18 files changed, 275 insertions(+), 341 deletions(-) diff --git a/include/net/act_api.h b/include/net/act_api.h index 26ffd8333f50fc..c011722c4e37a7 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -10,12 +10,9 @@ #include #include - -struct tcf_hashinfo { - struct hlist_head *htab; - unsigned int hmask; - spinlock_t lock; - u32 index; +struct tcf_idrinfo { + spinlock_t lock; + struct idr action_idr; }; struct tc_action_ops; @@ -25,9 +22,8 @@ struct tc_action { __u32 type; /* for backward compat(TCA_OLD_COMPAT) */ __u32 order; struct list_head list; - struct tcf_hashinfo *hinfo; + struct tcf_idrinfo *idrinfo; - struct hlist_node tcfa_head; u32 tcfa_index; int tcfa_refcnt; int tcfa_bindcnt; @@ -44,7 +40,6 @@ struct tc_action { struct tc_cookie *act_cookie; struct tcf_chain *goto_chain; }; -#define tcf_head common.tcfa_head #define tcf_index common.tcfa_index #define tcf_refcnt common.tcfa_refcnt 
#define tcf_bindcnt common.tcfa_bindcnt @@ -57,27 +52,6 @@ struct tc_action { #define tcf_lock common.tcfa_lock #define tcf_rcu common.tcfa_rcu -static inline unsigned int tcf_hash(u32 index, unsigned int hmask) -{ - return index & hmask; -} - -static inline int tcf_hashinfo_init(struct tcf_hashinfo *hf, unsigned int mask) -{ - int i; - - spin_lock_init(&hf->lock); - hf->index = 0; - hf->hmask = mask; - hf->htab = kzalloc((mask + 1) * sizeof(struct hlist_head), - GFP_KERNEL); - if (!hf->htab) - return -ENOMEM; - for (i = 0; i < mask + 1; i++) - INIT_HLIST_HEAD(&hf->htab[i]); - return 0; -} - /* Update lastuse only if needed, to avoid dirtying a cache line. * We use a temp variable to avoid fetching jiffies twice. */ @@ -126,53 +100,51 @@ struct tc_action_ops { }; struct tc_action_net { - struct tcf_hashinfo *hinfo; + struct tcf_idrinfo *idrinfo; const struct tc_action_ops *ops; }; static inline int tc_action_net_init(struct tc_action_net *tn, - const struct tc_action_ops *ops, unsigned int mask) + const struct tc_action_ops *ops) { int err = 0; - tn->hinfo = kmalloc(sizeof(*tn->hinfo), GFP_KERNEL); - if (!tn->hinfo) + tn->idrinfo = kmalloc(sizeof(*tn->idrinfo), GFP_KERNEL); + if (!tn->idrinfo) return -ENOMEM; tn->ops = ops; - err = tcf_hashinfo_init(tn->hinfo, mask); - if (err) - kfree(tn->hinfo); + spin_lock_init(&tn->idrinfo->lock); + idr_init(&tn->idrinfo->action_idr); return err; } -void tcf_hashinfo_destroy(const struct tc_action_ops *ops, - struct tcf_hashinfo *hinfo); +void tcf_idrinfo_destroy(const struct tc_action_ops *ops, + struct tcf_idrinfo *idrinfo); static inline void tc_action_net_exit(struct tc_action_net *tn) { - tcf_hashinfo_destroy(tn->ops, tn->hinfo); - kfree(tn->hinfo); + tcf_idrinfo_destroy(tn->ops, tn->idrinfo); + kfree(tn->idrinfo); } int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, struct netlink_callback *cb, int type, const struct tc_action_ops *ops); -int tcf_hash_search(struct tc_action_net *tn, struct tc_action 
**a, u32 index); -u32 tcf_hash_new_index(struct tc_action_net *tn); -bool tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action **a, +int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index); +bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a, int bind); -int tcf_hash_create(struct tc_action_net *tn, u32 index, struct nlattr *est, - struct tc_action **a, const struct tc_action_ops *ops, int bind, - bool cpustats); -void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est); -void tcf_hash_insert(struct tc_action_net *tn, struct tc_action *a); +int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, + struct tc_action **a, const struct tc_action_ops *ops, + int bind, bool cpustats); +void tcf_idr_cleanup(struct tc_action *a, struct nlattr *est); +void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a); -int __tcf_hash_release(struct tc_action *a, bool bind, bool strict); +int __tcf_idr_release(struct tc_action *a, bool bind, bool strict); -static inline int tcf_hash_release(struct tc_action *a, bool bind) +static inline int tcf_idr_release(struct tc_action *a, bool bind) { - return __tcf_hash_release(a, bind, false); + return __tcf_idr_release(a, bind, false); } int tcf_register_action(struct tc_action_ops *a, struct pernet_operations *ops); diff --git a/net/sched/act_api.c b/net/sched/act_api.c index f2e9ed34a963e1..c34a11744d993e 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -70,11 +70,11 @@ static void free_tcf(struct rcu_head *head) kfree(p); } -static void tcf_hash_destroy(struct tcf_hashinfo *hinfo, struct tc_action *p) +static void tcf_idr_remove(struct tcf_idrinfo *idrinfo, struct tc_action *p) { - spin_lock_bh(&hinfo->lock); - hlist_del(&p->tcfa_head); - spin_unlock_bh(&hinfo->lock); + spin_lock_bh(&idrinfo->lock); + idr_remove_ext(&idrinfo->action_idr, p->tcfa_index); + spin_unlock_bh(&idrinfo->lock); gen_kill_estimator(&p->tcfa_rate_est); /* * 
gen_estimator est_timer() might access p->tcfa_lock @@ -83,7 +83,7 @@ static void tcf_hash_destroy(struct tcf_hashinfo *hinfo, struct tc_action *p) call_rcu(&p->tcfa_rcu, free_tcf); } -int __tcf_hash_release(struct tc_action *p, bool bind, bool strict) +int __tcf_idr_release(struct tc_action *p, bool bind, bool strict) { int ret = 0; @@ -97,53 +97,47 @@ int __tcf_hash_release(struct tc_action *p, bool bind, bool strict) if (p->tcfa_bindcnt <= 0 && p->tcfa_refcnt <= 0) { if (p->ops->cleanup) p->ops->cleanup(p, bind); - tcf_hash_destroy(p->hinfo, p); + tcf_idr_remove(p->idrinfo, p); ret = ACT_P_DELETED; } } return ret; } -EXPORT_SYMBOL(__tcf_hash_release); +EXPORT_SYMBOL(__tcf_idr_release); -static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb, +static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, struct netlink_callback *cb) { - int err = 0, index = -1, i = 0, s_i = 0, n_i = 0; + int err = 0, index = -1, s_i = 0, n_i = 0; struct nlattr *nest; - - spin_lock_bh(&hinfo->lock); + struct idr *idr = &idrinfo->action_idr; + struct tc_action *p; + unsigned long id = 1; s_i = cb->args[0]; - for (i = 0; i < (hinfo->hmask + 1); i++) { - struct hlist_head *head; - struct tc_action *p; - - head = &hinfo->htab[tcf_hash(i, hinfo->hmask)]; - - hlist_for_each_entry_rcu(p, head, tcfa_head) { - index++; - if (index < s_i) - continue; - - nest = nla_nest_start(skb, n_i); - if (nest == NULL) - goto nla_put_failure; - err = tcf_action_dump_1(skb, p, 0, 0); - if (err < 0) { - index--; - nlmsg_trim(skb, nest); - goto done; - } - nla_nest_end(skb, nest); - n_i++; - if (n_i >= TCA_ACT_MAX_PRIO) - goto done; + idr_for_each_entry_ext(idr, p, id) { + index++; + if (index < s_i) + continue; + + nest = nla_nest_start(skb, n_i); + if (nest == NULL) + goto nla_put_failure; + err = tcf_action_dump_1(skb, p, 0, 0); + if (err < 0) { + index--; + nlmsg_trim(skb, nest); + goto done; } + nla_nest_end(skb, nest); + n_i++; + if (n_i >= TCA_ACT_MAX_PRIO) + goto 
done; } + done: - spin_unlock_bh(&hinfo->lock); if (n_i) cb->args[0] += n_i; return n_i; @@ -153,32 +147,29 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb, goto done; } -static int tcf_del_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb, +static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, const struct tc_action_ops *ops) { struct nlattr *nest; - int i = 0, n_i = 0; + int n_i = 0; int ret = -EINVAL; + struct idr *idr = &idrinfo->action_idr; + struct tc_action *p; + unsigned long id = 1; nest = nla_nest_start(skb, 0); if (nest == NULL) goto nla_put_failure; if (nla_put_string(skb, TCA_KIND, ops->kind)) goto nla_put_failure; - for (i = 0; i < (hinfo->hmask + 1); i++) { - struct hlist_head *head; - struct hlist_node *n; - struct tc_action *p; - - head = &hinfo->htab[tcf_hash(i, hinfo->hmask)]; - hlist_for_each_entry_safe(p, n, head, tcfa_head) { - ret = __tcf_hash_release(p, false, true); - if (ret == ACT_P_DELETED) { - module_put(p->ops->owner); - n_i++; - } else if (ret < 0) - goto nla_put_failure; - } + + idr_for_each_entry_ext(idr, p, id) { + ret = __tcf_idr_release(p, false, true); + if (ret == ACT_P_DELETED) { + module_put(p->ops->owner); + n_i++; + } else if (ret < 0) + goto nla_put_failure; } if (nla_put_u32(skb, TCA_FCNT, n_i)) goto nla_put_failure; @@ -194,12 +185,12 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, struct netlink_callback *cb, int type, const struct tc_action_ops *ops) { - struct tcf_hashinfo *hinfo = tn->hinfo; + struct tcf_idrinfo *idrinfo = tn->idrinfo; if (type == RTM_DELACTION) { - return tcf_del_walker(hinfo, skb, ops); + return tcf_del_walker(idrinfo, skb, ops); } else if (type == RTM_GETACTION) { - return tcf_dump_walker(hinfo, skb, cb); + return tcf_dump_walker(idrinfo, skb, cb); } else { WARN(1, "tcf_generic_walker: unknown action %d\n", type); return -EINVAL; @@ -207,40 +198,21 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff 
*skb, } EXPORT_SYMBOL(tcf_generic_walker); -static struct tc_action *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo) +static struct tc_action *tcf_idr_lookup(u32 index, struct tcf_idrinfo *idrinfo) { struct tc_action *p = NULL; - struct hlist_head *head; - spin_lock_bh(&hinfo->lock); - head = &hinfo->htab[tcf_hash(index, hinfo->hmask)]; - hlist_for_each_entry_rcu(p, head, tcfa_head) - if (p->tcfa_index == index) - break; - spin_unlock_bh(&hinfo->lock); + spin_lock_bh(&idrinfo->lock); + p = idr_find_ext(&idrinfo->action_idr, index); + spin_unlock_bh(&idrinfo->lock); return p; } -u32 tcf_hash_new_index(struct tc_action_net *tn) -{ - struct tcf_hashinfo *hinfo = tn->hinfo; - u32 val = hinfo->index; - - do { - if (++val == 0) - val = 1; - } while (tcf_hash_lookup(val, hinfo)); - - hinfo->index = val; - return val; -} -EXPORT_SYMBOL(tcf_hash_new_index); - -int tcf_hash_search(struct tc_action_net *tn, struct tc_action **a, u32 index) +int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index) { - struct tcf_hashinfo *hinfo = tn->hinfo; - struct tc_action *p = tcf_hash_lookup(index, hinfo); + struct tcf_idrinfo *idrinfo = tn->idrinfo; + struct tc_action *p = tcf_idr_lookup(index, idrinfo); if (p) { *a = p; @@ -248,15 +220,15 @@ int tcf_hash_search(struct tc_action_net *tn, struct tc_action **a, u32 index) } return 0; } -EXPORT_SYMBOL(tcf_hash_search); +EXPORT_SYMBOL(tcf_idr_search); -bool tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action **a, - int bind) +bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a, + int bind) { - struct tcf_hashinfo *hinfo = tn->hinfo; - struct tc_action *p = NULL; + struct tcf_idrinfo *idrinfo = tn->idrinfo; + struct tc_action *p = tcf_idr_lookup(index, idrinfo); - if (index && (p = tcf_hash_lookup(index, hinfo)) != NULL) { + if (index && p) { if (bind) p->tcfa_bindcnt++; p->tcfa_refcnt++; @@ -265,22 +237,24 @@ bool tcf_hash_check(struct tc_action_net *tn, u32 index, struct 
tc_action **a, } return false; } -EXPORT_SYMBOL(tcf_hash_check); +EXPORT_SYMBOL(tcf_idr_check); -void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est) +void tcf_idr_cleanup(struct tc_action *a, struct nlattr *est) { if (est) gen_kill_estimator(&a->tcfa_rate_est); call_rcu(&a->tcfa_rcu, free_tcf); } -EXPORT_SYMBOL(tcf_hash_cleanup); +EXPORT_SYMBOL(tcf_idr_cleanup); -int tcf_hash_create(struct tc_action_net *tn, u32 index, struct nlattr *est, - struct tc_action **a, const struct tc_action_ops *ops, - int bind, bool cpustats) +int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, + struct tc_action **a, const struct tc_action_ops *ops, + int bind, bool cpustats) { struct tc_action *p = kzalloc(ops->size, GFP_KERNEL); - struct tcf_hashinfo *hinfo = tn->hinfo; + struct tcf_idrinfo *idrinfo = tn->idrinfo; + struct idr *idr = &idrinfo->action_idr; + unsigned long idr_index; int err = -ENOMEM; if (unlikely(!p)) @@ -304,8 +278,30 @@ int tcf_hash_create(struct tc_action_net *tn, u32 index, struct nlattr *est, } } spin_lock_init(&p->tcfa_lock); - INIT_HLIST_NODE(&p->tcfa_head); - p->tcfa_index = index ? 
index : tcf_hash_new_index(tn); + /* user doesn't specify an index */ + if (!index) { + spin_lock_bh(&idrinfo->lock); + err = idr_alloc_ext(idr, NULL, &idr_index, 1, 0, + GFP_KERNEL); + spin_unlock_bh(&idrinfo->lock); + if (err) { +err3: + free_percpu(p->cpu_qstats); + goto err2; + } + p->tcfa_index = idr_index; + } + else { + spin_lock_bh(&idrinfo->lock); + err = idr_alloc_ext(idr, NULL, NULL, index, index + 1, + GFP_KERNEL); + spin_unlock_bh(&idrinfo->lock); + if (err) { + goto err3; + } + p->tcfa_index = index; + } + p->tcfa_tm.install = jiffies; p->tcfa_tm.lastuse = jiffies; p->tcfa_tm.firstuse = 0; @@ -314,52 +310,47 @@ int tcf_hash_create(struct tc_action_net *tn, u32 index, struct nlattr *est, &p->tcfa_rate_est, &p->tcfa_lock, NULL, est); if (err) { - free_percpu(p->cpu_qstats); - goto err2; + goto err3; } } - p->hinfo = hinfo; + p->idrinfo = idrinfo; p->ops = ops; INIT_LIST_HEAD(&p->list); *a = p; return 0; } -EXPORT_SYMBOL(tcf_hash_create); +EXPORT_SYMBOL(tcf_idr_create); -void tcf_hash_insert(struct tc_action_net *tn, struct tc_action *a) +void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a) { - struct tcf_hashinfo *hinfo = tn->hinfo; - unsigned int h = tcf_hash(a->tcfa_index, hinfo->hmask); + struct tcf_idrinfo *idrinfo = tn->idrinfo; - spin_lock_bh(&hinfo->lock); - hlist_add_head(&a->tcfa_head, &hinfo->htab[h]); - spin_unlock_bh(&hinfo->lock); + spin_lock_bh(&idrinfo->lock); + idr_replace_ext(&idrinfo->action_idr, a, a->tcfa_index); + spin_unlock_bh(&idrinfo->lock); } -EXPORT_SYMBOL(tcf_hash_insert); +EXPORT_SYMBOL(tcf_idr_insert); -void tcf_hashinfo_destroy(const struct tc_action_ops *ops, - struct tcf_hashinfo *hinfo) -{ - int i; - for (i = 0; i < hinfo->hmask + 1; i++) { - struct tc_action *p; - struct hlist_node *n; - - hlist_for_each_entry_safe(p, n, &hinfo->htab[i], tcfa_head) { - int ret; +void tcf_idrinfo_destroy(const struct tc_action_ops *ops, + struct tcf_idrinfo *idrinfo) +{ + struct idr *idr = &idrinfo->action_idr; + struct 
tc_action *p; + int ret; + unsigned long id = 1; - ret = __tcf_hash_release(p, false, true); - if (ret == ACT_P_DELETED) - module_put(ops->owner); - else if (ret < 0) - return; - } + idr_for_each_entry_ext(idr, p, id) { + ret = __tcf_idr_release(p, false, true); + if (ret == ACT_P_DELETED) + module_put(ops->owner); + else if (ret < 0) + return; } - kfree(hinfo->htab); + idr_destroy(&idrinfo->action_idr); } -EXPORT_SYMBOL(tcf_hashinfo_destroy); +EXPORT_SYMBOL(tcf_idrinfo_destroy); static LIST_HEAD(act_base); static DEFINE_RWLOCK(act_mod_lock); @@ -510,7 +501,7 @@ int tcf_action_destroy(struct list_head *actions, int bind) int ret = 0; list_for_each_entry_safe(a, tmp, actions, list) { - ret = __tcf_hash_release(a, bind, true); + ret = __tcf_idr_release(a, bind, true); if (ret == ACT_P_DELETED) module_put(a->ops->owner); else if (ret < 0) diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 9afe1337cfd109..c0c707eb2c9625 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -21,7 +21,6 @@ #include #include -#define BPF_TAB_MASK 15 #define ACT_BPF_NAME_LEN 256 struct tcf_bpf_cfg { @@ -295,9 +294,9 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, parm = nla_data(tb[TCA_ACT_BPF_PARMS]); - if (!tcf_hash_check(tn, parm->index, act, bind)) { - ret = tcf_hash_create(tn, parm->index, est, act, - &act_bpf_ops, bind, true); + if (!tcf_idr_check(tn, parm->index, act, bind)) { + ret = tcf_idr_create(tn, parm->index, est, act, + &act_bpf_ops, bind, true); if (ret < 0) return ret; @@ -307,7 +306,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, if (bind) return 0; - tcf_hash_release(*act, bind); + tcf_idr_release(*act, bind); if (!replace) return -EEXIST; } @@ -343,7 +342,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, rcu_assign_pointer(prog->filter, cfg.filter); if (res == ACT_P_CREATED) { - tcf_hash_insert(tn, *act); + tcf_idr_insert(tn, *act); } else { /* make sure the program being replaced is no longer executing 
*/ synchronize_rcu(); @@ -353,7 +352,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, return res; out: if (res == ACT_P_CREATED) - tcf_hash_cleanup(*act, est); + tcf_idr_cleanup(*act, est); return ret; } @@ -379,7 +378,7 @@ static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, bpf_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_bpf_ops __read_mostly = { @@ -399,7 +398,7 @@ static __net_init int bpf_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, bpf_net_id); - return tc_action_net_init(tn, &act_bpf_ops, BPF_TAB_MASK); + return tc_action_net_init(tn, &act_bpf_ops); } static void __net_exit bpf_exit_net(struct net *net) diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 2155bc6c6a1e79..10b7a8855a6c75 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -28,8 +28,6 @@ #include #include -#define CONNMARK_TAB_MASK 3 - static unsigned int connmark_net_id; static struct tc_action_ops act_connmark_ops; @@ -119,9 +117,9 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, parm = nla_data(tb[TCA_CONNMARK_PARMS]); - if (!tcf_hash_check(tn, parm->index, a, bind)) { - ret = tcf_hash_create(tn, parm->index, est, a, - &act_connmark_ops, bind, false); + if (!tcf_idr_check(tn, parm->index, a, bind)) { + ret = tcf_idr_create(tn, parm->index, est, a, + &act_connmark_ops, bind, false); if (ret) return ret; @@ -130,13 +128,13 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, ci->net = net; ci->zone = parm->zone; - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); ret = ACT_P_CREATED; } else { ci = to_connmark(*a); if (bind) return 0; - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; /* replacing action and zone */ @@ -189,7 +187,7 @@ static int tcf_connmark_search(struct net *net, struct tc_action 
**a, u32 index) { struct tc_action_net *tn = net_generic(net, connmark_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_connmark_ops = { @@ -208,7 +206,7 @@ static __net_init int connmark_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, connmark_net_id); - return tc_action_net_init(tn, &act_connmark_ops, CONNMARK_TAB_MASK); + return tc_action_net_init(tn, &act_connmark_ops); } static void __net_exit connmark_exit_net(struct net *net) diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 3317a2f579da7d..d836f998117b24 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -37,8 +37,6 @@ #include #include -#define CSUM_TAB_MASK 15 - static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = { [TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), }, }; @@ -67,16 +65,16 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, return -EINVAL; parm = nla_data(tb[TCA_CSUM_PARMS]); - if (!tcf_hash_check(tn, parm->index, a, bind)) { - ret = tcf_hash_create(tn, parm->index, est, a, - &act_csum_ops, bind, false); + if (!tcf_idr_check(tn, parm->index, a, bind)) { + ret = tcf_idr_create(tn, parm->index, est, a, + &act_csum_ops, bind, false); if (ret) return ret; ret = ACT_P_CREATED; } else { if (bind)/* dont override defaults */ return 0; - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; } @@ -88,7 +86,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, spin_unlock_bh(&p->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; } @@ -615,7 +613,7 @@ static int tcf_csum_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, csum_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_csum_ops = { @@ -634,7 +632,7 @@ static __net_init int 
csum_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, csum_net_id); - return tc_action_net_init(tn, &act_csum_ops, CSUM_TAB_MASK); + return tc_action_net_init(tn, &act_csum_ops); } static void __net_exit csum_exit_net(struct net *net) diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 99afe8b1f1fb01..e29a48ef7fc348 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -23,8 +23,6 @@ #include #include -#define GACT_TAB_MASK 15 - static unsigned int gact_net_id; static struct tc_action_ops act_gact_ops; @@ -92,16 +90,16 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, } #endif - if (!tcf_hash_check(tn, parm->index, a, bind)) { - ret = tcf_hash_create(tn, parm->index, est, a, - &act_gact_ops, bind, true); + if (!tcf_idr_check(tn, parm->index, a, bind)) { + ret = tcf_idr_create(tn, parm->index, est, a, + &act_gact_ops, bind, true); if (ret) return ret; ret = ACT_P_CREATED; } else { if (bind)/* dont override defaults */ return 0; - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; } @@ -122,7 +120,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, } #endif if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; } @@ -214,7 +212,7 @@ static int tcf_gact_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, gact_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_gact_ops = { @@ -234,7 +232,7 @@ static __net_init int gact_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, gact_net_id); - return tc_action_net_init(tn, &act_gact_ops, GACT_TAB_MASK); + return tc_action_net_init(tn, &act_gact_ops); } static void __net_exit gact_exit_net(struct net *net) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index c5dec308b8b1eb..770c5d940daa45 100644 --- a/net/sched/act_ife.c +++ 
b/net/sched/act_ife.c @@ -34,8 +34,6 @@ #include #include -#define IFE_TAB_MASK 15 - static unsigned int ife_net_id; static int max_metacnt = IFE_META_MAX + 1; static struct tc_action_ops act_ife_ops; @@ -452,7 +450,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, parm = nla_data(tb[TCA_IFE_PARMS]); - exists = tcf_hash_check(tn, parm->index, a, bind); + exists = tcf_idr_check(tn, parm->index, a, bind); if (exists && bind) return 0; @@ -462,20 +460,20 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, **/ if (!tb[TCA_IFE_TYPE]) { if (exists) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); pr_info("You MUST pass etherype for encoding\n"); return -EINVAL; } } if (!exists) { - ret = tcf_hash_create(tn, parm->index, est, a, &act_ife_ops, - bind, false); + ret = tcf_idr_create(tn, parm->index, est, a, &act_ife_ops, + bind, false); if (ret) return ret; ret = ACT_P_CREATED; } else { - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; } @@ -518,7 +516,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, if (err) { metadata_parse_err: if (exists) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (ret == ACT_P_CREATED) _tcf_ife_cleanup(*a, bind); @@ -552,7 +550,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, spin_unlock_bh(&ife->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; } @@ -811,7 +809,7 @@ static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, ife_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_ife_ops = { @@ -831,7 +829,7 @@ static __net_init int ife_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, ife_net_id); - return tc_action_net_init(tn, &act_ife_ops, IFE_TAB_MASK); + return tc_action_net_init(tn, &act_ife_ops); } static void 
__net_exit ife_exit_net(struct net *net) diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index d516ba8178b809..c59f6006168834 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -28,8 +28,6 @@ #include -#define IPT_TAB_MASK 15 - static unsigned int ipt_net_id; static struct tc_action_ops act_ipt_ops; @@ -116,33 +114,33 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla, if (tb[TCA_IPT_INDEX] != NULL) index = nla_get_u32(tb[TCA_IPT_INDEX]); - exists = tcf_hash_check(tn, index, a, bind); + exists = tcf_idr_check(tn, index, a, bind); if (exists && bind) return 0; if (tb[TCA_IPT_HOOK] == NULL || tb[TCA_IPT_TARG] == NULL) { if (exists) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -EINVAL; } td = (struct xt_entry_target *)nla_data(tb[TCA_IPT_TARG]); if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size) { if (exists) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -EINVAL; } if (!exists) { - ret = tcf_hash_create(tn, index, est, a, ops, bind, - false); + ret = tcf_idr_create(tn, index, est, a, ops, bind, + false); if (ret) return ret; ret = ACT_P_CREATED; } else { if (bind)/* dont override defaults */ return 0; - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; @@ -178,7 +176,7 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla, ipt->tcfi_hook = hook; spin_unlock_bh(&ipt->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; err3: @@ -187,7 +185,7 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla, kfree(tname); err1: if (ret == ACT_P_CREATED) - tcf_hash_cleanup(*a, est); + tcf_idr_cleanup(*a, est); return err; } @@ -314,7 +312,7 @@ static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, ipt_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } 
static struct tc_action_ops act_ipt_ops = { @@ -334,7 +332,7 @@ static __net_init int ipt_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, ipt_net_id); - return tc_action_net_init(tn, &act_ipt_ops, IPT_TAB_MASK); + return tc_action_net_init(tn, &act_ipt_ops); } static void __net_exit ipt_exit_net(struct net *net) @@ -364,7 +362,7 @@ static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, xt_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_xt_ops = { @@ -384,7 +382,7 @@ static __net_init int xt_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, xt_net_id); - return tc_action_net_init(tn, &act_xt_ops, IPT_TAB_MASK); + return tc_action_net_init(tn, &act_xt_ops); } static void __net_exit xt_exit_net(struct net *net) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 1b5549ababd469..416627c66f081f 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -28,7 +28,6 @@ #include #include -#define MIRRED_TAB_MASK 7 static LIST_HEAD(mirred_list); static DEFINE_SPINLOCK(mirred_list_lock); @@ -94,7 +93,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, return -EINVAL; parm = nla_data(tb[TCA_MIRRED_PARMS]); - exists = tcf_hash_check(tn, parm->index, a, bind); + exists = tcf_idr_check(tn, parm->index, a, bind); if (exists && bind) return 0; @@ -106,14 +105,14 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, break; default: if (exists) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -EINVAL; } if (parm->ifindex) { dev = __dev_get_by_index(net, parm->ifindex); if (dev == NULL) { if (exists) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -ENODEV; } mac_header_xmit = dev_is_mac_header_xmit(dev); @@ -124,13 +123,13 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, if (!exists) { if (dev == 
NULL) return -EINVAL; - ret = tcf_hash_create(tn, parm->index, est, a, - &act_mirred_ops, bind, true); + ret = tcf_idr_create(tn, parm->index, est, a, + &act_mirred_ops, bind, true); if (ret) return ret; ret = ACT_P_CREATED; } else { - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; } @@ -152,7 +151,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, spin_lock_bh(&mirred_list_lock); list_add(&m->tcfm_list, &mirred_list); spin_unlock_bh(&mirred_list_lock); - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); } return ret; @@ -283,7 +282,7 @@ static int tcf_mirred_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, mirred_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static int mirred_device_event(struct notifier_block *unused, @@ -344,7 +343,7 @@ static __net_init int mirred_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, mirred_net_id); - return tc_action_net_init(tn, &act_mirred_ops, MIRRED_TAB_MASK); + return tc_action_net_init(tn, &act_mirred_ops); } static void __net_exit mirred_exit_net(struct net *net) diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 9016ab8a064978..c365d01b99c8b7 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -29,8 +29,6 @@ #include -#define NAT_TAB_MASK 15 - static unsigned int nat_net_id; static struct tc_action_ops act_nat_ops; @@ -58,16 +56,16 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, return -EINVAL; parm = nla_data(tb[TCA_NAT_PARMS]); - if (!tcf_hash_check(tn, parm->index, a, bind)) { - ret = tcf_hash_create(tn, parm->index, est, a, - &act_nat_ops, bind, false); + if (!tcf_idr_check(tn, parm->index, a, bind)) { + ret = tcf_idr_create(tn, parm->index, est, a, + &act_nat_ops, bind, false); if (ret) return ret; ret = ACT_P_CREATED; } else { if (bind) return 0; - tcf_hash_release(*a, bind); + 
tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; } @@ -83,7 +81,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, spin_unlock_bh(&p->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; } @@ -290,7 +288,7 @@ static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, nat_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_nat_ops = { @@ -309,7 +307,7 @@ static __net_init int nat_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, nat_net_id); - return tc_action_net_init(tn, &act_nat_ops, NAT_TAB_MASK); + return tc_action_net_init(tn, &act_nat_ops); } static void __net_exit nat_exit_net(struct net *net) diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 7dc5892671c818..491fe5deb09ee7 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -24,8 +24,6 @@ #include #include -#define PEDIT_TAB_MASK 15 - static unsigned int pedit_net_id; static struct tc_action_ops act_pedit_ops; @@ -168,17 +166,17 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, if (IS_ERR(keys_ex)) return PTR_ERR(keys_ex); - if (!tcf_hash_check(tn, parm->index, a, bind)) { + if (!tcf_idr_check(tn, parm->index, a, bind)) { if (!parm->nkeys) return -EINVAL; - ret = tcf_hash_create(tn, parm->index, est, a, - &act_pedit_ops, bind, false); + ret = tcf_idr_create(tn, parm->index, est, a, + &act_pedit_ops, bind, false); if (ret) return ret; p = to_pedit(*a); keys = kmalloc(ksize, GFP_KERNEL); if (keys == NULL) { - tcf_hash_cleanup(*a, est); + tcf_idr_cleanup(*a, est); kfree(keys_ex); return -ENOMEM; } @@ -186,7 +184,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, } else { if (bind) return 0; - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; p = to_pedit(*a); @@ -214,7 +212,7 @@ 
static int tcf_pedit_init(struct net *net, struct nlattr *nla, spin_unlock_bh(&p->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; } @@ -432,7 +430,7 @@ static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, pedit_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_pedit_ops = { @@ -452,7 +450,7 @@ static __net_init int pedit_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, pedit_net_id); - return tc_action_net_init(tn, &act_pedit_ops, PEDIT_TAB_MASK); + return tc_action_net_init(tn, &act_pedit_ops); } static void __net_exit pedit_exit_net(struct net *net) diff --git a/net/sched/act_police.c b/net/sched/act_police.c index b062bc80c7cb11..3bb2ebf9e9aec2 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -40,8 +40,6 @@ struct tcf_police { #define to_police(pc) ((struct tcf_police *)pc) -#define POL_TAB_MASK 15 - /* old policer structure from before tc actions */ struct tc_police_compat { u32 index; @@ -101,18 +99,18 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla, return -EINVAL; parm = nla_data(tb[TCA_POLICE_TBF]); - exists = tcf_hash_check(tn, parm->index, a, bind); + exists = tcf_idr_check(tn, parm->index, a, bind); if (exists && bind) return 0; if (!exists) { - ret = tcf_hash_create(tn, parm->index, NULL, a, - &act_police_ops, bind, false); + ret = tcf_idr_create(tn, parm->index, NULL, a, + &act_police_ops, bind, false); if (ret) return ret; ret = ACT_P_CREATED; } else { - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; } @@ -188,7 +186,7 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla, return ret; police->tcfp_t_c = ktime_get_ns(); - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; @@ -196,7 +194,7 @@ static int tcf_act_police_init(struct net 
*net, struct nlattr *nla, qdisc_put_rtab(P_tab); qdisc_put_rtab(R_tab); if (ret == ACT_P_CREATED) - tcf_hash_cleanup(*a, est); + tcf_idr_cleanup(*a, est); return err; } @@ -310,7 +308,7 @@ static int tcf_police_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, police_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } MODULE_AUTHOR("Alexey Kuznetsov"); @@ -333,7 +331,7 @@ static __net_init int police_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, police_net_id); - return tc_action_net_init(tn, &act_police_ops, POL_TAB_MASK); + return tc_action_net_init(tn, &act_police_ops); } static void __net_exit police_exit_net(struct net *net) diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 59d6645a400781..ec986ae5280890 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -25,7 +25,6 @@ #include -#define SAMPLE_TAB_MASK 7 static unsigned int sample_net_id; static struct tc_action_ops act_sample_ops; @@ -59,18 +58,18 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, parm = nla_data(tb[TCA_SAMPLE_PARMS]); - exists = tcf_hash_check(tn, parm->index, a, bind); + exists = tcf_idr_check(tn, parm->index, a, bind); if (exists && bind) return 0; if (!exists) { - ret = tcf_hash_create(tn, parm->index, est, a, - &act_sample_ops, bind, false); + ret = tcf_idr_create(tn, parm->index, est, a, + &act_sample_ops, bind, false); if (ret) return ret; ret = ACT_P_CREATED; } else { - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; } @@ -82,7 +81,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, psample_group = psample_group_get(net, s->psample_group_num); if (!psample_group) { if (ret == ACT_P_CREATED) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -ENOMEM; } RCU_INIT_POINTER(s->psample_group, psample_group); @@ -93,7 +92,7 @@ static int tcf_sample_init(struct 
net *net, struct nlattr *nla, } if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; } @@ -221,7 +220,7 @@ static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, sample_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_sample_ops = { @@ -241,7 +240,7 @@ static __net_init int sample_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, sample_net_id); - return tc_action_net_init(tn, &act_sample_ops, SAMPLE_TAB_MASK); + return tc_action_net_init(tn, &act_sample_ops); } static void __net_exit sample_exit_net(struct net *net) diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 43605e7ce05107..e7b57e5071a365 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -24,8 +24,6 @@ #include #include -#define SIMP_TAB_MASK 7 - static unsigned int simp_net_id; static struct tc_action_ops act_simp_ops; @@ -102,28 +100,28 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, return -EINVAL; parm = nla_data(tb[TCA_DEF_PARMS]); - exists = tcf_hash_check(tn, parm->index, a, bind); + exists = tcf_idr_check(tn, parm->index, a, bind); if (exists && bind) return 0; if (tb[TCA_DEF_DATA] == NULL) { if (exists) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -EINVAL; } defdata = nla_data(tb[TCA_DEF_DATA]); if (!exists) { - ret = tcf_hash_create(tn, parm->index, est, a, - &act_simp_ops, bind, false); + ret = tcf_idr_create(tn, parm->index, est, a, + &act_simp_ops, bind, false); if (ret) return ret; d = to_defact(*a); ret = alloc_defdata(d, defdata); if (ret < 0) { - tcf_hash_cleanup(*a, est); + tcf_idr_cleanup(*a, est); return ret; } d->tcf_action = parm->action; @@ -131,7 +129,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, } else { d = to_defact(*a); - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) 
return -EEXIST; @@ -139,7 +137,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, } if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; } @@ -183,7 +181,7 @@ static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, simp_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_simp_ops = { @@ -203,7 +201,7 @@ static __net_init int simp_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, simp_net_id); - return tc_action_net_init(tn, &act_simp_ops, SIMP_TAB_MASK); + return tc_action_net_init(tn, &act_simp_ops); } static void __net_exit simp_exit_net(struct net *net) diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 6b3e65d7de0c2e..59949d61f20da1 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -27,8 +27,6 @@ #include #include -#define SKBEDIT_TAB_MASK 15 - static unsigned int skbedit_net_id; static struct tc_action_ops act_skbedit_ops; @@ -118,18 +116,18 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, parm = nla_data(tb[TCA_SKBEDIT_PARMS]); - exists = tcf_hash_check(tn, parm->index, a, bind); + exists = tcf_idr_check(tn, parm->index, a, bind); if (exists && bind) return 0; if (!flags) { - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -EINVAL; } if (!exists) { - ret = tcf_hash_create(tn, parm->index, est, a, - &act_skbedit_ops, bind, false); + ret = tcf_idr_create(tn, parm->index, est, a, + &act_skbedit_ops, bind, false); if (ret) return ret; @@ -137,7 +135,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, ret = ACT_P_CREATED; } else { d = to_skbedit(*a); - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; } @@ -163,7 +161,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, spin_unlock_bh(&d->tcf_lock); if (ret == 
ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; } @@ -221,7 +219,7 @@ static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, skbedit_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_skbedit_ops = { @@ -240,7 +238,7 @@ static __net_init int skbedit_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, skbedit_net_id); - return tc_action_net_init(tn, &act_skbedit_ops, SKBEDIT_TAB_MASK); + return tc_action_net_init(tn, &act_skbedit_ops); } static void __net_exit skbedit_exit_net(struct net *net) diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index a73c4bbcada293..b642ad3d39dd41 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -20,8 +20,6 @@ #include #include -#define SKBMOD_TAB_MASK 15 - static unsigned int skbmod_net_id; static struct tc_action_ops act_skbmod_ops; @@ -129,7 +127,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, if (parm->flags & SKBMOD_F_SWAPMAC) lflags = SKBMOD_F_SWAPMAC; - exists = tcf_hash_check(tn, parm->index, a, bind); + exists = tcf_idr_check(tn, parm->index, a, bind); if (exists && bind) return 0; @@ -137,14 +135,14 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, return -EINVAL; if (!exists) { - ret = tcf_hash_create(tn, parm->index, est, a, - &act_skbmod_ops, bind, true); + ret = tcf_idr_create(tn, parm->index, est, a, + &act_skbmod_ops, bind, true); if (ret) return ret; ret = ACT_P_CREATED; } else { - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; } @@ -155,7 +153,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL); if (unlikely(!p)) { if (ovr) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -ENOMEM; } @@ -182,7 +180,7 @@ static int 
tcf_skbmod_init(struct net *net, struct nlattr *nla, kfree_rcu(p_old, rcu); if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; } @@ -245,7 +243,7 @@ static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, skbmod_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_skbmod_ops = { @@ -265,7 +263,7 @@ static __net_init int skbmod_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, skbmod_net_id); - return tc_action_net_init(tn, &act_skbmod_ops, SKBMOD_TAB_MASK); + return tc_action_net_init(tn, &act_skbmod_ops); } static void __net_exit skbmod_exit_net(struct net *net) diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index fd7e75679c69e0..30c96274c63826 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -20,8 +20,6 @@ #include #include -#define TUNNEL_KEY_TAB_MASK 15 - static unsigned int tunnel_key_net_id; static struct tc_action_ops act_tunnel_key_ops; @@ -100,7 +98,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, return -EINVAL; parm = nla_data(tb[TCA_TUNNEL_KEY_PARMS]); - exists = tcf_hash_check(tn, parm->index, a, bind); + exists = tcf_idr_check(tn, parm->index, a, bind); if (exists && bind) return 0; @@ -159,14 +157,14 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, } if (!exists) { - ret = tcf_hash_create(tn, parm->index, est, a, - &act_tunnel_key_ops, bind, true); + ret = tcf_idr_create(tn, parm->index, est, a, + &act_tunnel_key_ops, bind, true); if (ret) return ret; ret = ACT_P_CREATED; } else { - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; } @@ -177,7 +175,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, params_new = kzalloc(sizeof(*params_new), GFP_KERNEL); if (unlikely(!params_new)) { if (ret == ACT_P_CREATED) - 
tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -ENOMEM; } @@ -193,13 +191,13 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, kfree_rcu(params_old, rcu); if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; err_out: if (exists) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return ret; } @@ -304,7 +302,7 @@ static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_tunnel_key_ops = { @@ -324,7 +322,7 @@ static __net_init int tunnel_key_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); - return tc_action_net_init(tn, &act_tunnel_key_ops, TUNNEL_KEY_TAB_MASK); + return tc_action_net_init(tn, &act_tunnel_key_ops); } static void __net_exit tunnel_key_exit_net(struct net *net) diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 13ba3a89f675d7..16eb067a8d8fa2 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -19,8 +19,6 @@ #include #include -#define VLAN_TAB_MASK 15 - static unsigned int vlan_net_id; static struct tc_action_ops act_vlan_ops; @@ -128,7 +126,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, if (!tb[TCA_VLAN_PARMS]) return -EINVAL; parm = nla_data(tb[TCA_VLAN_PARMS]); - exists = tcf_hash_check(tn, parm->index, a, bind); + exists = tcf_idr_check(tn, parm->index, a, bind); if (exists && bind) return 0; @@ -139,13 +137,13 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, case TCA_VLAN_ACT_MODIFY: if (!tb[TCA_VLAN_PUSH_VLAN_ID]) { if (exists) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -EINVAL; } push_vid = nla_get_u16(tb[TCA_VLAN_PUSH_VLAN_ID]); if (push_vid >= VLAN_VID_MASK) { if (exists) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -ERANGE; 
} @@ -167,20 +165,20 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, break; default: if (exists) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -EINVAL; } action = parm->v_action; if (!exists) { - ret = tcf_hash_create(tn, parm->index, est, a, - &act_vlan_ops, bind, false); + ret = tcf_idr_create(tn, parm->index, est, a, + &act_vlan_ops, bind, false); if (ret) return ret; ret = ACT_P_CREATED; } else { - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; } @@ -199,7 +197,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, spin_unlock_bh(&v->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; } @@ -252,7 +250,7 @@ static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, vlan_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_vlan_ops = { @@ -271,7 +269,7 @@ static __net_init int vlan_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, vlan_net_id); - return tc_action_net_init(tn, &act_vlan_ops, VLAN_TAB_MASK); + return tc_action_net_init(tn, &act_vlan_ops); } static void __net_exit vlan_exit_net(struct net *net) From 15390b2bfd7818c78f40c010c4c0817dea17cdd1 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Wed, 23 Aug 2017 20:43:09 -0400 Subject: [PATCH 20/34] net/sched: Change tc_action refcnt and bindcnt to atomic Signed-off-by: Chris Mi --- include/net/act_api.h | 4 ++-- net/sched/act_api.c | 21 +++++++++++---------- net/sched/act_bpf.c | 4 ++-- net/sched/act_connmark.c | 4 ++-- net/sched/act_csum.c | 4 ++-- net/sched/act_gact.c | 4 ++-- net/sched/act_ife.c | 4 ++-- net/sched/act_ipt.c | 4 ++-- net/sched/act_mirred.c | 4 ++-- net/sched/act_nat.c | 4 ++-- net/sched/act_pedit.c | 4 ++-- net/sched/act_police.c | 4 ++-- net/sched/act_sample.c | 4 ++-- net/sched/act_simple.c | 4 ++-- 
net/sched/act_skbedit.c | 4 ++-- net/sched/act_skbmod.c | 4 ++-- net/sched/act_tunnel_key.c | 4 ++-- net/sched/act_vlan.c | 4 ++-- 18 files changed, 45 insertions(+), 44 deletions(-) diff --git a/include/net/act_api.h b/include/net/act_api.h index c011722c4e37a7..77b3e1a85b3053 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -25,8 +25,8 @@ struct tc_action { struct tcf_idrinfo *idrinfo; u32 tcfa_index; - int tcfa_refcnt; - int tcfa_bindcnt; + atomic_t tcfa_refcnt; + atomic_t tcfa_bindcnt; u32 tcfa_capab; int tcfa_action; struct tcf_t tcfa_tm; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index c34a11744d993e..da0999e61b6b88 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -89,12 +89,13 @@ int __tcf_idr_release(struct tc_action *p, bool bind, bool strict) if (p) { if (bind) - p->tcfa_bindcnt--; - else if (strict && p->tcfa_bindcnt > 0) + atomic_dec(&p->tcfa_bindcnt); + else if (strict && atomic_read(&p->tcfa_bindcnt) > 0) return -EPERM; - p->tcfa_refcnt--; - if (p->tcfa_bindcnt <= 0 && p->tcfa_refcnt <= 0) { + atomic_dec(&p->tcfa_refcnt); + if (atomic_read(&p->tcfa_bindcnt) == 0 && + atomic_read(&p->tcfa_refcnt) == 0) { if (p->ops->cleanup) p->ops->cleanup(p, bind); tcf_idr_remove(p->idrinfo, p); @@ -230,8 +231,8 @@ bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a, if (index && p) { if (bind) - p->tcfa_bindcnt++; - p->tcfa_refcnt++; + atomic_inc(&p->tcfa_bindcnt); + atomic_inc(&p->tcfa_refcnt); *a = p; return true; } @@ -259,9 +260,9 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, if (unlikely(!p)) return -ENOMEM; - p->tcfa_refcnt = 1; + atomic_set(&p->tcfa_refcnt, 1); if (bind) - p->tcfa_bindcnt = 1; + atomic_set(&p->tcfa_bindcnt, 1); if (cpustats) { p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu); @@ -708,7 +709,7 @@ static void cleanup_a(struct list_head *actions, int ovr) return; list_for_each_entry(a, actions, list) - a->tcfa_refcnt--; + 
atomic_dec(&a->tcfa_refcnt); } int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, @@ -732,7 +733,7 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, } act->order = i; if (ovr) - act->tcfa_refcnt++; + atomic_inc(&act->tcfa_refcnt); list_add_tail(&act->list, actions); } diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index c0c707eb2c9625..4ddf2810d9164a 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -141,8 +141,8 @@ static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *act, struct tcf_bpf *prog = to_bpf(act); struct tc_act_bpf opt = { .index = prog->tcf_index, - .refcnt = prog->tcf_refcnt - ref, - .bindcnt = prog->tcf_bindcnt - bind, + .refcnt = atomic_read(&prog->tcf_refcnt) - ref, + .bindcnt = atomic_read(&prog->tcf_bindcnt) - bind, .action = prog->tcf_action, }; struct tcf_t tm; diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 10b7a8855a6c75..d2cf658dd8b227 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -153,8 +153,8 @@ static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a, struct tc_connmark opt = { .index = ci->tcf_index, - .refcnt = ci->tcf_refcnt - ref, - .bindcnt = ci->tcf_bindcnt - bind, + .refcnt = atomic_read(&ci->tcf_refcnt) - ref, + .bindcnt = atomic_read(&ci->tcf_bindcnt) - bind, .action = ci->tcf_action, .zone = ci->zone, }; diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index d836f998117b24..905f26788a0104 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -581,8 +581,8 @@ static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind, .update_flags = p->update_flags, .index = p->tcf_index, .action = p->tcf_action, - .refcnt = p->tcf_refcnt - ref, - .bindcnt = p->tcf_bindcnt - bind, + .refcnt = atomic_read(&p->tcf_refcnt) - ref, + .bindcnt = atomic_read(&p->tcf_bindcnt) - bind, }; struct tcf_t t; diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 
e29a48ef7fc348..b1326b76eeac04 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -169,8 +169,8 @@ static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, struct tcf_gact *gact = to_gact(a); struct tc_gact opt = { .index = gact->tcf_index, - .refcnt = gact->tcf_refcnt - ref, - .bindcnt = gact->tcf_bindcnt - bind, + .refcnt = atomic_read(&gact->tcf_refcnt) - ref, + .bindcnt = atomic_read(&gact->tcf_bindcnt) - bind, .action = gact->tcf_action, }; struct tcf_t t; diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 770c5d940daa45..4b19351ca4874e 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -562,8 +562,8 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind, struct tcf_ife_info *ife = to_ife(a); struct tc_ife opt = { .index = ife->tcf_index, - .refcnt = ife->tcf_refcnt - ref, - .bindcnt = ife->tcf_bindcnt - bind, + .refcnt = atomic_read(&ife->tcf_refcnt) - ref, + .bindcnt = atomic_read(&ife->tcf_bindcnt) - bind, .action = ife->tcf_action, .flags = ife->flags, }; diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index c59f6006168834..4fe7898ec97f98 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -275,8 +275,8 @@ static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, if (unlikely(!t)) goto nla_put_failure; - c.bindcnt = ipt->tcf_bindcnt - bind; - c.refcnt = ipt->tcf_refcnt - ref; + c.bindcnt = atomic_read(&ipt->tcf_bindcnt) - bind; + c.refcnt = atomic_read(&ipt->tcf_refcnt) - ref; strcpy(t->u.user.name, ipt->tcfi_t->u.kernel.target->name); if (nla_put(skb, TCA_IPT_TARG, ipt->tcfi_t->u.user.target_size, t) || diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 416627c66f081f..aeeeb381ba3725 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -249,8 +249,8 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, struct tc_mirred opt = { .index = m->tcf_index, .action = m->tcf_action, - .refcnt = 
m->tcf_refcnt - ref, - .bindcnt = m->tcf_bindcnt - bind, + .refcnt = atomic_read(&m->tcf_refcnt) - ref, + .bindcnt = atomic_read(&m->tcf_bindcnt) - bind, .eaction = m->tcfm_eaction, .ifindex = m->tcfm_ifindex, }; diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index c365d01b99c8b7..58fa1ae3530d53 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -256,8 +256,8 @@ static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a, .index = p->tcf_index, .action = p->tcf_action, - .refcnt = p->tcf_refcnt - ref, - .bindcnt = p->tcf_bindcnt - bind, + .refcnt = atomic_read(&p->tcf_refcnt) - ref, + .bindcnt = atomic_read(&p->tcf_bindcnt) - bind, }; struct tcf_t t; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 491fe5deb09ee7..27b9feac99b738 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -391,8 +391,8 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a, opt->nkeys = p->tcfp_nkeys; opt->flags = p->tcfp_flags; opt->action = p->tcf_action; - opt->refcnt = p->tcf_refcnt - ref; - opt->bindcnt = p->tcf_bindcnt - bind; + opt->refcnt = atomic_read(&p->tcf_refcnt) - ref; + opt->bindcnt = atomic_read(&p->tcf_bindcnt) - bind; if (p->tcfp_keys_ex) { tcf_pedit_key_ex_dump(skb, p->tcfp_keys_ex, p->tcfp_nkeys); diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 3bb2ebf9e9aec2..d8a1ac6dda7327 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -272,8 +272,8 @@ static int tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, .action = police->tcf_action, .mtu = police->tcfp_mtu, .burst = PSCHED_NS2TICKS(police->tcfp_burst), - .refcnt = police->tcf_refcnt - ref, - .bindcnt = police->tcf_bindcnt - bind, + .refcnt = atomic_read(&police->tcf_refcnt) - ref, + .bindcnt = atomic_read(&police->tcf_bindcnt) - bind, }; struct tcf_t t; diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index ec986ae5280890..64889a436ff189 100644 --- a/net/sched/act_sample.c +++ 
b/net/sched/act_sample.c @@ -179,8 +179,8 @@ static int tcf_sample_dump(struct sk_buff *skb, struct tc_action *a, struct tc_sample opt = { .index = s->tcf_index, .action = s->tcf_action, - .refcnt = s->tcf_refcnt - ref, - .bindcnt = s->tcf_bindcnt - bind, + .refcnt = atomic_read(&s->tcf_refcnt) - ref, + .bindcnt = atomic_read(&s->tcf_bindcnt) - bind, }; struct tcf_t t; diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index e7b57e5071a365..ec7e397afac642 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -148,8 +148,8 @@ static int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a, struct tcf_defact *d = to_defact(a); struct tc_defact opt = { .index = d->tcf_index, - .refcnt = d->tcf_refcnt - ref, - .bindcnt = d->tcf_bindcnt - bind, + .refcnt = atomic_read(&d->tcf_refcnt) - ref, + .bindcnt = atomic_read(&d->tcf_bindcnt) - bind, .action = d->tcf_action, }; struct tcf_t t; diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 59949d61f20da1..2b05e565872203 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -172,8 +172,8 @@ static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a, struct tcf_skbedit *d = to_skbedit(a); struct tc_skbedit opt = { .index = d->tcf_index, - .refcnt = d->tcf_refcnt - ref, - .bindcnt = d->tcf_bindcnt - bind, + .refcnt = atomic_read(&d->tcf_refcnt) - ref, + .bindcnt = atomic_read(&d->tcf_bindcnt) - bind, .action = d->tcf_action, }; struct tcf_t t; diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index b642ad3d39dd41..ca1ddf9dc5fff6 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -201,8 +201,8 @@ static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a, struct tcf_skbmod_params *p = rtnl_dereference(d->skbmod_p); struct tc_skbmod opt = { .index = d->tcf_index, - .refcnt = d->tcf_refcnt - ref, - .bindcnt = d->tcf_bindcnt - bind, + .refcnt = atomic_read(&d->tcf_refcnt) - ref, + .bindcnt = atomic_read(&d->tcf_bindcnt) - bind, 
.action = d->tcf_action, }; struct tcf_t t; diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 30c96274c63826..158d472bb2df15 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -250,8 +250,8 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a, struct tcf_tunnel_key_params *params; struct tc_tunnel_key opt = { .index = t->tcf_index, - .refcnt = t->tcf_refcnt - ref, - .bindcnt = t->tcf_bindcnt - bind, + .refcnt = atomic_read(&t->tcf_refcnt) - ref, + .bindcnt = atomic_read(&t->tcf_bindcnt) - bind, }; struct tcf_t tm; diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 16eb067a8d8fa2..e2d7aab0e4fa18 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -208,8 +208,8 @@ static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a, struct tcf_vlan *v = to_vlan(a); struct tc_vlan opt = { .index = v->tcf_index, - .refcnt = v->tcf_refcnt - ref, - .bindcnt = v->tcf_bindcnt - bind, + .refcnt = atomic_read(&v->tcf_refcnt) - ref, + .bindcnt = atomic_read(&v->tcf_bindcnt) - bind, .action = v->tcf_action, .v_action = v->tcfv_action, }; From a18ec5054c49fc1d33025a6122650587f1d0e5be Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Thu, 24 Aug 2017 06:56:05 -0400 Subject: [PATCH 21/34] net/sched: Use action array instead of action list as parameter When destroying filters, actions should be destroyed first. The pointers of each action are saved in an array. TC doesn't use the array directly, but puts all actions in a doubly linked list and uses the list as the parameter. There is no problem if each filter has its own actions. But if some filters share the same action, when these filters are destroyed, the RCU callback fl_destroy_filter() may be called at the same time. That means the same action's 'struct list_head list' could be manipulated at the same time. It may then point to an illegal address, causing the system to panic. So this patch uses the action array directly.
And we add a new parameter to tell how many actions we have. After testing, this patch fixes this issue. Signed-off-by: Chris Mi --- include/net/act_api.h | 7 +-- net/sched/act_api.c | 104 ++++++++++++++++++++++++++---------------- net/sched/cls_api.c | 18 +++----- 3 files changed, 74 insertions(+), 55 deletions(-) diff --git a/include/net/act_api.h b/include/net/act_api.h index 77b3e1a85b3053..a4e18e7b915e2a 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -150,16 +150,17 @@ static inline int tcf_idr_release(struct tc_action *a, bool bind) int tcf_register_action(struct tc_action_ops *a, struct pernet_operations *ops); int tcf_unregister_action(struct tc_action_ops *a, struct pernet_operations *ops); -int tcf_action_destroy(struct list_head *actions, int bind); +int tcf_action_destroy(struct tc_action **actions, int nr, int bind); int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions, int nr_actions, struct tcf_result *res); int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, char *name, int ovr, int bind, - struct list_head *actions); + struct tc_action **actions, int *nr); struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, char *name, int ovr, int bind); -int tcf_action_dump(struct sk_buff *skb, struct list_head *, int, int); +int tcf_action_dump(struct sk_buff *skb, struct tc_action **actions, int nr, + int bind, int ref); int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int); int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int); int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int); diff --git a/net/sched/act_api.c b/net/sched/act_api.c index da0999e61b6b88..2ba51db8810fd6 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -496,12 +496,13 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions, } EXPORT_SYMBOL(tcf_action_exec); -int 
tcf_action_destroy(struct list_head *actions, int bind) +int tcf_action_destroy(struct tc_action **actions, int nr, int bind) { - struct tc_action *a, *tmp; - int ret = 0; + struct tc_action *a; + int ret = 0, i; - list_for_each_entry_safe(a, tmp, actions, list) { + for (i = 0; i < nr; i++) { + a = actions[i]; ret = __tcf_idr_release(a, bind, true); if (ret == ACT_P_DELETED) module_put(a->ops->owner); @@ -549,14 +550,15 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref) } EXPORT_SYMBOL(tcf_action_dump_1); -int tcf_action_dump(struct sk_buff *skb, struct list_head *actions, +int tcf_action_dump(struct sk_buff *skb, struct tc_action **actions, int nr, int bind, int ref) { struct tc_action *a; - int err = -EINVAL; + int err = -EINVAL, i; struct nlattr *nest; - list_for_each_entry(a, actions, list) { + for (i = 0; i < nr; i++) { + a = actions[i]; nest = nla_nest_start(skb, a->order); if (nest == NULL) goto nla_put_failure; @@ -681,10 +683,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN)) { err = tcf_action_goto_chain_init(a, tp); if (err) { - LIST_HEAD(actions); - - list_add_tail(&a->list, &actions); - tcf_action_destroy(&actions, bind); + tcf_action_destroy(&a, 1, bind); return ERR_PTR(err); } } @@ -701,25 +700,29 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, return ERR_PTR(err); } -static void cleanup_a(struct list_head *actions, int ovr) +static void cleanup_a(struct tc_action **actions, int nr, int ovr) { struct tc_action *a; + int i; if (!ovr) return; - list_for_each_entry(a, actions, list) + for (i = 0; i < nr; i++) { + a = actions[i]; atomic_dec(&a->tcfa_refcnt); + } } int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, char *name, int ovr, int bind, - struct list_head *actions) + struct tc_action **actions, int *nr) { struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; struct 
tc_action *act; int err; int i; + int n = 0; err = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, NULL); if (err < 0) @@ -734,17 +737,19 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, act->order = i; if (ovr) atomic_inc(&act->tcfa_refcnt); - list_add_tail(&act->list, actions); + actions[n++] = act; } + *nr = n; /* Remove the temp refcnt which was necessary to protect against * destroying an existing action which was being replaced */ - cleanup_a(actions, ovr); + cleanup_a(actions, n, ovr); return 0; err: - tcf_action_destroy(actions, bind); + tcf_action_destroy(actions, n, bind); + *nr = 0; return err; } @@ -792,9 +797,9 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *p, return -1; } -static int tca_get_fill(struct sk_buff *skb, struct list_head *actions, - u32 portid, u32 seq, u16 flags, int event, int bind, - int ref) +static int tca_get_fill(struct sk_buff *skb, struct tc_action **actions, + int nr, u32 portid, u32 seq, u16 flags, int event, + int bind, int ref) { struct tcamsg *t; struct nlmsghdr *nlh; @@ -813,7 +818,7 @@ static int tca_get_fill(struct sk_buff *skb, struct list_head *actions, if (nest == NULL) goto out_nlmsg_trim; - if (tcf_action_dump(skb, actions, bind, ref) < 0) + if (tcf_action_dump(skb, actions, nr, bind, ref) < 0) goto out_nlmsg_trim; nla_nest_end(skb, nest); @@ -828,14 +833,14 @@ static int tca_get_fill(struct sk_buff *skb, struct list_head *actions, static int tcf_get_notify(struct net *net, u32 portid, struct nlmsghdr *n, - struct list_head *actions, int event) + struct tc_action **actions, int nr, int event) { struct sk_buff *skb; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; - if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, event, + if (tca_get_fill(skb, actions, nr, portid, n->nlmsg_seq, 0, event, 0, 0) <= 0) { kfree_skb(skb); return -EINVAL; @@ -949,7 +954,8 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, } static int 
-tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions, +tcf_del_notify(struct net *net, struct nlmsghdr *n, + struct tc_action **actions, int nr, u32 portid) { int ret; @@ -959,14 +965,14 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions, if (!skb) return -ENOBUFS; - if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, RTM_DELACTION, - 0, 1) <= 0) { + if (tca_get_fill(skb, actions, nr, portid, n->nlmsg_seq, 0, + RTM_DELACTION, 0, 1) <= 0) { kfree_skb(skb); return -EINVAL; } /* now do the delete */ - ret = tcf_action_destroy(actions, 0); + ret = tcf_action_destroy(actions, nr, 0); if (ret < 0) { kfree_skb(skb); return ret; @@ -983,10 +989,10 @@ static int tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, u32 portid, int event) { - int i, ret; + int i, ret, nr = 0; struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; struct tc_action *act; - LIST_HEAD(actions); + struct tc_action **actions; ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, NULL); if (ret < 0) @@ -999,6 +1005,11 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, return -EINVAL; } + actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *), + GFP_KERNEL); + if (!actions) + return -ENOMEM; + for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { act = tcf_action_get_1(net, tb[i], n, portid); if (IS_ERR(act)) { @@ -1006,25 +1017,28 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, goto err; } act->order = i; - list_add_tail(&act->list, &actions); + actions[nr++] = act; } if (event == RTM_GETACTION) - ret = tcf_get_notify(net, portid, n, &actions, event); + ret = tcf_get_notify(net, portid, n, actions, nr, event); else { /* delete */ - ret = tcf_del_notify(net, n, &actions, portid); + ret = tcf_del_notify(net, n, actions, nr, portid); if (ret) goto err; + kfree(actions); return ret; } err: if (event != RTM_GETACTION) - tcf_action_destroy(&actions, 0); + tcf_action_destroy(actions, nr, 
0); + kfree(actions); return ret; } static int -tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions, +tcf_add_notify(struct net *net, struct nlmsghdr *n, + struct tc_action **actions, int nr, u32 portid) { struct sk_buff *skb; @@ -1034,7 +1048,7 @@ tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions, if (!skb) return -ENOBUFS; - if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, n->nlmsg_flags, + if (tca_get_fill(skb, actions, nr, portid, n->nlmsg_seq, n->nlmsg_flags, RTM_NEWACTION, 0, 0) <= 0) { kfree_skb(skb); return -EINVAL; @@ -1050,14 +1064,24 @@ tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions, static int tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n, u32 portid, int ovr) { - int ret = 0; - LIST_HEAD(actions); + int ret = 0, nr; + struct tc_action **actions; - ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, &actions); - if (ret) + actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *), + GFP_KERNEL); + if (!actions) + return -ENOMEM; + + ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, + actions, &nr); + if (ret) { + kfree(actions); return ret; + } - return tcf_add_notify(net, n, &actions, portid); + ret = tcf_add_notify(net, n, actions, nr, portid); + kfree(actions); + return ret; } static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 39da0c5801c908..64c81f77203caf 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -834,8 +834,7 @@ void tcf_exts_destroy(struct tcf_exts *exts) #ifdef CONFIG_NET_CLS_ACT LIST_HEAD(actions); - tcf_exts_to_list(exts, &actions); - tcf_action_destroy(&actions, TCA_ACT_UNBIND); + tcf_action_destroy(exts->actions, exts->nr_actions, TCA_ACT_UNBIND); kfree(exts->actions); exts->nr_actions = 0; #endif @@ -860,17 +859,14 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, exts->actions[0] = 
act; exts->nr_actions = 1; } else if (exts->action && tb[exts->action]) { - LIST_HEAD(actions); - int err, i = 0; + int err; err = tcf_action_init(net, tp, tb[exts->action], rate_tlv, NULL, ovr, TCA_ACT_BIND, - &actions); + exts->actions, + &exts->nr_actions); if (err) return err; - list_for_each_entry(act, &actions, list) - exts->actions[i++] = act; - exts->nr_actions = i; } } #else @@ -922,14 +918,12 @@ int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts) * tc data even if iproute2 was newer - jhs */ if (exts->type != TCA_OLD_COMPAT) { - LIST_HEAD(actions); - nest = nla_nest_start(skb, exts->action); if (nest == NULL) goto nla_put_failure; - tcf_exts_to_list(exts, &actions); - if (tcf_action_dump(skb, &actions, 0, 0) < 0) + if (tcf_action_dump(skb, exts->actions, + exts->nr_actions, 0, 0) < 0) goto nla_put_failure; nla_nest_end(skb, nest); } else if (exts->police) { From 4d3f831b31424a4deb690e353969e6c647e152fd Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 5 Sep 2017 15:05:51 +0300 Subject: [PATCH 22/34] net/mlx5e: Fix erroneous freeing of encap header buffer In case the neighbour for the encap header isn't valid we send off a neighbour update request but we free the encap header buffer. We still need it as a base for allocating an encap header once the neighbour is available. Fix that by skipping freeing it if we wait for neighbour. 
Issue: 1120257 Change-Id: I8c266cc7f5293670fb1a35e8e072fded2397737a Fixes: 232c001398ae ('net/mlx5e: Add support to neighbour update flow') Signed-off-by: Paul Blakey --- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index a469ceb811f722..4871d0a33e629e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1642,7 +1642,7 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, break; default: err = -EOPNOTSUPP; - goto out; + goto free_encap; } fl4.flowi4_tos = tun_key->tos; fl4.daddr = tun_key->u.ipv4.dst; @@ -1651,7 +1651,7 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, err = mlx5e_route_lookup_ipv4(priv, mirred_dev, &out_dev, &fl4, &n, &ttl); if (err) - goto out; + goto free_encap; /* used by mlx5e_detach_encap to lookup a neigh hash table * entry in the neigh hash table when a user deletes a rule @@ -1668,7 +1668,7 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, */ err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e); if (err) - goto out; + goto free_encap; read_lock_bh(&n->lock); nud_state = n->nud_state; @@ -1708,8 +1708,9 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, destroy_neigh_entry: mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); -out: +free_encap: kfree(encap_header); +out: if (n) neigh_release(n); return err; @@ -1746,7 +1747,7 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, break; default: err = -EOPNOTSUPP; - goto out; + goto free_encap; } fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label); @@ -1756,7 +1757,7 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, err = mlx5e_route_lookup_ipv6(priv, mirred_dev, &out_dev, &fl6, &n, &ttl); if (err) - goto 
out; + goto free_encap; /* used by mlx5e_detach_encap to lookup a neigh hash table * entry in the neigh hash table when a user deletes a rule @@ -1773,7 +1774,7 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, */ err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e); if (err) - goto out; + goto free_encap; read_lock_bh(&n->lock); nud_state = n->nud_state; @@ -1814,8 +1815,9 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, destroy_neigh_entry: mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); -out: +free_encap: kfree(encap_header); +out: if (n) neigh_release(n); return err; From 9162e9f6267f6ccf351663cade05b9b7adbe0dd4 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 20 Sep 2017 09:18:45 -0700 Subject: [PATCH 23/34] net_sched: remove cls_flower idr on failure Fixes: c15ab236d69d ("net/sched: Change cls_flower to use IDR") Cc: Chris Mi Cc: Jiri Pirko Signed-off-by: Cong Wang Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/cls_flower.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index e3ed00a24dc0d7..7970f891da2f52 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -947,28 +947,28 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, if (!tc_flags_valid(fnew->flags)) { err = -EINVAL; - goto errout; + goto errout_idr; } } err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr); if (err) - goto errout; + goto errout_idr; err = fl_check_assign_mask(head, &mask); if (err) - goto errout; + goto errout_idr; if (!tc_skip_sw(fnew->flags)) { if (!fold && fl_lookup(head, &fnew->mkey)) { err = -EEXIST; - goto errout; + goto errout_idr; } err = rhashtable_insert_fast(&head->ht, &fnew->ht_node, head->ht_params); if (err) - goto errout; + goto errout_idr; } if (!tc_skip_hw(fnew->flags)) { @@ -977,7 +977,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, &mask.key, 
fnew); if (err) - goto errout; + goto errout_idr; } if (!tc_in_hw(fnew->flags)) @@ -1006,6 +1006,9 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, kfree(tb); return 0; +errout_idr: + if (fnew->handle) + idr_remove_ext(&head->handle_idr, fnew->handle); errout: tcf_exts_destroy(&fnew->exts); kfree(fnew); From f03178675ccbc447b1ca7fa98786e8e3a2b8a4f2 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Sun, 1 Oct 2017 15:05:45 +0300 Subject: [PATCH 24/34] net/mlx5e: Fix double encap cleanup Since mlx5e_tc_add_fdb_flow() didn't attach encap, it shouldn't release it on error. Before this commit, on error mlx5e_tc_add_fdb_flow() detached and released the encap even though a caller is going over the list that was freed. The second release is when actually cleaning the flow. Move the release on error to the caller that actually attached the encap. Issue: 1147629 Fixes: aa0cbbae5d36 ("net/mlx5e: Properly deal with resource cleanup when adding TC flow fails") Change-Id: I5f380ea04834974c6eeb563e9beaf627084631ee Signed-off-by: Roi Dayan --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 4871d0a33e629e..acec029adbe53a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -370,8 +370,6 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, err_mod_hdr: mlx5_eswitch_del_vlan_action(esw, attr); err_add_vlan: - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) - mlx5e_detach_encap(priv, flow); return rule; } @@ -2061,6 +2059,9 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, if (err < 0) goto err_handle_encap_flow; flow->rule = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow); + if (IS_ERR(flow->rule)) + if (flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) + mlx5e_detach_encap(priv, flow); } else { err = 
parse_tc_nic_actions(priv, f->exts, parse_attr, flow); if (err < 0) From 016b216924499b1cd9fc3fb1e692a7bc686b7af4 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 25 Jun 2017 12:38:45 +0300 Subject: [PATCH 25/34] net/mlx5: Add hairpin definitions to the FW API Add hairpin definitions to the IFC file. issue: 1068457 Change-Id: I3fc9dba9ad80a263b00c38f36672284d83094f3e Signed-off-by: Or Gerlitz --- include/linux/mlx5/mlx5_ifc.h | 43 +++++++++++++++++++++++++++++------ 1 file changed, 36 insertions(+), 7 deletions(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index c99daffc3c3ce8..91335eaa8eb573 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -785,7 +785,10 @@ enum { }; struct mlx5_ifc_cmd_hca_cap_bits { - u8 reserved_at_0[0x80]; + u8 reserved_at_0[0x30]; + u8 vhca_id[0x10]; + + u8 reserved_at_40[0x40]; u8 log_max_srq_sz[0x8]; u8 log_max_qp_sz[0x8]; @@ -1012,7 +1015,14 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_3b8[0x3]; u8 log_min_stride_sz_sq[0x5]; - u8 reserved_at_3c0[0x1b]; + u8 hairpin[0x1]; + u8 reserved_at_3c1[0x2]; + u8 log_max_hairpin_queues[0x5]; + u8 reserved_at_3c8[0x3]; + u8 log_max_hairpin_wq_data_sz[0x5]; + u8 reserved_at_3d0[0x3]; + u8 log_max_hairpin_num_packets[0x5]; + u8 reserved_at_3d8[0x3]; u8 log_max_wq_sz[0x5]; u8 nic_vport_change_event[0x1]; @@ -1148,7 +1158,12 @@ struct mlx5_ifc_wq_bits { u8 reserved_at_118[0x3]; u8 log_wq_sz[0x5]; - u8 reserved_at_120[0x15]; + u8 reserved_at_120[0x3]; + u8 log_hairpin_num_packets[0x5]; + u8 reserved_at_128[0x3]; + u8 log_hairpin_data_sz[0x5]; + u8 reserved_at_130[0x5]; + u8 log_wqe_num_of_strides[0x3]; u8 two_byte_shift_en[0x1]; u8 reserved_at_139[0x4]; @@ -2440,7 +2455,8 @@ struct mlx5_ifc_sqc_bits { u8 state[0x4]; u8 reg_umr[0x1]; u8 allow_swp[0x1]; - u8 reserved_at_e[0x12]; + u8 hairpin[0x1]; + u8 reserved_at_f[0x11]; u8 reserved_at_20[0x8]; u8 user_index[0x18]; @@ -2448,7 +2464,13 @@ struct mlx5_ifc_sqc_bits { u8 
reserved_at_40[0x8]; u8 cqn[0x18]; - u8 reserved_at_60[0x90]; + u8 reserved_at_60[0x8]; + u8 hairpin_peer_rq[0x18]; + + u8 reserved_at_80[0x10]; + u8 hairpin_peer_vhca[0x10]; + + u8 reserved_at_a0[0x50]; u8 packet_pacing_rate_limit_index[0x10]; u8 tis_lst_sz[0x10]; @@ -2520,7 +2542,8 @@ struct mlx5_ifc_rqc_bits { u8 state[0x4]; u8 reserved_at_c[0x1]; u8 flush_in_error_en[0x1]; - u8 reserved_at_e[0x12]; + u8 hairpin[0x1]; + u8 reserved_at_f[0x11]; u8 reserved_at_20[0x8]; u8 user_index[0x18]; @@ -2534,7 +2557,13 @@ struct mlx5_ifc_rqc_bits { u8 reserved_at_80[0x8]; u8 rmpn[0x18]; - u8 reserved_at_a0[0xe0]; + u8 reserved_at_a0[0x8]; + u8 hairpin_peer_sq[0x18]; + + u8 reserved_at_c0[0x10]; + u8 hairpin_peer_vhca[0x10]; + + u8 reserved_at_e0[0xa0]; struct mlx5_ifc_wq_bits wq; }; From 744be84b75051e5f0590d9520c5beed085c0a27b Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 25 Jun 2017 18:10:25 +0300 Subject: [PATCH 26/34] net/mlx5e: Hairpin low-level objects setup Low level code to setup hairpin contexts (SQ, RQ, TIR) Deal with: - Create RQ/SQ - Modify RQ/SQ (rst2rdy and rdy2rst) - Destroy RQ/SQ - Create TIR - Destroy TIR issue: 1068457 Change-Id: I7112408a5b71b58be7a4ea024ad06e187f8a6564 Signed-off-by: Or Gerlitz --- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 412 ++++++++++++++++++ 1 file changed, 412 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index acec029adbe53a..a856780ed1c627 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -91,6 +91,31 @@ enum { #define MLX5E_TC_TABLE_NUM_GROUPS 4 #define MLX5E_TC_TABLE_MAX_GROUP_SIZE (1 << 16) +struct mlx5_hairpin_params { + int num_channels; + u8 log_num_packets; + u8 log_data_size; +}; + +struct mlx5_hairpin_ctx { + u32 *rqn; + u32 *sqn; + + u32 tirn; + + struct mlx5_core_dev *mdev; + struct mlx5_hairpin_params params; +}; + +int mlx5_hairpin_set(struct mlx5_core_dev 
*func_mdev, + struct mlx5_core_dev *peer_mdev, + struct mlx5_hairpin_params *params, + struct mlx5_hairpin_ctx **_func_ctx, + struct mlx5_hairpin_ctx **_peer_ctx); + +void mlx5_hairpin_unset(struct mlx5_hairpin_ctx *func_ctx, + struct mlx5_hairpin_ctx *peer_ctx); + struct mod_hdr_key { int num_actions; void *actions; @@ -2190,3 +2215,390 @@ void mlx5e_tc_cleanup(struct mlx5e_priv *priv) tc->t = NULL; } } + +static +int mlx5_hairpin_create_rq(struct mlx5_core_dev *mdev, + struct mlx5_hairpin_params *params, + int *rqn) +{ + void *in, *rqc, *wq; + int inlen, err; + + inlen = MLX5_ST_SZ_BYTES(create_rq_in); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + rqc = MLX5_ADDR_OF(create_rq_in, in, ctx); + wq = MLX5_ADDR_OF(rqc, rqc, wq); + + MLX5_SET(rqc, rqc, hairpin, 1); + MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST); + + MLX5_SET(wq, wq, log_hairpin_num_packets, params->log_num_packets); + MLX5_SET(wq, wq, log_hairpin_data_sz, params->log_data_size); + + err = mlx5_core_create_rq(mdev, in, inlen, rqn); + + kvfree(in); + return err; +} + +static +int mlx5_hairpin_create_sq(struct mlx5_core_dev *mdev, + struct mlx5_hairpin_params *params, + int *sqn) +{ + void *in, *sqc, *wq; + int inlen, err; + + inlen = MLX5_ST_SZ_BYTES(create_sq_in); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + sqc = MLX5_ADDR_OF(create_sq_in, in, ctx); + wq = MLX5_ADDR_OF(sqc, sqc, wq); + + MLX5_SET(sqc, sqc, hairpin, 1); + MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); + + MLX5_SET(wq, wq, log_hairpin_num_packets, params->log_num_packets); + MLX5_SET(wq, wq, log_hairpin_data_sz, params->log_data_size); + + err = mlx5_core_create_sq(mdev, in, inlen, sqn); + + kvfree(in); + return err; +} + +static +int mlx5_hairpin_create_tir(struct mlx5_core_dev *mdev, u32 rqn, u32 *tirn) +{ + void *in, *tirc; + int inlen, err; + + inlen = MLX5_ST_SZ_BYTES(create_tir_in); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + tirc = MLX5_ADDR_OF(create_tir_in, 
in, ctx); + + MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT); + MLX5_SET(tirc, tirc, inline_rqn, rqn); + MLX5_SET(tirc, tirc, transport_domain, mdev->mlx5e_res.td.tdn); + + err = mlx5_core_create_tir(mdev, in, inlen, tirn); + + kvfree(in); + return err; +} + +static +int mlx5_hairpin_alloc(struct mlx5_core_dev *mdev, + struct mlx5_hairpin_params *params, + struct mlx5_hairpin_ctx **_ctx) +{ + + struct mlx5_hairpin_ctx *ctx; + int i, j, size, err; + + size = sizeof(*ctx) + sizeof(u32) * 2 * params->num_channels; + ctx = kvzalloc(size, GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->rqn = (void *)ctx + sizeof(*ctx); + ctx->sqn = ctx->rqn + params->num_channels; + + for (i = 0; i < params->num_channels; i++) { + err = mlx5_hairpin_create_rq(mdev, params, &ctx->rqn[i]); + if (err) + goto out_err_rq; + } + + for (i = 0; i < params->num_channels; i++) { + err = mlx5_hairpin_create_sq(mdev, params, &ctx->sqn[i]); + if (err) + goto out_err_sq; + } + + err = mlx5_hairpin_create_tir(mdev, ctx->rqn[0], &ctx->tirn); + if (err) + goto out_err_tir; + + ctx->mdev = mdev; + ctx->params = *params; + *_ctx = ctx; + + return 0; + +out_err_tir: + i = params->num_channels; +out_err_sq: + for (j = 0; j < i; j++) + mlx5_core_destroy_sq(mdev, ctx->sqn[j]); + i = params->num_channels; +out_err_rq: + for (j = 0; j < i; j++) + mlx5_core_destroy_rq(mdev, ctx->rqn[j]); + + kfree(ctx); + return err; +} + +static +void mlx5_hairpin_free(struct mlx5_hairpin_ctx *ctx) +{ + int i; + + mlx5_core_destroy_tir(ctx->mdev, ctx->tirn); + + for (i = 0; i < ctx->params.num_channels; i++) { + mlx5_core_destroy_rq(ctx->mdev, ctx->rqn[i]); + mlx5_core_destroy_sq(ctx->mdev, ctx->sqn[i]); + } + + kvfree(ctx); +} + +static +int mlx5_hairpin_modify_rq(struct mlx5_hairpin_ctx *ctx, int index, + int curr_state, int next_state, + u16 peer_vhca, u32 peer_sq) +{ + void *in, *rqc; + int inlen, err; + + inlen = MLX5_ST_SZ_BYTES(modify_rq_in); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return 
-ENOMEM; + + rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); + + if (next_state == MLX5_RQC_STATE_RDY) { + MLX5_SET(rqc, rqc, hairpin_peer_sq, peer_sq); + MLX5_SET(rqc, rqc, hairpin_peer_vhca, peer_vhca); + } + + MLX5_SET(modify_rq_in, in, rq_state, curr_state); + MLX5_SET(rqc, rqc, state, next_state); + + err = mlx5_core_modify_rq(ctx->mdev, ctx->rqn[index], in, inlen); + + kvfree(in); + return err; +} + +static +int mlx5_hairpin_modify_sq(struct mlx5_hairpin_ctx *ctx, int index, + int curr_state, int next_state, + u16 peer_vhca, u32 peer_rq) +{ + void *in, *sqc; + int inlen, err; + + inlen = MLX5_ST_SZ_BYTES(modify_sq_in); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); + + if (next_state == MLX5_RQC_STATE_RDY) { + MLX5_SET(sqc, sqc, hairpin_peer_rq, peer_rq); + MLX5_SET(sqc, sqc, hairpin_peer_vhca, peer_vhca); + } + + MLX5_SET(modify_sq_in, in, sq_state, curr_state); + MLX5_SET(sqc, sqc, state, next_state); + + err = mlx5_core_modify_sq(ctx->mdev, ctx->sqn[index], in, inlen); + + kvfree(in); + return err; +} + +static +int mlx5_hairpin_set_sqs(struct mlx5_hairpin_ctx *peer_ctx, + struct mlx5_hairpin_ctx *func_ctx) /* set peer SQs */ +{ + int i, err; + + for (i = 0; i < peer_ctx->params.num_channels; i++) { + err = mlx5_hairpin_modify_sq(peer_ctx, i, + MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY, + MLX5_CAP_GEN(func_ctx->mdev, vhca_id), + func_ctx->rqn[i]); + + if (err) + return err; /* FIXME modify to rst 0..i */ + } + + return 0; +} + +static +int mlx5_hairpin_set_rqs(struct mlx5_hairpin_ctx *func_ctx, + struct mlx5_hairpin_ctx *peer_ctx) /* set func RQs */ +{ + int i, err; + + for (i = 0; i < func_ctx->params.num_channels; i++) { + err = mlx5_hairpin_modify_rq(func_ctx, i, + MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY, + MLX5_CAP_GEN(peer_ctx->mdev, vhca_id), + peer_ctx->sqn[i]); + + if (err) + return err; /* FIXME modify to rst 0..i */ + } + + return 0; +} + +static +int mlx5_hairpin_set_rss(struct 
mlx5_hairpin_ctx *func_ctx) /* set RSS for func RQs */ +{ + return 0; +} + +static +void mlx5_hairpin_unset_rss(struct mlx5_hairpin_ctx *func_ctx) /* unset RSS for func RQs */ +{ +} + +static +void mlx5_hairpin_unset_rqs(struct mlx5_hairpin_ctx *func_ctx) +{ + int i; + + for (i = 0; i < func_ctx->params.num_channels; i++) + mlx5_hairpin_modify_rq(func_ctx, i, MLX5_RQC_STATE_RDY, + MLX5_RQC_STATE_RST, 0, 0); +} + +static +void mlx5_hairpin_unset_sqs(struct mlx5_hairpin_ctx *func_ctx) +{ + int i; + + for (i = 0; i < func_ctx->params.num_channels; i++) + mlx5_hairpin_modify_sq(func_ctx, i, MLX5_SQC_STATE_RDY, + MLX5_SQC_STATE_RST, 0, 0); +} + +static +int mlx5_hairpin_set_queues(struct mlx5_hairpin_ctx *func_ctx, + struct mlx5_hairpin_ctx *peer_ctx) +{ + int err; + + err = mlx5_hairpin_set_sqs(peer_ctx, func_ctx); /* set peer SQs */ + if (err) + goto err_set_sqs; + + err = mlx5_hairpin_set_rqs(func_ctx, peer_ctx); /* set func RQs */ + if (err) + goto err_set_rqs; + + printk(KERN_ERR "%s %s tirn %x rqn %x sqn %x\n", __func__, func_ctx->mdev->priv.name, func_ctx->tirn, func_ctx->rqn[0], func_ctx->sqn[0]); + + err = mlx5_hairpin_set_rss(func_ctx); /* set RSS for func RQs */ + if (err) + goto err_set_rss; + + return 0; + +err_set_rss: + mlx5_hairpin_unset_rqs(func_ctx); /* unset func RQs */ +err_set_rqs: + mlx5_hairpin_unset_sqs(peer_ctx); /* unset peer SQs */ +err_set_sqs: + return err; +} + +static +void mlx5_hairpin_unset_queues(struct mlx5_hairpin_ctx *func_ctx, + struct mlx5_hairpin_ctx *peer_ctx) +{ + mlx5_hairpin_unset_rss(func_ctx); /* unset RSS for func RQs */ + mlx5_hairpin_unset_rqs(func_ctx); /* unset func RQs */ + mlx5_hairpin_unset_sqs(peer_ctx); /* unset peer SQs */ +} + +int mlx5_hairpin_set(struct mlx5_core_dev *func_mdev, + struct mlx5_core_dev *peer_mdev, + struct mlx5_hairpin_params *params, + struct mlx5_hairpin_ctx **_func_ctx, + struct mlx5_hairpin_ctx **_peer_ctx) +{ + struct mlx5_hairpin_ctx *func_ctx, *peer_ctx = NULL; + bool self_hairpin = 
false; + int err; + + if (func_mdev == peer_mdev) + self_hairpin = true; + + /* alloc func --> peer hairpin */ + err = mlx5_hairpin_alloc(func_mdev, params, &func_ctx); + if (err) + goto err_alloc_func; + + /* alloc peer --> func hairpin */ + if (!self_hairpin) { + err = mlx5_hairpin_alloc(peer_mdev, params, &peer_ctx); + if (err) + goto err_alloc_peer; + } else + peer_ctx = func_ctx; + + /* set func --> peer hairpin */ + err = mlx5_hairpin_set_queues(func_ctx, peer_ctx); + if (err) + goto err_set_func_queues; + + /* set peer --> func hairpin */ + if (!self_hairpin) { + err = mlx5_hairpin_set_queues(peer_ctx, func_ctx); + if (err) + goto err_set_peer_queues; + } + + *_func_ctx = func_ctx; + *_peer_ctx = peer_ctx; + return 0; + +err_set_peer_queues: + mlx5_hairpin_unset_queues(func_ctx, peer_ctx); +err_set_func_queues: + if (!self_hairpin) + mlx5_hairpin_free(peer_ctx); +err_alloc_peer: + mlx5_hairpin_free(func_ctx); +err_alloc_func: + return err; +} + +void mlx5_hairpin_unset(struct mlx5_hairpin_ctx *func_ctx, + struct mlx5_hairpin_ctx *peer_ctx) +{ + bool self_hairpin = false; + + if (func_ctx == peer_ctx) + self_hairpin = true; + + /* unset peer --> func hairpin */ + if (!self_hairpin) + mlx5_hairpin_unset_queues(peer_ctx, func_ctx); + + /* unset func --> peer hairpin */ + mlx5_hairpin_unset_queues(func_ctx, peer_ctx); + + if (!self_hairpin) + mlx5_hairpin_free(peer_ctx); + + mlx5_hairpin_free(func_ctx); +} From ab425d9a6779a02325d750c2d27f486acd40748f Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 25 Jun 2017 17:40:09 +0300 Subject: [PATCH 27/34] net/mlx5e: Support offloading TC NIC hairpin flows Hairpin is TC **NIC** rule that involves forwarding. All hairpin rules from the current NIC device (called "func" in the code) to a given peer are set on the same hairpin RQ/SQ pair. The hairpin RQ/SQ pair is set on demand and removed when there are no TC rules that need it. When we set the func --> peer pair we immediately set also the peer --> func pair. 
We delete both when there's no demand for either of them (note add/del are under rtnl). TC rule that matches on ICMP, does header re-write of the dst mac and hairpin from RX of P1 to TX on P2 (P1 and P2 are two VF devices in this example): tc filter add dev enp1s2f1 protocol ip parent ffff: prio 2 flower skip_sw ip_proto icmp action pedit ex munge eth dst set 10:22:33:44:55:66 pipe action mirred egress redirect dev enp1s2f2 issue: 1068457 Change-Id: I1ea222342c3189c27cdc0c0158828a870d5e558a Signed-off-by: Or Gerlitz --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + .../net/ethernet/mellanox/mlx5/core/en_tc.c | 192 +++++++++++++++++- 2 files changed, 184 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 0039b4725405fc..9c8435b045eb9b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -651,6 +651,7 @@ struct mlx5e_tc_table { struct rhashtable ht; DECLARE_HASHTABLE(mod_hdr_tbl, 8); + DECLARE_HASHTABLE(hairpin_tbl, 8); }; struct mlx5e_vlan_table { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index a856780ed1c627..904aecd2ef243a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -56,12 +56,14 @@ struct mlx5_nic_flow_attr { u32 action; u32 flow_tag; u32 mod_hdr_id; + u32 hairpin_tirn; }; enum { MLX5E_TC_FLOW_ESWITCH = BIT(0), MLX5E_TC_FLOW_NIC = BIT(1), MLX5E_TC_FLOW_OFFLOADED = BIT(2), + MLX5E_TC_FLOW_HAIRPIN = BIT(3), }; struct mlx5e_tc_flow { @@ -71,6 +73,7 @@ struct mlx5e_tc_flow { struct mlx5_flow_handle *rule; struct list_head encap; /* flows sharing the same encap ID */ struct list_head mod_hdr; /* flows sharing the same mod hdr ID */ + struct list_head hairpin; /* flows sharing the same hairpin */ union { struct mlx5_esw_flow_attr esw_attr[0]; struct mlx5_nic_flow_attr 
nic_attr[0]; @@ -79,6 +82,7 @@ struct mlx5e_tc_flow { struct mlx5e_tc_flow_parse_attr { struct mlx5_flow_spec spec; + int peer_ifindex; int num_mod_hdr_actions; void *mod_hdr_actions; }; @@ -116,6 +120,18 @@ int mlx5_hairpin_set(struct mlx5_core_dev *func_mdev, void mlx5_hairpin_unset(struct mlx5_hairpin_ctx *func_ctx, struct mlx5_hairpin_ctx *peer_ctx); +struct mlx5e_hairpin_entry { + /* a node of a hash table which keeps all the hairpin entries */ + struct hlist_node hairpin_hlist; + + /* flows sharing the same hairpin */ + struct list_head flows; + + int peer_ifindex; + struct mlx5_hairpin_ctx *ctx; + struct mlx5e_priv *peer_priv; +}; + struct mod_hdr_key { int num_actions; void *actions; @@ -245,6 +261,125 @@ static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv, } } +static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv, + int peer_ifindex) +{ + struct mlx5e_hairpin_entry *hp; + + hash_for_each_possible(priv->fs.tc.hairpin_tbl, hp, + hairpin_hlist, peer_ifindex) { + if (hp->peer_ifindex == peer_ifindex) + return hp; + } + + return NULL; +} + +static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5e_tc_flow_parse_attr *parse_attr) +{ + int peer_ifindex = parse_attr->peer_ifindex; + int func_ifindex = priv->netdev->ifindex; + struct mlx5e_hairpin_entry *hp, *php; + struct mlx5_hairpin_params params; + struct mlx5e_priv *peer_priv; + struct net_device *peer_dev; + int err; + + if (!MLX5_CAP_GEN(priv->mdev, hairpin)) { + printk(KERN_ERR "%s hairpin cap not supported\n", __func__); + return -EOPNOTSUPP; + } else { + printk(KERN_ERR "%s hairpin cap supported vhca_id %d log_max_hairpin wq_data_sz %d num_packets %d queues %d\n", + __func__, MLX5_CAP_GEN(priv->mdev, vhca_id), MLX5_CAP_GEN(priv->mdev,log_max_hairpin_wq_data_sz), + MLX5_CAP_GEN(priv->mdev,log_max_hairpin_num_packets), MLX5_CAP_GEN(priv->mdev,log_max_hairpin_queues)); + } + + hp = mlx5e_hairpin_get(priv, peer_ifindex); + if (hp) + 
goto attach_flow; + + hp = kzalloc(sizeof(*hp), GFP_KERNEL); + php = kzalloc(sizeof(*php), GFP_KERNEL); + if (!hp || !php) { + err = -ENOMEM; + goto out_err; + } + + peer_dev = __dev_get_by_index(dev_net(priv->netdev), peer_ifindex); + peer_priv = netdev_priv(peer_dev); + + INIT_LIST_HEAD(&hp->flows); + hp->peer_ifindex = peer_ifindex; + hp->peer_priv = peer_priv; + + INIT_LIST_HEAD(&php->flows); + php->peer_ifindex = func_ifindex; + php->peer_priv = priv; + + params.num_channels = 1; /* no RSS */ + params.log_num_packets = 7; /* 128 packets */ + params.log_data_size = ilog2(roundup_pow_of_two(MLX5E_SW2HW_MTU(priv, priv->netdev->mtu))) + + params.log_num_packets; + + err = mlx5_hairpin_set(priv->mdev, peer_priv->mdev, &params, &hp->ctx, &php->ctx); + if (err) + goto out_err; + + hash_add(priv->fs.tc.hairpin_tbl, &hp->hairpin_hlist, peer_ifindex); + + /* for self hairpin, there's no peer hairpin entry, free it */ + if (func_ifindex != peer_ifindex) + hash_add(peer_priv->fs.tc.hairpin_tbl, &php->hairpin_hlist, func_ifindex); + else + kfree(php); + +attach_flow: + flow->nic_attr->hairpin_tirn = hp->ctx->tirn; + list_add(&flow->hairpin, &hp->flows); + + return 0; + +out_err: + kfree(hp); + kfree(php); + return err; +} + +static void mlx5e_hairpin_flow_rem(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) +{ + struct list_head *next = flow->hairpin.next; + + list_del(&flow->hairpin); + + /* no more hairpin flows for us, attemp to release the hairpin pair */ + if (list_empty(next)) { + struct mlx5e_hairpin_entry *hp, *php; + + hp = list_entry(next, struct mlx5e_hairpin_entry, flows); + php = mlx5e_hairpin_get(hp->peer_priv, priv->netdev->ifindex); + + /* peer still has flows, cleanup will take place later, + * this logic holds also for self hairpin. 
+ */ + if (!list_empty(&php->flows)) + return; + + mlx5_hairpin_unset(hp->ctx, php->ctx); + + hash_del(&hp->hairpin_hlist); + kfree(hp); + + /* avoid double free on self hairpin */ + if (hp != php) { + hash_del(&php->hairpin_hlist); + kfree(php); + } + } +} + static struct mlx5_flow_handle * mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow_parse_attr *parse_attr, @@ -252,7 +387,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, { struct mlx5_nic_flow_attr *attr = flow->nic_attr; struct mlx5_core_dev *dev = priv->mdev; - struct mlx5_flow_destination dest = {}; + struct mlx5_flow_destination dest[2] = {}; struct mlx5_flow_act flow_act = { .action = attr->action, .flow_tag = attr->flow_tag, @@ -261,18 +396,32 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, struct mlx5_fc *counter = NULL; struct mlx5_flow_handle *rule; bool table_created = false; - int err; + int err, dest_ix = 0; if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { - dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest.ft = priv->fs.vlan.ft.t; - } else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) { + err = mlx5e_hairpin_flow_add(priv, flow, parse_attr); + if (err) { + rule = ERR_PTR(err); + goto err_add_hairpin_flow; + } + dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dest[dest_ix].tir_num = attr->hairpin_tirn; + } else { + dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[dest_ix].ft = priv->fs.vlan.ft.t; + } + dest_ix++; + } + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { counter = mlx5_fc_create(dev, true); if (IS_ERR(counter)) return ERR_CAST(counter); - dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest.counter = counter; + dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest[dest_ix].counter = counter; + dest_ix++; } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { @@ -315,7 +464,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, 
parse_attr->spec.match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; rule = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec, - &flow_act, &dest, 1); + &flow_act, dest, dest_ix); if (IS_ERR(rule)) goto err_add_rule; @@ -331,8 +480,10 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) mlx5e_detach_mod_hdr(priv, flow); err_create_mod_hdr_id: + if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) + mlx5e_hairpin_flow_rem(priv, flow); +err_add_hairpin_flow: mlx5_fc_destroy(dev, counter); - return rule; } @@ -353,6 +504,9 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) mlx5e_detach_mod_hdr(priv, flow); + + if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) + mlx5e_hairpin_flow_rem(priv, flow); } static void mlx5e_detach_encap(struct mlx5e_priv *priv, @@ -1458,6 +1612,25 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return -EOPNOTSUPP; } + if (is_tcf_mirred_egress_redirect(a)) { + int ifindex = tcf_mirred_ifindex(a); + struct net_device *peer_dev; + + peer_dev = __dev_get_by_index(dev_net(priv->netdev), ifindex); + + if (priv->netdev->netdev_ops == peer_dev->netdev_ops) { /* FIXME this isn't enough */ + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + parse_attr->peer_ifindex = ifindex; + flow->flags |= MLX5E_TC_FLOW_HAIRPIN; + } else { + pr_err("devices %s %s not on same NIC, can't offload forwarding\n", + priv->netdev->name, peer_dev->name); + return -EINVAL; + } + continue; + } + if (is_tcf_skbedit_mark(a)) { u32 mark = tcf_skbedit_mark(a); @@ -2190,6 +2363,7 @@ int mlx5e_tc_init(struct mlx5e_priv *priv) struct mlx5e_tc_table *tc = &priv->fs.tc; hash_init(tc->mod_hdr_tbl); + hash_init(tc->hairpin_tbl); tc->ht_params = mlx5e_tc_flow_ht_params; return rhashtable_init(&tc->ht, &tc->ht_params); From e8e73b65d696d40892e98f1a64eb107db6b702b2 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Thu, 26 Oct 
2017 16:53:04 +0300 Subject: [PATCH 28/34] Revert "net/mlx5e: Fix double encap cleanup" This reverts commit f03178675ccbc447b1ca7fa98786e8e3a2b8a4f2. --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 904aecd2ef243a..7f93ed67d08815 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -549,6 +549,8 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, err_mod_hdr: mlx5_eswitch_del_vlan_action(esw, attr); err_add_vlan: + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) + mlx5e_detach_encap(priv, flow); return rule; } @@ -2257,9 +2259,6 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, if (err < 0) goto err_handle_encap_flow; flow->rule = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow); - if (IS_ERR(flow->rule)) - if (flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) - mlx5e_detach_encap(priv, flow); } else { err = parse_tc_nic_actions(priv, f->exts, parse_attr, flow); if (err < 0) From c21f1a11fa01f1684d2cb025fe141821a9aa3567 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 17 Oct 2017 12:33:43 +0200 Subject: [PATCH 29/34] net/mlx5e: Properly deal with encap flows add/del under neigh update Currently, the encap action offload is handled in the actions parse function and not in mlx5e_tc_add_fdb_flow() where we deal with all the other aspects of offloading actions (vlan, modify header) and the rule itself. When the neigh update code (mlx5e_tc_encap_flows_add()) recreates the encap entry and offloads the related flows, we wrongly call again into mlx5e_tc_add_fdb_flow(), this for itself would cause us to handle again the offloading of vlans and header re-write which puts things in non consistent state and step on freed memory (e.g the modify header parse buffer which is already freed). 
Since on error, mlx5e_tc_add_fdb_flow() detaches and may release the encap entry, it causes a corruption at the neigh update code which goes over the list of flows associated with this encap entry, or double free when the tc flow is later deleted by user-space. When neigh update (mlx5e_tc_encap_flows_del()) unoffloads the flows related to an encap entry which is now invalid, we do a partial repeat of the eswitch flow removal code which is wrong too. To fix things up we do the following: (1) handle the encap action offload in the eswitch flow add function mlx5e_tc_add_fdb_flow() as done for the other actions and the rule itself. (2) modify the neigh update code (mlx5e_tc_encap_flows_add/del) to only deal with the encap entry and rules delete/add and not with any of the other offloaded actions. Fixes: 232c001398ae ('net/mlx5e: Add support to neighbour update flow') Signed-off-by: Or Gerlitz Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 89 +++++++++++-------- 1 file changed, 54 insertions(+), 35 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 7f93ed67d08815..73ca600a2e369f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -81,10 +81,12 @@ struct mlx5e_tc_flow { }; struct mlx5e_tc_flow_parse_attr { + struct ip_tunnel_info tun_info; struct mlx5_flow_spec spec; int peer_ifindex; int num_mod_hdr_actions; void *mod_hdr_actions; + int mirred_ifindex; }; enum { @@ -512,6 +514,12 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, static void mlx5e_detach_encap(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow); +static int mlx5e_attach_encap(struct mlx5e_priv *priv, + struct ip_tunnel_info *tun_info, + struct net_device *mirred_dev, + struct net_device **encap_dev, + struct mlx5e_tc_flow *flow); + static struct mlx5_flow_handle * mlx5e_tc_add_fdb_flow(struct 
mlx5e_priv *priv, struct mlx5e_tc_flow_parse_attr *parse_attr, @@ -519,9 +527,27 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr *attr = flow->esw_attr; - struct mlx5_flow_handle *rule; + struct net_device *out_dev, *encap_dev = NULL; + struct mlx5_flow_handle *rule = NULL; + struct mlx5e_rep_priv *rpriv; + struct mlx5e_priv *out_priv; int err; + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) { + out_dev = __dev_get_by_index(dev_net(priv->netdev), + attr->parse_attr->mirred_ifindex); + err = mlx5e_attach_encap(priv, &parse_attr->tun_info, + out_dev, &encap_dev, flow); + if (err) { + rule = ERR_PTR(err); + if (err != -EAGAIN) + goto err_attach_encap; + } + out_priv = netdev_priv(encap_dev); + rpriv = out_priv->ppriv; + attr->out_rep = rpriv->rep; + } + err = mlx5_eswitch_add_vlan_action(esw, attr); if (err) { rule = ERR_PTR(err); @@ -537,10 +563,14 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, } } - rule = mlx5_eswitch_add_offloaded_rule(esw, &parse_attr->spec, attr); - if (IS_ERR(rule)) - goto err_add_rule; - + /* we get here if (1) there's no error (rule being null) or when + * (2) there's an encap action and we're on -EAGAIN (no valid neigh) + */ + if (rule != ERR_PTR(-EAGAIN)) { + rule = mlx5_eswitch_add_offloaded_rule(esw, &parse_attr->spec, attr); + if (IS_ERR(rule)) + goto err_add_rule; + } return rule; err_add_rule: @@ -551,6 +581,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, err_add_vlan: if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) mlx5e_detach_encap(priv, flow); +err_attach_encap: return rule; } @@ -579,6 +610,8 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) { + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_esw_flow_attr *esw_attr; struct mlx5e_tc_flow *flow; int err; @@ -594,10 +627,9 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv 
*priv, mlx5e_rep_queue_neigh_stats_work(priv); list_for_each_entry(flow, &e->flows, encap) { - flow->esw_attr->encap_id = e->encap_id; - flow->rule = mlx5e_tc_add_fdb_flow(priv, - flow->esw_attr->parse_attr, - flow); + esw_attr = flow->esw_attr; + esw_attr->encap_id = e->encap_id; + flow->rule = mlx5_eswitch_add_offloaded_rule(esw, &esw_attr->parse_attr->spec, esw_attr); if (IS_ERR(flow->rule)) { err = PTR_ERR(flow->rule); mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n", @@ -611,15 +643,13 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) { + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_tc_flow *flow; - struct mlx5_fc *counter; list_for_each_entry(flow, &e->flows, encap) { if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED; - counter = mlx5_flow_rule_counter(flow->rule); - mlx5_del_flow_rules(flow->rule); - mlx5_fc_destroy(priv->mdev, counter); + mlx5_eswitch_del_offloaded_rule(esw, flow->rule, flow->esw_attr); } } @@ -2151,7 +2181,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, if (is_tcf_mirred_egress_redirect(a)) { int ifindex = tcf_mirred_ifindex(a); - struct net_device *out_dev, *encap_dev = NULL; + struct net_device *out_dev; struct mlx5e_priv *out_priv; out_dev = __dev_get_by_index(dev_net(priv->netdev), ifindex); @@ -2165,17 +2195,13 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, rpriv = out_priv->ppriv; attr->out_rep = rpriv->rep; } else if (encap) { - err = mlx5e_attach_encap(priv, info, - out_dev, &encap_dev, flow); - if (err && err != -EAGAIN) - return err; + parse_attr->mirred_ifindex = ifindex; + parse_attr->tun_info = *info; + attr->parse_attr = parse_attr; attr->action |= MLX5_FLOW_CONTEXT_ACTION_ENCAP | MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; - out_priv = 
netdev_priv(encap_dev); - rpriv = out_priv->ppriv; - attr->out_rep = rpriv->rep; - attr->parse_attr = parse_attr; + /* attr->out_rep is resolved when we handle encap */ } else { pr_err("devices %s %s not on same switch HW, can't offload forwarding\n", priv->netdev->name, out_dev->name); @@ -2257,7 +2283,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { err = parse_tc_fdb_actions(priv, f->exts, parse_attr, flow); if (err < 0) - goto err_handle_encap_flow; + goto err_free; flow->rule = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow); } else { err = parse_tc_nic_actions(priv, f->exts, parse_attr, flow); @@ -2268,10 +2294,13 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, if (IS_ERR(flow->rule)) { err = PTR_ERR(flow->rule); - goto err_free; + if (err != -EAGAIN) + goto err_free; } - flow->flags |= MLX5E_TC_FLOW_OFFLOADED; + if (err != -EAGAIN) + flow->flags |= MLX5E_TC_FLOW_OFFLOADED; + err = rhashtable_insert_fast(&tc->ht, &flow->node, tc->ht_params); if (err) @@ -2285,16 +2314,6 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, err_del_rule: mlx5e_tc_del_flow(priv, flow); -err_handle_encap_flow: - if (err == -EAGAIN) { - err = rhashtable_insert_fast(&tc->ht, &flow->node, - tc->ht_params); - if (err) - mlx5e_tc_del_flow(priv, flow); - else - return 0; - } - err_free: kvfree(parse_attr); kfree(flow); From ff00ea4391a3d167016726146c9a3795a2fa165d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 5 Sep 2017 08:31:23 -0700 Subject: [PATCH 30/34] net: sched: don't use GFP_KERNEL under spin lock The new TC IDR code uses GFP_KERNEL under spin lock. 
Which leads to: [ 582.621091] BUG: sleeping function called from invalid context at ../mm/slab.h:416 [ 582.629721] in_atomic(): 1, irqs_disabled(): 0, pid: 3379, name: tc [ 582.636939] 2 locks held by tc/3379: [ 582.641049] #0: (rtnl_mutex){+.+.+.}, at: [] rtnetlink_rcv_msg+0x92e/0x1400 [ 582.650958] #1: (&(&tn->idrinfo->lock)->rlock){+.-.+.}, at: [] tcf_idr_create+0x2f0/0x8e0 [ 582.662217] Preemption disabled at: [ 582.662222] [] tcf_idr_create+0x2f0/0x8e0 [ 582.672592] CPU: 9 PID: 3379 Comm: tc Tainted: G W 4.13.0-rc7-debug-00648-g43503a79b9f0 #287 [ 582.683432] Hardware name: Dell Inc. PowerEdge R730/072T6D, BIOS 2.3.4 11/08/2016 [ 582.691937] Call Trace: ... [ 582.742460] kmem_cache_alloc+0x286/0x540 [ 582.747055] radix_tree_node_alloc.constprop.6+0x4a/0x450 [ 582.753209] idr_get_free_cmn+0x627/0xf80 ... [ 582.815525] idr_alloc_cmn+0x1a8/0x270 ... [ 582.833804] tcf_idr_create+0x31b/0x8e0 ... Try to preallocate the memory with idr_prealloc(GFP_KERNEL) (as suggested by Eric Dumazet), and change the allocation flags under spin lock. Change-Id: I2667c6883ffe1c8ceb7fd44ce7afe481bdb56ca4 Fixes: 65a206c01e8e ("net/sched: Change act_api and act_xxx modules to use IDR") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/sched/act_api.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 2ba51db8810fd6..7d079726688a72 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -281,25 +281,27 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, spin_lock_init(&p->tcfa_lock); /* user doesn't specify an index */ if (!index) { + idr_preload(GFP_KERNEL); spin_lock_bh(&idrinfo->lock); err = idr_alloc_ext(idr, NULL, &idr_index, 1, 0, - GFP_KERNEL); + GFP_ATOMIC); spin_unlock_bh(&idrinfo->lock); + idr_preload_end(); if (err) { err3: free_percpu(p->cpu_qstats); goto err2; } p->tcfa_index = idr_index; - } - else { + } else { + idr_preload(GFP_KERNEL); spin_lock_bh(&idrinfo->lock); err = idr_alloc_ext(idr, NULL, NULL, index, index + 1, - GFP_KERNEL); + GFP_ATOMIC); spin_unlock_bh(&idrinfo->lock); - if (err) { + idr_preload_end(); + if (err) goto err3; - } p->tcfa_index = index; } From b46a1c3794bdbe778035ff791956fc57d89dc1d1 Mon Sep 17 00:00:00 2001 From: Gavi Teitz Date: Tue, 14 Nov 2017 15:01:40 +0200 Subject: [PATCH 31/34] net/mlx5e: Enable stateless offloads for VF representor netdevs Brought the offload attribute set of the representors to be more similar to that of the regular NIC netdevs, in order to increase the representor's performance. 
The impact of this change can be seen in the following measurements: Baseline VF/VM: TCP Throughput [Gb/s] UDP Received PPS (Drop %) VF to VM 29.1 571K (0%) VM to VF 16.4 623K (27%) Representor/VM before current changes: TCP Throughput [Gb/s] UDP Received PPS (Drop %) Representor to VM 4.4 450K (0%) VM to Representor 0.6 232K (75%) Representor/VM after current changes: TCP Throughput [Gb/s] UDP Received PPS (Drop %) Representor to VM 40.8 462K (0%) VM to Representor 0.6 248K (74%) Issue: 1155559 Change-Id: I07ebdc98b1593ae4fd1719e3af383a9a9dbd6112 Signed-off-by: Gavi Teitz --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 45e60be9c27766..1f5ff642bcdc69 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -815,6 +815,16 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev) netdev->features |= NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_TC | NETIF_F_NETNS_LOCAL; netdev->hw_features |= NETIF_F_HW_TC; + netdev->hw_features |= NETIF_F_SG; + netdev->hw_features |= NETIF_F_IP_CSUM; + netdev->hw_features |= NETIF_F_IPV6_CSUM; + netdev->hw_features |= NETIF_F_GRO; + netdev->hw_features |= NETIF_F_TSO; + netdev->hw_features |= NETIF_F_TSO6; + netdev->hw_features |= NETIF_F_RXCSUM; + + netdev->features |= netdev->hw_features; + eth_hw_addr_random(netdev); } From 27b3360ed7dca1b9759dd1f19986ab8a2ac563ec Mon Sep 17 00:00:00 2001 From: Gavi Teitz Date: Wed, 15 Nov 2017 10:45:14 +0200 Subject: [PATCH 32/34] net/mlx5e: Change VF representor's RQ size and type Set the representor's rq params using the same method the standard nic netdev uses to set its rq params. This allowed the representor to use a striding rq if it is supported, and allowed for increasing the size of the representor's rq if it is not. 
This increases the representor's receive performance, as can be seen in the following measurements: Baseline VF/VM: TCP Throughput [Gb/s] UDP Received PPS (Drop %) VF to VM 29.1 571K (0%) VM to VF 16.4 623K (27%) Representor/VM before current changes: TCP Throughput [Gb/s] UDP Received PPS (Drop %) Representor to VM 40.8 462K (0%) VM to Representor 0.6 248K (74%) Representor/VM after current changes: TCP Throughput [Gb/s] UDP Received PPS (Drop %) Representor to VM 40.6 476K (0%) VM to Representor 20.2 620K (74%) Issue: 1155559 Change-Id: I6cd96f24d1c93da4dd2b8d80ce0ae4ab19028d2a Signed-off-by: Gavi Teitz --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 ++ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 7 +++---- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 9c8435b045eb9b..4ead3c951dc4a0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -932,6 +932,8 @@ void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode); void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params, u8 rq_type); +void mlx5e_set_rq_params(struct mlx5_core_dev *mdev, + struct mlx5e_params *params); static inline struct mlx5e_tx_wqe *mlx5e_post_nop(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 57f31fa478ceee..c7a7f951275133 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -115,7 +115,7 @@ void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev, MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)); } -static void mlx5e_set_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params) +void mlx5e_set_rq_params(struct 
mlx5_core_dev *mdev, struct mlx5e_params *params) { u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) && !params->xdp_prog && !MLX5_IPSEC_DEV(mdev) ? diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 1f5ff642bcdc69..0a8a353d9d640f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -787,15 +787,14 @@ static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev, MLX5_CQ_PERIOD_MODE_START_FROM_EQE; params->log_sq_size = MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; - params->rq_wq_type = MLX5_WQ_TYPE_LINKED_LIST; - params->log_rq_size = MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE; + + mlx5e_set_rq_params(mdev, params); params->rx_am_enabled = MLX5_CAP_GEN(mdev, cq_moderation); mlx5e_set_rx_cq_mode_params(params, cq_period_mode); params->tx_max_inline = mlx5e_get_max_inline_cap(mdev); params->num_tc = 1; - params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; mlx5_query_min_inline(mdev, &params->tx_min_inline_mode); } @@ -933,7 +932,7 @@ static struct mlx5e_profile mlx5e_rep_profile = { .max_nch = mlx5e_get_rep_max_num_channels, .update_carrier = NULL, .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep, - .rx_handlers.handle_rx_cqe_mpwqe = NULL /* Not supported */, + .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq, .max_tc = 1, }; From 5ef7c32ec72fe90f321bab8e9041fa85057d0dc5 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Thu, 30 Nov 2017 12:44:32 +0200 Subject: [PATCH 33/34] Revert "net/mlx5e: Change VF representor's RQ size and type" This reverts commit 27b3360ed7dca1b9759dd1f19986ab8a2ac563ec. 
--- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 -- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 7 ++++--- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 4ead3c951dc4a0..9c8435b045eb9b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -932,8 +932,6 @@ void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode); void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params, u8 rq_type); -void mlx5e_set_rq_params(struct mlx5_core_dev *mdev, - struct mlx5e_params *params); static inline struct mlx5e_tx_wqe *mlx5e_post_nop(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index c7a7f951275133..57f31fa478ceee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -115,7 +115,7 @@ void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev, MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)); } -void mlx5e_set_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params) +static void mlx5e_set_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params) { u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) && !params->xdp_prog && !MLX5_IPSEC_DEV(mdev) ? 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 0a8a353d9d640f..1f5ff642bcdc69 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -787,14 +787,15 @@ static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev, MLX5_CQ_PERIOD_MODE_START_FROM_EQE; params->log_sq_size = MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; - - mlx5e_set_rq_params(mdev, params); + params->rq_wq_type = MLX5_WQ_TYPE_LINKED_LIST; + params->log_rq_size = MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE; params->rx_am_enabled = MLX5_CAP_GEN(mdev, cq_moderation); mlx5e_set_rx_cq_mode_params(params, cq_period_mode); params->tx_max_inline = mlx5e_get_max_inline_cap(mdev); params->num_tc = 1; + params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; mlx5_query_min_inline(mdev, &params->tx_min_inline_mode); } @@ -932,7 +933,7 @@ static struct mlx5e_profile mlx5e_rep_profile = { .max_nch = mlx5e_get_rep_max_num_channels, .update_carrier = NULL, .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep, - .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq, + .rx_handlers.handle_rx_cqe_mpwqe = NULL /* Not supported */, .max_tc = 1, }; From 589d3f748326d11d5215f7ac9d79d8f8eb63a027 Mon Sep 17 00:00:00 2001 From: Gavi Teitz Date: Sun, 5 Nov 2017 13:00:28 +0200 Subject: [PATCH 34/34] net/mlx5: Increased the representor's RQ size Increased the RQ size of the representors, to match that of a standard mlx5 NIC netdev, which increased the performance of the representor's receive rate by about 50x. 
Issue: 1155559 Change-Id: I6cd96f24d1c93da4dd2b8d80ce0ae4ab19028d2a Signed-off-by: Gavi Teitz --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 1f5ff642bcdc69..71805de6654739 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -788,7 +788,7 @@ static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev, params->log_sq_size = MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; params->rq_wq_type = MLX5_WQ_TYPE_LINKED_LIST; - params->log_rq_size = MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE; + params->log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; params->rx_am_enabled = MLX5_CAP_GEN(mdev, cq_moderation); mlx5e_set_rx_cq_mode_params(params, cq_period_mode);