diff --git a/MAINTAINERS b/MAINTAINERS index fbbda4671e734d..f0bb5ee1787068 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10260,7 +10260,8 @@ F: Documentation/networking/device_drivers/ethernet/intel/ F: drivers/net/ethernet/intel/ F: drivers/net/ethernet/intel/*/ F: include/linux/avf/virtchnl.h -F: include/linux/net/intel/iidc.h +F: include/linux/net/intel/ +F: include/linux/net/intel/*/ INTEL ETHERNET PROTOCOL DRIVER FOR RDMA M: Mustafa Ismail diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index 9bc0a951989964..cec4a938fbd0f6 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -84,6 +84,12 @@ config E1000E_HWTS devices. The cross-timestamp is available through the PTP clock driver precise cross-timestamp ioctl (PTP_SYS_OFFSET_PRECISE). +config LIBIE + tristate + help + libie (Intel Ethernet library) is a common library containing + routines shared by several Intel Ethernet drivers. + config IGB tristate "Intel(R) 82575/82576 PCI-Express Gigabit Ethernet support" depends on PCI @@ -225,6 +231,7 @@ config I40E depends on PTP_1588_CLOCK_OPTIONAL depends on PCI select AUXILIARY_BUS + select LIBIE help This driver supports Intel(R) Ethernet Controller XL710 Family of devices. For more information on how to identify your adapter, go @@ -254,8 +261,9 @@ config IAVF tristate config I40EVF tristate "Intel(R) Ethernet Adaptive Virtual Function support" - select IAVF depends on PCI_MSI + select IAVF + select LIBIE help This driver supports virtual functions for Intel XL710, X710, X722, XXV710, and all devices advertising support for @@ -282,6 +290,7 @@ config ICE depends on GNSS || GNSS = n select AUXILIARY_BUS select DIMLIB + select LIBIE select NET_DEVLINK select PLDMFW help diff --git a/drivers/net/ethernet/intel/Makefile b/drivers/net/ethernet/intel/Makefile index d80d04132073ca..ce622b4d825df7 100644 --- a/drivers/net/ethernet/intel/Makefile +++ b/drivers/net/ethernet/intel/Makefile @@ -15,3 +15,4 @@ obj-$(CONFIG_I40E) += i40e/ obj-$(CONFIG_IAVF) += iavf/ obj-$(CONFIG_FM10K) += fm10k/ obj-$(CONFIG_ICE) += ice/ +obj-$(CONFIG_LIBIE) += libie/ diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index ed88e38d488b2d..25bb858268fcd6 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -383,259 +383,6 @@ int i40e_aq_set_rss_key(struct i40e_hw *hw, return i40e_aq_get_set_rss_key(hw, vsi_id, key, true); } -/* The i40e_ptype_lookup table is used to convert from the 8-bit ptype in the - * hardware to a bit-field that can be used by SW to more easily determine the - * packet type. - * - * Macros are used to shorten the table lines and make this table human - * readable. - * - * We store the PTYPE in the top byte of the bit field - this is just so that - * we can check that the table doesn't have a row missing, as the index into - * the table should be the PTYPE. 
- * - * Typical work flow: - * - * IF NOT i40e_ptype_lookup[ptype].known - * THEN - * Packet is unknown - * ELSE IF i40e_ptype_lookup[ptype].outer_ip == I40E_RX_PTYPE_OUTER_IP - * Use the rest of the fields to look at the tunnels, inner protocols, etc - * ELSE - * Use the enum i40e_rx_l2_ptype to decode the packet type - * ENDIF - */ - -/* macro to make the table lines short, use explicit indexing with [PTYPE] */ -#define I40E_PTT(PTYPE, OUTER_IP, OUTER_IP_VER, OUTER_FRAG, T, TE, TEF, I, PL)\ - [PTYPE] = { \ - 1, \ - I40E_RX_PTYPE_OUTER_##OUTER_IP, \ - I40E_RX_PTYPE_OUTER_##OUTER_IP_VER, \ - I40E_RX_PTYPE_##OUTER_FRAG, \ - I40E_RX_PTYPE_TUNNEL_##T, \ - I40E_RX_PTYPE_TUNNEL_END_##TE, \ - I40E_RX_PTYPE_##TEF, \ - I40E_RX_PTYPE_INNER_PROT_##I, \ - I40E_RX_PTYPE_PAYLOAD_LAYER_##PL } - -#define I40E_PTT_UNUSED_ENTRY(PTYPE) [PTYPE] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 } - -/* shorter macros makes the table fit but are terse */ -#define I40E_RX_PTYPE_NOF I40E_RX_PTYPE_NOT_FRAG -#define I40E_RX_PTYPE_FRG I40E_RX_PTYPE_FRAG -#define I40E_RX_PTYPE_INNER_PROT_TS I40E_RX_PTYPE_INNER_PROT_TIMESYNC - -/* Lookup table mapping in the 8-bit HW PTYPE to the bit field for decoding */ -struct i40e_rx_ptype_decoded i40e_ptype_lookup[BIT(8)] = { - /* L2 Packet types */ - I40E_PTT_UNUSED_ENTRY(0), - I40E_PTT(1, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), - I40E_PTT(2, L2, NONE, NOF, NONE, NONE, NOF, TS, PAY2), - I40E_PTT(3, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), - I40E_PTT_UNUSED_ENTRY(4), - I40E_PTT_UNUSED_ENTRY(5), - I40E_PTT(6, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), - I40E_PTT(7, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), - I40E_PTT_UNUSED_ENTRY(8), - I40E_PTT_UNUSED_ENTRY(9), - I40E_PTT(10, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), - I40E_PTT(11, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE), - I40E_PTT(12, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(13, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(14, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(15, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(16, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(17, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(18, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(19, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(20, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(21, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - - /* Non Tunneled IPv4 */ - I40E_PTT(22, IP, IPV4, FRG, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(23, IP, IPV4, NOF, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(24, IP, IPV4, NOF, NONE, NONE, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(25), - I40E_PTT(26, IP, IPV4, NOF, NONE, NONE, NOF, TCP, PAY4), - I40E_PTT(27, IP, IPV4, NOF, NONE, NONE, NOF, SCTP, PAY4), - I40E_PTT(28, IP, IPV4, NOF, NONE, NONE, NOF, ICMP, PAY4), - - /* IPv4 --> IPv4 */ - I40E_PTT(29, IP, IPV4, NOF, IP_IP, IPV4, FRG, NONE, PAY3), - I40E_PTT(30, IP, IPV4, NOF, IP_IP, IPV4, NOF, NONE, PAY3), - I40E_PTT(31, IP, IPV4, NOF, IP_IP, IPV4, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(32), - I40E_PTT(33, IP, IPV4, NOF, IP_IP, IPV4, NOF, TCP, PAY4), - I40E_PTT(34, IP, IPV4, NOF, IP_IP, IPV4, NOF, SCTP, PAY4), - I40E_PTT(35, IP, IPV4, NOF, IP_IP, IPV4, NOF, ICMP, PAY4), - - /* IPv4 --> IPv6 */ - I40E_PTT(36, IP, IPV4, NOF, IP_IP, IPV6, FRG, NONE, PAY3), - I40E_PTT(37, IP, IPV4, NOF, IP_IP, IPV6, NOF, NONE, PAY3), - I40E_PTT(38, IP, IPV4, NOF, IP_IP, IPV6, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(39), - I40E_PTT(40, IP, IPV4, NOF, IP_IP, IPV6, NOF, TCP, PAY4), - 
I40E_PTT(41, IP, IPV4, NOF, IP_IP, IPV6, NOF, SCTP, PAY4), - I40E_PTT(42, IP, IPV4, NOF, IP_IP, IPV6, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT */ - I40E_PTT(43, IP, IPV4, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3), - - /* IPv4 --> GRE/NAT --> IPv4 */ - I40E_PTT(44, IP, IPV4, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3), - I40E_PTT(45, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3), - I40E_PTT(46, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(47), - I40E_PTT(48, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, TCP, PAY4), - I40E_PTT(49, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4), - I40E_PTT(50, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT --> IPv6 */ - I40E_PTT(51, IP, IPV4, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3), - I40E_PTT(52, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3), - I40E_PTT(53, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(54), - I40E_PTT(55, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, TCP, PAY4), - I40E_PTT(56, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4), - I40E_PTT(57, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT --> MAC */ - I40E_PTT(58, IP, IPV4, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3), - - /* IPv4 --> GRE/NAT --> MAC --> IPv4 */ - I40E_PTT(59, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3), - I40E_PTT(60, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3), - I40E_PTT(61, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(62), - I40E_PTT(63, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP, PAY4), - I40E_PTT(64, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4), - I40E_PTT(65, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT -> MAC --> IPv6 */ - I40E_PTT(66, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3), - I40E_PTT(67, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3), - I40E_PTT(68, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(69), - I40E_PTT(70, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP, PAY4), - I40E_PTT(71, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4), - I40E_PTT(72, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT --> MAC/VLAN */ - I40E_PTT(73, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3), - - /* IPv4 ---> GRE/NAT -> MAC/VLAN --> IPv4 */ - I40E_PTT(74, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3), - I40E_PTT(75, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3), - I40E_PTT(76, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(77), - I40E_PTT(78, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP, PAY4), - I40E_PTT(79, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4), - I40E_PTT(80, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4), - - /* IPv4 -> GRE/NAT -> MAC/VLAN --> IPv6 */ - I40E_PTT(81, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3), - I40E_PTT(82, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3), - I40E_PTT(83, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(84), - I40E_PTT(85, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP, PAY4), - I40E_PTT(86, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4), - I40E_PTT(87, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4), - - /* Non Tunneled IPv6 */ - I40E_PTT(88, IP, IPV6, FRG, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(89, IP, IPV6, NOF, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP, PAY4), - 
I40E_PTT_UNUSED_ENTRY(91), - I40E_PTT(92, IP, IPV6, NOF, NONE, NONE, NOF, TCP, PAY4), - I40E_PTT(93, IP, IPV6, NOF, NONE, NONE, NOF, SCTP, PAY4), - I40E_PTT(94, IP, IPV6, NOF, NONE, NONE, NOF, ICMP, PAY4), - - /* IPv6 --> IPv4 */ - I40E_PTT(95, IP, IPV6, NOF, IP_IP, IPV4, FRG, NONE, PAY3), - I40E_PTT(96, IP, IPV6, NOF, IP_IP, IPV4, NOF, NONE, PAY3), - I40E_PTT(97, IP, IPV6, NOF, IP_IP, IPV4, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(98), - I40E_PTT(99, IP, IPV6, NOF, IP_IP, IPV4, NOF, TCP, PAY4), - I40E_PTT(100, IP, IPV6, NOF, IP_IP, IPV4, NOF, SCTP, PAY4), - I40E_PTT(101, IP, IPV6, NOF, IP_IP, IPV4, NOF, ICMP, PAY4), - - /* IPv6 --> IPv6 */ - I40E_PTT(102, IP, IPV6, NOF, IP_IP, IPV6, FRG, NONE, PAY3), - I40E_PTT(103, IP, IPV6, NOF, IP_IP, IPV6, NOF, NONE, PAY3), - I40E_PTT(104, IP, IPV6, NOF, IP_IP, IPV6, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(105), - I40E_PTT(106, IP, IPV6, NOF, IP_IP, IPV6, NOF, TCP, PAY4), - I40E_PTT(107, IP, IPV6, NOF, IP_IP, IPV6, NOF, SCTP, PAY4), - I40E_PTT(108, IP, IPV6, NOF, IP_IP, IPV6, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT */ - I40E_PTT(109, IP, IPV6, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3), - - /* IPv6 --> GRE/NAT -> IPv4 */ - I40E_PTT(110, IP, IPV6, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3), - I40E_PTT(111, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3), - I40E_PTT(112, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(113), - I40E_PTT(114, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, TCP, PAY4), - I40E_PTT(115, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4), - I40E_PTT(116, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> IPv6 */ - I40E_PTT(117, IP, IPV6, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3), - I40E_PTT(118, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3), - I40E_PTT(119, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(120), - I40E_PTT(121, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, TCP, PAY4), - I40E_PTT(122, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4), - I40E_PTT(123, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> MAC */ - I40E_PTT(124, IP, IPV6, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3), - - /* IPv6 --> GRE/NAT -> MAC -> IPv4 */ - I40E_PTT(125, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3), - I40E_PTT(126, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3), - I40E_PTT(127, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(128), - I40E_PTT(129, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP, PAY4), - I40E_PTT(130, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4), - I40E_PTT(131, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> MAC -> IPv6 */ - I40E_PTT(132, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3), - I40E_PTT(133, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3), - I40E_PTT(134, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(135), - I40E_PTT(136, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP, PAY4), - I40E_PTT(137, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4), - I40E_PTT(138, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> MAC/VLAN */ - I40E_PTT(139, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3), - - /* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv4 */ - I40E_PTT(140, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3), - I40E_PTT(141, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3), - I40E_PTT(142, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP, PAY4), - 
I40E_PTT_UNUSED_ENTRY(143), - I40E_PTT(144, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP, PAY4), - I40E_PTT(145, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4), - I40E_PTT(146, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv6 */ - I40E_PTT(147, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3), - I40E_PTT(148, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3), - I40E_PTT(149, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(150), - I40E_PTT(151, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP, PAY4), - I40E_PTT(152, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4), - I40E_PTT(153, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4), - - /* unused entries */ - [154 ... 255] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 } -}; - /** * i40e_init_shared_code - Initialize the shared code * @hw: pointer to hardware structure diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index c8ff5675b29d8b..d89a5fff15983d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -97,6 +97,7 @@ MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all), Debug mask (0x8XXXXXXX MODULE_AUTHOR("Intel Corporation, "); MODULE_DESCRIPTION("Intel(R) Ethernet Connection XL710 Network Driver"); +MODULE_IMPORT_NS(LIBIE); MODULE_LICENSE("GPL v2"); static struct workqueue_struct *i40e_wq; diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index fe845987d99a55..5287d0ef32d5c7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -380,13 +380,6 @@ void i40e_set_pci_config_data(struct i40e_hw *hw, u16 link_status); int i40e_set_mac_type(struct i40e_hw *hw); -extern struct i40e_rx_ptype_decoded i40e_ptype_lookup[]; - -static inline struct i40e_rx_ptype_decoded decode_rx_desc_ptype(u8 ptype) -{ - return i40e_ptype_lookup[ptype]; -} - /** * i40e_virtchnl_link_speed - Convert AdminQ link_speed to virtchnl definition * @link_speed: the speed to convert diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index c8c2cbaa0ede6c..e4bfc7e3c076e1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1,8 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright(c) 2013 - 2018 Intel Corporation. */ -#include #include +#include +#include #include #include #include "i40e.h" @@ -1758,40 +1759,32 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi, struct sk_buff *skb, union i40e_rx_desc *rx_desc) { - struct i40e_rx_ptype_decoded decoded; + struct libie_rx_ptype_parsed parsed; u32 rx_error, rx_status; bool ipv4, ipv6; u8 ptype; u64 qword; + skb->ip_summed = CHECKSUM_NONE; + qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >> I40E_RXD_QW1_PTYPE_SHIFT; + + parsed = libie_parse_rx_ptype(ptype); + if (!libie_has_rx_checksum(vsi->netdev, parsed)) + return; + rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >> I40E_RXD_QW1_ERROR_SHIFT; rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >> I40E_RXD_QW1_STATUS_SHIFT; - decoded = decode_rx_desc_ptype(ptype); - - skb->ip_summed = CHECKSUM_NONE; - - skb_checksum_none_assert(skb); - - /* Rx csum enabled and ip headers found? 
*/ - if (!(vsi->netdev->features & NETIF_F_RXCSUM)) - return; /* did the hardware decode the packet and checksum? */ if (!(rx_status & BIT(I40E_RX_DESC_STATUS_L3L4P_SHIFT))) return; - /* both known and outer_ip must be set for the below code to work */ - if (!(decoded.known && decoded.outer_ip)) - return; - - ipv4 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) && - (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4); - ipv6 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) && - (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6); + ipv4 = parsed.outer_ip == LIBIE_RX_PTYPE_OUTER_IPV4; + ipv6 = parsed.outer_ip == LIBIE_RX_PTYPE_OUTER_IPV6; if (ipv4 && (rx_error & (BIT(I40E_RX_DESC_ERROR_IPE_SHIFT) | @@ -1819,49 +1812,16 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi, * we need to bump the checksum level by 1 to reflect the fact that * we are indicating we validated the inner checksum. */ - if (decoded.tunnel_type >= I40E_RX_PTYPE_TUNNEL_IP_GRENAT) + if (parsed.tunnel_type >= LIBIE_RX_PTYPE_TUNNEL_IP_GRENAT) skb->csum_level = 1; - /* Only report checksum unnecessary for TCP, UDP, or SCTP */ - switch (decoded.inner_prot) { - case I40E_RX_PTYPE_INNER_PROT_TCP: - case I40E_RX_PTYPE_INNER_PROT_UDP: - case I40E_RX_PTYPE_INNER_PROT_SCTP: - skb->ip_summed = CHECKSUM_UNNECESSARY; - fallthrough; - default: - break; - } - + skb->ip_summed = CHECKSUM_UNNECESSARY; return; checksum_fail: vsi->back->hw_csum_rx_error++; } -/** - * i40e_ptype_to_htype - get a hash type - * @ptype: the ptype value from the descriptor - * - * Returns a hash type to be used by skb_set_hash - **/ -static inline int i40e_ptype_to_htype(u8 ptype) -{ - struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype); - - if (!decoded.known) - return PKT_HASH_TYPE_NONE; - - if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && - decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4) - return PKT_HASH_TYPE_L4; - else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && - decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3) - return PKT_HASH_TYPE_L3; - else - return PKT_HASH_TYPE_L2; -} - /** * i40e_rx_hash - set the hash value in the skb * @ring: descriptor ring @@ -1874,17 +1834,19 @@ static inline void i40e_rx_hash(struct i40e_ring *ring, struct sk_buff *skb, u8 rx_ptype) { + struct libie_rx_ptype_parsed parsed; u32 hash; const __le64 rss_mask = cpu_to_le64((u64)I40E_RX_DESC_FLTSTAT_RSS_HASH << I40E_RX_DESC_STATUS_FLTSTAT_SHIFT); - if (!(ring->netdev->features & NETIF_F_RXHASH)) + parsed = libie_parse_rx_ptype(rx_ptype); + if (!libie_has_rx_hash(ring->netdev, parsed)) return; if ((rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask) { hash = le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss); - skb_set_hash(skb, hash, i40e_ptype_to_htype(rx_ptype)); + libie_skb_set_hash(skb, hash, parsed); } } diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h index 388c3d36d96a55..05b8510f99a930 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -773,94 +773,6 @@ enum i40e_rx_desc_error_l3l4e_fcoe_masks { #define I40E_RXD_QW1_PTYPE_SHIFT 30 #define I40E_RXD_QW1_PTYPE_MASK (0xFFULL << I40E_RXD_QW1_PTYPE_SHIFT) -/* Packet type non-ip values */ -enum i40e_rx_l2_ptype { - I40E_RX_PTYPE_L2_RESERVED = 0, - I40E_RX_PTYPE_L2_MAC_PAY2 = 1, - I40E_RX_PTYPE_L2_TIMESYNC_PAY2 = 2, - I40E_RX_PTYPE_L2_FIP_PAY2 = 3, - I40E_RX_PTYPE_L2_OUI_PAY2 = 4, - I40E_RX_PTYPE_L2_MACCNTRL_PAY2 = 5, - I40E_RX_PTYPE_L2_LLDP_PAY2 = 6, - 
I40E_RX_PTYPE_L2_ECP_PAY2 = 7, - I40E_RX_PTYPE_L2_EVB_PAY2 = 8, - I40E_RX_PTYPE_L2_QCN_PAY2 = 9, - I40E_RX_PTYPE_L2_EAPOL_PAY2 = 10, - I40E_RX_PTYPE_L2_ARP = 11, - I40E_RX_PTYPE_L2_FCOE_PAY3 = 12, - I40E_RX_PTYPE_L2_FCOE_FCDATA_PAY3 = 13, - I40E_RX_PTYPE_L2_FCOE_FCRDY_PAY3 = 14, - I40E_RX_PTYPE_L2_FCOE_FCRSP_PAY3 = 15, - I40E_RX_PTYPE_L2_FCOE_FCOTHER_PA = 16, - I40E_RX_PTYPE_L2_FCOE_VFT_PAY3 = 17, - I40E_RX_PTYPE_L2_FCOE_VFT_FCDATA = 18, - I40E_RX_PTYPE_L2_FCOE_VFT_FCRDY = 19, - I40E_RX_PTYPE_L2_FCOE_VFT_FCRSP = 20, - I40E_RX_PTYPE_L2_FCOE_VFT_FCOTHER = 21, - I40E_RX_PTYPE_GRENAT4_MAC_PAY3 = 58, - I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4 = 87, - I40E_RX_PTYPE_GRENAT6_MAC_PAY3 = 124, - I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4 = 153 -}; - -struct i40e_rx_ptype_decoded { - u32 known:1; - u32 outer_ip:1; - u32 outer_ip_ver:1; - u32 outer_frag:1; - u32 tunnel_type:3; - u32 tunnel_end_prot:2; - u32 tunnel_end_frag:1; - u32 inner_prot:4; - u32 payload_layer:3; -}; - -enum i40e_rx_ptype_outer_ip { - I40E_RX_PTYPE_OUTER_L2 = 0, - I40E_RX_PTYPE_OUTER_IP = 1 -}; - -enum i40e_rx_ptype_outer_ip_ver { - I40E_RX_PTYPE_OUTER_NONE = 0, - I40E_RX_PTYPE_OUTER_IPV4 = 0, - I40E_RX_PTYPE_OUTER_IPV6 = 1 -}; - -enum i40e_rx_ptype_outer_fragmented { - I40E_RX_PTYPE_NOT_FRAG = 0, - I40E_RX_PTYPE_FRAG = 1 -}; - -enum i40e_rx_ptype_tunnel_type { - I40E_RX_PTYPE_TUNNEL_NONE = 0, - I40E_RX_PTYPE_TUNNEL_IP_IP = 1, - I40E_RX_PTYPE_TUNNEL_IP_GRENAT = 2, - I40E_RX_PTYPE_TUNNEL_IP_GRENAT_MAC = 3, - I40E_RX_PTYPE_TUNNEL_IP_GRENAT_MAC_VLAN = 4, -}; - -enum i40e_rx_ptype_tunnel_end_prot { - I40E_RX_PTYPE_TUNNEL_END_NONE = 0, - I40E_RX_PTYPE_TUNNEL_END_IPV4 = 1, - I40E_RX_PTYPE_TUNNEL_END_IPV6 = 2, -}; - -enum i40e_rx_ptype_inner_prot { - I40E_RX_PTYPE_INNER_PROT_NONE = 0, - I40E_RX_PTYPE_INNER_PROT_UDP = 1, - I40E_RX_PTYPE_INNER_PROT_TCP = 2, - I40E_RX_PTYPE_INNER_PROT_SCTP = 3, - I40E_RX_PTYPE_INNER_PROT_ICMP = 4, - I40E_RX_PTYPE_INNER_PROT_TIMESYNC = 5 -}; - -enum i40e_rx_ptype_payload_layer { - I40E_RX_PTYPE_PAYLOAD_LAYER_NONE = 0, - I40E_RX_PTYPE_PAYLOAD_LAYER_PAY2 = 1, - I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3 = 2, - I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4 = 3, -}; - #define I40E_RXD_QW1_LENGTH_PBUF_SHIFT 38 #define I40E_RXD_QW1_LENGTH_PBUF_MASK (0x3FFFULL << \ I40E_RXD_QW1_LENGTH_PBUF_SHIFT) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 8a4587585acde7..ee2a1e682a1c9b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -612,6 +612,9 @@ static int i40e_config_vsi_tx_queue(struct i40e_vf *vf, u16 vsi_id, u32 qtx_ctl; int ret = 0; + if (info->ring_len == 0) + return 0; + if (!i40e_vc_isvalid_vsi_id(vf, info->vsi_id)) { ret = -ENOENT; goto error_context; @@ -688,6 +691,9 @@ static int i40e_config_vsi_rx_queue(struct i40e_vf *vf, u16 vsi_id, struct i40e_hmc_obj_rxq rx_ctx; int ret = 0; + if (info->ring_len == 0) + return 0; + /* clear the context structure first */ memset(&rx_ctx, 0, sizeof(struct i40e_hmc_obj_rxq)); diff --git a/drivers/net/ethernet/intel/iavf/Makefile b/drivers/net/ethernet/intel/iavf/Makefile index 9c3e45c54d0133..19eb29005e7a06 100644 --- a/drivers/net/ethernet/intel/iavf/Makefile +++ b/drivers/net/ethernet/intel/iavf/Makefile @@ -13,4 +13,5 @@ obj-$(CONFIG_IAVF) += iavf.o iavf-objs := iavf_main.o iavf_ethtool.o iavf_virtchnl.o iavf_fdir.o \ iavf_adv_rss.o \ - iavf_txrx.o iavf_common.o iavf_adminq.o iavf_client.o + iavf_txrx.o iavf_common.o iavf_adminq.o 
iavf_client.o \ + iavf_xsk.o diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index 2cdce251472c08..a91da041b43fa6 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -4,6 +4,7 @@ #ifndef _IAVF_H_ #define _IAVF_H_ +#include #include #include #include @@ -27,13 +28,17 @@ #include #include #include +#include #include #include #include #include #include #include +#include +#include +#include "iavf_xsk.h" #include "iavf_type.h" #include #include "iavf_txrx.h" @@ -83,10 +88,6 @@ struct iavf_vsi { #define MAXIMUM_ETHERNET_VLAN_SIZE (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN) -#define IAVF_RX_DESC(R, i) (&(((union iavf_32byte_rx_desc *)((R)->desc))[i])) -#define IAVF_TX_DESC(R, i) (&(((struct iavf_tx_desc *)((R)->desc))[i])) -#define IAVF_TX_CTXTDESC(R, i) \ - (&(((struct iavf_tx_context_desc *)((R)->desc))[i])) #define IAVF_MAX_REQ_QUEUES 16 #define IAVF_HKEY_ARRAY_SIZE ((IAVF_VFQF_HKEY_MAX_INDEX + 1) * 4) @@ -107,7 +108,8 @@ struct iavf_q_vector { struct napi_struct napi; struct iavf_ring_container rx; struct iavf_ring_container tx; - u32 ring_mask; + u32 rx_ring_mask; + u32 tx_ring_mask; u8 itr_countdown; /* when 0 should adjust adaptive ITR */ u8 num_ringpairs; /* total number of ring pairs in vector */ u16 v_idx; /* index in the vsi->q_vector array. */ @@ -243,6 +245,8 @@ struct iavf_cloud_filter { bool add; /* filter needs to be added */ }; +#define IAVF_XDP_LINK_TIMEOUT_MS 1000 + #define IAVF_RESET_WAIT_MS 10 #define IAVF_RESET_WAIT_DETECTED_COUNT 500 #define IAVF_RESET_WAIT_COMPLETE_COUNT 2000 @@ -263,11 +267,15 @@ struct iavf_adapter { /* Lock to protect accesses to MAC and VLAN lists */ spinlock_t mac_vlan_list_lock; char misc_vector_name[IFNAMSIZ + 9]; - int num_active_queues; - int num_req_queues; + u32 num_active_queues; + u32 num_xdp_tx_queues; + u32 num_req_queues; + struct bpf_prog *xdp_prog; + unsigned long *af_xdp_zc_qps; /* TX */ struct iavf_ring *tx_rings; + struct iavf_ring *xdp_rings; u32 tx_timeout_count; u32 tx_desc_count; @@ -294,7 +302,7 @@ struct iavf_adapter { #define IAVF_FLAG_CLIENT_NEEDS_L2_PARAMS BIT(12) #define IAVF_FLAG_PROMISC_ON BIT(13) #define IAVF_FLAG_ALLMULTI_ON BIT(14) -#define IAVF_FLAG_LEGACY_RX BIT(15) +/* BIT(15) is free, was IAVF_FLAG_LEGACY_RX */ #define IAVF_FLAG_REINIT_ITR_NEEDED BIT(16) #define IAVF_FLAG_QUEUES_DISABLED BIT(17) #define IAVF_FLAG_SETUP_NETDEV_FEATURES BIT(18) @@ -510,6 +518,30 @@ static inline void iavf_change_state(struct iavf_adapter *adapter, iavf_state_str(adapter->state)); } +/** + * iavf_adapter_xdp_active - Determine if XDP program is loaded + * @adapter: board private structure + * + * Returns true if XDP program is loaded on a given adapter. 
+ */ +static inline bool iavf_adapter_xdp_active(struct iavf_adapter *adapter) +{ + return !!READ_ONCE(adapter->xdp_prog); +} + +static inline struct xsk_buff_pool *iavf_xsk_pool(struct iavf_ring *ring) +{ + struct iavf_adapter *adapter = ring->vsi->back; + struct iavf_vsi *vsi = ring->vsi; + u16 qid = ring->queue_index; + + if (!iavf_adapter_xdp_active(adapter) || + !test_bit(qid, adapter->af_xdp_zc_qps)) + return NULL; + + return xsk_get_pool_from_qid(vsi->netdev, qid); +} + int iavf_up(struct iavf_adapter *adapter); void iavf_down(struct iavf_adapter *adapter); int iavf_process_config(struct iavf_adapter *adapter); @@ -537,11 +569,17 @@ int iavf_send_vf_offload_vlan_v2_msg(struct iavf_adapter *adapter); void iavf_set_queue_vlan_tag_loc(struct iavf_adapter *adapter); u16 iavf_get_num_vlans_added(struct iavf_adapter *adapter); void iavf_irq_enable(struct iavf_adapter *adapter, bool flush); -void iavf_configure_queues(struct iavf_adapter *adapter); +int iavf_configure_selected_queues(struct iavf_adapter *adapter, u32 qp_mask, + bool wait); +int iavf_configure_queues(struct iavf_adapter *adapter, bool wait); void iavf_deconfigure_queues(struct iavf_adapter *adapter); -void iavf_enable_queues(struct iavf_adapter *adapter); -void iavf_disable_queues(struct iavf_adapter *adapter); -void iavf_map_queues(struct iavf_adapter *adapter); +int iavf_enable_queues(struct iavf_adapter *adapter, bool wait); +int iavf_disable_queues(struct iavf_adapter *adapter, bool wait); +int iavf_enable_selected_queues(struct iavf_adapter *adapter, u32 rx_queues, + u32 tx_queues, bool wait); +int iavf_disable_selected_queues(struct iavf_adapter *adapter, u32 rx_queues, + u32 tx_queues, bool wait); +int iavf_map_queues(struct iavf_adapter *adapter, bool wait); int iavf_request_queues(struct iavf_adapter *adapter, int num); void iavf_add_ether_addrs(struct iavf_adapter *adapter); void iavf_del_ether_addrs(struct iavf_adapter *adapter); @@ -556,9 +594,14 @@ void iavf_set_rss_key(struct iavf_adapter *adapter); void iavf_set_rss_lut(struct iavf_adapter *adapter); void iavf_enable_vlan_stripping(struct iavf_adapter *adapter); void iavf_disable_vlan_stripping(struct iavf_adapter *adapter); +int iavf_poll_for_link_status(struct iavf_adapter *adapter, unsigned int msecs); void iavf_virtchnl_completion(struct iavf_adapter *adapter, enum virtchnl_ops v_opcode, enum iavf_status v_retval, u8 *msg, u16 msglen); +int iavf_process_pending_pf_msg(struct iavf_adapter *adapter, + unsigned int timeout_msecs); +void iavf_configure_rx_ring(struct iavf_adapter *adapter, + struct iavf_ring *rx_ring); int iavf_config_rss(struct iavf_adapter *adapter); int iavf_lan_add_device(struct iavf_adapter *adapter); int iavf_lan_del_device(struct iavf_adapter *adapter); diff --git a/drivers/net/ethernet/intel/iavf/iavf_common.c b/drivers/net/ethernet/intel/iavf/iavf_common.c index dd11dbbd5551a2..ba6c9f154d189a 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_common.c +++ b/drivers/net/ethernet/intel/iavf/iavf_common.c @@ -499,259 +499,6 @@ enum iavf_status iavf_aq_set_rss_key(struct iavf_hw *hw, u16 vsi_id, return iavf_aq_get_set_rss_key(hw, vsi_id, key, true); } -/* The iavf_ptype_lookup table is used to convert from the 8-bit ptype in the - * hardware to a bit-field that can be used by SW to more easily determine the - * packet type. - * - * Macros are used to shorten the table lines and make this table human - * readable. 
- * - * We store the PTYPE in the top byte of the bit field - this is just so that - * we can check that the table doesn't have a row missing, as the index into - * the table should be the PTYPE. - * - * Typical work flow: - * - * IF NOT iavf_ptype_lookup[ptype].known - * THEN - * Packet is unknown - * ELSE IF iavf_ptype_lookup[ptype].outer_ip == IAVF_RX_PTYPE_OUTER_IP - * Use the rest of the fields to look at the tunnels, inner protocols, etc - * ELSE - * Use the enum iavf_rx_l2_ptype to decode the packet type - * ENDIF - */ - -/* macro to make the table lines short, use explicit indexing with [PTYPE] */ -#define IAVF_PTT(PTYPE, OUTER_IP, OUTER_IP_VER, OUTER_FRAG, T, TE, TEF, I, PL)\ - [PTYPE] = { \ - 1, \ - IAVF_RX_PTYPE_OUTER_##OUTER_IP, \ - IAVF_RX_PTYPE_OUTER_##OUTER_IP_VER, \ - IAVF_RX_PTYPE_##OUTER_FRAG, \ - IAVF_RX_PTYPE_TUNNEL_##T, \ - IAVF_RX_PTYPE_TUNNEL_END_##TE, \ - IAVF_RX_PTYPE_##TEF, \ - IAVF_RX_PTYPE_INNER_PROT_##I, \ - IAVF_RX_PTYPE_PAYLOAD_LAYER_##PL } - -#define IAVF_PTT_UNUSED_ENTRY(PTYPE) [PTYPE] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 } - -/* shorter macros makes the table fit but are terse */ -#define IAVF_RX_PTYPE_NOF IAVF_RX_PTYPE_NOT_FRAG -#define IAVF_RX_PTYPE_FRG IAVF_RX_PTYPE_FRAG -#define IAVF_RX_PTYPE_INNER_PROT_TS IAVF_RX_PTYPE_INNER_PROT_TIMESYNC - -/* Lookup table mapping the 8-bit HW PTYPE to the bit field for decoding */ -struct iavf_rx_ptype_decoded iavf_ptype_lookup[BIT(8)] = { - /* L2 Packet types */ - IAVF_PTT_UNUSED_ENTRY(0), - IAVF_PTT(1, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), - IAVF_PTT(2, L2, NONE, NOF, NONE, NONE, NOF, TS, PAY2), - IAVF_PTT(3, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), - IAVF_PTT_UNUSED_ENTRY(4), - IAVF_PTT_UNUSED_ENTRY(5), - IAVF_PTT(6, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), - IAVF_PTT(7, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), - IAVF_PTT_UNUSED_ENTRY(8), - IAVF_PTT_UNUSED_ENTRY(9), - IAVF_PTT(10, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), - IAVF_PTT(11, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE), - IAVF_PTT(12, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(13, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(14, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(15, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(16, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(17, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(18, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(19, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(20, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(21, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - - /* Non Tunneled IPv4 */ - IAVF_PTT(22, IP, IPV4, FRG, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(23, IP, IPV4, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(24, IP, IPV4, NOF, NONE, NONE, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(25), - IAVF_PTT(26, IP, IPV4, NOF, NONE, NONE, NOF, TCP, PAY4), - IAVF_PTT(27, IP, IPV4, NOF, NONE, NONE, NOF, SCTP, PAY4), - IAVF_PTT(28, IP, IPV4, NOF, NONE, NONE, NOF, ICMP, PAY4), - - /* IPv4 --> IPv4 */ - IAVF_PTT(29, IP, IPV4, NOF, IP_IP, IPV4, FRG, NONE, PAY3), - IAVF_PTT(30, IP, IPV4, NOF, IP_IP, IPV4, NOF, NONE, PAY3), - IAVF_PTT(31, IP, IPV4, NOF, IP_IP, IPV4, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(32), - IAVF_PTT(33, IP, IPV4, NOF, IP_IP, IPV4, NOF, TCP, PAY4), - IAVF_PTT(34, IP, IPV4, NOF, IP_IP, IPV4, NOF, SCTP, PAY4), - IAVF_PTT(35, IP, IPV4, NOF, IP_IP, IPV4, NOF, ICMP, PAY4), - - /* IPv4 --> IPv6 */ - IAVF_PTT(36, IP, IPV4, NOF, IP_IP, IPV6, FRG, NONE, PAY3), - IAVF_PTT(37, IP, IPV4, NOF, 
IP_IP, IPV6, NOF, NONE, PAY3), - IAVF_PTT(38, IP, IPV4, NOF, IP_IP, IPV6, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(39), - IAVF_PTT(40, IP, IPV4, NOF, IP_IP, IPV6, NOF, TCP, PAY4), - IAVF_PTT(41, IP, IPV4, NOF, IP_IP, IPV6, NOF, SCTP, PAY4), - IAVF_PTT(42, IP, IPV4, NOF, IP_IP, IPV6, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT */ - IAVF_PTT(43, IP, IPV4, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3), - - /* IPv4 --> GRE/NAT --> IPv4 */ - IAVF_PTT(44, IP, IPV4, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3), - IAVF_PTT(45, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3), - IAVF_PTT(46, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(47), - IAVF_PTT(48, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, TCP, PAY4), - IAVF_PTT(49, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4), - IAVF_PTT(50, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT --> IPv6 */ - IAVF_PTT(51, IP, IPV4, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3), - IAVF_PTT(52, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3), - IAVF_PTT(53, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(54), - IAVF_PTT(55, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, TCP, PAY4), - IAVF_PTT(56, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4), - IAVF_PTT(57, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT --> MAC */ - IAVF_PTT(58, IP, IPV4, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3), - - /* IPv4 --> GRE/NAT --> MAC --> IPv4 */ - IAVF_PTT(59, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3), - IAVF_PTT(60, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3), - IAVF_PTT(61, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(62), - IAVF_PTT(63, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP, PAY4), - IAVF_PTT(64, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4), - IAVF_PTT(65, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT -> MAC --> IPv6 */ - IAVF_PTT(66, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3), - IAVF_PTT(67, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3), - IAVF_PTT(68, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(69), - IAVF_PTT(70, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP, PAY4), - IAVF_PTT(71, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4), - IAVF_PTT(72, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT --> MAC/VLAN */ - IAVF_PTT(73, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3), - - /* IPv4 ---> GRE/NAT -> MAC/VLAN --> IPv4 */ - IAVF_PTT(74, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3), - IAVF_PTT(75, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3), - IAVF_PTT(76, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(77), - IAVF_PTT(78, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP, PAY4), - IAVF_PTT(79, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4), - IAVF_PTT(80, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4), - - /* IPv4 -> GRE/NAT -> MAC/VLAN --> IPv6 */ - IAVF_PTT(81, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3), - IAVF_PTT(82, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3), - IAVF_PTT(83, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(84), - IAVF_PTT(85, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP, PAY4), - IAVF_PTT(86, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4), - IAVF_PTT(87, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4), - - /* Non Tunneled IPv6 */ - 
IAVF_PTT(88, IP, IPV6, FRG, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(89, IP, IPV6, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(91), - IAVF_PTT(92, IP, IPV6, NOF, NONE, NONE, NOF, TCP, PAY4), - IAVF_PTT(93, IP, IPV6, NOF, NONE, NONE, NOF, SCTP, PAY4), - IAVF_PTT(94, IP, IPV6, NOF, NONE, NONE, NOF, ICMP, PAY4), - - /* IPv6 --> IPv4 */ - IAVF_PTT(95, IP, IPV6, NOF, IP_IP, IPV4, FRG, NONE, PAY3), - IAVF_PTT(96, IP, IPV6, NOF, IP_IP, IPV4, NOF, NONE, PAY3), - IAVF_PTT(97, IP, IPV6, NOF, IP_IP, IPV4, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(98), - IAVF_PTT(99, IP, IPV6, NOF, IP_IP, IPV4, NOF, TCP, PAY4), - IAVF_PTT(100, IP, IPV6, NOF, IP_IP, IPV4, NOF, SCTP, PAY4), - IAVF_PTT(101, IP, IPV6, NOF, IP_IP, IPV4, NOF, ICMP, PAY4), - - /* IPv6 --> IPv6 */ - IAVF_PTT(102, IP, IPV6, NOF, IP_IP, IPV6, FRG, NONE, PAY3), - IAVF_PTT(103, IP, IPV6, NOF, IP_IP, IPV6, NOF, NONE, PAY3), - IAVF_PTT(104, IP, IPV6, NOF, IP_IP, IPV6, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(105), - IAVF_PTT(106, IP, IPV6, NOF, IP_IP, IPV6, NOF, TCP, PAY4), - IAVF_PTT(107, IP, IPV6, NOF, IP_IP, IPV6, NOF, SCTP, PAY4), - IAVF_PTT(108, IP, IPV6, NOF, IP_IP, IPV6, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT */ - IAVF_PTT(109, IP, IPV6, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3), - - /* IPv6 --> GRE/NAT -> IPv4 */ - IAVF_PTT(110, IP, IPV6, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3), - IAVF_PTT(111, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3), - IAVF_PTT(112, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(113), - IAVF_PTT(114, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, TCP, PAY4), - IAVF_PTT(115, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4), - IAVF_PTT(116, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> IPv6 */ - IAVF_PTT(117, IP, IPV6, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3), - IAVF_PTT(118, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3), - IAVF_PTT(119, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(120), - IAVF_PTT(121, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, TCP, PAY4), - IAVF_PTT(122, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4), - IAVF_PTT(123, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> MAC */ - IAVF_PTT(124, IP, IPV6, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3), - - /* IPv6 --> GRE/NAT -> MAC -> IPv4 */ - IAVF_PTT(125, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3), - IAVF_PTT(126, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3), - IAVF_PTT(127, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(128), - IAVF_PTT(129, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP, PAY4), - IAVF_PTT(130, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4), - IAVF_PTT(131, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> MAC -> IPv6 */ - IAVF_PTT(132, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3), - IAVF_PTT(133, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3), - IAVF_PTT(134, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(135), - IAVF_PTT(136, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP, PAY4), - IAVF_PTT(137, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4), - IAVF_PTT(138, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> MAC/VLAN */ - IAVF_PTT(139, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3), - - /* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv4 */ - IAVF_PTT(140, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, 
PAY3), - IAVF_PTT(141, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3), - IAVF_PTT(142, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(143), - IAVF_PTT(144, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP, PAY4), - IAVF_PTT(145, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4), - IAVF_PTT(146, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv6 */ - IAVF_PTT(147, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3), - IAVF_PTT(148, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3), - IAVF_PTT(149, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(150), - IAVF_PTT(151, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP, PAY4), - IAVF_PTT(152, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4), - IAVF_PTT(153, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4), - - /* unused entries */ - [154 ... 255] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 } -}; - /** * iavf_aq_send_msg_to_pf * @hw: pointer to the hardware structure diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index 6f171d1d85b75f..e14e5f84f6ebce 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -46,16 +46,6 @@ struct iavf_stats { .stat_offset = offsetof(_type, _stat) \ } -/* Helper macro for defining some statistics related to queues */ -#define IAVF_QUEUE_STAT(_name, _stat) \ - IAVF_STAT(struct iavf_ring, _name, _stat) - -/* Stats associated with a Tx or Rx ring */ -static const struct iavf_stats iavf_gstrings_queue_stats[] = { - IAVF_QUEUE_STAT("%s-%u.packets", stats.packets), - IAVF_QUEUE_STAT("%s-%u.bytes", stats.bytes), -}; - /** * iavf_add_one_ethtool_stat - copy the stat into the supplied buffer * @data: location to store the stat value @@ -141,43 +131,6 @@ __iavf_add_ethtool_stats(u64 **data, void *pointer, #define iavf_add_ethtool_stats(data, pointer, stats) \ __iavf_add_ethtool_stats(data, pointer, stats, ARRAY_SIZE(stats)) -/** - * iavf_add_queue_stats - copy queue statistics into supplied buffer - * @data: ethtool stats buffer - * @ring: the ring to copy - * - * Queue statistics must be copied while protected by - * u64_stats_fetch_begin, so we can't directly use iavf_add_ethtool_stats. - * Assumes that queue stats are defined in iavf_gstrings_queue_stats. If the - * ring pointer is null, zero out the queue stat values and update the data - * pointer. Otherwise safely copy the stats from the ring into the supplied - * buffer and update the data pointer when finished. - * - * This function expects to be called while under rcu_read_lock(). - **/ -static void -iavf_add_queue_stats(u64 **data, struct iavf_ring *ring) -{ - const unsigned int size = ARRAY_SIZE(iavf_gstrings_queue_stats); - const struct iavf_stats *stats = iavf_gstrings_queue_stats; - unsigned int start; - unsigned int i; - - /* To avoid invalid statistics values, ensure that we keep retrying - * the copy until we get a consistent value according to - * u64_stats_fetch_retry. But first, make sure our ring is - * non-null before attempting to access its syncp. - */ - do { - start = !ring ? 
0 : u64_stats_fetch_begin(&ring->syncp); - for (i = 0; i < size; i++) - iavf_add_one_ethtool_stat(&(*data)[i], ring, &stats[i]); - } while (ring && u64_stats_fetch_retry(&ring->syncp, start)); - - /* Once we successfully copy the stats in, update the data pointer */ - *data += size; -} - /** * __iavf_add_stat_strings - copy stat strings into ethtool buffer * @p: ethtool supplied buffer @@ -237,31 +190,6 @@ static const struct iavf_stats iavf_gstrings_stats[] = { #define IAVF_STATS_LEN ARRAY_SIZE(iavf_gstrings_stats) -#define IAVF_QUEUE_STATS_LEN ARRAY_SIZE(iavf_gstrings_queue_stats) - -/* For now we have one and only one private flag and it is only defined - * when we have support for the SKIP_CPU_SYNC DMA attribute. Instead - * of leaving all this code sitting around empty we will strip it unless - * our one private flag is actually available. - */ -struct iavf_priv_flags { - char flag_string[ETH_GSTRING_LEN]; - u32 flag; - bool read_only; -}; - -#define IAVF_PRIV_FLAG(_name, _flag, _read_only) { \ - .flag_string = _name, \ - .flag = _flag, \ - .read_only = _read_only, \ -} - -static const struct iavf_priv_flags iavf_gstrings_priv_flags[] = { - IAVF_PRIV_FLAG("legacy-rx", IAVF_FLAG_LEGACY_RX, 0), -}; - -#define IAVF_PRIV_FLAGS_STR_LEN ARRAY_SIZE(iavf_gstrings_priv_flags) - /** * iavf_get_link_ksettings - Get Link Speed and Duplex settings * @netdev: network interface device structure @@ -331,20 +259,22 @@ static int iavf_get_link_ksettings(struct net_device *netdev, **/ static int iavf_get_sset_count(struct net_device *netdev, int sset) { - /* Report the maximum number queues, even if not every queue is - * currently configured. Since allocation of queues is in pairs, - * use netdev->real_num_tx_queues * 2. The real_num_tx_queues is set - * at device creation and never changes. - */ + u32 num; - if (sset == ETH_SS_STATS) - return IAVF_STATS_LEN + - (IAVF_QUEUE_STATS_LEN * 2 * - netdev->real_num_tx_queues); - else if (sset == ETH_SS_PRIV_FLAGS) - return IAVF_PRIV_FLAGS_STR_LEN; - else + switch (sset) { + case ETH_SS_STATS: + /* Per-queue */ + num = libie_rq_stats_get_sset_count(); + num += libie_sq_stats_get_sset_count(); + num *= netdev->real_num_tx_queues; + + /* Global */ + num += IAVF_STATS_LEN; + + return num; + default: return -EINVAL; + } } /** @@ -371,37 +301,19 @@ static void iavf_get_ethtool_stats(struct net_device *netdev, * it to iterate over rings' stats. */ for (i = 0; i < adapter->num_active_queues; i++) { - struct iavf_ring *ring; + const struct iavf_ring *ring; /* Tx rings stats */ - ring = &adapter->tx_rings[i]; - iavf_add_queue_stats(&data, ring); + libie_sq_stats_get_data(&data, &adapter->tx_rings[i].sq_stats); /* Rx rings stats */ ring = &adapter->rx_rings[i]; - iavf_add_queue_stats(&data, ring); + libie_rq_stats_get_data(&data, &ring->rq_stats, + ring->rx_pages ? 
ring->pool : NULL); } rcu_read_unlock(); } -/** - * iavf_get_priv_flag_strings - Get private flag strings - * @netdev: network interface device structure - * @data: buffer for string data - * - * Builds the private flags string table - **/ -static void iavf_get_priv_flag_strings(struct net_device *netdev, u8 *data) -{ - unsigned int i; - - for (i = 0; i < IAVF_PRIV_FLAGS_STR_LEN; i++) { - snprintf(data, ETH_GSTRING_LEN, "%s", - iavf_gstrings_priv_flags[i].flag_string); - data += ETH_GSTRING_LEN; - } -} - /** * iavf_get_stat_strings - Get stat strings * @netdev: network interface device structure @@ -419,10 +331,8 @@ static void iavf_get_stat_strings(struct net_device *netdev, u8 *data) * real_num_tx_queues for both Tx and Rx queues. */ for (i = 0; i < netdev->real_num_tx_queues; i++) { - iavf_add_stat_strings(&data, iavf_gstrings_queue_stats, - "tx", i); - iavf_add_stat_strings(&data, iavf_gstrings_queue_stats, - "rx", i); + libie_sq_stats_get_strings(&data, i); + libie_rq_stats_get_strings(&data, i); } } @@ -440,105 +350,11 @@ static void iavf_get_strings(struct net_device *netdev, u32 sset, u8 *data) case ETH_SS_STATS: iavf_get_stat_strings(netdev, data); break; - case ETH_SS_PRIV_FLAGS: - iavf_get_priv_flag_strings(netdev, data); - break; default: break; } } -/** - * iavf_get_priv_flags - report device private flags - * @netdev: network interface device structure - * - * The get string set count and the string set should be matched for each - * flag returned. Add new strings for each flag to the iavf_gstrings_priv_flags - * array. - * - * Returns a u32 bitmap of flags. - **/ -static u32 iavf_get_priv_flags(struct net_device *netdev) -{ - struct iavf_adapter *adapter = netdev_priv(netdev); - u32 i, ret_flags = 0; - - for (i = 0; i < IAVF_PRIV_FLAGS_STR_LEN; i++) { - const struct iavf_priv_flags *priv_flags; - - priv_flags = &iavf_gstrings_priv_flags[i]; - - if (priv_flags->flag & adapter->flags) - ret_flags |= BIT(i); - } - - return ret_flags; -} - -/** - * iavf_set_priv_flags - set private flags - * @netdev: network interface device structure - * @flags: bit flags to be set - **/ -static int iavf_set_priv_flags(struct net_device *netdev, u32 flags) -{ - struct iavf_adapter *adapter = netdev_priv(netdev); - u32 orig_flags, new_flags, changed_flags; - u32 i; - - orig_flags = READ_ONCE(adapter->flags); - new_flags = orig_flags; - - for (i = 0; i < IAVF_PRIV_FLAGS_STR_LEN; i++) { - const struct iavf_priv_flags *priv_flags; - - priv_flags = &iavf_gstrings_priv_flags[i]; - - if (flags & BIT(i)) - new_flags |= priv_flags->flag; - else - new_flags &= ~(priv_flags->flag); - - if (priv_flags->read_only && - ((orig_flags ^ new_flags) & ~BIT(i))) - return -EOPNOTSUPP; - } - - /* Before we finalize any flag changes, any checks which we need to - * perform to determine if the new flags will be supported should go - * here... - */ - - /* Compare and exchange the new flags into place. If we failed, that - * is if cmpxchg returns anything but the old value, this means - * something else must have modified the flags variable since we - * copied it. We'll just punt with an error and log something in the - * message buffer. - */ - if (cmpxchg(&adapter->flags, orig_flags, new_flags) != orig_flags) { - dev_warn(&adapter->pdev->dev, - "Unable to update adapter->flags as it was modified by another thread...\n"); - return -EAGAIN; - } - - changed_flags = orig_flags ^ new_flags; - - /* Process any additional changes needed as a result of flag changes. 
- * The changed_flags value reflects the list of bits that were changed
- * in the code above.
- */
-
-	/* issue a reset to force legacy-rx change to take effect */
-	if (changed_flags & IAVF_FLAG_LEGACY_RX) {
-		if (netif_running(netdev)) {
-			adapter->flags |= IAVF_FLAG_RESET_NEEDED;
-			queue_work(adapter->wq, &adapter->reset_task);
-		}
-	}
-
-	return 0;
-}
-
 /**
  * iavf_get_msglevel - Get debug message level
  * @netdev: network interface device structure
@@ -584,7 +400,6 @@ static void iavf_get_drvinfo(struct net_device *netdev,
 	strscpy(drvinfo->driver, iavf_driver_name, 32);
 	strscpy(drvinfo->fw_version, "N/A", 4);
 	strscpy(drvinfo->bus_info, pci_name(adapter->pdev), 32);
-	drvinfo->n_priv_flags = IAVF_PRIV_FLAGS_STR_LEN;
 }
 
 /**
@@ -1829,6 +1644,7 @@ static int iavf_set_channels(struct net_device *netdev,
 			     struct ethtool_channels *ch)
 {
 	struct iavf_adapter *adapter = netdev_priv(netdev);
+	u32 num_allowed = adapter->vsi_res->num_queue_pairs;
 	u32 num_req = ch->combined_count;
 	int i;
 
@@ -1841,9 +1657,15 @@ static int iavf_set_channels(struct net_device *netdev,
 	/* All of these should have already been checked by ethtool before this
 	 * even gets to us, but just to be sure.
 	 */
-	if (num_req == 0 || num_req > adapter->vsi_res->num_queue_pairs)
+	if (num_req == 0 || num_req > num_allowed)
 		return -EINVAL;
 
+	if (iavf_adapter_xdp_active(adapter) && num_req * 2 > num_allowed) {
+		netdev_err(netdev, "XDP is enabled, so the maximum allowed queue number is reduced to %u, %u queues were requested\n",
+			   num_allowed / 2, num_req);
+		return -EINVAL;
+	}
+
 	if (num_req == adapter->num_active_queues)
 		return 0;
 
@@ -1969,8 +1791,6 @@ static const struct ethtool_ops iavf_ethtool_ops = {
 	.get_strings = iavf_get_strings,
 	.get_ethtool_stats = iavf_get_ethtool_stats,
 	.get_sset_count = iavf_get_sset_count,
-	.get_priv_flags = iavf_get_priv_flags,
-	.set_priv_flags = iavf_set_priv_flags,
 	.get_msglevel = iavf_get_msglevel,
 	.set_msglevel = iavf_set_msglevel,
 	.get_coalesce = iavf_get_coalesce,
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index 095201e83c9db0..59e47968ad264a 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -1,6 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright(c) 2013 - 2018 Intel Corporation.
*/ +#include + #include "iavf.h" #include "iavf_prototype.h" #include "iavf_client.h" @@ -46,6 +48,7 @@ MODULE_DEVICE_TABLE(pci, iavf_pci_tbl); MODULE_ALIAS("i40evf"); MODULE_AUTHOR("Intel Corporation, "); MODULE_DESCRIPTION("Intel(R) Ethernet Adaptive Virtual Function Network Driver"); +MODULE_IMPORT_NS(LIBIE); MODULE_LICENSE("GPL v2"); static const struct net_device_ops iavf_netdev_ops; @@ -452,7 +455,7 @@ iavf_map_vector_to_rxq(struct iavf_adapter *adapter, int v_idx, int r_idx) q_vector->rx.count++; q_vector->rx.next_update = jiffies + 1; q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting); - q_vector->ring_mask |= BIT(r_idx); + q_vector->rx_ring_mask |= BIT(r_idx); wr32(hw, IAVF_VFINT_ITRN1(IAVF_RX_ITR, q_vector->reg_idx), q_vector->rx.current_itr >> 1); q_vector->rx.current_itr = q_vector->rx.target_itr; @@ -463,12 +466,15 @@ iavf_map_vector_to_rxq(struct iavf_adapter *adapter, int v_idx, int r_idx) * @adapter: board private structure * @v_idx: interrupt number * @t_idx: queue number - **/ + * @xdpq: set to true if Tx queue is XDP Tx queue + */ static void -iavf_map_vector_to_txq(struct iavf_adapter *adapter, int v_idx, int t_idx) +iavf_map_vector_to_txq(struct iavf_adapter *adapter, int v_idx, int t_idx, + bool xdpq) { + struct iavf_ring *tx_ring = xdpq ? &adapter->xdp_rings[t_idx] + : &adapter->tx_rings[t_idx]; struct iavf_q_vector *q_vector = &adapter->q_vectors[v_idx]; - struct iavf_ring *tx_ring = &adapter->tx_rings[t_idx]; struct iavf_hw *hw = &adapter->hw; tx_ring->q_vector = q_vector; @@ -478,7 +484,7 @@ iavf_map_vector_to_txq(struct iavf_adapter *adapter, int v_idx, int t_idx) q_vector->tx.count++; q_vector->tx.next_update = jiffies + 1; q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting); - q_vector->num_ringpairs++; + q_vector->tx_ring_mask |= BIT(tx_ring->queue_index); wr32(hw, IAVF_VFINT_ITRN1(IAVF_TX_ITR, q_vector->reg_idx), q_vector->tx.target_itr >> 1); q_vector->tx.current_itr = q_vector->tx.target_itr; @@ -504,7 +510,11 @@ static void iavf_map_rings_to_vectors(struct iavf_adapter *adapter) for (; ridx < rings_remaining; ridx++) { iavf_map_vector_to_rxq(adapter, vidx, ridx); - iavf_map_vector_to_txq(adapter, vidx, ridx); + iavf_map_vector_to_txq(adapter, vidx, ridx, false); + if (iavf_adapter_xdp_active(adapter)) + iavf_map_vector_to_txq(adapter, vidx, ridx, true); + + adapter->q_vectors[vidx].num_ringpairs++; /* In the case where we have more queues than vectors, continue * round-robin on vectors until all queues are mapped. 
@@ -516,6 +526,54 @@ static void iavf_map_rings_to_vectors(struct iavf_adapter *adapter)
 	adapter->aq_required |= IAVF_FLAG_AQ_MAP_VECTORS;
 }
 
+/**
+ * iavf_unmap_rings_from_vectors - Clear existing mapping for queues and vectors
+ * @adapter: board private structure
+ *
+ */
+static void iavf_unmap_rings_from_vectors(struct iavf_adapter *adapter)
+{
+	struct iavf_ring *rx_ring, *tx_ring;
+	struct iavf_q_vector *q_vector;
+	int num_q_vectors, i;
+
+	num_q_vectors = adapter->num_msix_vectors - NONQ_VECS;
+	for (i = 0; i < num_q_vectors; i++) {
+		q_vector = &adapter->q_vectors[i];
+		q_vector->tx.ring = NULL;
+		q_vector->tx.count = 0;
+		q_vector->tx.next_update = 0;
+		q_vector->tx.target_itr = 0;
+		q_vector->tx.current_itr = 0;
+		q_vector->num_ringpairs = 0;
+
+		q_vector->rx.ring = NULL;
+		q_vector->rx.count = 0;
+		q_vector->rx.next_update = 0;
+		q_vector->rx.target_itr = 0;
+		q_vector->rx.current_itr = 0;
+		q_vector->rx_ring_mask = 0;
+		q_vector->tx_ring_mask = 0;
+	}
+
+	for (i = 0; i < adapter->num_active_queues; i++) {
+		rx_ring = &adapter->rx_rings[i];
+		tx_ring = &adapter->tx_rings[i];
+
+		rx_ring->q_vector = NULL;
+		rx_ring->next = NULL;
+		tx_ring->q_vector = NULL;
+		tx_ring->next = NULL;
+	}
+
+	for (i = 0; i < adapter->num_xdp_tx_queues; i++) {
+		tx_ring = &adapter->xdp_rings[i];
+
+		tx_ring->q_vector = NULL;
+		tx_ring->next = NULL;
+	}
+}
+
 /**
  * iavf_irq_affinity_notify - Callback for affinity changes
  * @notify: context as to what irq was changed
@@ -654,6 +712,7 @@ static int iavf_request_misc_irq(struct iavf_adapter *adapter)
  **/
 static void iavf_free_traffic_irqs(struct iavf_adapter *adapter)
 {
+	struct iavf_q_vector *q_vector;
 	int vector, irq_num, q_vectors;
 
 	if (!adapter->msix_entries)
@@ -662,10 +721,14 @@ static void iavf_free_traffic_irqs(struct iavf_adapter *adapter)
 	q_vectors = adapter->num_msix_vectors - NONQ_VECS;
 
 	for (vector = 0; vector < q_vectors; vector++) {
+		q_vector = &adapter->q_vectors[vector];
+		if (!q_vector->tx.ring && !q_vector->rx.ring)
+			continue;
+
 		irq_num = adapter->msix_entries[vector + NONQ_VECS].vector;
 		irq_set_affinity_notifier(irq_num, NULL);
 		irq_update_affinity_hint(irq_num, NULL);
-		free_irq(irq_num, &adapter->q_vectors[vector]);
+		free_irq(irq_num, q_vector);
 	}
 }
 
@@ -694,54 +757,68 @@ static void iavf_free_misc_irq(struct iavf_adapter *adapter)
 static void iavf_configure_tx(struct iavf_adapter *adapter)
 {
 	struct iavf_hw *hw = &adapter->hw;
-	int i;
+	int i, j;
 
-	for (i = 0; i < adapter->num_active_queues; i++)
-		adapter->tx_rings[i].tail = hw->hw_addr + IAVF_QTX_TAIL1(i);
+	for (i = 0, j = 0; i < adapter->num_active_queues; i++, j++)
+		adapter->tx_rings[i].tail = hw->hw_addr + IAVF_QTX_TAIL1(j);
+
+	for (i = 0; i < adapter->num_xdp_tx_queues; i++, j++)
+		adapter->xdp_rings[i].tail = hw->hw_addr + IAVF_QTX_TAIL1(j);
 }
 
 /**
- * iavf_configure_rx - Configure Receive Unit after Reset
+ * iavf_configure_rx_ring - Configure a single Rx ring
  * @adapter: board private structure
- *
- * Configure the Rx unit of the MAC after a reset.
- **/
-static void iavf_configure_rx(struct iavf_adapter *adapter)
+ * @rx_ring: Rx ring to be configured
+ */
+void iavf_configure_rx_ring(struct iavf_adapter *adapter,
+			    struct iavf_ring *rx_ring)
 {
-	unsigned int rx_buf_len = IAVF_RXBUFFER_2048;
-	struct iavf_hw *hw = &adapter->hw;
-	int i;
+	u32 queue_idx = rx_ring->queue_index;
+	int err;
 
-	/* Legacy Rx will always default to a 2048 buffer size.
*/ -#if (PAGE_SIZE < 8192) - if (!(adapter->flags & IAVF_FLAG_LEGACY_RX)) { - struct net_device *netdev = adapter->netdev; + rx_ring->tail = adapter->hw.hw_addr + IAVF_QRX_TAIL1(queue_idx); - /* For jumbo frames on systems with 4K pages we have to use - * an order 1 page, so we might as well increase the size - * of our Rx buffer to make better use of the available space - */ - rx_buf_len = IAVF_RXBUFFER_3072; + if (!xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) + err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, + rx_ring->queue_index, + rx_ring->q_vector->napi.napi_id); - /* We use a 1536 buffer size for configurations with - * standard Ethernet mtu. On x86 this gives us enough room - * for shared info and 192 bytes of padding. - */ - if (!IAVF_2K_TOO_SMALL_WITH_PADDING && - (netdev->mtu <= ETH_DATA_LEN)) - rx_buf_len = IAVF_RXBUFFER_1536 - NET_IP_ALIGN; - } -#endif + if (rx_ring->flags & IAVF_TXRX_FLAGS_XSK) { + err = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq, + MEM_TYPE_XSK_BUFF_POOL, + NULL); + if (err) + netdev_err(adapter->netdev, "xdp_rxq_info_reg_mem_model returned %d\n", + err); - for (i = 0; i < adapter->num_active_queues; i++) { - adapter->rx_rings[i].tail = hw->hw_addr + IAVF_QRX_TAIL1(i); - adapter->rx_rings[i].rx_buf_len = rx_buf_len; + xsk_pool_set_rxq_info(rx_ring->xsk_pool, &rx_ring->xdp_rxq); - if (adapter->flags & IAVF_FLAG_LEGACY_RX) - clear_ring_build_skb_enabled(&adapter->rx_rings[i]); - else - set_ring_build_skb_enabled(&adapter->rx_rings[i]); + iavf_check_alloc_rx_buffers_zc(adapter, rx_ring); + } else { + err = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq, + MEM_TYPE_PAGE_POOL, + rx_ring->pool); + if (err) + netdev_err(adapter->netdev, "Could not register XDP memory model for RX queue %u, error: %d\n", + queue_idx, err); + + iavf_alloc_rx_pages(rx_ring); } + + RCU_INIT_POINTER(rx_ring->xdp_prog, adapter->xdp_prog); +} + +/** + * iavf_configure_rx - Configure Receive Unit after Reset + * @adapter: board private structure + * + * Configure the Rx unit of the MAC after a reset. + */ +static void iavf_configure_rx(struct iavf_adapter *adapter) +{ + for (u32 i = 0; i < adapter->num_active_queues; i++) + iavf_configure_rx_ring(adapter, &adapter->rx_rings[i]); } /** @@ -1233,19 +1311,12 @@ static void iavf_napi_disable_all(struct iavf_adapter *adapter) static void iavf_configure(struct iavf_adapter *adapter) { struct net_device *netdev = adapter->netdev; - int i; iavf_set_rx_mode(netdev); iavf_configure_tx(adapter); iavf_configure_rx(adapter); adapter->aq_required |= IAVF_FLAG_AQ_CONFIGURE_QUEUES; - - for (i = 0; i < adapter->num_active_queues; i++) { - struct iavf_ring *ring = &adapter->rx_rings[i]; - - iavf_alloc_rx_buffers(ring, IAVF_DESC_UNUSED(ring)); - } } /** @@ -1378,23 +1449,47 @@ static void iavf_clear_adv_rss_conf(struct iavf_adapter *adapter) } /** - * iavf_down - Shutdown the connection processing + * iavf_stop_traffic - Stop NAPI and interrupts before link down * @adapter: board private structure - * - * Expects to be called while holding the __IAVF_IN_CRITICAL_TASK bit lock. 
- **/ -void iavf_down(struct iavf_adapter *adapter) + */ +void iavf_stop_traffic(struct iavf_adapter *adapter) { struct net_device *netdev = adapter->netdev; - if (adapter->state <= __IAVF_DOWN_PENDING) - return; - netif_carrier_off(netdev); netif_tx_disable(netdev); adapter->link_up = false; iavf_napi_disable_all(adapter); iavf_irq_disable(adapter); +} + +/** + * iavf_start_traffic - Start NAPI and interrupts after link up + * @adapter: board private structure + */ +void iavf_start_traffic(struct iavf_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + + iavf_napi_enable_all(adapter); + iavf_irq_enable(adapter, true); + adapter->link_up = true; + netif_tx_start_all_queues(netdev); + netif_carrier_on(netdev); +} + +/** + * iavf_down - Shutdown the connection processing + * @adapter: board private structure + * + * Expects to be called while holding the __IAVF_IN_CRITICAL_TASK bit lock. + */ +void iavf_down(struct iavf_adapter *adapter) +{ + if (adapter->state <= __IAVF_DOWN_PENDING) + return; + + iavf_stop_traffic(adapter); iavf_clear_mac_vlan_filters(adapter); iavf_clear_cloud_filters(adapter); @@ -1467,6 +1562,19 @@ iavf_acquire_msix_vectors(struct iavf_adapter *adapter, int vectors) return 0; } +/** + * iavf_free_xdp_queues - Free memory for XDP rings + * @adapter: board private structure to update + * + * Free all of the memory associated with XDP queues. + */ +static void iavf_free_xdp_queues(struct iavf_adapter *adapter) +{ + adapter->num_xdp_tx_queues = 0; + kfree(adapter->xdp_rings); + adapter->xdp_rings = NULL; +} + /** * iavf_free_queues - Free memory for all rings * @adapter: board private structure to initialize @@ -1475,13 +1583,92 @@ iavf_acquire_msix_vectors(struct iavf_adapter *adapter, int vectors) **/ static void iavf_free_queues(struct iavf_adapter *adapter) { - if (!adapter->vsi_res) - return; adapter->num_active_queues = 0; kfree(adapter->tx_rings); adapter->tx_rings = NULL; kfree(adapter->rx_rings); adapter->rx_rings = NULL; + iavf_free_xdp_queues(adapter); +} + +/** + * iavf_set_rx_queue_vlan_tag_loc - set location for VLAN tag offload in Rx + * @adapter: board private structure + * @rx_ring: Rx ring where VLAN tag offload for VLAN will be set + * + * Helper function for setting VLAN tag offload location in a given Rx ring. + */ +static void iavf_set_rx_queue_vlan_tag_loc(struct iavf_adapter *adapter, + struct iavf_ring *rx_ring) +{ + struct virtchnl_vlan_supported_caps *caps; + + /* prevent multiple L2TAG bits being set after VFR */ + rx_ring->flags &= + ~(IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1 | + IAVF_RXR_FLAGS_VLAN_TAG_LOC_L2TAG2_2); + + if (VLAN_ALLOWED(adapter)) { + rx_ring->flags |= IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1; + return; + } + + if (!VLAN_V2_ALLOWED(adapter)) + return; + + caps = &adapter->vlan_v2_caps.offloads.stripping_support; + + if ((caps->outer | caps->inner) & VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1) + rx_ring->flags |= IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1; + else if ((caps->outer | caps->inner) & VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2_2) + rx_ring->flags |= IAVF_RXR_FLAGS_VLAN_TAG_LOC_L2TAG2_2; +} + +/** + * iavf_set_tx_queue_vlan_tag_loc - set location for VLAN tag offload in Tx + * @adapter: board private structure + * @tx_ring: Tx ring where VLAN tag offload for VLAN will be set + * + * Helper function for setting VLAN tag offload location in a given Tx ring. 
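
Both of these per-ring helpers reduce to the same selection: merge the outer and inner capability bits and prefer L2TAG1 over L2TAG2. A standalone sketch of just that selection (the flag values are invented and the VLAN_ALLOWED short-circuit is omitted):

#include <stdio.h>
#include <stdint.h>

/* invented capability and ring-flag bits */
#define CAP_L2TAG1		0x1u
#define CAP_L2TAG2		0x2u

#define RING_VLAN_IN_L2TAG1	0x1u
#define RING_VLAN_IN_L2TAG2	0x2u

static uint32_t pick_vlan_tag_loc(uint32_t outer_caps, uint32_t inner_caps)
{
	uint32_t caps = outer_caps | inner_caps;

	if (caps & CAP_L2TAG1)
		return RING_VLAN_IN_L2TAG1;
	if (caps & CAP_L2TAG2)
		return RING_VLAN_IN_L2TAG2;

	return 0;	/* no offload location advertised */
}

int main(void)
{
	printf("%u\n", pick_vlan_tag_loc(CAP_L2TAG2, 0));	/* 2 */
	printf("%u\n", pick_vlan_tag_loc(0, CAP_L2TAG1));	/* 1 */
	return 0;
}

Merging outer and inner first is what lets the long if/else chains of the old per-queue loop collapse into these short helpers.
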
+ */ +static void iavf_set_tx_queue_vlan_tag_loc(struct iavf_adapter *adapter, + struct iavf_ring *tx_ring) +{ + struct virtchnl_vlan_supported_caps *caps; + + /* prevent multiple L2TAG bits being set after VFR */ + tx_ring->flags &= + ~(IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1 | + IAVF_TXR_FLAGS_VLAN_TAG_LOC_L2TAG2); + + if (VLAN_ALLOWED(adapter)) { + tx_ring->flags |= IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1; + return; + } + + if (!VLAN_V2_ALLOWED(adapter)) + return; + + caps = &adapter->vlan_v2_caps.offloads.insertion_support; + + if ((caps->outer | caps->inner) & VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1) + tx_ring->flags |= IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1; + else if ((caps->outer | caps->inner) & VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2) + tx_ring->flags |= IAVF_TXR_FLAGS_VLAN_TAG_LOC_L2TAG2; +} + +/** + * iavf_set_xdp_queue_vlan_tag_loc - set location for VLAN tag on XDP ring + * @adapter: board private structure + * + * Variation of iavf_set_queue_vlan_tag_loc, which configures XDP rings only. + */ +static void iavf_set_xdp_queue_vlan_tag_loc(struct iavf_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_xdp_tx_queues; i++) + iavf_set_tx_queue_vlan_tag_loc(adapter, &adapter->xdp_rings[i]); } /** @@ -1498,70 +1685,117 @@ void iavf_set_queue_vlan_tag_loc(struct iavf_adapter *adapter) int i; for (i = 0; i < adapter->num_active_queues; i++) { - struct iavf_ring *tx_ring = &adapter->tx_rings[i]; - struct iavf_ring *rx_ring = &adapter->rx_rings[i]; - - /* prevent multiple L2TAG bits being set after VFR */ - tx_ring->flags &= - ~(IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1 | - IAVF_TXR_FLAGS_VLAN_TAG_LOC_L2TAG2); - rx_ring->flags &= - ~(IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1 | - IAVF_RXR_FLAGS_VLAN_TAG_LOC_L2TAG2_2); - - if (VLAN_ALLOWED(adapter)) { - tx_ring->flags |= IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1; - rx_ring->flags |= IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1; - } else if (VLAN_V2_ALLOWED(adapter)) { - struct virtchnl_vlan_supported_caps *stripping_support; - struct virtchnl_vlan_supported_caps *insertion_support; - - stripping_support = - &adapter->vlan_v2_caps.offloads.stripping_support; - insertion_support = - &adapter->vlan_v2_caps.offloads.insertion_support; - - if (stripping_support->outer) { - if (stripping_support->outer & - VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1) - rx_ring->flags |= - IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1; - else if (stripping_support->outer & - VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2_2) - rx_ring->flags |= - IAVF_RXR_FLAGS_VLAN_TAG_LOC_L2TAG2_2; - } else if (stripping_support->inner) { - if (stripping_support->inner & - VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1) - rx_ring->flags |= - IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1; - else if (stripping_support->inner & - VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2_2) - rx_ring->flags |= - IAVF_RXR_FLAGS_VLAN_TAG_LOC_L2TAG2_2; - } + iavf_set_rx_queue_vlan_tag_loc(adapter, &adapter->rx_rings[i]); + iavf_set_tx_queue_vlan_tag_loc(adapter, &adapter->tx_rings[i]); + } - if (insertion_support->outer) { - if (insertion_support->outer & - VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1) - tx_ring->flags |= - IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1; - else if (insertion_support->outer & - VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2) - tx_ring->flags |= - IAVF_TXR_FLAGS_VLAN_TAG_LOC_L2TAG2; - } else if (insertion_support->inner) { - if (insertion_support->inner & - VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1) - tx_ring->flags |= - IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1; - else if (insertion_support->inner & - VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2) - tx_ring->flags |= - IAVF_TXR_FLAGS_VLAN_TAG_LOC_L2TAG2; - } - } + 
iavf_set_xdp_queue_vlan_tag_loc(adapter); +} + +/** + * iavf_init_rx_ring - Init pointers and flags for a given Rx ring + * @adapter: board private structure to initialize + * @ring_index: index of the ring to be initialized + * + * Init all basic pointers and flags in a newly allocated Rx ring. + */ +static void iavf_init_rx_ring(struct iavf_adapter *adapter, + int ring_index) +{ + struct iavf_ring *rx_ring = &adapter->rx_rings[ring_index]; + + rx_ring->vsi = &adapter->vsi; + rx_ring->queue_index = ring_index; + rx_ring->netdev = adapter->netdev; + rx_ring->dev = &adapter->pdev->dev; + rx_ring->count = adapter->rx_desc_count; + rx_ring->itr_setting = IAVF_ITR_RX_DEF; +} + +/** + * iavf_init_tx_ring - Init pointers and flags for a given Tx ring + * @adapter: board private structure to initialize + * @ring_index: index of the ring to be initialized + * @xdp_ring: set to true if the ring is XDP Tx queue + * + * Init all basic pointers and flags in a newly allocated Tx ring. + */ +static void iavf_init_tx_ring(struct iavf_adapter *adapter, + int ring_index, + bool xdp_ring) +{ + struct iavf_ring *tx_ring = xdp_ring ? &adapter->xdp_rings[ring_index] + : &adapter->tx_rings[ring_index]; + + tx_ring->vsi = &adapter->vsi; + tx_ring->queue_index = ring_index; + tx_ring->netdev = adapter->netdev; + tx_ring->dev = &adapter->pdev->dev; + tx_ring->count = adapter->tx_desc_count; + tx_ring->itr_setting = IAVF_ITR_TX_DEF; + + tx_ring->flags = 0; + + if (adapter->flags & IAVF_FLAG_WB_ON_ITR_CAPABLE) + tx_ring->flags |= IAVF_TXR_FLAGS_WB_ON_ITR; + + u64_stats_init(&tx_ring->sq_stats.syncp); + + if (xdp_ring) { + tx_ring->queue_index += adapter->num_active_queues; + tx_ring->flags |= IAVF_TXRX_FLAGS_XDP; + spin_lock_init(&tx_ring->tx_lock); + } +} + +/** + * iavf_xdp_cfg_tx_sharing - Enable XDP TxQ sharing, if needed + * @adapter: board private structure + * + * If there is more CPUs than rings, sharing XDP TxQ allows us + * to handle XDP_REDIRECT from other interfaces. + */ +static void iavf_xdp_cfg_tx_sharing(struct iavf_adapter *adapter) +{ + u32 num_active_queues = adapter->num_active_queues; + u32 num_cpus = num_online_cpus(); + + if (!iavf_adapter_xdp_active(adapter) || num_active_queues >= num_cpus) + return; + + netdev_warn(adapter->netdev, + "System has %u CPUs, but only %u XDP queues can be configured, entering XDP TxQ sharing mode, performance is decreased\n", + num_cpus, num_active_queues); + static_branch_inc(&iavf_xdp_locking_key); +} + +/** + * iavf_alloc_xdp_queues - Allocate memory for XDP rings + * @adapter: board private structure to initialize + * @num_active_queues: number of exposed queue pairs + * + * Variation of iavf_alloc_queues(), which configures XDP queues only. 
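
The TxQ sharing mode warned about above comes down to CPU count versus XDP Tx queue count: once several CPUs can redirect into the same queue, each XDP Tx queue has to be protected by its tx_lock. A minimal sketch of that decision alone (not the static-branch mechanics):

#include <stdbool.h>
#include <stdio.h>

/* sharing is needed as soon as there are more CPUs than XDP Tx queues */
static bool xdp_tx_needs_locking(unsigned int num_xdp_queues,
				 unsigned int num_cpus)
{
	return num_cpus > num_xdp_queues;
}

int main(void)
{
	printf("%d\n", xdp_tx_needs_locking(4, 16));	/* 1: shared, take tx_lock */
	printf("%d\n", xdp_tx_needs_locking(16, 8));	/* 0: one queue per CPU */
	return 0;
}
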
+ */ +static int iavf_alloc_xdp_queues(struct iavf_adapter *adapter, u32 num_active_queues) +{ + int i; + + adapter->xdp_rings = kcalloc(num_active_queues, + sizeof(struct iavf_ring), GFP_KERNEL); + if (!adapter->xdp_rings) + return -ENOMEM; + + adapter->num_xdp_tx_queues = num_active_queues; + + /* Setup extra XDP Tx queues if there are any */ + for (i = 0; i < adapter->num_xdp_tx_queues; i++) { + iavf_init_tx_ring(adapter, i, true); + adapter->rx_rings[i].xdp_ring = &adapter->xdp_rings[i]; } + + iavf_xdp_cfg_tx_sharing(adapter); + + return 0; } /** @@ -1574,7 +1808,8 @@ void iavf_set_queue_vlan_tag_loc(struct iavf_adapter *adapter) **/ static int iavf_alloc_queues(struct iavf_adapter *adapter) { - int i, num_active_queues; + u32 num_active_queues; + int i; /* If we're in reset reallocating queues we don't actually know yet for * certain the PF gave us the number of queues we asked for but we'll @@ -1591,7 +1826,6 @@ static int iavf_alloc_queues(struct iavf_adapter *adapter) adapter->vsi_res->num_queue_pairs, (int)(num_online_cpus())); - adapter->tx_rings = kcalloc(num_active_queues, sizeof(struct iavf_ring), GFP_KERNEL); if (!adapter->tx_rings) @@ -1601,29 +1835,16 @@ static int iavf_alloc_queues(struct iavf_adapter *adapter) if (!adapter->rx_rings) goto err_out; - for (i = 0; i < num_active_queues; i++) { - struct iavf_ring *tx_ring; - struct iavf_ring *rx_ring; - - tx_ring = &adapter->tx_rings[i]; - - tx_ring->queue_index = i; - tx_ring->netdev = adapter->netdev; - tx_ring->dev = &adapter->pdev->dev; - tx_ring->count = adapter->tx_desc_count; - tx_ring->itr_setting = IAVF_ITR_TX_DEF; - if (adapter->flags & IAVF_FLAG_WB_ON_ITR_CAPABLE) - tx_ring->flags |= IAVF_TXR_FLAGS_WB_ON_ITR; + adapter->num_active_queues = num_active_queues; - rx_ring = &adapter->rx_rings[i]; - rx_ring->queue_index = i; - rx_ring->netdev = adapter->netdev; - rx_ring->dev = &adapter->pdev->dev; - rx_ring->count = adapter->rx_desc_count; - rx_ring->itr_setting = IAVF_ITR_RX_DEF; + for (i = 0; i < num_active_queues; i++) { + iavf_init_tx_ring(adapter, i, false); + iavf_init_rx_ring(adapter, i); } - adapter->num_active_queues = num_active_queues; + if (iavf_adapter_xdp_active(adapter)) + if (iavf_alloc_xdp_queues(adapter, num_active_queues)) + goto err_out; iavf_set_queue_vlan_tag_loc(adapter); @@ -2001,12 +2222,12 @@ static int iavf_process_aq_command(struct iavf_adapter *adapter) if (adapter->aq_required & IAVF_FLAG_AQ_GET_OFFLOAD_VLAN_V2_CAPS) return iavf_send_vf_offload_vlan_v2_msg(adapter); if (adapter->aq_required & IAVF_FLAG_AQ_DISABLE_QUEUES) { - iavf_disable_queues(adapter); + iavf_disable_queues(adapter, false); return 0; } if (adapter->aq_required & IAVF_FLAG_AQ_MAP_VECTORS) { - iavf_map_queues(adapter); + iavf_map_queues(adapter, false); return 0; } @@ -2041,12 +2262,12 @@ static int iavf_process_aq_command(struct iavf_adapter *adapter) } if (adapter->aq_required & IAVF_FLAG_AQ_CONFIGURE_QUEUES) { - iavf_configure_queues(adapter); + iavf_configure_queues(adapter, false); return 0; } if (adapter->aq_required & IAVF_FLAG_AQ_ENABLE_QUEUES) { - iavf_enable_queues(adapter); + iavf_enable_queues(adapter, false); return 0; } @@ -2396,7 +2617,6 @@ int iavf_parse_vf_resource_msg(struct iavf_adapter *adapter) return -EAGAIN; } - adapter->num_req_queues = 0; adapter->vsi.id = adapter->vsi_res->vsi_id; adapter->vsi.back = adapter; @@ -2590,11 +2810,10 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter) netdev->netdev_ops = &iavf_netdev_ops; iavf_set_ethtool_ops(netdev); - netdev->watchdog_timeo = 5 * 
HZ; + netdev->max_mtu = LIBIE_MAX_MTU; - /* MTU range: 68 - 9710 */ - netdev->min_mtu = ETH_MIN_MTU; - netdev->max_mtu = IAVF_MAX_RXBUFFER - IAVF_PACKET_HDR_PAD; + netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_XSK_ZEROCOPY; if (!is_valid_ether_addr(adapter->hw.mac.addr)) { dev_info(&pdev->dev, "Invalid MAC address %pM, using random\n", @@ -2652,6 +2871,11 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter) set_bit(__IAVF_VSI_DOWN, adapter->vsi.state); rtnl_unlock(); + adapter->af_xdp_zc_qps = bitmap_zalloc(adapter->num_active_queues, + GFP_KERNEL); + if (!adapter->af_xdp_zc_qps) + goto err_zc_qps; + iavf_misc_irq_enable(adapter); wake_up(&adapter->down_waitqueue); @@ -2673,6 +2897,8 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter) return; err_mem: iavf_free_rss(adapter); +err_zc_qps: + bitmap_free(adapter->af_xdp_zc_qps); err_register: iavf_free_misc_irq(adapter); err_sw_init: @@ -2871,6 +3097,36 @@ static void iavf_watchdog_task(struct work_struct *work) HZ * 2); } +/** + * iavf_xchg_xdp_prog - set new prog and get an old one + * @adapter: board private structure + * @prog: new XDP program + * + * Returns pointer to the old XDP program. + * adapter->xdp_prog is not used in packet processing, so it can be + * safely set kinda like a flag before resource re-configuration (reset) + */ +static struct bpf_prog *iavf_xchg_xdp_prog(struct iavf_adapter *adapter, + struct bpf_prog *prog) +{ + return xchg(&adapter->xdp_prog, prog); +} + +/** + * iavf_free_xdp_prog - Release XDP program, if present + * @adapter: board private structure + * + * Should be used when adapter is being removed. + */ +static void iavf_free_xdp_prog(struct iavf_adapter *adapter) +{ + struct bpf_prog *old_xdp_prog; + + old_xdp_prog = iavf_xchg_xdp_prog(adapter, NULL); + if (old_xdp_prog) + bpf_prog_put(old_xdp_prog); +} + /** * iavf_disable_vf - disable VF * @adapter: board private structure @@ -2902,6 +3158,9 @@ static void iavf_disable_vf(struct iavf_adapter *adapter) iavf_free_all_rx_resources(adapter); } + iavf_free_xdp_prog(adapter); + bitmap_free(adapter->af_xdp_zc_qps); + spin_lock_bh(&adapter->mac_vlan_list_lock); /* Delete all of the filters */ @@ -3354,9 +3613,16 @@ void iavf_free_all_tx_resources(struct iavf_adapter *adapter) if (!adapter->tx_rings) return; + if (static_key_enabled(&iavf_xdp_locking_key)) + static_branch_dec(&iavf_xdp_locking_key); + for (i = 0; i < adapter->num_active_queues; i++) if (adapter->tx_rings[i].desc) iavf_free_tx_resources(&adapter->tx_rings[i]); + + for (i = 0; i < adapter->num_xdp_tx_queues; i++) + if (adapter->xdp_rings[i].desc) + iavf_free_tx_resources(&adapter->xdp_rings[i]); } /** @@ -3368,14 +3634,16 @@ void iavf_free_all_tx_resources(struct iavf_adapter *adapter) * callers duty to clean those orphaned rings. 
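
The xchg()-based swap above guarantees that exactly one caller sees, and therefore releases, the previous program even if two updates race. A standalone model using C11 atomics (struct prog and the put helper are stand-ins, not bpf_prog/bpf_prog_put):

#include <stdio.h>
#include <stdatomic.h>

struct prog {
	const char *name;	/* stand-in for a refcounted bpf_prog */
};

static _Atomic(struct prog *) current_prog;

static void prog_put(struct prog *p)
{
	if (p)
		printf("dropped reference on %s\n", p->name);
}

/* publish the new program and release the one it replaces */
static void assign_prog(struct prog *p)
{
	struct prog *old = atomic_exchange(&current_prog, p);

	prog_put(old);
}

int main(void)
{
	struct prog a = { "prog_a" }, b = { "prog_b" };

	assign_prog(&a);
	assign_prog(&b);	/* drops prog_a */
	assign_prog(NULL);	/* drops prog_b */
	return 0;
}
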
* * Return 0 on success, negative on failure - **/ + */ static int iavf_setup_all_tx_resources(struct iavf_adapter *adapter) { + struct iavf_ring *ring; int i, err = 0; for (i = 0; i < adapter->num_active_queues; i++) { - adapter->tx_rings[i].count = adapter->tx_desc_count; - err = iavf_setup_tx_descriptors(&adapter->tx_rings[i]); + ring = &adapter->tx_rings[i]; + ring->count = adapter->tx_desc_count; + err = iavf_setup_tx_descriptors(ring); if (!err) continue; dev_err(&adapter->pdev->dev, @@ -3383,6 +3651,17 @@ static int iavf_setup_all_tx_resources(struct iavf_adapter *adapter) break; } + for (i = 0; i < adapter->num_xdp_tx_queues; i++) { + ring = &adapter->xdp_rings[i]; + ring->count = adapter->tx_desc_count; + err = iavf_setup_tx_descriptors(ring); + if (!err) + continue; + dev_err(&adapter->pdev->dev, + "Allocation for XDP Queue %u failed\n", i); + break; + } + return err; } @@ -3398,10 +3677,13 @@ static int iavf_setup_all_tx_resources(struct iavf_adapter *adapter) **/ static int iavf_setup_all_rx_resources(struct iavf_adapter *adapter) { + struct iavf_ring *rx_ring; int i, err = 0; for (i = 0; i < adapter->num_active_queues; i++) { - adapter->rx_rings[i].count = adapter->rx_desc_count; + rx_ring = &adapter->rx_rings[i]; + rx_ring->count = adapter->rx_desc_count; + err = iavf_setup_rx_descriptors(&adapter->rx_rings[i]); if (!err) continue; @@ -4675,6 +4957,347 @@ static netdev_features_t iavf_fix_features(struct net_device *netdev, return iavf_fix_netdev_vlan_features(adapter, features); } +/** + * iavf_copy_xdp_prog_to_rings - update XDP prog references in rings + * @adapter: board private structure + * + * If program change also requires XDP resources reconfiguration, + * schedule a reset instead + */ +static void iavf_copy_xdp_prog_to_rings(const struct iavf_adapter *adapter) +{ + for (u32 i = 0; i < adapter->num_active_queues; i++) + rcu_assign_pointer(adapter->rx_rings[i].xdp_prog, + adapter->xdp_prog); + + /* No queue changes are needed, but running RX processing must finish */ + synchronize_net(); +} + +/** + * iavf_assign_bpf_prog - Assign a given BPF program to adapter + * @adapter: board private structure + * @prog: BPF program to be assigned to adapter + * + * Returns 0 on success, negative on failure + */ +static void iavf_assign_bpf_prog(struct iavf_adapter *adapter, + struct bpf_prog *prog) +{ + struct bpf_prog *old_prog; + + old_prog = iavf_xchg_xdp_prog(adapter, prog); + if (old_prog) + bpf_prog_put(old_prog); +} + +#define IAVF_XDP_LOCK_TIMEOUT_MS 5000 + +/** + * iavf_close_sync - Synchronous version of 'iavf_close', dedicated to XDP setup + * @adapter: board private structure + * + * Caller of this function needs to lock 'adapter->crit_lock' in order + * to prevent race conditions with 'reset_task' and VIRTCHNL communication. 
+ * + * Returns 0 on success, negative on failure + */ +static int iavf_close_sync(struct iavf_adapter *adapter) +{ + int err; + + iavf_stop_traffic(adapter); + + err = iavf_disable_queues(adapter, true); + if (err) { + dev_err(&adapter->pdev->dev, "cannot disable queues for XDP setup, error: %d\n", + err); + goto err_virtchnl; + } + + iavf_free_all_tx_resources(adapter); + iavf_free_all_rx_resources(adapter); + + iavf_free_traffic_irqs(adapter); + + return 0; + +err_virtchnl: + iavf_start_traffic(adapter); + + return err; +} + +/** + * iavf_open_sync - Synchronous version of 'iavf_open', dedicated to XDP setup + * @adapter: board private structure + * + * Caller of this function needs to lock 'adapter->crit_lock' in order + * to prevent race conditions with 'reset_task' and VIRTCHNL communication. + * + * Returns 0 on success, negative on failure + */ +static int iavf_open_sync(struct iavf_adapter *adapter) +{ + int err, ret; + + err = iavf_setup_all_tx_resources(adapter); + if (err) { + dev_err(&adapter->pdev->dev, + "cannot setup Tx resources, error: %d\n", err); + goto err_setup_tx_resources; + } + + err = iavf_setup_all_rx_resources(adapter); + if (err) { + dev_err(&adapter->pdev->dev, + "cannot setup Rx resources, error: %d\n", err); + goto err_setup_rx_resources; + } + + err = iavf_request_traffic_irqs(adapter, adapter->netdev->name); + if (err) { + dev_err(&adapter->pdev->dev, + "cannot request interrupts, error: %d\n", err); + goto err_request_irq; + } + + iavf_configure_tx(adapter); + iavf_configure_rx(adapter); + + err = iavf_configure_queues(adapter, true); + if (err) { + dev_err(&adapter->pdev->dev, + "cannot configure queues in PF, error: %d\n", err); + goto err_virtchnl_req; + } + + err = iavf_map_queues(adapter, true); + if (err) { + dev_err(&adapter->pdev->dev, + "cannot map queues to vectors in PF, error: %d\n", err); + goto err_virtchnl_req; + } + + err = iavf_enable_queues(adapter, true); + if (err) { + dev_err(&adapter->pdev->dev, + "cannot enable queues in PF, error: %d\n", err); + goto err_virtchnl_req; + } + + ret = iavf_poll_for_link_status(adapter, IAVF_XDP_LINK_TIMEOUT_MS); + if (ret < 0) { + err = ret; + dev_err(&adapter->pdev->dev, + "cannot bring the link up, error: %d\n", err); + goto err_wrong_link_status; + } else if (!ret) { + err = -EBUSY; + dev_err(&adapter->pdev->dev, + "pf returned link down status, error: %d\n", err); + goto err_wrong_link_status; + } + + iavf_start_traffic(adapter); + + return 0; + +err_wrong_link_status: + iavf_close_sync(adapter); +err_virtchnl_req: +err_request_irq: + iavf_free_traffic_irqs(adapter); +err_setup_rx_resources: + iavf_free_all_rx_resources(adapter); +err_setup_tx_resources: + iavf_free_all_tx_resources(adapter); + + return err; +} + +/** + * iavf_destroy_xdp_rings - remove XDP program from adapter and release + * XDP rings related to that program. + * @adapter: board private structure + */ +static void iavf_destroy_xdp_rings(struct iavf_adapter *adapter) +{ + iavf_unmap_rings_from_vectors(adapter); + iavf_free_xdp_queues(adapter); + iavf_assign_bpf_prog(adapter, NULL); + iavf_map_rings_to_vectors(adapter); +} + +/** + * iavf_prepare_xdp_rings - add XDP program to adapter and setup XDP rings + * to handle that program. 
+ * @adapter: board private structure + * @prog: XDP program + */ +static int iavf_prepare_xdp_rings(struct iavf_adapter *adapter, + struct bpf_prog *prog) +{ + int i, err; + + iavf_unmap_rings_from_vectors(adapter); + iavf_assign_bpf_prog(adapter, prog); + + err = iavf_alloc_xdp_queues(adapter, adapter->num_active_queues); + if (err) { + dev_err(&adapter->pdev->dev, + "cannot allocate memory for queues, error: %d\n", err); + goto err_alloc_queues; + } + + iavf_set_xdp_queue_vlan_tag_loc(adapter); + + iavf_map_rings_to_vectors(adapter); + + for_each_set_bit(i, adapter->af_xdp_zc_qps, adapter->num_active_queues) + napi_schedule(&adapter->rx_rings[i].q_vector->napi); + + return 0; + +err_alloc_queues: + iavf_assign_bpf_prog(adapter, NULL); + + return err; +} + +/** + * iavf_xdp_can_create_queues - check if queue number is appropriate for XDP + * @adapter: board private structure + * @extack: netlink extended ack + */ +static bool iavf_xdp_can_create_queues(struct iavf_adapter *adapter, + struct netlink_ext_ack *extack) +{ + u32 max_qp_num = adapter->vsi_res->num_queue_pairs; + u32 num_active_queues = adapter->num_active_queues; + + if (num_active_queues * 2 <= max_qp_num) + return true; + + netdev_warn(adapter->netdev, + "Current number of queue pairs (%u) set on adapter is too high to enable XDP, please configure queue number through ethtool to be no bigger than %u", + num_active_queues, max_qp_num); + + NL_SET_ERR_MSG_MOD(extack, + "XDP cannot be enabled due to configured queue number being too large, please check dmesg for more info"); + + return false; +} + +/** + * iavf_setup_xdp - handle xdp program change + * @adapter: board private structure + * @prog: XDP program + * @extack: netlink extended ack + */ +static int iavf_setup_xdp(struct iavf_adapter *adapter, struct bpf_prog *prog, + struct netlink_ext_ack *extack) +{ + u32 frame_size = READ_ONCE(adapter->netdev->mtu) + LIBIE_RX_LL_LEN; + bool needs_reconfig = !!prog != iavf_adapter_xdp_active(adapter); + bool was_running = netif_running(adapter->netdev); + int err; + + if (prog && frame_size > LIBIE_RX_BUF_LEN(LIBIE_XDP_HEADROOM)) { + NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP"); + return -EOPNOTSUPP; + } + + if (needs_reconfig) { + if (!iavf_xdp_can_create_queues(adapter, extack)) { + err = -EOPNOTSUPP; + goto err_no_queues; + } + + if (iavf_lock_timeout(&adapter->crit_lock, + IAVF_XDP_LOCK_TIMEOUT_MS)) { + err = -EBUSY; + dev_err(&adapter->pdev->dev, + "failed to acquire crit_lock in %s\n", + __func__); + goto err_crit_lock; + } + err = iavf_process_pending_pf_msg(adapter, + IAVF_XDP_LOCK_TIMEOUT_MS); + if (err) + goto err_pending_pf_msg; + + if (was_running) { + err = iavf_close_sync(adapter); + if (err) { + dev_err(&adapter->pdev->dev, + "cannot close the interface to setup XDP, error: %d\n", + err); + goto err_close_if; + } + } + + if (prog) { + err = iavf_prepare_xdp_rings(adapter, prog); + if (err) { + dev_err(&adapter->pdev->dev, + "cannot prepare rings to support XDP, error: %d\n", + err); + goto err_prepare_xdp_rings; + } + } else { + iavf_destroy_xdp_rings(adapter); + } + + if (was_running) { + err = iavf_open_sync(adapter); + if (err) { + dev_err(&adapter->pdev->dev, + "cannot open the interface after XDP setup, error: %d\n", + err); + goto err_open_if; + } + } + mutex_unlock(&adapter->crit_lock); + } else { + iavf_assign_bpf_prog(adapter, prog); + iavf_copy_xdp_prog_to_rings(adapter); + } + + return 0; + +err_open_if: +err_prepare_xdp_rings: + iavf_destroy_xdp_rings(adapter); + iavf_open_sync(adapter); 
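
The queue-count guard above is a single arithmetic check: enabling XDP doubles the Tx queue demand, so twice the currently active pairs must still fit in what the PF granted the VF. A trivial standalone version (the numbers are invented):

#include <stdbool.h>
#include <stdio.h>

/* each active queue pair needs a second Tx queue once XDP is on */
static bool xdp_queues_fit(unsigned int active_pairs,
			   unsigned int max_pairs_from_pf)
{
	return active_pairs * 2 <= max_pairs_from_pf;
}

int main(void)
{
	printf("%d\n", xdp_queues_fit(8, 16));	/* 1: XDP can be enabled */
	printf("%d\n", xdp_queues_fit(12, 16));	/* 0: reduce the queue count first */
	return 0;
}
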
+err_close_if: +err_pending_pf_msg: + mutex_unlock(&adapter->crit_lock); +err_crit_lock: +err_no_queues: + return err; +} + +/** + * iavf_xdp - XDP command handler + * @netdev: netdevice + * @xdp: XDP command + */ +static int iavf_xdp(struct net_device *netdev, struct netdev_bpf *xdp) +{ + struct iavf_adapter *adapter = netdev_priv(netdev); + + switch (xdp->command) { + case XDP_SETUP_PROG: + return iavf_setup_xdp(adapter, xdp->prog, xdp->extack); + case XDP_SETUP_XSK_POOL: + return iavf_xsk_pool_setup(adapter, xdp->xsk.pool, + xdp->xsk.queue_id); + default: + return -EINVAL; + } +} + static const struct net_device_ops iavf_netdev_ops = { .ndo_open = iavf_open, .ndo_stop = iavf_close, @@ -4690,6 +5313,9 @@ static const struct net_device_ops iavf_netdev_ops = { .ndo_fix_features = iavf_fix_features, .ndo_set_features = iavf_set_features, .ndo_setup_tc = iavf_setup_tc, + .ndo_bpf = iavf_xdp, + .ndo_xdp_xmit = iavf_xdp_xmit, + .ndo_xsk_wakeup = iavf_xsk_wakeup }; /** @@ -5124,6 +5750,8 @@ static void iavf_remove(struct pci_dev *pdev) iavf_free_all_rx_resources(adapter); iavf_free_misc_irq(adapter); + iavf_free_xdp_prog(adapter); + iavf_reset_interrupt_capability(adapter); iavf_free_q_vectors(adapter); diff --git a/drivers/net/ethernet/intel/iavf/iavf_prototype.h b/drivers/net/ethernet/intel/iavf/iavf_prototype.h index edebfbbcffdc2e..c2e5dbc0a75a35 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_prototype.h +++ b/drivers/net/ethernet/intel/iavf/iavf_prototype.h @@ -51,13 +51,6 @@ enum iavf_status iavf_aq_set_rss_key(struct iavf_hw *hw, u16 seid, enum iavf_status iavf_set_mac_type(struct iavf_hw *hw); -extern struct iavf_rx_ptype_decoded iavf_ptype_lookup[]; - -static inline struct iavf_rx_ptype_decoded decode_rx_desc_ptype(u8 ptype) -{ - return iavf_ptype_lookup[ptype]; -} - void iavf_vf_parse_hw_config(struct iavf_hw *hw, struct virtchnl_vf_resource *msg); enum iavf_status iavf_vf_reset(struct iavf_hw *hw); diff --git a/drivers/net/ethernet/intel/iavf/iavf_trace.h b/drivers/net/ethernet/intel/iavf/iavf_trace.h index 82fda6f5abf043..383a5375392a20 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_trace.h +++ b/drivers/net/ethernet/intel/iavf/iavf_trace.h @@ -145,6 +145,14 @@ DEFINE_EVENT( TP_ARGS(ring, desc, skb)); +DEFINE_EVENT( + iavf_rx_template, iavf_clean_rx_irq_zc, + TP_PROTO(struct iavf_ring *ring, + union iavf_32byte_rx_desc *desc, + struct sk_buff *skb), + + TP_ARGS(ring, desc, skb)); + DEFINE_EVENT( iavf_rx_template, iavf_clean_rx_irq_rx, TP_PROTO(struct iavf_ring *ring, @@ -153,6 +161,14 @@ DEFINE_EVENT( TP_ARGS(ring, desc, skb)); +DEFINE_EVENT( + iavf_rx_template, iavf_clean_rx_irq_zc_rx, + TP_PROTO(struct iavf_ring *ring, + union iavf_32byte_rx_desc *desc, + struct sk_buff *skb), + + TP_ARGS(ring, desc, skb)); + DECLARE_EVENT_CLASS( iavf_xmit_template, diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c index e989feda133c1e..cca9907bee5a1a 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c @@ -1,21 +1,20 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright(c) 2013 - 2018 Intel Corporation. 
*/ +#include +#include +#include +#include #include #include "iavf.h" #include "iavf_trace.h" #include "iavf_prototype.h" -static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size, - u32 td_tag) -{ - return cpu_to_le64(IAVF_TX_DESC_DTYPE_DATA | - ((u64)td_cmd << IAVF_TXD_QW1_CMD_SHIFT) | - ((u64)td_offset << IAVF_TXD_QW1_OFFSET_SHIFT) | - ((u64)size << IAVF_TXD_QW1_TX_BUF_SZ_SHIFT) | - ((u64)td_tag << IAVF_TXD_QW1_L2TAG1_SHIFT)); -} +DEFINE_STATIC_KEY_FALSE(iavf_xdp_locking_key); + +static bool iavf_xdp_xmit_back(const struct xdp_buff *buff, + struct iavf_ring *xdp_ring); #define IAVF_TXD_CMD (IAVF_TX_DESC_CMD_EOP | IAVF_TX_DESC_CMD_RS) @@ -27,22 +26,14 @@ static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size, static void iavf_unmap_and_free_tx_resource(struct iavf_ring *ring, struct iavf_tx_buffer *tx_buffer) { - if (tx_buffer->skb) { - if (tx_buffer->tx_flags & IAVF_TX_FLAGS_FD_SB) - kfree(tx_buffer->raw_buf); - else - dev_kfree_skb_any(tx_buffer->skb); - if (dma_unmap_len(tx_buffer, len)) - dma_unmap_single(ring->dev, - dma_unmap_addr(tx_buffer, dma), - dma_unmap_len(tx_buffer, len), - DMA_TO_DEVICE); - } else if (dma_unmap_len(tx_buffer, len)) { + if (tx_buffer->skb) + dev_kfree_skb_any(tx_buffer->skb); + + if (dma_unmap_len(tx_buffer, len)) dma_unmap_page(ring->dev, dma_unmap_addr(tx_buffer, dma), dma_unmap_len(tx_buffer, len), DMA_TO_DEVICE); - } tx_buffer->next_to_watch = NULL; tx_buffer->skb = NULL; @@ -50,6 +41,51 @@ static void iavf_unmap_and_free_tx_resource(struct iavf_ring *ring, /* tx_buffer must be completely set up in the transmit path */ } +/** + * iavf_free_xdp_resource - Correctly free XDP TX buffer + * @ring: XDP ring + * @tx_buffer: the buffer being released + */ +static void iavf_free_xdp_resource(struct iavf_ring *ring, + struct iavf_tx_buffer *tx_buffer) +{ + struct page *page; + u32 put_size; + + switch (tx_buffer->xdp_type) { + case IAVF_XDP_BUFFER_TX: + page = tx_buffer->page; + put_size = dma_unmap_len(tx_buffer, len); + page_pool_put_page(page->pp, page, put_size, true); + break; + case IAVF_XDP_BUFFER_FRAME: + dma_unmap_page(ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + xdp_return_frame(tx_buffer->xdpf); + break; + } + + tx_buffer->xdp_type = IAVF_XDP_BUFFER_NONE; +} + +/** + * iavf_release_tx_resources - Release all Tx buffers on ring + * @ring: TX or XDP ring + */ +static void iavf_release_tx_resources(struct iavf_ring *ring) +{ + bool is_xdp = iavf_ring_is_xdp(ring); + u32 i; + + for (i = 0; i < ring->count; i++) + if (is_xdp) + iavf_free_xdp_resource(ring, &ring->tx_bi[i]); + else + iavf_unmap_and_free_tx_resource(ring, &ring->tx_bi[i]); +} + /** * iavf_clean_tx_ring - Free any empty Tx buffers * @tx_ring: ring to be cleaned @@ -57,15 +93,17 @@ static void iavf_unmap_and_free_tx_resource(struct iavf_ring *ring, void iavf_clean_tx_ring(struct iavf_ring *tx_ring) { unsigned long bi_size; - u16 i; /* ring already cleared, nothing to do */ if (!tx_ring->tx_bi) return; - /* Free all the Tx ring sk_buffs */ - for (i = 0; i < tx_ring->count; i++) - iavf_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]); + if (tx_ring->flags & IAVF_TXRX_FLAGS_XSK) { + iavf_xsk_clean_xdp_ring(tx_ring); + } else { + /* Free all the Tx ring sk_buffs */ + iavf_release_tx_resources(tx_ring); + } bi_size = sizeof(struct iavf_tx_buffer) * tx_ring->count; memset(tx_ring->tx_bi, 0, bi_size); @@ -80,7 +118,8 @@ void iavf_clean_tx_ring(struct iavf_ring *tx_ring) return; /* cleanup Tx queue 
statistics */ - netdev_tx_reset_queue(txring_txq(tx_ring)); + if (!(tx_ring->flags & IAVF_TXRX_FLAGS_XDP)) + netdev_tx_reset_queue(txring_txq(tx_ring)); } /** @@ -95,6 +134,11 @@ void iavf_free_tx_resources(struct iavf_ring *tx_ring) kfree(tx_ring->tx_bi); tx_ring->tx_bi = NULL; + if (tx_ring->flags & IAVF_TXRX_FLAGS_XSK) { + tx_ring->dev = tx_ring->xsk_pool->dev; + tx_ring->flags &= ~IAVF_TXRX_FLAGS_XSK; + } + if (tx_ring->desc) { dma_free_coherent(tx_ring->dev, tx_ring->size, tx_ring->desc, tx_ring->dma); @@ -157,6 +201,9 @@ void iavf_detect_recover_hung(struct iavf_vsi *vsi) for (i = 0; i < vsi->back->num_active_queues; i++) { tx_ring = &vsi->back->tx_rings[i]; if (tx_ring && tx_ring->desc) { + const struct libie_sq_stats *st = &tx_ring->sq_stats; + u32 start; + /* If packet counter has not changed the queue is * likely stalled, so force an interrupt for this * queue. @@ -164,8 +211,13 @@ void iavf_detect_recover_hung(struct iavf_vsi *vsi) * prev_pkt_ctr would be negative if there was no * pending work. */ - packets = tx_ring->stats.packets & INT_MAX; - if (tx_ring->tx_stats.prev_pkt_ctr == packets) { + do { + start = u64_stats_fetch_begin(&st->syncp); + packets = u64_stats_read(&st->packets) & + INT_MAX; + } while (u64_stats_fetch_retry(&st->syncp, start)); + + if (tx_ring->prev_pkt_ctr == packets) { iavf_force_wb(vsi, tx_ring->q_vector); continue; } @@ -174,7 +226,7 @@ void iavf_detect_recover_hung(struct iavf_vsi *vsi) * to iavf_get_tx_pending() */ smp_rmb(); - tx_ring->tx_stats.prev_pkt_ctr = + tx_ring->prev_pkt_ctr = iavf_get_tx_pending(tx_ring, true) ? packets : -1; } } @@ -193,10 +245,10 @@ void iavf_detect_recover_hung(struct iavf_vsi *vsi) static bool iavf_clean_tx_irq(struct iavf_vsi *vsi, struct iavf_ring *tx_ring, int napi_budget) { + struct libie_sq_onstack_stats stats = { }; int i = tx_ring->next_to_clean; struct iavf_tx_buffer *tx_buf; struct iavf_tx_desc *tx_desc; - unsigned int total_bytes = 0, total_packets = 0; unsigned int budget = IAVF_DEFAULT_IRQ_WORK; tx_buf = &tx_ring->tx_bi[i]; @@ -223,8 +275,8 @@ static bool iavf_clean_tx_irq(struct iavf_vsi *vsi, tx_buf->next_to_watch = NULL; /* update the statistics for this packet */ - total_bytes += tx_buf->bytecount; - total_packets += tx_buf->gso_segs; + stats.bytes += tx_buf->bytecount; + stats.packets += tx_buf->gso_segs; /* free the skb */ napi_consume_skb(tx_buf->skb, napi_budget); @@ -281,12 +333,7 @@ static bool iavf_clean_tx_irq(struct iavf_vsi *vsi, i += tx_ring->count; tx_ring->next_to_clean = i; - u64_stats_update_begin(&tx_ring->syncp); - tx_ring->stats.bytes += total_bytes; - tx_ring->stats.packets += total_packets; - u64_stats_update_end(&tx_ring->syncp); - tx_ring->q_vector->tx.total_bytes += total_bytes; - tx_ring->q_vector->tx.total_packets += total_packets; + iavf_update_tx_ring_stats(tx_ring, &stats); if (tx_ring->flags & IAVF_TXR_FLAGS_WB_ON_ITR) { /* check to see if there are < 4 descriptors @@ -300,15 +347,16 @@ static bool iavf_clean_tx_irq(struct iavf_vsi *vsi, ((j / WB_STRIDE) == 0) && (j > 0) && !test_bit(__IAVF_VSI_DOWN, vsi->state) && (IAVF_DESC_UNUSED(tx_ring) != tx_ring->count)) - tx_ring->arm_wb = true; + tx_ring->flags |= IAVF_TXRX_FLAGS_ARM_WB; } /* notify netdev of completed buffers */ - netdev_tx_completed_queue(txring_txq(tx_ring), - total_packets, total_bytes); + if (!(tx_ring->flags & IAVF_TXRX_FLAGS_XDP)) + netdev_tx_completed_queue(txring_txq(tx_ring), + stats.packets, stats.bytes); #define TX_WAKE_THRESHOLD ((s16)(DESC_NEEDED * 2)) - if (unlikely(total_packets && 
netif_carrier_ok(tx_ring->netdev) && + if (unlikely(stats.packets && netif_carrier_ok(tx_ring->netdev) && (IAVF_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) { /* Make sure that anybody stopping the queue after this * sees the new next_to_clean. @@ -319,7 +367,7 @@ static bool iavf_clean_tx_irq(struct iavf_vsi *vsi, !test_bit(__IAVF_VSI_DOWN, vsi->state)) { netif_wake_subqueue(tx_ring->netdev, tx_ring->queue_index); - ++tx_ring->tx_stats.restart_queue; + libie_stats_inc_one(&tx_ring->sq_stats, restarts); } } @@ -649,7 +697,8 @@ static void iavf_update_itr(struct iavf_q_vector *q_vector, int iavf_setup_tx_descriptors(struct iavf_ring *tx_ring) { struct device *dev = tx_ring->dev; - int bi_size; + struct iavf_tx_desc *tx_desc; + int bi_size, j; if (!dev) return -ENOMEM; @@ -674,7 +723,14 @@ int iavf_setup_tx_descriptors(struct iavf_ring *tx_ring) tx_ring->next_to_use = 0; tx_ring->next_to_clean = 0; - tx_ring->tx_stats.prev_pkt_ctr = -1; + tx_ring->prev_pkt_ctr = -1; + for (j = 0; j < tx_ring->count; j++) { + tx_desc = IAVF_TX_DESC(tx_ring, j); + tx_desc->cmd_type_offset_bsz = 0; + } + + iavf_xsk_setup_xdp_ring(tx_ring); + return 0; err: @@ -683,17 +739,30 @@ int iavf_setup_tx_descriptors(struct iavf_ring *tx_ring) return -ENOMEM; } +static void iavf_clean_rx_pages(struct iavf_ring *rx_ring) +{ + for (u32 i = 0; i < rx_ring->count; i++) { + struct page *page = rx_ring->rx_pages[i]; + + if (!page) + continue; + + /* Invalidate cache lines that may have been written to by + * device so that we avoid corrupting memory. + */ + page_pool_dma_sync_full_for_cpu(rx_ring->pool, page); + page_pool_put_full_page(rx_ring->pool, page, false); + } +} + /** * iavf_clean_rx_ring - Free Rx buffers * @rx_ring: ring to be cleaned **/ void iavf_clean_rx_ring(struct iavf_ring *rx_ring) { - unsigned long bi_size; - u16 i; - /* ring already cleared, nothing to do */ - if (!rx_ring->rx_bi) + if (!rx_ring->rx_pages) return; if (rx_ring->skb) { @@ -701,41 +770,11 @@ void iavf_clean_rx_ring(struct iavf_ring *rx_ring) rx_ring->skb = NULL; } - /* Free all the Rx ring sk_buffs */ - for (i = 0; i < rx_ring->count; i++) { - struct iavf_rx_buffer *rx_bi = &rx_ring->rx_bi[i]; - - if (!rx_bi->page) - continue; - - /* Invalidate cache lines that may have been written to by - * device so that we avoid corrupting memory. 
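
The hung-queue detector and the Tx cleanup path now read the 64-bit libie counters through a begin/retry sequence. A toy C11 model of that retry pattern, assuming a simple even/odd sequence counter (this is not the kernel's u64_stats API and the memory ordering is deliberately simplified):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct toy_stats {
	atomic_uint seq;	/* even: stable, odd: writer in progress */
	uint64_t packets;
	uint64_t bytes;
};

static void stats_add(struct toy_stats *s, uint64_t pkts, uint64_t bytes)
{
	atomic_fetch_add_explicit(&s->seq, 1, memory_order_release);
	s->packets += pkts;
	s->bytes += bytes;
	atomic_fetch_add_explicit(&s->seq, 1, memory_order_release);
}

static uint64_t stats_read_packets(struct toy_stats *s)
{
	unsigned int start;
	uint64_t val;

	do {
		start = atomic_load_explicit(&s->seq, memory_order_acquire);
		val = s->packets;
	} while ((start & 1) ||
		 atomic_load_explicit(&s->seq, memory_order_acquire) != start);

	return val;
}

int main(void)
{
	struct toy_stats s = { 0 };

	stats_add(&s, 10, 6400);
	printf("packets=%llu\n", (unsigned long long)stats_read_packets(&s));
	return 0;
}
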
- */ - dma_sync_single_range_for_cpu(rx_ring->dev, - rx_bi->dma, - rx_bi->page_offset, - rx_ring->rx_buf_len, - DMA_FROM_DEVICE); - - /* free resources associated with mapping */ - dma_unmap_page_attrs(rx_ring->dev, rx_bi->dma, - iavf_rx_pg_size(rx_ring), - DMA_FROM_DEVICE, - IAVF_RX_DMA_ATTR); - - __page_frag_cache_drain(rx_bi->page, rx_bi->pagecnt_bias); - - rx_bi->page = NULL; - rx_bi->page_offset = 0; - } - - bi_size = sizeof(struct iavf_rx_buffer) * rx_ring->count; - memset(rx_ring->rx_bi, 0, bi_size); - - /* Zero out the descriptor ring */ - memset(rx_ring->desc, 0, rx_ring->size); + if (rx_ring->flags & IAVF_TXRX_FLAGS_XSK) + iavf_xsk_clean_rx_ring(rx_ring); + else + iavf_clean_rx_pages(rx_ring); - rx_ring->next_to_alloc = 0; rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; } @@ -748,9 +787,25 @@ void iavf_clean_rx_ring(struct iavf_ring *rx_ring) **/ void iavf_free_rx_resources(struct iavf_ring *rx_ring) { + struct device *dev; + iavf_clean_rx_ring(rx_ring); - kfree(rx_ring->rx_bi); - rx_ring->rx_bi = NULL; + kfree(rx_ring->rx_pages); + rx_ring->rx_pages = NULL; + + /* This also unregisters memory model */ + if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); + + if (rx_ring->flags & IAVF_TXRX_FLAGS_XSK) { + dev = rx_ring->xsk_pool->dev; + rx_ring->flags &= ~IAVF_TXRX_FLAGS_XSK; + } else { + dev = rx_ring->pool->p.dev; + libie_rx_page_pool_destroy(rx_ring->pool, &rx_ring->rq_stats); + } + + rx_ring->dev = dev; if (rx_ring->desc) { dma_free_coherent(rx_ring->dev, rx_ring->size, @@ -759,6 +814,17 @@ void iavf_free_rx_resources(struct iavf_ring *rx_ring) } } +/** + * iavf_is_xdp_enabled - Check if XDP is enabled on the RX ring + * @rx_ring: Rx descriptor ring + * + * Returns true, if the ring has been configured for XDP. 
+ */ +static bool iavf_is_xdp_enabled(const struct iavf_ring *rx_ring) +{ + return !!rcu_access_pointer(rx_ring->xdp_prog); +} + /** * iavf_setup_rx_descriptors - Allocate Rx descriptors * @rx_ring: Rx descriptor ring (for a specific queue) to setup @@ -768,16 +834,17 @@ void iavf_free_rx_resources(struct iavf_ring *rx_ring) int iavf_setup_rx_descriptors(struct iavf_ring *rx_ring) { struct device *dev = rx_ring->dev; - int bi_size; + struct page_pool *pool; + int ret = -ENOMEM; /* warn if we are about to overwrite the pointer */ - WARN_ON(rx_ring->rx_bi); - bi_size = sizeof(struct iavf_rx_buffer) * rx_ring->count; - rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL); - if (!rx_ring->rx_bi) - goto err; + WARN_ON(rx_ring->rx_pages); - u64_stats_init(&rx_ring->syncp); + /* Both iavf_ring::rx_pages and ::xdp_buff are arrays of pointers */ + rx_ring->rx_pages = kcalloc(rx_ring->count, sizeof(*rx_ring->rx_pages), + GFP_KERNEL); + if (!rx_ring->rx_pages) + return ret; /* Round up to nearest 4K */ rx_ring->size = rx_ring->count * sizeof(union iavf_32byte_rx_desc); @@ -791,240 +858,127 @@ int iavf_setup_rx_descriptors(struct iavf_ring *rx_ring) goto err; } - rx_ring->next_to_alloc = 0; - rx_ring->next_to_clean = 0; - rx_ring->next_to_use = 0; - - return 0; -err: - kfree(rx_ring->rx_bi); - rx_ring->rx_bi = NULL; - return -ENOMEM; -} - -/** - * iavf_release_rx_desc - Store the new tail and head values - * @rx_ring: ring to bump - * @val: new head index - **/ -static inline void iavf_release_rx_desc(struct iavf_ring *rx_ring, u32 val) -{ - rx_ring->next_to_use = val; - - /* update next to alloc since we have filled the ring */ - rx_ring->next_to_alloc = val; - - /* Force memory writes to complete before letting h/w - * know there are new descriptors to fetch. (Only - * applicable for weak-ordered memory model archs, - * such as IA-64). - */ - wmb(); - writel(val, rx_ring->tail); -} - -/** - * iavf_rx_offset - Return expected offset into page to access data - * @rx_ring: Ring we are requesting offset of - * - * Returns the offset value for ring into the data buffer. - */ -static inline unsigned int iavf_rx_offset(struct iavf_ring *rx_ring) -{ - return ring_uses_build_skb(rx_ring) ? IAVF_SKB_PAD : 0; -} - -/** - * iavf_alloc_mapped_page - recycle or make a new page - * @rx_ring: ring to use - * @bi: rx_buffer struct to modify - * - * Returns true if the page was successfully allocated or - * reused. 
- **/ -static bool iavf_alloc_mapped_page(struct iavf_ring *rx_ring, - struct iavf_rx_buffer *bi) -{ - struct page *page = bi->page; - dma_addr_t dma; - - /* since we are recycling buffers we should seldom need to alloc */ - if (likely(page)) { - rx_ring->rx_stats.page_reuse_count++; - return true; - } - - /* alloc new page for storage */ - page = dev_alloc_pages(iavf_rx_pg_order(rx_ring)); - if (unlikely(!page)) { - rx_ring->rx_stats.alloc_page_failed++; - return false; - } - - /* map page for use */ - dma = dma_map_page_attrs(rx_ring->dev, page, 0, - iavf_rx_pg_size(rx_ring), - DMA_FROM_DEVICE, - IAVF_RX_DMA_ATTR); + iavf_xsk_setup_rx_ring(rx_ring); + if (rx_ring->flags & IAVF_TXRX_FLAGS_XSK) + goto finish; - /* if mapping failed free memory back to system since - * there isn't much point in holding memory we can't use - */ - if (dma_mapping_error(rx_ring->dev, dma)) { - __free_pages(page, iavf_rx_pg_order(rx_ring)); - rx_ring->rx_stats.alloc_page_failed++; - return false; + pool = libie_rx_page_pool_create(rx_ring->netdev, rx_ring->count, + iavf_is_xdp_enabled(rx_ring)); + if (IS_ERR(pool)) { + ret = PTR_ERR(pool); + goto err_free_dma; } - bi->dma = dma; - bi->page = page; - bi->page_offset = iavf_rx_offset(rx_ring); + rx_ring->pool = pool; - /* initialize pagecnt_bias to 1 representing we fully own page */ - bi->pagecnt_bias = 1; - - return true; -} +finish: + rx_ring->next_to_clean = 0; + rx_ring->next_to_use = 0; -/** - * iavf_receive_skb - Send a completed packet up the stack - * @rx_ring: rx ring in play - * @skb: packet to send up - * @vlan_tag: vlan tag for packet - **/ -static void iavf_receive_skb(struct iavf_ring *rx_ring, - struct sk_buff *skb, u16 vlan_tag) -{ - struct iavf_q_vector *q_vector = rx_ring->q_vector; + return 0; - if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) && - (vlan_tag & VLAN_VID_MASK)) - __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); - else if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_STAG_RX) && - vlan_tag & VLAN_VID_MASK) - __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021AD), vlan_tag); +err_free_dma: + dma_free_coherent(dev, rx_ring->size, rx_ring->desc, rx_ring->dma); +err: + kfree(rx_ring->rx_pages); + rx_ring->rx_pages = NULL; - napi_gro_receive(&q_vector->napi, skb); + return ret; } /** - * iavf_alloc_rx_buffers - Replace used receive buffers + * __iavf_alloc_rx_pages - Replace used receive pages * @rx_ring: ring to place buffers on - * @cleaned_count: number of buffers to replace + * @to_refill: number of buffers to replace + * @gfp: GFP mask to allocate pages * - * Returns false if all allocations were successful, true if any fail + * Returns 0 if all allocations were successful or the number of buffers left + * to refill in case of an allocation failure. 
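
To make the refill contract above concrete: the loop advances next_to_use with wraparound, stops at the first allocation failure, and hands the number of still-empty slots back to the caller. A standalone sketch with a fake allocator (ring size and values are invented):

#include <stdio.h>

#define RING_SIZE	8u

/* fake allocator: succeeds while *pages_left is non-zero */
static int fake_alloc_page(unsigned int *pages_left)
{
	if (!*pages_left)
		return 0;
	(*pages_left)--;
	return 1;
}

static unsigned int refill(unsigned int *next_to_use, unsigned int to_refill,
			   unsigned int *pages_left)
{
	unsigned int ntu = *next_to_use;

	if (!to_refill)
		return 0;

	do {
		if (!fake_alloc_page(pages_left))
			break;		/* leftover is reported to the caller */

		ntu++;
		if (ntu == RING_SIZE)
			ntu = 0;	/* wrap like the descriptor ring */
	} while (--to_refill);

	*next_to_use = ntu;
	return to_refill;		/* 0 means the ring is fully refilled */
}

int main(void)
{
	unsigned int ntu = 6, pages = 5;
	unsigned int left = refill(&ntu, 7, &pages);

	printf("next_to_use=%u left=%u\n", ntu, left);	/* 3 and 2 */
	return 0;
}
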
**/ -bool iavf_alloc_rx_buffers(struct iavf_ring *rx_ring, u16 cleaned_count) +static u32 __iavf_alloc_rx_pages(struct iavf_ring *rx_ring, u32 to_refill, + gfp_t gfp) { - u16 ntu = rx_ring->next_to_use; + struct page_pool *pool = rx_ring->pool; + u32 ntu = rx_ring->next_to_use; union iavf_rx_desc *rx_desc; - struct iavf_rx_buffer *bi; + u32 hr = pool->p.offset; /* do nothing if no valid netdev defined */ - if (!rx_ring->netdev || !cleaned_count) - return false; + if (unlikely(!rx_ring->netdev || !to_refill)) + return 0; rx_desc = IAVF_RX_DESC(rx_ring, ntu); - bi = &rx_ring->rx_bi[ntu]; do { - if (!iavf_alloc_mapped_page(rx_ring, bi)) - goto no_buffers; + struct page *page; + dma_addr_t dma; - /* sync the buffer for use by the device */ - dma_sync_single_range_for_device(rx_ring->dev, bi->dma, - bi->page_offset, - rx_ring->rx_buf_len, - DMA_FROM_DEVICE); + page = page_pool_alloc_pages(pool, gfp); + if (!page) + break; + + rx_ring->rx_pages[ntu] = page; + dma = page_pool_get_dma_addr(page); /* Refresh the desc even if buffer_addrs didn't change * because each write-back erases this info. */ - rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset); + rx_desc->read.pkt_addr = cpu_to_le64(dma + hr); rx_desc++; - bi++; ntu++; if (unlikely(ntu == rx_ring->count)) { rx_desc = IAVF_RX_DESC(rx_ring, 0); - bi = rx_ring->rx_bi; ntu = 0; } /* clear the status bits for the next_to_use descriptor */ rx_desc->wb.qword1.status_error_len = 0; - - cleaned_count--; - } while (cleaned_count); + } while (--to_refill); if (rx_ring->next_to_use != ntu) iavf_release_rx_desc(rx_ring, ntu); - return false; - -no_buffers: - if (rx_ring->next_to_use != ntu) - iavf_release_rx_desc(rx_ring, ntu); + return to_refill; +} - /* make sure to come back via polling to try again after - * allocation failure - */ - return true; +void iavf_alloc_rx_pages(struct iavf_ring *rxr) +{ + __iavf_alloc_rx_pages(rxr, IAVF_DESC_UNUSED(rxr), GFP_KERNEL); } /** * iavf_rx_checksum - Indicate in skb if hw indicated a good cksum * @vsi: the VSI we care about * @skb: skb currently being received and modified - * @rx_desc: the receive descriptor + * @qword: `wb.qword1.status_error_len` from the descriptor + * @parsed: TODO **/ -static inline void iavf_rx_checksum(struct iavf_vsi *vsi, - struct sk_buff *skb, - union iavf_rx_desc *rx_desc) +static void iavf_rx_checksum(struct iavf_vsi *vsi, struct sk_buff *skb, + u64 qword, struct libie_rx_ptype_parsed parsed) { - struct iavf_rx_ptype_decoded decoded; u32 rx_error, rx_status; - bool ipv4, ipv6; - u8 ptype; - u64 qword; - qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); - ptype = (qword & IAVF_RXD_QW1_PTYPE_MASK) >> IAVF_RXD_QW1_PTYPE_SHIFT; - rx_error = (qword & IAVF_RXD_QW1_ERROR_MASK) >> - IAVF_RXD_QW1_ERROR_SHIFT; + if (!libie_has_rx_checksum(vsi->netdev, parsed)) + return; + rx_status = (qword & IAVF_RXD_QW1_STATUS_MASK) >> IAVF_RXD_QW1_STATUS_SHIFT; - decoded = decode_rx_desc_ptype(ptype); - - skb->ip_summed = CHECKSUM_NONE; - - skb_checksum_none_assert(skb); - - /* Rx csum enabled and ip headers found? */ - if (!(vsi->netdev->features & NETIF_F_RXCSUM)) - return; /* did the hardware decode the packet and checksum? 
*/ if (!(rx_status & BIT(IAVF_RX_DESC_STATUS_L3L4P_SHIFT))) return; - /* both known and outer_ip must be set for the below code to work */ - if (!(decoded.known && decoded.outer_ip)) - return; - - ipv4 = (decoded.outer_ip == IAVF_RX_PTYPE_OUTER_IP) && - (decoded.outer_ip_ver == IAVF_RX_PTYPE_OUTER_IPV4); - ipv6 = (decoded.outer_ip == IAVF_RX_PTYPE_OUTER_IP) && - (decoded.outer_ip_ver == IAVF_RX_PTYPE_OUTER_IPV6); + rx_error = (qword & IAVF_RXD_QW1_ERROR_MASK) >> + IAVF_RXD_QW1_ERROR_SHIFT; - if (ipv4 && + if (parsed.outer_ip == LIBIE_RX_PTYPE_OUTER_IPV4 && (rx_error & (BIT(IAVF_RX_DESC_ERROR_IPE_SHIFT) | BIT(IAVF_RX_DESC_ERROR_EIPE_SHIFT)))) goto checksum_fail; - /* likely incorrect csum if alternate IP extension headers found */ - if (ipv6 && - rx_status & BIT(IAVF_RX_DESC_STATUS_IPV6EXADD_SHIFT)) + else if (parsed.outer_ip == LIBIE_RX_PTYPE_OUTER_IPV6 && + (rx_status & BIT(IAVF_RX_DESC_STATUS_IPV6EXADD_SHIFT))) /* don't increment checksum err here, non-fatal err */ return; @@ -1039,447 +993,206 @@ static inline void iavf_rx_checksum(struct iavf_vsi *vsi, if (rx_error & BIT(IAVF_RX_DESC_ERROR_PPRS_SHIFT)) return; - /* Only report checksum unnecessary for TCP, UDP, or SCTP */ - switch (decoded.inner_prot) { - case IAVF_RX_PTYPE_INNER_PROT_TCP: - case IAVF_RX_PTYPE_INNER_PROT_UDP: - case IAVF_RX_PTYPE_INNER_PROT_SCTP: - skb->ip_summed = CHECKSUM_UNNECESSARY; - fallthrough; - default: - break; - } - + skb->ip_summed = CHECKSUM_UNNECESSARY; return; checksum_fail: vsi->back->hw_csum_rx_error++; } -/** - * iavf_ptype_to_htype - get a hash type - * @ptype: the ptype value from the descriptor - * - * Returns a hash type to be used by skb_set_hash - **/ -static inline int iavf_ptype_to_htype(u8 ptype) -{ - struct iavf_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype); - - if (!decoded.known) - return PKT_HASH_TYPE_NONE; - - if (decoded.outer_ip == IAVF_RX_PTYPE_OUTER_IP && - decoded.payload_layer == IAVF_RX_PTYPE_PAYLOAD_LAYER_PAY4) - return PKT_HASH_TYPE_L4; - else if (decoded.outer_ip == IAVF_RX_PTYPE_OUTER_IP && - decoded.payload_layer == IAVF_RX_PTYPE_PAYLOAD_LAYER_PAY3) - return PKT_HASH_TYPE_L3; - else - return PKT_HASH_TYPE_L2; -} - /** * iavf_rx_hash - set the hash value in the skb * @ring: descriptor ring * @rx_desc: specific descriptor * @skb: skb currently being received and modified - * @rx_ptype: Rx packet type + * @qword: `wb.qword1.status_error_len` from the descriptor + * @parsed: TODO **/ -static inline void iavf_rx_hash(struct iavf_ring *ring, - union iavf_rx_desc *rx_desc, - struct sk_buff *skb, - u8 rx_ptype) +static void iavf_rx_hash(const struct iavf_ring *ring, + const union iavf_rx_desc *rx_desc, + struct sk_buff *skb, u64 qword, + struct libie_rx_ptype_parsed parsed) { + const u64 rss_mask = (u64)IAVF_RX_DESC_FLTSTAT_RSS_HASH << + IAVF_RX_DESC_STATUS_FLTSTAT_SHIFT; u32 hash; - const __le64 rss_mask = - cpu_to_le64((u64)IAVF_RX_DESC_FLTSTAT_RSS_HASH << - IAVF_RX_DESC_STATUS_FLTSTAT_SHIFT); - if (!(ring->netdev->features & NETIF_F_RXHASH)) + if (!libie_has_rx_hash(ring->netdev, parsed) || + (qword & rss_mask) != rss_mask) return; - if ((rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask) { - hash = le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss); - skb_set_hash(skb, hash, iavf_ptype_to_htype(rx_ptype)); - } + hash = le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss); + libie_skb_set_hash(skb, hash, parsed); } -/** - * iavf_process_skb_fields - Populate skb header fields from Rx descriptor - * @rx_ring: rx descriptor ring packet is being transacted on - * @rx_desc: 
pointer to the EOP Rx descriptor - * @skb: pointer to current skb being populated - * @rx_ptype: the packet type decoded by hardware - * - * This function checks the ring, descriptor, and packet information in - * order to populate the hash, checksum, VLAN, protocol, and - * other fields within the skb. - **/ -static inline -void iavf_process_skb_fields(struct iavf_ring *rx_ring, - union iavf_rx_desc *rx_desc, struct sk_buff *skb, - u8 rx_ptype) +static void iavf_rx_vlan(const struct iavf_ring *rx_ring, + const union iavf_rx_desc *rx_desc, + struct sk_buff *skb, u64 qword) { - iavf_rx_hash(rx_ring, rx_desc, skb, rx_ptype); + u16 vlan_tag; + __be16 prot; - iavf_rx_checksum(rx_ring->vsi, skb, rx_desc); + if (rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) + prot = htons(ETH_P_8021Q); + else if (rx_ring->netdev->features & NETIF_F_HW_VLAN_STAG_RX) + prot = htons(ETH_P_8021AD); + else + return; - skb_record_rx_queue(skb, rx_ring->queue_index); + if ((qword & BIT(IAVF_RX_DESC_STATUS_L2TAG1P_SHIFT)) && + (rx_ring->flags & IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1)) + vlan_tag = le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1); + else if ((rx_ring->flags & IAVF_RXR_FLAGS_VLAN_TAG_LOC_L2TAG2_2) && + (rx_desc->wb.qword2.ext_status & + cpu_to_le16(BIT(IAVF_RX_DESC_EXT_STATUS_L2TAG2P_SHIFT)))) + vlan_tag = le16_to_cpu(rx_desc->wb.qword2.l2tag2_2); + else + vlan_tag = 0; - /* modifies the skb - consumes the enet header */ - skb->protocol = eth_type_trans(skb, rx_ring->netdev); + if (vlan_tag & VLAN_VID_MASK) + __vlan_hwaccel_put_tag(skb, prot, vlan_tag); } /** - * iavf_cleanup_headers - Correct empty headers + * iavf_process_skb_fields - Populate skb header fields from Rx descriptor * @rx_ring: rx descriptor ring packet is being transacted on - * @skb: pointer to current skb being fixed - * - * Also address the case where we are pulling data in on pages only - * and as such no data is present in the skb header. - * - * In addition if skb is not at least 60 bytes we need to pad it so that - * it is large enough to qualify as a valid Ethernet frame. - * - * Returns true if an error was encountered and skb was freed. - **/ -static bool iavf_cleanup_headers(struct iavf_ring *rx_ring, struct sk_buff *skb) -{ - /* if eth_skb_pad returns an error the skb was freed */ - if (eth_skb_pad(skb)) - return true; - - return false; -} - -/** - * iavf_reuse_rx_page - page flip buffer and store it back on the ring - * @rx_ring: rx descriptor ring to store buffers on - * @old_buff: donor buffer to have page reused - * - * Synchronizes page for reuse by the adapter - **/ -static void iavf_reuse_rx_page(struct iavf_ring *rx_ring, - struct iavf_rx_buffer *old_buff) -{ - struct iavf_rx_buffer *new_buff; - u16 nta = rx_ring->next_to_alloc; - - new_buff = &rx_ring->rx_bi[nta]; - - /* update, and store next to alloc */ - nta++; - rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; - - /* transfer page from old buffer to new buffer */ - new_buff->dma = old_buff->dma; - new_buff->page = old_buff->page; - new_buff->page_offset = old_buff->page_offset; - new_buff->pagecnt_bias = old_buff->pagecnt_bias; -} - -/** - * iavf_can_reuse_rx_page - Determine if this page can be reused by - * the adapter for another receive - * - * @rx_buffer: buffer containing the page - * - * If page is reusable, rx_buffer->page_offset is adjusted to point to - * an unused region in the page. - * - * For small pages, @truesize will be a constant value, half the size - * of the memory at page. 
We'll attempt to alternate between high and - * low halves of the page, with one half ready for use by the hardware - * and the other half being consumed by the stack. We use the page - * ref count to determine whether the stack has finished consuming the - * portion of this page that was passed up with a previous packet. If - * the page ref count is >1, we'll assume the "other" half page is - * still busy, and this page cannot be reused. - * - * For larger pages, @truesize will be the actual space used by the - * received packet (adjusted upward to an even multiple of the cache - * line size). This will advance through the page by the amount - * actually consumed by the received packets while there is still - * space for a buffer. Each region of larger pages will be used at - * most once, after which the page will not be reused. + * @rx_desc: pointer to the EOP Rx descriptor + * @skb: pointer to current skb being populated + * @qword: `wb.qword1.status_error_len` from the descriptor * - * In either case, if the page is reusable its refcount is increased. + * This function checks the ring, descriptor, and packet information in + * order to populate the hash, checksum, VLAN, protocol, and + * other fields within the skb. **/ -static bool iavf_can_reuse_rx_page(struct iavf_rx_buffer *rx_buffer) +void iavf_process_skb_fields(const struct iavf_ring *rx_ring, + const union iavf_rx_desc *rx_desc, + struct sk_buff *skb, u64 qword) { - unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; - struct page *page = rx_buffer->page; + struct libie_rx_ptype_parsed parsed; + u32 ptype; - /* Is any reuse possible? */ - if (!dev_page_is_reusable(page)) - return false; - -#if (PAGE_SIZE < 8192) - /* if we are only owner of page we can reuse it */ - if (unlikely((page_count(page) - pagecnt_bias) > 1)) - return false; -#else -#define IAVF_LAST_OFFSET \ - (SKB_WITH_OVERHEAD(PAGE_SIZE) - IAVF_RXBUFFER_2048) - if (rx_buffer->page_offset > IAVF_LAST_OFFSET) - return false; -#endif + ptype = FIELD_GET(IAVF_RXD_QW1_PTYPE_MASK, qword); + parsed = libie_parse_rx_ptype(ptype); - /* If we have drained the page fragment pool we need to update - * the pagecnt_bias and page count so that we fully restock the - * number of references the driver holds. - */ - if (unlikely(!pagecnt_bias)) { - page_ref_add(page, USHRT_MAX); - rx_buffer->pagecnt_bias = USHRT_MAX; - } + iavf_rx_hash(rx_ring, rx_desc, skb, qword, parsed); + iavf_rx_checksum(rx_ring->vsi, skb, qword, parsed); + iavf_rx_vlan(rx_ring, rx_desc, skb, qword); - return true; + skb_record_rx_queue(skb, rx_ring->queue_index); } /** * iavf_add_rx_frag - Add contents of Rx buffer to sk_buff - * @rx_ring: rx descriptor ring to transact packets on - * @rx_buffer: buffer containing page to add * @skb: sk_buff to place the data into + * @page: page containing data to add + * @hr: headroom in front of the data * @size: packet length from rx_desc * - * This function will add the data contained in rx_buffer->page to the skb. + * This function will add the data contained in page to the skb. * It will just attach the page as a frag to the skb. - * - * The function will then update the page offset. 
- **/ -static void iavf_add_rx_frag(struct iavf_ring *rx_ring, - struct iavf_rx_buffer *rx_buffer, - struct sk_buff *skb, - unsigned int size) -{ -#if (PAGE_SIZE < 8192) - unsigned int truesize = iavf_rx_pg_size(rx_ring) / 2; -#else - unsigned int truesize = SKB_DATA_ALIGN(size + iavf_rx_offset(rx_ring)); -#endif - - if (!size) - return; - - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, - rx_buffer->page_offset, size, truesize); - - /* page is being used so we must update the page offset */ -#if (PAGE_SIZE < 8192) - rx_buffer->page_offset ^= truesize; -#else - rx_buffer->page_offset += truesize; -#endif -} - -/** - * iavf_get_rx_buffer - Fetch Rx buffer and synchronize data for use - * @rx_ring: rx descriptor ring to transact packets on - * @size: size of buffer to add to skb - * - * This function will pull an Rx buffer from the ring and synchronize it - * for use by the CPU. - */ -static struct iavf_rx_buffer *iavf_get_rx_buffer(struct iavf_ring *rx_ring, - const unsigned int size) -{ - struct iavf_rx_buffer *rx_buffer; - - rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean]; - prefetchw(rx_buffer->page); - if (!size) - return rx_buffer; - - /* we are reusing so sync this buffer for CPU use */ - dma_sync_single_range_for_cpu(rx_ring->dev, - rx_buffer->dma, - rx_buffer->page_offset, - size, - DMA_FROM_DEVICE); - - /* We have pulled a buffer for use, so decrement pagecnt_bias */ - rx_buffer->pagecnt_bias--; - - return rx_buffer; -} - -/** - * iavf_construct_skb - Allocate skb and populate it - * @rx_ring: rx descriptor ring to transact packets on - * @rx_buffer: rx buffer to pull data from - * @size: size of buffer to add to skb - * - * This function allocates an skb. It then populates it with the page - * data from the current receive descriptor, taking care to set up the - * skb correctly. 
*/ -static struct sk_buff *iavf_construct_skb(struct iavf_ring *rx_ring, - struct iavf_rx_buffer *rx_buffer, - unsigned int size) +static void iavf_add_rx_frag(struct sk_buff *skb, struct page *page, u32 hr, + u32 size) { - void *va; -#if (PAGE_SIZE < 8192) - unsigned int truesize = iavf_rx_pg_size(rx_ring) / 2; -#else - unsigned int truesize = SKB_DATA_ALIGN(size); -#endif - unsigned int headlen; - struct sk_buff *skb; - - if (!rx_buffer) - return NULL; - /* prefetch first cache line of first page */ - va = page_address(rx_buffer->page) + rx_buffer->page_offset; - net_prefetch(va); - - /* allocate a skb to store the frags */ - skb = __napi_alloc_skb(&rx_ring->q_vector->napi, - IAVF_RX_HDR_SIZE, - GFP_ATOMIC | __GFP_NOWARN); - if (unlikely(!skb)) - return NULL; - - /* Determine available headroom for copy */ - headlen = size; - if (headlen > IAVF_RX_HDR_SIZE) - headlen = eth_get_headlen(skb->dev, va, IAVF_RX_HDR_SIZE); - - /* align pull length to size of long to optimize memcpy performance */ - memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long))); - - /* update all of the pointers */ - size -= headlen; - if (size) { - skb_add_rx_frag(skb, 0, rx_buffer->page, - rx_buffer->page_offset + headlen, - size, truesize); - - /* buffer is used by skb, update page_offset */ -#if (PAGE_SIZE < 8192) - rx_buffer->page_offset ^= truesize; -#else - rx_buffer->page_offset += truesize; -#endif - } else { - /* buffer is unused, reset bias back to rx_buffer */ - rx_buffer->pagecnt_bias++; - } - - return skb; + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, hr, size, + LIBIE_RX_TRUESIZE); } /** * iavf_build_skb - Build skb around an existing buffer - * @rx_ring: Rx descriptor ring to transact packets on - * @rx_buffer: Rx buffer to pull data from - * @size: size of buffer to add to skb + * @xdp: initialized XDP buffer * * This function builds an skb around an existing Rx buffer, taking care * to set up the skb correctly and avoid any memcpy overhead. 
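 * * A brief note on the new layout (derived from the code below): the buffer geometry comes from the prepared &xdp_buff - the headroom is xdp->data - xdp->data_hard_start, the data length is xdp->data_end - xdp->data, and any XDP metadata placed in front of the data is carried over via skb_metadata_set().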
*/ -static struct sk_buff *iavf_build_skb(struct iavf_ring *rx_ring, - struct iavf_rx_buffer *rx_buffer, - unsigned int size) +static struct sk_buff *iavf_build_skb(const struct xdp_buff *xdp) { - void *va; -#if (PAGE_SIZE < 8192) - unsigned int truesize = iavf_rx_pg_size(rx_ring) / 2; -#else - unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + - SKB_DATA_ALIGN(IAVF_SKB_PAD + size); -#endif struct sk_buff *skb; + u32 metasize; - if (!rx_buffer || !size) - return NULL; - /* prefetch first cache line of first page */ - va = page_address(rx_buffer->page) + rx_buffer->page_offset; - net_prefetch(va); + net_prefetch(xdp->data_meta); /* build an skb around the page buffer */ - skb = napi_build_skb(va - IAVF_SKB_PAD, truesize); + skb = napi_build_skb(xdp->data_hard_start, LIBIE_RX_TRUESIZE); if (unlikely(!skb)) return NULL; + skb_mark_for_recycle(skb); + /* update pointers within the skb to store the data */ - skb_reserve(skb, IAVF_SKB_PAD); - __skb_put(skb, size); + skb_reserve(skb, xdp->data - xdp->data_hard_start); + __skb_put(skb, xdp->data_end - xdp->data); - /* buffer is used by skb, update page_offset */ -#if (PAGE_SIZE < 8192) - rx_buffer->page_offset ^= truesize; -#else - rx_buffer->page_offset += truesize; -#endif + metasize = xdp->data - xdp->data_meta; + if (metasize) + skb_metadata_set(skb, metasize); return skb; } /** - * iavf_put_rx_buffer - Clean up used buffer and either recycle or free - * @rx_ring: rx descriptor ring to transact packets on - * @rx_buffer: rx buffer to pull data from - * - * This function will clean up the contents of the rx_buffer. It will - * either recycle the buffer or unmap it and free the associated resources. - */ -static void iavf_put_rx_buffer(struct iavf_ring *rx_ring, - struct iavf_rx_buffer *rx_buffer) + * iavf_is_non_eop - check whether a buffer is non-EOP + * @qword: `wb.qword1.status_error_len` from the descriptor + * @stats: NAPI poll local stats to update + **/ +static bool iavf_is_non_eop(u64 qword, struct libie_rq_onstack_stats *stats) { - if (!rx_buffer) - return; + /* if we are the last buffer then there is nothing else to do */ + if (likely(iavf_test_staterr(qword, IAVF_RX_DESC_STATUS_EOF_SHIFT))) + return false; - if (iavf_can_reuse_rx_page(rx_buffer)) { - /* hand second half of page back to the ring */ - iavf_reuse_rx_page(rx_ring, rx_buffer); - rx_ring->rx_stats.page_reuse_count++; - } else { - /* we are not reusing the buffer so unmap it */ - dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, - iavf_rx_pg_size(rx_ring), - DMA_FROM_DEVICE, IAVF_RX_DMA_ATTR); - __page_frag_cache_drain(rx_buffer->page, - rx_buffer->pagecnt_bias); - } + stats->fragments++; - /* clear contents of buffer_info */ - rx_buffer->page = NULL; + return true; } /** - * iavf_is_non_eop - process handling of non-EOP buffers - * @rx_ring: Rx ring being processed - * @rx_desc: Rx descriptor for current buffer - * @skb: Current socket buffer containing buffer in progress + * iavf_run_xdp - Run XDP program and perform resulting action + * @rx_ring: RX descriptor ring to transact packets on + * @xdp: a prepared XDP buffer + * @xdp_prog: an XDP program assigned to the interface + * @xdp_ring: XDP TX queue assigned to the RX ring + * @rxq_xdp_act: Logical OR of flags of XDP actions that require finalization * - * This function updates next to clean. 
If the buffer is an EOP buffer - * this function exits returning false, otherwise it will place the - * sk_buff in the next buffer to be chained and return true indicating - * that this is in fact a non-EOP buffer. - **/ -static bool iavf_is_non_eop(struct iavf_ring *rx_ring, - union iavf_rx_desc *rx_desc, - struct sk_buff *skb) + * Returns resulting XDP action. + */ +static unsigned int +iavf_run_xdp(struct iavf_ring *rx_ring, struct xdp_buff *xdp, + struct bpf_prog *xdp_prog, struct iavf_ring *xdp_ring, + u32 *rxq_xdp_act) { - u32 ntc = rx_ring->next_to_clean + 1; + unsigned int xdp_act; - /* fetch, update, and store next to clean */ - ntc = (ntc < rx_ring->count) ? ntc : 0; - rx_ring->next_to_clean = ntc; + xdp_act = bpf_prog_run_xdp(xdp_prog, xdp); - prefetch(IAVF_RX_DESC(rx_ring, ntc)); + switch (xdp_act) { + case XDP_PASS: + case XDP_DROP: + break; + case XDP_TX: + if (unlikely(!iavf_xdp_xmit_back(xdp, xdp_ring))) + goto xdp_err; - /* if we are the last buffer then there is nothing else to do */ -#define IAVF_RXD_EOF BIT(IAVF_RX_DESC_STATUS_EOF_SHIFT) - if (likely(iavf_test_staterr(rx_desc, IAVF_RXD_EOF))) - return false; + *rxq_xdp_act |= IAVF_RXQ_XDP_ACT_FINALIZE_TX; + break; + case XDP_REDIRECT: + if (unlikely(xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog))) + goto xdp_err; - rx_ring->rx_stats.non_eop_descs++; + *rxq_xdp_act |= IAVF_RXQ_XDP_ACT_FINALIZE_REDIR; + break; + default: + bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, xdp_act); - return true; + fallthrough; + case XDP_ABORTED: +xdp_err: + trace_xdp_exception(rx_ring->netdev, xdp_prog, xdp_act); + + return XDP_DROP; + } + + return xdp_act; } /** @@ -1496,27 +1209,45 @@ static bool iavf_is_non_eop(struct iavf_ring *rx_ring, **/ static int iavf_clean_rx_irq(struct iavf_ring *rx_ring, int budget) { - unsigned int total_rx_bytes = 0, total_rx_packets = 0; + const gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN; + struct libie_rq_onstack_stats stats = { }; + u32 to_refill = IAVF_DESC_UNUSED(rx_ring); + struct page_pool *pool = rx_ring->pool; struct sk_buff *skb = rx_ring->skb; - u16 cleaned_count = IAVF_DESC_UNUSED(rx_ring); - bool failure = false; + u32 ntc = rx_ring->next_to_clean; + u32 ring_size = rx_ring->count; + struct iavf_ring *xdp_ring; + struct bpf_prog *xdp_prog; + u32 hr = pool->p.offset; + u32 cleaned_count = 0; + unsigned int xdp_act; + struct xdp_buff xdp; + u32 rxq_xdp_act = 0; + u16 cached_ntu; + + xdp_prog = rcu_dereference(rx_ring->xdp_prog); + if (xdp_prog) { + xdp_ring = rx_ring->xdp_ring; + cached_ntu = xdp_ring->next_to_use; + } + xdp_init_buff(&xdp, PAGE_SIZE, &rx_ring->xdp_rxq); - while (likely(total_rx_packets < (unsigned int)budget)) { - struct iavf_rx_buffer *rx_buffer; + while (likely(cleaned_count < budget)) { union iavf_rx_desc *rx_desc; - unsigned int size; - u16 vlan_tag = 0; - u8 rx_ptype; + u32 size, put_size; + struct page *page; u64 qword; /* return some buffers to hardware, one at a time is too slow */ - if (cleaned_count >= IAVF_RX_BUFFER_WRITE) { - failure = failure || - iavf_alloc_rx_buffers(rx_ring, cleaned_count); - cleaned_count = 0; + if (to_refill >= IAVF_RX_BUFFER_WRITE) { + to_refill = __iavf_alloc_rx_pages(rx_ring, to_refill, + gfp); + if (unlikely(to_refill)) + libie_stats_inc_one(&rx_ring->rq_stats, + alloc_page_fail); } - rx_desc = IAVF_RX_DESC(rx_ring, rx_ring->next_to_clean); + rx_desc = IAVF_RX_DESC(rx_ring, ntc); /* status_error_len will always be zero for unused descriptors * because it's cleared in cleanup, and overlaps with hdr_addr @@ -1524,97 +1255,128 @@ static int 
iavf_clean_rx_irq(struct iavf_ring *rx_ring, int budget) * hardware wrote DD then the length will be non-zero */ qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); + if (!iavf_test_staterr(qword, IAVF_RX_DESC_STATUS_DD_SHIFT)) + break; /* This memory barrier is needed to keep us from reading * any other fields out of the rx_desc until we have * verified the descriptor has been written back. */ dma_rmb(); -#define IAVF_RXD_DD BIT(IAVF_RX_DESC_STATUS_DD_SHIFT) - if (!iavf_test_staterr(rx_desc, IAVF_RXD_DD)) - break; size = (qword & IAVF_RXD_QW1_LENGTH_PBUF_MASK) >> IAVF_RXD_QW1_LENGTH_PBUF_SHIFT; iavf_trace(clean_rx_irq, rx_ring, rx_desc, skb); - rx_buffer = iavf_get_rx_buffer(rx_ring, size); + page = rx_ring->rx_pages[ntc]; + rx_ring->rx_pages[ntc] = NULL; + + /* Very rare, but possible case. The most common reason: + * the last fragment contained FCS only, which was then + * stripped by the HW. + */ + if (unlikely(!size)) { + page_pool_recycle_direct(pool, page); + goto no_skb; + } + + page_pool_dma_sync_for_cpu(pool, page, size); + put_size = size; + + xdp_prepare_buff(&xdp, page_address(page), hr, size, true); + if (!xdp_prog) + goto construct_skb; + + xdp_act = iavf_run_xdp(rx_ring, &xdp, xdp_prog, xdp_ring, + &rxq_xdp_act); + put_size = max_t(u32, xdp.data_end - xdp.data_hard_start - hr, + put_size); + + if (xdp_act == XDP_PASS) + goto construct_skb; + else if (xdp_act == XDP_DROP) + page_pool_put_page(pool, page, put_size, true); + + stats.bytes += size; + stats.packets++; + + skb = NULL; + goto no_skb; + +construct_skb: /* retrieve a buffer from the ring */ if (skb) - iavf_add_rx_frag(rx_ring, rx_buffer, skb, size); - else if (ring_uses_build_skb(rx_ring)) - skb = iavf_build_skb(rx_ring, rx_buffer, size); + iavf_add_rx_frag(skb, page, hr, size); else - skb = iavf_construct_skb(rx_ring, rx_buffer, size); + skb = iavf_build_skb(&xdp); /* exit if we failed to retrieve a buffer */ if (!skb) { - rx_ring->rx_stats.alloc_buff_failed++; - if (rx_buffer && size) - rx_buffer->pagecnt_bias++; + page_pool_put_page(pool, page, put_size, true); + libie_stats_inc_one(&rx_ring->rq_stats, + build_skb_fail); break; } - iavf_put_rx_buffer(rx_ring, rx_buffer); +no_skb: cleaned_count++; + to_refill++; + if (unlikely(++ntc == ring_size)) + ntc = 0; - if (iavf_is_non_eop(rx_ring, rx_desc, skb)) + if (iavf_is_non_eop(qword, &stats) || !skb) continue; + prefetch(rx_desc); + /* ERR_MASK will only have valid bits if EOP set, and * what we are doing here is actually checking * IAVF_RX_DESC_ERROR_RXE_SHIFT, since it is the zeroth bit in * the error field */ - if (unlikely(iavf_test_staterr(rx_desc, BIT(IAVF_RXD_QW1_ERROR_SHIFT)))) { - dev_kfree_skb_any(skb); - skb = NULL; - continue; - } - - if (iavf_cleanup_headers(rx_ring, skb)) { + if (unlikely(iavf_test_staterr(qword, + IAVF_RXD_QW1_ERROR_SHIFT))) { + dev_kfree_skb(skb); skb = NULL; continue; } /* probably a little skewed due to removing CRC */ - total_rx_bytes += skb->len; - - qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); - rx_ptype = (qword & IAVF_RXD_QW1_PTYPE_MASK) >> - IAVF_RXD_QW1_PTYPE_SHIFT; + stats.bytes += skb->len; /* populate checksum, VLAN, and protocol */ - iavf_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype); - - if (qword & BIT(IAVF_RX_DESC_STATUS_L2TAG1P_SHIFT) && - rx_ring->flags & IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1) - vlan_tag = le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1); - if (rx_desc->wb.qword2.ext_status & - cpu_to_le16(BIT(IAVF_RX_DESC_EXT_STATUS_L2TAG2P_SHIFT)) && - rx_ring->flags & 
IAVF_RXR_FLAGS_VLAN_TAG_LOC_L2TAG2_2) - vlan_tag = le16_to_cpu(rx_desc->wb.qword2.l2tag2_2); + iavf_process_skb_fields(rx_ring, rx_desc, skb, qword); iavf_trace(clean_rx_irq_rx, rx_ring, rx_desc, skb); - iavf_receive_skb(rx_ring, skb, vlan_tag); + skb->protocol = eth_type_trans(skb, rx_ring->netdev); + napi_gro_receive(&rx_ring->q_vector->napi, skb); skb = NULL; /* update budget accounting */ - total_rx_packets++; + stats.packets++; } + rx_ring->next_to_clean = ntc; rx_ring->skb = skb; - u64_stats_update_begin(&rx_ring->syncp); - rx_ring->stats.packets += total_rx_packets; - rx_ring->stats.bytes += total_rx_bytes; - u64_stats_update_end(&rx_ring->syncp); - rx_ring->q_vector->rx.total_packets += total_rx_packets; - rx_ring->q_vector->rx.total_bytes += total_rx_bytes; + iavf_finalize_xdp_rx(xdp_ring, rxq_xdp_act, cached_ntu); + + if (to_refill >= IAVF_RX_BUFFER_WRITE) { + to_refill = __iavf_alloc_rx_pages(rx_ring, to_refill, gfp); + /* guarantee a trip back through this routine if there was + * a failure + */ + if (unlikely(to_refill)) { + libie_stats_inc_one(&rx_ring->rq_stats, + alloc_page_fail); + cleaned_count = budget; + } + } + + iavf_update_rx_ring_stats(rx_ring, &stats); - /* guarantee a trip back through this routine if there was a failure */ - return failure ? budget : (int)total_rx_packets; + return cleaned_count; } static inline u32 iavf_buildreg_itr(const int type, u16 itr) @@ -1743,12 +1505,21 @@ int iavf_napi_poll(struct napi_struct *napi, int budget) * budget and be more aggressive about cleaning up the Tx descriptors. */ iavf_for_each_ring(ring, q_vector->tx) { - if (!iavf_clean_tx_irq(vsi, ring, budget)) { + bool wd; + + if (ring->flags & IAVF_TXRX_FLAGS_XSK) + wd = iavf_xmit_zc(ring); + else if (ring->flags & IAVF_TXRX_FLAGS_XDP) + wd = true; + else + wd = iavf_clean_tx_irq(vsi, ring, budget); + + if (!wd) { clean_complete = false; continue; } - arm_wb |= ring->arm_wb; - ring->arm_wb = false; + arm_wb |= !!(ring->flags & IAVF_TXRX_FLAGS_ARM_WB); + ring->flags &= ~IAVF_TXRX_FLAGS_ARM_WB; } /* Handle case where we are called by netpoll with a budget of 0 */ @@ -1760,8 +1531,12 @@ int iavf_napi_poll(struct napi_struct *napi, int budget) */ budget_per_ring = max(budget/q_vector->num_ringpairs, 1); + rcu_read_lock(); + iavf_for_each_ring(ring, q_vector->rx) { - int cleaned = iavf_clean_rx_irq(ring, budget_per_ring); + int cleaned = !!(ring->flags & IAVF_TXRX_FLAGS_XSK) ? + iavf_clean_rx_irq_zc(ring, budget_per_ring) : + iavf_clean_rx_irq(ring, budget_per_ring); work_done += cleaned; /* if we clean as many as budgeted, we must not be done */ @@ -1769,6 +1544,8 @@ int iavf_napi_poll(struct napi_struct *napi, int budget) clean_complete = false; } + rcu_read_unlock(); + /* If work not completed, return budget and polling will return */ if (!clean_complete) { int cpu_id = smp_processor_id(); @@ -1791,10 +1568,8 @@ int iavf_napi_poll(struct napi_struct *napi, int budget) return budget - 1; } tx_only: - if (arm_wb) { - q_vector->tx.ring[0].tx_stats.tx_force_wb++; + if (arm_wb) iavf_enable_wb_on_itr(vsi, q_vector); - } return budget; } @@ -2253,6 +2028,7 @@ bool __iavf_chk_linearize(struct sk_buff *skb) int __iavf_maybe_stop_tx(struct iavf_ring *tx_ring, int size) { netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index); + libie_stats_inc_one(&tx_ring->sq_stats, stops); /* Memory barrier before checking head and tail */ smp_mb(); @@ -2262,7 +2038,8 @@ int __iavf_maybe_stop_tx(struct iavf_ring *tx_ring, int size) /* A reprieve! 
- use start_queue because it doesn't call schedule */ netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index); - ++tx_ring->tx_stats.restart_queue; + libie_stats_inc_one(&tx_ring->sq_stats, restarts); + return 0; } @@ -2318,8 +2095,8 @@ static inline void iavf_tx_map(struct iavf_ring *tx_ring, struct sk_buff *skb, while (unlikely(size > IAVF_MAX_DATA_PER_TXD)) { tx_desc->cmd_type_offset_bsz = - build_ctob(td_cmd, td_offset, - max_data, td_tag); + iavf_build_ctob(td_cmd, td_offset, + max_data, td_tag); tx_desc++; i++; @@ -2339,8 +2116,9 @@ static inline void iavf_tx_map(struct iavf_ring *tx_ring, struct sk_buff *skb, if (likely(!data_len)) break; - tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset, - size, td_tag); + tx_desc->cmd_type_offset_bsz = iavf_build_ctob(td_cmd, + td_offset, + size, td_tag); tx_desc++; i++; @@ -2372,7 +2150,7 @@ static inline void iavf_tx_map(struct iavf_ring *tx_ring, struct sk_buff *skb, /* write last descriptor with RS and EOP bits */ td_cmd |= IAVF_TXD_CMD; tx_desc->cmd_type_offset_bsz = - build_ctob(td_cmd, td_offset, size, td_tag); + iavf_build_ctob(td_cmd, td_offset, size, td_tag); skb_tx_timestamp(skb); @@ -2443,7 +2221,7 @@ static netdev_tx_t iavf_xmit_frame_ring(struct sk_buff *skb, return NETDEV_TX_OK; } count = iavf_txd_use_count(skb->len); - tx_ring->tx_stats.tx_linearize++; + libie_stats_inc_one(&tx_ring->sq_stats, linearized); } /* need: 1 descriptor per page * PAGE_SIZE/IAVF_MAX_DATA_PER_TXD, @@ -2453,7 +2231,7 @@ static netdev_tx_t iavf_xmit_frame_ring(struct sk_buff *skb, * otherwise try next time */ if (iavf_maybe_stop_tx(tx_ring, count + 4 + 1)) { - tx_ring->tx_stats.tx_busy++; + libie_stats_inc_one(&tx_ring->sq_stats, busy); return NETDEV_TX_BUSY; } @@ -2536,3 +2314,200 @@ netdev_tx_t iavf_xmit_frame(struct sk_buff *skb, struct net_device *netdev) return iavf_xmit_frame_ring(skb, tx_ring); } + +/** + * iavf_clean_xdp_irq - Reclaim a batch of TX resources from completed XDP_TX + * @xdp_ring: XDP Tx ring + * + * Returns number of cleaned descriptors. + */ +static u32 iavf_clean_xdp_irq(struct iavf_ring *xdp_ring) +{ + struct libie_sq_onstack_stats stats = { }; + struct iavf_tx_desc *last_rs_desc; + u32 ntc = xdp_ring->next_to_clean; + u32 cnt = xdp_ring->count; + u16 done_frames = 0; + u16 rs_idx; + u32 i; + + /* Last RS index is invalid in xsk frames */ + if (!xdp_ring->tx_bi[ntc].page) + return 0; + + rs_idx = xdp_ring->tx_bi[ntc].rs_desc_idx; + last_rs_desc = IAVF_TX_DESC(xdp_ring, rs_idx); + if (last_rs_desc->cmd_type_offset_bsz & + cpu_to_le64(IAVF_TX_DESC_DTYPE_DESC_DONE)) { + done_frames = rs_idx >= ntc ? rs_idx - ntc + 1 : + rs_idx + cnt - ntc + 1; + last_rs_desc->cmd_type_offset_bsz = 0; + } + + for (i = 0; i < done_frames; i++) { + struct iavf_tx_buffer *tx_buf = &xdp_ring->tx_bi[ntc]; + + stats.bytes += tx_buf->bytecount; + /* normally tx_buf->gso_segs was taken but at this point + * it's always 1 for us + */ + stats.packets++; + + iavf_free_xdp_resource(xdp_ring, tx_buf); + + ntc++; + if (ntc >= xdp_ring->count) + ntc = 0; + } + + xdp_ring->next_to_clean = ntc; + iavf_update_tx_ring_stats(xdp_ring, &stats); + + return i; +} + +/** + * iavf_xmit_xdp_buff - submit single buffer to XDP ring for transmission + * @xdp: XDP buffer pointer + * @xdp_ring: XDP ring for transmission + * @frame: whether the function is called from .ndo_xdp_xmit() + * + * Returns negative on failure, 0 on success. 
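 + * + * Note: buffers coming from .ndo_xdp_xmit() are DMA-mapped here with + * dma_map_single(), while XDP_TX buffers reuse the page_pool mapping and + * are only synced for the device before transmission.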
+ */ +static int iavf_xmit_xdp_buff(const struct xdp_buff *xdp, + struct iavf_ring *xdp_ring, + bool frame) +{ + u32 batch_sz = IAVF_RING_QUARTER(xdp_ring); + u32 size = xdp->data_end - xdp->data; + u32 ntu = xdp_ring->next_to_use; + struct iavf_tx_buffer *tx_buff; + struct iavf_tx_desc *tx_desc; + void *data = xdp->data; + dma_addr_t dma; + u32 free; + + free = IAVF_DESC_UNUSED(xdp_ring); + if (unlikely(free < batch_sz)) + free += iavf_clean_xdp_irq(xdp_ring); + if (unlikely(!free)) { + libie_stats_inc_one(&xdp_ring->sq_stats, busy); + return -EBUSY; + } + + if (frame) { + dma = dma_map_single(xdp_ring->dev, data, size, DMA_TO_DEVICE); + if (dma_mapping_error(xdp_ring->dev, dma)) + return -ENOMEM; + } else { + struct page *page = virt_to_page(data); + u32 hr = data - xdp->data_hard_start; + + dma = page_pool_get_dma_addr(page) + hr; + dma_sync_single_for_device(xdp_ring->dev, dma, size, + DMA_BIDIRECTIONAL); + } + + tx_buff = &xdp_ring->tx_bi[ntu]; + tx_buff->bytecount = size; + tx_buff->gso_segs = 1; + if (frame) { + tx_buff->xdp_type = IAVF_XDP_BUFFER_FRAME; + tx_buff->xdpf = xdp->data_hard_start; + } else { + tx_buff->xdp_type = IAVF_XDP_BUFFER_TX; + tx_buff->page = virt_to_page(data); + } + + /* record length, and DMA address */ + dma_unmap_len_set(tx_buff, len, size); + dma_unmap_addr_set(tx_buff, dma, dma); + + tx_desc = IAVF_TX_DESC(xdp_ring, ntu); + tx_desc->buffer_addr = cpu_to_le64(dma); + tx_desc->cmd_type_offset_bsz = iavf_build_ctob(IAVF_TX_DESC_CMD_EOP, 0, + size, 0); + + xdp_ring->xdp_tx_active++; + ntu++; + + if (ntu == xdp_ring->count) + ntu = 0; + + xdp_ring->next_to_use = ntu; + + return 0; +} + +static bool iavf_xdp_xmit_back(const struct xdp_buff *buff, + struct iavf_ring *xdp_ring) +{ + bool ret; + + if (static_branch_unlikely(&iavf_xdp_locking_key)) + spin_lock(&xdp_ring->tx_lock); + + /* TODO: improve XDP_TX by batching */ + ret = !iavf_xmit_xdp_buff(buff, xdp_ring, false); + + if (static_branch_unlikely(&iavf_xdp_locking_key)) + spin_unlock(&xdp_ring->tx_lock); + + return ret; +} + +int iavf_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, + u32 flags) +{ + struct iavf_adapter *adapter = netdev_priv(dev); + struct iavf_tx_buffer *tx_buf; + struct iavf_ring *xdp_ring; + u32 queue_index, nxmit = 0; + int err = 0; + + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) + return -EINVAL; + + if (unlikely(adapter->state == __IAVF_DOWN)) + return -ENETDOWN; + + if (!iavf_adapter_xdp_active(adapter)) + return -ENXIO; + + queue_index = smp_processor_id(); + if (static_branch_unlikely(&iavf_xdp_locking_key)) + queue_index %= adapter->num_xdp_tx_queues; + if (queue_index >= adapter->num_active_queues) + return -ENXIO; + + xdp_ring = &adapter->xdp_rings[queue_index]; + + if (static_branch_unlikely(&iavf_xdp_locking_key)) + spin_lock(&xdp_ring->tx_lock); + + tx_buf = &xdp_ring->tx_bi[xdp_ring->next_to_use]; + for (u32 i = 0; i < n; i++) { + struct xdp_frame *xdpf = frames[i]; + struct xdp_buff xdp; + + xdp_convert_frame_to_buff(xdpf, &xdp); + err = iavf_xmit_xdp_buff(&xdp, xdp_ring, true); + if (unlikely(err)) { + netdev_err(dev, "XDP frame TX failed, error: %d\n", + err); + break; + } + + nxmit++; + } + + if (likely(nxmit)) + tx_buf->rs_desc_idx = iavf_set_rs_bit(xdp_ring); + if (flags & XDP_XMIT_FLUSH) + iavf_xdp_ring_update_tail(xdp_ring); + + if (static_branch_unlikely(&iavf_xdp_locking_key)) + spin_unlock(&xdp_ring->tx_lock); + + return nxmit; +} diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.h b/drivers/net/ethernet/intel/iavf/iavf_txrx.h index 
2624bf6d009e36..775eda86f05b54 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.h +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.h @@ -4,6 +4,8 @@ #ifndef _IAVF_TXRX_H_ #define _IAVF_TXRX_H_ +#include + /* Interrupt Throttling and Rate Limiting Goodies */ #define IAVF_DEFAULT_IRQ_WORK 256 @@ -81,94 +83,21 @@ enum iavf_dyn_idx_t { BIT_ULL(IAVF_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) | \ BIT_ULL(IAVF_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP)) -/* Supported Rx Buffer Sizes (a multiple of 128) */ -#define IAVF_RXBUFFER_256 256 -#define IAVF_RXBUFFER_1536 1536 /* 128B aligned standard Ethernet frame */ -#define IAVF_RXBUFFER_2048 2048 -#define IAVF_RXBUFFER_3072 3072 /* Used for large frames w/ padding */ -#define IAVF_MAX_RXBUFFER 9728 /* largest size for single descriptor */ - -/* NOTE: netdev_alloc_skb reserves up to 64 bytes, NET_IP_ALIGN means we - * reserve 2 more, and skb_shared_info adds an additional 384 bytes more, - * this adds up to 512 bytes of extra data meaning the smallest allocation - * we could have is 1K. - * i.e. RXBUFFER_256 --> 960 byte skb (size-1024 slab) - * i.e. RXBUFFER_512 --> 1216 byte skb (size-2048 slab) - */ -#define IAVF_RX_HDR_SIZE IAVF_RXBUFFER_256 -#define IAVF_PACKET_HDR_PAD (ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2)) #define iavf_rx_desc iavf_32byte_rx_desc -#define IAVF_RX_DMA_ATTR \ - (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING) - -/* Attempt to maximize the headroom available for incoming frames. We - * use a 2K buffer for receives and need 1536/1534 to store the data for - * the frame. This leaves us with 512 bytes of room. From that we need - * to deduct the space needed for the shared info and the padding needed - * to IP align the frame. - * - * Note: For cache line sizes 256 or larger this value is going to end - * up negative. In these cases we should fall back to the legacy - * receive path. - */ -#if (PAGE_SIZE < 8192) -#define IAVF_2K_TOO_SMALL_WITH_PADDING \ -((NET_SKB_PAD + IAVF_RXBUFFER_1536) > SKB_WITH_OVERHEAD(IAVF_RXBUFFER_2048)) - -static inline int iavf_compute_pad(int rx_buf_len) -{ - int page_size, pad_size; - - page_size = ALIGN(rx_buf_len, PAGE_SIZE / 2); - pad_size = SKB_WITH_OVERHEAD(page_size) - rx_buf_len; - - return pad_size; -} - -static inline int iavf_skb_pad(void) -{ - int rx_buf_len; - - /* If a 2K buffer cannot handle a standard Ethernet frame then - * optimize padding for a 3K buffer instead of a 1.5K buffer. - * - * For a 3K buffer we need to add enough padding to allow for - * tailroom due to NET_IP_ALIGN possibly shifting us out of - * cache-line alignment. - */ - if (IAVF_2K_TOO_SMALL_WITH_PADDING) - rx_buf_len = IAVF_RXBUFFER_3072 + SKB_DATA_ALIGN(NET_IP_ALIGN); - else - rx_buf_len = IAVF_RXBUFFER_1536; - - /* if needed make room for NET_IP_ALIGN */ - rx_buf_len -= NET_IP_ALIGN; - - return iavf_compute_pad(rx_buf_len); -} - -#define IAVF_SKB_PAD iavf_skb_pad() -#else -#define IAVF_2K_TOO_SMALL_WITH_PADDING false -#define IAVF_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN) -#endif - /** * iavf_test_staterr - tests bits in Rx descriptor status and error fields - * @rx_desc: pointer to receive descriptor (in le64 format) - * @stat_err_bits: value to mask + * @qword: `wb.qword1.status_error_len` from the descriptor + * @stat_err: bit number to mask * * This function does some fast chicanery in order to return the * value of the mask which is really only used for boolean tests. * The status_error_len doesn't need to be shifted because it begins * at offset zero. 
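 * * For example, the Rx clean loop tests descriptor completion with * iavf_test_staterr(qword, IAVF_RX_DESC_STATUS_DD_SHIFT), which only * returns true once hardware has written the DD bit back.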
*/ -static inline bool iavf_test_staterr(union iavf_rx_desc *rx_desc, - const u64 stat_err_bits) +static inline bool iavf_test_staterr(u64 qword, const u64 stat_err) { - return !!(rx_desc->wb.qword1.status_error_len & - cpu_to_le64(stat_err_bits)); + return !!(qword & BIT_ULL(stat_err)); } /* How many Rx Buffers do we bundle into one write to the hardware ? */ @@ -250,7 +179,7 @@ static inline unsigned int iavf_txd_use_count(unsigned int size) #define IAVF_TX_FLAGS_IPV6 BIT(5) #define IAVF_TX_FLAGS_FCCRC BIT(6) #define IAVF_TX_FLAGS_FSO BIT(7) -#define IAVF_TX_FLAGS_FD_SB BIT(9) +/* BIT(9) is free, was IAVF_TX_FLAGS_FD_SB */ #define IAVF_TX_FLAGS_VXLAN_TUNNEL BIT(10) #define IAVF_TX_FLAGS_HW_OUTER_SINGLE_VLAN BIT(11) #define IAVF_TX_FLAGS_VLAN_MASK 0xffff0000 @@ -258,60 +187,40 @@ static inline unsigned int iavf_txd_use_count(unsigned int size) #define IAVF_TX_FLAGS_VLAN_PRIO_SHIFT 29 #define IAVF_TX_FLAGS_VLAN_SHIFT 16 +/** + * enum iavf_xdp_buffer_type - type of &iavf_tx_buffer on XDP queue + * @IAVF_XDP_BUFFER_NONE: unused, no action required + * @IAVF_XDP_BUFFER_TX: free according to our memory model + * @IAVF_XDP_BUFFER_FRAME: use xdp_return_frame() + */ +enum iavf_xdp_buffer_type { + IAVF_XDP_BUFFER_NONE = 0U, + IAVF_XDP_BUFFER_TX, + IAVF_XDP_BUFFER_FRAME, +}; + struct iavf_tx_buffer { - struct iavf_tx_desc *next_to_watch; + + /* Track the last frame in batch/packet */ + union { + struct iavf_tx_desc *next_to_watch; /* on skb TX queue */ + u16 rs_desc_idx; /* on XDP queue */ + }; union { - struct sk_buff *skb; - void *raw_buf; + struct sk_buff *skb; /* used for .ndo_start_xmit() */ + struct page *page; /* used for XDP_TX */ + struct xdp_frame *xdpf; /* used for .ndo_xdp_xmit() */ + struct xdp_buff *xdp; /* used for XDP_TX in ZC mode */ }; unsigned int bytecount; unsigned short gso_segs; + unsigned short xdp_type; DEFINE_DMA_UNMAP_ADDR(dma); DEFINE_DMA_UNMAP_LEN(len); u32 tx_flags; }; -struct iavf_rx_buffer { - dma_addr_t dma; - struct page *page; -#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536) - __u32 page_offset; -#else - __u16 page_offset; -#endif - __u16 pagecnt_bias; -}; - -struct iavf_queue_stats { - u64 packets; - u64 bytes; -}; - -struct iavf_tx_queue_stats { - u64 restart_queue; - u64 tx_busy; - u64 tx_done_old; - u64 tx_linearize; - u64 tx_force_wb; - int prev_pkt_ctr; - u64 tx_lost_interrupt; -}; - -struct iavf_rx_queue_stats { - u64 non_eop_descs; - u64 alloc_page_failed; - u64 alloc_buff_failed; - u64 page_reuse_count; - u64 realloc_count; -}; - -enum iavf_ring_state_t { - __IAVF_TX_FDIR_INIT_DONE, - __IAVF_TX_XPS_INIT_DONE, - __IAVF_RING_STATE_NBITS /* must be last */ -}; - /* some useful defines for virtchannel interface, which * is the only remaining user of header split */ @@ -327,16 +236,20 @@ enum iavf_ring_state_t { struct iavf_ring { struct iavf_ring *next; /* pointer to next ring in q_vector */ void *desc; /* Descriptor ring memory */ - struct device *dev; /* Used for DMA mapping */ + union { + struct xsk_buff_pool *xsk_pool; /* Used on XSk queue pairs */ + struct page_pool *pool; /* Used for Rx page management */ + struct device *dev; /* Used for DMA mapping on Tx */ + }; struct net_device *netdev; /* netdev ring maps to */ union { struct iavf_tx_buffer *tx_bi; - struct iavf_rx_buffer *rx_bi; + struct xdp_buff **xdp_buff; + struct page **rx_pages; }; - DECLARE_BITMAP(state, __IAVF_RING_STATE_NBITS); + u8 __iomem *tail; u16 queue_index; /* Queue number of ring */ u8 dcb_tc; /* Traffic class of ring */ - u8 __iomem *tail; /* high bit set means dynamic, use 
accessors routines to read/write. * hardware only supports 2us resolution for the ITR registers. @@ -345,45 +258,29 @@ struct iavf_ring { */ u16 itr_setting; - u16 count; /* Number of descriptors */ u16 reg_idx; /* HW register index of the ring */ - u16 rx_buf_len; + u16 count; /* Number of descriptors */ /* used in interrupt processing */ u16 next_to_use; u16 next_to_clean; - u8 atr_sample_rate; - u8 atr_count; - - bool ring_active; /* is ring online or not */ - bool arm_wb; /* do something to arm write back */ - u8 packet_stride; - u16 flags; #define IAVF_TXR_FLAGS_WB_ON_ITR BIT(0) -#define IAVF_RXR_FLAGS_BUILD_SKB_ENABLED BIT(1) +#define IAVF_TXRX_FLAGS_ARM_WB BIT(1) +#define IAVF_TXRX_FLAGS_XDP BIT(2) #define IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1 BIT(3) #define IAVF_TXR_FLAGS_VLAN_TAG_LOC_L2TAG2 BIT(4) #define IAVF_RXR_FLAGS_VLAN_TAG_LOC_L2TAG2_2 BIT(5) +#define IAVF_TXRX_FLAGS_XSK BIT(6) - /* stats structs */ - struct iavf_queue_stats stats; - struct u64_stats_sync syncp; union { - struct iavf_tx_queue_stats tx_stats; - struct iavf_rx_queue_stats rx_stats; + struct bpf_prog __rcu *xdp_prog; + u32 xdp_tx_active; /* TODO: comment */ }; - - unsigned int size; /* length of descriptor ring in bytes */ - dma_addr_t dma; /* physical address of ring */ - - struct iavf_vsi *vsi; /* Backreference to associated VSI */ - struct iavf_q_vector *q_vector; /* Backreference to associated vector */ - - struct rcu_head rcu; /* to avoid race on free */ - u16 next_to_alloc; - struct sk_buff *skb; /* When iavf_clean_rx_ring_irq() must + struct iavf_ring *xdp_ring; + union { + struct sk_buff *skb; /* When iavf_clean_rx_ring_irq() must * return before it sees the EOP for * the current packet, we save that skb * here and resume receiving this @@ -391,22 +288,31 @@ struct iavf_ring { * iavf_clean_rx_ring_irq() is called * for this ring. 
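 * Only Rx rings use this pointer; XDP Tx rings reuse the union slot * for the tx_lock below.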
*/ -} ____cacheline_internodealigned_in_smp; + spinlock_t tx_lock; /* Protect XDP TX ring, when shared */ + }; -static inline bool ring_uses_build_skb(struct iavf_ring *ring) -{ - return !!(ring->flags & IAVF_RXR_FLAGS_BUILD_SKB_ENABLED); -} + /* stats structs */ + union { + struct libie_sq_stats sq_stats; + struct libie_rq_stats rq_stats; + }; -static inline void set_ring_build_skb_enabled(struct iavf_ring *ring) -{ - ring->flags |= IAVF_RXR_FLAGS_BUILD_SKB_ENABLED; -} + struct iavf_vsi *vsi; /* Backreference to associated VSI */ + struct iavf_q_vector *q_vector; /* Backreference to associated vector */ -static inline void clear_ring_build_skb_enabled(struct iavf_ring *ring) -{ - ring->flags &= ~IAVF_RXR_FLAGS_BUILD_SKB_ENABLED; -} + int prev_pkt_ctr; /* For stall detection */ + unsigned int size; /* length of descriptor ring in bytes */ + dma_addr_t dma; /* physical address of ring */ + + struct rcu_head rcu; /* to avoid race on free */ + struct xdp_rxq_info xdp_rxq; +} ____cacheline_internodealigned_in_smp; + +#define IAVF_RING_QUARTER(R) ((R)->count >> 2) +#define IAVF_RX_DESC(R, i) (&(((union iavf_32byte_rx_desc *)((R)->desc))[i])) +#define IAVF_TX_DESC(R, i) (&(((struct iavf_tx_desc *)((R)->desc))[i])) +#define IAVF_TX_CTXTDESC(R, i) \ + (&(((struct iavf_tx_context_desc *)((R)->desc))[i])) #define IAVF_ITR_ADAPTIVE_MIN_INC 0x0002 #define IAVF_ITR_ADAPTIVE_MIN_USECS 0x0002 @@ -429,18 +335,7 @@ struct iavf_ring_container { #define iavf_for_each_ring(pos, head) \ for (pos = (head).ring; pos != NULL; pos = pos->next) -static inline unsigned int iavf_rx_pg_order(struct iavf_ring *ring) -{ -#if (PAGE_SIZE < 8192) - if (ring->rx_buf_len > (PAGE_SIZE / 2)) - return 1; -#endif - return 0; -} - -#define iavf_rx_pg_size(_ring) (PAGE_SIZE << iavf_rx_pg_order(_ring)) - -bool iavf_alloc_rx_buffers(struct iavf_ring *rxr, u16 cleaned_count); +void iavf_alloc_rx_pages(struct iavf_ring *rxr); netdev_tx_t iavf_xmit_frame(struct sk_buff *skb, struct net_device *netdev); void iavf_clean_tx_ring(struct iavf_ring *tx_ring); void iavf_clean_rx_ring(struct iavf_ring *rx_ring); @@ -455,6 +350,24 @@ void iavf_detect_recover_hung(struct iavf_vsi *vsi); int __iavf_maybe_stop_tx(struct iavf_ring *tx_ring, int size); bool __iavf_chk_linearize(struct sk_buff *skb); +DECLARE_STATIC_KEY_FALSE(iavf_xdp_locking_key); + +void iavf_process_skb_fields(const struct iavf_ring *rx_ring, + const union iavf_rx_desc *rx_desc, + struct sk_buff *skb, u64 qword); +int iavf_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, + u32 flags); + +static inline __le64 iavf_build_ctob(u32 td_cmd, u32 td_offset, + unsigned int size, u32 td_tag) +{ + return cpu_to_le64(IAVF_TX_DESC_DTYPE_DATA | + ((u64)td_cmd << IAVF_TXD_QW1_CMD_SHIFT) | + ((u64)td_offset << IAVF_TXD_QW1_OFFSET_SHIFT) | + ((u64)size << IAVF_TXD_QW1_TX_BUF_SZ_SHIFT) | + ((u64)td_tag << IAVF_TXD_QW1_L2TAG1_SHIFT)); +} + /** * iavf_xmit_descriptor_count - calculate number of Tx descriptors needed * @skb: send buffer @@ -462,7 +375,7 @@ bool __iavf_chk_linearize(struct sk_buff *skb); * Returns number of data descriptors needed for this skb. Returns 0 to indicate * there is not enough descriptors available in this ring since we need at least * one descriptor. 
- **/ + */ static inline int iavf_xmit_descriptor_count(struct sk_buff *skb) { const skb_frag_t *frag = &skb_shinfo(skb)->frags[0]; @@ -487,7 +400,7 @@ static inline int iavf_xmit_descriptor_count(struct sk_buff *skb) * @size: the size buffer we want to assure is available * * Returns 0 if stop is not needed - **/ + */ static inline int iavf_maybe_stop_tx(struct iavf_ring *tx_ring, int size) { if (likely(IAVF_DESC_UNUSED(tx_ring) >= size)) @@ -503,7 +416,7 @@ static inline int iavf_maybe_stop_tx(struct iavf_ring *tx_ring, int size) * Note: Our HW can't scatter-gather more than 8 fragments to build * a packet on the wire and so we need to figure out the cases where we * need to linearize the skb. - **/ + */ static inline bool iavf_chk_linearize(struct sk_buff *skb, int count) { /* Both TSO and single send will work if count is less than 8 */ @@ -519,9 +432,134 @@ static inline bool iavf_chk_linearize(struct sk_buff *skb, int count) /** * txring_txq - helper to convert from a ring to a queue * @ring: Tx ring to find the netdev equivalent of - **/ + */ static inline struct netdev_queue *txring_txq(const struct iavf_ring *ring) { return netdev_get_tx_queue(ring->netdev, ring->queue_index); } + +/** + * iavf_xdp_ring_update_tail - Updates the XDP Tx ring tail register + * @xdp_ring: XDP Tx ring + * + * Notify hardware the new descriptor is ready to be transmitted + */ +static inline void iavf_xdp_ring_update_tail(const struct iavf_ring *xdp_ring) +{ + /* Force memory writes to complete before letting h/w + * know there are new descriptors to fetch. + */ + wmb(); + writel_relaxed(xdp_ring->next_to_use, xdp_ring->tail); } + +/** + * __iavf_update_tx_ring_stats - Update TX ring stats after transmit completes + * @tx_ring: TX descriptor ring + * @tc: Tx ring container of the owning queue vector to accumulate stats in + * @stats: on-stack packet and byte counters collected during the NAPI poll + */ +static inline void +__iavf_update_tx_ring_stats(struct iavf_ring *tx_ring, + struct iavf_ring_container *tc, + const struct libie_sq_onstack_stats *stats) +{ + libie_sq_napi_stats_add(&tx_ring->sq_stats, stats); + tc->total_bytes += stats->bytes; + tc->total_packets += stats->packets; } + +#define iavf_update_tx_ring_stats(r, s) \ + __iavf_update_tx_ring_stats(r, &(r)->q_vector->tx, s) + +/** + * __iavf_update_rx_ring_stats - Update RX ring stats + * @rx_ring: RX descriptor ring + * @rc: Rx ring container of the owning queue vector to accumulate stats in + * @stats: on-stack packet and byte counters collected during the NAPI poll + */ +static inline void +__iavf_update_rx_ring_stats(struct iavf_ring *rx_ring, + struct iavf_ring_container *rc, + const struct libie_rq_onstack_stats *stats) +{ + libie_rq_napi_stats_add(&rx_ring->rq_stats, stats); + rc->total_packets += stats->packets; + rc->total_bytes += stats->bytes; } + +#define iavf_update_rx_ring_stats(r, s) \ + __iavf_update_rx_ring_stats(r, &(r)->q_vector->rx, s) + +/** + * iavf_release_rx_desc - Store the new tail and head values + * @rx_ring: ring to bump + * @val: new head index + */ +static inline void iavf_release_rx_desc(struct iavf_ring *rx_ring, u32 val) +{ + rx_ring->next_to_use = val; + + /* Force memory writes to complete before letting h/w + * know there are new descriptors to fetch. (Only + * applicable for weak-ordered memory model archs, + * such as IA-64).
+ */ + wmb(); + writel(val, rx_ring->tail); +} + +#define IAVF_RXQ_XDP_ACT_FINALIZE_TX BIT(0) +#define IAVF_RXQ_XDP_ACT_FINALIZE_REDIR BIT(1) +#define IAVF_RXQ_XDP_ACT_STOP_NOW BIT(2) + +/** + * iavf_set_rs_bit - set RS bit on last produced descriptor. + * @xdp_ring: XDP ring to produce the HW Tx descriptors on + * + * Returns the index of descriptor RS bit was set on (one behind current NTU). + */ +static inline u16 iavf_set_rs_bit(struct iavf_ring *xdp_ring) +{ + u16 rs_idx = xdp_ring->next_to_use ? xdp_ring->next_to_use - 1 : + xdp_ring->count - 1; + struct iavf_tx_desc *tx_desc; + + tx_desc = IAVF_TX_DESC(xdp_ring, rs_idx); + tx_desc->cmd_type_offset_bsz |= + cpu_to_le64(IAVF_TX_DESC_CMD_RS << IAVF_TXD_QW1_CMD_SHIFT); + + return rs_idx; +} + +/** + * iavf_finalize_xdp_rx - Finalize XDP actions once per RX ring clean + * @xdp_ring: XDP TX queue assigned to a given RX ring + * @rxq_xdp_act: Logical OR of flags of XDP actions that require finalization + * @first_idx: index of the first frame in the transmitted batch on XDP queue + */ +static inline void iavf_finalize_xdp_rx(struct iavf_ring *xdp_ring, + u32 rxq_xdp_act, u32 first_idx) +{ + if (rxq_xdp_act & IAVF_RXQ_XDP_ACT_FINALIZE_REDIR) + xdp_do_flush_map(); + if (rxq_xdp_act & IAVF_RXQ_XDP_ACT_FINALIZE_TX) { + struct iavf_tx_buffer *tx_buf = &xdp_ring->tx_bi[first_idx]; + + if (static_branch_unlikely(&iavf_xdp_locking_key)) + spin_lock(&xdp_ring->tx_lock); + tx_buf->rs_desc_idx = iavf_set_rs_bit(xdp_ring); + iavf_xdp_ring_update_tail(xdp_ring); + if (static_branch_unlikely(&iavf_xdp_locking_key)) + spin_unlock(&xdp_ring->tx_lock); + } +} + +static inline bool iavf_ring_is_xdp(struct iavf_ring *ring) +{ + return !!(ring->flags & IAVF_TXRX_FLAGS_XDP); +} + #endif /* _IAVF_TXRX_H_ */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_type.h b/drivers/net/ethernet/intel/iavf/iavf_type.h index 9f1f523807c4e6..bb90d8f3ad7efe 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_type.h +++ b/drivers/net/ethernet/intel/iavf/iavf_type.h @@ -10,8 +10,6 @@ #include "iavf_adminq.h" #include "iavf_devids.h" -#define IAVF_RXQ_CTX_DBUFF_SHIFT 7 - /* IAVF_MASK is a macro used on 32 bit registers */ #define IAVF_MASK(mask, shift) ((u32)(mask) << (shift)) @@ -339,94 +337,6 @@ enum iavf_rx_desc_error_l3l4e_fcoe_masks { #define IAVF_RXD_QW1_PTYPE_SHIFT 30 #define IAVF_RXD_QW1_PTYPE_MASK (0xFFULL << IAVF_RXD_QW1_PTYPE_SHIFT) -/* Packet type non-ip values */ -enum iavf_rx_l2_ptype { - IAVF_RX_PTYPE_L2_RESERVED = 0, - IAVF_RX_PTYPE_L2_MAC_PAY2 = 1, - IAVF_RX_PTYPE_L2_TIMESYNC_PAY2 = 2, - IAVF_RX_PTYPE_L2_FIP_PAY2 = 3, - IAVF_RX_PTYPE_L2_OUI_PAY2 = 4, - IAVF_RX_PTYPE_L2_MACCNTRL_PAY2 = 5, - IAVF_RX_PTYPE_L2_LLDP_PAY2 = 6, - IAVF_RX_PTYPE_L2_ECP_PAY2 = 7, - IAVF_RX_PTYPE_L2_EVB_PAY2 = 8, - IAVF_RX_PTYPE_L2_QCN_PAY2 = 9, - IAVF_RX_PTYPE_L2_EAPOL_PAY2 = 10, - IAVF_RX_PTYPE_L2_ARP = 11, - IAVF_RX_PTYPE_L2_FCOE_PAY3 = 12, - IAVF_RX_PTYPE_L2_FCOE_FCDATA_PAY3 = 13, - IAVF_RX_PTYPE_L2_FCOE_FCRDY_PAY3 = 14, - IAVF_RX_PTYPE_L2_FCOE_FCRSP_PAY3 = 15, - IAVF_RX_PTYPE_L2_FCOE_FCOTHER_PA = 16, - IAVF_RX_PTYPE_L2_FCOE_VFT_PAY3 = 17, - IAVF_RX_PTYPE_L2_FCOE_VFT_FCDATA = 18, - IAVF_RX_PTYPE_L2_FCOE_VFT_FCRDY = 19, - IAVF_RX_PTYPE_L2_FCOE_VFT_FCRSP = 20, - IAVF_RX_PTYPE_L2_FCOE_VFT_FCOTHER = 21, - IAVF_RX_PTYPE_GRENAT4_MAC_PAY3 = 58, - IAVF_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4 = 87, - IAVF_RX_PTYPE_GRENAT6_MAC_PAY3 = 124, - IAVF_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4 = 153 -}; - -struct iavf_rx_ptype_decoded { - u32 known:1; - u32 outer_ip:1; - u32 outer_ip_ver:1; - u32 
outer_frag:1; - u32 tunnel_type:3; - u32 tunnel_end_prot:2; - u32 tunnel_end_frag:1; - u32 inner_prot:4; - u32 payload_layer:3; -}; - -enum iavf_rx_ptype_outer_ip { - IAVF_RX_PTYPE_OUTER_L2 = 0, - IAVF_RX_PTYPE_OUTER_IP = 1 -}; - -enum iavf_rx_ptype_outer_ip_ver { - IAVF_RX_PTYPE_OUTER_NONE = 0, - IAVF_RX_PTYPE_OUTER_IPV4 = 0, - IAVF_RX_PTYPE_OUTER_IPV6 = 1 -}; - -enum iavf_rx_ptype_outer_fragmented { - IAVF_RX_PTYPE_NOT_FRAG = 0, - IAVF_RX_PTYPE_FRAG = 1 -}; - -enum iavf_rx_ptype_tunnel_type { - IAVF_RX_PTYPE_TUNNEL_NONE = 0, - IAVF_RX_PTYPE_TUNNEL_IP_IP = 1, - IAVF_RX_PTYPE_TUNNEL_IP_GRENAT = 2, - IAVF_RX_PTYPE_TUNNEL_IP_GRENAT_MAC = 3, - IAVF_RX_PTYPE_TUNNEL_IP_GRENAT_MAC_VLAN = 4, -}; - -enum iavf_rx_ptype_tunnel_end_prot { - IAVF_RX_PTYPE_TUNNEL_END_NONE = 0, - IAVF_RX_PTYPE_TUNNEL_END_IPV4 = 1, - IAVF_RX_PTYPE_TUNNEL_END_IPV6 = 2, -}; - -enum iavf_rx_ptype_inner_prot { - IAVF_RX_PTYPE_INNER_PROT_NONE = 0, - IAVF_RX_PTYPE_INNER_PROT_UDP = 1, - IAVF_RX_PTYPE_INNER_PROT_TCP = 2, - IAVF_RX_PTYPE_INNER_PROT_SCTP = 3, - IAVF_RX_PTYPE_INNER_PROT_ICMP = 4, - IAVF_RX_PTYPE_INNER_PROT_TIMESYNC = 5 -}; - -enum iavf_rx_ptype_payload_layer { - IAVF_RX_PTYPE_PAYLOAD_LAYER_NONE = 0, - IAVF_RX_PTYPE_PAYLOAD_LAYER_PAY2 = 1, - IAVF_RX_PTYPE_PAYLOAD_LAYER_PAY3 = 2, - IAVF_RX_PTYPE_PAYLOAD_LAYER_PAY4 = 3, -}; - #define IAVF_RXD_QW1_LENGTH_PBUF_SHIFT 38 #define IAVF_RXD_QW1_LENGTH_PBUF_MASK (0x3FFFULL << \ IAVF_RXD_QW1_LENGTH_PBUF_SHIFT) diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index 4e17d006c52d46..d23d5097db97ad 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -1,10 +1,14 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright(c) 2013 - 2018 Intel Corporation. */ +#include + #include "iavf.h" #include "iavf_prototype.h" #include "iavf_client.h" +#define IAVF_VC_MSG_TIMEOUT_MS 300 + /** * iavf_send_pf_msg * @adapter: adapter structure @@ -50,6 +54,59 @@ int iavf_send_api_ver(struct iavf_adapter *adapter) sizeof(vvi)); } +/** + * iavf_poll_virtchnl_msg_timeout + * @hw: HW configuration structure + * @event: event to populate on success + * @op_to_poll: requested virtchnl op to poll for + * @msecs: timeout in milliseconds + * + * Initialize poll for virtchnl msg matching the requested_op. Returns 0 + * if a message of the correct opcode is in the queue or an error code + * if no message matching the op code is waiting and other failures + * (including timeout). In case of timeout -EBUSY error is returned. + */ +static int +iavf_poll_virtchnl_msg_timeout(struct iavf_hw *hw, + struct iavf_arq_event_info *event, + enum virtchnl_ops op_to_poll, + unsigned int msecs) +{ + unsigned int wait, delay = 10; + enum virtchnl_ops received_op; + enum iavf_status status; + u32 v_retval; + + for (wait = 0; wait < msecs; wait += delay) { + /* When the AQ is empty, iavf_clean_arq_element will be + * nonzero and after some delay this loop will check again + * if any message is added to the AQ. 
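 + * The loop sleeps 'delay' milliseconds between attempts and gives up + * once the accumulated wait reaches 'msecs'.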
+ */ + status = iavf_clean_arq_element(hw, event, NULL); + if (status == IAVF_ERR_ADMIN_QUEUE_NO_WORK) + goto wait_for_msg; + else if (status != IAVF_SUCCESS) + break; + received_op = + (enum virtchnl_ops)le32_to_cpu(event->desc.cookie_high); + if (op_to_poll == received_op) + break; +wait_for_msg: + msleep(delay); + status = IAVF_ERR_NOT_READY; + } + + if (status == IAVF_SUCCESS) { + v_retval = le32_to_cpu(event->desc.cookie_low); + v_retval = virtchnl_status_to_errno((enum virtchnl_status_code) + v_retval); + } else { + v_retval = iavf_status_to_errno(status); + } + + return v_retval; +} + /** * iavf_poll_virtchnl_msg * @hw: HW configuration structure @@ -85,6 +142,83 @@ iavf_poll_virtchnl_msg(struct iavf_hw *hw, struct iavf_arq_event_info *event, return virtchnl_status_to_errno((enum virtchnl_status_code)v_retval); } +/** + * iavf_process_pending_pf_msg + * @adapter: adapter structure + * @timeout_msecs: timeout in milliseconds + * + * Check if any VIRTCHNL message is currently pending and process it + * if needed. + * Poll the admin queue for the PF response and process it using + * a standard handler. + * If no PF response has been received within a given timeout, exit + * with an error. + */ +int +iavf_process_pending_pf_msg(struct iavf_adapter *adapter, + unsigned int timeout_msecs) +{ + enum virtchnl_ops current_op = adapter->current_op; + struct iavf_hw *hw = &adapter->hw; + struct iavf_arq_event_info event; + enum virtchnl_ops v_op; + enum iavf_status v_ret; + int err; + + if (current_op == VIRTCHNL_OP_UNKNOWN) + return 0; + + event.buf_len = IAVF_MAX_AQ_BUF_SIZE; + event.msg_buf = kzalloc(IAVF_MAX_AQ_BUF_SIZE, GFP_KERNEL); + if (!event.msg_buf) + return -ENOMEM; + + err = iavf_poll_virtchnl_msg_timeout(hw, &event, current_op, + timeout_msecs); + if (err) + goto free_exit; + + v_op = (enum virtchnl_ops)le32_to_cpu(event.desc.cookie_high); + v_ret = (enum iavf_status)le32_to_cpu(event.desc.cookie_low); + + iavf_virtchnl_completion(adapter, v_op, v_ret, event.msg_buf, + event.msg_len); + +free_exit: + kfree(event.msg_buf); + + return err; +} + +/** + * iavf_get_vf_op_result + * @adapter: adapter structure + * @op: virtchnl operation + * @msecs: timeout in milliseconds + * + * Return a result of a given operation returned by PF + * or exit with timeout. + */ +static int iavf_get_vf_op_result(struct iavf_adapter *adapter, + enum virtchnl_ops op, + unsigned int msecs) +{ + struct iavf_hw *hw = &adapter->hw; + struct iavf_arq_event_info event; + int err; + + event.buf_len = IAVF_MAX_AQ_BUF_SIZE; + event.msg_buf = kzalloc(IAVF_MAX_AQ_BUF_SIZE, GFP_KERNEL); + if (!event.msg_buf) + return -ENOMEM; + + err = iavf_poll_virtchnl_msg_timeout(hw, &event, op, msecs); + kfree(event.msg_buf); + adapter->current_op = VIRTCHNL_OP_UNKNOWN; + + return err; +} + /** * iavf_verify_api_ver * @adapter: adapter structure @@ -261,58 +395,114 @@ int iavf_get_vf_vlan_v2_caps(struct iavf_adapter *adapter) } /** - * iavf_configure_queues + * iavf_set_qp_config_info + * @vqpi: virtchannel structure for queue pair configuration * @adapter: adapter structure + * @queue_index: index of queue pair in the adapter structure + * @max_frame: maximal frame size supported by the adapter + * @xdp_pair: true if the queue pair is assigned to XDP queues * - * Request that the PF set up our (previously allocated) queues. - **/ -void iavf_configure_queues(struct iavf_adapter *adapter) + * Fill virtchannel queue pair configuration structure + * with data for the Rx and Tx queues of a given index. 
+ * To handle XDP queues, only the Tx part of the vqpi structure is filled + * with data. Because the virtchnl protocol can operate on queue pairs only, + * each extra Tx queue is associated with an empty Rx queue + * (with zero length). + */ +static void iavf_set_qp_config_info(struct virtchnl_queue_pair_info *vqpi, + struct iavf_adapter *adapter, + int queue_index, u32 max_frame, + bool xdp_pair) +{ + struct iavf_ring *rxq = &adapter->rx_rings[queue_index]; + struct iavf_ring *txq; + u32 hr, max_len; + int xdpq_idx; + + if (xdp_pair) { + xdpq_idx = queue_index - adapter->num_xdp_tx_queues; + txq = &adapter->xdp_rings[xdpq_idx]; + } else { + txq = &adapter->tx_rings[queue_index]; + } + vqpi->txq.vsi_id = adapter->vsi_res->vsi_id; + vqpi->txq.queue_id = queue_index; + vqpi->txq.ring_len = txq->count; + vqpi->txq.dma_ring_addr = txq->dma; + + vqpi->rxq.vsi_id = adapter->vsi_res->vsi_id; + vqpi->rxq.queue_id = queue_index; + if (xdp_pair) { + vqpi->rxq.ring_len = 0; + return; + } + + if (rxq->flags & IAVF_TXRX_FLAGS_XSK) { + hr = xsk_pool_get_headroom(rxq->xsk_pool); + max_len = xsk_pool_get_rx_frame_size(rxq->xsk_pool); + } else { + hr = rxq->pool->p.offset; + max_len = rxq->pool->p.max_len; + } + + max_frame = min_not_zero(max_frame, LIBIE_MAX_RX_FRM_LEN(hr)); + + vqpi->rxq.ring_len = rxq->count; + vqpi->rxq.dma_ring_addr = rxq->dma; + vqpi->rxq.max_pkt_size = max_frame; + vqpi->rxq.databuffer_size = max_len; +} + +/** + * iavf_configure_selected_queues + * @adapter: adapter structure + * @qp_mask: mask of queue pairs to configure + * @wait: if true, wait until the request is completed + * + * Request PF to set up our selected (previously allocated) queues. + * Returns 0 if the command succeeds or negative value in case of error. + * + * Note: The caller must ensure that the calling context has taken + * 'adapter->crit_lock' mutex when 'wait' parameter is set to true. + */ +int iavf_configure_selected_queues(struct iavf_adapter *adapter, u32 qp_mask, + bool wait) { + int pairs = adapter->num_active_queues + adapter->num_xdp_tx_queues; + unsigned long num_qps_to_config, mask = qp_mask; + u32 idx, max_frame = adapter->vf_res->max_mtu; struct virtchnl_vsi_queue_config_info *vqci; - int i, max_frame = adapter->vf_res->max_mtu; - int pairs = adapter->num_active_queues; struct virtchnl_queue_pair_info *vqpi; size_t len; - if (max_frame > IAVF_MAX_RXBUFFER || !max_frame) - max_frame = IAVF_MAX_RXBUFFER; - if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { /* bail because we already have a command pending */ - dev_err(&adapter->pdev->dev, "Cannot configure queues, command %d pending\n", + dev_err(&adapter->pdev->dev, + "Cannot configure queues, command %d pending\n", adapter->current_op); - return; + return -EBUSY; } + num_qps_to_config = hweight_long(mask); adapter->current_op = VIRTCHNL_OP_CONFIG_VSI_QUEUES; - len = struct_size(vqci, qpair, pairs); + len = struct_size(vqci, qpair, num_qps_to_config); vqci = kzalloc(len, GFP_KERNEL); if (!vqci) - return; - - /* Limit maximum frame size when jumbo frames is not enabled */ - if (!(adapter->flags & IAVF_FLAG_LEGACY_RX) && - (adapter->netdev->mtu <= ETH_DATA_LEN)) - max_frame = IAVF_RXBUFFER_1536 - NET_IP_ALIGN; + return -ENOMEM; vqci->vsi_id = adapter->vsi_res->vsi_id; - vqci->num_queue_pairs = pairs; + vqci->num_queue_pairs = num_qps_to_config; vqpi = vqci->qpair; /* Size check is not needed here - HW max is 16 queue pairs, and we * can fit info for 31 of them into the AQ buffer before it overflows.
*/ - for (i = 0; i < pairs; i++) { - vqpi->txq.vsi_id = vqci->vsi_id; - vqpi->txq.queue_id = i; - vqpi->txq.ring_len = adapter->tx_rings[i].count; - vqpi->txq.dma_ring_addr = adapter->tx_rings[i].dma; - vqpi->rxq.vsi_id = vqci->vsi_id; - vqpi->rxq.queue_id = i; - vqpi->rxq.ring_len = adapter->rx_rings[i].count; - vqpi->rxq.dma_ring_addr = adapter->rx_rings[i].dma; - vqpi->rxq.max_pkt_size = max_frame; - vqpi->rxq.databuffer_size = - ALIGN(adapter->rx_rings[i].rx_buf_len, - BIT_ULL(IAVF_RXQ_CTX_DBUFF_SHIFT)); + for_each_set_bit(idx, &mask, adapter->num_active_queues) { + iavf_set_qp_config_info(vqpi, adapter, idx, max_frame, false); + vqpi++; + } + + /* Set configuration info for XDP Tx queues. */ + for_each_set_bit_from(idx, &mask, pairs) { + iavf_set_qp_config_info(vqpi, adapter, idx, max_frame, true); vqpi++; } @@ -320,66 +510,170 @@ void iavf_configure_queues(struct iavf_adapter *adapter) iavf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_VSI_QUEUES, (u8 *)vqci, len); kfree(vqci); + + if (wait) + return iavf_get_vf_op_result(adapter, + VIRTCHNL_OP_CONFIG_VSI_QUEUES, + IAVF_VC_MSG_TIMEOUT_MS); + return 0; } /** - * iavf_enable_queues + * iavf_configure_queues * @adapter: adapter structure + * @wait: if true, wait until the request is completed * - * Request that the PF enable all of our queues. - **/ -void iavf_enable_queues(struct iavf_adapter *adapter) + * Send a request to PF to set up all allocated queues. + * Returns 0 if the command succeeds or negative value in case of error. + * + * Note: The caller must ensure that the calling context has taken + * 'adapter->crit_lock' mutex when 'wait' parameter is set to true. + */ +int iavf_configure_queues(struct iavf_adapter *adapter, bool wait) +{ + int pairs = adapter->num_active_queues + adapter->num_xdp_tx_queues; + u32 qpair_mask = BIT(pairs) - 1; + + return iavf_configure_selected_queues(adapter, qpair_mask, wait); +} + +/** + * iavf_enable_selected_queues + * @adapter: adapter structure + * @rx_queues: mask of Rx queues + * @tx_queues: mask of Tx queues + * @wait: if true, wait until the request is completed + * + * Send a request to PF to enable selected queues. + * Returns 0 if the command succeeds or negative value in case of error. + * + * Note: The caller must ensure that the calling context has taken + * 'adapter->crit_lock' mutex when 'wait' parameter is set to true. 
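+ *
+ * For example, with four active queue pairs and four XDP Tx queues,
+ * passing rx_queues = 0xf and tx_queues = 0xff enables every allocated
+ * queue.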
+ */ +int iavf_enable_selected_queues(struct iavf_adapter *adapter, u32 rx_queues, + u32 tx_queues, bool wait) { struct virtchnl_queue_select vqs; if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { /* bail because we already have a command pending */ - dev_err(&adapter->pdev->dev, "Cannot enable queues, command %d pending\n", + dev_err(&adapter->pdev->dev, + "Cannot enable queues, command %d pending\n", adapter->current_op); - return; + return -EBUSY; } adapter->current_op = VIRTCHNL_OP_ENABLE_QUEUES; vqs.vsi_id = adapter->vsi_res->vsi_id; - vqs.tx_queues = BIT(adapter->num_active_queues) - 1; - vqs.rx_queues = vqs.tx_queues; + vqs.tx_queues = tx_queues; + vqs.rx_queues = rx_queues; adapter->aq_required &= ~IAVF_FLAG_AQ_ENABLE_QUEUES; iavf_send_pf_msg(adapter, VIRTCHNL_OP_ENABLE_QUEUES, (u8 *)&vqs, sizeof(vqs)); + + if (wait) + return iavf_get_vf_op_result(adapter, VIRTCHNL_OP_ENABLE_QUEUES, + IAVF_VC_MSG_TIMEOUT_MS); + return 0; } /** - * iavf_disable_queues + * iavf_disable_selected_queues * @adapter: adapter structure + * @rx_queues: mask of Rx queues + * @tx_queues: mask of Tx queues + * @wait: if true, wait until the request is completed * - * Request that the PF disable all of our queues. - **/ -void iavf_disable_queues(struct iavf_adapter *adapter) + * Send a request to PF to disable selected queues. + * Returns 0 if the command succeeds or negative value in case of error. + * + * Note: The caller must ensure that the calling context has taken + * 'adapter->crit_lock' mutex when 'wait' parameter is set to true. + */ +int iavf_disable_selected_queues(struct iavf_adapter *adapter, u32 rx_queues, + u32 tx_queues, bool wait) { struct virtchnl_queue_select vqs; if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { /* bail because we already have a command pending */ - dev_err(&adapter->pdev->dev, "Cannot disable queues, command %d pending\n", + dev_err(&adapter->pdev->dev, + "Cannot disable queues, command %d pending\n", adapter->current_op); - return; + return -EBUSY; } adapter->current_op = VIRTCHNL_OP_DISABLE_QUEUES; vqs.vsi_id = adapter->vsi_res->vsi_id; - vqs.tx_queues = BIT(adapter->num_active_queues) - 1; - vqs.rx_queues = vqs.tx_queues; + vqs.tx_queues = tx_queues; + vqs.rx_queues = rx_queues; adapter->aq_required &= ~IAVF_FLAG_AQ_DISABLE_QUEUES; iavf_send_pf_msg(adapter, VIRTCHNL_OP_DISABLE_QUEUES, (u8 *)&vqs, sizeof(vqs)); + + if (wait) + return iavf_get_vf_op_result(adapter, + VIRTCHNL_OP_DISABLE_QUEUES, + IAVF_VC_MSG_TIMEOUT_MS); + return 0; +} + +/** + * iavf_enable_queues + * @adapter: adapter structure + * @wait: if true, wait until the request is completed + * + * Send a request to PF to enable all allocated queues. + * Returns 0 if the command succeeds or negative value in case of error. + * + * Note: The caller must ensure that the calling context has taken + * 'adapter->crit_lock' mutex when 'wait' parameter is set to true. + */ +int iavf_enable_queues(struct iavf_adapter *adapter, bool wait) +{ + u32 num_tx_queues = adapter->num_active_queues + + adapter->num_xdp_tx_queues; + + u32 rx_queues = BIT(adapter->num_active_queues) - 1; + u32 tx_queues = BIT(num_tx_queues) - 1; + + return iavf_enable_selected_queues(adapter, rx_queues, tx_queues, wait); +} + +/** + * iavf_disable_queues + * @adapter: adapter structure + * @wait: if true, wait until the request is completed + * + * Send a request to PF to disable all allocated queues. + * Returns 0 if the command succeeds or negative value in case of error. 
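+ * This is a thin wrapper around iavf_disable_selected_queues() with Rx
+ * and Tx masks covering all allocated queues, including the XDP Tx ones.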
+ * + * Note: The caller must ensure that the calling context has taken + * 'adapter->crit_lock' mutex when 'wait' parameter is set to true. + */ +int iavf_disable_queues(struct iavf_adapter *adapter, bool wait) +{ + u32 num_tx_queues = adapter->num_active_queues + + adapter->num_xdp_tx_queues; + + u32 rx_queues = BIT(adapter->num_active_queues) - 1; + u32 tx_queues = BIT(num_tx_queues) - 1; + + return iavf_disable_selected_queues(adapter, rx_queues, tx_queues, + wait); } /** * iavf_map_queues * @adapter: adapter structure + * @wait: if true, wait until the request is completed * - * Request that the PF map queues to interrupt vectors. Misc causes, including - * admin queue, are always mapped to vector 0. - **/ -void iavf_map_queues(struct iavf_adapter *adapter) + * Send a request to PF to update the mapping queues to interrupt vectors. + * Misc causes, including admin queue, are always mapped to vector 0. + * Returns 0 if the command succeeds or negative value in case of error. + * + * Note: The caller must ensure that the calling context has taken + * 'adapter->crit_lock' mutex when 'wait' parameter is set to true. + */ +int iavf_map_queues(struct iavf_adapter *adapter, bool wait) { struct virtchnl_irq_map_info *vimi; struct virtchnl_vector_map *vecmap; @@ -391,7 +685,7 @@ void iavf_map_queues(struct iavf_adapter *adapter) /* bail because we already have a command pending */ dev_err(&adapter->pdev->dev, "Cannot map queues to vectors, command %d pending\n", adapter->current_op); - return; + return -EBUSY; } adapter->current_op = VIRTCHNL_OP_CONFIG_IRQ_MAP; @@ -400,7 +694,7 @@ void iavf_map_queues(struct iavf_adapter *adapter) len = struct_size(vimi, vecmap, adapter->num_msix_vectors); vimi = kzalloc(len, GFP_KERNEL); if (!vimi) - return; + return -ENOMEM; vimi->num_vectors = adapter->num_msix_vectors; /* Queue vectors first */ @@ -410,8 +704,8 @@ void iavf_map_queues(struct iavf_adapter *adapter) vecmap->vsi_id = adapter->vsi_res->vsi_id; vecmap->vector_id = v_idx + NONQ_VECS; - vecmap->txq_map = q_vector->ring_mask; - vecmap->rxq_map = q_vector->ring_mask; + vecmap->txq_map = q_vector->tx_ring_mask; + vecmap->rxq_map = q_vector->rx_ring_mask; vecmap->rxitr_idx = IAVF_RX_ITR; vecmap->txitr_idx = IAVF_TX_ITR; } @@ -426,6 +720,12 @@ void iavf_map_queues(struct iavf_adapter *adapter) iavf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_IRQ_MAP, (u8 *)vimi, len); kfree(vimi); + + if (wait) + return iavf_get_vf_op_result(adapter, + VIRTCHNL_OP_CONFIG_IRQ_MAP, + IAVF_VC_MSG_TIMEOUT_MS); + return 0; } /** @@ -1886,6 +2186,52 @@ static void iavf_netdev_features_vlan_strip_set(struct net_device *netdev, netdev->features &= ~NETIF_F_HW_VLAN_CTAG_RX; } +/** + * iavf_poll_for_link_status - poll for PF notification about link status + * @adapter: adapter structure + * @msecs: timeout in milliseconds + * + * Returns: + * 0 - if notification about link down was received, + * 1 - if notification about link up was received, + * or negative error code in case of error. 
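+ *
+ * Any other event received while polling is handed to the standard
+ * iavf_virtchnl_completion() handler and -EBUSY is returned.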
+ */ +int iavf_poll_for_link_status(struct iavf_adapter *adapter, unsigned int msecs) +{ + struct iavf_hw *hw = &adapter->hw; + struct iavf_arq_event_info event; + struct virtchnl_pf_event *vpe; + int ret; + + event.buf_len = IAVF_MAX_AQ_BUF_SIZE; + event.msg_buf = kzalloc(IAVF_MAX_AQ_BUF_SIZE, GFP_KERNEL); + if (!event.msg_buf) + return -ENOMEM; + + ret = iavf_poll_virtchnl_msg_timeout(hw, &event, VIRTCHNL_OP_EVENT, + msecs); + if (ret) + goto virtchnl_msg_err; + + vpe = (struct virtchnl_pf_event *)event.msg_buf; + if (vpe->event == VIRTCHNL_EVENT_LINK_CHANGE) { + bool link_up = iavf_get_vpe_link_status(adapter, vpe); + + iavf_set_adapter_link_speed_from_vpe(adapter, vpe); + + ret = link_up ? 1 : 0; + } else { + iavf_virtchnl_completion(adapter, VIRTCHNL_OP_EVENT, 0, + event.msg_buf, event.msg_len); + ret = -EBUSY; + } + +virtchnl_msg_err: + kfree(event.msg_buf); + + return ret; +} + /** * iavf_virtchnl_completion * @adapter: adapter structure diff --git a/drivers/net/ethernet/intel/iavf/iavf_xsk.c b/drivers/net/ethernet/intel/iavf/iavf_xsk.c new file mode 100644 index 00000000000000..f0f88f25e4e011 --- /dev/null +++ b/drivers/net/ethernet/intel/iavf/iavf_xsk.c @@ -0,0 +1,1158 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2022 Intel Corporation. */ + +#include +#include +#include +#include +#include +#include "iavf.h" +#include "iavf_trace.h" +#include "iavf_xsk.h" + +#define IAVF_CRIT_LOCK_WAIT_TIMEOUT_MS 1000 +#define IAVF_VC_MSG_TIMEOUT_MS 3000 + +/** + * iavf_max_xdp_queues_count - Returns the maximal number of XDP queues + * that can be created for current configuration + * of a given adapter. + * @adapter: adapter where XDP socket will be set up + */ +static u32 +iavf_max_xdp_queues_count(struct iavf_adapter *adapter) +{ + u32 max_qp_num = adapter->vsi_res->num_queue_pairs; + u32 num_active_queues = adapter->num_active_queues; + + return num_active_queues * 2 > max_qp_num ? 
max_qp_num / 2 : + num_active_queues; +} + +/** + * iavf_qp_clean_rings - Cleans all the rings of a given index + * @adapter: adapter that contains rings of interest + * @q_idx: ring index in array + */ +static void +iavf_qp_clean_rings(struct iavf_adapter *adapter, u16 q_idx) +{ + iavf_clean_tx_ring(&adapter->tx_rings[q_idx]); + if (iavf_adapter_xdp_active(adapter)) { + synchronize_rcu(); + iavf_clean_tx_ring(&adapter->xdp_rings[q_idx]); + } + iavf_clean_rx_ring(&adapter->rx_rings[q_idx]); +} + +/** + * iavf_qvec_toggle_napi - Enables/disables NAPI for a given q_vector + * @adapter: adapter that has netdev + * @q_vector: q_vector that has NAPI context + * @enable: true for enable, false for disable + */ +static void +iavf_qvec_toggle_napi(struct iavf_adapter *adapter, + struct iavf_q_vector *q_vector, bool enable) +{ + if (!adapter->vsi.netdev || !q_vector) + return; + + if (enable) + napi_enable(&q_vector->napi); + else + napi_disable(&q_vector->napi); +} + +/** + * iavf_trigger_sw_intr - trigger a software interrupt + * @adapter: adapter of interest + * @q_vector: interrupt vector to trigger the software interrupt for + */ +static void +iavf_trigger_sw_intr(struct iavf_adapter *adapter, + struct iavf_q_vector *q_vector) +{ + struct iavf_hw *hw = &adapter->hw; + + wr32(hw, IAVF_VFINT_DYN_CTLN1(q_vector->reg_idx), + (IAVF_VFINT_DYN_CTLN1_INTENA_MASK | + IAVF_VFINT_DYN_CTLN1_ITR_INDX_MASK | + IAVF_VFINT_DYN_CTLN1_SWINT_TRIG_MASK | + IAVF_VFINT_DYN_CTLN1_SW_ITR_INDX_ENA_MASK)); + + iavf_flush(hw); +} + +/** + * iavf_qvec_dis_irq - Mask off queue interrupt generation on given ring + * @adapter: the adapter that contains queue vector being un-configured + * @q_vector: queue vector + */ +static void +iavf_qvec_dis_irq(struct iavf_adapter *adapter, struct iavf_q_vector *q_vector) +{ + int base = adapter->vsi.base_vector; + struct iavf_hw *hw = &adapter->hw; + u16 reg = q_vector->reg_idx; + + wr32(hw, IAVF_VFINT_DYN_CTLN1(reg), 0); + synchronize_irq(adapter->msix_entries[reg + base].vector); + iavf_flush(hw); +} + +/** + * iavf_qvec_ena_irq - Enable IRQ for given queue vector + * @adapter: the adapter that contains queue vector + * @q_vector: queue vector + */ +static void +iavf_qvec_ena_irq(struct iavf_adapter *adapter, struct iavf_q_vector *q_vector) +{ + struct iavf_hw *hw = &adapter->hw; + + if (adapter) + if (adapter->state == __IAVF_DOWN) + return; + + wr32(hw, IAVF_VFINT_DYN_CTLN1(q_vector->reg_idx), + IAVF_VFINT_DYN_CTLN1_INTENA_MASK | + IAVF_VFINT_DYN_CTLN1_ITR_INDX_MASK); + + iavf_flush(hw); +} + +/** + * iavf_qp_dis - Disables a queue pair + * @adapter: adapter of interest + * @q_idx: ring index in array + * + * Returns 0 on success, negative on failure. 
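+ *
+ * The pair is quiesced in software first (netdev Tx queue stopped, NAPI
+ * and IRQ disabled), then the PF is asked to disable the Rx, Tx and XDP
+ * Tx queues, and finally the rings are cleaned and, for non-XSK rings,
+ * the page pool is released.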
+ */ +static int iavf_qp_dis(struct iavf_adapter *adapter, u16 q_idx) +{ + struct iavf_vsi *vsi = &adapter->vsi; + struct iavf_ring *rx_ring, *xdp_ring; + struct iavf_q_vector *q_vector; + u32 rx_queues, tx_queues; + int err; + + if (q_idx >= adapter->num_active_queues) + return -EINVAL; + + rx_ring = &adapter->rx_rings[q_idx]; + q_vector = rx_ring->q_vector; + + rx_queues = BIT(q_idx); + tx_queues = rx_queues; + + netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); + + iavf_qvec_toggle_napi(adapter, q_vector, false); + iavf_qvec_dis_irq(adapter, q_vector); + + xdp_ring = &adapter->xdp_rings[q_idx]; + + tx_queues |= BIT(xdp_ring->queue_index); + + err = iavf_disable_selected_queues(adapter, rx_queues, tx_queues, true); + if (err) + goto dis_exit; + + iavf_qp_clean_rings(adapter, q_idx); + if (!(rx_ring->flags & IAVF_TXRX_FLAGS_XSK)) { + struct device *dev = rx_ring->pool->p.dev; + + libie_rx_page_pool_destroy(rx_ring->pool, &rx_ring->rq_stats); + rx_ring->dev = dev; + } +dis_exit: + return err; +} + +/** + * iavf_qp_ena - Enables a queue pair + * @adapter: adapter of interest + * @q_idx: ring index in array + * + * Returns 0 on success, negative on failure. + */ +static int iavf_qp_ena(struct iavf_adapter *adapter, u16 q_idx) +{ + struct iavf_vsi *vsi = &adapter->vsi; + struct iavf_ring *rx_ring, *xdp_ring; + struct iavf_q_vector *q_vector; + u32 rx_queues, tx_queues; + int ret, err = 0; + + if (q_idx >= adapter->num_active_queues) + return -EINVAL; + + xdp_ring = &adapter->xdp_rings[q_idx]; + rx_ring = &adapter->rx_rings[q_idx]; + q_vector = rx_ring->q_vector; + + rx_queues = BIT(q_idx); + tx_queues = rx_queues; + tx_queues |= BIT(xdp_ring->queue_index); + + iavf_xsk_setup_xdp_ring(xdp_ring); + iavf_xsk_setup_rx_ring(rx_ring); + + if (!(rx_ring->flags & IAVF_TXRX_FLAGS_XSK)) { + rx_ring->pool = libie_rx_page_pool_create(rx_ring->netdev, + rx_ring->count, + true); + if (IS_ERR(rx_ring->pool)) { + err = PTR_ERR(rx_ring->pool); + goto ena_exit; + } + } + + iavf_configure_rx_ring(adapter, rx_ring); + + /* Use 'tx_queues' mask as a queue pair mask to configure + * also an extra XDP Tx queue. 
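+	 * The low bit selects the regular Rx/Tx pair and the extra bit the
+	 * XDP Tx queue, which the PF sees as a Tx-only pair with a
+	 * zero-length Rx ring.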
+ */ + err = iavf_configure_selected_queues(adapter, tx_queues, true); + if (err) + goto ena_exit; + + err = iavf_enable_selected_queues(adapter, rx_queues, tx_queues, true); + if (err) + goto ena_exit; + + ret = iavf_poll_for_link_status(adapter, IAVF_XDP_LINK_TIMEOUT_MS); + if (ret < 0) { + err = ret; + dev_err(&adapter->pdev->dev, + "cannot bring the link up, error: %d\n", err); + goto ena_exit; + } else if (!ret) { + err = -EBUSY; + dev_err(&adapter->pdev->dev, + "pf returned link down status, error: %d\n", err); + goto ena_exit; + } + + iavf_qvec_toggle_napi(adapter, q_vector, true); + iavf_qvec_ena_irq(adapter, q_vector); + + netif_tx_start_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); +ena_exit: + return err; +} + +/** + * iavf_xsk_pool_disable - disable a buffer pool region + * @adapter: Current adapter + * @qid: queue ID + * + * Returns 0 on success, negative on failure + */ +static int iavf_xsk_pool_disable(struct iavf_adapter *adapter, u16 qid) +{ + struct xsk_buff_pool *pool = xsk_get_pool_from_qid(adapter->vsi.netdev, + qid); + if (!pool) + return -EINVAL; + + clear_bit(qid, adapter->af_xdp_zc_qps); + xsk_pool_dma_unmap(pool, LIBIE_RX_DMA_ATTR); + + return 0; +} + +/** + * iavf_xsk_pool_enable - enable a buffer pool region + * @adapter: Current adapter + * @pool: pointer to a requested buffer pool region + * @qid: queue ID + * + * Returns 0 on success, negative on failure + */ +static int +iavf_xsk_pool_enable(struct iavf_adapter *adapter, struct xsk_buff_pool *pool, + u16 qid) +{ + struct iavf_vsi *vsi = &adapter->vsi; + int err; + + if (qid >= vsi->netdev->real_num_rx_queues || + qid >= vsi->netdev->real_num_tx_queues) + return -EINVAL; + + err = xsk_pool_dma_map(pool, &adapter->pdev->dev, LIBIE_RX_DMA_ATTR); + if (err) + return err; + + set_bit(qid, adapter->af_xdp_zc_qps); + + return 0; +} + +/** + * iavf_xsk_pool_setup - enable/disable a buffer pool region depending + * on its state + * @adapter: Current adapter + * @pool: buffer pool to enable/associate to a ring, NULL to disable + * @qid: queue ID + * + * Returns 0 on success, negative on failure + */ +int iavf_xsk_pool_setup(struct iavf_adapter *adapter, + struct xsk_buff_pool *pool, u32 qid) +{ + bool if_running, pool_present = !!pool; + struct iavf_vsi *vsi = &adapter->vsi; + int ret = 0, pool_failure = 0; + + if (qid >= iavf_max_xdp_queues_count(adapter)) { + netdev_err(vsi->netdev, "Wrong queue index for XDP.\n"); + pool_failure = -EINVAL; + goto failure; + } + + if_running = netif_running(vsi->netdev) && + iavf_adapter_xdp_active(adapter); + + if (if_running) { + if (iavf_lock_timeout(&adapter->crit_lock, + IAVF_CRIT_LOCK_WAIT_TIMEOUT_MS)) + return -EBUSY; + + ret = iavf_process_pending_pf_msg(adapter, + IAVF_VC_MSG_TIMEOUT_MS); + if (ret) + goto xsk_pool_if_up; + + ret = iavf_qp_dis(adapter, qid); + if (ret) { + netdev_err(vsi->netdev, "iavf_qp_dis error = %d\n", ret); + goto xsk_pool_if_up; + } + } + + pool_failure = pool_present ? iavf_xsk_pool_enable(adapter, pool, qid) : + iavf_xsk_pool_disable(adapter, qid); + +xsk_pool_if_up: + if (if_running) { + ret = iavf_qp_ena(adapter, qid); + mutex_unlock(&adapter->crit_lock); + if (!ret && pool_present) + napi_schedule(&adapter->rx_rings[qid].q_vector->napi); + else if (ret) + netdev_err(vsi->netdev, "iavf_qp_ena error = %d\n", ret); + } + +failure: + if (pool_failure) { + netdev_err(vsi->netdev, "Could not %sable buffer pool, error = %d\n", + pool_present ? 
"en" : "dis", pool_failure); + return pool_failure; + } + + return ret; +} + +/** + * iavf_clean_xdp_tx_buf - Free and unmap XDP Tx buffer + * @xdp_ring: XDP Tx ring + * @tx_buf: Tx buffer to clean + */ +static void +iavf_clean_xdp_tx_buf(struct iavf_ring *xdp_ring, struct iavf_tx_buffer *tx_buf) +{ + switch (tx_buf->xdp_type) { + case IAVF_XDP_BUFFER_FRAME: + dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma), + dma_unmap_len(tx_buf, len), DMA_TO_DEVICE); + dma_unmap_len_set(tx_buf, len, 0); + xdp_return_frame(tx_buf->xdpf); + tx_buf->xdpf = NULL; + break; + case IAVF_XDP_BUFFER_TX: + xsk_buff_free(tx_buf->xdp); + break; + } + + xdp_ring->xdp_tx_active--; + tx_buf->xdp_type = IAVF_XDP_BUFFER_NONE; +} + +/** + * iavf_clean_xdp_irq_zc - produce AF_XDP descriptors to CQ + * @xdp_ring: XDP Tx ring + */ +static void iavf_clean_xdp_irq_zc(struct iavf_ring *xdp_ring) +{ + u16 ntc = xdp_ring->next_to_clean; + struct iavf_tx_buffer *tx_buf; + struct iavf_tx_desc *tx_desc; + u16 cnt = xdp_ring->count; + u16 done_frames = 0; + u16 xsk_frames = 0; + u16 last_rs; + int i; + + last_rs = xdp_ring->next_to_use ? xdp_ring->next_to_use - 1 : cnt - 1; + tx_desc = IAVF_TX_DESC(xdp_ring, last_rs); + if ((tx_desc->cmd_type_offset_bsz & + cpu_to_le64(IAVF_TX_DESC_DTYPE_DESC_DONE))) { + if (last_rs >= ntc) + done_frames = last_rs - ntc + 1; + else + done_frames = last_rs + cnt - ntc + 1; + } + + if (!done_frames) + return; + + if (likely(!xdp_ring->xdp_tx_active)) { + xsk_frames = done_frames; + goto skip; + } + + ntc = xdp_ring->next_to_clean; + for (i = 0; i < done_frames; i++) { + tx_buf = &xdp_ring->tx_bi[ntc]; + + if (tx_buf->xdp_type) + iavf_clean_xdp_tx_buf(xdp_ring, tx_buf); + else + xsk_frames++; + + ntc++; + if (ntc >= xdp_ring->count) + ntc = 0; + } +skip: + tx_desc->cmd_type_offset_bsz = 0; + xdp_ring->next_to_clean += done_frames; + if (xdp_ring->next_to_clean >= cnt) + xdp_ring->next_to_clean -= cnt; + if (xsk_frames) + xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames); +} + +/** + * iavf_xmit_pkt - produce a single HW Tx descriptor out of AF_XDP descriptor + * @xdp_ring: XDP ring to produce the HW Tx descriptor on + * @desc: AF_XDP descriptor to pull the DMA address and length from + * @total_bytes: bytes accumulator that will be used for stats update + */ +static void iavf_xmit_pkt(struct iavf_ring *xdp_ring, struct xdp_desc *desc, + unsigned int *total_bytes) +{ + struct iavf_tx_desc *tx_desc; + dma_addr_t dma; + + dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc->addr); + xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, desc->len); + + tx_desc = IAVF_TX_DESC(xdp_ring, xdp_ring->next_to_use++); + tx_desc->buffer_addr = cpu_to_le64(dma); + tx_desc->cmd_type_offset_bsz = iavf_build_ctob(IAVF_TX_DESC_CMD_EOP, + 0, desc->len, 0); + + *total_bytes += desc->len; +} + +/** + * iavf_xmit_pkt_batch - produce a batch of HW Tx descriptors out + * of AF_XDP descriptors + * @xdp_ring: XDP ring to produce the HW Tx descriptors on + * @descs: AF_XDP descriptors to pull the DMA addresses and lengths from + * @total_bytes: bytes accumulator that will be used for stats update + */ +static void iavf_xmit_pkt_batch(struct iavf_ring *xdp_ring, + struct xdp_desc *descs, + unsigned int *total_bytes) +{ + u16 ntu = xdp_ring->next_to_use; + struct iavf_tx_desc *tx_desc; + u32 i; + + loop_unrolled_for(i = 0; i < PKTS_PER_BATCH; i++) { + dma_addr_t dma; + + dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, descs[i].addr); + xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, + descs[i].len); + + 
tx_desc = IAVF_TX_DESC(xdp_ring, ntu++); + tx_desc->buffer_addr = cpu_to_le64(dma); + tx_desc->cmd_type_offset_bsz = + iavf_build_ctob(IAVF_TX_DESC_CMD_EOP, 0, + descs[i].len, 0); + + *total_bytes += descs[i].len; + } + + xdp_ring->next_to_use = ntu; +} + +/** + * iavf_fill_tx_hw_ring - produce the number of Tx descriptors onto ring + * @xdp_ring: XDP ring to produce the HW Tx descriptors on + * @descs: AF_XDP descriptors to pull the DMA addresses and lengths from + * @nb_pkts: count of packets to be send + * @total_bytes: bytes accumulator that will be used for stats update + */ +static void iavf_fill_tx_hw_ring(struct iavf_ring *xdp_ring, + struct xdp_desc *descs, u32 nb_pkts, + unsigned int *total_bytes) +{ + u32 batched, leftover, i; + + batched = ALIGN_DOWN(nb_pkts, PKTS_PER_BATCH); + leftover = nb_pkts & (PKTS_PER_BATCH - 1); + + for (i = 0; i < batched; i += PKTS_PER_BATCH) + iavf_xmit_pkt_batch(xdp_ring, &descs[i], total_bytes); + for (; i < batched + leftover; i++) + iavf_xmit_pkt(xdp_ring, &descs[i], total_bytes); +} + +/** + * iavf_xmit_zc - take entries from XSK Tx ring and place them onto HW Tx ring + * @xdp_ring: XDP ring to produce the HW Tx descriptors on + * + * Returns true if there is no more work that needs to be done, false otherwise + */ +bool iavf_xmit_zc(struct iavf_ring *xdp_ring) +{ + struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs; + struct libie_sq_onstack_stats stats = { }; + u32 nb_processed = 0; + bool ret = true; + int budget; + + if (static_branch_unlikely(&iavf_xdp_locking_key)) + spin_lock(&xdp_ring->tx_lock); + + iavf_clean_xdp_irq_zc(xdp_ring); + + budget = IAVF_DESC_UNUSED(xdp_ring); + budget = min_t(u16, budget, IAVF_RING_QUARTER(xdp_ring)); + + stats.packets = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, + budget); + if (!stats.packets) + goto unlock; + + if (xdp_ring->next_to_use + stats.packets >= xdp_ring->count) { + nb_processed = xdp_ring->count - xdp_ring->next_to_use; + iavf_fill_tx_hw_ring(xdp_ring, descs, nb_processed, + &stats.bytes); + xdp_ring->next_to_use = 0; + } + + iavf_fill_tx_hw_ring(xdp_ring, &descs[nb_processed], + stats.packets - nb_processed, &stats.bytes); + + iavf_set_rs_bit(xdp_ring); + iavf_xdp_ring_update_tail(xdp_ring); + iavf_update_tx_ring_stats(xdp_ring, &stats); + + if (xsk_uses_need_wakeup(xdp_ring->xsk_pool)) + xsk_set_tx_need_wakeup(xdp_ring->xsk_pool); + + ret = stats.packets < budget; +unlock: + if (static_branch_unlikely(&iavf_xdp_locking_key)) + spin_unlock(&xdp_ring->tx_lock); + + return ret; +} + +/** + * iavf_xsk_wakeup - Implements ndo_xsk_wakeup + * @netdev: net_device + * @queue_id: queue to wake up + * @flags: ignored in our case, since we have Rx and Tx in the same NAPI + * + * Returns negative on error, zero otherwise. 
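+ *
+ * If the queue's NAPI is not already scheduled, a software interrupt is
+ * triggered on its vector to kick processing.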
+ */ +int iavf_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags) +{ + struct iavf_adapter *adapter = netdev_priv(netdev); + struct iavf_q_vector *q_vector; + struct iavf_ring *ring; + + if (adapter->state == __IAVF_DOWN || + adapter->state == __IAVF_RESETTING) + return -ENETDOWN; + + if (!iavf_adapter_xdp_active(adapter)) + return -EINVAL; + + if (queue_id >= adapter->num_active_queues) + return -EINVAL; + + ring = &adapter->rx_rings[queue_id]; + + if (!(ring->xdp_ring->flags & IAVF_TXRX_FLAGS_XSK)) + return -EINVAL; + + q_vector = ring->q_vector; + if (!napi_if_scheduled_mark_missed(&q_vector->napi)) + iavf_trigger_sw_intr(adapter, q_vector); + + return 0; +} + +static u32 iavf_get_xdp_tx_qid(struct iavf_ring *ring) +{ + struct iavf_adapter *adapter = ring->vsi->back; + + return ring->queue_index - adapter->num_active_queues; +} + +static struct xsk_buff_pool *iavf_tx_xsk_pool(struct iavf_ring *ring) +{ + struct iavf_adapter *adapter = ring->vsi->back; + u32 qid; + + if (!iavf_adapter_xdp_active(adapter) || + !(ring->flags & IAVF_TXRX_FLAGS_XDP)) + return NULL; + + qid = iavf_get_xdp_tx_qid(ring); + if (!test_bit(qid, adapter->af_xdp_zc_qps)) + return NULL; + + return xsk_get_pool_from_qid(adapter->netdev, qid); +} + +void iavf_xsk_setup_xdp_ring(struct iavf_ring *xdp_ring) +{ + struct xsk_buff_pool *pool; + + pool = iavf_tx_xsk_pool(xdp_ring); + if (pool) { + xdp_ring->xsk_pool = pool; + xdp_ring->flags |= IAVF_TXRX_FLAGS_XSK; + } else { + xdp_ring->dev = &xdp_ring->vsi->back->pdev->dev; + xdp_ring->flags &= ~IAVF_TXRX_FLAGS_XSK; + } +} + +/** + * iavf_xsk_clean_xdp_ring - Clean the XDP Tx ring and its buffer pool queues + * @xdp_ring: XDP_Tx ring + */ +void iavf_xsk_clean_xdp_ring(struct iavf_ring *xdp_ring) +{ + u16 ntc = xdp_ring->next_to_clean, ntu = xdp_ring->next_to_use; + u32 xsk_frames = 0; + + while (ntc != ntu) { + struct iavf_tx_buffer *tx_buf = &xdp_ring->tx_bi[ntc]; + + if (tx_buf->xdp_type) + iavf_clean_xdp_tx_buf(xdp_ring, tx_buf); + else + xsk_frames++; + + tx_buf->page = NULL; + + ntc++; + if (ntc >= xdp_ring->count) + ntc = 0; + } + + if (xsk_frames) + xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames); +} + +/** + * iavf_init_rx_descs_zc - pick buffers from XSK buffer pool and use it + * @pool: XSK Buffer pool to pull the buffers from + * @xdp: SW ring of xdp_buff that will hold the buffers + * @rx_desc: Pointer to Rx descriptors that will be filled + * @count: The number of buffers to allocate + * + * This function allocates a number of Rx buffers from the fill ring + * or the internal recycle mechanism and places them on the Rx ring. + * + * Note that ring wrap should be handled by caller of this function. + * + * Returns the amount of allocated Rx descriptors + */ +static u16 iavf_init_rx_descs_zc(struct xsk_buff_pool *pool, + struct xdp_buff **xdp, + union iavf_rx_desc *rx_desc, u16 count) +{ + dma_addr_t dma; + u16 num_buffs; + u16 i; + + num_buffs = xsk_buff_alloc_batch(pool, xdp, count); + for (i = 0; i < num_buffs; i++) { + dma = xsk_buff_xdp_get_dma(*xdp); + rx_desc->read.pkt_addr = cpu_to_le64(dma); + rx_desc->wb.qword1.status_error_len = 0; + + rx_desc++; + xdp++; + } + + return num_buffs; +} + +static struct xdp_buff **iavf_get_xdp_buff(struct iavf_ring *ring, u32 idx) +{ + return &ring->xdp_buff[idx]; +} + +/** + * __iavf_alloc_rx_buffers_zc - allocate a number of Rx buffers + * @rx_ring: Rx ring + * @count: The number of buffers to allocate + * + * Place the @count of descriptors onto Rx ring. 
Handle the ring wrap + * for case where space from next_to_use up to the end of ring is less + * than @count. Finally do a tail bump. + * + * Returns true if all allocations were successful, false if any fail. + */ +static bool __iavf_alloc_rx_buffers_zc(struct iavf_ring *rx_ring, u16 count) +{ + u32 nb_buffs_extra = 0, nb_buffs = 0; + u16 ntu = rx_ring->next_to_use; + union iavf_rx_desc *rx_desc; + u16 total_count = count; + struct xdp_buff **xdp; + + rx_desc = IAVF_RX_DESC(rx_ring, ntu); + xdp = iavf_get_xdp_buff(rx_ring, ntu); + + if (ntu + count >= rx_ring->count) { + nb_buffs_extra = iavf_init_rx_descs_zc(rx_ring->xsk_pool, xdp, + rx_desc, + rx_ring->count - ntu); + if (nb_buffs_extra != rx_ring->count - ntu) { + ntu += nb_buffs_extra; + goto exit; + } + rx_desc = IAVF_RX_DESC(rx_ring, 0); + xdp = iavf_get_xdp_buff(rx_ring, 0); + ntu = 0; + count -= nb_buffs_extra; + iavf_release_rx_desc(rx_ring, 0); + + if (!count) + goto exit; + } + + nb_buffs = iavf_init_rx_descs_zc(rx_ring->xsk_pool, xdp, rx_desc, count); + + ntu += nb_buffs; + if (ntu == rx_ring->count) + ntu = 0; + +exit: + if (rx_ring->next_to_use != ntu) + iavf_release_rx_desc(rx_ring, ntu); + + return total_count == (nb_buffs_extra + nb_buffs); +} + +/** + * iavf_alloc_rx_buffers_zc - allocate a number of Rx buffers + * @rx_ring: Rx ring + * @count: The number of buffers to allocate + * + * Wrapper for internal allocation routine; figure out how many tail + * bumps should take place based on the given threshold + * + * Returns true if all calls to internal alloc routine succeeded + */ +static bool iavf_alloc_rx_buffers_zc(struct iavf_ring *rx_ring, u16 count) +{ + u16 rx_thresh = IAVF_RING_QUARTER(rx_ring); + u16 leftover, i, tail_bumps; + + tail_bumps = count / rx_thresh; + leftover = count - (tail_bumps * rx_thresh); + + for (i = 0; i < tail_bumps; i++) + if (!__iavf_alloc_rx_buffers_zc(rx_ring, rx_thresh)) + return false; + return __iavf_alloc_rx_buffers_zc(rx_ring, leftover); +} + +/** + * iavf_check_alloc_rx_buffers_zc - allocate a number of Rx buffers with logs + * @adapter: board private structure + * @rx_ring: Rx ring + * + * Wrapper for internal allocation routine; Prints out logs, if allocation + * did not go as expected + */ +void iavf_check_alloc_rx_buffers_zc(struct iavf_adapter *adapter, + struct iavf_ring *rx_ring) +{ + u32 count = IAVF_DESC_UNUSED(rx_ring); + + if (!xsk_buff_can_alloc(rx_ring->xsk_pool, count)) { + netdev_warn(adapter->netdev, + "XSK buffer pool does not provide enough addresses to fill %d buffers on Rx ring %d\n", + count, rx_ring->queue_index); + netdev_warn(adapter->netdev, + "Change Rx ring/fill queue size to avoid performance issues\n"); + } + + if (!iavf_alloc_rx_buffers_zc(rx_ring, count)) + netdev_warn(adapter->netdev, + "Failed to allocate some buffers on XSK buffer pool enabled Rx ring %d\n", + rx_ring->queue_index); +} + +/** + * iavf_rx_xsk_pool - Get a valid xsk pool for RX ring + * @ring: Rx ring being configured + * + * Do not return a xsk pool, if socket is TX-only + **/ +static struct xsk_buff_pool *iavf_rx_xsk_pool(struct iavf_ring *ring) +{ + struct iavf_adapter *adapter = ring->vsi->back; + u16 qid = ring->queue_index; + struct xsk_buff_pool *pool; + + if (!iavf_adapter_xdp_active(adapter) || + !test_bit(qid, adapter->af_xdp_zc_qps)) + return NULL; + + pool = xsk_get_pool_from_qid(adapter->netdev, qid); + if (!pool || !xsk_buff_can_alloc(pool, 1)) + return NULL; + + return pool; +} + +void iavf_xsk_setup_rx_ring(struct iavf_ring *rx_ring) +{ + struct xsk_buff_pool *pool; + 
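+	/* Bind the ring to the XSK pool attached to this queue, if any;
+	 * otherwise fall back to the regular page-pool-backed Rx path.
+	 */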
+ pool = iavf_rx_xsk_pool(rx_ring); + if (pool) { + rx_ring->xsk_pool = pool; + rx_ring->flags |= IAVF_TXRX_FLAGS_XSK; + } else { + rx_ring->dev = &rx_ring->vsi->back->pdev->dev; + rx_ring->flags &= ~IAVF_TXRX_FLAGS_XSK; + } +} + +/** + * iavf_xsk_clean_rx_ring - clean buffer pool queues connected to a given Rx ring + * @rx_ring: ring to be cleaned + */ +void iavf_xsk_clean_rx_ring(struct iavf_ring *rx_ring) +{ + u16 ntc = rx_ring->next_to_clean; + u16 ntu = rx_ring->next_to_use; + + while (ntc != ntu) { + struct xdp_buff *xdp = *iavf_get_xdp_buff(rx_ring, ntc); + + xsk_buff_free(xdp); + ntc++; + if (ntc >= rx_ring->count) + ntc = 0; + } +} + +/** + * iavf_xmit_xdp_tx_zc - AF_XDP ZC handler for XDP_TX + * @xdp: XDP buffer to xmit + * @xdp_ring: XDP ring to produce descriptor onto + * + * Returns 0 for successfully produced desc, + * -EBUSY if there was not enough space on XDP ring. + */ +static int iavf_xmit_xdp_tx_zc(struct xdp_buff *xdp, + struct iavf_ring *xdp_ring) +{ + u32 size = xdp->data_end - xdp->data; + u32 ntu = xdp_ring->next_to_use; + struct iavf_tx_buffer *tx_buf; + struct iavf_tx_desc *tx_desc; + dma_addr_t dma; + + if (IAVF_DESC_UNUSED(xdp_ring) < IAVF_RING_QUARTER(xdp_ring)) + iavf_clean_xdp_irq_zc(xdp_ring); + + if (unlikely(!IAVF_DESC_UNUSED(xdp_ring))) { + libie_stats_inc_one(&xdp_ring->sq_stats, busy); + return -EBUSY; + } + + dma = xsk_buff_xdp_get_dma(xdp); + xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, size); + + tx_buf = &xdp_ring->tx_bi[ntu]; + tx_buf->bytecount = size; + tx_buf->gso_segs = 1; + tx_buf->xdp_type = IAVF_XDP_BUFFER_TX; + tx_buf->xdp = xdp; + + tx_desc = IAVF_TX_DESC(xdp_ring, ntu); + tx_desc->buffer_addr = cpu_to_le64(dma); + tx_desc->cmd_type_offset_bsz = iavf_build_ctob(IAVF_TX_DESC_CMD_EOP, + 0, size, 0); + + xdp_ring->xdp_tx_active++; + + if (++ntu == xdp_ring->count) + ntu = 0; + xdp_ring->next_to_use = ntu; + + return 0; +} + +static int iavf_xmit_xdp_tx_zc_locked(struct xdp_buff *xdp, + struct iavf_ring *xdp_ring) +{ + int ret; + + if (static_branch_unlikely(&iavf_xdp_locking_key)) + spin_lock(&xdp_ring->tx_lock); + ret = iavf_xmit_xdp_tx_zc(xdp, xdp_ring); + if (static_branch_unlikely(&iavf_xdp_locking_key)) + spin_unlock(&xdp_ring->tx_lock); + + return ret; +} + +/** + * iavf_run_xdp_zc - Run XDP program and perform resulting action for ZC + * @rx_ring: RX descriptor ring to transact packets on + * @xdp: a prepared XDP buffer + * @xdp_prog: an XDP program assigned to the interface + * @xdp_ring: XDP TX queue assigned to the RX ring + * @rxq_xdp_act: Logical OR of flags of XDP actions that require finalization + * + * Returns resulting XDP action. 
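+ *
+ * XDP_REDIRECT is treated as the likely case; if the redirect fails with
+ * -ENOBUFS while need_wakeup is enabled, IAVF_RXQ_XDP_ACT_STOP_NOW is set
+ * so that the caller stops the Rx loop early.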
+ */ +static unsigned int +iavf_run_xdp_zc(struct iavf_ring *rx_ring, struct xdp_buff *xdp, + struct bpf_prog *xdp_prog, struct iavf_ring *xdp_ring, + u32 *rxq_xdp_act) +{ + unsigned int xdp_act; + int err; + + xdp_act = bpf_prog_run_xdp(xdp_prog, xdp); + + if (likely(xdp_act == XDP_REDIRECT)) { + err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); + if (likely(!err)) { + *rxq_xdp_act |= IAVF_RXQ_XDP_ACT_FINALIZE_REDIR; + return XDP_REDIRECT; + } + + if (xsk_uses_need_wakeup(rx_ring->xsk_pool) && err == -ENOBUFS) + *rxq_xdp_act |= IAVF_RXQ_XDP_ACT_STOP_NOW; + + goto xdp_err; + } + + switch (xdp_act) { + case XDP_PASS: + break; + case XDP_TX: + err = iavf_xmit_xdp_tx_zc_locked(xdp, xdp_ring); + if (unlikely(err)) + goto xdp_err; + + *rxq_xdp_act |= IAVF_RXQ_XDP_ACT_FINALIZE_TX; + break; + default: + bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, xdp_act); + + fallthrough; + case XDP_ABORTED: +xdp_err: + trace_xdp_exception(rx_ring->netdev, xdp_prog, xdp_act); + + fallthrough; + case XDP_DROP: + xsk_buff_free(xdp); + + return XDP_DROP; + } + + return xdp_act; +} + +/** + * iavf_construct_skb_zc - Create an sk_buff from zero-copy buffer + * @rx_ring: Rx ring + * @xdp: Pointer to XDP buffer + * + * This function allocates a new skb from a zero-copy Rx buffer. + * + * Returns the skb on success, NULL on failure. + */ +static struct sk_buff * +iavf_construct_skb_zc(struct iavf_ring *rx_ring, struct xdp_buff *xdp) +{ + unsigned int totalsize = xdp->data_end - xdp->data_meta; + unsigned int metasize = xdp->data - xdp->data_meta; + struct sk_buff *skb; + + net_prefetch(xdp->data_meta); + + skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize, + GFP_ATOMIC | __GFP_NOWARN); + if (unlikely(!skb)) + return NULL; + + memcpy(__skb_put(skb, totalsize), xdp->data_meta, + ALIGN(totalsize, sizeof(long))); + + if (metasize) { + skb_metadata_set(skb, metasize); + __skb_pull(skb, metasize); + } + + xsk_buff_free(xdp); + + return skb; +} + +/** + * iavf_clean_rx_irq_zc - consumes packets from the hardware ring + * @rx_ring: AF_XDP Rx ring + * @budget: NAPI budget + * + * Returns number of processed packets on success, remaining budget on failure. + */ +int iavf_clean_rx_irq_zc(struct iavf_ring *rx_ring, int budget) +{ + struct libie_rq_onstack_stats stats = { }; + u32 ntc = rx_ring->next_to_clean; + u32 ring_size = rx_ring->count; + struct iavf_ring *xdp_ring; + struct bpf_prog *xdp_prog; + u32 cleaned_count = 0; + bool failure = false; + u32 rxq_xdp_act = 0; + u32 to_refill; + + xdp_prog = rcu_dereference(rx_ring->xdp_prog); + xdp_ring = rx_ring->xdp_ring; + + while (likely(cleaned_count < budget)) { + union iavf_rx_desc *rx_desc; + struct xdp_buff *xdp; + unsigned int xdp_act; + struct sk_buff *skb; + unsigned int size; + u64 qword; + + rx_desc = IAVF_RX_DESC(rx_ring, ntc); + + /* status_error_len will always be zero for unused descriptors + * because it's cleared in cleanup, and overlaps with hdr_addr + * which is always zero because packet split isn't used, if the + * hardware wrote DD then the length will be non-zero + */ + qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); + if (!iavf_test_staterr(qword, IAVF_RX_DESC_STATUS_DD_SHIFT)) + break; + + /* This memory barrier is needed to keep us from reading + * any other fields out of the rx_desc until we have + * verified the descriptor has been written back. 
+ */ + dma_rmb(); + + size = (qword & IAVF_RXD_QW1_LENGTH_PBUF_MASK) >> + IAVF_RXD_QW1_LENGTH_PBUF_SHIFT; + + xdp = *iavf_get_xdp_buff(rx_ring, ntc); + iavf_trace(clean_rx_irq_zc, rx_ring, rx_desc, NULL); + + if (unlikely(!size)) { + xsk_buff_free(xdp); + goto next; + } + + xsk_buff_set_size(xdp, size); + xsk_buff_dma_sync_for_cpu(xdp, rx_ring->xsk_pool); + + xdp_act = iavf_run_xdp_zc(rx_ring, xdp, xdp_prog, xdp_ring, + &rxq_xdp_act); + if (xdp_act == XDP_PASS) + goto construct_skb; + + if (unlikely(rxq_xdp_act & IAVF_RXQ_XDP_ACT_STOP_NOW)) { + failure = true; + break; + } + + stats.bytes += size; + stats.packets++; + +next: + cleaned_count++; + if (unlikely(++ntc == ring_size)) + ntc = 0; + + continue; + +construct_skb: + skb = iavf_construct_skb_zc(rx_ring, xdp); + if (!skb) { + libie_stats_inc_one(&rx_ring->rq_stats, + build_skb_fail); + break; + } + + cleaned_count++; + if (unlikely(++ntc == ring_size)) + ntc = 0; + + prefetch(rx_desc); + + /* probably a little skewed due to removing CRC */ + stats.bytes += skb->len; + + /* populate checksum, VLAN, and protocol */ + iavf_process_skb_fields(rx_ring, rx_desc, skb, qword); + + iavf_trace(clean_rx_irq_zc_rx, rx_ring, rx_desc, skb); + skb->protocol = eth_type_trans(skb, rx_ring->netdev); + napi_gro_receive(&rx_ring->q_vector->napi, skb); + + stats.packets++; + } + + rx_ring->next_to_clean = ntc; + + iavf_finalize_xdp_rx(xdp_ring, rxq_xdp_act, 0); + + to_refill = IAVF_DESC_UNUSED(rx_ring); + if (to_refill > IAVF_RING_QUARTER(rx_ring)) + failure |= !iavf_alloc_rx_buffers_zc(rx_ring, to_refill); + + iavf_update_rx_ring_stats(rx_ring, &stats); + + if (xsk_uses_need_wakeup(rx_ring->xsk_pool)) { + if (failure || rx_ring->next_to_clean == rx_ring->next_to_use) + xsk_set_rx_need_wakeup(rx_ring->xsk_pool); + else + xsk_clear_rx_need_wakeup(rx_ring->xsk_pool); + + return cleaned_count; + } + + return unlikely(failure) ? budget : cleaned_count; +} diff --git a/drivers/net/ethernet/intel/iavf/iavf_xsk.h b/drivers/net/ethernet/intel/iavf/iavf_xsk.h new file mode 100644 index 00000000000000..65aae299db4c0a --- /dev/null +++ b/drivers/net/ethernet/intel/iavf/iavf_xsk.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2022 Intel Corporation. 
*/ + +#ifndef _IAVF_XSK_H_ +#define _IAVF_XSK_H_ + +#include + +#define PKTS_PER_BATCH 8 + +#ifdef __clang__ +#define loop_unrolled_for _Pragma("clang loop unroll_count(8)") for +#elif __GNUC__ >= 8 +#define loop_unrolled_for _Pragma("GCC unroll 8") for +#else +#define loop_unrolled_for for +#endif + +struct iavf_adapter; +struct iavf_ring; +struct net_device; +struct xsk_buff_pool; + +int iavf_xsk_pool_setup(struct iavf_adapter *adapter, + struct xsk_buff_pool *pool, u32 qid); + +int iavf_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags); +bool iavf_xmit_zc(struct iavf_ring *xdp_ring); +void iavf_xsk_clean_xdp_ring(struct iavf_ring *xdp_ring); + +void iavf_xsk_clean_rx_ring(struct iavf_ring *rx_ring); +int iavf_clean_rx_irq_zc(struct iavf_ring *rx_ring, int budget); +void iavf_check_alloc_rx_buffers_zc(struct iavf_adapter *adapter, + struct iavf_ring *rx_ring); + +void iavf_xsk_setup_xdp_ring(struct iavf_ring *xdp_ring); +void iavf_xsk_setup_rx_ring(struct iavf_ring *rx_ring); + +#endif /* !_IAVF_XSK_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h index 89f986a75cc855..611577ebc29d82 100644 --- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h +++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h @@ -160,64 +160,6 @@ struct ice_fltr_desc { (0x1ULL << ICE_FXD_FLTR_WB_QW1_FAIL_PROF_S) #define ICE_FXD_FLTR_WB_QW1_FAIL_PROF_YES 0x1ULL -struct ice_rx_ptype_decoded { - u32 known:1; - u32 outer_ip:1; - u32 outer_ip_ver:2; - u32 outer_frag:1; - u32 tunnel_type:3; - u32 tunnel_end_prot:2; - u32 tunnel_end_frag:1; - u32 inner_prot:4; - u32 payload_layer:3; -}; - -enum ice_rx_ptype_outer_ip { - ICE_RX_PTYPE_OUTER_L2 = 0, - ICE_RX_PTYPE_OUTER_IP = 1, -}; - -enum ice_rx_ptype_outer_ip_ver { - ICE_RX_PTYPE_OUTER_NONE = 0, - ICE_RX_PTYPE_OUTER_IPV4 = 1, - ICE_RX_PTYPE_OUTER_IPV6 = 2, -}; - -enum ice_rx_ptype_outer_fragmented { - ICE_RX_PTYPE_NOT_FRAG = 0, - ICE_RX_PTYPE_FRAG = 1, -}; - -enum ice_rx_ptype_tunnel_type { - ICE_RX_PTYPE_TUNNEL_NONE = 0, - ICE_RX_PTYPE_TUNNEL_IP_IP = 1, - ICE_RX_PTYPE_TUNNEL_IP_GRENAT = 2, - ICE_RX_PTYPE_TUNNEL_IP_GRENAT_MAC = 3, - ICE_RX_PTYPE_TUNNEL_IP_GRENAT_MAC_VLAN = 4, -}; - -enum ice_rx_ptype_tunnel_end_prot { - ICE_RX_PTYPE_TUNNEL_END_NONE = 0, - ICE_RX_PTYPE_TUNNEL_END_IPV4 = 1, - ICE_RX_PTYPE_TUNNEL_END_IPV6 = 2, -}; - -enum ice_rx_ptype_inner_prot { - ICE_RX_PTYPE_INNER_PROT_NONE = 0, - ICE_RX_PTYPE_INNER_PROT_UDP = 1, - ICE_RX_PTYPE_INNER_PROT_TCP = 2, - ICE_RX_PTYPE_INNER_PROT_SCTP = 3, - ICE_RX_PTYPE_INNER_PROT_ICMP = 4, - ICE_RX_PTYPE_INNER_PROT_TIMESYNC = 5, -}; - -enum ice_rx_ptype_payload_layer { - ICE_RX_PTYPE_PAYLOAD_LAYER_NONE = 0, - ICE_RX_PTYPE_PAYLOAD_LAYER_PAY2 = 1, - ICE_RX_PTYPE_PAYLOAD_LAYER_PAY3 = 2, - ICE_RX_PTYPE_PAYLOAD_LAYER_PAY4 = 3, -}; - /* Rx Flex Descriptor * This descriptor is used instead of the legacy version descriptor when * ice_rlan_ctx.adv_desc is set @@ -651,262 +593,4 @@ struct ice_tlan_ctx { u8 int_q_state; /* width not needed - internal - DO NOT WRITE!!! */ }; -/* The ice_ptype_lkup table is used to convert from the 10-bit ptype in the - * hardware to a bit-field that can be used by SW to more easily determine the - * packet type. - * - * Macros are used to shorten the table lines and make this table human - * readable. - * - * We store the PTYPE in the top byte of the bit field - this is just so that - * we can check that the table doesn't have a row missing, as the index into - * the table should be the PTYPE. 
- * - * Typical work flow: - * - * IF NOT ice_ptype_lkup[ptype].known - * THEN - * Packet is unknown - * ELSE IF ice_ptype_lkup[ptype].outer_ip == ICE_RX_PTYPE_OUTER_IP - * Use the rest of the fields to look at the tunnels, inner protocols, etc - * ELSE - * Use the enum ice_rx_l2_ptype to decode the packet type - * ENDIF - */ - -/* macro to make the table lines short, use explicit indexing with [PTYPE] */ -#define ICE_PTT(PTYPE, OUTER_IP, OUTER_IP_VER, OUTER_FRAG, T, TE, TEF, I, PL)\ - [PTYPE] = { \ - 1, \ - ICE_RX_PTYPE_OUTER_##OUTER_IP, \ - ICE_RX_PTYPE_OUTER_##OUTER_IP_VER, \ - ICE_RX_PTYPE_##OUTER_FRAG, \ - ICE_RX_PTYPE_TUNNEL_##T, \ - ICE_RX_PTYPE_TUNNEL_END_##TE, \ - ICE_RX_PTYPE_##TEF, \ - ICE_RX_PTYPE_INNER_PROT_##I, \ - ICE_RX_PTYPE_PAYLOAD_LAYER_##PL } - -#define ICE_PTT_UNUSED_ENTRY(PTYPE) [PTYPE] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 } - -/* shorter macros makes the table fit but are terse */ -#define ICE_RX_PTYPE_NOF ICE_RX_PTYPE_NOT_FRAG -#define ICE_RX_PTYPE_FRG ICE_RX_PTYPE_FRAG - -/* Lookup table mapping in the 10-bit HW PTYPE to the bit field for decoding */ -static const struct ice_rx_ptype_decoded ice_ptype_lkup[BIT(10)] = { - /* L2 Packet types */ - ICE_PTT_UNUSED_ENTRY(0), - ICE_PTT(1, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), - ICE_PTT_UNUSED_ENTRY(2), - ICE_PTT_UNUSED_ENTRY(3), - ICE_PTT_UNUSED_ENTRY(4), - ICE_PTT_UNUSED_ENTRY(5), - ICE_PTT(6, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE), - ICE_PTT(7, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE), - ICE_PTT_UNUSED_ENTRY(8), - ICE_PTT_UNUSED_ENTRY(9), - ICE_PTT(10, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE), - ICE_PTT(11, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE), - ICE_PTT_UNUSED_ENTRY(12), - ICE_PTT_UNUSED_ENTRY(13), - ICE_PTT_UNUSED_ENTRY(14), - ICE_PTT_UNUSED_ENTRY(15), - ICE_PTT_UNUSED_ENTRY(16), - ICE_PTT_UNUSED_ENTRY(17), - ICE_PTT_UNUSED_ENTRY(18), - ICE_PTT_UNUSED_ENTRY(19), - ICE_PTT_UNUSED_ENTRY(20), - ICE_PTT_UNUSED_ENTRY(21), - - /* Non Tunneled IPv4 */ - ICE_PTT(22, IP, IPV4, FRG, NONE, NONE, NOF, NONE, PAY3), - ICE_PTT(23, IP, IPV4, NOF, NONE, NONE, NOF, NONE, PAY3), - ICE_PTT(24, IP, IPV4, NOF, NONE, NONE, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(25), - ICE_PTT(26, IP, IPV4, NOF, NONE, NONE, NOF, TCP, PAY4), - ICE_PTT(27, IP, IPV4, NOF, NONE, NONE, NOF, SCTP, PAY4), - ICE_PTT(28, IP, IPV4, NOF, NONE, NONE, NOF, ICMP, PAY4), - - /* IPv4 --> IPv4 */ - ICE_PTT(29, IP, IPV4, NOF, IP_IP, IPV4, FRG, NONE, PAY3), - ICE_PTT(30, IP, IPV4, NOF, IP_IP, IPV4, NOF, NONE, PAY3), - ICE_PTT(31, IP, IPV4, NOF, IP_IP, IPV4, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(32), - ICE_PTT(33, IP, IPV4, NOF, IP_IP, IPV4, NOF, TCP, PAY4), - ICE_PTT(34, IP, IPV4, NOF, IP_IP, IPV4, NOF, SCTP, PAY4), - ICE_PTT(35, IP, IPV4, NOF, IP_IP, IPV4, NOF, ICMP, PAY4), - - /* IPv4 --> IPv6 */ - ICE_PTT(36, IP, IPV4, NOF, IP_IP, IPV6, FRG, NONE, PAY3), - ICE_PTT(37, IP, IPV4, NOF, IP_IP, IPV6, NOF, NONE, PAY3), - ICE_PTT(38, IP, IPV4, NOF, IP_IP, IPV6, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(39), - ICE_PTT(40, IP, IPV4, NOF, IP_IP, IPV6, NOF, TCP, PAY4), - ICE_PTT(41, IP, IPV4, NOF, IP_IP, IPV6, NOF, SCTP, PAY4), - ICE_PTT(42, IP, IPV4, NOF, IP_IP, IPV6, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT */ - ICE_PTT(43, IP, IPV4, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3), - - /* IPv4 --> GRE/NAT --> IPv4 */ - ICE_PTT(44, IP, IPV4, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3), - ICE_PTT(45, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3), - ICE_PTT(46, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(47), - ICE_PTT(48, IP, IPV4, NOF, IP_GRENAT, 
IPV4, NOF, TCP, PAY4), - ICE_PTT(49, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4), - ICE_PTT(50, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT --> IPv6 */ - ICE_PTT(51, IP, IPV4, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3), - ICE_PTT(52, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3), - ICE_PTT(53, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(54), - ICE_PTT(55, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, TCP, PAY4), - ICE_PTT(56, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4), - ICE_PTT(57, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT --> MAC */ - ICE_PTT(58, IP, IPV4, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3), - - /* IPv4 --> GRE/NAT --> MAC --> IPv4 */ - ICE_PTT(59, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3), - ICE_PTT(60, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3), - ICE_PTT(61, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(62), - ICE_PTT(63, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP, PAY4), - ICE_PTT(64, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4), - ICE_PTT(65, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT -> MAC --> IPv6 */ - ICE_PTT(66, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3), - ICE_PTT(67, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3), - ICE_PTT(68, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(69), - ICE_PTT(70, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP, PAY4), - ICE_PTT(71, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4), - ICE_PTT(72, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT --> MAC/VLAN */ - ICE_PTT(73, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3), - - /* IPv4 ---> GRE/NAT -> MAC/VLAN --> IPv4 */ - ICE_PTT(74, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3), - ICE_PTT(75, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3), - ICE_PTT(76, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(77), - ICE_PTT(78, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP, PAY4), - ICE_PTT(79, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4), - ICE_PTT(80, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4), - - /* IPv4 -> GRE/NAT -> MAC/VLAN --> IPv6 */ - ICE_PTT(81, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3), - ICE_PTT(82, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3), - ICE_PTT(83, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(84), - ICE_PTT(85, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP, PAY4), - ICE_PTT(86, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4), - ICE_PTT(87, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4), - - /* Non Tunneled IPv6 */ - ICE_PTT(88, IP, IPV6, FRG, NONE, NONE, NOF, NONE, PAY3), - ICE_PTT(89, IP, IPV6, NOF, NONE, NONE, NOF, NONE, PAY3), - ICE_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(91), - ICE_PTT(92, IP, IPV6, NOF, NONE, NONE, NOF, TCP, PAY4), - ICE_PTT(93, IP, IPV6, NOF, NONE, NONE, NOF, SCTP, PAY4), - ICE_PTT(94, IP, IPV6, NOF, NONE, NONE, NOF, ICMP, PAY4), - - /* IPv6 --> IPv4 */ - ICE_PTT(95, IP, IPV6, NOF, IP_IP, IPV4, FRG, NONE, PAY3), - ICE_PTT(96, IP, IPV6, NOF, IP_IP, IPV4, NOF, NONE, PAY3), - ICE_PTT(97, IP, IPV6, NOF, IP_IP, IPV4, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(98), - ICE_PTT(99, IP, IPV6, NOF, IP_IP, IPV4, NOF, TCP, PAY4), - ICE_PTT(100, IP, IPV6, NOF, IP_IP, IPV4, NOF, SCTP, PAY4), - 
ICE_PTT(101, IP, IPV6, NOF, IP_IP, IPV4, NOF, ICMP, PAY4), - - /* IPv6 --> IPv6 */ - ICE_PTT(102, IP, IPV6, NOF, IP_IP, IPV6, FRG, NONE, PAY3), - ICE_PTT(103, IP, IPV6, NOF, IP_IP, IPV6, NOF, NONE, PAY3), - ICE_PTT(104, IP, IPV6, NOF, IP_IP, IPV6, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(105), - ICE_PTT(106, IP, IPV6, NOF, IP_IP, IPV6, NOF, TCP, PAY4), - ICE_PTT(107, IP, IPV6, NOF, IP_IP, IPV6, NOF, SCTP, PAY4), - ICE_PTT(108, IP, IPV6, NOF, IP_IP, IPV6, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT */ - ICE_PTT(109, IP, IPV6, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3), - - /* IPv6 --> GRE/NAT -> IPv4 */ - ICE_PTT(110, IP, IPV6, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3), - ICE_PTT(111, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3), - ICE_PTT(112, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(113), - ICE_PTT(114, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, TCP, PAY4), - ICE_PTT(115, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4), - ICE_PTT(116, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> IPv6 */ - ICE_PTT(117, IP, IPV6, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3), - ICE_PTT(118, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3), - ICE_PTT(119, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(120), - ICE_PTT(121, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, TCP, PAY4), - ICE_PTT(122, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4), - ICE_PTT(123, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> MAC */ - ICE_PTT(124, IP, IPV6, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3), - - /* IPv6 --> GRE/NAT -> MAC -> IPv4 */ - ICE_PTT(125, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3), - ICE_PTT(126, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3), - ICE_PTT(127, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(128), - ICE_PTT(129, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP, PAY4), - ICE_PTT(130, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4), - ICE_PTT(131, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> MAC -> IPv6 */ - ICE_PTT(132, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3), - ICE_PTT(133, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3), - ICE_PTT(134, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(135), - ICE_PTT(136, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP, PAY4), - ICE_PTT(137, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4), - ICE_PTT(138, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> MAC/VLAN */ - ICE_PTT(139, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3), - - /* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv4 */ - ICE_PTT(140, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3), - ICE_PTT(141, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3), - ICE_PTT(142, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(143), - ICE_PTT(144, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP, PAY4), - ICE_PTT(145, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4), - ICE_PTT(146, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv6 */ - ICE_PTT(147, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3), - ICE_PTT(148, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3), - ICE_PTT(149, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(150), - ICE_PTT(151, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP, PAY4), - 
ICE_PTT(152, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4), - ICE_PTT(153, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4), - - /* unused entries */ - [154 ... 1023] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 } -}; - -static inline struct ice_rx_ptype_decoded ice_decode_rx_desc_ptype(u16 ptype) -{ - return ice_ptype_lkup[ptype]; -} - - #endif /* _ICE_LAN_TX_RX_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index a1f7c8edc22f34..f3d9c5ddef33e3 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -34,6 +34,7 @@ static const char ice_copyright[] = "Copyright (c) 2018, Intel Corporation."; MODULE_AUTHOR("Intel Corporation, "); MODULE_DESCRIPTION(DRV_SUMMARY); +MODULE_IMPORT_NS(LIBIE); MODULE_LICENSE("GPL v2"); MODULE_FIRMWARE(ICE_DDP_PKG_FILE); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c index 7bc5aa340c7df7..3b3793428ab9ca 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c @@ -2,6 +2,7 @@ /* Copyright (c) 2019, Intel Corporation. */ #include +#include #include "ice_txrx_lib.h" #include "ice_eswitch.h" @@ -38,30 +39,6 @@ void ice_release_rx_desc(struct ice_rx_ring *rx_ring, u16 val) } } -/** - * ice_ptype_to_htype - get a hash type - * @ptype: the ptype value from the descriptor - * - * Returns appropriate hash type (such as PKT_HASH_TYPE_L2/L3/L4) to be used by - * skb_set_hash based on PTYPE as parsed by HW Rx pipeline and is part of - * Rx desc. - */ -static enum pkt_hash_types ice_ptype_to_htype(u16 ptype) -{ - struct ice_rx_ptype_decoded decoded = ice_decode_rx_desc_ptype(ptype); - - if (!decoded.known) - return PKT_HASH_TYPE_NONE; - if (decoded.payload_layer == ICE_RX_PTYPE_PAYLOAD_LAYER_PAY4) - return PKT_HASH_TYPE_L4; - if (decoded.payload_layer == ICE_RX_PTYPE_PAYLOAD_LAYER_PAY3) - return PKT_HASH_TYPE_L3; - if (decoded.outer_ip == ICE_RX_PTYPE_OUTER_L2) - return PKT_HASH_TYPE_L2; - - return PKT_HASH_TYPE_NONE; -} - /** * ice_rx_hash - set the hash value in the skb * @rx_ring: descriptor ring @@ -74,9 +51,11 @@ ice_rx_hash(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb, u16 rx_ptype) { struct ice_32b_rx_flex_desc_nic *nic_mdid; + struct libie_rx_ptype_parsed parsed; u32 hash; - if (!(rx_ring->netdev->features & NETIF_F_RXHASH)) + parsed = libie_parse_rx_ptype(rx_ptype); + if (!libie_has_rx_hash(rx_ring->netdev, parsed)) return; if (rx_desc->wb.rxdid != ICE_RXDID_FLEX_NIC) @@ -84,7 +63,7 @@ ice_rx_hash(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, nic_mdid = (struct ice_32b_rx_flex_desc_nic *)rx_desc; hash = le32_to_cpu(nic_mdid->rss_hash); - skb_set_hash(skb, hash, ice_ptype_to_htype(rx_ptype)); + libie_skb_set_hash(skb, hash, parsed); } /** @@ -92,7 +71,7 @@ ice_rx_hash(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, * @ring: the ring we care about * @skb: skb currently being received and modified * @rx_desc: the receive descriptor - * @ptype: the packet type decoded by hardware + * @ptype: the packet type parsed by hardware * * skb->protocol must be set before this function is called */ @@ -100,34 +79,26 @@ static void ice_rx_csum(struct ice_rx_ring *ring, struct sk_buff *skb, union ice_32b_rx_flex_desc *rx_desc, u16 ptype) { - struct ice_rx_ptype_decoded decoded; + struct libie_rx_ptype_parsed parsed; u16 rx_status0, rx_status1; bool ipv4, ipv6; - rx_status0 = 
le16_to_cpu(rx_desc->wb.status_error0); - rx_status1 = le16_to_cpu(rx_desc->wb.status_error1); - - decoded = ice_decode_rx_desc_ptype(ptype); - /* Start with CHECKSUM_NONE and by default csum_level = 0 */ skb->ip_summed = CHECKSUM_NONE; - skb_checksum_none_assert(skb); - /* check if Rx checksum is enabled */ - if (!(ring->netdev->features & NETIF_F_RXCSUM)) + parsed = libie_parse_rx_ptype(ptype); + if (!libie_has_rx_checksum(ring->netdev, parsed)) return; - /* check if HW has decoded the packet and checksum */ - if (!(rx_status0 & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S))) - return; + rx_status0 = le16_to_cpu(rx_desc->wb.status_error0); + rx_status1 = le16_to_cpu(rx_desc->wb.status_error1); - if (!(decoded.known && decoded.outer_ip)) + /* check if HW has parsed the packet and checksum */ + if (!(rx_status0 & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S))) return; - ipv4 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) && - (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV4); - ipv6 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) && - (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV6); + ipv4 = parsed.outer_ip == LIBIE_RX_PTYPE_OUTER_IPV4; + ipv6 = parsed.outer_ip == LIBIE_RX_PTYPE_OUTER_IPV6; if (ipv4 && (rx_status0 & (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) | BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S)))) @@ -151,19 +122,10 @@ ice_rx_csum(struct ice_rx_ring *ring, struct sk_buff *skb, * we need to bump the checksum level by 1 to reflect the fact that * we are indicating we validated the inner checksum. */ - if (decoded.tunnel_type >= ICE_RX_PTYPE_TUNNEL_IP_GRENAT) + if (parsed.tunnel_type >= LIBIE_RX_PTYPE_TUNNEL_IP_GRENAT) skb->csum_level = 1; - /* Only report checksum unnecessary for TCP, UDP, or SCTP */ - switch (decoded.inner_prot) { - case ICE_RX_PTYPE_INNER_PROT_TCP: - case ICE_RX_PTYPE_INNER_PROT_UDP: - case ICE_RX_PTYPE_INNER_PROT_SCTP: - skb->ip_summed = CHECKSUM_UNNECESSARY; - break; - default: - break; - } + skb->ip_summed = CHECKSUM_UNNECESSARY; return; checksum_fail: @@ -175,7 +137,7 @@ ice_rx_csum(struct ice_rx_ring *ring, struct sk_buff *skb, * @rx_ring: Rx descriptor ring packet is being transacted on * @rx_desc: pointer to the EOP Rx descriptor * @skb: pointer to current skb being populated - * @ptype: the packet type decoded by hardware + * @ptype: the packet type parsed by hardware * * This function checks the ring, descriptor, and packet information in * order to populate the hash, checksum, VLAN, protocol, and diff --git a/drivers/net/ethernet/intel/libie/Makefile b/drivers/net/ethernet/intel/libie/Makefile new file mode 100644 index 00000000000000..76f32253481b70 --- /dev/null +++ b/drivers/net/ethernet/intel/libie/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only +# Copyright(c) 2023 Intel Corporation. + +obj-$(CONFIG_LIBIE) += libie.o + +libie-objs += rx.o +libie-objs += stats.o diff --git a/drivers/net/ethernet/intel/libie/internal.h b/drivers/net/ethernet/intel/libie/internal.h new file mode 100644 index 00000000000000..083398dc37c63d --- /dev/null +++ b/drivers/net/ethernet/intel/libie/internal.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* libie internal declarations not to be used in drivers. + * + * Copyright(c) 2023 Intel Corporation. 
+ */ + +#ifndef __LIBIE_INTERNAL_H +#define __LIBIE_INTERNAL_H + +struct libie_rq_stats; +struct page_pool; + +#ifdef CONFIG_PAGE_POOL_STATS +void libie_rq_stats_sync_pp(struct libie_rq_stats *stats, + struct page_pool *pool); +#else +static inline void libie_rq_stats_sync_pp(struct libie_rq_stats *stats, + struct page_pool *pool) +{ +} +#endif + +#endif /* __LIBIE_INTERNAL_H */ diff --git a/drivers/net/ethernet/intel/libie/rx.c b/drivers/net/ethernet/intel/libie/rx.c new file mode 100644 index 00000000000000..65475bf6d2d27f --- /dev/null +++ b/drivers/net/ethernet/intel/libie/rx.c @@ -0,0 +1,182 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2023 Intel Corporation. */ + +#include + +#include "internal.h" + +/* O(1) converting i40e/ice/iavf's 8/10-bit hardware packet type to a parsed + * bitfield struct. + */ + +#define LIBIE_RX_PTYPE(oip, ofrag, tun, tp, tefr, iprot, pl) { \ + .outer_ip = LIBIE_RX_PTYPE_OUTER_##oip, \ + .outer_frag = LIBIE_RX_PTYPE_##ofrag, \ + .tunnel_type = LIBIE_RX_PTYPE_TUNNEL_IP_##tun, \ + .tunnel_end_prot = LIBIE_RX_PTYPE_TUNNEL_END_##tp, \ + .tunnel_end_frag = LIBIE_RX_PTYPE_##tefr, \ + .inner_prot = LIBIE_RX_PTYPE_INNER_##iprot, \ + .payload_layer = LIBIE_RX_PTYPE_PAYLOAD_##pl, \ + } + +#define LIBIE_RX_PTYPE_UNUSED { } + +#define __LIBIE_RX_PTYPE_L2(iprot, pl) \ + LIBIE_RX_PTYPE(L2, NOT_FRAG, NONE, NONE, NOT_FRAG, iprot, pl) +#define LIBIE_RX_PTYPE_L2 __LIBIE_RX_PTYPE_L2(NONE, L2) +#define LIBIE_RX_PTYPE_TS __LIBIE_RX_PTYPE_L2(TIMESYNC, L2) +#define LIBIE_RX_PTYPE_L3 __LIBIE_RX_PTYPE_L2(NONE, L3) + +#define LIBIE_RX_PTYPE_IP_FRAG(oip) \ + LIBIE_RX_PTYPE(IPV##oip, FRAG, NONE, NONE, NOT_FRAG, NONE, L3) +#define LIBIE_RX_PTYPE_IP_L3(oip, tun, teprot, tefr) \ + LIBIE_RX_PTYPE(IPV##oip, NOT_FRAG, tun, teprot, tefr, NONE, L3) +#define LIBIE_RX_PTYPE_IP_L4(oip, tun, teprot, iprot) \ + LIBIE_RX_PTYPE(IPV##oip, NOT_FRAG, tun, teprot, NOT_FRAG, iprot, L4) + +#define LIBIE_RX_PTYPE_IP_NOF(oip, tun, ver) \ + LIBIE_RX_PTYPE_IP_L3(oip, tun, ver, NOT_FRAG), \ + LIBIE_RX_PTYPE_IP_L4(oip, tun, ver, UDP), \ + LIBIE_RX_PTYPE_UNUSED, \ + LIBIE_RX_PTYPE_IP_L4(oip, tun, ver, TCP), \ + LIBIE_RX_PTYPE_IP_L4(oip, tun, ver, SCTP), \ + LIBIE_RX_PTYPE_IP_L4(oip, tun, ver, ICMP) + +/* IPv oip --> tun --> IPv ver */ +#define LIBIE_RX_PTYPE_IP_TUN_VER(oip, tun, ver) \ + LIBIE_RX_PTYPE_IP_L3(oip, tun, ver, FRAG), \ + LIBIE_RX_PTYPE_IP_NOF(oip, tun, ver) + +/* Non Tunneled IPv oip */ +#define LIBIE_RX_PTYPE_IP_RAW(oip) \ + LIBIE_RX_PTYPE_IP_FRAG(oip), \ + LIBIE_RX_PTYPE_IP_NOF(oip, NONE, NONE) + +/* IPv oip --> tun --> { IPv4, IPv6 } */ +#define LIBIE_RX_PTYPE_IP_TUN(oip, tun) \ + LIBIE_RX_PTYPE_IP_TUN_VER(oip, tun, IPV4), \ + LIBIE_RX_PTYPE_IP_TUN_VER(oip, tun, IPV6) + +/* IPv oip --> GRE/NAT tun --> { x, IPv4, IPv6 } */ +#define LIBIE_RX_PTYPE_IP_GRE(oip, tun) \ + LIBIE_RX_PTYPE_IP_L3(oip, tun, NONE, NOT_FRAG), \ + LIBIE_RX_PTYPE_IP_TUN(oip, tun) + +/* Non Tunneled IPv oip + * IPv oip --> { IPv4, IPv6 } + * IPv oip --> GRE/NAT --> { x, IPv4, IPv6 } + * IPv oip --> GRE/NAT --> MAC --> { x, IPv4, IPv6 } + * IPv oip --> GRE/NAT --> MAC/VLAN --> { x, IPv4, IPv6 } + */ +#define LIBIE_RX_PTYPE_IP(oip) \ + LIBIE_RX_PTYPE_IP_RAW(oip), \ + LIBIE_RX_PTYPE_IP_TUN(oip, IP), \ + LIBIE_RX_PTYPE_IP_GRE(oip, GRENAT), \ + LIBIE_RX_PTYPE_IP_GRE(oip, GRENAT_MAC), \ + LIBIE_RX_PTYPE_IP_GRE(oip, GRENAT_MAC_VLAN) + +/* Lookup table mapping for O(1) parsing */ +const struct libie_rx_ptype_parsed libie_rx_ptype_lut[LIBIE_RX_PTYPE_NUM] = { + /* L2 packet types */ + LIBIE_RX_PTYPE_UNUSED, + 
LIBIE_RX_PTYPE_L2, + LIBIE_RX_PTYPE_TS, + LIBIE_RX_PTYPE_L2, + LIBIE_RX_PTYPE_UNUSED, + LIBIE_RX_PTYPE_UNUSED, + LIBIE_RX_PTYPE_L2, + LIBIE_RX_PTYPE_L2, + LIBIE_RX_PTYPE_UNUSED, + LIBIE_RX_PTYPE_UNUSED, + LIBIE_RX_PTYPE_L2, + LIBIE_RX_PTYPE_UNUSED, + + LIBIE_RX_PTYPE_L3, + LIBIE_RX_PTYPE_L3, + LIBIE_RX_PTYPE_L3, + LIBIE_RX_PTYPE_L3, + LIBIE_RX_PTYPE_L3, + LIBIE_RX_PTYPE_L3, + LIBIE_RX_PTYPE_L3, + LIBIE_RX_PTYPE_L3, + LIBIE_RX_PTYPE_L3, + LIBIE_RX_PTYPE_L3, + + LIBIE_RX_PTYPE_IP(4), + LIBIE_RX_PTYPE_IP(6), +}; +EXPORT_SYMBOL_NS_GPL(libie_rx_ptype_lut, LIBIE); + +/* Page Pool */ + +/** + * libie_rx_sync_len - get the actual buffer size to be synced and passed to HW + * @dev: &net_device to calculate the size for + * @hr: headroom in front of each frame + * + * Returns the buffer size to pass it to HW and use for DMA synchronization + * for the MTU the @dev has. + */ +static u32 libie_rx_sync_len(const struct net_device *dev, u32 hr) +{ + u32 len; + + len = READ_ONCE(dev->mtu) + LIBIE_RX_LL_LEN; + len = ALIGN(len, LIBIE_RX_BUF_LEN_ALIGN); + len = min(len, LIBIE_RX_BUF_LEN(hr)); + + return len; +} + +/** + * libie_rx_page_pool_create - create a PP with the default libie settings + * @dev: &net_device which a PP will be created for + * @size: size of the PP, usually simply Rx queue len + * @xdp: whether XDP is enabled on the device + * + * Returns &page_pool on success, casted -errno on failure. + */ +struct page_pool *libie_rx_page_pool_create(const struct net_device *dev, + u32 size, bool xdp) +{ + u32 hr = xdp ? LIBIE_XDP_HEADROOM : LIBIE_SKB_HEADROOM; + const struct page_pool_params pp = { + .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_MAP_WEAK | + PP_FLAG_DMA_SYNC_DEV, + .order = LIBIE_RX_PAGE_ORDER, + .pool_size = size, + .nid = NUMA_NO_NODE, + .dev = dev->dev.parent, + .dma_dir = xdp ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE, + .max_len = libie_rx_sync_len(dev, hr), + .offset = hr, + }; + + static_assert((PP_FLAG_DMA_MAP | PP_FLAG_DMA_MAP_WEAK) == + LIBIE_RX_DMA_ATTR); + + return page_pool_create(&pp); +} +EXPORT_SYMBOL_NS_GPL(libie_rx_page_pool_create, LIBIE); + +/** + * libie_rx_page_pool_destroy - destroy a &page_pool created by libie + * @pool: pool to destroy + * @stats: RQ stats from the ring (or %NULL to skip updating PP stats) + * + * As the stats usually has the same lifetime as the device, but PP is usually + * created/destroyed on ifup/ifdown, in order to not lose the stats accumulated + * during the last ifup, the PP stats need to be added to the driver stats + * container. Then the PP gets destroyed. + */ +void libie_rx_page_pool_destroy(struct page_pool *pool, + struct libie_rq_stats *stats) +{ + libie_rq_stats_sync_pp(stats, pool); + page_pool_destroy(pool); +} +EXPORT_SYMBOL_NS_GPL(libie_rx_page_pool_destroy, LIBIE); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Intel(R) Ethernet common library"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/intel/libie/stats.c b/drivers/net/ethernet/intel/libie/stats.c new file mode 100644 index 00000000000000..95bbb38c39e348 --- /dev/null +++ b/drivers/net/ethernet/intel/libie/stats.c @@ -0,0 +1,189 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2023 Intel Corporation. 
*/ + +#include +#include + +#include "internal.h" + +/* Rx per-queue stats */ + +static const char * const libie_rq_stats_str[] = { +#define act(s) __stringify(s), + DECLARE_LIBIE_RQ_STATS(act) +#undef act +}; + +#define LIBIE_RQ_STATS_NUM ARRAY_SIZE(libie_rq_stats_str) + +#ifdef CONFIG_PAGE_POOL_STATS +/** + * libie_rq_stats_get_pp - get the current stats from a &page_pool + * @sarr: local array to add stats to + * @pool: pool to get the stats from + * + * Adds the current "live" stats from an online PP to the stats read from + * the RQ container, so that the actual totals will be returned. + */ +static void libie_rq_stats_get_pp(u64 *sarr, struct page_pool *pool) +{ + struct page_pool_stats *pps; + /* Used only to calculate pos below */ + struct libie_rq_stats tmp; + u32 pos; + + /* Validate the libie PP stats array can be casted <-> PP struct */ + static_assert(sizeof(tmp.pp) == sizeof(*pps)); + + if (!pool) + return; + + /* Position of the first Page Pool stats field */ + pos = (u64_stats_t *)&tmp.pp - tmp.raw; + pps = (typeof(pps))&sarr[pos]; + + page_pool_get_stats(pool, pps); +} + +/** + * libie_rq_stats_sync_pp - add the current PP stats to the RQ stats container + * @stats: stats structure to update + * @pool: pool to read the stats + * + * Called by libie_rx_page_pool_destroy() to save the stats before destroying + * the pool. + */ +void libie_rq_stats_sync_pp(struct libie_rq_stats *stats, + struct page_pool *pool) +{ + u64_stats_t *qarr = (u64_stats_t *)&stats->pp; + struct page_pool_stats pps = { }; + u64 *sarr = (u64 *)&pps; + + if (!stats) + return; + + page_pool_get_stats(pool, &pps); + + u64_stats_update_begin(&stats->syncp); + + for (u32 i = 0; i < sizeof(pps) / sizeof(*sarr); i++) + u64_stats_add(&qarr[i], sarr[i]); + + u64_stats_update_end(&stats->syncp); +} +#else +static inline void libie_rq_stats_get_pp(u64 *sarr, struct page_pool *pool) +{ +} + +/* static inline void libie_rq_stats_sync_pp() is declared in "internal.h" */ +#endif + +/** + * libie_rq_stats_get_sset_count - get the number of Ethtool RQ stats provided + * + * Returns the number of per-queue Rx stats supported by the library. 
+ */ +u32 libie_rq_stats_get_sset_count(void) +{ + return LIBIE_RQ_STATS_NUM; +} +EXPORT_SYMBOL_NS_GPL(libie_rq_stats_get_sset_count, LIBIE); + +/** + * libie_rq_stats_get_strings - get the name strings of Ethtool RQ stats + * @data: reference to the cursor pointing to the output buffer + * @qid: RQ number to print in the prefix + */ +void libie_rq_stats_get_strings(u8 **data, u32 qid) +{ + for (u32 i = 0; i < LIBIE_RQ_STATS_NUM; i++) + ethtool_sprintf(data, "rq%u_%s", qid, libie_rq_stats_str[i]); +} +EXPORT_SYMBOL_NS_GPL(libie_rq_stats_get_strings, LIBIE); + +/** + * libie_rq_stats_get_data - get the RQ stats in Ethtool format + * @data: reference to the cursor pointing to the output array + * @stats: RQ stats container from the queue + * @pool: &page_pool from the queue (%NULL to ignore PP "live" stats) + */ +void libie_rq_stats_get_data(u64 **data, const struct libie_rq_stats *stats, + struct page_pool *pool) +{ + u64 sarr[LIBIE_RQ_STATS_NUM]; + u32 start; + + do { + start = u64_stats_fetch_begin(&stats->syncp); + + for (u32 i = 0; i < LIBIE_RQ_STATS_NUM; i++) + sarr[i] = u64_stats_read(&stats->raw[i]); + } while (u64_stats_fetch_retry(&stats->syncp, start)); + + libie_rq_stats_get_pp(sarr, pool); + + for (u32 i = 0; i < LIBIE_RQ_STATS_NUM; i++) + (*data)[i] += sarr[i]; + + *data += LIBIE_RQ_STATS_NUM; +} +EXPORT_SYMBOL_NS_GPL(libie_rq_stats_get_data, LIBIE); + +/* Tx per-queue stats */ + +static const char * const libie_sq_stats_str[] = { +#define act(s) __stringify(s), + DECLARE_LIBIE_SQ_STATS(act) +#undef act +}; + +#define LIBIE_SQ_STATS_NUM ARRAY_SIZE(libie_sq_stats_str) + +/** + * libie_sq_stats_get_sset_count - get the number of Ethtool SQ stats provided + * + * Returns the number of per-queue Tx stats supported by the library. + */ +u32 libie_sq_stats_get_sset_count(void) +{ + return LIBIE_SQ_STATS_NUM; +} +EXPORT_SYMBOL_NS_GPL(libie_sq_stats_get_sset_count, LIBIE); + +/** + * libie_sq_stats_get_strings - get the name strings of Ethtool SQ stats + * @data: reference to the cursor pointing to the output buffer + * @qid: SQ number to print in the prefix + */ +void libie_sq_stats_get_strings(u8 **data, u32 qid) +{ + for (u32 i = 0; i < LIBIE_SQ_STATS_NUM; i++) + ethtool_sprintf(data, "sq%u_%s", qid, libie_sq_stats_str[i]); +} +EXPORT_SYMBOL_NS_GPL(libie_sq_stats_get_strings, LIBIE); + +/** + * libie_sq_stats_get_data - get the SQ stats in Ethtool format + * @data: reference to the cursor pointing to the output array + * @stats: SQ stats container from the queue + */ +void libie_sq_stats_get_data(u64 **data, const struct libie_sq_stats *stats) +{ + u64 sarr[LIBIE_SQ_STATS_NUM]; + u32 start; + + do { + start = u64_stats_fetch_begin(&stats->syncp); + + for (u32 i = 0; i < LIBIE_SQ_STATS_NUM; i++) + sarr[i] = u64_stats_read(&stats->raw[i]); + } while (u64_stats_fetch_retry(&stats->syncp, start)); + + for (u32 i = 0; i < LIBIE_SQ_STATS_NUM; i++) + (*data)[i] += sarr[i]; + + *data += LIBIE_SQ_STATS_NUM; +} +EXPORT_SYMBOL_NS_GPL(libie_sq_stats_get_data, LIBIE); diff --git a/include/linux/net/intel/libie/rx.h b/include/linux/net/intel/libie/rx.h new file mode 100644 index 00000000000000..d73efd721ffc6f --- /dev/null +++ b/include/linux/net/intel/libie/rx.h @@ -0,0 +1,180 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2023 Intel Corporation. */ + +#ifndef __LIBIE_RX_H +#define __LIBIE_RX_H + +#include +#include +#include + +/* O(1) converting i40e/ice/iavf's 8/10-bit hardware packet type to a parsed + * bitfield struct. 
+ */ + +struct libie_rx_ptype_parsed { + u16 outer_ip:2; + u16 outer_frag:1; + u16 tunnel_type:3; + u16 tunnel_end_prot:2; + u16 tunnel_end_frag:1; + u16 inner_prot:3; + u16 payload_layer:2; +}; + +enum libie_rx_ptype_outer_ip { + LIBIE_RX_PTYPE_OUTER_L2 = 0U, + LIBIE_RX_PTYPE_OUTER_IPV4, + LIBIE_RX_PTYPE_OUTER_IPV6, +}; + +enum libie_rx_ptype_outer_fragmented { + LIBIE_RX_PTYPE_NOT_FRAG = 0U, + LIBIE_RX_PTYPE_FRAG, +}; + +enum libie_rx_ptype_tunnel_type { + LIBIE_RX_PTYPE_TUNNEL_IP_NONE = 0U, + LIBIE_RX_PTYPE_TUNNEL_IP_IP, + LIBIE_RX_PTYPE_TUNNEL_IP_GRENAT, + LIBIE_RX_PTYPE_TUNNEL_IP_GRENAT_MAC, + LIBIE_RX_PTYPE_TUNNEL_IP_GRENAT_MAC_VLAN, +}; + +enum libie_rx_ptype_tunnel_end_prot { + LIBIE_RX_PTYPE_TUNNEL_END_NONE = 0U, + LIBIE_RX_PTYPE_TUNNEL_END_IPV4, + LIBIE_RX_PTYPE_TUNNEL_END_IPV6, +}; + +enum libie_rx_ptype_inner_prot { + LIBIE_RX_PTYPE_INNER_NONE = 0U, + LIBIE_RX_PTYPE_INNER_UDP, + LIBIE_RX_PTYPE_INNER_TCP, + LIBIE_RX_PTYPE_INNER_SCTP, + LIBIE_RX_PTYPE_INNER_ICMP, + LIBIE_RX_PTYPE_INNER_TIMESYNC, +}; + +enum libie_rx_ptype_payload_layer { + LIBIE_RX_PTYPE_PAYLOAD_NONE = PKT_HASH_TYPE_NONE, + LIBIE_RX_PTYPE_PAYLOAD_L2 = PKT_HASH_TYPE_L2, + LIBIE_RX_PTYPE_PAYLOAD_L3 = PKT_HASH_TYPE_L3, + LIBIE_RX_PTYPE_PAYLOAD_L4 = PKT_HASH_TYPE_L4, +}; + +#define LIBIE_RX_PTYPE_NUM 154 + +extern const struct libie_rx_ptype_parsed +libie_rx_ptype_lut[LIBIE_RX_PTYPE_NUM]; + +/** + * libie_parse_rx_ptype - convert HW packet type to software bitfield structure + * @ptype: 10-bit hardware packet type value from the descriptor + * + * @libie_rx_ptype_lut must be accessed only using this wrapper. + * + * Returns the parsed bitfield struct corresponding to the provided ptype. + */ +static inline struct libie_rx_ptype_parsed libie_parse_rx_ptype(u32 ptype) +{ + if (unlikely(ptype >= LIBIE_RX_PTYPE_NUM)) + ptype = 0; + + return libie_rx_ptype_lut[ptype]; +} + +/* libie_has_*() can be used to quickly check whether the HW metadata is + * available to avoid further expensive processing such as descriptor reads. + * They already check for the corresponding netdev feature to be enabled, + * thus can be used as drop-in replacements. + */ + +static inline bool libie_has_rx_checksum(const struct net_device *dev, + struct libie_rx_ptype_parsed parsed) +{ + /* _INNER_{SCTP,TCP,UDP} are possible only when _OUTER_IPV* is set, + * it is enough to check only for the L4 type. + */ + switch (parsed.inner_prot) { + case LIBIE_RX_PTYPE_INNER_TCP: + case LIBIE_RX_PTYPE_INNER_UDP: + case LIBIE_RX_PTYPE_INNER_SCTP: + return dev->features & NETIF_F_RXCSUM; + default: + return false; + } +} + +static inline bool libie_has_rx_hash(const struct net_device *dev, + struct libie_rx_ptype_parsed parsed) +{ + if (parsed.payload_layer < LIBIE_RX_PTYPE_PAYLOAD_L2) + return false; + + return dev->features & NETIF_F_RXHASH; +} + +/** + * libie_skb_set_hash - fill in skb hash value basing on the parsed ptype + * @skb: skb to fill the hash in + * @hash: 32-bit hash value from the descriptor + * @parsed: parsed packet type + */ +static inline void libie_skb_set_hash(struct sk_buff *skb, u32 hash, + struct libie_rx_ptype_parsed parsed) +{ + skb_set_hash(skb, hash, parsed.payload_layer); +} + +/* Rx MTU/buffer/truesize helpers. Mostly pure software-side; HW-defined values + * are valid for all Intel HW. 
+ */ + +/* Space reserved in front of each frame */ +#define LIBIE_SKB_HEADROOM (NET_SKB_PAD + NET_IP_ALIGN) +#define LIBIE_XDP_HEADROOM (max(XDP_PACKET_HEADROOM, NET_SKB_PAD) + \ + NET_IP_ALIGN) +/* Maximum headroom to calculate max MTU below */ +#define LIBIE_MAX_HEADROOM LIBIE_XDP_HEADROOM +/* Link layer / L2 overhead: Ethernet, 2 VLAN tags (C + S), FCS */ +#define LIBIE_RX_LL_LEN (ETH_HLEN + 2 * VLAN_HLEN + ETH_FCS_LEN) + +/* Truesize: total space wasted on each frame. Always use order-0 pages */ +#define LIBIE_RX_PAGE_ORDER 0 +#define LIBIE_RX_TRUESIZE (PAGE_SIZE << LIBIE_RX_PAGE_ORDER) +/* Rx buffer size config is a multiple of 128 */ +#define LIBIE_RX_BUF_LEN_ALIGN 128 +/* HW-writeable space in one buffer: truesize - headroom/tailroom, + * HW-aligned + */ +#define __LIBIE_RX_BUF_LEN(hr) \ + ALIGN_DOWN(SKB_MAX_ORDER(hr, LIBIE_RX_PAGE_ORDER), \ + LIBIE_RX_BUF_LEN_ALIGN) +/* The largest size for a single descriptor as per HW */ +#define LIBIE_MAX_RX_BUF_LEN 9728U +/* "True" HW-writeable space: minimum from SW and HW values */ +#define LIBIE_RX_BUF_LEN(hr) min_t(u32, __LIBIE_RX_BUF_LEN(hr), \ + LIBIE_MAX_RX_BUF_LEN) + +/* The maximum frame size as per HW (S/G) */ +#define __LIBIE_MAX_RX_FRM_LEN 16382U +/* ATST, HW can chain up to 5 Rx descriptors */ +#define LIBIE_MAX_RX_FRM_LEN(hr) \ + min_t(u32, __LIBIE_MAX_RX_FRM_LEN, LIBIE_RX_BUF_LEN(hr) * 5) +/* Maximum frame size minus LL overhead */ +#define LIBIE_MAX_MTU (LIBIE_MAX_RX_FRM_LEN(LIBIE_MAX_HEADROOM) - \ + LIBIE_RX_LL_LEN) + +/* DMA mapping attributes for Rx buffers: no impl. sync + relaxed on Sparc */ +#define LIBIE_RX_DMA_ATTR \ + (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING) + +struct libie_rq_stats; + +struct page_pool *libie_rx_page_pool_create(const struct net_device *dev, + u32 size, bool xdp); +void libie_rx_page_pool_destroy(struct page_pool *pool, + struct libie_rq_stats *stats); + +#endif /* __LIBIE_RX_H */ diff --git a/include/linux/net/intel/libie/stats.h b/include/linux/net/intel/libie/stats.h new file mode 100644 index 00000000000000..23ca0079a90586 --- /dev/null +++ b/include/linux/net/intel/libie/stats.h @@ -0,0 +1,214 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2023 Intel Corporation. */ + +#ifndef __LIBIE_STATS_H +#define __LIBIE_STATS_H + +#include + +/* Common */ + +/* Use 32-byte alignment to reduce false sharing */ +#define __libie_stats_aligned __aligned(4 * sizeof(u64_stats_t)) + +/** + * libie_stats_add - update one structure counter from a local struct + * @qs: queue stats structure to update (&libie_rq_stats or &libie_sq_stats) + * @ss: local/onstack stats structure + * @f: name of the field to update + * + * If a local/onstack stats structure is used to collect statistics during + * hotpath loops, this macro can be used to shorthand updates, given that + * the fields have the same name. + * Must be guarded with u64_stats_update_{begin,end}(). + */ +#define libie_stats_add(qs, ss, f) \ + u64_stats_add(&(qs)->f, (ss)->f) + +/** + * __libie_stats_inc_one - safely increment one stats structure counter + * @s: queue stats structure to update (&libie_rq_stats or &libie_sq_stats) + * @f: name of the field to increment + * @n: name of the temporary variable, result of __UNIQUE_ID() + * + * To be used on exception or slow paths -- allocation fails, queue stops etc. 
+ */ +#define __libie_stats_inc_one(s, f, n) ({ \ + typeof(*(s)) *n = (s); \ + \ + u64_stats_update_begin(&n->syncp); \ + u64_stats_inc(&n->f); \ + u64_stats_update_end(&n->syncp); \ +}) +#define libie_stats_inc_one(s, f) \ + __libie_stats_inc_one(s, f, __UNIQUE_ID(qs_)) + +/* Rx per-queue stats: + * packets: packets received on this queue + * bytes: bytes received on this queue + * fragments: number of processed descriptors carrying only a fragment + * alloc_page_fail: number of Rx page allocation fails + * build_skb_fail: number of build_skb() fails + * pp_alloc_fast: pages taken from the cache or ring + * pp_alloc_slow: actual page allocations + * pp_alloc_slow_ho: non-order-0 page allocations + * pp_alloc_empty: number of times the pool was empty + * pp_alloc_refill: number of cache refills + * pp_alloc_waive: NUMA node mismatches during recycling + * pp_recycle_cached: direct recyclings into the cache + * pp_recycle_cache_full: number of times the cache was full + * pp_recycle_ring: recyclings into the ring + * pp_recycle_ring_full: number of times the ring was full + * pp_recycle_released_ref: pages released due to elevated refcnt + */ + +#define DECLARE_LIBIE_RQ_NAPI_STATS(act) \ + act(packets) \ + act(bytes) \ + act(fragments) + +#define DECLARE_LIBIE_RQ_FAIL_STATS(act) \ + act(alloc_page_fail) \ + act(build_skb_fail) + +#ifdef CONFIG_PAGE_POOL_STATS +#define DECLARE_LIBIE_RQ_PP_STATS(act) \ + act(pp_alloc_fast) \ + act(pp_alloc_slow) \ + act(pp_alloc_slow_ho) \ + act(pp_alloc_empty) \ + act(pp_alloc_refill) \ + act(pp_alloc_waive) \ + act(pp_recycle_cached) \ + act(pp_recycle_cache_full) \ + act(pp_recycle_ring) \ + act(pp_recycle_ring_full) \ + act(pp_recycle_released_ref) +#else +#define DECLARE_LIBIE_RQ_PP_STATS(act) +#endif + +#define DECLARE_LIBIE_RQ_STATS(act) \ + DECLARE_LIBIE_RQ_NAPI_STATS(act) \ + DECLARE_LIBIE_RQ_FAIL_STATS(act) \ + DECLARE_LIBIE_RQ_PP_STATS(act) + +struct page_pool; + +struct libie_rq_stats { + struct u64_stats_sync syncp; + + union { + struct { +#define act(s) u64_stats_t s; + DECLARE_LIBIE_RQ_NAPI_STATS(act); + DECLARE_LIBIE_RQ_FAIL_STATS(act); + struct_group(pp, + DECLARE_LIBIE_RQ_PP_STATS(act); + ); +#undef act + }; + DECLARE_FLEX_ARRAY(u64_stats_t, raw); + }; +} __libie_stats_aligned; + +/* Rx stats being modified frequently during the NAPI polling, to sync them + * with the queue stats once after the loop is finished. 
+ */ +struct libie_rq_onstack_stats { + union { + struct { +#define act(s) u32 s; + DECLARE_LIBIE_RQ_NAPI_STATS(act); +#undef act + }; + DECLARE_FLEX_ARRAY(u32, raw); + }; +}; + +/** + * libie_rq_napi_stats_add - add onstack Rx stats to the queue container + * @qs: Rx queue stats structure to update + * @ss: onstack structure to get the values from, updated during the NAPI loop + */ +static inline void +libie_rq_napi_stats_add(struct libie_rq_stats *qs, + const struct libie_rq_onstack_stats *ss) +{ + u64_stats_update_begin(&qs->syncp); + libie_stats_add(qs, ss, packets); + libie_stats_add(qs, ss, bytes); + libie_stats_add(qs, ss, fragments); + u64_stats_update_end(&qs->syncp); +} + +u32 libie_rq_stats_get_sset_count(void); +void libie_rq_stats_get_strings(u8 **data, u32 qid); +void libie_rq_stats_get_data(u64 **data, const struct libie_rq_stats *stats, + struct page_pool *pool); + +/* Tx per-queue stats: + * packets: packets sent from this queue + * bytes: bytes sent from this queue + * busy: number of xmit failures due to the ring being full + * stops: number times the ring was stopped from the driver + * restarts: number times it was started after being stopped + * linearized: number of skbs linearized due to HW limits + */ + +#define DECLARE_LIBIE_SQ_NAPI_STATS(act) \ + act(packets) \ + act(bytes) + +#define DECLARE_LIBIE_SQ_XMIT_STATS(act) \ + act(busy) \ + act(stops) \ + act(restarts) \ + act(linearized) + +#define DECLARE_LIBIE_SQ_STATS(act) \ + DECLARE_LIBIE_SQ_NAPI_STATS(act) \ + DECLARE_LIBIE_SQ_XMIT_STATS(act) + +struct libie_sq_stats { + struct u64_stats_sync syncp; + + union { + struct { +#define act(s) u64_stats_t s; + DECLARE_LIBIE_SQ_STATS(act); +#undef act + }; + DECLARE_FLEX_ARRAY(u64_stats_t, raw); + }; +} __libie_stats_aligned; + +struct libie_sq_onstack_stats { +#define act(s) u32 s; + DECLARE_LIBIE_SQ_NAPI_STATS(act); +#undef act +}; + +/** + * libie_sq_napi_stats_add - add onstack Tx stats to the queue container + * @qs: Tx queue stats structure to update + * @ss: onstack structure to get the values from, updated during the NAPI loop + */ +static inline void +libie_sq_napi_stats_add(struct libie_sq_stats *qs, + const struct libie_sq_onstack_stats *ss) +{ + if (unlikely(!ss->packets)) + return; + + u64_stats_update_begin(&qs->syncp); + libie_stats_add(qs, ss, packets); + libie_stats_add(qs, ss, bytes); + u64_stats_update_end(&qs->syncp); +} + +u32 libie_sq_stats_get_sset_count(void); +void libie_sq_stats_get_strings(u8 **data, u32 qid); +void libie_sq_stats_get_data(u64 **data, const struct libie_sq_stats *stats); + +#endif /* __LIBIE_STATS_H */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index fe661011644b8f..3f3a2a82a86b30 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -5069,12 +5069,12 @@ static inline u64 skb_get_kcov_handle(struct sk_buff *skb) #endif } -#ifdef CONFIG_PAGE_POOL static inline void skb_mark_for_recycle(struct sk_buff *skb) { +#ifdef CONFIG_PAGE_POOL skb->pp_recycle = 1; -} #endif +} #endif /* __KERNEL__ */ #endif /* _LINUX_SKBUFF_H */ diff --git a/include/net/page_pool.h b/include/net/page_pool.h index ddfa0b32867776..fb949d168e14c0 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -32,12 +32,18 @@ #include /* Needed by ptr_ring */ #include -#include +#include -#define PP_FLAG_DMA_MAP BIT(0) /* Should page_pool do the DMA +#define PP_FLAG_DMA_MAP BIT(5) /* Should page_pool do the DMA * map/unmap */ -#define PP_FLAG_DMA_SYNC_DEV BIT(1) /* If set all pages that the driver gets +#define 
PP_FLAG_DMA_MAP_WEAK BIT(1) /* Map with %DMA_ATTR_WEAK_ORDERING */ +/* These flags correspond to the DMA map attributes to pass them directly to + * dma_map_page_attrs(), see page_pool_dma_map(). + */ +#define PP_FLAG_DMA_ATTR (PP_FLAG_DMA_MAP | \ + PP_FLAG_DMA_MAP_WEAK) +#define PP_FLAG_DMA_SYNC_DEV BIT(0) /* If set all pages that the driver gets * from page_pool will be * DMA-synced-for-device according to * the length provided by the device @@ -46,7 +52,7 @@ * device driver responsibility */ #define PP_FLAG_PAGE_FRAG BIT(2) /* for page frag feature */ -#define PP_FLAG_ALL (PP_FLAG_DMA_MAP |\ +#define PP_FLAG_ALL (PP_FLAG_DMA_ATTR |\ PP_FLAG_DMA_SYNC_DEV |\ PP_FLAG_PAGE_FRAG) @@ -233,8 +239,8 @@ static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool, /* get the stored dma direction. A driver might decide to treat this locally and * avoid the extra cache line from page_pool to determine the direction */ -static -inline enum dma_data_direction page_pool_get_dma_dir(struct page_pool *pool) +static inline enum dma_data_direction +page_pool_get_dma_dir(const struct page_pool *pool) { return pool->p.dma_dir; } @@ -354,7 +360,7 @@ static inline void page_pool_recycle_direct(struct page_pool *pool, #define PAGE_POOL_DMA_USE_PP_FRAG_COUNT \ (sizeof(dma_addr_t) > sizeof(unsigned long)) -static inline dma_addr_t page_pool_get_dma_addr(struct page *page) +static inline dma_addr_t page_pool_get_dma_addr(const struct page *page) { dma_addr_t ret = page->dma_addr; @@ -371,6 +377,37 @@ static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr) page->dma_addr_upper = upper_32_bits(addr); } +/** + * page_pool_dma_sync_for_cpu - sync Rx page for CPU after it's written by HW + * @pool: page_pool which this page belongs to + * @page: page to sync + * @dma_sync_size: size of the data written to the page + * + * Can be used as a shorthand to sync Rx pages before accessing them in the + * driver. The caller must ensure the pool was created with %PP_FLAG_DMA_MAP. + */ +static inline void page_pool_dma_sync_for_cpu(const struct page_pool *pool, + const struct page *page, + u32 dma_sync_size) +{ + dma_sync_single_range_for_cpu(pool->p.dev, + page_pool_get_dma_addr(page), + pool->p.offset, dma_sync_size, + page_pool_get_dma_dir(pool)); +} + +/** + * page_pool_dma_sync_for_cpu - sync full Rx page for CPU + * @pool: page_pool which this page belongs to + * @page: page to sync + */ +static inline void +page_pool_dma_sync_full_for_cpu(const struct page_pool *pool, + const struct page *page) +{ + page_pool_dma_sync_for_cpu(pool, page, pool->p.max_len); +} + static inline bool is_page_pool_compiled_in(void) { #ifdef CONFIG_PAGE_POOL diff --git a/include/net/xdp.h b/include/net/xdp.h index 41c57b8b167147..383b25b426a482 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -317,35 +317,6 @@ void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq); void xdp_return_frame_bulk(struct xdp_frame *xdpf, struct xdp_frame_bulk *bq); -/* When sending xdp_frame into the network stack, then there is no - * return point callback, which is needed to release e.g. DMA-mapping - * resources with page_pool. Thus, have explicit function to release - * frame resources. 
- */ -void __xdp_release_frame(void *data, struct xdp_mem_info *mem); -static inline void xdp_release_frame(struct xdp_frame *xdpf) -{ - struct xdp_mem_info *mem = &xdpf->mem; - struct skb_shared_info *sinfo; - int i; - - /* Curr only page_pool needs this */ - if (mem->type != MEM_TYPE_PAGE_POOL) - return; - - if (likely(!xdp_frame_has_frags(xdpf))) - goto out; - - sinfo = xdp_get_shared_info_from_frame(xdpf); - for (i = 0; i < sinfo->nr_frags; i++) { - struct page *page = skb_frag_page(&sinfo->frags[i]); - - __xdp_release_frame(page_address(page), mem); - } -out: - __xdp_release_frame(xdpf->data, mem); -} - static __always_inline unsigned int xdp_get_frame_len(struct xdp_frame *xdpf) { struct skb_shared_info *sinfo; diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 193c1879986503..74e25b55e2f062 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -161,6 +161,13 @@ static int page_pool_init(struct page_pool *pool, return -EINVAL; } + /* Passing DMA mapping attributes without asking PP to map pages + * makes no sense. + */ + if ((pool->p.flags & PP_FLAG_DMA_ATTR) && + !(pool->p.flags & PP_FLAG_DMA_MAP)) + return -EINVAL; + if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) { /* In order to request DMA-sync-for-device the page * needs to be mapped @@ -308,6 +315,14 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page) { dma_addr_t dma; + /* Pages are always mapped with %DMA_ATTR_SKIP_CPU_SYNC, so its value + * corresponds to %PP_FLAG_DMA_MAP, which is always set when reaching + * this function. + */ + static_assert(PP_FLAG_DMA_MAP == DMA_ATTR_SKIP_CPU_SYNC); + /* Drivers may set this for PP to map with weak ordering */ + static_assert(PP_FLAG_DMA_MAP_WEAK == DMA_ATTR_WEAK_ORDERING); + /* Setup DMA mapping: use 'struct page' area for storing DMA-addr * since dma_addr_t can be either 32 or 64 bits and does not always fit * into page private data (i.e 32bit cpu with 64bit DMA caps) @@ -315,7 +330,8 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page) */ dma = dma_map_page_attrs(pool->p.dev, page, 0, (PAGE_SIZE << pool->p.order), - pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC); + pool->p.dma_dir, + pool->p.flags & PP_FLAG_DMA_ATTR); if (dma_mapping_error(pool->p.dev, dma)) return false; @@ -483,7 +499,7 @@ void page_pool_release_page(struct page_pool *pool, struct page *page) /* When page is unmapped, it cannot be returned to our pool */ dma_unmap_page_attrs(pool->p.dev, dma, PAGE_SIZE << pool->p.order, pool->p.dma_dir, - DMA_ATTR_SKIP_CPU_SYNC); + pool->p.flags & PP_FLAG_DMA_ATTR); page_pool_set_dma_addr(page, 0); skip_dma_unmap: page_pool_clear_pp_info(page); diff --git a/net/core/xdp.c b/net/core/xdp.c index 528d4b37983df8..018e0fe4e71405 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -531,21 +531,6 @@ void xdp_return_buff(struct xdp_buff *xdp) } EXPORT_SYMBOL_GPL(xdp_return_buff); -/* Only called for MEM_TYPE_PAGE_POOL see xdp.h */ -void __xdp_release_frame(void *data, struct xdp_mem_info *mem) -{ - struct xdp_mem_allocator *xa; - struct page *page; - - rcu_read_lock(); - xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params); - page = virt_to_head_page(data); - if (xa) - page_pool_release_page(xa->page_pool, page); - rcu_read_unlock(); -} -EXPORT_SYMBOL_GPL(__xdp_release_frame); - void xdp_attachment_setup(struct xdp_attachment_info *info, struct netdev_bpf *bpf) { @@ -658,8 +643,8 @@ struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf, * - RX ring dev queue index (skb_record_rx_queue) */ - /* Until page_pool 
get SKB return path, release DMA here */ - xdp_release_frame(xdpf); + if (xdpf->mem.type == MEM_TYPE_PAGE_POOL) + skb_mark_for_recycle(skb); /* Allow SKB to reuse area used by xdp_frame */ xdp_scrub_frame(xdpf); diff --git a/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c index 77a123071940ae..cd2d4e3258b899 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c @@ -4,6 +4,19 @@ #define ETH_ALEN 6 #define HDR_SZ (sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + sizeof(struct udphdr)) + +/** + * enum frame_mark - magics to distinguish page/packet paths + * @MARK_XMIT: page was recycled due to the frame being "xmitted" by the NIC. + * @MARK_IN: frame is being processed by the input XDP prog. + * @MARK_SKB: frame did hit the TC ingress hook as an skb. + */ +enum frame_mark { + MARK_XMIT = 0U, + MARK_IN = 0x42, + MARK_SKB = 0x45, +}; + const volatile int ifindex_out; const volatile int ifindex_in; const volatile __u8 expect_dst[ETH_ALEN]; @@ -34,10 +47,10 @@ int xdp_redirect(struct xdp_md *xdp) if (*metadata != 0x42) return XDP_ABORTED; - if (*payload == 0) { - *payload = 0x42; + if (*payload == MARK_XMIT) pkts_seen_zero++; - } + + *payload = MARK_IN; if (bpf_xdp_adjust_meta(xdp, 4)) return XDP_ABORTED; @@ -51,7 +64,7 @@ int xdp_redirect(struct xdp_md *xdp) return ret; } -static bool check_pkt(void *data, void *data_end) +static bool check_pkt(void *data, void *data_end, const __u32 mark) { struct ipv6hdr *iph = data + sizeof(struct ethhdr); __u8 *payload = data + HDR_SZ; @@ -59,13 +72,13 @@ static bool check_pkt(void *data, void *data_end) if (payload + 1 > data_end) return false; - if (iph->nexthdr != IPPROTO_UDP || *payload != 0x42) + if (iph->nexthdr != IPPROTO_UDP || *payload != MARK_IN) return false; /* reset the payload so the same packet doesn't get counted twice when * it cycles back through the kernel path and out the dst veth */ - *payload = 0; + *payload = mark; return true; } @@ -75,11 +88,11 @@ int xdp_count_pkts(struct xdp_md *xdp) void *data = (void *)(long)xdp->data; void *data_end = (void *)(long)xdp->data_end; - if (check_pkt(data, data_end)) + if (check_pkt(data, data_end, MARK_XMIT)) pkts_seen_xdp++; - /* Return XDP_DROP to make sure the data page is recycled, like when it - * exits a physical NIC. Recycled pages will be counted in the + /* Return %XDP_DROP to recycle the data page with %MARK_XMIT, like + * it exited a physical NIC. Those pages will be counted in the * pkts_seen_zero counter above. */ return XDP_DROP; @@ -91,9 +104,12 @@ int tc_count_pkts(struct __sk_buff *skb) void *data = (void *)(long)skb->data; void *data_end = (void *)(long)skb->data_end; - if (check_pkt(data, data_end)) + if (check_pkt(data, data_end, MARK_SKB)) pkts_seen_tc++; + /* Will be either recycled or freed, %MARK_SKB makes sure it won't + * hit any of the counters above. + */ return 0; }
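
The sketches below are not part of the patch; they only illustrate, under stated assumptions, how a consumer driver is expected to use the new libie and page_pool APIs. All example_* names are hypothetical. Note that a module calling the exported libie symbols also needs MODULE_IMPORT_NS(LIBIE), as the ice_main.c hunk above shows.

A minimal sketch of the Rx-metadata path: the hardware ptype is parsed once and the result drives both the hash and the checksum hints, mirroring the converted ice_rx_hash()/ice_rx_csum() above. @rss_hash and @hw_ptype are assumed to have been read from the Rx descriptor by the caller.

#include <linux/net/intel/libie/rx.h>

/* Hypothetical Rx-metadata helper, not part of the patch */
static void example_rx_metadata(struct net_device *dev, struct sk_buff *skb,
				u32 rss_hash, u32 hw_ptype)
{
	struct libie_rx_ptype_parsed parsed = libie_parse_rx_ptype(hw_ptype);

	/* Checks both the parsed payload layer and NETIF_F_RXHASH */
	if (libie_has_rx_hash(dev, parsed))
		libie_skb_set_hash(skb, rss_hash, parsed);

	/* Checks the parsed L4 protocol and NETIF_F_RXCSUM; the descriptor
	 * error bits still have to be checked by the driver itself.
	 */
	if (libie_has_rx_checksum(dev, parsed))
		skb->ip_summed = CHECKSUM_UNNECESSARY;
}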
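
Wiring the libie page_pool helpers into a queue, again as a sketch: struct example_rx_queue and its fields are assumptions, only libie_rx_page_pool_create()/_destroy() and struct libie_rq_stats come from the patch.

#include <linux/net/intel/libie/rx.h>
#include <linux/net/intel/libie/stats.h>

struct example_rx_queue {
	struct page_pool	*pp;
	struct net_device	*netdev;
	struct libie_rq_stats	stats;
	u32			ring_len;
};

static int example_rxq_init(struct example_rx_queue *q, bool xdp_on)
{
	struct page_pool *pp;

	u64_stats_init(&q->stats.syncp);

	/* Buffer size, headroom and DMA attributes are derived inside the
	 * library from the netdev MTU and @xdp_on.
	 */
	pp = libie_rx_page_pool_create(q->netdev, q->ring_len, xdp_on);
	if (IS_ERR(pp))
		return PTR_ERR(pp);

	q->pp = pp;
	return 0;
}

static void example_rxq_deinit(struct example_rx_queue *q)
{
	/* Folds the pool's recycling counters into q->stats (when
	 * CONFIG_PAGE_POOL_STATS is enabled) before the pool goes away.
	 */
	libie_rx_page_pool_destroy(q->pp, &q->stats);
	q->pp = NULL;
}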
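
The stats helpers split between hotpath counters (collected on-stack and added once per poll) and slowpath counters (incremented directly under the syncp). A sketch reusing the hypothetical struct example_rx_queue from the previous snippet; example_receive_one() is likewise assumed.

/* Hypothetical helper building one skb from the next Rx descriptor */
static struct sk_buff *example_receive_one(struct example_rx_queue *q);

static int example_clean_rx(struct example_rx_queue *q,
			    struct napi_struct *napi, int budget)
{
	struct libie_rq_onstack_stats rs = { };
	int done = 0;

	while (done < budget) {
		struct sk_buff *skb = example_receive_one(q);

		if (!skb) {
			/* Slowpath: one u64_stats-protected increment */
			libie_stats_inc_one(&q->stats, build_skb_fail);
			break;
		}

		rs.packets++;
		rs.bytes += skb->len;
		napi_gro_receive(napi, skb);
		done++;
	}

	/* One syncp section per poll instead of one per packet */
	libie_rq_napi_stats_add(&q->stats, &rs);

	return done;
}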
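
Exposing the per-queue counters via ethtool then reduces to iterating over the queues; the Tx side is symmetric via the libie_sq_stats_*() counterparts. The private structure layout is an assumption.

#include <linux/ethtool.h>

struct example_priv {
	struct example_rx_queue	*rxq;
	u32			num_rxq;
};

static int example_get_sset_count(struct net_device *dev, int sset)
{
	struct example_priv *priv = netdev_priv(dev);

	if (sset != ETH_SS_STATS)
		return -EOPNOTSUPP;

	return priv->num_rxq * libie_rq_stats_get_sset_count();
}

static void example_get_strings(struct net_device *dev, u32 sset, u8 *data)
{
	struct example_priv *priv = netdev_priv(dev);
	u32 i;

	if (sset != ETH_SS_STATS)
		return;

	for (i = 0; i < priv->num_rxq; i++)
		libie_rq_stats_get_strings(&data, i);
}

static void example_get_ethtool_stats(struct net_device *dev,
				      struct ethtool_stats *stats, u64 *data)
{
	struct example_priv *priv = netdev_priv(dev);
	u32 i;

	/* Passing the live page_pool adds its current stats on top of the
	 * totals already accumulated in the queue container.
	 */
	for (i = 0; i < priv->num_rxq; i++)
		libie_rq_stats_get_data(&data, &priv->rxq[i].stats,
					priv->rxq[i].pp);
}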
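
The new CPU-side sync helpers in page_pool.h are intended to be called before the driver reads buffer contents written by HW; a sketch, with @len being the data length taken from the Rx descriptor.

#include <net/page_pool.h>

static void *example_rx_buf_va(const struct page_pool *pp,
			       const struct page *page, u32 len)
{
	/* Sync only the bytes HW could have written, starting at the
	 * headroom offset the pool was created with.
	 */
	page_pool_dma_sync_for_cpu(pp, page, len);

	return page_address(page) + pp->p.offset;
}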
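
For completeness, a driver not using libie could request the new weakly-ordered mappings directly; this duplicates what libie_rx_page_pool_create() does internally, and all values here are illustrative. page_pool_init() now rejects PP_FLAG_DMA_MAP_WEAK without PP_FLAG_DMA_MAP.

#include <linux/dma-mapping.h>
#include <net/page_pool.h>

static struct page_pool *example_create_pool(struct device *dma_dev, u32 size)
{
	struct page_pool_params pp = {
		/* _WEAK is only valid together with _MAP */
		.flags		= PP_FLAG_DMA_MAP | PP_FLAG_DMA_MAP_WEAK |
				  PP_FLAG_DMA_SYNC_DEV,
		.order		= 0,
		.pool_size	= size,
		.nid		= NUMA_NO_NODE,
		.dev		= dma_dev,
		.dma_dir	= DMA_FROM_DEVICE,
		.max_len	= PAGE_SIZE,
		.offset		= 0,
	};

	return page_pool_create(&pp);
}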