diff --git a/MAINTAINERS b/MAINTAINERS index c36618d4659ec9..895bc6cc563c12 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10650,7 +10650,8 @@ F: Documentation/networking/device_drivers/ethernet/intel/ F: drivers/net/ethernet/intel/ F: drivers/net/ethernet/intel/*/ F: include/linux/avf/virtchnl.h -F: include/linux/net/intel/iidc.h +F: include/linux/net/intel/ +F: include/linux/net/intel/*/ INTEL ETHERNET PROTOCOL DRIVER FOR RDMA M: Mustafa Ismail diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 0c3725c3eefa46..fd23f81270b444 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -642,6 +642,8 @@ static int really_probe(struct device *dev, struct device_driver *drv) goto pinctrl_bind_failed; } + dma_setup_skip_sync(dev); + ret = driver_sysfs_add(dev); if (ret) { pr_err("%s: driver_sysfs_add(%s) failed\n", diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index e59f50e11ea8bb..b015c69b141468 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -1717,6 +1717,7 @@ static const struct dma_map_ops iommu_dma_ops = { .unmap_page = iommu_dma_unmap_page, .map_sg = iommu_dma_map_sg, .unmap_sg = iommu_dma_unmap_sg, + .can_skip_sync = dev_is_dma_coherent, .sync_single_for_cpu = iommu_dma_sync_single_for_cpu, .sync_single_for_device = iommu_dma_sync_single_for_device, .sync_sg_for_cpu = iommu_dma_sync_sg_for_cpu, diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index d55638ad8704ff..c7da7d05d93e95 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -16,6 +16,8 @@ config NET_VENDOR_INTEL if NET_VENDOR_INTEL +source "drivers/net/ethernet/intel/libie/Kconfig" + config E100 tristate "Intel(R) PRO/100+ support" depends on PCI @@ -225,6 +227,7 @@ config I40E depends on PTP_1588_CLOCK_OPTIONAL depends on PCI select AUXILIARY_BUS + select LIBIE select NET_DEVLINK help This driver supports Intel(R) Ethernet Controller XL710 Family of @@ -253,6 +256,8 @@ config I40E_DCB # so that CONFIG_IAVF symbol will always mirror the state of CONFIG_I40EVF config IAVF tristate + select LIBIE + config I40EVF tristate "Intel(R) Ethernet Adaptive Virtual Function support" select IAVF @@ -283,6 +288,7 @@ config ICE depends on GNSS || GNSS = n select AUXILIARY_BUS select DIMLIB + select LIBIE select NET_DEVLINK select PLDMFW select DPLL diff --git a/drivers/net/ethernet/intel/Makefile b/drivers/net/ethernet/intel/Makefile index dacb481ee5b14b..60ca00a01de455 100644 --- a/drivers/net/ethernet/intel/Makefile +++ b/drivers/net/ethernet/intel/Makefile @@ -16,3 +16,4 @@ obj-$(CONFIG_IAVF) += iavf/ obj-$(CONFIG_FM10K) += fm10k/ obj-$(CONFIG_ICE) += ice/ obj-$(CONFIG_IDPF) += idpf/ +obj-$(CONFIG_LIBIE) += libie/ diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index de6ca629574271..e8031f1a9b4fc6 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -381,259 +381,6 @@ int i40e_aq_set_rss_key(struct i40e_hw *hw, return i40e_aq_get_set_rss_key(hw, vsi_id, key, true); } -/* The i40e_ptype_lookup table is used to convert from the 8-bit ptype in the - * hardware to a bit-field that can be used by SW to more easily determine the - * packet type. - * - * Macros are used to shorten the table lines and make this table human - * readable. 
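The really_probe() call to dma_setup_skip_sync() together with the new .can_skip_sync = dev_is_dma_coherent hook lets the DMA core record, per device, that dma_sync_*() calls are unnecessary. A minimal sketch of the per-mapping form of the same check, using the existing dma_need_sync() helper; the function name and the cached need_sync flag are illustrative assumptions, not part of this series:

#include <linux/dma-mapping.h>

/* Illustrative only: map an Rx page and remember whether later
 * dma_sync_*_for_device()/_for_cpu() calls can be skipped for it.
 */
static dma_addr_t example_map_rx_page(struct device *dev, struct page *page,
				      unsigned int size, bool *need_sync)
{
	dma_addr_t dma = dma_map_page_attrs(dev, page, 0, size,
					    DMA_FROM_DEVICE,
					    DMA_ATTR_SKIP_CPU_SYNC);

	if (dma_mapping_error(dev, dma))
		return DMA_MAPPING_ERROR;

	/* dma-coherent devices report false here, so syncs can be elided */
	*need_sync = dma_need_sync(dev, dma);

	return dma;
}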
- * - * We store the PTYPE in the top byte of the bit field - this is just so that - * we can check that the table doesn't have a row missing, as the index into - * the table should be the PTYPE. - * - * Typical work flow: - * - * IF NOT i40e_ptype_lookup[ptype].known - * THEN - * Packet is unknown - * ELSE IF i40e_ptype_lookup[ptype].outer_ip == I40E_RX_PTYPE_OUTER_IP - * Use the rest of the fields to look at the tunnels, inner protocols, etc - * ELSE - * Use the enum i40e_rx_l2_ptype to decode the packet type - * ENDIF - */ - -/* macro to make the table lines short, use explicit indexing with [PTYPE] */ -#define I40E_PTT(PTYPE, OUTER_IP, OUTER_IP_VER, OUTER_FRAG, T, TE, TEF, I, PL)\ - [PTYPE] = { \ - 1, \ - I40E_RX_PTYPE_OUTER_##OUTER_IP, \ - I40E_RX_PTYPE_OUTER_##OUTER_IP_VER, \ - I40E_RX_PTYPE_##OUTER_FRAG, \ - I40E_RX_PTYPE_TUNNEL_##T, \ - I40E_RX_PTYPE_TUNNEL_END_##TE, \ - I40E_RX_PTYPE_##TEF, \ - I40E_RX_PTYPE_INNER_PROT_##I, \ - I40E_RX_PTYPE_PAYLOAD_LAYER_##PL } - -#define I40E_PTT_UNUSED_ENTRY(PTYPE) [PTYPE] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 } - -/* shorter macros makes the table fit but are terse */ -#define I40E_RX_PTYPE_NOF I40E_RX_PTYPE_NOT_FRAG -#define I40E_RX_PTYPE_FRG I40E_RX_PTYPE_FRAG -#define I40E_RX_PTYPE_INNER_PROT_TS I40E_RX_PTYPE_INNER_PROT_TIMESYNC - -/* Lookup table mapping in the 8-bit HW PTYPE to the bit field for decoding */ -struct i40e_rx_ptype_decoded i40e_ptype_lookup[BIT(8)] = { - /* L2 Packet types */ - I40E_PTT_UNUSED_ENTRY(0), - I40E_PTT(1, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), - I40E_PTT(2, L2, NONE, NOF, NONE, NONE, NOF, TS, PAY2), - I40E_PTT(3, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), - I40E_PTT_UNUSED_ENTRY(4), - I40E_PTT_UNUSED_ENTRY(5), - I40E_PTT(6, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), - I40E_PTT(7, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), - I40E_PTT_UNUSED_ENTRY(8), - I40E_PTT_UNUSED_ENTRY(9), - I40E_PTT(10, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), - I40E_PTT(11, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE), - I40E_PTT(12, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(13, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(14, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(15, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(16, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(17, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(18, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(19, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(20, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(21, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - - /* Non Tunneled IPv4 */ - I40E_PTT(22, IP, IPV4, FRG, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(23, IP, IPV4, NOF, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(24, IP, IPV4, NOF, NONE, NONE, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(25), - I40E_PTT(26, IP, IPV4, NOF, NONE, NONE, NOF, TCP, PAY4), - I40E_PTT(27, IP, IPV4, NOF, NONE, NONE, NOF, SCTP, PAY4), - I40E_PTT(28, IP, IPV4, NOF, NONE, NONE, NOF, ICMP, PAY4), - - /* IPv4 --> IPv4 */ - I40E_PTT(29, IP, IPV4, NOF, IP_IP, IPV4, FRG, NONE, PAY3), - I40E_PTT(30, IP, IPV4, NOF, IP_IP, IPV4, NOF, NONE, PAY3), - I40E_PTT(31, IP, IPV4, NOF, IP_IP, IPV4, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(32), - I40E_PTT(33, IP, IPV4, NOF, IP_IP, IPV4, NOF, TCP, PAY4), - I40E_PTT(34, IP, IPV4, NOF, IP_IP, IPV4, NOF, SCTP, PAY4), - I40E_PTT(35, IP, IPV4, NOF, IP_IP, IPV4, NOF, ICMP, PAY4), - - /* IPv4 --> IPv6 */ - I40E_PTT(36, IP, IPV4, NOF, IP_IP, IPV6, FRG, NONE, PAY3), - I40E_PTT(37, IP, IPV4, 
NOF, IP_IP, IPV6, NOF, NONE, PAY3), - I40E_PTT(38, IP, IPV4, NOF, IP_IP, IPV6, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(39), - I40E_PTT(40, IP, IPV4, NOF, IP_IP, IPV6, NOF, TCP, PAY4), - I40E_PTT(41, IP, IPV4, NOF, IP_IP, IPV6, NOF, SCTP, PAY4), - I40E_PTT(42, IP, IPV4, NOF, IP_IP, IPV6, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT */ - I40E_PTT(43, IP, IPV4, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3), - - /* IPv4 --> GRE/NAT --> IPv4 */ - I40E_PTT(44, IP, IPV4, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3), - I40E_PTT(45, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3), - I40E_PTT(46, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(47), - I40E_PTT(48, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, TCP, PAY4), - I40E_PTT(49, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4), - I40E_PTT(50, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT --> IPv6 */ - I40E_PTT(51, IP, IPV4, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3), - I40E_PTT(52, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3), - I40E_PTT(53, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(54), - I40E_PTT(55, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, TCP, PAY4), - I40E_PTT(56, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4), - I40E_PTT(57, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT --> MAC */ - I40E_PTT(58, IP, IPV4, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3), - - /* IPv4 --> GRE/NAT --> MAC --> IPv4 */ - I40E_PTT(59, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3), - I40E_PTT(60, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3), - I40E_PTT(61, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(62), - I40E_PTT(63, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP, PAY4), - I40E_PTT(64, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4), - I40E_PTT(65, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT -> MAC --> IPv6 */ - I40E_PTT(66, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3), - I40E_PTT(67, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3), - I40E_PTT(68, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(69), - I40E_PTT(70, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP, PAY4), - I40E_PTT(71, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4), - I40E_PTT(72, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT --> MAC/VLAN */ - I40E_PTT(73, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3), - - /* IPv4 ---> GRE/NAT -> MAC/VLAN --> IPv4 */ - I40E_PTT(74, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3), - I40E_PTT(75, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3), - I40E_PTT(76, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(77), - I40E_PTT(78, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP, PAY4), - I40E_PTT(79, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4), - I40E_PTT(80, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4), - - /* IPv4 -> GRE/NAT -> MAC/VLAN --> IPv6 */ - I40E_PTT(81, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3), - I40E_PTT(82, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3), - I40E_PTT(83, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(84), - I40E_PTT(85, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP, PAY4), - I40E_PTT(86, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4), - I40E_PTT(87, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4), - - /* Non Tunneled IPv6 
*/ - I40E_PTT(88, IP, IPV6, FRG, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(89, IP, IPV6, NOF, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(91), - I40E_PTT(92, IP, IPV6, NOF, NONE, NONE, NOF, TCP, PAY4), - I40E_PTT(93, IP, IPV6, NOF, NONE, NONE, NOF, SCTP, PAY4), - I40E_PTT(94, IP, IPV6, NOF, NONE, NONE, NOF, ICMP, PAY4), - - /* IPv6 --> IPv4 */ - I40E_PTT(95, IP, IPV6, NOF, IP_IP, IPV4, FRG, NONE, PAY3), - I40E_PTT(96, IP, IPV6, NOF, IP_IP, IPV4, NOF, NONE, PAY3), - I40E_PTT(97, IP, IPV6, NOF, IP_IP, IPV4, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(98), - I40E_PTT(99, IP, IPV6, NOF, IP_IP, IPV4, NOF, TCP, PAY4), - I40E_PTT(100, IP, IPV6, NOF, IP_IP, IPV4, NOF, SCTP, PAY4), - I40E_PTT(101, IP, IPV6, NOF, IP_IP, IPV4, NOF, ICMP, PAY4), - - /* IPv6 --> IPv6 */ - I40E_PTT(102, IP, IPV6, NOF, IP_IP, IPV6, FRG, NONE, PAY3), - I40E_PTT(103, IP, IPV6, NOF, IP_IP, IPV6, NOF, NONE, PAY3), - I40E_PTT(104, IP, IPV6, NOF, IP_IP, IPV6, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(105), - I40E_PTT(106, IP, IPV6, NOF, IP_IP, IPV6, NOF, TCP, PAY4), - I40E_PTT(107, IP, IPV6, NOF, IP_IP, IPV6, NOF, SCTP, PAY4), - I40E_PTT(108, IP, IPV6, NOF, IP_IP, IPV6, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT */ - I40E_PTT(109, IP, IPV6, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3), - - /* IPv6 --> GRE/NAT -> IPv4 */ - I40E_PTT(110, IP, IPV6, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3), - I40E_PTT(111, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3), - I40E_PTT(112, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(113), - I40E_PTT(114, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, TCP, PAY4), - I40E_PTT(115, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4), - I40E_PTT(116, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> IPv6 */ - I40E_PTT(117, IP, IPV6, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3), - I40E_PTT(118, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3), - I40E_PTT(119, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(120), - I40E_PTT(121, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, TCP, PAY4), - I40E_PTT(122, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4), - I40E_PTT(123, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> MAC */ - I40E_PTT(124, IP, IPV6, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3), - - /* IPv6 --> GRE/NAT -> MAC -> IPv4 */ - I40E_PTT(125, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3), - I40E_PTT(126, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3), - I40E_PTT(127, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(128), - I40E_PTT(129, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP, PAY4), - I40E_PTT(130, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4), - I40E_PTT(131, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> MAC -> IPv6 */ - I40E_PTT(132, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3), - I40E_PTT(133, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3), - I40E_PTT(134, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(135), - I40E_PTT(136, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP, PAY4), - I40E_PTT(137, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4), - I40E_PTT(138, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> MAC/VLAN */ - I40E_PTT(139, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3), - - /* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv4 */ - I40E_PTT(140, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, 
NONE, PAY3), - I40E_PTT(141, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3), - I40E_PTT(142, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(143), - I40E_PTT(144, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP, PAY4), - I40E_PTT(145, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4), - I40E_PTT(146, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv6 */ - I40E_PTT(147, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3), - I40E_PTT(148, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3), - I40E_PTT(149, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP, PAY4), - I40E_PTT_UNUSED_ENTRY(150), - I40E_PTT(151, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP, PAY4), - I40E_PTT(152, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4), - I40E_PTT(153, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4), - - /* unused entries */ - [154 ... 255] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 } -}; - /** * i40e_init_shared_code - Initialize the shared code * @hw: pointer to hardware structure diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index ae8f9f135725b4..ea258640b6bc38 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -100,6 +100,7 @@ MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all), Debug mask (0x8XXXXXXX MODULE_AUTHOR("Intel Corporation, "); MODULE_DESCRIPTION("Intel(R) Ethernet Connection XL710 Network Driver"); +MODULE_IMPORT_NS(LIBIE); MODULE_LICENSE("GPL v2"); static struct workqueue_struct *i40e_wq; diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index af42693305815c..f3dbf6dd2bc624 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -371,13 +371,6 @@ void i40e_set_pci_config_data(struct i40e_hw *hw, u16 link_status); int i40e_set_mac_type(struct i40e_hw *hw); -extern struct i40e_rx_ptype_decoded i40e_ptype_lookup[]; - -static inline struct i40e_rx_ptype_decoded decode_rx_desc_ptype(u8 ptype) -{ - return i40e_ptype_lookup[ptype]; -} - /** * i40e_virtchnl_link_speed - Convert AdminQ link_speed to virtchnl definition * @link_speed: the speed to convert diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 971ba33220381b..deff4d13c2998a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2,6 +2,7 @@ /* Copyright(c) 2013 - 2018 Intel Corporation. */ #include +#include #include #include #include @@ -1750,38 +1751,30 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi, struct sk_buff *skb, union i40e_rx_desc *rx_desc) { - struct i40e_rx_ptype_decoded decoded; + struct libie_rx_ptype_parsed parsed; u32 rx_error, rx_status; bool ipv4, ipv6; u8 ptype; u64 qword; - qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); - ptype = FIELD_GET(I40E_RXD_QW1_PTYPE_MASK, qword); - rx_error = FIELD_GET(I40E_RXD_QW1_ERROR_MASK, qword); - rx_status = FIELD_GET(I40E_RXD_QW1_STATUS_MASK, qword); - decoded = decode_rx_desc_ptype(ptype); - skb->ip_summed = CHECKSUM_NONE; - skb_checksum_none_assert(skb); + qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); + ptype = FIELD_GET(I40E_RXD_QW1_PTYPE_MASK, qword); - /* Rx csum enabled and ip headers found? 
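The removed i40e_ptype_lookup table (and the identical iavf copy further down) is what libie now provides once for all Intel drivers: a 256-entry array indexed directly by the hardware ptype, built with designated initializers so unlisted rows decode as "unknown". A stripped-down sketch of that pattern; the struct and field names mirror the removed i40e definitions, not the actual libie layout:

/* Sketch of the lookup scheme the removed per-driver tables implement */
struct example_rx_ptype {
	u32 known:1;
	u32 outer_ip:1;		/* L2-only vs IP */
	u32 outer_ip_ver:1;	/* 0 = IPv4, 1 = IPv6 */
	u32 inner_prot:4;	/* NONE/UDP/TCP/SCTP/ICMP/... */
	u32 payload_layer:3;
};

static const struct example_rx_ptype example_ptype_lookup[256] = {
	/* ptype 24: non-tunneled IPv4/UDP, L4 payload (PAY4) */
	[24] = { .known = 1, .outer_ip = 1, .outer_ip_ver = 0,
		 .inner_prot = 1, .payload_layer = 3 },
	/* entries not listed stay all-zero, i.e. "unknown" */
};

static inline struct example_rx_ptype example_decode(u8 ptype)
{
	return example_ptype_lookup[ptype];
}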
*/ - if (!(vsi->netdev->features & NETIF_F_RXCSUM)) + parsed = libie_parse_rx_ptype(ptype); + if (!libie_has_rx_checksum(vsi->netdev, parsed)) return; + rx_error = FIELD_GET(I40E_RXD_QW1_ERROR_MASK, qword); + rx_status = FIELD_GET(I40E_RXD_QW1_STATUS_MASK, qword); + /* did the hardware decode the packet and checksum? */ if (!(rx_status & BIT(I40E_RX_DESC_STATUS_L3L4P_SHIFT))) return; - /* both known and outer_ip must be set for the below code to work */ - if (!(decoded.known && decoded.outer_ip)) - return; - - ipv4 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) && - (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4); - ipv6 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) && - (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6); + ipv4 = parsed.outer_ip == LIBIE_RX_PTYPE_OUTER_IPV4; + ipv6 = parsed.outer_ip == LIBIE_RX_PTYPE_OUTER_IPV6; if (ipv4 && (rx_error & (BIT(I40E_RX_DESC_ERROR_IPE_SHIFT) | @@ -1809,49 +1802,16 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi, * we need to bump the checksum level by 1 to reflect the fact that * we are indicating we validated the inner checksum. */ - if (decoded.tunnel_type >= I40E_RX_PTYPE_TUNNEL_IP_GRENAT) + if (parsed.tunnel_type >= LIBIE_RX_PTYPE_TUNNEL_IP_GRENAT) skb->csum_level = 1; - /* Only report checksum unnecessary for TCP, UDP, or SCTP */ - switch (decoded.inner_prot) { - case I40E_RX_PTYPE_INNER_PROT_TCP: - case I40E_RX_PTYPE_INNER_PROT_UDP: - case I40E_RX_PTYPE_INNER_PROT_SCTP: - skb->ip_summed = CHECKSUM_UNNECESSARY; - fallthrough; - default: - break; - } - + skb->ip_summed = CHECKSUM_UNNECESSARY; return; checksum_fail: vsi->back->hw_csum_rx_error++; } -/** - * i40e_ptype_to_htype - get a hash type - * @ptype: the ptype value from the descriptor - * - * Returns a hash type to be used by skb_set_hash - **/ -static inline int i40e_ptype_to_htype(u8 ptype) -{ - struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype); - - if (!decoded.known) - return PKT_HASH_TYPE_NONE; - - if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && - decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4) - return PKT_HASH_TYPE_L4; - else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && - decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3) - return PKT_HASH_TYPE_L3; - else - return PKT_HASH_TYPE_L2; -} - /** * i40e_rx_hash - set the hash value in the skb * @ring: descriptor ring @@ -1864,17 +1824,19 @@ static inline void i40e_rx_hash(struct i40e_ring *ring, struct sk_buff *skb, u8 rx_ptype) { + struct libie_rx_ptype_parsed parsed; u32 hash; const __le64 rss_mask = cpu_to_le64((u64)I40E_RX_DESC_FLTSTAT_RSS_HASH << I40E_RX_DESC_STATUS_FLTSTAT_SHIFT); - if (!(ring->netdev->features & NETIF_F_RXHASH)) + parsed = libie_parse_rx_ptype(rx_ptype); + if (!libie_has_rx_hash(ring->netdev, parsed)) return; if ((rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask) { hash = le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss); - skb_set_hash(skb, hash, i40e_ptype_to_htype(rx_ptype)); + libie_skb_set_hash(skb, hash, parsed); } } diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h index d9031499697e6a..28568e126850e1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -745,94 +745,6 @@ enum i40e_rx_desc_error_l3l4e_fcoe_masks { #define I40E_RXD_QW1_PTYPE_SHIFT 30 #define I40E_RXD_QW1_PTYPE_MASK (0xFFULL << I40E_RXD_QW1_PTYPE_SHIFT) -/* Packet type non-ip values */ -enum i40e_rx_l2_ptype { - I40E_RX_PTYPE_L2_RESERVED = 0, - 
I40E_RX_PTYPE_L2_MAC_PAY2 = 1, - I40E_RX_PTYPE_L2_TIMESYNC_PAY2 = 2, - I40E_RX_PTYPE_L2_FIP_PAY2 = 3, - I40E_RX_PTYPE_L2_OUI_PAY2 = 4, - I40E_RX_PTYPE_L2_MACCNTRL_PAY2 = 5, - I40E_RX_PTYPE_L2_LLDP_PAY2 = 6, - I40E_RX_PTYPE_L2_ECP_PAY2 = 7, - I40E_RX_PTYPE_L2_EVB_PAY2 = 8, - I40E_RX_PTYPE_L2_QCN_PAY2 = 9, - I40E_RX_PTYPE_L2_EAPOL_PAY2 = 10, - I40E_RX_PTYPE_L2_ARP = 11, - I40E_RX_PTYPE_L2_FCOE_PAY3 = 12, - I40E_RX_PTYPE_L2_FCOE_FCDATA_PAY3 = 13, - I40E_RX_PTYPE_L2_FCOE_FCRDY_PAY3 = 14, - I40E_RX_PTYPE_L2_FCOE_FCRSP_PAY3 = 15, - I40E_RX_PTYPE_L2_FCOE_FCOTHER_PA = 16, - I40E_RX_PTYPE_L2_FCOE_VFT_PAY3 = 17, - I40E_RX_PTYPE_L2_FCOE_VFT_FCDATA = 18, - I40E_RX_PTYPE_L2_FCOE_VFT_FCRDY = 19, - I40E_RX_PTYPE_L2_FCOE_VFT_FCRSP = 20, - I40E_RX_PTYPE_L2_FCOE_VFT_FCOTHER = 21, - I40E_RX_PTYPE_GRENAT4_MAC_PAY3 = 58, - I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4 = 87, - I40E_RX_PTYPE_GRENAT6_MAC_PAY3 = 124, - I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4 = 153 -}; - -struct i40e_rx_ptype_decoded { - u32 known:1; - u32 outer_ip:1; - u32 outer_ip_ver:1; - u32 outer_frag:1; - u32 tunnel_type:3; - u32 tunnel_end_prot:2; - u32 tunnel_end_frag:1; - u32 inner_prot:4; - u32 payload_layer:3; -}; - -enum i40e_rx_ptype_outer_ip { - I40E_RX_PTYPE_OUTER_L2 = 0, - I40E_RX_PTYPE_OUTER_IP = 1 -}; - -enum i40e_rx_ptype_outer_ip_ver { - I40E_RX_PTYPE_OUTER_NONE = 0, - I40E_RX_PTYPE_OUTER_IPV4 = 0, - I40E_RX_PTYPE_OUTER_IPV6 = 1 -}; - -enum i40e_rx_ptype_outer_fragmented { - I40E_RX_PTYPE_NOT_FRAG = 0, - I40E_RX_PTYPE_FRAG = 1 -}; - -enum i40e_rx_ptype_tunnel_type { - I40E_RX_PTYPE_TUNNEL_NONE = 0, - I40E_RX_PTYPE_TUNNEL_IP_IP = 1, - I40E_RX_PTYPE_TUNNEL_IP_GRENAT = 2, - I40E_RX_PTYPE_TUNNEL_IP_GRENAT_MAC = 3, - I40E_RX_PTYPE_TUNNEL_IP_GRENAT_MAC_VLAN = 4, -}; - -enum i40e_rx_ptype_tunnel_end_prot { - I40E_RX_PTYPE_TUNNEL_END_NONE = 0, - I40E_RX_PTYPE_TUNNEL_END_IPV4 = 1, - I40E_RX_PTYPE_TUNNEL_END_IPV6 = 2, -}; - -enum i40e_rx_ptype_inner_prot { - I40E_RX_PTYPE_INNER_PROT_NONE = 0, - I40E_RX_PTYPE_INNER_PROT_UDP = 1, - I40E_RX_PTYPE_INNER_PROT_TCP = 2, - I40E_RX_PTYPE_INNER_PROT_SCTP = 3, - I40E_RX_PTYPE_INNER_PROT_ICMP = 4, - I40E_RX_PTYPE_INNER_PROT_TIMESYNC = 5 -}; - -enum i40e_rx_ptype_payload_layer { - I40E_RX_PTYPE_PAYLOAD_LAYER_NONE = 0, - I40E_RX_PTYPE_PAYLOAD_LAYER_PAY2 = 1, - I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3 = 2, - I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4 = 3, -}; - #define I40E_RXD_QW1_LENGTH_PBUF_SHIFT 38 #define I40E_RXD_QW1_LENGTH_PBUF_MASK (0x3FFFULL << \ I40E_RXD_QW1_LENGTH_PBUF_SHIFT) diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index db8188c7ac4bbf..23a6557fc3db2b 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -287,7 +287,7 @@ struct iavf_adapter { #define IAVF_FLAG_RESET_PENDING BIT(4) #define IAVF_FLAG_RESET_NEEDED BIT(5) #define IAVF_FLAG_WB_ON_ITR_CAPABLE BIT(6) -#define IAVF_FLAG_LEGACY_RX BIT(15) +/* BIT(15) is free, was IAVF_FLAG_LEGACY_RX */ #define IAVF_FLAG_REINIT_ITR_NEEDED BIT(16) #define IAVF_FLAG_QUEUES_DISABLED BIT(17) #define IAVF_FLAG_SETUP_NETDEV_FEATURES BIT(18) diff --git a/drivers/net/ethernet/intel/iavf/iavf_common.c b/drivers/net/ethernet/intel/iavf/iavf_common.c index 5a25233a89d5fd..aa751ce3425b4d 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_common.c +++ b/drivers/net/ethernet/intel/iavf/iavf_common.c @@ -432,259 +432,6 @@ enum iavf_status iavf_aq_set_rss_key(struct iavf_hw *hw, u16 vsi_id, return iavf_aq_get_set_rss_key(hw, vsi_id, key, true); } -/* The iavf_ptype_lookup table is used 
to convert from the 8-bit ptype in the - * hardware to a bit-field that can be used by SW to more easily determine the - * packet type. - * - * Macros are used to shorten the table lines and make this table human - * readable. - * - * We store the PTYPE in the top byte of the bit field - this is just so that - * we can check that the table doesn't have a row missing, as the index into - * the table should be the PTYPE. - * - * Typical work flow: - * - * IF NOT iavf_ptype_lookup[ptype].known - * THEN - * Packet is unknown - * ELSE IF iavf_ptype_lookup[ptype].outer_ip == IAVF_RX_PTYPE_OUTER_IP - * Use the rest of the fields to look at the tunnels, inner protocols, etc - * ELSE - * Use the enum iavf_rx_l2_ptype to decode the packet type - * ENDIF - */ - -/* macro to make the table lines short, use explicit indexing with [PTYPE] */ -#define IAVF_PTT(PTYPE, OUTER_IP, OUTER_IP_VER, OUTER_FRAG, T, TE, TEF, I, PL)\ - [PTYPE] = { \ - 1, \ - IAVF_RX_PTYPE_OUTER_##OUTER_IP, \ - IAVF_RX_PTYPE_OUTER_##OUTER_IP_VER, \ - IAVF_RX_PTYPE_##OUTER_FRAG, \ - IAVF_RX_PTYPE_TUNNEL_##T, \ - IAVF_RX_PTYPE_TUNNEL_END_##TE, \ - IAVF_RX_PTYPE_##TEF, \ - IAVF_RX_PTYPE_INNER_PROT_##I, \ - IAVF_RX_PTYPE_PAYLOAD_LAYER_##PL } - -#define IAVF_PTT_UNUSED_ENTRY(PTYPE) [PTYPE] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 } - -/* shorter macros makes the table fit but are terse */ -#define IAVF_RX_PTYPE_NOF IAVF_RX_PTYPE_NOT_FRAG -#define IAVF_RX_PTYPE_FRG IAVF_RX_PTYPE_FRAG -#define IAVF_RX_PTYPE_INNER_PROT_TS IAVF_RX_PTYPE_INNER_PROT_TIMESYNC - -/* Lookup table mapping the 8-bit HW PTYPE to the bit field for decoding */ -struct iavf_rx_ptype_decoded iavf_ptype_lookup[BIT(8)] = { - /* L2 Packet types */ - IAVF_PTT_UNUSED_ENTRY(0), - IAVF_PTT(1, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), - IAVF_PTT(2, L2, NONE, NOF, NONE, NONE, NOF, TS, PAY2), - IAVF_PTT(3, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), - IAVF_PTT_UNUSED_ENTRY(4), - IAVF_PTT_UNUSED_ENTRY(5), - IAVF_PTT(6, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), - IAVF_PTT(7, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), - IAVF_PTT_UNUSED_ENTRY(8), - IAVF_PTT_UNUSED_ENTRY(9), - IAVF_PTT(10, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), - IAVF_PTT(11, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE), - IAVF_PTT(12, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(13, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(14, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(15, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(16, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(17, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(18, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(19, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(20, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(21, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3), - - /* Non Tunneled IPv4 */ - IAVF_PTT(22, IP, IPV4, FRG, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(23, IP, IPV4, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(24, IP, IPV4, NOF, NONE, NONE, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(25), - IAVF_PTT(26, IP, IPV4, NOF, NONE, NONE, NOF, TCP, PAY4), - IAVF_PTT(27, IP, IPV4, NOF, NONE, NONE, NOF, SCTP, PAY4), - IAVF_PTT(28, IP, IPV4, NOF, NONE, NONE, NOF, ICMP, PAY4), - - /* IPv4 --> IPv4 */ - IAVF_PTT(29, IP, IPV4, NOF, IP_IP, IPV4, FRG, NONE, PAY3), - IAVF_PTT(30, IP, IPV4, NOF, IP_IP, IPV4, NOF, NONE, PAY3), - IAVF_PTT(31, IP, IPV4, NOF, IP_IP, IPV4, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(32), - IAVF_PTT(33, IP, IPV4, NOF, IP_IP, IPV4, NOF, TCP, PAY4), - 
IAVF_PTT(34, IP, IPV4, NOF, IP_IP, IPV4, NOF, SCTP, PAY4), - IAVF_PTT(35, IP, IPV4, NOF, IP_IP, IPV4, NOF, ICMP, PAY4), - - /* IPv4 --> IPv6 */ - IAVF_PTT(36, IP, IPV4, NOF, IP_IP, IPV6, FRG, NONE, PAY3), - IAVF_PTT(37, IP, IPV4, NOF, IP_IP, IPV6, NOF, NONE, PAY3), - IAVF_PTT(38, IP, IPV4, NOF, IP_IP, IPV6, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(39), - IAVF_PTT(40, IP, IPV4, NOF, IP_IP, IPV6, NOF, TCP, PAY4), - IAVF_PTT(41, IP, IPV4, NOF, IP_IP, IPV6, NOF, SCTP, PAY4), - IAVF_PTT(42, IP, IPV4, NOF, IP_IP, IPV6, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT */ - IAVF_PTT(43, IP, IPV4, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3), - - /* IPv4 --> GRE/NAT --> IPv4 */ - IAVF_PTT(44, IP, IPV4, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3), - IAVF_PTT(45, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3), - IAVF_PTT(46, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(47), - IAVF_PTT(48, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, TCP, PAY4), - IAVF_PTT(49, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4), - IAVF_PTT(50, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT --> IPv6 */ - IAVF_PTT(51, IP, IPV4, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3), - IAVF_PTT(52, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3), - IAVF_PTT(53, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(54), - IAVF_PTT(55, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, TCP, PAY4), - IAVF_PTT(56, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4), - IAVF_PTT(57, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT --> MAC */ - IAVF_PTT(58, IP, IPV4, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3), - - /* IPv4 --> GRE/NAT --> MAC --> IPv4 */ - IAVF_PTT(59, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3), - IAVF_PTT(60, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3), - IAVF_PTT(61, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(62), - IAVF_PTT(63, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP, PAY4), - IAVF_PTT(64, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4), - IAVF_PTT(65, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT -> MAC --> IPv6 */ - IAVF_PTT(66, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3), - IAVF_PTT(67, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3), - IAVF_PTT(68, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(69), - IAVF_PTT(70, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP, PAY4), - IAVF_PTT(71, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4), - IAVF_PTT(72, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT --> MAC/VLAN */ - IAVF_PTT(73, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3), - - /* IPv4 ---> GRE/NAT -> MAC/VLAN --> IPv4 */ - IAVF_PTT(74, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3), - IAVF_PTT(75, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3), - IAVF_PTT(76, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(77), - IAVF_PTT(78, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP, PAY4), - IAVF_PTT(79, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4), - IAVF_PTT(80, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4), - - /* IPv4 -> GRE/NAT -> MAC/VLAN --> IPv6 */ - IAVF_PTT(81, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3), - IAVF_PTT(82, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3), - IAVF_PTT(83, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(84), - IAVF_PTT(85, 
IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP, PAY4), - IAVF_PTT(86, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4), - IAVF_PTT(87, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4), - - /* Non Tunneled IPv6 */ - IAVF_PTT(88, IP, IPV6, FRG, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(89, IP, IPV6, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(91), - IAVF_PTT(92, IP, IPV6, NOF, NONE, NONE, NOF, TCP, PAY4), - IAVF_PTT(93, IP, IPV6, NOF, NONE, NONE, NOF, SCTP, PAY4), - IAVF_PTT(94, IP, IPV6, NOF, NONE, NONE, NOF, ICMP, PAY4), - - /* IPv6 --> IPv4 */ - IAVF_PTT(95, IP, IPV6, NOF, IP_IP, IPV4, FRG, NONE, PAY3), - IAVF_PTT(96, IP, IPV6, NOF, IP_IP, IPV4, NOF, NONE, PAY3), - IAVF_PTT(97, IP, IPV6, NOF, IP_IP, IPV4, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(98), - IAVF_PTT(99, IP, IPV6, NOF, IP_IP, IPV4, NOF, TCP, PAY4), - IAVF_PTT(100, IP, IPV6, NOF, IP_IP, IPV4, NOF, SCTP, PAY4), - IAVF_PTT(101, IP, IPV6, NOF, IP_IP, IPV4, NOF, ICMP, PAY4), - - /* IPv6 --> IPv6 */ - IAVF_PTT(102, IP, IPV6, NOF, IP_IP, IPV6, FRG, NONE, PAY3), - IAVF_PTT(103, IP, IPV6, NOF, IP_IP, IPV6, NOF, NONE, PAY3), - IAVF_PTT(104, IP, IPV6, NOF, IP_IP, IPV6, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(105), - IAVF_PTT(106, IP, IPV6, NOF, IP_IP, IPV6, NOF, TCP, PAY4), - IAVF_PTT(107, IP, IPV6, NOF, IP_IP, IPV6, NOF, SCTP, PAY4), - IAVF_PTT(108, IP, IPV6, NOF, IP_IP, IPV6, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT */ - IAVF_PTT(109, IP, IPV6, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3), - - /* IPv6 --> GRE/NAT -> IPv4 */ - IAVF_PTT(110, IP, IPV6, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3), - IAVF_PTT(111, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3), - IAVF_PTT(112, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(113), - IAVF_PTT(114, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, TCP, PAY4), - IAVF_PTT(115, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4), - IAVF_PTT(116, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> IPv6 */ - IAVF_PTT(117, IP, IPV6, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3), - IAVF_PTT(118, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3), - IAVF_PTT(119, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(120), - IAVF_PTT(121, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, TCP, PAY4), - IAVF_PTT(122, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4), - IAVF_PTT(123, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> MAC */ - IAVF_PTT(124, IP, IPV6, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3), - - /* IPv6 --> GRE/NAT -> MAC -> IPv4 */ - IAVF_PTT(125, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3), - IAVF_PTT(126, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3), - IAVF_PTT(127, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(128), - IAVF_PTT(129, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP, PAY4), - IAVF_PTT(130, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4), - IAVF_PTT(131, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> MAC -> IPv6 */ - IAVF_PTT(132, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3), - IAVF_PTT(133, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3), - IAVF_PTT(134, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(135), - IAVF_PTT(136, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP, PAY4), - IAVF_PTT(137, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4), - IAVF_PTT(138, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, 
PAY4), - - /* IPv6 --> GRE/NAT -> MAC/VLAN */ - IAVF_PTT(139, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3), - - /* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv4 */ - IAVF_PTT(140, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3), - IAVF_PTT(141, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3), - IAVF_PTT(142, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(143), - IAVF_PTT(144, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP, PAY4), - IAVF_PTT(145, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4), - IAVF_PTT(146, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv6 */ - IAVF_PTT(147, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3), - IAVF_PTT(148, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3), - IAVF_PTT(149, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP, PAY4), - IAVF_PTT_UNUSED_ENTRY(150), - IAVF_PTT(151, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP, PAY4), - IAVF_PTT(152, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4), - IAVF_PTT(153, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4), - - /* unused entries */ - [154 ... 255] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 } -}; - /** * iavf_aq_send_msg_to_pf * @hw: pointer to the hardware structure diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index 378c3e9ddf9d45..20fc7cc6524a0f 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -2,6 +2,7 @@ /* Copyright(c) 2013 - 2018 Intel Corporation. */ #include +#include #include /* ethtool support for iavf */ @@ -47,16 +48,6 @@ struct iavf_stats { .stat_offset = offsetof(_type, _stat) \ } -/* Helper macro for defining some statistics related to queues */ -#define IAVF_QUEUE_STAT(_name, _stat) \ - IAVF_STAT(struct iavf_ring, _name, _stat) - -/* Stats associated with a Tx or Rx ring */ -static const struct iavf_stats iavf_gstrings_queue_stats[] = { - IAVF_QUEUE_STAT("%s-%u.packets", stats.packets), - IAVF_QUEUE_STAT("%s-%u.bytes", stats.bytes), -}; - /** * iavf_add_one_ethtool_stat - copy the stat into the supplied buffer * @data: location to store the stat value @@ -142,43 +133,6 @@ __iavf_add_ethtool_stats(u64 **data, void *pointer, #define iavf_add_ethtool_stats(data, pointer, stats) \ __iavf_add_ethtool_stats(data, pointer, stats, ARRAY_SIZE(stats)) -/** - * iavf_add_queue_stats - copy queue statistics into supplied buffer - * @data: ethtool stats buffer - * @ring: the ring to copy - * - * Queue statistics must be copied while protected by - * u64_stats_fetch_begin, so we can't directly use iavf_add_ethtool_stats. - * Assumes that queue stats are defined in iavf_gstrings_queue_stats. If the - * ring pointer is null, zero out the queue stat values and update the data - * pointer. Otherwise safely copy the stats from the ring into the supplied - * buffer and update the data pointer when finished. - * - * This function expects to be called while under rcu_read_lock(). - **/ -static void -iavf_add_queue_stats(u64 **data, struct iavf_ring *ring) -{ - const unsigned int size = ARRAY_SIZE(iavf_gstrings_queue_stats); - const struct iavf_stats *stats = iavf_gstrings_queue_stats; - unsigned int start; - unsigned int i; - - /* To avoid invalid statistics values, ensure that we keep retrying - * the copy until we get a consistent value according to - * u64_stats_fetch_retry. 
But first, make sure our ring is - * non-null before attempting to access its syncp. - */ - do { - start = !ring ? 0 : u64_stats_fetch_begin(&ring->syncp); - for (i = 0; i < size; i++) - iavf_add_one_ethtool_stat(&(*data)[i], ring, &stats[i]); - } while (ring && u64_stats_fetch_retry(&ring->syncp, start)); - - /* Once we successfully copy the stats in, update the data pointer */ - *data += size; -} - /** * __iavf_add_stat_strings - copy stat strings into ethtool buffer * @p: ethtool supplied buffer @@ -238,31 +192,6 @@ static const struct iavf_stats iavf_gstrings_stats[] = { #define IAVF_STATS_LEN ARRAY_SIZE(iavf_gstrings_stats) -#define IAVF_QUEUE_STATS_LEN ARRAY_SIZE(iavf_gstrings_queue_stats) - -/* For now we have one and only one private flag and it is only defined - * when we have support for the SKIP_CPU_SYNC DMA attribute. Instead - * of leaving all this code sitting around empty we will strip it unless - * our one private flag is actually available. - */ -struct iavf_priv_flags { - char flag_string[ETH_GSTRING_LEN]; - u32 flag; - bool read_only; -}; - -#define IAVF_PRIV_FLAG(_name, _flag, _read_only) { \ - .flag_string = _name, \ - .flag = _flag, \ - .read_only = _read_only, \ -} - -static const struct iavf_priv_flags iavf_gstrings_priv_flags[] = { - IAVF_PRIV_FLAG("legacy-rx", IAVF_FLAG_LEGACY_RX, 0), -}; - -#define IAVF_PRIV_FLAGS_STR_LEN ARRAY_SIZE(iavf_gstrings_priv_flags) - /** * iavf_get_link_ksettings - Get Link Speed and Duplex settings * @netdev: network interface device structure @@ -332,20 +261,22 @@ static int iavf_get_link_ksettings(struct net_device *netdev, **/ static int iavf_get_sset_count(struct net_device *netdev, int sset) { - /* Report the maximum number queues, even if not every queue is - * currently configured. Since allocation of queues is in pairs, - * use netdev->real_num_tx_queues * 2. The real_num_tx_queues is set - * at device creation and never changes. - */ + u32 num; - if (sset == ETH_SS_STATS) - return IAVF_STATS_LEN + - (IAVF_QUEUE_STATS_LEN * 2 * - netdev->real_num_tx_queues); - else if (sset == ETH_SS_PRIV_FLAGS) - return IAVF_PRIV_FLAGS_STR_LEN; - else + switch (sset) { + case ETH_SS_STATS: + /* Per-queue */ + num = libie_rq_stats_get_sset_count(); + num += libie_sq_stats_get_sset_count(); + num *= netdev->real_num_tx_queues; + + /* Global */ + num += IAVF_STATS_LEN; + + return num; + default: return -EINVAL; + } } /** @@ -372,34 +303,15 @@ static void iavf_get_ethtool_stats(struct net_device *netdev, * it to iterate over rings' stats. 
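The per-queue counters that replace iavf_gstrings_queue_stats are read with the same u64_stats seqcount discipline the removed helper documents: retry the copy until the sequence number is stable. A self-contained sketch of that reader pattern; the structure and field names are placeholders rather than the libie ones:

#include <linux/u64_stats_sync.h>

struct example_ring_stats {
	struct u64_stats_sync syncp;
	u64_stats_t packets;
	u64_stats_t bytes;
};

/* Copy a consistent packets/bytes snapshot without locking the hot path */
static void example_read_stats(const struct example_ring_stats *s,
			       u64 *packets, u64 *bytes)
{
	unsigned int start;

	do {
		start = u64_stats_fetch_begin(&s->syncp);
		*packets = u64_stats_read(&s->packets);
		*bytes = u64_stats_read(&s->bytes);
	} while (u64_stats_fetch_retry(&s->syncp, start));
}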
*/ for (i = 0; i < adapter->num_active_queues; i++) { - struct iavf_ring *ring; + /* Rx rings stats */ + libie_rq_stats_get_data(&data, &adapter->rx_rings[i].rq_stats); /* Tx rings stats */ - ring = &adapter->tx_rings[i]; - iavf_add_queue_stats(&data, ring); - - /* Rx rings stats */ - ring = &adapter->rx_rings[i]; - iavf_add_queue_stats(&data, ring); + libie_sq_stats_get_data(&data, &adapter->tx_rings[i].sq_stats); } rcu_read_unlock(); } -/** - * iavf_get_priv_flag_strings - Get private flag strings - * @netdev: network interface device structure - * @data: buffer for string data - * - * Builds the private flags string table - **/ -static void iavf_get_priv_flag_strings(struct net_device *netdev, u8 *data) -{ - unsigned int i; - - for (i = 0; i < IAVF_PRIV_FLAGS_STR_LEN; i++) - ethtool_puts(&data, iavf_gstrings_priv_flags[i].flag_string); -} - /** * iavf_get_stat_strings - Get stat strings * @netdev: network interface device structure @@ -417,10 +329,8 @@ static void iavf_get_stat_strings(struct net_device *netdev, u8 *data) * real_num_tx_queues for both Tx and Rx queues. */ for (i = 0; i < netdev->real_num_tx_queues; i++) { - iavf_add_stat_strings(&data, iavf_gstrings_queue_stats, - "tx", i); - iavf_add_stat_strings(&data, iavf_gstrings_queue_stats, - "rx", i); + libie_rq_stats_get_strings(&data, i); + libie_sq_stats_get_strings(&data, i); } } @@ -438,108 +348,11 @@ static void iavf_get_strings(struct net_device *netdev, u32 sset, u8 *data) case ETH_SS_STATS: iavf_get_stat_strings(netdev, data); break; - case ETH_SS_PRIV_FLAGS: - iavf_get_priv_flag_strings(netdev, data); - break; default: break; } } -/** - * iavf_get_priv_flags - report device private flags - * @netdev: network interface device structure - * - * The get string set count and the string set should be matched for each - * flag returned. Add new strings for each flag to the iavf_gstrings_priv_flags - * array. - * - * Returns a u32 bitmap of flags. - **/ -static u32 iavf_get_priv_flags(struct net_device *netdev) -{ - struct iavf_adapter *adapter = netdev_priv(netdev); - u32 i, ret_flags = 0; - - for (i = 0; i < IAVF_PRIV_FLAGS_STR_LEN; i++) { - const struct iavf_priv_flags *priv_flags; - - priv_flags = &iavf_gstrings_priv_flags[i]; - - if (priv_flags->flag & adapter->flags) - ret_flags |= BIT(i); - } - - return ret_flags; -} - -/** - * iavf_set_priv_flags - set private flags - * @netdev: network interface device structure - * @flags: bit flags to be set - **/ -static int iavf_set_priv_flags(struct net_device *netdev, u32 flags) -{ - struct iavf_adapter *adapter = netdev_priv(netdev); - u32 orig_flags, new_flags, changed_flags; - int ret = 0; - u32 i; - - orig_flags = READ_ONCE(adapter->flags); - new_flags = orig_flags; - - for (i = 0; i < IAVF_PRIV_FLAGS_STR_LEN; i++) { - const struct iavf_priv_flags *priv_flags; - - priv_flags = &iavf_gstrings_priv_flags[i]; - - if (flags & BIT(i)) - new_flags |= priv_flags->flag; - else - new_flags &= ~(priv_flags->flag); - - if (priv_flags->read_only && - ((orig_flags ^ new_flags) & ~BIT(i))) - return -EOPNOTSUPP; - } - - /* Before we finalize any flag changes, any checks which we need to - * perform to determine if the new flags will be supported should go - * here... - */ - - /* Compare and exchange the new flags into place. If we failed, that - * is if cmpxchg returns anything but the old value, this means - * something else must have modified the flags variable since we - * copied it. We'll just punt with an error and log something in the - * message buffer. 
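The get_sset_count, get_strings and get_ethtool_stats callbacks must walk the stats in exactly the same order, which is why the conversion routes all three through the libie helpers. An assumed sketch of what a per-queue string emitter like libie_rq_stats_get_strings() boils down to; the stat names and "rx%u_" prefix are guesses for illustration only:

#include <linux/ethtool.h>
#include <linux/kernel.h>

static const char * const example_rq_stat_names[] = {
	"packets", "bytes", "alloc_page_fail",
};

/* Emit one string per counter, in the same order the data callback uses */
static void example_rq_stats_get_strings(u8 **data, u32 qid)
{
	for (u32 i = 0; i < ARRAY_SIZE(example_rq_stat_names); i++)
		ethtool_sprintf(data, "rx%u_%s", qid,
				example_rq_stat_names[i]);
}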
- */ - if (cmpxchg(&adapter->flags, orig_flags, new_flags) != orig_flags) { - dev_warn(&adapter->pdev->dev, - "Unable to update adapter->flags as it was modified by another thread...\n"); - return -EAGAIN; - } - - changed_flags = orig_flags ^ new_flags; - - /* Process any additional changes needed as a result of flag changes. - * The changed_flags value reflects the list of bits that were changed - * in the code above. - */ - - /* issue a reset to force legacy-rx change to take effect */ - if (changed_flags & IAVF_FLAG_LEGACY_RX) { - if (netif_running(netdev)) { - iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED); - ret = iavf_wait_for_reset(adapter); - if (ret) - netdev_warn(netdev, "Changing private flags timeout or interrupted waiting for reset"); - } - } - - return ret; -} - /** * iavf_get_msglevel - Get debug message level * @netdev: network interface device structure @@ -585,7 +398,6 @@ static void iavf_get_drvinfo(struct net_device *netdev, strscpy(drvinfo->driver, iavf_driver_name, 32); strscpy(drvinfo->fw_version, "N/A", 4); strscpy(drvinfo->bus_info, pci_name(adapter->pdev), 32); - drvinfo->n_priv_flags = IAVF_PRIV_FLAGS_STR_LEN; } /** @@ -1995,8 +1807,6 @@ static const struct ethtool_ops iavf_ethtool_ops = { .get_strings = iavf_get_strings, .get_ethtool_stats = iavf_get_ethtool_stats, .get_sset_count = iavf_get_sset_count, - .get_priv_flags = iavf_get_priv_flags, - .set_priv_flags = iavf_set_priv_flags, .get_msglevel = iavf_get_msglevel, .set_msglevel = iavf_set_msglevel, .get_coalesce = iavf_get_coalesce, diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 335fd13e86f717..c74fd8c39505f0 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright(c) 2013 - 2018 Intel Corporation. */ +#include + #include "iavf.h" #include "iavf_prototype.h" /* All iavf tracepoints are defined by the include below, which must @@ -45,6 +47,7 @@ MODULE_DEVICE_TABLE(pci, iavf_pci_tbl); MODULE_ALIAS("i40evf"); MODULE_AUTHOR("Intel Corporation, "); MODULE_DESCRIPTION("Intel(R) Ethernet Adaptive Virtual Function Network Driver"); +MODULE_IMPORT_NS(LIBIE); MODULE_LICENSE("GPL v2"); static const struct net_device_ops iavf_netdev_ops; @@ -714,40 +717,10 @@ static void iavf_configure_tx(struct iavf_adapter *adapter) **/ static void iavf_configure_rx(struct iavf_adapter *adapter) { - unsigned int rx_buf_len = IAVF_RXBUFFER_2048; struct iavf_hw *hw = &adapter->hw; - int i; - - /* Legacy Rx will always default to a 2048 buffer size. */ -#if (PAGE_SIZE < 8192) - if (!(adapter->flags & IAVF_FLAG_LEGACY_RX)) { - struct net_device *netdev = adapter->netdev; - - /* For jumbo frames on systems with 4K pages we have to use - * an order 1 page, so we might as well increase the size - * of our Rx buffer to make better use of the available space - */ - rx_buf_len = IAVF_RXBUFFER_3072; - - /* We use a 1536 buffer size for configurations with - * standard Ethernet mtu. On x86 this gives us enough room - * for shared info and 192 bytes of padding. 
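The arithmetic behind the removed legacy-rx buffer sizing, using the x86_64 figures the old comment refers to: a half-page truesize of 2048 bytes has to hold headroom plus data plus the skb_shared_info tail, i.e. 2048 = 192 (headroom/padding) + 1536 (data) + 320 (struct skb_shared_info). Once the required padding grows, 2048 no longer fits a standard MTU frame, which is roughly what IAVF_2K_TOO_SMALL_WITH_PADDING checked; the page_pool setup in libie now derives these numbers itself. A sketch of the underlying formula (the helper name is illustrative):

#include <linux/skbuff.h>

/* Truesize a build_skb()-style Rx buffer needs for a given data length */
static unsigned int example_truesize(unsigned int headroom,
				     unsigned int buf_len)
{
	return SKB_DATA_ALIGN(headroom + buf_len) +
	       SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
}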
- */ - if (!IAVF_2K_TOO_SMALL_WITH_PADDING && - (netdev->mtu <= ETH_DATA_LEN)) - rx_buf_len = IAVF_RXBUFFER_1536 - NET_IP_ALIGN; - } -#endif - for (i = 0; i < adapter->num_active_queues; i++) { + for (u32 i = 0; i < adapter->num_active_queues; i++) adapter->rx_rings[i].tail = hw->hw_addr + IAVF_QRX_TAIL1(i); - adapter->rx_rings[i].rx_buf_len = rx_buf_len; - - if (adapter->flags & IAVF_FLAG_LEGACY_RX) - clear_ring_build_skb_enabled(&adapter->rx_rings[i]); - else - set_ring_build_skb_enabled(&adapter->rx_rings[i]); - } } /** @@ -1611,13 +1584,14 @@ static int iavf_alloc_queues(struct iavf_adapter *adapter) tx_ring->itr_setting = IAVF_ITR_TX_DEF; if (adapter->flags & IAVF_FLAG_WB_ON_ITR_CAPABLE) tx_ring->flags |= IAVF_TXR_FLAGS_WB_ON_ITR; + u64_stats_init(&tx_ring->sq_stats.syncp); rx_ring = &adapter->rx_rings[i]; rx_ring->queue_index = i; rx_ring->netdev = adapter->netdev; - rx_ring->dev = &adapter->pdev->dev; rx_ring->count = adapter->rx_desc_count; rx_ring->itr_setting = IAVF_ITR_RX_DEF; + u64_stats_init(&rx_ring->rq_stats.syncp); } adapter->num_active_queues = num_active_queues; @@ -2651,9 +2625,9 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter) iavf_set_ethtool_ops(netdev); netdev->watchdog_timeo = 5 * HZ; - /* MTU range: 68 - 9710 */ + /* MTU range: 68 - 16356 */ netdev->min_mtu = ETH_MIN_MTU; - netdev->max_mtu = IAVF_MAX_RXBUFFER - IAVF_PACKET_HDR_PAD; + netdev->max_mtu = LIBIE_MAX_MTU; if (!is_valid_ether_addr(adapter->hw.mac.addr)) { dev_info(&pdev->dev, "Invalid MAC address %pM, using random\n", diff --git a/drivers/net/ethernet/intel/iavf/iavf_prototype.h b/drivers/net/ethernet/intel/iavf/iavf_prototype.h index 4a48e6171405fc..48c3901381b4a0 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_prototype.h +++ b/drivers/net/ethernet/intel/iavf/iavf_prototype.h @@ -45,13 +45,6 @@ enum iavf_status iavf_aq_set_rss_lut(struct iavf_hw *hw, u16 seid, enum iavf_status iavf_aq_set_rss_key(struct iavf_hw *hw, u16 seid, struct iavf_aqc_get_set_rss_key_data *key); -extern struct iavf_rx_ptype_decoded iavf_ptype_lookup[]; - -static inline struct iavf_rx_ptype_decoded decode_rx_desc_ptype(u8 ptype) -{ - return iavf_ptype_lookup[ptype]; -} - void iavf_vf_parse_hw_config(struct iavf_hw *hw, struct virtchnl_vf_resource *msg); enum iavf_status iavf_aq_send_msg_to_pf(struct iavf_hw *hw, diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c index b71484c87a8461..4473ab9c870fb6 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c @@ -2,6 +2,7 @@ /* Copyright(c) 2013 - 2018 Intel Corporation. */ #include +#include #include #include "iavf.h" @@ -176,6 +177,9 @@ void iavf_detect_recover_hung(struct iavf_vsi *vsi) for (i = 0; i < vsi->back->num_active_queues; i++) { tx_ring = &vsi->back->tx_rings[i]; if (tx_ring && tx_ring->desc) { + const struct libie_sq_stats *st = &tx_ring->sq_stats; + u32 start; + /* If packet counter has not changed the queue is * likely stalled, so force an interrupt for this * queue. @@ -183,8 +187,13 @@ void iavf_detect_recover_hung(struct iavf_vsi *vsi) * prev_pkt_ctr would be negative if there was no * pending work. 
*/ - packets = tx_ring->stats.packets & INT_MAX; - if (tx_ring->tx_stats.prev_pkt_ctr == packets) { + do { + start = u64_stats_fetch_begin(&st->syncp); + packets = u64_stats_read(&st->packets) & + INT_MAX; + } while (u64_stats_fetch_retry(&st->syncp, start)); + + if (tx_ring->prev_pkt_ctr == packets) { iavf_force_wb(vsi, tx_ring->q_vector); continue; } @@ -193,7 +202,7 @@ void iavf_detect_recover_hung(struct iavf_vsi *vsi) * to iavf_get_tx_pending() */ smp_rmb(); - tx_ring->tx_stats.prev_pkt_ctr = + tx_ring->prev_pkt_ctr = iavf_get_tx_pending(tx_ring, true) ? packets : -1; } } @@ -212,11 +221,11 @@ void iavf_detect_recover_hung(struct iavf_vsi *vsi) static bool iavf_clean_tx_irq(struct iavf_vsi *vsi, struct iavf_ring *tx_ring, int napi_budget) { + unsigned int budget = IAVF_DEFAULT_IRQ_WORK; + struct libie_sq_onstack_stats stats = { }; int i = tx_ring->next_to_clean; struct iavf_tx_buffer *tx_buf; struct iavf_tx_desc *tx_desc; - unsigned int total_bytes = 0, total_packets = 0; - unsigned int budget = IAVF_DEFAULT_IRQ_WORK; tx_buf = &tx_ring->tx_bi[i]; tx_desc = IAVF_TX_DESC(tx_ring, i); @@ -242,8 +251,8 @@ static bool iavf_clean_tx_irq(struct iavf_vsi *vsi, tx_buf->next_to_watch = NULL; /* update the statistics for this packet */ - total_bytes += tx_buf->bytecount; - total_packets += tx_buf->gso_segs; + stats.bytes += tx_buf->bytecount; + stats.packets += tx_buf->gso_segs; /* free the skb */ napi_consume_skb(tx_buf->skb, napi_budget); @@ -300,12 +309,9 @@ static bool iavf_clean_tx_irq(struct iavf_vsi *vsi, i += tx_ring->count; tx_ring->next_to_clean = i; - u64_stats_update_begin(&tx_ring->syncp); - tx_ring->stats.bytes += total_bytes; - tx_ring->stats.packets += total_packets; - u64_stats_update_end(&tx_ring->syncp); - tx_ring->q_vector->tx.total_bytes += total_bytes; - tx_ring->q_vector->tx.total_packets += total_packets; + libie_sq_napi_stats_add(&tx_ring->sq_stats, &stats); + tx_ring->q_vector->tx.total_bytes += stats.bytes; + tx_ring->q_vector->tx.total_packets += stats.packets; if (tx_ring->flags & IAVF_TXR_FLAGS_WB_ON_ITR) { /* check to see if there are < 4 descriptors @@ -319,15 +325,15 @@ static bool iavf_clean_tx_irq(struct iavf_vsi *vsi, ((j / WB_STRIDE) == 0) && (j > 0) && !test_bit(__IAVF_VSI_DOWN, vsi->state) && (IAVF_DESC_UNUSED(tx_ring) != tx_ring->count)) - tx_ring->arm_wb = true; + tx_ring->flags |= IAVF_TXR_FLAGS_ARM_WB; } /* notify netdev of completed buffers */ netdev_tx_completed_queue(txring_txq(tx_ring), - total_packets, total_bytes); + stats.packets, stats.bytes); #define TX_WAKE_THRESHOLD ((s16)(DESC_NEEDED * 2)) - if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) && + if (unlikely(stats.packets && netif_carrier_ok(tx_ring->netdev) && (IAVF_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) { /* Make sure that anybody stopping the queue after this * sees the new next_to_clean. 
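The Tx clean path now accumulates packets/bytes in an on-stack structure and folds them into the ring counters once per poll, instead of the open-coded u64_stats_update_begin()/end() block it replaces. An assumed sketch of what a helper like libie_sq_napi_stats_add() amounts to (names and layout are placeholders, not the libie implementation):

#include <linux/u64_stats_sync.h>

struct example_sq_stats {
	struct u64_stats_sync syncp;
	u64_stats_t packets;
	u64_stats_t bytes;
};

struct example_sq_onstack {
	u32 packets;
	u32 bytes;
};

/* Fold the per-NAPI-poll deltas into the shared counters in one writer section */
static void example_sq_stats_add(struct example_sq_stats *stats,
				 const struct example_sq_onstack *delta)
{
	u64_stats_update_begin(&stats->syncp);
	u64_stats_add(&stats->packets, delta->packets);
	u64_stats_add(&stats->bytes, delta->bytes);
	u64_stats_update_end(&stats->syncp);
}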
@@ -338,7 +344,7 @@ static bool iavf_clean_tx_irq(struct iavf_vsi *vsi, !test_bit(__IAVF_VSI_DOWN, vsi->state)) { netif_wake_subqueue(tx_ring->netdev, tx_ring->queue_index); - ++tx_ring->tx_stats.restart_queue; + libie_stats_inc_one(&tx_ring->sq_stats, restarts); } } @@ -674,7 +680,7 @@ int iavf_setup_tx_descriptors(struct iavf_ring *tx_ring) tx_ring->next_to_use = 0; tx_ring->next_to_clean = 0; - tx_ring->tx_stats.prev_pkt_ctr = -1; + tx_ring->prev_pkt_ctr = -1; return 0; err: @@ -689,9 +695,6 @@ int iavf_setup_tx_descriptors(struct iavf_ring *tx_ring) **/ static void iavf_clean_rx_ring(struct iavf_ring *rx_ring) { - unsigned long bi_size; - u16 i; - /* ring already cleared, nothing to do */ if (!rx_ring->rx_bi) return; @@ -701,41 +704,16 @@ static void iavf_clean_rx_ring(struct iavf_ring *rx_ring) rx_ring->skb = NULL; } - /* Free all the Rx ring sk_buffs */ - for (i = 0; i < rx_ring->count; i++) { - struct iavf_rx_buffer *rx_bi = &rx_ring->rx_bi[i]; + /* Free all the Rx ring buffers */ + for (u32 i = rx_ring->next_to_clean; i != rx_ring->next_to_use; ) { + const struct libie_rx_buffer *rx_bi = &rx_ring->rx_bi[i]; - if (!rx_bi->page) - continue; + page_pool_put_full_page(rx_ring->pp, rx_bi->page, false); - /* Invalidate cache lines that may have been written to by - * device so that we avoid corrupting memory. - */ - dma_sync_single_range_for_cpu(rx_ring->dev, - rx_bi->dma, - rx_bi->page_offset, - rx_ring->rx_buf_len, - DMA_FROM_DEVICE); - - /* free resources associated with mapping */ - dma_unmap_page_attrs(rx_ring->dev, rx_bi->dma, - iavf_rx_pg_size(rx_ring), - DMA_FROM_DEVICE, - IAVF_RX_DMA_ATTR); - - __page_frag_cache_drain(rx_bi->page, rx_bi->pagecnt_bias); - - rx_bi->page = NULL; - rx_bi->page_offset = 0; + if (unlikely(++i == rx_ring->count)) + i = 0; } - bi_size = sizeof(struct iavf_rx_buffer) * rx_ring->count; - memset(rx_ring->rx_bi, 0, bi_size); - - /* Zero out the descriptor ring */ - memset(rx_ring->desc, 0, rx_ring->size); - - rx_ring->next_to_alloc = 0; rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; } @@ -748,15 +726,22 @@ static void iavf_clean_rx_ring(struct iavf_ring *rx_ring) **/ void iavf_free_rx_resources(struct iavf_ring *rx_ring) { + struct libie_buf_queue bq = { + .pp = rx_ring->pp, + }; + iavf_clean_rx_ring(rx_ring); kfree(rx_ring->rx_bi); rx_ring->rx_bi = NULL; if (rx_ring->desc) { - dma_free_coherent(rx_ring->dev, rx_ring->size, + dma_free_coherent(rx_ring->pp->p.dev, rx_ring->size, rx_ring->desc, rx_ring->dma); rx_ring->desc = NULL; } + + libie_rx_page_pool_destroy(&bq); + rx_ring->pp = bq.pp; } /** @@ -767,38 +752,51 @@ void iavf_free_rx_resources(struct iavf_ring *rx_ring) **/ int iavf_setup_rx_descriptors(struct iavf_ring *rx_ring) { - struct device *dev = rx_ring->dev; - int bi_size; + struct libie_buf_queue bq = { + .count = rx_ring->count, + }; + int ret; + + ret = libie_rx_page_pool_create(&bq, &rx_ring->q_vector->napi); + if (ret) + return ret; + + rx_ring->pp = bq.pp; + rx_ring->truesize = bq.truesize; + rx_ring->rx_buf_len = bq.rx_buf_len; /* warn if we are about to overwrite the pointer */ WARN_ON(rx_ring->rx_bi); - bi_size = sizeof(struct iavf_rx_buffer) * rx_ring->count; - rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL); + rx_ring->rx_bi = kcalloc(rx_ring->count, sizeof(*rx_ring->rx_bi), + GFP_KERNEL); if (!rx_ring->rx_bi) goto err; - u64_stats_init(&rx_ring->syncp); - /* Round up to nearest 4K */ rx_ring->size = rx_ring->count * sizeof(union iavf_32byte_rx_desc); rx_ring->size = ALIGN(rx_ring->size, 4096); - rx_ring->desc = 
dma_alloc_coherent(dev, rx_ring->size, + rx_ring->desc = dma_alloc_coherent(bq.pp->p.dev, rx_ring->size, &rx_ring->dma, GFP_KERNEL); if (!rx_ring->desc) { - dev_info(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n", + dev_info(bq.pp->p.dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n", rx_ring->size); - goto err; + goto err_free_buf; } - rx_ring->next_to_alloc = 0; rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; return 0; -err: + +err_free_buf: kfree(rx_ring->rx_bi); rx_ring->rx_bi = NULL; + +err: + libie_rx_page_pool_destroy(&bq); + rx_ring->pp = bq.pp; + return -ENOMEM; } @@ -811,9 +809,6 @@ static void iavf_release_rx_desc(struct iavf_ring *rx_ring, u32 val) { rx_ring->next_to_use = val; - /* update next to alloc since we have filled the ring */ - rx_ring->next_to_alloc = val; - /* Force memory writes to complete before letting h/w * know there are new descriptors to fetch. (Only * applicable for weak-ordered memory model archs, @@ -823,69 +818,6 @@ static void iavf_release_rx_desc(struct iavf_ring *rx_ring, u32 val) writel(val, rx_ring->tail); } -/** - * iavf_rx_offset - Return expected offset into page to access data - * @rx_ring: Ring we are requesting offset of - * - * Returns the offset value for ring into the data buffer. - */ -static unsigned int iavf_rx_offset(struct iavf_ring *rx_ring) -{ - return ring_uses_build_skb(rx_ring) ? IAVF_SKB_PAD : 0; -} - -/** - * iavf_alloc_mapped_page - recycle or make a new page - * @rx_ring: ring to use - * @bi: rx_buffer struct to modify - * - * Returns true if the page was successfully allocated or - * reused. - **/ -static bool iavf_alloc_mapped_page(struct iavf_ring *rx_ring, - struct iavf_rx_buffer *bi) -{ - struct page *page = bi->page; - dma_addr_t dma; - - /* since we are recycling buffers we should seldom need to alloc */ - if (likely(page)) { - rx_ring->rx_stats.page_reuse_count++; - return true; - } - - /* alloc new page for storage */ - page = dev_alloc_pages(iavf_rx_pg_order(rx_ring)); - if (unlikely(!page)) { - rx_ring->rx_stats.alloc_page_failed++; - return false; - } - - /* map page for use */ - dma = dma_map_page_attrs(rx_ring->dev, page, 0, - iavf_rx_pg_size(rx_ring), - DMA_FROM_DEVICE, - IAVF_RX_DMA_ATTR); - - /* if mapping failed free memory back to system since - * there isn't much point in holding memory we can't use - */ - if (dma_mapping_error(rx_ring->dev, dma)) { - __free_pages(page, iavf_rx_pg_order(rx_ring)); - rx_ring->rx_stats.alloc_page_failed++; - return false; - } - - bi->dma = dma; - bi->page = page; - bi->page_offset = iavf_rx_offset(rx_ring); - - /* initialize pagecnt_bias to 1 representing we fully own page */ - bi->pagecnt_bias = 1; - - return true; -} - /** * iavf_receive_skb - Send a completed packet up the stack * @rx_ring: rx ring in play @@ -916,38 +848,37 @@ static void iavf_receive_skb(struct iavf_ring *rx_ring, **/ bool iavf_alloc_rx_buffers(struct iavf_ring *rx_ring, u16 cleaned_count) { + const struct libie_buf_queue bq = { + .pp = rx_ring->pp, + .rx_bi = rx_ring->rx_bi, + .truesize = rx_ring->truesize, + .count = rx_ring->count, + }; u16 ntu = rx_ring->next_to_use; union iavf_rx_desc *rx_desc; - struct iavf_rx_buffer *bi; /* do nothing if no valid netdev defined */ if (!rx_ring->netdev || !cleaned_count) return false; rx_desc = IAVF_RX_DESC(rx_ring, ntu); - bi = &rx_ring->rx_bi[ntu]; do { - if (!iavf_alloc_mapped_page(rx_ring, bi)) - goto no_buffers; + dma_addr_t addr; - /* sync the buffer for use by the device */ - 
dma_sync_single_range_for_device(rx_ring->dev, bi->dma, - bi->page_offset, - rx_ring->rx_buf_len, - DMA_FROM_DEVICE); + addr = libie_rx_alloc(&bq, ntu); + if (addr == DMA_MAPPING_ERROR) + goto no_buffers; /* Refresh the desc even if buffer_addrs didn't change * because each write-back erases this info. */ - rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset); + rx_desc->read.pkt_addr = cpu_to_le64(addr); rx_desc++; - bi++; ntu++; if (unlikely(ntu == rx_ring->count)) { rx_desc = IAVF_RX_DESC(rx_ring, 0); - bi = rx_ring->rx_bi; ntu = 0; } @@ -966,6 +897,8 @@ bool iavf_alloc_rx_buffers(struct iavf_ring *rx_ring, u16 cleaned_count) if (rx_ring->next_to_use != ntu) iavf_release_rx_desc(rx_ring, ntu); + libie_stats_inc_one(&rx_ring->rq_stats, alloc_page_fail); + /* make sure to come back via polling to try again after * allocation failure */ @@ -982,38 +915,30 @@ static void iavf_rx_checksum(struct iavf_vsi *vsi, struct sk_buff *skb, union iavf_rx_desc *rx_desc) { - struct iavf_rx_ptype_decoded decoded; + struct libie_rx_ptype_parsed parsed; u32 rx_error, rx_status; bool ipv4, ipv6; u8 ptype; u64 qword; - qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); - ptype = FIELD_GET(IAVF_RXD_QW1_PTYPE_MASK, qword); - rx_error = FIELD_GET(IAVF_RXD_QW1_ERROR_MASK, qword); - rx_status = FIELD_GET(IAVF_RXD_QW1_STATUS_MASK, qword); - decoded = decode_rx_desc_ptype(ptype); - skb->ip_summed = CHECKSUM_NONE; - skb_checksum_none_assert(skb); + qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); + ptype = FIELD_GET(IAVF_RXD_QW1_PTYPE_MASK, qword); - /* Rx csum enabled and ip headers found? */ - if (!(vsi->netdev->features & NETIF_F_RXCSUM)) + parsed = libie_parse_rx_ptype(ptype); + if (!libie_has_rx_checksum(vsi->netdev, parsed)) return; + rx_error = FIELD_GET(IAVF_RXD_QW1_ERROR_MASK, qword); + rx_status = FIELD_GET(IAVF_RXD_QW1_STATUS_MASK, qword); + /* did the hardware decode the packet and checksum? 
*/ if (!(rx_status & BIT(IAVF_RX_DESC_STATUS_L3L4P_SHIFT))) return; - /* both known and outer_ip must be set for the below code to work */ - if (!(decoded.known && decoded.outer_ip)) - return; - - ipv4 = (decoded.outer_ip == IAVF_RX_PTYPE_OUTER_IP) && - (decoded.outer_ip_ver == IAVF_RX_PTYPE_OUTER_IPV4); - ipv6 = (decoded.outer_ip == IAVF_RX_PTYPE_OUTER_IP) && - (decoded.outer_ip_ver == IAVF_RX_PTYPE_OUTER_IPV6); + ipv4 = parsed.outer_ip == LIBIE_RX_PTYPE_OUTER_IPV4; + ipv6 = parsed.outer_ip == LIBIE_RX_PTYPE_OUTER_IPV6; if (ipv4 && (rx_error & (BIT(IAVF_RX_DESC_ERROR_IPE_SHIFT) | @@ -1037,46 +962,13 @@ static void iavf_rx_checksum(struct iavf_vsi *vsi, if (rx_error & BIT(IAVF_RX_DESC_ERROR_PPRS_SHIFT)) return; - /* Only report checksum unnecessary for TCP, UDP, or SCTP */ - switch (decoded.inner_prot) { - case IAVF_RX_PTYPE_INNER_PROT_TCP: - case IAVF_RX_PTYPE_INNER_PROT_UDP: - case IAVF_RX_PTYPE_INNER_PROT_SCTP: - skb->ip_summed = CHECKSUM_UNNECESSARY; - fallthrough; - default: - break; - } - + skb->ip_summed = CHECKSUM_UNNECESSARY; return; checksum_fail: vsi->back->hw_csum_rx_error++; } -/** - * iavf_ptype_to_htype - get a hash type - * @ptype: the ptype value from the descriptor - * - * Returns a hash type to be used by skb_set_hash - **/ -static int iavf_ptype_to_htype(u8 ptype) -{ - struct iavf_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype); - - if (!decoded.known) - return PKT_HASH_TYPE_NONE; - - if (decoded.outer_ip == IAVF_RX_PTYPE_OUTER_IP && - decoded.payload_layer == IAVF_RX_PTYPE_PAYLOAD_LAYER_PAY4) - return PKT_HASH_TYPE_L4; - else if (decoded.outer_ip == IAVF_RX_PTYPE_OUTER_IP && - decoded.payload_layer == IAVF_RX_PTYPE_PAYLOAD_LAYER_PAY3) - return PKT_HASH_TYPE_L3; - else - return PKT_HASH_TYPE_L2; -} - /** * iavf_rx_hash - set the hash value in the skb * @ring: descriptor ring @@ -1089,17 +981,19 @@ static void iavf_rx_hash(struct iavf_ring *ring, struct sk_buff *skb, u8 rx_ptype) { + struct libie_rx_ptype_parsed parsed; u32 hash; const __le64 rss_mask = cpu_to_le64((u64)IAVF_RX_DESC_FLTSTAT_RSS_HASH << IAVF_RX_DESC_STATUS_FLTSTAT_SHIFT); - if (!(ring->netdev->features & NETIF_F_RXHASH)) + parsed = libie_parse_rx_ptype(rx_ptype); + if (!libie_has_rx_hash(ring->netdev, parsed)) return; if ((rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask) { hash = le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss); - skb_set_hash(skb, hash, iavf_ptype_to_htype(rx_ptype)); + libie_skb_set_hash(skb, hash, parsed); } } @@ -1151,96 +1045,10 @@ static bool iavf_cleanup_headers(struct iavf_ring *rx_ring, struct sk_buff *skb) return false; } -/** - * iavf_reuse_rx_page - page flip buffer and store it back on the ring - * @rx_ring: rx descriptor ring to store buffers on - * @old_buff: donor buffer to have page reused - * - * Synchronizes page for reuse by the adapter - **/ -static void iavf_reuse_rx_page(struct iavf_ring *rx_ring, - struct iavf_rx_buffer *old_buff) -{ - struct iavf_rx_buffer *new_buff; - u16 nta = rx_ring->next_to_alloc; - - new_buff = &rx_ring->rx_bi[nta]; - - /* update, and store next to alloc */ - nta++; - rx_ring->next_to_alloc = (nta < rx_ring->count) ? 
nta : 0; - - /* transfer page from old buffer to new buffer */ - new_buff->dma = old_buff->dma; - new_buff->page = old_buff->page; - new_buff->page_offset = old_buff->page_offset; - new_buff->pagecnt_bias = old_buff->pagecnt_bias; -} - -/** - * iavf_can_reuse_rx_page - Determine if this page can be reused by - * the adapter for another receive - * - * @rx_buffer: buffer containing the page - * - * If page is reusable, rx_buffer->page_offset is adjusted to point to - * an unused region in the page. - * - * For small pages, @truesize will be a constant value, half the size - * of the memory at page. We'll attempt to alternate between high and - * low halves of the page, with one half ready for use by the hardware - * and the other half being consumed by the stack. We use the page - * ref count to determine whether the stack has finished consuming the - * portion of this page that was passed up with a previous packet. If - * the page ref count is >1, we'll assume the "other" half page is - * still busy, and this page cannot be reused. - * - * For larger pages, @truesize will be the actual space used by the - * received packet (adjusted upward to an even multiple of the cache - * line size). This will advance through the page by the amount - * actually consumed by the received packets while there is still - * space for a buffer. Each region of larger pages will be used at - * most once, after which the page will not be reused. - * - * In either case, if the page is reusable its refcount is increased. - **/ -static bool iavf_can_reuse_rx_page(struct iavf_rx_buffer *rx_buffer) -{ - unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; - struct page *page = rx_buffer->page; - - /* Is any reuse possible? */ - if (!dev_page_is_reusable(page)) - return false; - -#if (PAGE_SIZE < 8192) - /* if we are only owner of page we can reuse it */ - if (unlikely((page_count(page) - pagecnt_bias) > 1)) - return false; -#else -#define IAVF_LAST_OFFSET \ - (SKB_WITH_OVERHEAD(PAGE_SIZE) - IAVF_RXBUFFER_2048) - if (rx_buffer->page_offset > IAVF_LAST_OFFSET) - return false; -#endif - - /* If we have drained the page fragment pool we need to update - * the pagecnt_bias and page count so that we fully restock the - * number of references the driver holds. - */ - if (unlikely(!pagecnt_bias)) { - page_ref_add(page, USHRT_MAX); - rx_buffer->pagecnt_bias = USHRT_MAX; - } - - return true; -} - /** * iavf_add_rx_frag - Add contents of Rx buffer to sk_buff - * @rx_ring: rx descriptor ring to transact packets on - * @rx_buffer: buffer containing page to add * @skb: sk_buff to place the data into + * @rx_buffer: buffer containing page to add * @size: packet length from rx_desc * * This function will add the data contained in rx_buffer->page to the skb. @@ -1248,210 +1056,54 @@ static bool iavf_can_reuse_rx_page(struct iavf_rx_buffer *rx_buffer) * * The function will then update the page offset. 
**/ -static void iavf_add_rx_frag(struct iavf_ring *rx_ring, - struct iavf_rx_buffer *rx_buffer, - struct sk_buff *skb, +static void iavf_add_rx_frag(struct sk_buff *skb, + const struct libie_rx_buffer *rx_buffer, unsigned int size) { -#if (PAGE_SIZE < 8192) - unsigned int truesize = iavf_rx_pg_size(rx_ring) / 2; -#else - unsigned int truesize = SKB_DATA_ALIGN(size + iavf_rx_offset(rx_ring)); -#endif - - if (!size) - return; + u32 hr = rx_buffer->page->pp->p.offset; skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, - rx_buffer->page_offset, size, truesize); - - /* page is being used so we must update the page offset */ -#if (PAGE_SIZE < 8192) - rx_buffer->page_offset ^= truesize; -#else - rx_buffer->page_offset += truesize; -#endif -} - -/** - * iavf_get_rx_buffer - Fetch Rx buffer and synchronize data for use - * @rx_ring: rx descriptor ring to transact packets on - * @size: size of buffer to add to skb - * - * This function will pull an Rx buffer from the ring and synchronize it - * for use by the CPU. - */ -static struct iavf_rx_buffer *iavf_get_rx_buffer(struct iavf_ring *rx_ring, - const unsigned int size) -{ - struct iavf_rx_buffer *rx_buffer; - - rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean]; - prefetchw(rx_buffer->page); - if (!size) - return rx_buffer; - - /* we are reusing so sync this buffer for CPU use */ - dma_sync_single_range_for_cpu(rx_ring->dev, - rx_buffer->dma, - rx_buffer->page_offset, - size, - DMA_FROM_DEVICE); - - /* We have pulled a buffer for use, so decrement pagecnt_bias */ - rx_buffer->pagecnt_bias--; - - return rx_buffer; -} - -/** - * iavf_construct_skb - Allocate skb and populate it - * @rx_ring: rx descriptor ring to transact packets on - * @rx_buffer: rx buffer to pull data from - * @size: size of buffer to add to skb - * - * This function allocates an skb. It then populates it with the page - * data from the current receive descriptor, taking care to set up the - * skb correctly. 
- */ -static struct sk_buff *iavf_construct_skb(struct iavf_ring *rx_ring, - struct iavf_rx_buffer *rx_buffer, - unsigned int size) -{ - void *va; -#if (PAGE_SIZE < 8192) - unsigned int truesize = iavf_rx_pg_size(rx_ring) / 2; -#else - unsigned int truesize = SKB_DATA_ALIGN(size); -#endif - unsigned int headlen; - struct sk_buff *skb; - - if (!rx_buffer) - return NULL; - /* prefetch first cache line of first page */ - va = page_address(rx_buffer->page) + rx_buffer->page_offset; - net_prefetch(va); - - /* allocate a skb to store the frags */ - skb = __napi_alloc_skb(&rx_ring->q_vector->napi, - IAVF_RX_HDR_SIZE, - GFP_ATOMIC | __GFP_NOWARN); - if (unlikely(!skb)) - return NULL; - - /* Determine available headroom for copy */ - headlen = size; - if (headlen > IAVF_RX_HDR_SIZE) - headlen = eth_get_headlen(skb->dev, va, IAVF_RX_HDR_SIZE); - - /* align pull length to size of long to optimize memcpy performance */ - memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long))); - - /* update all of the pointers */ - size -= headlen; - if (size) { - skb_add_rx_frag(skb, 0, rx_buffer->page, - rx_buffer->page_offset + headlen, - size, truesize); - - /* buffer is used by skb, update page_offset */ -#if (PAGE_SIZE < 8192) - rx_buffer->page_offset ^= truesize; -#else - rx_buffer->page_offset += truesize; -#endif - } else { - /* buffer is unused, reset bias back to rx_buffer */ - rx_buffer->pagecnt_bias++; - } - - return skb; + rx_buffer->offset + hr, size, rx_buffer->truesize); } /** * iavf_build_skb - Build skb around an existing buffer - * @rx_ring: Rx descriptor ring to transact packets on * @rx_buffer: Rx buffer to pull data from * @size: size of buffer to add to skb * * This function builds an skb around an existing Rx buffer, taking care * to set up the skb correctly and avoid any memcpy overhead. */ -static struct sk_buff *iavf_build_skb(struct iavf_ring *rx_ring, - struct iavf_rx_buffer *rx_buffer, +static struct sk_buff *iavf_build_skb(const struct libie_rx_buffer *rx_buffer, unsigned int size) { - void *va; -#if (PAGE_SIZE < 8192) - unsigned int truesize = iavf_rx_pg_size(rx_ring) / 2; -#else - unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + - SKB_DATA_ALIGN(IAVF_SKB_PAD + size); -#endif + u32 hr = rx_buffer->page->pp->p.offset; struct sk_buff *skb; + void *va; - if (!rx_buffer || !size) - return NULL; /* prefetch first cache line of first page */ - va = page_address(rx_buffer->page) + rx_buffer->page_offset; - net_prefetch(va); + va = page_address(rx_buffer->page) + rx_buffer->offset; + net_prefetch(va + hr); /* build an skb around the page buffer */ - skb = napi_build_skb(va - IAVF_SKB_PAD, truesize); + skb = napi_build_skb(va, rx_buffer->truesize); if (unlikely(!skb)) return NULL; + skb_mark_for_recycle(skb); + /* update pointers within the skb to store the data */ - skb_reserve(skb, IAVF_SKB_PAD); + skb_reserve(skb, hr); __skb_put(skb, size); - /* buffer is used by skb, update page_offset */ -#if (PAGE_SIZE < 8192) - rx_buffer->page_offset ^= truesize; -#else - rx_buffer->page_offset += truesize; -#endif - return skb; } -/** - * iavf_put_rx_buffer - Clean up used buffer and either recycle or free - * @rx_ring: rx descriptor ring to transact packets on - * @rx_buffer: rx buffer to pull data from - * - * This function will clean up the contents of the rx_buffer. It will - * either recycle the buffer or unmap it and free the associated resources. 
- */ -static void iavf_put_rx_buffer(struct iavf_ring *rx_ring, - struct iavf_rx_buffer *rx_buffer) -{ - if (!rx_buffer) - return; - - if (iavf_can_reuse_rx_page(rx_buffer)) { - /* hand second half of page back to the ring */ - iavf_reuse_rx_page(rx_ring, rx_buffer); - rx_ring->rx_stats.page_reuse_count++; - } else { - /* we are not reusing the buffer so unmap it */ - dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, - iavf_rx_pg_size(rx_ring), - DMA_FROM_DEVICE, IAVF_RX_DMA_ATTR); - __page_frag_cache_drain(rx_buffer->page, - rx_buffer->pagecnt_bias); - } - - /* clear contents of buffer_info */ - rx_buffer->page = NULL; -} - /** * iavf_is_non_eop - process handling of non-EOP buffers * @rx_ring: Rx ring being processed * @rx_desc: Rx descriptor for current buffer - * @skb: Current socket buffer containing buffer in progress + * @stats: NAPI poll local stats to update * * This function updates next to clean. If the buffer is an EOP buffer * this function exits returning false, otherwise it will place the @@ -1460,7 +1112,7 @@ static void iavf_put_rx_buffer(struct iavf_ring *rx_ring, **/ static bool iavf_is_non_eop(struct iavf_ring *rx_ring, union iavf_rx_desc *rx_desc, - struct sk_buff *skb) + struct libie_rq_onstack_stats *stats) { u32 ntc = rx_ring->next_to_clean + 1; @@ -1475,7 +1127,7 @@ static bool iavf_is_non_eop(struct iavf_ring *rx_ring, if (likely(iavf_test_staterr(rx_desc, IAVF_RXD_EOF))) return false; - rx_ring->rx_stats.non_eop_descs++; + stats->fragments++; return true; } @@ -1494,13 +1146,13 @@ static bool iavf_is_non_eop(struct iavf_ring *rx_ring, **/ static int iavf_clean_rx_irq(struct iavf_ring *rx_ring, int budget) { - unsigned int total_rx_bytes = 0, total_rx_packets = 0; - struct sk_buff *skb = rx_ring->skb; u16 cleaned_count = IAVF_DESC_UNUSED(rx_ring); + struct libie_rq_onstack_stats stats = { }; + struct sk_buff *skb = rx_ring->skb; bool failure = false; - while (likely(total_rx_packets < (unsigned int)budget)) { - struct iavf_rx_buffer *rx_buffer; + while (likely(stats.packets < budget)) { + struct libie_rx_buffer *rx_buffer; union iavf_rx_desc *rx_desc; unsigned int size; u16 vlan_tag = 0; @@ -1535,28 +1187,28 @@ static int iavf_clean_rx_irq(struct iavf_ring *rx_ring, int budget) size = FIELD_GET(IAVF_RXD_QW1_LENGTH_PBUF_MASK, qword); iavf_trace(clean_rx_irq, rx_ring, rx_desc, skb); - rx_buffer = iavf_get_rx_buffer(rx_ring, size); + + rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean]; + if (!libie_rx_sync_for_cpu(rx_buffer, size)) + goto skip_data; /* retrieve a buffer from the ring */ if (skb) - iavf_add_rx_frag(rx_ring, rx_buffer, skb, size); - else if (ring_uses_build_skb(rx_ring)) - skb = iavf_build_skb(rx_ring, rx_buffer, size); + iavf_add_rx_frag(skb, rx_buffer, size); else - skb = iavf_construct_skb(rx_ring, rx_buffer, size); + skb = iavf_build_skb(rx_buffer, size); /* exit if we failed to retrieve a buffer */ if (!skb) { - rx_ring->rx_stats.alloc_buff_failed++; - if (rx_buffer && size) - rx_buffer->pagecnt_bias++; + libie_stats_inc_one(&rx_ring->rq_stats, + build_skb_fail); break; } - iavf_put_rx_buffer(rx_ring, rx_buffer); +skip_data: cleaned_count++; - if (iavf_is_non_eop(rx_ring, rx_desc, skb)) + if (iavf_is_non_eop(rx_ring, rx_desc, &stats)) continue; /* ERR_MASK will only have valid bits if EOP set, and @@ -1576,7 +1228,7 @@ static int iavf_clean_rx_irq(struct iavf_ring *rx_ring, int budget) } /* probably a little skewed due to removing CRC */ - total_rx_bytes += skb->len; + stats.bytes += skb->len; qword = 
le64_to_cpu(rx_desc->wb.qword1.status_error_len); rx_ptype = FIELD_GET(IAVF_RXD_QW1_PTYPE_MASK, qword); @@ -1597,20 +1249,20 @@ static int iavf_clean_rx_irq(struct iavf_ring *rx_ring, int budget) skb = NULL; /* update budget accounting */ - total_rx_packets++; + stats.packets++; } rx_ring->skb = skb; - u64_stats_update_begin(&rx_ring->syncp); - rx_ring->stats.packets += total_rx_packets; - rx_ring->stats.bytes += total_rx_bytes; - u64_stats_update_end(&rx_ring->syncp); - rx_ring->q_vector->rx.total_packets += total_rx_packets; - rx_ring->q_vector->rx.total_bytes += total_rx_bytes; + libie_rq_napi_stats_add(&rx_ring->rq_stats, &stats); + rx_ring->q_vector->rx.total_packets += stats.packets; + rx_ring->q_vector->rx.total_bytes += stats.bytes; /* guarantee a trip back through this routine if there was a failure */ - return failure ? budget : (int)total_rx_packets; + if (unlikely(failure)) + return budget; + + return stats.packets; } static inline u32 iavf_buildreg_itr(const int type, u16 itr) @@ -1743,8 +1395,8 @@ int iavf_napi_poll(struct napi_struct *napi, int budget) clean_complete = false; continue; } - arm_wb |= ring->arm_wb; - ring->arm_wb = false; + arm_wb |= !!(ring->flags & IAVF_TXR_FLAGS_ARM_WB); + ring->flags &= ~IAVF_TXR_FLAGS_ARM_WB; } /* Handle case where we are called by netpoll with a budget of 0 */ @@ -1787,10 +1439,8 @@ int iavf_napi_poll(struct napi_struct *napi, int budget) return budget - 1; } tx_only: - if (arm_wb) { - q_vector->tx.ring[0].tx_stats.tx_force_wb++; + if (arm_wb) iavf_enable_wb_on_itr(vsi, q_vector); - } return budget; } @@ -2249,6 +1899,7 @@ bool __iavf_chk_linearize(struct sk_buff *skb) int __iavf_maybe_stop_tx(struct iavf_ring *tx_ring, int size) { netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index); + libie_stats_inc_one(&tx_ring->sq_stats, stops); /* Memory barrier before checking head and tail */ smp_mb(); @@ -2258,7 +1909,8 @@ int __iavf_maybe_stop_tx(struct iavf_ring *tx_ring, int size) /* A reprieve! 
- use start_queue because it doesn't call schedule */ netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index); - ++tx_ring->tx_stats.restart_queue; + libie_stats_inc_one(&tx_ring->sq_stats, restarts); + return 0; } @@ -2438,7 +2090,7 @@ static netdev_tx_t iavf_xmit_frame_ring(struct sk_buff *skb, return NETDEV_TX_OK; } count = iavf_txd_use_count(skb->len); - tx_ring->tx_stats.tx_linearize++; + libie_stats_inc_one(&tx_ring->sq_stats, linearized); } /* need: 1 descriptor per page * PAGE_SIZE/IAVF_MAX_DATA_PER_TXD, @@ -2448,7 +2100,7 @@ static netdev_tx_t iavf_xmit_frame_ring(struct sk_buff *skb, * otherwise try next time */ if (iavf_maybe_stop_tx(tx_ring, count + 4 + 1)) { - tx_ring->tx_stats.tx_busy++; + libie_stats_inc_one(&tx_ring->sq_stats, busy); return NETDEV_TX_BUSY; } diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.h b/drivers/net/ethernet/intel/iavf/iavf_txrx.h index 10ba36602c0c14..f2d54b136289f2 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.h +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.h @@ -4,6 +4,8 @@ #ifndef _IAVF_TXRX_H_ #define _IAVF_TXRX_H_ +#include + /* Interrupt Throttling and Rate Limiting Goodies */ #define IAVF_DEFAULT_IRQ_WORK 256 @@ -80,79 +82,8 @@ enum iavf_dyn_idx_t { BIT_ULL(IAVF_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) | \ BIT_ULL(IAVF_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP)) -/* Supported Rx Buffer Sizes (a multiple of 128) */ -#define IAVF_RXBUFFER_256 256 -#define IAVF_RXBUFFER_1536 1536 /* 128B aligned standard Ethernet frame */ -#define IAVF_RXBUFFER_2048 2048 -#define IAVF_RXBUFFER_3072 3072 /* Used for large frames w/ padding */ -#define IAVF_MAX_RXBUFFER 9728 /* largest size for single descriptor */ - -/* NOTE: netdev_alloc_skb reserves up to 64 bytes, NET_IP_ALIGN means we - * reserve 2 more, and skb_shared_info adds an additional 384 bytes more, - * this adds up to 512 bytes of extra data meaning the smallest allocation - * we could have is 1K. - * i.e. RXBUFFER_256 --> 960 byte skb (size-1024 slab) - * i.e. RXBUFFER_512 --> 1216 byte skb (size-2048 slab) - */ -#define IAVF_RX_HDR_SIZE IAVF_RXBUFFER_256 -#define IAVF_PACKET_HDR_PAD (ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2)) #define iavf_rx_desc iavf_32byte_rx_desc -#define IAVF_RX_DMA_ATTR \ - (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING) - -/* Attempt to maximize the headroom available for incoming frames. We - * use a 2K buffer for receives and need 1536/1534 to store the data for - * the frame. This leaves us with 512 bytes of room. From that we need - * to deduct the space needed for the shared info and the padding needed - * to IP align the frame. - * - * Note: For cache line sizes 256 or larger this value is going to end - * up negative. In these cases we should fall back to the legacy - * receive path. - */ -#if (PAGE_SIZE < 8192) -#define IAVF_2K_TOO_SMALL_WITH_PADDING \ -((NET_SKB_PAD + IAVF_RXBUFFER_1536) > SKB_WITH_OVERHEAD(IAVF_RXBUFFER_2048)) - -static inline int iavf_compute_pad(int rx_buf_len) -{ - int page_size, pad_size; - - page_size = ALIGN(rx_buf_len, PAGE_SIZE / 2); - pad_size = SKB_WITH_OVERHEAD(page_size) - rx_buf_len; - - return pad_size; -} - -static inline int iavf_skb_pad(void) -{ - int rx_buf_len; - - /* If a 2K buffer cannot handle a standard Ethernet frame then - * optimize padding for a 3K buffer instead of a 1.5K buffer. - * - * For a 3K buffer we need to add enough padding to allow for - * tailroom due to NET_IP_ALIGN possibly shifting us out of - * cache-line alignment. 
- */ - if (IAVF_2K_TOO_SMALL_WITH_PADDING) - rx_buf_len = IAVF_RXBUFFER_3072 + SKB_DATA_ALIGN(NET_IP_ALIGN); - else - rx_buf_len = IAVF_RXBUFFER_1536; - - /* if needed make room for NET_IP_ALIGN */ - rx_buf_len -= NET_IP_ALIGN; - - return iavf_compute_pad(rx_buf_len); -} - -#define IAVF_SKB_PAD iavf_skb_pad() -#else -#define IAVF_2K_TOO_SMALL_WITH_PADDING false -#define IAVF_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN) -#endif - /** * iavf_test_staterr - tests bits in Rx descriptor status and error fields * @rx_desc: pointer to receive descriptor (in le64 format) @@ -271,46 +202,6 @@ struct iavf_tx_buffer { u32 tx_flags; }; -struct iavf_rx_buffer { - dma_addr_t dma; - struct page *page; -#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536) - __u32 page_offset; -#else - __u16 page_offset; -#endif - __u16 pagecnt_bias; -}; - -struct iavf_queue_stats { - u64 packets; - u64 bytes; -}; - -struct iavf_tx_queue_stats { - u64 restart_queue; - u64 tx_busy; - u64 tx_done_old; - u64 tx_linearize; - u64 tx_force_wb; - int prev_pkt_ctr; - u64 tx_lost_interrupt; -}; - -struct iavf_rx_queue_stats { - u64 non_eop_descs; - u64 alloc_page_failed; - u64 alloc_buff_failed; - u64 page_reuse_count; - u64 realloc_count; -}; - -enum iavf_ring_state_t { - __IAVF_TX_FDIR_INIT_DONE, - __IAVF_TX_XPS_INIT_DONE, - __IAVF_RING_STATE_NBITS /* must be last */ -}; - /* some useful defines for virtchannel interface, which * is the only remaining user of header split */ @@ -326,16 +217,19 @@ enum iavf_ring_state_t { struct iavf_ring { struct iavf_ring *next; /* pointer to next ring in q_vector */ void *desc; /* Descriptor ring memory */ - struct device *dev; /* Used for DMA mapping */ + union { + struct page_pool *pp; /* Used on Rx for buffer management */ + struct device *dev; /* Used on Tx for DMA mapping */ + }; struct net_device *netdev; /* netdev ring maps to */ union { + struct libie_rx_buffer *rx_bi; struct iavf_tx_buffer *tx_bi; - struct iavf_rx_buffer *rx_bi; }; - DECLARE_BITMAP(state, __IAVF_RING_STATE_NBITS); - u16 queue_index; /* Queue number of ring */ - u8 dcb_tc; /* Traffic class of ring */ u8 __iomem *tail; + u32 truesize; + + u16 queue_index; /* Queue number of ring */ /* high bit set means dynamic, use accessors routines to read/write. * hardware only supports 2us resolution for the ITR registers. 
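With the ring now carrying a struct page_pool pointer (and no per-buffer DMA bookkeeping), Rx buffer management reduces to the plain page_pool lifecycle that the libie helpers wrap. The sketch below is a simplified approximation under stated assumptions, not the library's implementation: demo_* names are hypothetical, a real driver also records each page in its rx_bi array, and the page_pool_* calls assume a kernel recent enough to provide <net/page_pool/helpers.h> and the .napi pool parameter.

#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/netdevice.h>
#include <linux/numa.h>
#include <net/page_pool/helpers.h>

struct demo_rxq {
	struct page_pool *pp;
};

static int demo_rxq_create(struct demo_rxq *rq, struct device *dev,
			   struct napi_struct *napi, u32 count)
{
	struct page_pool_params pp = {
		/* the pool maps pages and syncs them for the device */
		.flags		= PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
		.order		= 0,
		.pool_size	= count,
		.nid		= NUMA_NO_NODE,
		.dev		= dev,
		.napi		= napi,
		.dma_dir	= DMA_FROM_DEVICE,
		.max_len	= PAGE_SIZE,
		.offset		= 0,
	};

	rq->pp = page_pool_create(&pp);
	if (IS_ERR(rq->pp)) {
		int err = PTR_ERR(rq->pp);

		rq->pp = NULL;
		return err;
	}

	return 0;
}

/* Fill one descriptor: the pool owns the mapping, only the DMA address
 * is needed for the descriptor (mirrors libie_rx_alloc() returning
 * DMA_MAPPING_ERROR on allocation failure).
 */
static dma_addr_t demo_rxq_get_buf(struct demo_rxq *rq)
{
	struct page *page = page_pool_dev_alloc_pages(rq->pp);

	return page ? page_pool_get_dma_addr(page) : DMA_MAPPING_ERROR;
}

/* Teardown of a still-posted buffer: hand the page straight back to the
 * pool, as iavf_clean_rx_ring() now does above.
 */
static void demo_rxq_put_buf(struct demo_rxq *rq, struct page *page)
{
	page_pool_put_full_page(rq->pp, page, false);
}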
@@ -345,35 +239,26 @@ struct iavf_ring { u16 itr_setting; u16 count; /* Number of descriptors */ - u16 reg_idx; /* HW register index of the ring */ - u16 rx_buf_len; /* used in interrupt processing */ u16 next_to_use; u16 next_to_clean; - u8 atr_sample_rate; - u8 atr_count; - - bool ring_active; /* is ring online or not */ - bool arm_wb; /* do something to arm write back */ - u8 packet_stride; - u16 flags; #define IAVF_TXR_FLAGS_WB_ON_ITR BIT(0) -#define IAVF_RXR_FLAGS_BUILD_SKB_ENABLED BIT(1) +#define IAVF_TXR_FLAGS_ARM_WB BIT(1) +/* BIT(2) is free */ #define IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1 BIT(3) #define IAVF_TXR_FLAGS_VLAN_TAG_LOC_L2TAG2 BIT(4) #define IAVF_RXR_FLAGS_VLAN_TAG_LOC_L2TAG2_2 BIT(5) /* stats structs */ - struct iavf_queue_stats stats; - struct u64_stats_sync syncp; union { - struct iavf_tx_queue_stats tx_stats; - struct iavf_rx_queue_stats rx_stats; + struct libie_rq_stats rq_stats; + struct libie_sq_stats sq_stats; }; + int prev_pkt_ctr; /* For Tx stall detection */ unsigned int size; /* length of descriptor ring in bytes */ dma_addr_t dma; /* physical address of ring */ @@ -381,7 +266,6 @@ struct iavf_ring { struct iavf_q_vector *q_vector; /* Backreference to associated vector */ struct rcu_head rcu; /* to avoid race on free */ - u16 next_to_alloc; struct sk_buff *skb; /* When iavf_clean_rx_ring_irq() must * return before it sees the EOP for * the current packet, we save that skb @@ -390,22 +274,9 @@ struct iavf_ring { * iavf_clean_rx_ring_irq() is called * for this ring. */ -} ____cacheline_internodealigned_in_smp; - -static inline bool ring_uses_build_skb(struct iavf_ring *ring) -{ - return !!(ring->flags & IAVF_RXR_FLAGS_BUILD_SKB_ENABLED); -} - -static inline void set_ring_build_skb_enabled(struct iavf_ring *ring) -{ - ring->flags |= IAVF_RXR_FLAGS_BUILD_SKB_ENABLED; -} -static inline void clear_ring_build_skb_enabled(struct iavf_ring *ring) -{ - ring->flags &= ~IAVF_RXR_FLAGS_BUILD_SKB_ENABLED; -} + u32 rx_buf_len; +} ____cacheline_internodealigned_in_smp; #define IAVF_ITR_ADAPTIVE_MIN_INC 0x0002 #define IAVF_ITR_ADAPTIVE_MIN_USECS 0x0002 @@ -428,17 +299,6 @@ struct iavf_ring_container { #define iavf_for_each_ring(pos, head) \ for (pos = (head).ring; pos != NULL; pos = pos->next) -static inline unsigned int iavf_rx_pg_order(struct iavf_ring *ring) -{ -#if (PAGE_SIZE < 8192) - if (ring->rx_buf_len > (PAGE_SIZE / 2)) - return 1; -#endif - return 0; -} - -#define iavf_rx_pg_size(_ring) (PAGE_SIZE << iavf_rx_pg_order(_ring)) - bool iavf_alloc_rx_buffers(struct iavf_ring *rxr, u16 cleaned_count); netdev_tx_t iavf_xmit_frame(struct sk_buff *skb, struct net_device *netdev); int iavf_setup_tx_descriptors(struct iavf_ring *tx_ring); diff --git a/drivers/net/ethernet/intel/iavf/iavf_type.h b/drivers/net/ethernet/intel/iavf/iavf_type.h index 2b6a207fa441de..f6b09e57abcef6 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_type.h +++ b/drivers/net/ethernet/intel/iavf/iavf_type.h @@ -10,8 +10,6 @@ #include "iavf_adminq.h" #include "iavf_devids.h" -#define IAVF_RXQ_CTX_DBUFF_SHIFT 7 - /* IAVF_MASK is a macro used on 32 bit registers */ #define IAVF_MASK(mask, shift) ((u32)(mask) << (shift)) @@ -327,94 +325,6 @@ enum iavf_rx_desc_error_l3l4e_fcoe_masks { #define IAVF_RXD_QW1_PTYPE_SHIFT 30 #define IAVF_RXD_QW1_PTYPE_MASK (0xFFULL << IAVF_RXD_QW1_PTYPE_SHIFT) -/* Packet type non-ip values */ -enum iavf_rx_l2_ptype { - IAVF_RX_PTYPE_L2_RESERVED = 0, - IAVF_RX_PTYPE_L2_MAC_PAY2 = 1, - IAVF_RX_PTYPE_L2_TIMESYNC_PAY2 = 2, - IAVF_RX_PTYPE_L2_FIP_PAY2 = 3, - IAVF_RX_PTYPE_L2_OUI_PAY2 = 
4, - IAVF_RX_PTYPE_L2_MACCNTRL_PAY2 = 5, - IAVF_RX_PTYPE_L2_LLDP_PAY2 = 6, - IAVF_RX_PTYPE_L2_ECP_PAY2 = 7, - IAVF_RX_PTYPE_L2_EVB_PAY2 = 8, - IAVF_RX_PTYPE_L2_QCN_PAY2 = 9, - IAVF_RX_PTYPE_L2_EAPOL_PAY2 = 10, - IAVF_RX_PTYPE_L2_ARP = 11, - IAVF_RX_PTYPE_L2_FCOE_PAY3 = 12, - IAVF_RX_PTYPE_L2_FCOE_FCDATA_PAY3 = 13, - IAVF_RX_PTYPE_L2_FCOE_FCRDY_PAY3 = 14, - IAVF_RX_PTYPE_L2_FCOE_FCRSP_PAY3 = 15, - IAVF_RX_PTYPE_L2_FCOE_FCOTHER_PA = 16, - IAVF_RX_PTYPE_L2_FCOE_VFT_PAY3 = 17, - IAVF_RX_PTYPE_L2_FCOE_VFT_FCDATA = 18, - IAVF_RX_PTYPE_L2_FCOE_VFT_FCRDY = 19, - IAVF_RX_PTYPE_L2_FCOE_VFT_FCRSP = 20, - IAVF_RX_PTYPE_L2_FCOE_VFT_FCOTHER = 21, - IAVF_RX_PTYPE_GRENAT4_MAC_PAY3 = 58, - IAVF_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4 = 87, - IAVF_RX_PTYPE_GRENAT6_MAC_PAY3 = 124, - IAVF_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4 = 153 -}; - -struct iavf_rx_ptype_decoded { - u32 known:1; - u32 outer_ip:1; - u32 outer_ip_ver:1; - u32 outer_frag:1; - u32 tunnel_type:3; - u32 tunnel_end_prot:2; - u32 tunnel_end_frag:1; - u32 inner_prot:4; - u32 payload_layer:3; -}; - -enum iavf_rx_ptype_outer_ip { - IAVF_RX_PTYPE_OUTER_L2 = 0, - IAVF_RX_PTYPE_OUTER_IP = 1 -}; - -enum iavf_rx_ptype_outer_ip_ver { - IAVF_RX_PTYPE_OUTER_NONE = 0, - IAVF_RX_PTYPE_OUTER_IPV4 = 0, - IAVF_RX_PTYPE_OUTER_IPV6 = 1 -}; - -enum iavf_rx_ptype_outer_fragmented { - IAVF_RX_PTYPE_NOT_FRAG = 0, - IAVF_RX_PTYPE_FRAG = 1 -}; - -enum iavf_rx_ptype_tunnel_type { - IAVF_RX_PTYPE_TUNNEL_NONE = 0, - IAVF_RX_PTYPE_TUNNEL_IP_IP = 1, - IAVF_RX_PTYPE_TUNNEL_IP_GRENAT = 2, - IAVF_RX_PTYPE_TUNNEL_IP_GRENAT_MAC = 3, - IAVF_RX_PTYPE_TUNNEL_IP_GRENAT_MAC_VLAN = 4, -}; - -enum iavf_rx_ptype_tunnel_end_prot { - IAVF_RX_PTYPE_TUNNEL_END_NONE = 0, - IAVF_RX_PTYPE_TUNNEL_END_IPV4 = 1, - IAVF_RX_PTYPE_TUNNEL_END_IPV6 = 2, -}; - -enum iavf_rx_ptype_inner_prot { - IAVF_RX_PTYPE_INNER_PROT_NONE = 0, - IAVF_RX_PTYPE_INNER_PROT_UDP = 1, - IAVF_RX_PTYPE_INNER_PROT_TCP = 2, - IAVF_RX_PTYPE_INNER_PROT_SCTP = 3, - IAVF_RX_PTYPE_INNER_PROT_ICMP = 4, - IAVF_RX_PTYPE_INNER_PROT_TIMESYNC = 5 -}; - -enum iavf_rx_ptype_payload_layer { - IAVF_RX_PTYPE_PAYLOAD_LAYER_NONE = 0, - IAVF_RX_PTYPE_PAYLOAD_LAYER_PAY2 = 1, - IAVF_RX_PTYPE_PAYLOAD_LAYER_PAY3 = 2, - IAVF_RX_PTYPE_PAYLOAD_LAYER_PAY4 = 3, -}; - #define IAVF_RXD_QW1_LENGTH_PBUF_SHIFT 38 #define IAVF_RXD_QW1_LENGTH_PBUF_MASK (0x3FFFULL << \ IAVF_RXD_QW1_LENGTH_PBUF_SHIFT) diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index 22f2df7c460b86..1e543f6a7c3033 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright(c) 2013 - 2018 Intel Corporation. 
*/ +#include + #include "iavf.h" #include "iavf_prototype.h" @@ -268,13 +270,13 @@ int iavf_get_vf_vlan_v2_caps(struct iavf_adapter *adapter) void iavf_configure_queues(struct iavf_adapter *adapter) { struct virtchnl_vsi_queue_config_info *vqci; - int i, max_frame = adapter->vf_res->max_mtu; int pairs = adapter->num_active_queues; struct virtchnl_queue_pair_info *vqpi; + u32 i, max_frame; size_t len; - if (max_frame > IAVF_MAX_RXBUFFER || !max_frame) - max_frame = IAVF_MAX_RXBUFFER; + max_frame = LIBIE_MAX_RX_FRM_LEN(adapter->rx_rings->pp->p.offset); + max_frame = min_not_zero(adapter->vf_res->max_mtu, max_frame); if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { /* bail because we already have a command pending */ @@ -288,11 +290,6 @@ void iavf_configure_queues(struct iavf_adapter *adapter) if (!vqci) return; - /* Limit maximum frame size when jumbo frames is not enabled */ - if (!(adapter->flags & IAVF_FLAG_LEGACY_RX) && - (adapter->netdev->mtu <= ETH_DATA_LEN)) - max_frame = IAVF_RXBUFFER_1536 - NET_IP_ALIGN; - vqci->vsi_id = adapter->vsi_res->vsi_id; vqci->num_queue_pairs = pairs; vqpi = vqci->qpair; @@ -309,9 +306,7 @@ void iavf_configure_queues(struct iavf_adapter *adapter) vqpi->rxq.ring_len = adapter->rx_rings[i].count; vqpi->rxq.dma_ring_addr = adapter->rx_rings[i].dma; vqpi->rxq.max_pkt_size = max_frame; - vqpi->rxq.databuffer_size = - ALIGN(adapter->rx_rings[i].rx_buf_len, - BIT_ULL(IAVF_RXQ_CTX_DBUFF_SHIFT)); + vqpi->rxq.databuffer_size = adapter->rx_rings[i].rx_buf_len; if (CRC_OFFLOAD_ALLOWED(adapter)) vqpi->rxq.crc_disable = !!(adapter->netdev->features & NETIF_F_RXFCS); diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h index d384ddfcb83e97..611577ebc29d82 100644 --- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h +++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h @@ -160,64 +160,6 @@ struct ice_fltr_desc { (0x1ULL << ICE_FXD_FLTR_WB_QW1_FAIL_PROF_S) #define ICE_FXD_FLTR_WB_QW1_FAIL_PROF_YES 0x1ULL -struct ice_rx_ptype_decoded { - u32 known:1; - u32 outer_ip:1; - u32 outer_ip_ver:2; - u32 outer_frag:1; - u32 tunnel_type:3; - u32 tunnel_end_prot:2; - u32 tunnel_end_frag:1; - u32 inner_prot:4; - u32 payload_layer:3; -}; - -enum ice_rx_ptype_outer_ip { - ICE_RX_PTYPE_OUTER_L2 = 0, - ICE_RX_PTYPE_OUTER_IP = 1, -}; - -enum ice_rx_ptype_outer_ip_ver { - ICE_RX_PTYPE_OUTER_NONE = 0, - ICE_RX_PTYPE_OUTER_IPV4 = 1, - ICE_RX_PTYPE_OUTER_IPV6 = 2, -}; - -enum ice_rx_ptype_outer_fragmented { - ICE_RX_PTYPE_NOT_FRAG = 0, - ICE_RX_PTYPE_FRAG = 1, -}; - -enum ice_rx_ptype_tunnel_type { - ICE_RX_PTYPE_TUNNEL_NONE = 0, - ICE_RX_PTYPE_TUNNEL_IP_IP = 1, - ICE_RX_PTYPE_TUNNEL_IP_GRENAT = 2, - ICE_RX_PTYPE_TUNNEL_IP_GRENAT_MAC = 3, - ICE_RX_PTYPE_TUNNEL_IP_GRENAT_MAC_VLAN = 4, -}; - -enum ice_rx_ptype_tunnel_end_prot { - ICE_RX_PTYPE_TUNNEL_END_NONE = 0, - ICE_RX_PTYPE_TUNNEL_END_IPV4 = 1, - ICE_RX_PTYPE_TUNNEL_END_IPV6 = 2, -}; - -enum ice_rx_ptype_inner_prot { - ICE_RX_PTYPE_INNER_PROT_NONE = 0, - ICE_RX_PTYPE_INNER_PROT_UDP = 1, - ICE_RX_PTYPE_INNER_PROT_TCP = 2, - ICE_RX_PTYPE_INNER_PROT_SCTP = 3, - ICE_RX_PTYPE_INNER_PROT_ICMP = 4, - ICE_RX_PTYPE_INNER_PROT_TIMESYNC = 5, -}; - -enum ice_rx_ptype_payload_layer { - ICE_RX_PTYPE_PAYLOAD_LAYER_NONE = 0, - ICE_RX_PTYPE_PAYLOAD_LAYER_PAY2 = 1, - ICE_RX_PTYPE_PAYLOAD_LAYER_PAY3 = 2, - ICE_RX_PTYPE_PAYLOAD_LAYER_PAY4 = 3, -}; - /* Rx Flex Descriptor * This descriptor is used instead of the legacy version descriptor when * ice_rlan_ctx.adv_desc is set @@ -651,266 +593,4 @@ struct 
ice_tlan_ctx { u8 int_q_state; /* width not needed - internal - DO NOT WRITE!!! */ }; -/* The ice_ptype_lkup table is used to convert from the 10-bit ptype in the - * hardware to a bit-field that can be used by SW to more easily determine the - * packet type. - * - * Macros are used to shorten the table lines and make this table human - * readable. - * - * We store the PTYPE in the top byte of the bit field - this is just so that - * we can check that the table doesn't have a row missing, as the index into - * the table should be the PTYPE. - * - * Typical work flow: - * - * IF NOT ice_ptype_lkup[ptype].known - * THEN - * Packet is unknown - * ELSE IF ice_ptype_lkup[ptype].outer_ip == ICE_RX_PTYPE_OUTER_IP - * Use the rest of the fields to look at the tunnels, inner protocols, etc - * ELSE - * Use the enum ice_rx_l2_ptype to decode the packet type - * ENDIF - */ -#define ICE_PTYPES \ - /* L2 Packet types */ \ - ICE_PTT_UNUSED_ENTRY(0), \ - ICE_PTT(1, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), \ - ICE_PTT_UNUSED_ENTRY(2), \ - ICE_PTT_UNUSED_ENTRY(3), \ - ICE_PTT_UNUSED_ENTRY(4), \ - ICE_PTT_UNUSED_ENTRY(5), \ - ICE_PTT(6, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE), \ - ICE_PTT(7, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE), \ - ICE_PTT_UNUSED_ENTRY(8), \ - ICE_PTT_UNUSED_ENTRY(9), \ - ICE_PTT(10, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE), \ - ICE_PTT(11, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE), \ - ICE_PTT_UNUSED_ENTRY(12), \ - ICE_PTT_UNUSED_ENTRY(13), \ - ICE_PTT_UNUSED_ENTRY(14), \ - ICE_PTT_UNUSED_ENTRY(15), \ - ICE_PTT_UNUSED_ENTRY(16), \ - ICE_PTT_UNUSED_ENTRY(17), \ - ICE_PTT_UNUSED_ENTRY(18), \ - ICE_PTT_UNUSED_ENTRY(19), \ - ICE_PTT_UNUSED_ENTRY(20), \ - ICE_PTT_UNUSED_ENTRY(21), \ - \ - /* Non Tunneled IPv4 */ \ - ICE_PTT(22, IP, IPV4, FRG, NONE, NONE, NOF, NONE, PAY3), \ - ICE_PTT(23, IP, IPV4, NOF, NONE, NONE, NOF, NONE, PAY3), \ - ICE_PTT(24, IP, IPV4, NOF, NONE, NONE, NOF, UDP, PAY4), \ - ICE_PTT_UNUSED_ENTRY(25), \ - ICE_PTT(26, IP, IPV4, NOF, NONE, NONE, NOF, TCP, PAY4), \ - ICE_PTT(27, IP, IPV4, NOF, NONE, NONE, NOF, SCTP, PAY4), \ - ICE_PTT(28, IP, IPV4, NOF, NONE, NONE, NOF, ICMP, PAY4), \ - \ - /* IPv4 --> IPv4 */ \ - ICE_PTT(29, IP, IPV4, NOF, IP_IP, IPV4, FRG, NONE, PAY3), \ - ICE_PTT(30, IP, IPV4, NOF, IP_IP, IPV4, NOF, NONE, PAY3), \ - ICE_PTT(31, IP, IPV4, NOF, IP_IP, IPV4, NOF, UDP, PAY4), \ - ICE_PTT_UNUSED_ENTRY(32), \ - ICE_PTT(33, IP, IPV4, NOF, IP_IP, IPV4, NOF, TCP, PAY4), \ - ICE_PTT(34, IP, IPV4, NOF, IP_IP, IPV4, NOF, SCTP, PAY4), \ - ICE_PTT(35, IP, IPV4, NOF, IP_IP, IPV4, NOF, ICMP, PAY4), \ - \ - /* IPv4 --> IPv6 */ \ - ICE_PTT(36, IP, IPV4, NOF, IP_IP, IPV6, FRG, NONE, PAY3), \ - ICE_PTT(37, IP, IPV4, NOF, IP_IP, IPV6, NOF, NONE, PAY3), \ - ICE_PTT(38, IP, IPV4, NOF, IP_IP, IPV6, NOF, UDP, PAY4), \ - ICE_PTT_UNUSED_ENTRY(39), \ - ICE_PTT(40, IP, IPV4, NOF, IP_IP, IPV6, NOF, TCP, PAY4), \ - ICE_PTT(41, IP, IPV4, NOF, IP_IP, IPV6, NOF, SCTP, PAY4), \ - ICE_PTT(42, IP, IPV4, NOF, IP_IP, IPV6, NOF, ICMP, PAY4), \ - \ - /* IPv4 --> GRE/NAT */ \ - ICE_PTT(43, IP, IPV4, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3), \ - \ - /* IPv4 --> GRE/NAT --> IPv4 */ \ - ICE_PTT(44, IP, IPV4, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3), \ - ICE_PTT(45, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3), \ - ICE_PTT(46, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, UDP, PAY4), \ - ICE_PTT_UNUSED_ENTRY(47), \ - ICE_PTT(48, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, TCP, PAY4), \ - ICE_PTT(49, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4), \ - ICE_PTT(50, IP, IPV4, NOF, IP_GRENAT, IPV4, 
NOF, ICMP, PAY4), \ - \ - /* IPv4 --> GRE/NAT --> IPv6 */ \ - ICE_PTT(51, IP, IPV4, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3), \ - ICE_PTT(52, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3), \ - ICE_PTT(53, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, UDP, PAY4), \ - ICE_PTT_UNUSED_ENTRY(54), \ - ICE_PTT(55, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, TCP, PAY4), \ - ICE_PTT(56, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4), \ - ICE_PTT(57, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4), \ - \ - /* IPv4 --> GRE/NAT --> MAC */ \ - ICE_PTT(58, IP, IPV4, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3), \ - \ - /* IPv4 --> GRE/NAT --> MAC --> IPv4 */ \ - ICE_PTT(59, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3), \ - ICE_PTT(60, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3), \ - ICE_PTT(61, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP, PAY4), \ - ICE_PTT_UNUSED_ENTRY(62), \ - ICE_PTT(63, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP, PAY4), \ - ICE_PTT(64, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4), \ - ICE_PTT(65, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4), \ - \ - /* IPv4 --> GRE/NAT -> MAC --> IPv6 */ \ - ICE_PTT(66, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3), \ - ICE_PTT(67, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3), \ - ICE_PTT(68, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP, PAY4), \ - ICE_PTT_UNUSED_ENTRY(69), \ - ICE_PTT(70, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP, PAY4), \ - ICE_PTT(71, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4), \ - ICE_PTT(72, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4), \ - \ - /* IPv4 --> GRE/NAT --> MAC/VLAN */ \ - ICE_PTT(73, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3), \ - \ - /* IPv4 ---> GRE/NAT -> MAC/VLAN --> IPv4 */ \ - ICE_PTT(74, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3), \ - ICE_PTT(75, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3), \ - ICE_PTT(76, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP, PAY4), \ - ICE_PTT_UNUSED_ENTRY(77), \ - ICE_PTT(78, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP, PAY4), \ - ICE_PTT(79, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4), \ - ICE_PTT(80, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4), \ - \ - /* IPv4 -> GRE/NAT -> MAC/VLAN --> IPv6 */ \ - ICE_PTT(81, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3), \ - ICE_PTT(82, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3), \ - ICE_PTT(83, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP, PAY4), \ - ICE_PTT_UNUSED_ENTRY(84), \ - ICE_PTT(85, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP, PAY4), \ - ICE_PTT(86, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4), \ - ICE_PTT(87, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4), \ - \ - /* Non Tunneled IPv6 */ \ - ICE_PTT(88, IP, IPV6, FRG, NONE, NONE, NOF, NONE, PAY3), \ - ICE_PTT(89, IP, IPV6, NOF, NONE, NONE, NOF, NONE, PAY3), \ - ICE_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP, PAY4), \ - ICE_PTT_UNUSED_ENTRY(91), \ - ICE_PTT(92, IP, IPV6, NOF, NONE, NONE, NOF, TCP, PAY4), \ - ICE_PTT(93, IP, IPV6, NOF, NONE, NONE, NOF, SCTP, PAY4), \ - ICE_PTT(94, IP, IPV6, NOF, NONE, NONE, NOF, ICMP, PAY4), \ - \ - /* IPv6 --> IPv4 */ \ - ICE_PTT(95, IP, IPV6, NOF, IP_IP, IPV4, FRG, NONE, PAY3), \ - ICE_PTT(96, IP, IPV6, NOF, IP_IP, IPV4, NOF, NONE, PAY3), \ - ICE_PTT(97, IP, IPV6, NOF, IP_IP, IPV4, NOF, UDP, PAY4), \ - ICE_PTT_UNUSED_ENTRY(98), \ - ICE_PTT(99, IP, IPV6, NOF, IP_IP, IPV4, NOF, TCP, PAY4), \ - ICE_PTT(100, IP, IPV6, NOF, IP_IP, IPV4, NOF, SCTP, PAY4), 
\ - ICE_PTT(101, IP, IPV6, NOF, IP_IP, IPV4, NOF, ICMP, PAY4), \ - \ - /* IPv6 --> IPv6 */ \ - ICE_PTT(102, IP, IPV6, NOF, IP_IP, IPV6, FRG, NONE, PAY3), \ - ICE_PTT(103, IP, IPV6, NOF, IP_IP, IPV6, NOF, NONE, PAY3), \ - ICE_PTT(104, IP, IPV6, NOF, IP_IP, IPV6, NOF, UDP, PAY4), \ - ICE_PTT_UNUSED_ENTRY(105), \ - ICE_PTT(106, IP, IPV6, NOF, IP_IP, IPV6, NOF, TCP, PAY4), \ - ICE_PTT(107, IP, IPV6, NOF, IP_IP, IPV6, NOF, SCTP, PAY4), \ - ICE_PTT(108, IP, IPV6, NOF, IP_IP, IPV6, NOF, ICMP, PAY4), \ - \ - /* IPv6 --> GRE/NAT */ \ - ICE_PTT(109, IP, IPV6, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3), \ - \ - /* IPv6 --> GRE/NAT -> IPv4 */ \ - ICE_PTT(110, IP, IPV6, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3), \ - ICE_PTT(111, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3), \ - ICE_PTT(112, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, UDP, PAY4), \ - ICE_PTT_UNUSED_ENTRY(113), \ - ICE_PTT(114, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, TCP, PAY4), \ - ICE_PTT(115, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4), \ - ICE_PTT(116, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4), \ - \ - /* IPv6 --> GRE/NAT -> IPv6 */ \ - ICE_PTT(117, IP, IPV6, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3), \ - ICE_PTT(118, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3), \ - ICE_PTT(119, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, UDP, PAY4), \ - ICE_PTT_UNUSED_ENTRY(120), \ - ICE_PTT(121, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, TCP, PAY4), \ - ICE_PTT(122, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4), \ - ICE_PTT(123, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4), \ - \ - /* IPv6 --> GRE/NAT -> MAC */ \ - ICE_PTT(124, IP, IPV6, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3), \ - \ - /* IPv6 --> GRE/NAT -> MAC -> IPv4 */ \ - ICE_PTT(125, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3), \ - ICE_PTT(126, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3), \ - ICE_PTT(127, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP, PAY4), \ - ICE_PTT_UNUSED_ENTRY(128), \ - ICE_PTT(129, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP, PAY4), \ - ICE_PTT(130, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4), \ - ICE_PTT(131, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4), \ - \ - /* IPv6 --> GRE/NAT -> MAC -> IPv6 */ \ - ICE_PTT(132, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3), \ - ICE_PTT(133, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3), \ - ICE_PTT(134, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP, PAY4), \ - ICE_PTT_UNUSED_ENTRY(135), \ - ICE_PTT(136, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP, PAY4), \ - ICE_PTT(137, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4), \ - ICE_PTT(138, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4), \ - \ - /* IPv6 --> GRE/NAT -> MAC/VLAN */ \ - ICE_PTT(139, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3), \ - \ - /* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv4 */ \ - ICE_PTT(140, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3), \ - ICE_PTT(141, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3), \ - ICE_PTT(142, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP, PAY4), \ - ICE_PTT_UNUSED_ENTRY(143), \ - ICE_PTT(144, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP, PAY4), \ - ICE_PTT(145, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4), \ - ICE_PTT(146, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4), \ - \ - /* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv6 */ \ - ICE_PTT(147, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3), \ - ICE_PTT(148, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3), \ - ICE_PTT(149, IP, IPV6, NOF, 
IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP, PAY4), \ - ICE_PTT_UNUSED_ENTRY(150), \ - ICE_PTT(151, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP, PAY4), \ - ICE_PTT(152, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4), \ - ICE_PTT(153, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4), - -#define ICE_NUM_DEFINED_PTYPES 154 - -/* macro to make the table lines short, use explicit indexing with [PTYPE] */ -#define ICE_PTT(PTYPE, OUTER_IP, OUTER_IP_VER, OUTER_FRAG, T, TE, TEF, I, PL)\ - [PTYPE] = { \ - 1, \ - ICE_RX_PTYPE_OUTER_##OUTER_IP, \ - ICE_RX_PTYPE_OUTER_##OUTER_IP_VER, \ - ICE_RX_PTYPE_##OUTER_FRAG, \ - ICE_RX_PTYPE_TUNNEL_##T, \ - ICE_RX_PTYPE_TUNNEL_END_##TE, \ - ICE_RX_PTYPE_##TEF, \ - ICE_RX_PTYPE_INNER_PROT_##I, \ - ICE_RX_PTYPE_PAYLOAD_LAYER_##PL } - -#define ICE_PTT_UNUSED_ENTRY(PTYPE) [PTYPE] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 } - -/* shorter macros makes the table fit but are terse */ -#define ICE_RX_PTYPE_NOF ICE_RX_PTYPE_NOT_FRAG -#define ICE_RX_PTYPE_FRG ICE_RX_PTYPE_FRAG - -/* Lookup table mapping in the 10-bit HW PTYPE to the bit field for decoding */ -static const struct ice_rx_ptype_decoded ice_ptype_lkup[BIT(10)] = { - ICE_PTYPES - - /* unused entries */ - [ICE_NUM_DEFINED_PTYPES ... 1023] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 } -}; - -static inline struct ice_rx_ptype_decoded ice_decode_rx_desc_ptype(u16 ptype) -{ - return ice_ptype_lkup[ptype]; -} - - #endif /* _ICE_LAN_TX_RX_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index dd4a9bc0dfdc66..2bb8c38f8d0507 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -36,6 +36,7 @@ static const char ice_copyright[] = "Copyright (c) 2018, Intel Corporation."; MODULE_AUTHOR("Intel Corporation, "); MODULE_DESCRIPTION(DRV_SUMMARY); +MODULE_IMPORT_NS(LIBIE); MODULE_LICENSE("GPL v2"); MODULE_FIRMWARE(ICE_DDP_PKG_FILE); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c index 839e5da24ad583..0e838e3b49846e 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c @@ -2,6 +2,7 @@ /* Copyright (c) 2019, Intel Corporation. */ #include +#include #include "ice_txrx_lib.h" #include "ice_eswitch.h" @@ -38,30 +39,6 @@ void ice_release_rx_desc(struct ice_rx_ring *rx_ring, u16 val) } } -/** - * ice_ptype_to_htype - get a hash type - * @ptype: the ptype value from the descriptor - * - * Returns appropriate hash type (such as PKT_HASH_TYPE_L2/L3/L4) to be used by - * skb_set_hash based on PTYPE as parsed by HW Rx pipeline and is part of - * Rx desc. 
- */ -static enum pkt_hash_types ice_ptype_to_htype(u16 ptype) -{ - struct ice_rx_ptype_decoded decoded = ice_decode_rx_desc_ptype(ptype); - - if (!decoded.known) - return PKT_HASH_TYPE_NONE; - if (decoded.payload_layer == ICE_RX_PTYPE_PAYLOAD_LAYER_PAY4) - return PKT_HASH_TYPE_L4; - if (decoded.payload_layer == ICE_RX_PTYPE_PAYLOAD_LAYER_PAY3) - return PKT_HASH_TYPE_L3; - if (decoded.outer_ip == ICE_RX_PTYPE_OUTER_L2) - return PKT_HASH_TYPE_L2; - - return PKT_HASH_TYPE_NONE; -} - /** * ice_get_rx_hash - get RX hash value from descriptor * @rx_desc: specific descriptor @@ -91,14 +68,16 @@ ice_rx_hash_to_skb(const struct ice_rx_ring *rx_ring, const union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb, u16 rx_ptype) { + struct libie_rx_ptype_parsed parsed; u32 hash; - if (!(rx_ring->netdev->features & NETIF_F_RXHASH)) + parsed = libie_parse_rx_ptype(rx_ptype); + if (!libie_has_rx_hash(rx_ring->netdev, parsed)) return; hash = ice_get_rx_hash(rx_desc); if (likely(hash)) - skb_set_hash(skb, hash, ice_ptype_to_htype(rx_ptype)); + libie_skb_set_hash(skb, hash, parsed); } /** @@ -106,7 +85,7 @@ ice_rx_hash_to_skb(const struct ice_rx_ring *rx_ring, * @ring: the ring we care about * @skb: skb currently being received and modified * @rx_desc: the receive descriptor - * @ptype: the packet type decoded by hardware + * @ptype: the packet type parsed by hardware * * skb->protocol must be set before this function is called */ @@ -114,34 +93,26 @@ static void ice_rx_csum(struct ice_rx_ring *ring, struct sk_buff *skb, union ice_32b_rx_flex_desc *rx_desc, u16 ptype) { - struct ice_rx_ptype_decoded decoded; + struct libie_rx_ptype_parsed parsed; u16 rx_status0, rx_status1; bool ipv4, ipv6; - rx_status0 = le16_to_cpu(rx_desc->wb.status_error0); - rx_status1 = le16_to_cpu(rx_desc->wb.status_error1); - - decoded = ice_decode_rx_desc_ptype(ptype); - /* Start with CHECKSUM_NONE and by default csum_level = 0 */ skb->ip_summed = CHECKSUM_NONE; - skb_checksum_none_assert(skb); - /* check if Rx checksum is enabled */ - if (!(ring->netdev->features & NETIF_F_RXCSUM)) + parsed = libie_parse_rx_ptype(ptype); + if (!libie_has_rx_checksum(ring->netdev, parsed)) return; - /* check if HW has decoded the packet and checksum */ - if (!(rx_status0 & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S))) - return; + rx_status0 = le16_to_cpu(rx_desc->wb.status_error0); + rx_status1 = le16_to_cpu(rx_desc->wb.status_error1); - if (!(decoded.known && decoded.outer_ip)) + /* check if HW has parsed the packet and checksum */ + if (!(rx_status0 & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S))) return; - ipv4 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) && - (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV4); - ipv6 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) && - (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV6); + ipv4 = parsed.outer_ip == LIBIE_RX_PTYPE_OUTER_IPV4; + ipv6 = parsed.outer_ip == LIBIE_RX_PTYPE_OUTER_IPV6; if (ipv4 && (rx_status0 & (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) | BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S)))) @@ -165,19 +136,10 @@ ice_rx_csum(struct ice_rx_ring *ring, struct sk_buff *skb, * we need to bump the checksum level by 1 to reflect the fact that * we are indicating we validated the inner checksum. 
*/ - if (decoded.tunnel_type >= ICE_RX_PTYPE_TUNNEL_IP_GRENAT) + if (parsed.tunnel_type >= LIBIE_RX_PTYPE_TUNNEL_IP_GRENAT) skb->csum_level = 1; - /* Only report checksum unnecessary for TCP, UDP, or SCTP */ - switch (decoded.inner_prot) { - case ICE_RX_PTYPE_INNER_PROT_TCP: - case ICE_RX_PTYPE_INNER_PROT_UDP: - case ICE_RX_PTYPE_INNER_PROT_SCTP: - skb->ip_summed = CHECKSUM_UNNECESSARY; - break; - default: - break; - } + skb->ip_summed = CHECKSUM_UNNECESSARY; return; checksum_fail: @@ -523,42 +485,6 @@ static int ice_xdp_rx_hw_ts(const struct xdp_md *ctx, u64 *ts_ns) return 0; } -/* Define a ptype index -> XDP hash type lookup table. - * It uses the same ptype definitions as ice_decode_rx_desc_ptype[], - * avoiding possible copy-paste errors. - */ -#undef ICE_PTT -#undef ICE_PTT_UNUSED_ENTRY - -#define ICE_PTT(PTYPE, OUTER_IP, OUTER_IP_VER, OUTER_FRAG, T, TE, TEF, I, PL)\ - [PTYPE] = XDP_RSS_L3_##OUTER_IP_VER | XDP_RSS_L4_##I | XDP_RSS_TYPE_##PL - -#define ICE_PTT_UNUSED_ENTRY(PTYPE) [PTYPE] = 0 - -/* A few supplementary definitions for when XDP hash types do not coincide - * with what can be generated from ptype definitions - * by means of preprocessor concatenation. - */ -#define XDP_RSS_L3_NONE XDP_RSS_TYPE_NONE -#define XDP_RSS_L4_NONE XDP_RSS_TYPE_NONE -#define XDP_RSS_TYPE_PAY2 XDP_RSS_TYPE_L2 -#define XDP_RSS_TYPE_PAY3 XDP_RSS_TYPE_NONE -#define XDP_RSS_TYPE_PAY4 XDP_RSS_L4 - -static const enum xdp_rss_hash_type -ice_ptype_to_xdp_hash[ICE_NUM_DEFINED_PTYPES] = { - ICE_PTYPES -}; - -#undef XDP_RSS_L3_NONE -#undef XDP_RSS_L4_NONE -#undef XDP_RSS_TYPE_PAY2 -#undef XDP_RSS_TYPE_PAY3 -#undef XDP_RSS_TYPE_PAY4 - -#undef ICE_PTT -#undef ICE_PTT_UNUSED_ENTRY - /** * ice_xdp_rx_hash_type - Get XDP-specific hash type from the RX descriptor * @eop_desc: End of Packet descriptor @@ -566,12 +492,7 @@ ice_ptype_to_xdp_hash[ICE_NUM_DEFINED_PTYPES] = { static enum xdp_rss_hash_type ice_xdp_rx_hash_type(const union ice_32b_rx_flex_desc *eop_desc) { - u16 ptype = ice_get_ptype(eop_desc); - - if (unlikely(ptype >= ICE_NUM_DEFINED_PTYPES)) - return 0; - - return ice_ptype_to_xdp_hash[ptype]; + return libie_parse_rx_ptype(ice_get_ptype(eop_desc)).hash_type; } /** diff --git a/drivers/net/ethernet/intel/idpf/idpf_dev.c b/drivers/net/ethernet/intel/idpf/idpf_dev.c index 34ad1ac46b7834..2c67760861300b 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_dev.c +++ b/drivers/net/ethernet/intel/idpf/idpf_dev.c @@ -96,8 +96,10 @@ static int idpf_intr_reg_init(struct idpf_vport *vport) intr->dyn_ctl = idpf_get_reg_addr(adapter, reg_vals[vec_id].dyn_ctl_reg); intr->dyn_ctl_intena_m = PF_GLINT_DYN_CTL_INTENA_M; + intr->dyn_ctl_intena_msk_m = PF_GLINT_DYN_CTL_INTENA_MSK_M; intr->dyn_ctl_itridx_s = PF_GLINT_DYN_CTL_ITR_INDX_S; intr->dyn_ctl_intrvl_s = PF_GLINT_DYN_CTL_INTERVAL_S; + intr->dyn_ctl_wb_on_itr_m = PF_GLINT_DYN_CTL_WB_ON_ITR_M; spacing = IDPF_ITR_IDX_SPACING(reg_vals[vec_id].itrn_index_spacing, IDPF_PF_ITR_IDX_SPACING); diff --git a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c index 27b93592c4babb..fe39412fece7ca 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c @@ -1166,8 +1166,10 @@ int idpf_vport_singleq_napi_poll(struct napi_struct *napi, int budget) &work_done); /* If work not completed, return budget and polling will return */ - if (!clean_complete) + if (!clean_complete) { + idpf_vport_intr_set_wb_on_itr(q_vector); return budget; + } work_done = min_t(int, work_done, 
budget - 1); @@ -1176,6 +1178,8 @@ int idpf_vport_singleq_napi_poll(struct napi_struct *napi, int budget) */ if (likely(napi_complete_done(napi, work_done))) idpf_vport_intr_update_itr_ena_irq(q_vector); + else + idpf_vport_intr_set_wb_on_itr(q_vector); return work_done; } diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index 2f8ad79ae3f046..d7dd84e530ada5 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -61,14 +61,14 @@ void idpf_tx_timeout(struct net_device *netdev, unsigned int txqueue) */ static void idpf_tx_buf_rel(struct idpf_queue *tx_q, struct idpf_tx_buf *tx_buf) { - if (tx_buf->skb) { + if (tx_buf->type == IDPF_TX_BUF_SKB) { if (dma_unmap_len(tx_buf, len)) dma_unmap_single(tx_q->dev, dma_unmap_addr(tx_buf, dma), dma_unmap_len(tx_buf, len), DMA_TO_DEVICE); dev_kfree_skb_any(tx_buf->skb); - } else if (dma_unmap_len(tx_buf, len)) { + } else if (tx_buf->type == IDPF_TX_BUF_FRAG) { dma_unmap_page(tx_q->dev, dma_unmap_addr(tx_buf, dma), dma_unmap_len(tx_buf, len), @@ -77,6 +77,8 @@ static void idpf_tx_buf_rel(struct idpf_queue *tx_q, struct idpf_tx_buf *tx_buf) tx_buf->next_to_watch = NULL; tx_buf->skb = NULL; + tx_buf->nr_frags = 0; + tx_buf->type = IDPF_TX_BUF_EMPTY; tx_buf->compl_tag = IDPF_SPLITQ_TX_INVAL_COMPL_TAG; dma_unmap_len_set(tx_buf, len, 0); } @@ -87,12 +89,25 @@ static void idpf_tx_buf_rel(struct idpf_queue *tx_q, struct idpf_tx_buf *tx_buf) */ static void idpf_tx_buf_rel_all(struct idpf_queue *txq) { - u16 i; + struct idpf_tx_stash *stash; + struct hlist_node *tmp; + u16 i, tag; /* Buffers already cleared, nothing to do */ if (!txq->tx_buf) return; + /* If a TX timeout occurred, there are potentially still bufs in the + * hash table, free them here. 
+ */ + hash_for_each_safe(txq->sched_buf_hash, tag, tmp, stash, hlist) { + if (stash) { + idpf_tx_buf_rel(txq, &stash->buf); + hash_del(&stash->hlist); + idpf_buf_lifo_push(&txq->buf_stack, stash); + } + } + /* Free all the Tx buffer sk_buffs */ for (i = 0; i < txq->desc_count; i++) idpf_tx_buf_rel(txq, &txq->tx_buf[i]); @@ -175,10 +190,6 @@ static int idpf_tx_buf_alloc_all(struct idpf_queue *tx_q) if (!tx_q->tx_buf) return -ENOMEM; - /* Initialize tx_bufs with invalid completion tags */ - for (i = 0; i < tx_q->desc_count; i++) - tx_q->tx_buf[i].compl_tag = IDPF_SPLITQ_TX_INVAL_COMPL_TAG; - /* Initialize tx buf stack for out-of-order completions if * flow scheduling offload is enabled */ @@ -1490,7 +1501,7 @@ static void idpf_tx_splitq_clean_hdr(struct idpf_queue *tx_q, } /* clear tx_buf data */ - tx_buf->skb = NULL; + tx_buf->type = IDPF_TX_BUF_EMPTY; cleaned->bytes += tx_buf->bytecount; cleaned->packets += tx_buf->gso_segs; @@ -1514,13 +1525,15 @@ static void idpf_tx_clean_stashed_bufs(struct idpf_queue *txq, u16 compl_tag, /* Buffer completion */ hash_for_each_possible_safe(txq->sched_buf_hash, stash, tmp_buf, hlist, compl_tag) { - if (unlikely(stash->buf.compl_tag != (int)compl_tag)) + if (unlikely(stash->buf.compl_tag != compl_tag)) continue; - if (stash->buf.skb) { + hash_del(&stash->hlist); // TODO: move back to the end if needed + + if (stash->buf.type == IDPF_TX_BUF_SKB) { idpf_tx_splitq_clean_hdr(txq, &stash->buf, cleaned, budget); - } else if (dma_unmap_len(&stash->buf, len)) { + } else if (stash->buf.type == IDPF_TX_BUF_FRAG) { dma_unmap_page(txq->dev, dma_unmap_addr(&stash->buf, dma), dma_unmap_len(&stash->buf, len), @@ -1530,8 +1543,6 @@ static void idpf_tx_clean_stashed_bufs(struct idpf_queue *txq, u16 compl_tag, /* Push shadow buf back onto stack */ idpf_buf_lifo_push(&txq->buf_stack, stash); - - hash_del(&stash->hlist); } } @@ -1546,8 +1557,7 @@ static int idpf_stash_flow_sch_buffers(struct idpf_queue *txq, { struct idpf_tx_stash *stash; - if (unlikely(!dma_unmap_addr(tx_buf, dma) && - !dma_unmap_len(tx_buf, len))) + if (unlikely(tx_buf->type <= IDPF_TX_BUF_RSVD)) return 0; stash = idpf_buf_lifo_pop(&txq->buf_stack); @@ -1562,6 +1572,8 @@ static int idpf_stash_flow_sch_buffers(struct idpf_queue *txq, stash->buf.skb = tx_buf->skb; stash->buf.bytecount = tx_buf->bytecount; stash->buf.gso_segs = tx_buf->gso_segs; + stash->buf.type = tx_buf->type; + stash->buf.nr_frags = tx_buf->nr_frags; dma_unmap_addr_set(&stash->buf, dma, dma_unmap_addr(tx_buf, dma)); dma_unmap_len_set(&stash->buf, len, dma_unmap_len(tx_buf, len)); stash->buf.compl_tag = tx_buf->compl_tag; @@ -1569,19 +1581,16 @@ static int idpf_stash_flow_sch_buffers(struct idpf_queue *txq, /* Add buffer to buf_hash table to be freed later */ hash_add(txq->sched_buf_hash, &stash->hlist, stash->buf.compl_tag); - memset(tx_buf, 0, sizeof(struct idpf_tx_buf)); - - /* Reinitialize buf_id portion of tag */ - tx_buf->compl_tag = IDPF_SPLITQ_TX_INVAL_COMPL_TAG; + tx_buf->type = IDPF_TX_BUF_EMPTY; + tx_buf->nr_frags = 0; return 0; } #define idpf_tx_splitq_clean_bump_ntc(txq, ntc, desc, buf) \ do { \ - (ntc)++; \ - if (unlikely(!(ntc))) { \ - ntc -= (txq)->desc_count; \ + if (unlikely(++(ntc) == (txq)->desc_count)) { \ + ntc = 0; \ buf = (txq)->tx_buf; \ desc = IDPF_FLEX_TX_DESC(txq, 0); \ } else { \ @@ -1605,63 +1614,66 @@ do { \ * Separate packet completion events will be reported on the completion queue, * and the buffers will be cleaned separately. The stats are not updated from * this function when using flow-based scheduling. 
+ * + * Furthermore, in flow scheduling mode, check to make sure there are enough + * reserve buffers to stash the packet. If there are not, return early, which + * will leave next_to_clean pointing to the packet that failed to be stashed. + * Return false in this scenario. Otherwise, return true. */ -static void idpf_tx_splitq_clean(struct idpf_queue *tx_q, u16 end, +static bool idpf_tx_splitq_clean(struct idpf_queue *tx_q, u16 end, int napi_budget, struct idpf_cleaned_stats *cleaned, bool descs_only) { union idpf_tx_flex_desc *next_pending_desc = NULL; union idpf_tx_flex_desc *tx_desc; - s16 ntc = tx_q->next_to_clean; + u16 ntc = tx_q->next_to_clean; struct idpf_tx_buf *tx_buf; + bool clean_complete = true; tx_desc = IDPF_FLEX_TX_DESC(tx_q, ntc); next_pending_desc = IDPF_FLEX_TX_DESC(tx_q, end); tx_buf = &tx_q->tx_buf[ntc]; - ntc -= tx_q->desc_count; while (tx_desc != next_pending_desc) { - union idpf_tx_flex_desc *eop_desc; + u16 eop_idx; /* If this entry in the ring was used as a context descriptor, - * it's corresponding entry in the buffer ring will have an - * invalid completion tag since no buffer was used. We can - * skip this descriptor since there is no buffer to clean. + * it's corresponding entry in the buffer ring is reserved. We + * can skip this descriptor since there is no buffer to clean. */ - if (unlikely(tx_buf->compl_tag == IDPF_SPLITQ_TX_INVAL_COMPL_TAG)) + if (tx_buf->type <= IDPF_TX_BUF_RSVD) goto fetch_next_txq_desc; - eop_desc = (union idpf_tx_flex_desc *)tx_buf->next_to_watch; - - /* clear next_to_watch to prevent false hangs */ - tx_buf->next_to_watch = NULL; + if (unlikely(tx_buf->type != IDPF_TX_BUF_SKB)) + break; + eop_idx = tx_buf->eop_idx; if (descs_only) { - if (idpf_stash_flow_sch_buffers(tx_q, tx_buf)) + if (IDPF_TX_BUF_RSV_UNUSED(tx_q) < tx_buf->nr_frags) { + clean_complete = false; goto tx_splitq_clean_out; + } - while (tx_desc != eop_desc) { + idpf_stash_flow_sch_buffers(tx_q, tx_buf); + + while (ntc != eop_idx) { idpf_tx_splitq_clean_bump_ntc(tx_q, ntc, tx_desc, tx_buf); - if (dma_unmap_len(tx_buf, len)) { - if (idpf_stash_flow_sch_buffers(tx_q, - tx_buf)) - goto tx_splitq_clean_out; - } + idpf_stash_flow_sch_buffers(tx_q, tx_buf); } } else { idpf_tx_splitq_clean_hdr(tx_q, tx_buf, cleaned, napi_budget); /* unmap remaining buffers */ - while (tx_desc != eop_desc) { + while (ntc != eop_idx) { idpf_tx_splitq_clean_bump_ntc(tx_q, ntc, tx_desc, tx_buf); /* unmap any remaining paged data */ - if (dma_unmap_len(tx_buf, len)) { + if (tx_buf->type == IDPF_TX_BUF_FRAG) { dma_unmap_page(tx_q->dev, dma_unmap_addr(tx_buf, dma), dma_unmap_len(tx_buf, len), @@ -1676,8 +1688,9 @@ static void idpf_tx_splitq_clean(struct idpf_queue *tx_q, u16 end, } tx_splitq_clean_out: - ntc += tx_q->desc_count; tx_q->next_to_clean = ntc; + + return clean_complete; } #define idpf_tx_clean_buf_ring_bump_ntc(txq, ntc, buf) \ @@ -1708,16 +1721,21 @@ static bool idpf_tx_clean_buf_ring(struct idpf_queue *txq, u16 compl_tag, { u16 idx = compl_tag & txq->compl_tag_bufid_m; struct idpf_tx_buf *tx_buf = NULL; - u16 ntc = txq->next_to_clean; - u16 num_descs_cleaned = 0; - u16 orig_idx = idx; + u16 ntc, orig_idx = idx; tx_buf = &txq->tx_buf[idx]; - while (tx_buf->compl_tag == (int)compl_tag) { - if (tx_buf->skb) { - idpf_tx_splitq_clean_hdr(txq, tx_buf, cleaned, budget); - } else if (dma_unmap_len(tx_buf, len)) { + if (unlikely(tx_buf->type <= IDPF_TX_BUF_RSVD || + tx_buf->compl_tag != compl_tag)) + return false; + + if (tx_buf->type == IDPF_TX_BUF_SKB) + idpf_tx_splitq_clean_hdr(txq, tx_buf, 
cleaned, budget); + + idpf_tx_clean_buf_ring_bump_ntc(txq, idx, tx_buf); + + while (tx_buf->compl_tag == compl_tag) { + if (tx_buf->type == IDPF_TX_BUF_FRAG) { + dma_unmap_page(txq->dev, dma_unmap_addr(tx_buf, dma), dma_unmap_len(tx_buf, len), @@ -1725,38 +1743,46 @@ static bool idpf_tx_clean_buf_ring(struct idpf_queue *txq, u16 compl_tag, DMA_TO_DEVICE); dma_unmap_len_set(tx_buf, len, 0); } - memset(tx_buf, 0, sizeof(struct idpf_tx_buf)); - tx_buf->compl_tag = IDPF_SPLITQ_TX_INVAL_COMPL_TAG; + tx_buf->type = IDPF_TX_BUF_EMPTY; - num_descs_cleaned++; idpf_tx_clean_buf_ring_bump_ntc(txq, idx, tx_buf); } - /* If we didn't clean anything on the ring for this completion, there's - * nothing more to do. - */ - if (unlikely(!num_descs_cleaned)) - return false; - - /* Otherwise, if we did clean a packet on the ring directly, it's safe - * to assume that the descriptors starting from the original - * next_to_clean up until the previously cleaned packet can be reused. - * Therefore, we will go back in the ring and stash any buffers still - * in the ring into the hash table to be cleaned later. + /* It's possible the packet we just cleaned was an out of order + * completion, which means we can stash the buffers starting + * from the original next_to_clean and reuse the descriptors. We need + * to compare the descriptor ring next_to_clean packet's "first" buffer + * to the "first" buffer of the packet we just cleaned to determine if + * this is the case. However, next_to_clean can point to either a + * reserved buffer that corresponds to a context descriptor used for the + * next_to_clean packet (TSO packet) or the "first" buffer (single + * packet). The orig_idx from the packet we just cleaned will always + * point to the "first" buffer. If next_to_clean points to a reserved + * buffer, let's bump ntc once and start the comparison from there. */ + ntc = txq->next_to_clean; tx_buf = &txq->tx_buf[ntc]; - while (tx_buf != &txq->tx_buf[orig_idx]) { - idpf_stash_flow_sch_buffers(txq, tx_buf); + + if (tx_buf->type == IDPF_TX_BUF_CTX) idpf_tx_clean_buf_ring_bump_ntc(txq, ntc, tx_buf); - } - /* Finally, update next_to_clean to reflect the work that was just done - * on the ring, if any. If the packet was only cleaned from the hash - * table, the ring will not be impacted, therefore we should not touch - * next_to_clean. The updated idx is used here + /* If ntc still points to a different "first" buffer, clean the + * descriptor ring and stash all of the buffers for later cleaning. If + * we cannot stash all of the buffers, next_to_clean will point to the + * "first" buffer of the packet that could not be stashed and cleaning + * will start there next time. + */ + if (unlikely(tx_buf != &txq->tx_buf[orig_idx] && + !idpf_tx_splitq_clean(txq, orig_idx, budget, cleaned, + true))) + goto clean_buf_ring_out; + + /* Otherwise, update next_to_clean to reflect the cleaning that was + * done above.
*/ txq->next_to_clean = idx; +clean_buf_ring_out: return true; } @@ -1781,7 +1807,9 @@ static void idpf_tx_handle_rs_completion(struct idpf_queue *txq, if (!test_bit(__IDPF_Q_FLOW_SCH_EN, txq->flags)) { u16 head = le16_to_cpu(desc->q_head_compl_tag.q_head); - return idpf_tx_splitq_clean(txq, head, budget, cleaned, false); + idpf_tx_splitq_clean(txq, head, budget, cleaned, false); + + return; } compl_tag = le16_to_cpu(desc->q_head_compl_tag.compl_tag); @@ -2216,7 +2244,9 @@ static void idpf_tx_splitq_map(struct idpf_queue *tx_q, if (dma_mapping_error(tx_q->dev, dma)) return idpf_tx_dma_map_error(tx_q, skb, first, i); + first->nr_frags++; tx_buf->compl_tag = params->compl_tag; + tx_buf->type = IDPF_TX_BUF_FRAG; /* record length, and DMA address */ dma_unmap_len_set(tx_buf, len, size); @@ -2270,14 +2300,15 @@ static void idpf_tx_splitq_map(struct idpf_queue *tx_q, idpf_tx_splitq_build_desc(tx_desc, params, td_cmd, max_data); - tx_desc++; - i++; - - if (i == tx_q->desc_count) { + if (unlikely(++i == tx_q->desc_count)) { + tx_buf = tx_q->tx_buf; tx_desc = IDPF_FLEX_TX_DESC(tx_q, 0); i = 0; tx_q->compl_tag_cur_gen = IDPF_TX_ADJ_COMPL_TAG_GEN(tx_q); + } else { + tx_buf++; + tx_desc++; } /* Since this packet has a buffer that is going to span @@ -2290,8 +2321,7 @@ static void idpf_tx_splitq_map(struct idpf_queue *tx_q, * simply pass over these holes and finish cleaning the * rest of the packet. */ - memset(&tx_q->tx_buf[i], 0, sizeof(struct idpf_tx_buf)); - tx_q->tx_buf[i].compl_tag = params->compl_tag; + tx_buf->type = IDPF_TX_BUF_EMPTY; /* Adjust the DMA offset and the remaining size of the * fragment. On the first iteration of this loop, @@ -2315,13 +2345,15 @@ static void idpf_tx_splitq_map(struct idpf_queue *tx_q, break; idpf_tx_splitq_build_desc(tx_desc, params, td_cmd, size); - tx_desc++; - i++; - if (i == tx_q->desc_count) { + if (unlikely(++i == tx_q->desc_count)) { + tx_buf = tx_q->tx_buf; tx_desc = IDPF_FLEX_TX_DESC(tx_q, 0); i = 0; tx_q->compl_tag_cur_gen = IDPF_TX_ADJ_COMPL_TAG_GEN(tx_q); + } else { + tx_buf++; + tx_desc++; } size = skb_frag_size(frag); @@ -2329,21 +2361,19 @@ static void idpf_tx_splitq_map(struct idpf_queue *tx_q, dma = skb_frag_dma_map(tx_q->dev, frag, 0, size, DMA_TO_DEVICE); - - tx_buf = &tx_q->tx_buf[i]; } /* record SW timestamp if HW timestamp is not available */ skb_tx_timestamp(skb); + first->type = IDPF_TX_BUF_SKB; + /* write last descriptor with RS and EOP bits */ + first->eop_idx = i; td_cmd |= params->eop_cmd; idpf_tx_splitq_build_desc(tx_desc, params, td_cmd, size); i = idpf_tx_splitq_bump_ntu(tx_q, i); - /* set next_to_watch value indicating a packet is present */ - first->next_to_watch = tx_desc; - tx_q->txq_grp->num_completions_pending++; /* record bytecount for BQL */ @@ -2548,8 +2578,7 @@ idpf_tx_splitq_get_ctx_desc(struct idpf_queue *txq) struct idpf_flex_tx_ctx_desc *desc; int i = txq->next_to_use; - memset(&txq->tx_buf[i], 0, sizeof(struct idpf_tx_buf)); - txq->tx_buf[i].compl_tag = IDPF_SPLITQ_TX_INVAL_COMPL_TAG; + txq->tx_buf[i].type = IDPF_TX_BUF_CTX; /* grab the next descriptor */ desc = IDPF_FLEX_TX_CTX_DESC(txq, i); @@ -3631,6 +3660,7 @@ void idpf_vport_intr_update_itr_ena_irq(struct idpf_q_vector *q_vector) IDPF_NO_ITR_UPDATE_IDX, 0); writel(intval, q_vector->intr_reg.dyn_ctl); + q_vector->wb_on_itr = false; } /** @@ -3922,8 +3952,10 @@ static int idpf_vport_splitq_napi_poll(struct napi_struct *napi, int budget) clean_complete &= idpf_tx_splitq_clean_all(q_vector, budget, &work_done); /* If work not completed, return budget and polling will 
return */ - if (!clean_complete) + if (!clean_complete) { + idpf_vport_intr_set_wb_on_itr(q_vector); return budget; + } work_done = min_t(int, work_done, budget - 1); @@ -3932,6 +3964,8 @@ */ if (likely(napi_complete_done(napi, work_done))) idpf_vport_intr_update_itr_ena_irq(q_vector); + else + idpf_vport_intr_set_wb_on_itr(q_vector); /* Switch to poll mode in the tear-down path after sending disable * queues virtchnl message, as the interrupts will be disabled after diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h index df76493faa7569..4543569a6e7399 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h @@ -169,14 +169,34 @@ union idpf_tx_flex_desc { struct idpf_flex_tx_sched_desc flow; /* flow based scheduling */ }; +/** + * enum idpf_tx_buf_type - Type of &idpf_tx_buf to act on Tx completion + * @IDPF_TX_BUF_EMPTY: Unused, no action required + * @IDPF_TX_BUF_SKB: &sk_buff, unmap and consume_skb(), update stats + * @IDPF_TX_BUF_FRAG: Mapped skb, only unmap DMA + * @IDPF_TX_BUF_RSVD: Indicates ring entry is reserved, i.e. buffer is empty + * but should not be used, typically corresponds to context + * descriptor entry, no action required during cleaning +#ifdef CONFIG_XDP + * @IDPF_TX_BUF_XDP: &xdp_buff, unmap and page_frag_free(), update stats +#endif + */ +enum idpf_tx_buf_type { + IDPF_TX_BUF_EMPTY = 0U, + IDPF_TX_BUF_CTX, + IDPF_TX_BUF_RSVD, + IDPF_TX_BUF_SKB, + IDPF_TX_BUF_FRAG, +}; + /** * struct idpf_tx_buf - * @next_to_watch: Next descriptor to clean + * @next_to_watch: Next descriptor to clean // TODO: remove * @skb: Pointer to the skb - * @dma: DMA address - * @len: DMA length * @bytecount: Number of bytes - * @gso_segs: Number of GSO segments + * @gso_segs: Number of GSO segments + * @eop_idx: Index in descriptor/buffer ring of last buffer for this packet + * @nr_frags: Total number of non empty buffers representing this packet + * @type: Type of buffer, &idpf_tx_buf_type * @compl_tag: Splitq only, unique identifier for a buffer. Used to compare * with completion tag returned in buffer completion event. * Because the completion tag is expected to be the same in all @@ -190,20 +210,22 @@ union idpf_tx_flex_desc { * @ctx_entry: Singleq only. Used to indicate the corresponding entry * in the descriptor ring was used for a context descriptor and * this buffer entry should be skipped.
+ * @dma: DMA address + * @len: DMA length */ struct idpf_tx_buf { - void *next_to_watch; + void *next_to_watch; //TODO: remove struct sk_buff *skb; - DEFINE_DMA_UNMAP_ADDR(dma); - DEFINE_DMA_UNMAP_LEN(len); unsigned int bytecount; unsigned short gso_segs; + u16 eop_idx; + u16 nr_frags:8; - union { - int compl_tag; - - bool ctx_entry; - }; + u16 type:8; + u16 compl_tag; + bool ctx_entry; // TODO: remove + DEFINE_DMA_UNMAP_LEN(len); + DEFINE_DMA_UNMAP_ADDR(dma); }; struct idpf_tx_stash { @@ -484,9 +506,11 @@ struct idpf_vec_regs { * struct idpf_intr_reg * @dyn_ctl: Dynamic control interrupt register * @dyn_ctl_intena_m: Mask for dyn_ctl interrupt enable + * @dyn_ctl_intena_msk_m: Mask for dyn_ctl interrupt enable mask * @dyn_ctl_itridx_s: Register bit offset for ITR index * @dyn_ctl_itridx_m: Mask for ITR index * @dyn_ctl_intrvl_s: Register bit offset for ITR interval + * @dyn_ctl_wb_on_itr_m: Mask for WB on ITR feature * @rx_itr: RX ITR register * @tx_itr: TX ITR register * @icr_ena: Interrupt cause register offset @@ -495,9 +519,11 @@ struct idpf_vec_regs { struct idpf_intr_reg { void __iomem *dyn_ctl; u32 dyn_ctl_intena_m; + u32 dyn_ctl_intena_msk_m; u32 dyn_ctl_itridx_s; u32 dyn_ctl_itridx_m; u32 dyn_ctl_intrvl_s; + u32 dyn_ctl_wb_on_itr_m; void __iomem *rx_itr; void __iomem *tx_itr; void __iomem *icr_ena; @@ -510,6 +536,7 @@ struct idpf_intr_reg { * @affinity_mask: CPU affinity mask * @napi: napi handler * @v_idx: Vector index + * @wb_on_itr: WB on ITR enabled or not * @intr_reg: See struct idpf_intr_reg * @num_txq: Number of TX queues * @tx: Array of TX queues to service @@ -533,6 +560,7 @@ struct idpf_q_vector { cpumask_t affinity_mask; struct napi_struct napi; u16 v_idx; + bool wb_on_itr; struct idpf_intr_reg intr_reg; u16 num_txq; @@ -973,6 +1001,26 @@ static inline void idpf_rx_sync_for_cpu(struct idpf_rx_buf *rx_buf, u32 len) page_pool_get_dma_dir(pp)); } +/** + * idpf_vport_intr_set_wb_on_itr - enable descriptor writeback on disabled interrupts + * @q_vector: pointer to queue vector struct + */ +static inline void idpf_vport_intr_set_wb_on_itr(struct idpf_q_vector *q_vector) +{ + struct idpf_intr_reg *reg; + + if (q_vector->wb_on_itr) + return; + + reg = &q_vector->intr_reg; + + writel(reg->dyn_ctl_wb_on_itr_m | reg->dyn_ctl_intena_msk_m | + IDPF_NO_ITR_UPDATE_IDX << reg->dyn_ctl_itridx_s, + reg->dyn_ctl); + + q_vector->wb_on_itr = true; +} + int idpf_vport_singleq_napi_poll(struct napi_struct *napi, int budget); void idpf_vport_init_num_qs(struct idpf_vport *vport, struct virtchnl2_create_vport *vport_msg); diff --git a/drivers/net/ethernet/intel/idpf/idpf_vf_dev.c b/drivers/net/ethernet/intel/idpf/idpf_vf_dev.c index 8ade4e3a9fe111..f5b0a066663686 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_vf_dev.c +++ b/drivers/net/ethernet/intel/idpf/idpf_vf_dev.c @@ -96,7 +96,9 @@ static int idpf_vf_intr_reg_init(struct idpf_vport *vport) intr->dyn_ctl = idpf_get_reg_addr(adapter, reg_vals[vec_id].dyn_ctl_reg); intr->dyn_ctl_intena_m = VF_INT_DYN_CTLN_INTENA_M; + intr->dyn_ctl_intena_msk_m = VF_INT_DYN_CTLN_INTENA_MSK_M; intr->dyn_ctl_itridx_s = VF_INT_DYN_CTLN_ITR_INDX_S; + intr->dyn_ctl_wb_on_itr_m = VF_INT_DYN_CTLN_WB_ON_ITR_M; spacing = IDPF_ITR_IDX_SPACING(reg_vals[vec_id].itrn_index_spacing, IDPF_VF_ITR_IDX_SPACING); diff --git a/drivers/net/ethernet/intel/libie/Kconfig b/drivers/net/ethernet/intel/libie/Kconfig new file mode 100644 index 00000000000000..6e0162fb94d25e --- /dev/null +++ b/drivers/net/ethernet/intel/libie/Kconfig @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: 
GPL-2.0-only +# Copyright(c) 2023 Intel Corporation. + +config LIBIE + tristate + select PAGE_POOL + help + libie (Intel Ethernet library) is a common library containing + routines shared between several Intel Ethernet drivers. diff --git a/drivers/net/ethernet/intel/libie/Makefile b/drivers/net/ethernet/intel/libie/Makefile new file mode 100644 index 00000000000000..76f32253481b70 --- /dev/null +++ b/drivers/net/ethernet/intel/libie/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only +# Copyright(c) 2023 Intel Corporation. + +obj-$(CONFIG_LIBIE) += libie.o + +libie-objs += rx.o +libie-objs += stats.o diff --git a/drivers/net/ethernet/intel/libie/rx.c b/drivers/net/ethernet/intel/libie/rx.c new file mode 100644 index 00000000000000..17da408991989d --- /dev/null +++ b/drivers/net/ethernet/intel/libie/rx.c @@ -0,0 +1,192 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2023 Intel Corporation. */ + +#include + +/* Rx buffer management */ + +/** + * libie_rx_hw_len - get the actual buffer size to be passed to HW + * @pp: &page_pool_params of the netdev to calculate the size for + * + * Return: HW-writeable length per one buffer to pass it to the HW accounting: + * MTU the @dev has, HW required alignment, minimum and maximum allowed values, + * and system's page size. + */ +static u32 libie_rx_hw_len(const struct page_pool_params *pp) +{ + u32 len; + + len = READ_ONCE(pp->netdev->mtu) + LIBIE_RX_LL_LEN; + len = ALIGN(len, LIBIE_RX_BUF_LEN_ALIGN); + len = clamp(len, LIBIE_MIN_RX_BUF_LEN, LIBIE_RX_BUF_LEN(pp->offset)); + + return len; +} + +/** + * libie_rx_page_pool_create - create a PP with the default libie settings + * @bq: buffer queue struct to fill + * @napi: &napi_struct covering this PP (no usage outside its poll loops) + * + * Return: 0 on success, -errno on failure. + */ +int libie_rx_page_pool_create(struct libie_buf_queue *bq, + struct napi_struct *napi) +{ + struct page_pool_params pp = { + .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV, + .order = LIBIE_RX_PAGE_ORDER, + .pool_size = bq->count, + .nid = NUMA_NO_NODE, + .dev = napi->dev->dev.parent, + .netdev = napi->dev, + .napi = napi, + .dma_dir = DMA_FROM_DEVICE, + .offset = LIBIE_SKB_HEADROOM, + }; + + /* HW-writeable / syncable length per one page */ + pp.max_len = LIBIE_RX_PAGE_LEN(pp.offset); + + /* HW-writeable length per buffer */ + bq->rx_buf_len = libie_rx_hw_len(&pp); + /* Buffer size to allocate */ + bq->truesize = roundup_pow_of_two(SKB_HEAD_ALIGN(pp.offset + + bq->rx_buf_len)); + + bq->pp = page_pool_create(&pp); + + return PTR_ERR_OR_ZERO(bq->pp); +} +EXPORT_SYMBOL_NS_GPL(libie_rx_page_pool_create, LIBIE); + +/** + * libie_rx_page_pool_destroy - destroy a &page_pool created by libie + * @bq: buffer queue to process + */ +void libie_rx_page_pool_destroy(struct libie_buf_queue *bq) +{ + page_pool_destroy(bq->pp); + bq->pp = NULL; +} +EXPORT_SYMBOL_NS_GPL(libie_rx_page_pool_destroy, LIBIE); + +/* O(1) converting i40e/ice/iavf's 8/10-bit hardware packet type to a parsed + * bitfield struct. + */ + +/* A few supplementary definitions for when XDP hash types do not coincide + * with what can be generated from ptype definitions by means of preprocessor + * concatenation. 
+ */ +#define XDP_RSS_L3_L2 XDP_RSS_TYPE_NONE +#define XDP_RSS_L4_NONE XDP_RSS_TYPE_NONE +#define XDP_RSS_L4_TIMESYNC XDP_RSS_TYPE_NONE +#define XDP_RSS_TYPE_L3 XDP_RSS_TYPE_NONE +#define XDP_RSS_TYPE_L4 XDP_RSS_L4 + +#define LIBIE_RX_PTYPE(oip, ofrag, tun, tp, tefr, iprot, pl) { \ + .outer_ip = LIBIE_RX_PTYPE_OUTER_##oip, \ + .outer_frag = LIBIE_RX_PTYPE_##ofrag, \ + .tunnel_type = LIBIE_RX_PTYPE_TUNNEL_IP_##tun, \ + .tunnel_end_prot = LIBIE_RX_PTYPE_TUNNEL_END_##tp, \ + .tunnel_end_frag = LIBIE_RX_PTYPE_##tefr, \ + .inner_prot = LIBIE_RX_PTYPE_INNER_##iprot, \ + .payload_layer = LIBIE_RX_PTYPE_PAYLOAD_##pl, \ + .hash_type = XDP_RSS_L3_##oip | \ + XDP_RSS_L4_##iprot | \ + XDP_RSS_TYPE_##pl, \ + } + +#define LIBIE_RX_PTYPE_UNUSED { } + +#define __LIBIE_RX_PTYPE_L2(iprot, pl) \ + LIBIE_RX_PTYPE(L2, NOT_FRAG, NONE, NONE, NOT_FRAG, iprot, pl) +#define LIBIE_RX_PTYPE_L2 __LIBIE_RX_PTYPE_L2(NONE, L2) +#define LIBIE_RX_PTYPE_TS __LIBIE_RX_PTYPE_L2(TIMESYNC, L2) +#define LIBIE_RX_PTYPE_L3 __LIBIE_RX_PTYPE_L2(NONE, L3) + +#define LIBIE_RX_PTYPE_IP_FRAG(oip) \ + LIBIE_RX_PTYPE(IPV##oip, FRAG, NONE, NONE, NOT_FRAG, NONE, L3) +#define LIBIE_RX_PTYPE_IP_L3(oip, tun, teprot, tefr) \ + LIBIE_RX_PTYPE(IPV##oip, NOT_FRAG, tun, teprot, tefr, NONE, L3) +#define LIBIE_RX_PTYPE_IP_L4(oip, tun, teprot, iprot) \ + LIBIE_RX_PTYPE(IPV##oip, NOT_FRAG, tun, teprot, NOT_FRAG, iprot, L4) + +#define LIBIE_RX_PTYPE_IP_NOF(oip, tun, ver) \ + LIBIE_RX_PTYPE_IP_L3(oip, tun, ver, NOT_FRAG), \ + LIBIE_RX_PTYPE_IP_L4(oip, tun, ver, UDP), \ + LIBIE_RX_PTYPE_UNUSED, \ + LIBIE_RX_PTYPE_IP_L4(oip, tun, ver, TCP), \ + LIBIE_RX_PTYPE_IP_L4(oip, tun, ver, SCTP), \ + LIBIE_RX_PTYPE_IP_L4(oip, tun, ver, ICMP) + +/* IPv oip --> tun --> IPv ver */ +#define LIBIE_RX_PTYPE_IP_TUN_VER(oip, tun, ver) \ + LIBIE_RX_PTYPE_IP_L3(oip, tun, ver, FRAG), \ + LIBIE_RX_PTYPE_IP_NOF(oip, tun, ver) + +/* Non Tunneled IPv oip */ +#define LIBIE_RX_PTYPE_IP_RAW(oip) \ + LIBIE_RX_PTYPE_IP_FRAG(oip), \ + LIBIE_RX_PTYPE_IP_NOF(oip, NONE, NONE) + +/* IPv oip --> tun --> { IPv4, IPv6 } */ +#define LIBIE_RX_PTYPE_IP_TUN(oip, tun) \ + LIBIE_RX_PTYPE_IP_TUN_VER(oip, tun, IPV4), \ + LIBIE_RX_PTYPE_IP_TUN_VER(oip, tun, IPV6) + +/* IPv oip --> GRE/NAT tun --> { x, IPv4, IPv6 } */ +#define LIBIE_RX_PTYPE_IP_GRE(oip, tun) \ + LIBIE_RX_PTYPE_IP_L3(oip, tun, NONE, NOT_FRAG), \ + LIBIE_RX_PTYPE_IP_TUN(oip, tun) + +/* Non Tunneled IPv oip + * IPv oip --> { IPv4, IPv6 } + * IPv oip --> GRE/NAT --> { x, IPv4, IPv6 } + * IPv oip --> GRE/NAT --> MAC --> { x, IPv4, IPv6 } + * IPv oip --> GRE/NAT --> MAC/VLAN --> { x, IPv4, IPv6 } + */ +#define LIBIE_RX_PTYPE_IP(oip) \ + LIBIE_RX_PTYPE_IP_RAW(oip), \ + LIBIE_RX_PTYPE_IP_TUN(oip, IP), \ + LIBIE_RX_PTYPE_IP_GRE(oip, GRENAT), \ + LIBIE_RX_PTYPE_IP_GRE(oip, GRENAT_MAC), \ + LIBIE_RX_PTYPE_IP_GRE(oip, GRENAT_MAC_VLAN) + +/* Lookup table mapping for O(1) parsing */ +const struct libie_rx_ptype_parsed libie_rx_ptype_lut[LIBIE_RX_PTYPE_NUM] = { + /* L2 packet types */ + LIBIE_RX_PTYPE_UNUSED, + LIBIE_RX_PTYPE_L2, + LIBIE_RX_PTYPE_TS, + LIBIE_RX_PTYPE_L2, + LIBIE_RX_PTYPE_UNUSED, + LIBIE_RX_PTYPE_UNUSED, + LIBIE_RX_PTYPE_L2, + LIBIE_RX_PTYPE_L2, + LIBIE_RX_PTYPE_UNUSED, + LIBIE_RX_PTYPE_UNUSED, + LIBIE_RX_PTYPE_L2, + LIBIE_RX_PTYPE_UNUSED, + + LIBIE_RX_PTYPE_L3, + LIBIE_RX_PTYPE_L3, + LIBIE_RX_PTYPE_L3, + LIBIE_RX_PTYPE_L3, + LIBIE_RX_PTYPE_L3, + LIBIE_RX_PTYPE_L3, + LIBIE_RX_PTYPE_L3, + LIBIE_RX_PTYPE_L3, + LIBIE_RX_PTYPE_L3, + LIBIE_RX_PTYPE_L3, + + LIBIE_RX_PTYPE_IP(4), + LIBIE_RX_PTYPE_IP(6), +}; 
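
[Editor's note, not part of the patch] The lookup table above is only ever consumed through libie_parse_rx_ptype(), as the ice hunks earlier in this series already show and as rx.h below defines. A hedged sketch of the intended hotpath usage follows; the ptype and hash values are assumed to have been read from the Rx descriptor by the caller, and the descriptor error/status checks are omitted for brevity:

#include <linux/net/intel/libie/rx.h>

static void my_rx_process_meta(struct net_device *netdev, struct sk_buff *skb,
			       u32 ptype, u32 hash)
{
	/* one O(1) lookup, reused for both decisions below */
	struct libie_rx_ptype_parsed parsed = libie_parse_rx_ptype(ptype);

	/* also checks NETIF_F_RXHASH, no separate feature test needed */
	if (libie_has_rx_hash(netdev, parsed))
		libie_skb_set_hash(skb, hash, parsed);

	/* also checks NETIF_F_RXCSUM; real code must still inspect the
	 * descriptor checksum error bits before trusting the HW result
	 */
	if (libie_has_rx_checksum(netdev, parsed))
		skb->ip_summed = CHECKSUM_UNNECESSARY;
}

This mirrors the ice_rx_hash_to_skb()/ice_rx_csum() conversions at the top of this patch.
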
+EXPORT_SYMBOL_NS_GPL(libie_rx_ptype_lut, LIBIE); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Intel(R) Ethernet common library"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/intel/libie/stats.c b/drivers/net/ethernet/intel/libie/stats.c new file mode 100644 index 00000000000000..85f5d279406f0b --- /dev/null +++ b/drivers/net/ethernet/intel/libie/stats.c @@ -0,0 +1,121 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2023 Intel Corporation. */ + +#include + +#include +#include + +/* Rx per-queue stats */ + +static const char * const libie_rq_stats_str[] = { +#define act(s) __stringify(s), + DECLARE_LIBIE_RQ_STATS(act) +#undef act +}; + +#define LIBIE_RQ_STATS_NUM ARRAY_SIZE(libie_rq_stats_str) + +/** + * libie_rq_stats_get_sset_count - get the number of Ethtool RQ stats provided + * + * Return: number of per-queue Rx stats supported by the library. + */ +u32 libie_rq_stats_get_sset_count(void) +{ + return LIBIE_RQ_STATS_NUM; +} +EXPORT_SYMBOL_NS_GPL(libie_rq_stats_get_sset_count, LIBIE); + +/** + * libie_rq_stats_get_strings - get the name strings of Ethtool RQ stats + * @data: reference to the cursor pointing to the output buffer + * @qid: RQ number to print in the prefix + */ +void libie_rq_stats_get_strings(u8 **data, u32 qid) +{ + for (u32 i = 0; i < LIBIE_RQ_STATS_NUM; i++) + ethtool_sprintf(data, "rq%u_%s", qid, libie_rq_stats_str[i]); +} +EXPORT_SYMBOL_NS_GPL(libie_rq_stats_get_strings, LIBIE); + +/** + * libie_rq_stats_get_data - get the RQ stats in Ethtool format + * @data: reference to the cursor pointing to the output array + * @stats: RQ stats container from the queue + */ +void libie_rq_stats_get_data(u64 **data, const struct libie_rq_stats *stats) +{ + u64 sarr[LIBIE_RQ_STATS_NUM]; + u32 start; + + do { + start = u64_stats_fetch_begin(&stats->syncp); + + for (u32 i = 0; i < LIBIE_RQ_STATS_NUM; i++) + sarr[i] = u64_stats_read(&stats->raw[i]); + } while (u64_stats_fetch_retry(&stats->syncp, start)); + + for (u32 i = 0; i < LIBIE_RQ_STATS_NUM; i++) + (*data)[i] += sarr[i]; + + *data += LIBIE_RQ_STATS_NUM; +} +EXPORT_SYMBOL_NS_GPL(libie_rq_stats_get_data, LIBIE); + +/* Tx per-queue stats */ + +static const char * const libie_sq_stats_str[] = { +#define act(s) __stringify(s), + DECLARE_LIBIE_SQ_STATS(act) +#undef act +}; + +#define LIBIE_SQ_STATS_NUM ARRAY_SIZE(libie_sq_stats_str) + +/** + * libie_sq_stats_get_sset_count - get the number of Ethtool SQ stats provided + * + * Return: number of per-queue Tx stats supported by the library. 
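
[Editor's note, not part of the patch] The per-queue getters in stats.c are shaped so a driver's ethtool callbacks can simply chain them per queue, advancing the string/data cursors by reference. A minimal sketch under stated assumptions: my_priv, its num_rxq counter and the embedded struct libie_rq_stats per Rx queue are hypothetical driver fields.

#include <linux/ethtool.h>
#include <linux/netdevice.h>
#include <linux/net/intel/libie/stats.h>

static int my_get_sset_count(struct net_device *netdev, int sset)
{
	struct my_priv *priv = netdev_priv(netdev);	/* assumed priv */

	if (sset != ETH_SS_STATS)
		return -EOPNOTSUPP;

	return priv->num_rxq * libie_rq_stats_get_sset_count();
}

static void my_get_strings(struct net_device *netdev, u32 sset, u8 *data)
{
	struct my_priv *priv = netdev_priv(netdev);

	if (sset != ETH_SS_STATS)
		return;

	for (u32 i = 0; i < priv->num_rxq; i++)
		libie_rq_stats_get_strings(&data, i);	/* advances @data */
}

static void my_get_ethtool_stats(struct net_device *netdev,
				 struct ethtool_stats *stats, u64 *data)
{
	struct my_priv *priv = netdev_priv(netdev);

	for (u32 i = 0; i < priv->num_rxq; i++)
		libie_rq_stats_get_data(&data, &priv->rxq[i].rq_stats);
}

Note that libie_rq_stats_get_data() accumulates into the output buffer, so it also works when several rings are summed into one ethtool entry.
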
+ */ +u32 libie_sq_stats_get_sset_count(void) +{ + return LIBIE_SQ_STATS_NUM; +} +EXPORT_SYMBOL_NS_GPL(libie_sq_stats_get_sset_count, LIBIE); + +/** + * libie_sq_stats_get_strings - get the name strings of Ethtool SQ stats + * @data: reference to the cursor pointing to the output buffer + * @qid: SQ number to print in the prefix + */ +void libie_sq_stats_get_strings(u8 **data, u32 qid) +{ + for (u32 i = 0; i < LIBIE_SQ_STATS_NUM; i++) + ethtool_sprintf(data, "sq%u_%s", qid, libie_sq_stats_str[i]); +} +EXPORT_SYMBOL_NS_GPL(libie_sq_stats_get_strings, LIBIE); + +/** + * libie_sq_stats_get_data - get the SQ stats in Ethtool format + * @data: reference to the cursor pointing to the output array + * @stats: SQ stats container from the queue + */ +void libie_sq_stats_get_data(u64 **data, const struct libie_sq_stats *stats) +{ + u64 sarr[LIBIE_SQ_STATS_NUM]; + u32 start; + + do { + start = u64_stats_fetch_begin(&stats->syncp); + + for (u32 i = 0; i < LIBIE_SQ_STATS_NUM; i++) + sarr[i] = u64_stats_read(&stats->raw[i]); + } while (u64_stats_fetch_retry(&stats->syncp, start)); + + for (u32 i = 0; i < LIBIE_SQ_STATS_NUM; i++) + (*data)[i] += sarr[i]; + + *data += LIBIE_SQ_STATS_NUM; +} +EXPORT_SYMBOL_NS_GPL(libie_sq_stats_get_data, LIBIE); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index d601b5faaed5b8..052d5f62de11df 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -917,7 +917,7 @@ INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) if (!rq->xsk_pool) { count = mlx5e_refill_rx_wqes(rq, head, wqe_bulk); - } else if (likely(!rq->xsk_pool->dma_need_sync)) { + } else if (likely(dma_skip_sync(rq->xsk_pool->dev))) { mlx5e_xsk_free_rx_wqes(rq, head, wqe_bulk); count = mlx5e_xsk_alloc_rx_wqes_batched(rq, head, wqe_bulk); } else { diff --git a/include/linux/device.h b/include/linux/device.h index 6c83294395ac08..fe6db8613f1803 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -688,6 +688,8 @@ struct device_physical_location { * other devices probe successfully. * @dma_coherent: this particular device is dma coherent, even if the * architecture supports non-coherent devices. + * @dma_skip_sync: DMA sync operations can be skipped for coherent non-SWIOTLB + * buffers. 
* @dma_ops_bypass: If set to %true then the dma_ops are bypassed for the * streaming DMA operations (->map_* / ->unmap_* / ->sync_*), * and optionall (if the coherent mask is large enough) also @@ -802,6 +804,9 @@ struct device { defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) bool dma_coherent:1; #endif +#ifdef CONFIG_DMA_NEED_SYNC + bool dma_skip_sync:1; +#endif #ifdef CONFIG_DMA_OPS_BYPASS bool dma_ops_bypass : 1; #endif diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index f2fc203fb8a1a2..404c764f5f463d 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -77,6 +77,7 @@ struct dma_map_ops { int nents, enum dma_data_direction dir); void (*cache_sync)(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction); + bool (*can_skip_sync)(struct device *dev); int (*dma_supported)(struct device *dev, u64 mask); u64 (*get_required_mask)(struct device *dev); size_t (*max_mapping_size)(struct device *dev); @@ -110,6 +111,22 @@ static inline void set_dma_ops(struct device *dev, } #endif /* CONFIG_DMA_OPS */ +#ifdef CONFIG_DMA_NEED_SYNC + +static inline void dma_set_skip_sync(struct device *dev, bool skip) +{ + dev->dma_skip_sync = skip; +} + +void dma_setup_skip_sync(struct device *dev); + +#else /* !CONFIG_DMA_NEED_SYNC */ + +#define dma_set_skip_sync(dev, skip) do { } while (0) +#define dma_setup_skip_sync(dev) do { } while (0) + +#endif /* !CONFIG_DMA_NEED_SYNC */ + #ifdef CONFIG_DMA_CMA extern struct cma *dma_contiguous_default_area; diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 4a658de44ee92e..9f026678134f75 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -91,6 +91,27 @@ static inline void debug_dma_map_single(struct device *dev, const void *addr, } #endif /* CONFIG_DMA_API_DEBUG */ +#ifdef CONFIG_DMA_NEED_SYNC + +static inline bool dma_skip_sync(const struct device *dev) +{ + return dev->dma_skip_sync; +} + +bool __dma_need_sync(struct device *dev, dma_addr_t dma_addr); + +static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) +{ + return dma_skip_sync(dev) ? 
false : __dma_need_sync(dev, dma_addr); +} + +#else /* !CONFIG_DMA_NEED_SYNC */ + +#define dma_skip_sync(dev) true +#define dma_need_sync(dev, dma_addr) false + +#endif /* !CONFIG_DMA_NEED_SYNC */ + #ifdef CONFIG_HAS_DMA static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { @@ -117,14 +138,31 @@ dma_addr_t dma_map_resource(struct device *dev, phys_addr_t phys_addr, size_t size, enum dma_data_direction dir, unsigned long attrs); void dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs); -void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, - enum dma_data_direction dir); -void dma_sync_single_for_device(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir); -void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, - int nelems, enum dma_data_direction dir); -void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, - int nelems, enum dma_data_direction dir); + +void __dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir); +void __dma_sync_single_for_device(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir); +void __dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, + int nelems, enum dma_data_direction dir); +void __dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, + int nelems, enum dma_data_direction dir); + +#define dma_check_sync(op, dev, ...) \ + do { \ + if (!dma_skip_sync(dev)) \ + op(dev, __VA_ARGS__); \ + } while (0) + +#define dma_sync_single_for_cpu(d, a, s, r) \ + dma_check_sync(__dma_sync_single_for_cpu, d, a, s, r) +#define dma_sync_single_for_device(d, a, s, r) \ + dma_check_sync(__dma_sync_single_for_device, d, a, s, r) +#define dma_sync_sg_for_cpu(d, s, n, r) \ + dma_check_sync(__dma_sync_sg_for_cpu, d, s, n, r) +#define dma_sync_sg_for_device(d, s, n, r) \ + dma_check_sync(__dma_sync_sg_for_device, d, s, n, r) + void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, unsigned long attrs); void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr, @@ -147,7 +185,6 @@ u64 dma_get_required_mask(struct device *dev); bool dma_addressing_limited(struct device *dev); size_t dma_max_mapping_size(struct device *dev); size_t dma_opt_mapping_size(struct device *dev); -bool dma_need_sync(struct device *dev, dma_addr_t dma_addr); unsigned long dma_get_merge_boundary(struct device *dev); struct sg_table *dma_alloc_noncontiguous(struct device *dev, size_t size, enum dma_data_direction dir, gfp_t gfp, unsigned long attrs); @@ -277,10 +314,6 @@ static inline size_t dma_opt_mapping_size(struct device *dev) { return 0; } -static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) -{ - return false; -} static inline unsigned long dma_get_merge_boundary(struct device *dev) { return 0; @@ -348,20 +381,27 @@ static inline void dma_unmap_single_attrs(struct device *dev, dma_addr_t addr, return dma_unmap_page_attrs(dev, addr, size, dir, attrs); } -static inline void dma_sync_single_range_for_cpu(struct device *dev, - dma_addr_t addr, unsigned long offset, size_t size, - enum dma_data_direction dir) +static inline void +__dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t addr, + unsigned long offset, size_t size, + enum dma_data_direction dir) { - return dma_sync_single_for_cpu(dev, addr + offset, size, dir); + __dma_sync_single_for_cpu(dev, addr + 
offset, size, dir); } -static inline void dma_sync_single_range_for_device(struct device *dev, - dma_addr_t addr, unsigned long offset, size_t size, - enum dma_data_direction dir) +static inline void +__dma_sync_single_range_for_device(struct device *dev, dma_addr_t addr, + unsigned long offset, size_t size, + enum dma_data_direction dir) { - return dma_sync_single_for_device(dev, addr + offset, size, dir); + __dma_sync_single_for_device(dev, addr + offset, size, dir); } +#define dma_sync_single_range_for_cpu(d, a, o, s, r) \ + dma_check_sync(__dma_sync_single_range_for_cpu, d, a, o, s, r) +#define dma_sync_single_range_for_device(d, a, o, s, r) \ + dma_check_sync(__dma_sync_single_range_for_device, d, a, o, s, r) + /** * dma_unmap_sgtable - Unmap the given buffer for DMA * @dev: The device for which to perform the DMA operation diff --git a/include/linux/net/intel/libie/rx.h b/include/linux/net/intel/libie/rx.h new file mode 100644 index 00000000000000..38083eebffccda --- /dev/null +++ b/include/linux/net/intel/libie/rx.h @@ -0,0 +1,255 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2023 Intel Corporation. */ + +#ifndef __LIBIE_RX_H +#define __LIBIE_RX_H + +#include + +#include +#include + +/* Rx MTU/buffer/truesize helpers. Mostly pure software-side; HW-defined values + * are valid for all Intel HW. + */ + +/* Space reserved in front of each frame */ +#define LIBIE_SKB_HEADROOM (NET_SKB_PAD + NET_IP_ALIGN) +/* Maximum headroom to calculate max MTU below */ +#define LIBIE_MAX_HEADROOM LIBIE_SKB_HEADROOM +/* Link layer / L2 overhead: Ethernet, 2 VLAN tags (C + S), FCS */ +#define LIBIE_RX_LL_LEN (ETH_HLEN + 2 * VLAN_HLEN + ETH_FCS_LEN) + +/* Always use order-0 pages */ +#define LIBIE_RX_PAGE_ORDER 0 +/* Rx buffer size config is a multiple of 128, align to a cacheline boundary */ +#define LIBIE_RX_BUF_LEN_ALIGN SKB_DATA_ALIGN(128) +/* HW-writeable space in one buffer: truesize - headroom/tailroom, + * HW-aligned + */ +#define LIBIE_RX_PAGE_LEN(hr) \ + ALIGN_DOWN(SKB_MAX_ORDER(hr, LIBIE_RX_PAGE_ORDER), \ + LIBIE_RX_BUF_LEN_ALIGN) +/* The smallest and largest size for a single descriptor as per HW */ +#define LIBIE_MIN_RX_BUF_LEN 1024U +#define LIBIE_MAX_RX_BUF_LEN 9728U +/* "True" HW-writeable space: minimum from SW and HW values */ +#define LIBIE_RX_BUF_LEN(hr) min_t(u32, LIBIE_RX_PAGE_LEN(hr), \ + LIBIE_MAX_RX_BUF_LEN) + +/* The maximum frame size as per HW (S/G) */ +#define __LIBIE_MAX_RX_FRM_LEN 16382U +/* ATST, HW can chain up to 5 Rx descriptors */ +#define LIBIE_MAX_RX_FRM_LEN(hr) \ + min_t(u32, __LIBIE_MAX_RX_FRM_LEN, LIBIE_RX_BUF_LEN(hr) * 5) +/* Maximum frame size minus LL overhead */ +#define LIBIE_MAX_MTU \ + (LIBIE_MAX_RX_FRM_LEN(LIBIE_MAX_HEADROOM) - LIBIE_RX_LL_LEN) + +/* Rx buffer management */ + +/** + * struct libie_rx_buffer - structure representing an Rx buffer + * @page: page holding the buffer + * @offset: offset from the page start (to the headroom) + * @truesize: total space occupied by the buffer (w/ headroom and tailroom) + * + * Depending on the MTU, API switches between one-page-per-frame and shared + * page model (to conserve memory on bigger-page platforms). In case of the + * former, @offset is always 0 and @truesize is always ```PAGE_SIZE```. 
+ */ +struct libie_rx_buffer { + struct page *page; + u32 offset; + u32 truesize; +} __aligned_largest; + +/** + * struct libie_buf_queue - structure representing a buffer queue + * @pp: &page_pool for buffer management + * @rx_bi: array of Rx buffers + * @truesize: size to allocate per buffer, w/overhead + * @count: number of descriptors/buffers the queue has + * @rx_buf_len: HW-writeable length per each buffer + */ +struct libie_buf_queue { + struct page_pool *pp; + struct libie_rx_buffer *rx_bi; + + u32 truesize; + u32 count; + + /* Cold fields */ + u32 rx_buf_len; +}; + +int libie_rx_page_pool_create(struct libie_buf_queue *bq, + struct napi_struct *napi); +void libie_rx_page_pool_destroy(struct libie_buf_queue *bq); + +/** + * libie_rx_alloc - allocate a new Rx buffer + * @bq: buffer queue to allocate for + * @i: index of the buffer within the queue + * + * Return: DMA address to be passed to HW for Rx on successful allocation, + * ```DMA_MAPPING_ERROR``` otherwise. + */ +static inline dma_addr_t libie_rx_alloc(const struct libie_buf_queue *bq, + u32 i) +{ + struct libie_rx_buffer *buf = &bq->rx_bi[i]; + + buf->truesize = bq->truesize; + buf->page = page_pool_dev_alloc(bq->pp, &buf->offset, &buf->truesize); + if (unlikely(!buf->page)) + return DMA_MAPPING_ERROR; + + return page_pool_get_dma_addr(buf->page) + buf->offset + + bq->pp->p.offset; +} + +/** + * libie_rx_sync_for_cpu - synchronize or recycle buffer post DMA + * @buf: buffer to process + * @len: frame length from the descriptor + * + * Process the buffer after it's written by HW. The regular path is to + * synchronize DMA for CPU, but in case of no data it will be immediately + * recycled back to its PP. + * + * Return: true when there's data to process, false otherwise. + */ +static inline bool libie_rx_sync_for_cpu(const struct libie_rx_buffer *buf, + u32 len) +{ + struct page *page = buf->page; + + /* Very rare, but possible case. The most common reason: + * the last fragment contained FCS only, which was then + * stripped by the HW. + */ + if (unlikely(!len)) { + page_pool_recycle_direct(page->pp, page); + return false; + } + + page_pool_dma_sync_for_cpu(page->pp, page, buf->offset, len); + + return true; +} + +/* O(1) converting i40e/ice/iavf's 8/10-bit hardware packet type to a parsed + * bitfield struct. 
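
[Editor's note, not part of the patch] As an illustrative sketch of how the buffer-queue helpers above fit together: create the pool once per queue, fill the ring via libie_rx_alloc(), and destroy everything on teardown. my_rxq and my_write_rx_desc() are assumed driver-specific pieces; only the libie_*() calls and the DMA_MAPPING_ERROR convention come from this header.

#include <linux/slab.h>
#include <linux/net/intel/libie/rx.h>

struct my_rxq {				/* assumed driver queue struct */
	struct libie_buf_queue bq;
	u32 desc_count;
};

static int my_rxq_init(struct my_rxq *rxq, struct napi_struct *napi)
{
	struct libie_buf_queue *bq = &rxq->bq;
	int err;

	bq->count = rxq->desc_count;

	/* picks rx_buf_len/truesize and creates the page_pool */
	err = libie_rx_page_pool_create(bq, napi);
	if (err)
		return err;

	bq->rx_bi = kcalloc(bq->count, sizeof(*bq->rx_bi), GFP_KERNEL);
	if (!bq->rx_bi) {
		libie_rx_page_pool_destroy(bq);
		return -ENOMEM;
	}

	for (u32 i = 0; i < bq->count; i++) {
		dma_addr_t dma = libie_rx_alloc(bq, i);

		if (dma == DMA_MAPPING_ERROR)
			break;

		/* assumed helper: program the address into the HW desc */
		my_write_rx_desc(rxq, i, dma);
	}

	/* bq->rx_buf_len would also be programmed into the HW Rx queue */
	return 0;
}

On the completion side, libie_rx_sync_for_cpu(&bq->rx_bi[i], len) then either syncs the fragment for CPU access or, for zero-length writes, recycles the page straight back to the pool.
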
+ */ + +struct libie_rx_ptype_parsed { + u16 outer_ip:2; + u16 outer_frag:1; + u16 tunnel_type:3; + u16 tunnel_end_prot:2; + u16 tunnel_end_frag:1; + u16 inner_prot:3; + u16 payload_layer:2; + + enum xdp_rss_hash_type hash_type:16; +}; + +enum libie_rx_ptype_outer_ip { + LIBIE_RX_PTYPE_OUTER_L2 = 0U, + LIBIE_RX_PTYPE_OUTER_IPV4, + LIBIE_RX_PTYPE_OUTER_IPV6, +}; + +enum libie_rx_ptype_outer_fragmented { + LIBIE_RX_PTYPE_NOT_FRAG = 0U, + LIBIE_RX_PTYPE_FRAG, +}; + +enum libie_rx_ptype_tunnel_type { + LIBIE_RX_PTYPE_TUNNEL_IP_NONE = 0U, + LIBIE_RX_PTYPE_TUNNEL_IP_IP, + LIBIE_RX_PTYPE_TUNNEL_IP_GRENAT, + LIBIE_RX_PTYPE_TUNNEL_IP_GRENAT_MAC, + LIBIE_RX_PTYPE_TUNNEL_IP_GRENAT_MAC_VLAN, +}; + +enum libie_rx_ptype_tunnel_end_prot { + LIBIE_RX_PTYPE_TUNNEL_END_NONE = 0U, + LIBIE_RX_PTYPE_TUNNEL_END_IPV4, + LIBIE_RX_PTYPE_TUNNEL_END_IPV6, +}; + +enum libie_rx_ptype_inner_prot { + LIBIE_RX_PTYPE_INNER_NONE = 0U, + LIBIE_RX_PTYPE_INNER_UDP, + LIBIE_RX_PTYPE_INNER_TCP, + LIBIE_RX_PTYPE_INNER_SCTP, + LIBIE_RX_PTYPE_INNER_ICMP, + LIBIE_RX_PTYPE_INNER_TIMESYNC, +}; + +enum libie_rx_ptype_payload_layer { + LIBIE_RX_PTYPE_PAYLOAD_NONE = PKT_HASH_TYPE_NONE, + LIBIE_RX_PTYPE_PAYLOAD_L2 = PKT_HASH_TYPE_L2, + LIBIE_RX_PTYPE_PAYLOAD_L3 = PKT_HASH_TYPE_L3, + LIBIE_RX_PTYPE_PAYLOAD_L4 = PKT_HASH_TYPE_L4, +}; + +#define LIBIE_RX_PTYPE_NUM 154 + +extern const struct libie_rx_ptype_parsed +libie_rx_ptype_lut[LIBIE_RX_PTYPE_NUM]; + +/** + * libie_parse_rx_ptype - convert HW packet type to software bitfield structure + * @ptype: 10-bit hardware packet type value from the descriptor + * + * ```libie_rx_ptype_lut``` must be accessed only using this wrapper. + * + * Return: parsed bitfield struct corresponding to the provided ptype. + */ +static inline struct libie_rx_ptype_parsed libie_parse_rx_ptype(u32 ptype) +{ + if (unlikely(ptype >= LIBIE_RX_PTYPE_NUM)) + ptype = 0; + + return libie_rx_ptype_lut[ptype]; +} + +/* libie_has_*() can be used to quickly check whether the HW metadata is + * available to avoid further expensive processing such as descriptor reads. + * They already check for the corresponding netdev feature to be enabled, + * thus can be used as drop-in replacements. + */ + +static inline bool libie_has_rx_checksum(const struct net_device *dev, + struct libie_rx_ptype_parsed parsed) +{ + /* Non-zero _INNER* is only possible when _OUTER_IPV* is set, + * it is enough to check only for the L4 type. + */ + return likely(parsed.inner_prot > LIBIE_RX_PTYPE_INNER_NONE && + (dev->features & NETIF_F_RXCSUM)); +} + +static inline bool libie_has_rx_hash(const struct net_device *dev, + struct libie_rx_ptype_parsed parsed) +{ + return likely(parsed.payload_layer > LIBIE_RX_PTYPE_PAYLOAD_NONE && + (dev->features & NETIF_F_RXHASH)); +} + +/** + * libie_skb_set_hash - fill in skb hash value basing on the parsed ptype + * @skb: skb to fill the hash in + * @hash: 32-bit hash value from the descriptor + * @parsed: parsed packet type + */ +static inline void libie_skb_set_hash(struct sk_buff *skb, u32 hash, + struct libie_rx_ptype_parsed parsed) +{ + skb_set_hash(skb, hash, parsed.payload_layer); +} + +#endif /* __LIBIE_RX_H */ diff --git a/include/linux/net/intel/libie/stats.h b/include/linux/net/intel/libie/stats.h new file mode 100644 index 00000000000000..dbbc98bbd3a70c --- /dev/null +++ b/include/linux/net/intel/libie/stats.h @@ -0,0 +1,179 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2023 Intel Corporation. 
*/ + +#ifndef __LIBIE_STATS_H +#define __LIBIE_STATS_H + +#include + +/* Common */ + +/* Use 32-byte alignment to reduce false sharing */ +#define __libie_stats_aligned __aligned(4 * sizeof(u64_stats_t)) + +/** + * libie_stats_add - update one structure counter from a local struct + * @qs: queue stats structure to update (&libie_rq_stats or &libie_sq_stats) + * @ss: local/onstack stats structure + * @f: name of the field to update + * + * If a local/onstack stats structure is used to collect statistics during + * hotpath loops, this macro can be used to shorthand updates, given that + * the fields have the same name. + * Must be guarded with u64_stats_update_{begin,end}(). + */ +#define libie_stats_add(qs, ss, f) \ + u64_stats_add(&(qs)->f, (ss)->f) + +/** + * __libie_stats_inc_one - safely increment one stats structure counter + * @s: queue stats structure to update (&libie_rq_stats or &libie_sq_stats) + * @f: name of the field to increment + * @n: name of the temporary variable, result of __UNIQUE_ID() + * + * To be used on exception or slow paths -- allocation fails, queue stops etc. + */ +#define __libie_stats_inc_one(s, f, n) ({ \ + typeof(*(s)) *n = (s); \ + \ + u64_stats_update_begin(&n->syncp); \ + u64_stats_inc(&n->f); \ + u64_stats_update_end(&n->syncp); \ +}) +#define libie_stats_inc_one(s, f) \ + __libie_stats_inc_one(s, f, __UNIQUE_ID(qs_)) + +/* Rx per-queue stats: + * packets: packets received on this queue + * bytes: bytes received on this queue + * fragments: number of processed descriptors carrying only a fragment + * alloc_page_fail: number of Rx page allocation fails + * build_skb_fail: number of build_skb() fails + */ + +#define DECLARE_LIBIE_RQ_NAPI_STATS(act) \ + act(packets) \ + act(bytes) \ + act(fragments) + +#define DECLARE_LIBIE_RQ_FAIL_STATS(act) \ + act(alloc_page_fail) \ + act(build_skb_fail) + +#define DECLARE_LIBIE_RQ_STATS(act) \ + DECLARE_LIBIE_RQ_NAPI_STATS(act) \ + DECLARE_LIBIE_RQ_FAIL_STATS(act) + +struct libie_rq_stats { + struct u64_stats_sync syncp; + + union { + struct { +#define act(s) u64_stats_t s; + DECLARE_LIBIE_RQ_NAPI_STATS(act); + DECLARE_LIBIE_RQ_FAIL_STATS(act); +#undef act + }; + DECLARE_FLEX_ARRAY(u64_stats_t, raw); + }; +} __libie_stats_aligned; + +/* Rx stats being modified frequently during the NAPI polling, to sync them + * with the queue stats once after the loop is finished. 
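
[Editor's note, not part of the patch] A sketch of that pattern, with my_rxq, its embedded struct libie_rq_stats and my_build_skb() as assumptions: hot counters accumulate on the stack and are folded into the queue container once per poll, while failure counters use libie_stats_inc_one() directly on the slow path.

#include <linux/net/intel/libie/stats.h>

struct my_rxq {				/* assumed driver queue struct */
	struct libie_rq_stats rq_stats;
	struct napi_struct napi;
};

static int my_clean_rx(struct my_rxq *rxq, int budget)
{
	struct libie_rq_onstack_stats ss = { };
	int done = 0;

	while (done < budget) {
		struct sk_buff *skb = my_build_skb(rxq);  /* assumed helper */

		if (unlikely(!skb)) {
			/* slow path: update the shared counter directly */
			libie_stats_inc_one(&rxq->rq_stats, build_skb_fail);
			break;
		}

		ss.packets++;
		ss.bytes += skb->len;
		done++;

		napi_gro_receive(&rxq->napi, skb);
	}

	/* one syncp-protected update per poll instead of per packet */
	libie_rq_napi_stats_add(&rxq->rq_stats, &ss);

	return done;
}
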
+ */ +struct libie_rq_onstack_stats { + union { + struct { +#define act(s) u32 s; + DECLARE_LIBIE_RQ_NAPI_STATS(act); +#undef act + }; + DECLARE_FLEX_ARRAY(u32, raw); + }; +}; + +/** + * libie_rq_napi_stats_add - add onstack Rx stats to the queue container + * @qs: Rx queue stats structure to update + * @ss: onstack structure to get the values from, updated during the NAPI loop + */ +static inline void +libie_rq_napi_stats_add(struct libie_rq_stats *qs, + const struct libie_rq_onstack_stats *ss) +{ + u64_stats_update_begin(&qs->syncp); + libie_stats_add(qs, ss, packets); + libie_stats_add(qs, ss, bytes); + libie_stats_add(qs, ss, fragments); + u64_stats_update_end(&qs->syncp); +} + +u32 libie_rq_stats_get_sset_count(void); +void libie_rq_stats_get_strings(u8 **data, u32 qid); +void libie_rq_stats_get_data(u64 **data, const struct libie_rq_stats *stats); + +/* Tx per-queue stats: + * packets: packets sent from this queue + * bytes: bytes sent from this queue + * busy: number of xmit failures due to the ring being full + * stops: number times the ring was stopped from the driver + * restarts: number times it was started after being stopped + * linearized: number of skbs linearized due to HW limits + */ + +#define DECLARE_LIBIE_SQ_NAPI_STATS(act) \ + act(packets) \ + act(bytes) + +#define DECLARE_LIBIE_SQ_XMIT_STATS(act) \ + act(busy) \ + act(stops) \ + act(restarts) \ + act(linearized) + +#define DECLARE_LIBIE_SQ_STATS(act) \ + DECLARE_LIBIE_SQ_NAPI_STATS(act) \ + DECLARE_LIBIE_SQ_XMIT_STATS(act) + +struct libie_sq_stats { + struct u64_stats_sync syncp; + + union { + struct { +#define act(s) u64_stats_t s; + DECLARE_LIBIE_SQ_STATS(act); +#undef act + }; + DECLARE_FLEX_ARRAY(u64_stats_t, raw); + }; +} __libie_stats_aligned; + +struct libie_sq_onstack_stats { +#define act(s) u32 s; + DECLARE_LIBIE_SQ_NAPI_STATS(act); +#undef act +}; + +/** + * libie_sq_napi_stats_add - add onstack Tx stats to the queue container + * @qs: Tx queue stats structure to update + * @ss: onstack structure to get the values from, updated during the NAPI loop + */ +static inline void +libie_sq_napi_stats_add(struct libie_sq_stats *qs, + const struct libie_sq_onstack_stats *ss) +{ + if (unlikely(!ss->packets)) + return; + + u64_stats_update_begin(&qs->syncp); + libie_stats_add(qs, ss, packets); + libie_stats_add(qs, ss, bytes); + u64_stats_update_end(&qs->syncp); +} + +u32 libie_sq_stats_get_sset_count(void); +void libie_sq_stats_get_strings(u8 **data, u32 qid); +void libie_sq_stats_get_data(u64 **data, const struct libie_sq_stats *stats); + +#endif /* __LIBIE_STATS_H */ diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h index 1d397c1a004318..873631c79ab166 100644 --- a/include/net/page_pool/helpers.h +++ b/include/net/page_pool/helpers.h @@ -52,13 +52,15 @@ #ifndef _NET_PAGE_POOL_HELPERS_H #define _NET_PAGE_POOL_HELPERS_H +#include + #include #ifdef CONFIG_PAGE_POOL_STATS /* Deprecated driver-facing API, use netlink instead */ int page_pool_ethtool_stats_get_count(void); u8 *page_pool_ethtool_stats_get_strings(u8 *data); -u64 *page_pool_ethtool_stats_get(u64 *data, void *stats); +u64 *page_pool_ethtool_stats_get(u64 *data, const void *stats); bool page_pool_get_stats(const struct page_pool *pool, struct page_pool_stats *stats); @@ -73,7 +75,7 @@ static inline u8 *page_pool_ethtool_stats_get_strings(u8 *data) return data; } -static inline u64 *page_pool_ethtool_stats_get(u64 *data, void *stats) +static inline u64 *page_pool_ethtool_stats_get(u64 *data, const void *stats) { return data; } @@ 
-204,8 +206,8 @@ static inline void *page_pool_dev_alloc_va(struct page_pool *pool, * Get the stored dma direction. A driver might decide to store this locally * and avoid the extra cache line from page_pool to determine the direction. */ -static -inline enum dma_data_direction page_pool_get_dma_dir(struct page_pool *pool) +static inline enum dma_data_direction +page_pool_get_dma_dir(const struct page_pool *pool) { return pool->p.dma_dir; } @@ -370,7 +372,7 @@ static inline void page_pool_free_va(struct page_pool *pool, void *va, * Fetch the DMA address of the page. The page pool to which the page belongs * must had been created with PP_FLAG_DMA_MAP. */ -static inline dma_addr_t page_pool_get_dma_addr(struct page *page) +static inline dma_addr_t page_pool_get_dma_addr(const struct page *page) { dma_addr_t ret = page->dma_addr; @@ -395,6 +397,28 @@ static inline bool page_pool_set_dma_addr(struct page *page, dma_addr_t addr) return false; } +/** + * page_pool_dma_sync_for_cpu - sync Rx page for CPU after it's written by HW + * @pool: &page_pool the @page belongs to + * @page: page to sync + * @offset: offset from page start to "hard" start if using PP frags + * @dma_sync_size: size of the data written to the page + * + * Can be used as a shorthand to sync Rx pages before accessing them in the + * driver. Caller must ensure the pool was created with ``PP_FLAG_DMA_MAP``. + * Note that this version performs DMA sync unconditionally, even if the + * associated PP doesn't perform sync-for-device. + */ +static inline void page_pool_dma_sync_for_cpu(const struct page_pool *pool, + const struct page *page, + u32 offset, u32 dma_sync_size) +{ + dma_sync_single_range_for_cpu(pool->p.dev, + page_pool_get_dma_addr(page), + offset + pool->p.offset, dma_sync_size, + page_pool_get_dma_dir(pool)); +} + static inline bool page_pool_put(struct page_pool *pool) { return refcount_dec_and_test(&pool->user_cnt); diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 76481c46537526..24733bef19bc57 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -44,7 +44,6 @@ struct pp_alloc_cache { /** * struct page_pool_params - page pool parameters - * @flags: PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV * @order: 2^order pages on allocation * @pool_size: size of the ptr_ring * @nid: NUMA node id to allocate from pages from @@ -54,10 +53,10 @@ struct pp_alloc_cache { * @dma_dir: DMA mapping direction * @max_len: max DMA sync memory size for PP_FLAG_DMA_SYNC_DEV * @offset: DMA sync address offset for PP_FLAG_DMA_SYNC_DEV + * @flags: PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV */ struct page_pool_params { struct_group_tagged(page_pool_params_fast, fast, - unsigned int flags; unsigned int order; unsigned int pool_size; int nid; @@ -68,6 +67,7 @@ struct page_pool_params { unsigned int offset; ); struct_group_tagged(page_pool_params_slow, slow, + unsigned int flags; struct net_device *netdev; /* private: used by test code only */ void (*init_callback)(struct page *page, void *arg); @@ -128,9 +128,20 @@ struct page_pool_stats { struct page_pool { struct page_pool_params_fast p; - bool has_init_callback; + bool dma_map:1; /* Perform DMA mapping */ + bool dma_sync:1; /* Perform DMA sync */ + bool has_init_callback:1; /* slow.init_callback is set */ - long frag_users; + /* The following block must stay within one cacheline. On 32-bit + * systems, sizeof(long) == sizeof(int), so that the block size is + * precisely ``4 * sizeof(long)``. 
diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
index 76481c46537526..24733bef19bc57 100644
--- a/include/net/page_pool/types.h
+++ b/include/net/page_pool/types.h
@@ -44,7 +44,6 @@ struct pp_alloc_cache {
 
 /**
  * struct page_pool_params - page pool parameters
- * @flags:	PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV
  * @order:	2^order pages on allocation
  * @pool_size:	size of the ptr_ring
  * @nid:	NUMA node id to allocate from pages from
@@ -54,10 +53,10 @@ struct pp_alloc_cache {
  * @dma_dir:	DMA mapping direction
  * @max_len:	max DMA sync memory size for PP_FLAG_DMA_SYNC_DEV
  * @offset:	DMA sync address offset for PP_FLAG_DMA_SYNC_DEV
+ * @flags:	PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV
  */
 struct page_pool_params {
 	struct_group_tagged(page_pool_params_fast, fast,
-		unsigned int	flags;
 		unsigned int	order;
 		unsigned int	pool_size;
 		int		nid;
@@ -68,6 +67,7 @@ struct page_pool_params {
 		unsigned int	offset;
 	);
 	struct_group_tagged(page_pool_params_slow, slow,
+		unsigned int	flags;
 		struct net_device *netdev;
 /* private: used by test code only */
 		void (*init_callback)(struct page *page, void *arg);
@@ -128,9 +128,20 @@ struct page_pool_stats {
 struct page_pool {
 	struct page_pool_params_fast p;
 
-	bool has_init_callback;
+	bool dma_map:1;			/* Perform DMA mapping */
+	bool dma_sync:1;		/* Perform DMA sync */
+	bool has_init_callback:1;	/* slow.init_callback is set */
 
-	long frag_users;
+	/* The following block must stay within one cacheline. On 32-bit
+	 * systems, sizeof(long) == sizeof(int), so that the block size is
+	 * precisely ``4 * sizeof(long)``. On 64-bit systems, the actual size
+	 * is ``2 * sizeof(long) + 2 * sizeof(int)``, i.e. 24 bytes, but the
+	 * closest pow-2 to that is 32 bytes, which also equals
+	 * ``4 * sizeof(long)``, so just use that one for simplicity.
+	 * Having it aligned to a cacheline boundary may be excessive and
+	 * doesn't bring any benefit.
+	 */
+	long frag_users __aligned(4 * sizeof(long));
 	struct page *frag_page;
 	unsigned int frag_offset;
 	u32 pages_state_hold_cnt;
diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
index 526c1e7f505e4d..326b0550b0539c 100644
--- a/include/net/xdp_sock_drv.h
+++ b/include/net/xdp_sock_drv.h
@@ -205,7 +205,7 @@ static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp, struct xsk_bu
 {
 	struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
 
-	if (!pool->dma_need_sync)
+	if (dma_skip_sync(pool->dev))
 		return;
 
 	xp_dma_sync_for_cpu(xskb);
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index 99dd7376df6a7d..0581b8171038bc 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -43,7 +43,6 @@ struct xsk_dma_map {
 	refcount_t users;
 	struct list_head list; /* Protected by the RTNL_LOCK */
 	u32 dma_pages_cnt;
-	bool dma_need_sync;
 };
 
 struct xsk_buff_pool {
@@ -82,7 +81,6 @@ struct xsk_buff_pool {
 	u8 tx_metadata_len; /* inherited from umem */
 	u8 cached_need_wakeup;
 	bool uses_need_wakeup;
-	bool dma_need_sync;
 	bool unaligned;
 	bool tx_sw_csum;
 	void *addrs;
@@ -166,7 +164,7 @@ void xp_dma_sync_for_device_slow(struct xsk_buff_pool *pool, dma_addr_t dma,
 static inline void xp_dma_sync_for_device(struct xsk_buff_pool *pool,
 					  dma_addr_t dma, size_t size)
 {
-	if (!pool->dma_need_sync)
+	if (dma_skip_sync(pool->dev))
 		return;
 
 	xp_dma_sync_for_device_slow(pool, dma, size);
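Even though .flags now lives in the init-only ("slow") part of the params and is mirrored into the pool->dma_map/dma_sync bits, the driver-facing setup is unchanged. A minimal illustrative Rx pool creation (device pointer and sizes are placeholders):

struct page_pool_params pp = {
	.flags		= PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
	.order		= 0,
	.pool_size	= 256,			/* ring size, driver-specific */
	.nid		= NUMA_NO_NODE,
	.dev		= dev,			/* struct device of the NIC */
	.dma_dir	= DMA_FROM_DEVICE,	/* DMA_BIDIRECTIONAL for XDP_TX */
	.max_len	= PAGE_SIZE,
	.offset		= 0,
};
struct page_pool *pool = page_pool_create(&pp);

if (IS_ERR(pool))
	return PTR_ERR(pool);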
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index d62f5957f36be7..1c9ff05b1ecb78 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -107,6 +107,10 @@ config DMA_BOUNCE_UNALIGNED_KMALLOC
 	bool
 	depends on SWIOTLB
 
+config DMA_NEED_SYNC
+	def_bool ARCH_HAS_SYNC_DMA_FOR_DEVICE || ARCH_HAS_SYNC_DMA_FOR_CPU || \
+		 ARCH_HAS_SYNC_DMA_FOR_CPU_ALL || DMA_OPS || SWIOTLB
+
 config DMA_RESTRICTED_POOL
 	bool "DMA Restricted Pool"
 	depends on OF && OF_RESERVED_MEM && SWIOTLB
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index 58db8fd70471a1..70a66597ddc768 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -329,8 +329,9 @@ void dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size,
 }
 EXPORT_SYMBOL(dma_unmap_resource);
 
-void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
-		enum dma_data_direction dir)
+#ifdef CONFIG_DMA_NEED_SYNC
+void __dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
+		size_t size, enum dma_data_direction dir)
 {
 	const struct dma_map_ops *ops = get_dma_ops(dev);
 
@@ -341,10 +342,10 @@ void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
 		ops->sync_single_for_cpu(dev, addr, size, dir);
 	debug_dma_sync_single_for_cpu(dev, addr, size, dir);
 }
-EXPORT_SYMBOL(dma_sync_single_for_cpu);
+EXPORT_SYMBOL(__dma_sync_single_for_cpu);
 
-void dma_sync_single_for_device(struct device *dev, dma_addr_t addr,
-		size_t size, enum dma_data_direction dir)
+void __dma_sync_single_for_device(struct device *dev, dma_addr_t addr,
+		size_t size, enum dma_data_direction dir)
 {
 	const struct dma_map_ops *ops = get_dma_ops(dev);
 
@@ -355,10 +356,10 @@ void dma_sync_single_for_device(struct device *dev, dma_addr_t addr,
 		ops->sync_single_for_device(dev, addr, size, dir);
 	debug_dma_sync_single_for_device(dev, addr, size, dir);
 }
-EXPORT_SYMBOL(dma_sync_single_for_device);
+EXPORT_SYMBOL(__dma_sync_single_for_device);
 
-void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
-		int nelems, enum dma_data_direction dir)
+void __dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
+		int nelems, enum dma_data_direction dir)
 {
 	const struct dma_map_ops *ops = get_dma_ops(dev);
 
@@ -369,10 +370,10 @@ void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
 		ops->sync_sg_for_cpu(dev, sg, nelems, dir);
 	debug_dma_sync_sg_for_cpu(dev, sg, nelems, dir);
 }
-EXPORT_SYMBOL(dma_sync_sg_for_cpu);
+EXPORT_SYMBOL(__dma_sync_sg_for_cpu);
 
-void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
-		int nelems, enum dma_data_direction dir)
+void __dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
+		int nelems, enum dma_data_direction dir)
 {
 	const struct dma_map_ops *ops = get_dma_ops(dev);
 
@@ -383,7 +384,8 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
 		ops->sync_sg_for_device(dev, sg, nelems, dir);
 	debug_dma_sync_sg_for_device(dev, sg, nelems, dir);
 }
-EXPORT_SYMBOL(dma_sync_sg_for_device);
+EXPORT_SYMBOL(__dma_sync_sg_for_device);
+#endif /* CONFIG_DMA_NEED_SYNC */
 
 /*
  * The whole dma_get_sgtable() idea is fundamentally unsafe - it seems
@@ -841,15 +843,45 @@ size_t dma_opt_mapping_size(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(dma_opt_mapping_size);
 
-bool dma_need_sync(struct device *dev, dma_addr_t dma_addr)
+#ifdef CONFIG_DMA_NEED_SYNC
+bool __dma_need_sync(struct device *dev, dma_addr_t dma_addr)
 {
 	const struct dma_map_ops *ops = get_dma_ops(dev);
 
 	if (dma_map_direct(dev, ops))
+		/*
+		 * dma_skip_sync could've been set to false on first SWIOTLB
+		 * buffer mapping, but @dma_addr is not necessarily an SWIOTLB
+		 * buffer. In this case, fall back to a more granular check.
+		 */
 		return dma_direct_need_sync(dev, dma_addr);
-	return ops->sync_single_for_cpu || ops->sync_single_for_device;
+
+	return true;
+}
+EXPORT_SYMBOL_GPL(__dma_need_sync);
+
+void dma_setup_skip_sync(struct device *dev)
+{
+	const struct dma_map_ops *ops = get_dma_ops(dev);
+	bool skip;
+
+	if (dma_map_direct(dev, ops))
+		/*
+		 * dma_skip_sync will be set to false on first SWIOTLB buffer
+		 * mapping, if any. During the device initialization, it's
+		 * enough to check only for DMA coherence.
+		 */
+		skip = dev_is_dma_coherent(dev);
+	else if (!ops->sync_single_for_device && !ops->sync_single_for_cpu)
+		skip = true;
+	else if (ops->can_skip_sync)
+		skip = ops->can_skip_sync(dev);
+	else
+		skip = false;
+
+	dma_set_skip_sync(dev, skip);
 }
-EXPORT_SYMBOL_GPL(dma_need_sync);
+#endif /* CONFIG_DMA_NEED_SYNC */
 
 unsigned long dma_get_merge_boundary(struct device *dev)
 {
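The exported __dma_sync_*() and __dma_need_sync() functions above pair with inline wrappers on the include/linux/dma-mapping.h side that are not part of this excerpt. Under CONFIG_DMA_NEED_SYNC they are assumed to look roughly like the sketch below; the dev->dma_skip_sync field name is an assumption of this sketch, not taken from the hunks shown here.

static inline bool dma_skip_sync(const struct device *dev)
{
#ifdef CONFIG_DMA_NEED_SYNC
	/* per-device bit set by dma_setup_skip_sync() / dma_set_skip_sync() */
	return dev->dma_skip_sync;
#else
	/* no arch/ops sync hooks exist at all in this configuration */
	return true;
#endif
}

static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
					   size_t size,
					   enum dma_data_direction dir)
{
	if (!dma_skip_sync(dev))
		__dma_sync_single_for_cpu(dev, addr, size, dir);
}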
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 176078bf22155f..dfff2690cbdd35 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -1263,6 +1263,16 @@ static unsigned long mem_used(struct io_tlb_mem *mem)
 
 #endif /* CONFIG_DEBUG_FS */
 
+static inline void swiotlb_disable_dma_skip_sync(struct device *dev)
+{
+	/*
+	 * If dma_skip_sync was set, reset it to false on first SWIOTLB buffer
+	 * mapping/allocation to always sync SWIOTLB buffers.
+	 */
+	if (unlikely(dma_skip_sync(dev)))
+		dma_set_skip_sync(dev, false);
+}
+
 phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
 		size_t mapping_size, size_t alloc_size,
 		unsigned int alloc_align_mask, enum dma_data_direction dir,
@@ -1300,6 +1310,8 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
 		return (phys_addr_t)DMA_MAPPING_ERROR;
 	}
 
+	swiotlb_disable_dma_skip_sync(dev);
+
 	/*
 	 * Save away the mapping from the original address to the DMA address.
 	 * This is needed when we sync the memory. Then we sync the buffer if
@@ -1617,6 +1629,8 @@ struct page *swiotlb_alloc(struct device *dev, size_t size)
 	if (index == -1)
 		return NULL;
 
+	swiotlb_disable_dma_skip_sync(dev);
+
 	tlb_addr = slot_addr(pool->start, index);
 	return pfn_to_page(PFN_DOWN(tlb_addr));
 
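Why the SWIOTLB hook matters, sketched with existing API calls only: a device can be IO-coherent (so sync is skipped after probe), yet a later mapping may still land in a bounce buffer, e.g. because of addressing restrictions, and from that point on the per-device shortcut must not be taken. The buffer names below are placeholders.

dma_addr_t dma = dma_map_single(dev, buf, len, DMA_FROM_DEVICE);

if (dma_mapping_error(dev, dma))
	return -ENOMEM;

/* Stays true whenever @dma ended up in a SWIOTLB bounce buffer, even if
 * the device itself is coherent and was probed with skip-sync enabled.
 */
if (dma_need_sync(dev, dma))
	dma_sync_single_for_cpu(dev, dma, len, DMA_FROM_DEVICE);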
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 4933762e5a6ba4..c789c667a0f31f 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -121,9 +121,9 @@ int page_pool_ethtool_stats_get_count(void)
 }
 EXPORT_SYMBOL(page_pool_ethtool_stats_get_count);
 
-u64 *page_pool_ethtool_stats_get(u64 *data, void *stats)
+u64 *page_pool_ethtool_stats_get(u64 *data, const void *stats)
 {
-	struct page_pool_stats *pool_stats = stats;
+	const struct page_pool_stats *pool_stats = stats;
 
 	*data++ = pool_stats->alloc_stats.fast;
 	*data++ = pool_stats->alloc_stats.slow;
@@ -179,7 +179,7 @@ static int page_pool_init(struct page_pool *pool,
 	memcpy(&pool->slow, &params->slow, sizeof(pool->slow));
 
 	/* Validate only known flags were used */
-	if (pool->p.flags & ~(PP_FLAG_ALL))
+	if (pool->slow.flags & ~(PP_FLAG_ALL))
 		return -EINVAL;
 
 	if (pool->p.pool_size)
@@ -193,22 +193,26 @@ static int page_pool_init(struct page_pool *pool,
 	 * DMA_BIDIRECTIONAL is for allowing page used for DMA sending,
 	 * which is the XDP_TX use-case.
 	 */
-	if (pool->p.flags & PP_FLAG_DMA_MAP) {
+	if (pool->slow.flags & PP_FLAG_DMA_MAP) {
 		if ((pool->p.dma_dir != DMA_FROM_DEVICE) &&
 		    (pool->p.dma_dir != DMA_BIDIRECTIONAL))
 			return -EINVAL;
+
+		pool->dma_map = true;
 	}
 
-	if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) {
+	if (pool->slow.flags & PP_FLAG_DMA_SYNC_DEV) {
 		/* In order to request DMA-sync-for-device the page
 		 * needs to be mapped
 		 */
-		if (!(pool->p.flags & PP_FLAG_DMA_MAP))
+		if (!(pool->slow.flags & PP_FLAG_DMA_MAP))
 			return -EINVAL;
 
 		if (!pool->p.max_len)
 			return -EINVAL;
 
+		pool->dma_sync = true;
+
 		/* pool->p.offset has to be set according to the address
 		 * offset used by the DMA engine to start copying rx data
 		 */
@@ -234,7 +238,7 @@ static int page_pool_init(struct page_pool *pool,
 	/* Driver calling page_pool_create() also call page_pool_destroy() */
 	refcount_set(&pool->user_cnt, 1);
 
-	if (pool->p.flags & PP_FLAG_DMA_MAP)
+	if (pool->dma_map)
 		get_device(pool->p.dev);
 
 	return 0;
@@ -244,7 +248,7 @@ static void page_pool_uninit(struct page_pool *pool)
 {
 	ptr_ring_cleanup(&pool->ring, NULL);
 
-	if (pool->p.flags & PP_FLAG_DMA_MAP)
+	if (pool->dma_map)
 		put_device(pool->p.dev);
 
 #ifdef CONFIG_PAGE_POOL_STATS
@@ -356,16 +360,24 @@ static struct page *__page_pool_get_cached(struct page_pool *pool)
 	return page;
 }
 
-static void page_pool_dma_sync_for_device(struct page_pool *pool,
-					  struct page *page,
-					  unsigned int dma_sync_size)
+static void __page_pool_dma_sync_for_device(const struct page_pool *pool,
+					    const struct page *page,
+					    unsigned int dma_sync_size)
 {
 	dma_addr_t dma_addr = page_pool_get_dma_addr(page);
 
 	dma_sync_size = min(dma_sync_size, pool->p.max_len);
-	dma_sync_single_range_for_device(pool->p.dev, dma_addr,
-					 pool->p.offset, dma_sync_size,
-					 pool->p.dma_dir);
+	__dma_sync_single_range_for_device(pool->p.dev, dma_addr,
+					   pool->p.offset, dma_sync_size,
+					   pool->p.dma_dir);
+}
+
+static __always_inline void
+page_pool_dma_sync_for_device(struct page_pool *pool, struct page *page,
+			      unsigned int dma_sync_size)
+{
+	if (pool->dma_sync && !dma_skip_sync(pool->p.dev))
+		__page_pool_dma_sync_for_device(pool, page, dma_sync_size);
 }
 
 static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
@@ -387,8 +399,7 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
 	if (page_pool_set_dma_addr(page, dma))
 		goto unmap_failed;
 
-	if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
-		page_pool_dma_sync_for_device(pool, page, pool->p.max_len);
+	page_pool_dma_sync_for_device(pool, page, pool->p.max_len);
 
 	return true;
 
@@ -433,8 +444,7 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
 	if (unlikely(!page))
 		return NULL;
 
-	if ((pool->p.flags & PP_FLAG_DMA_MAP) &&
-	    unlikely(!page_pool_dma_map(pool, page))) {
+	if (pool->dma_map && unlikely(!page_pool_dma_map(pool, page))) {
 		put_page(page);
 		return NULL;
 	}
@@ -454,8 +464,8 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
 						 gfp_t gfp)
 {
 	const int bulk = PP_ALLOC_CACHE_REFILL;
-	unsigned int pp_flags = pool->p.flags;
 	unsigned int pp_order = pool->p.order;
+	bool dma_map = pool->dma_map;
 	struct page *page;
 	int i, nr_pages;
 
@@ -480,8 +490,7 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
 	 */
 	for (i = 0; i < nr_pages; i++) {
		page = pool->alloc.cache[i];
-		if ((pp_flags & PP_FLAG_DMA_MAP) &&
-		    unlikely(!page_pool_dma_map(pool, page))) {
+		if (dma_map && unlikely(!page_pool_dma_map(pool, page))) {
 			put_page(page);
 			continue;
 		}
@@ -553,7 +562,7 @@ void __page_pool_release_page_dma(struct page_pool *pool, struct page *page)
 {
 	dma_addr_t dma;
 
-	if (!(pool->p.flags & PP_FLAG_DMA_MAP))
+	if (!pool->dma_map)
 		/* Always account for inflight pages, even if we didn't
 		 * map them
 		 */
@@ -631,7 +640,7 @@ static bool page_pool_recycle_in_cache(struct page *page,
 }
 
 /* If the page refcnt == 1, this will try to recycle the page.
- * if PP_FLAG_DMA_SYNC_DEV is set, we'll try to sync the DMA area for
+ * If pool->dma_sync is set, we'll try to sync the DMA area for
  * the configured size min(dma_sync_size, pool->max_len).
 * If the page refcnt != 1, then the page will be returned to memory
 * subsystem.
@@ -654,9 +663,7 @@ __page_pool_put_page(struct page_pool *pool, struct page *page,
 	if (likely(page_ref_count(page) == 1 && !page_is_pfmemalloc(page))) {
 		/* Read barrier done in page_ref_count / READ_ONCE */
 
-		if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
-			page_pool_dma_sync_for_device(pool, page,
-						      dma_sync_size);
+		page_pool_dma_sync_for_device(pool, page, dma_sync_size);
 
 		if (allow_direct && in_softirq() &&
 		    page_pool_recycle_in_cache(page, pool))
@@ -767,8 +774,7 @@ static struct page *page_pool_drain_frag(struct page_pool *pool,
 		return NULL;
 
 	if (page_ref_count(page) == 1 && !page_is_pfmemalloc(page)) {
-		if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
-			page_pool_dma_sync_for_device(pool, page, -1);
+		page_pool_dma_sync_for_device(pool, page, -1);
 
 		return page;
 	}
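With the pool handling both mapping and sync-for-device internally (and the sync now also elided per-device when dma_skip_sync() allows), a refill path only has to post the DMA address. An illustrative sketch; the descriptor layout and function names are hypothetical:

struct my_rx_desc {
	__le64 addr;
};

static int my_rx_refill_one(struct page_pool *pool, struct my_rx_desc *desc)
{
	struct page *page = page_pool_alloc_pages(pool, GFP_ATOMIC);

	if (unlikely(!page))
		return -ENOMEM;

	/* already DMA-mapped and synced for device by the pool as needed */
	desc->addr = cpu_to_le64(page_pool_get_dma_addr(page));

	return 0;
}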
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index 28711cc44ced21..563ea36233df9a 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -338,7 +338,6 @@ static struct xsk_dma_map *xp_create_dma_map(struct device *dev, struct net_devi
 
 	dma_map->netdev = netdev;
 	dma_map->dev = dev;
-	dma_map->dma_need_sync = false;
 	dma_map->dma_pages_cnt = nr_pages;
 	refcount_set(&dma_map->users, 1);
 	list_add(&dma_map->list, &umem->xsk_dma_list);
@@ -424,7 +423,6 @@ static int xp_init_dma_info(struct xsk_buff_pool *pool, struct xsk_dma_map *dma_
 
 	pool->dev = dma_map->dev;
 	pool->dma_pages_cnt = dma_map->dma_pages_cnt;
-	pool->dma_need_sync = dma_map->dma_need_sync;
 	memcpy(pool->dma_pages, dma_map->dma_pages,
 	       pool->dma_pages_cnt * sizeof(*pool->dma_pages));
 
@@ -460,8 +458,6 @@ int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
 			__xp_dma_unmap(dma_map, attrs);
 			return -ENOMEM;
 		}
-		if (dma_need_sync(dev, dma))
-			dma_map->dma_need_sync = true;
 		dma_map->dma_pages[i] = dma;
 	}
 
@@ -556,11 +552,9 @@ struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool)
 	xskb->xdp.data = xskb->xdp.data_hard_start + XDP_PACKET_HEADROOM;
 	xskb->xdp.data_meta = xskb->xdp.data;
 
-	if (pool->dma_need_sync) {
-		dma_sync_single_range_for_device(pool->dev, xskb->dma, 0,
-						 pool->frame_len,
-						 DMA_BIDIRECTIONAL);
-	}
+	dma_sync_single_for_device(pool->dev, xskb->dma, pool->frame_len,
+				   DMA_BIDIRECTIONAL);
+
 	return &xskb->xdp;
 }
 EXPORT_SYMBOL(xp_alloc);
@@ -632,7 +626,7 @@ u32 xp_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max)
 {
 	u32 nb_entries1 = 0, nb_entries2;
 
-	if (unlikely(pool->dma_need_sync)) {
+	if (unlikely(!dma_skip_sync(pool->dev))) {
 		struct xdp_buff *buff;
 
 		/* Slow path */
@@ -695,15 +689,14 @@ EXPORT_SYMBOL(xp_raw_get_dma);
 
 void xp_dma_sync_for_cpu_slow(struct xdp_buff_xsk *xskb)
 {
-	dma_sync_single_range_for_cpu(xskb->pool->dev, xskb->dma, 0,
-				      xskb->pool->frame_len, DMA_BIDIRECTIONAL);
+	__dma_sync_single_for_cpu(xskb->pool->dev, xskb->dma,
+				  xskb->pool->frame_len, DMA_BIDIRECTIONAL);
 }
 EXPORT_SYMBOL(xp_dma_sync_for_cpu_slow);
 
 void xp_dma_sync_for_device_slow(struct xsk_buff_pool *pool, dma_addr_t dma,
 				 size_t size)
 {
-	dma_sync_single_range_for_device(pool->dev, dma, 0,
-					 size, DMA_BIDIRECTIONAL);
+	__dma_sync_single_for_device(pool->dev, dma, size, DMA_BIDIRECTIONAL);
 }
 EXPORT_SYMBOL(xp_dma_sync_for_device_slow);
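On the driver side nothing changes for XSK zero-copy Rx: the unconditional sync call stays, but after this series it degenerates into a single per-device flag test on coherent systems instead of the per-pool dma_need_sync bool XSK used to track. A minimal sketch with hypothetical driver names:

static struct xdp_buff *my_zc_rx_one(struct xsk_buff_pool *pool,
				     struct xdp_buff *xdp, u32 len)
{
	xsk_buff_set_size(xdp, len);
	/* no-op when dma_skip_sync(pool->dev) is true */
	xsk_buff_dma_sync_for_cpu(xdp, pool);

	return xdp;	/* hand off to the XDP program */
}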